/* Copyright (c) 1998, 2012, Timothy Mann */

/* This software may be copied, modified, and used for any purpose
 * without fee, provided that (1) the above copyright notice is
 * retained, and (2) modified versions are clearly marked as having
 * been modified, with the modifier's name and the date included.  */

/* asm6txt.c, version 2
 *
 * Usage: asm6txt < foo.asm > foo.txt
 *
 * Decode save files from ZEUS or ASM6 .asm format into human-readable
 * ASCII listings.  This format was used by the ZEUS and ASM6
 * assemblers to save source code with a little bit of compression.
 * It uses a scheme similar to Microsoft Basic, in which a predefined
 * set of commonly used strings is replaced by 8-bit tokens with the
 * high-order bit set, while other characters are saved unchanged in
 * ASCII.  In addition, most of the machine code that the source
 * assembles to is included in the save file.
 *
 * The decoding should be complete in this version.  Vernon Hester
 * noticed that my partial table of tokens reverse engineered from
 * some files in ASM6 format matched the tokens used by ZEUS.  At this
 * point we don't know the relationship between ZEUS and ASM6.
 * 
 * There could still be shortcomings in my decoding logic; take the
 * results with a small grain of salt. Let me know of any improvements
 * you make.
 *
 * Example output and explanation:
 *
 * {00}            START
 * {33} 216e31             LD      HL,UINF
 * {33} 116f31             LD      DE,UINF+1
 * {12} 3e20               LD      A,32
 * {01} 77                 LD      (HL),A
 * {33} 014400             LD      BC,68
 * ...
 * {12} 3e16               LD      A,@EXIT
 * {09} ef                 RST     28H             ;OUT
 * {d1} 24         UMODNAM DEFM    '$USERINF/E80',13
 *
 * The initial byte in { } is a byte of flags that appears on each
 * line in the .asm file. The meanings of the 5 high-order bits are
 * unknown to me. The 3 low-order bits count the number of bytes of
 * machine code for this instruction.
 *
 * Next come the bytes of machine code as recorded in the .asm
 * file. Note that pseudo-ops that generate many bytes seem to have
 * only their first byte recorded in this area.
 *
 * Next comes the important part -- the assembly code.  The mnemonics
 * generated are standard for Z-80 assemblers, so most should be able
 * to assemble the results, unless ZEUS/ASM6-specific pseudo-ops are
 * used.  I don't know the meaning of all the pseudo-ops.
 */

#include <stdio.h>
#include <stdlib.h>

/*
 * Token expansion tables.
 *
 * Indexed as expansion[table][token - 0x80], where token is a byte with
 * the high-order bit set (0x80..0xff) read from the text part of a line.
 * Table 0 holds the strings used while decoding the opcode field and
 * table 1 the strings used while decoding the operand field.
 *
 * Token values marked "Unused" below expand to a placeholder of the
 * form "{xx}" (xx = the token value in hex).
 *
 * The table is read-only: the entries point at string literals, so both
 * the pointers and the characters they point to are declared const.
 */
const char *const expansion[2][128] = {
  {
  /* This table is used for the opcode field */
    /* 0x80 */ "NOP",
    /* 0x81 */ "RLCA",
    /* 0x82 */ "RRCA",
    /* 0x83 */ "RLA",
    /* 0x84 */ "RRA",
    /* 0x85 */ "DAA",
    /* 0x86 */ "CPL",
    /* 0x87 */ "SCF",
    /* 0x88 */ "CCF",
    /* 0x89 */ "HALT",
    /* 0x8a */ "DI",
    /* 0x8b */ "EI",
    /* 0x8c */ "EXX",
    /* 0x8d */ "NEG",
    /* 0x8e */ "RLD",
    /* 0x8f */ "RRD",
    /* 0x90 */ "LDI",
    /* 0x91 */ "CPI",
    /* 0x92 */ "INI",
    /* 0x93 */ "OUTI",
    /* 0x94 */ "LDIR",
    /* 0x95 */ "CPIR",
    /* 0x96 */ "INIR",
    /* 0x97 */ "OTIR",
    /* 0x98 */ "LDD",
    /* 0x99 */ "CPD",
    /* 0x9a */ "IND",
    /* 0x9b */ "OUTD",
    /* 0x9c */ "LDDR",
    /* 0x9d */ "CPDR",
    /* 0x9e */ "INDR",
    /* 0x9f */ "OTDR",
    /* 0xa0 */ "RETI",
    /* 0xa1 */ "RETN",
    /* 0xa2 */ "AND",
    /* 0xa3 */ "CP",
    /* 0xa4 */ "OR",
    /* 0xa5 */ "SUB",
    /* 0xa6 */ "XOR",
    /* 0xa7 */ "RL",
    /* 0xa8 */ "RLC",
    /* 0xa9 */ "RR",
    /* 0xaa */ "RRC",
    /* 0xab */ "SLA",
    /* 0xac */ "SLL",
    /* 0xad */ "SRA",
    /* 0xae */ "SRL",
    /* 0xaf */ "BIT",
    /* 0xb0 */ "RES",
    /* 0xb1 */ "SET",
    /* 0xb2 */ "ADC",
    /* 0xb3 */ "SBC",
    /* 0xb4 */ "ADD",
    /* 0xb5 */ "DEC",
    /* 0xb6 */ "INC",
    /* 0xb7 */ "CALL",
    /* 0xb8 */ "RET",
    /* 0xb9 */ "JP",
    /* 0xba */ "RST",
    /* 0xbb */ "IN",
    /* 0xbc */ "OUT",
    /* 0xbd */ "EX",
    /* 0xbe */ "PUSH",
    /* 0xbf */ "POP",
    /* 0xc0 */ "LD",
    /* 0xc1 */ "JR",
    /* 0xc2 */ "DJNZ",
    /* 0xc3 */ "IM",
    /* -------------------Pseudo-ops------------------- */
    /* 0xc4 */ "ENIF", /* EDAS ENDIF */
    /* 0xc5 */ "END",
    /* 0xc6 */ "ORG",
    /* 0xc7 */ "EQU",
    /* 0xc8 */ "IF",
    /* 0xc9 */ "COMM", /* emit comment block (type 0x1f); EDAS COM */
    /* 0xca */ "SBTL", /* EDAS SUBTTL */
    /* 0xcb */ "TITL", /* emit name block (type 5); EDAS TITLE */
    /* 0xcc */ "LIST", /* EDAS *LIST */
    /* 0xcd */ "GET",  /* EDAS *GET or *INCLUDE */
    /* 0xce */ "PAGE",
    /* 0xcf */ "ERR",
    /* 0xd0 */ "DEFS",
    /* 0xd1 */ "DS",
    /* 0xd2 */ "DEFL",
    /* 0xd3 */ "DL",
    /* 0xd4 */ "DEFM",
    /* 0xd5 */ "DM",
    /* 0xd6 */ "DEFB",
    /* 0xd7 */ "DB",
    /* 0xd8 */ "DEFW",
    /* 0xd9 */ "DW",
    /* 0xda */ "WAIT", /* unknown meaning */
    /* 0xdb */ "SHOW", /* unknown meaning */
    /* 0xdc */ "MESV", /* unknown meaning */
    /* 0xdd */ "MESP", /* unknown meaning */
    /* -------------------Unused------------------- */
    /* 0xde */ "{de}",
    /* 0xdf */ "{df}",
    /* 0xe0 */ "{e0}",
    /* 0xe1 */ "{e1}",
    /* 0xe2 */ "{e2}",
    /* 0xe3 */ "{e3}",
    /* 0xe4 */ "{e4}",
    /* 0xe5 */ "{e5}",
    /* 0xe6 */ "{e6}",
    /* 0xe7 */ "{e7}",
    /* 0xe8 */ "{e8}",
    /* 0xe9 */ "{e9}",
    /* 0xea */ "{ea}",
    /* 0xeb */ "{eb}",
    /* 0xec */ "{ec}",
    /* 0xed */ "{ed}",
    /* 0xee */ "{ee}",
    /* 0xef */ "{ef}",
    /* 0xf0 */ "{f0}",
    /* 0xf1 */ "{f1}",
    /* 0xf2 */ "{f2}",
    /* 0xf3 */ "{f3}",
    /* 0xf4 */ "{f4}",
    /* 0xf5 */ "{f5}",
    /* 0xf6 */ "{f6}",
    /* 0xf7 */ "{f7}",
    /* 0xf8 */ "{f8}",
    /* 0xf9 */ "{f9}",
    /* 0xfa */ "{fa}",
    /* 0xfb */ "{fb}",
    /* 0xfc */ "{fc}",
    /* 0xfd */ "{fd}",
    /* 0xfe */ "{fe}",
    /* 0xff */ "{ff}"
  }, {
  /* This table is used for the operand field */
    /* 0x80 */ "B",
    /* 0x81 */ "C",
    /* 0x82 */ "D",
    /* 0x83 */ "E",
    /* 0x84 */ "H",
    /* 0x85 */ "L",
    /* 0x86 */ "(HL)",
    /* 0x87 */ "A",
    /* 0x88 */ "BC",
    /* 0x89 */ "DE",
    /* 0x8a */ "HL",
    /* 0x8b */ "SP",
    /* 0x8c */ "(BC)",
    /* 0x8d */ "(DE)",
    /* 0x8e */ "AF",
    /* 0x8f */ "AF'",
    /* 0x90 */ "NZ",
    /* 0x91 */ "Z",
    /* 0x92 */ "NC",
    /* 0x93 */ "NOT",
    /* 0x94 */ "PO",
    /* 0x95 */ "PE",
    /* 0x96 */ "P",
    /* 0x97 */ "M",
    /* 0x98 */ "(C)",
    /* 0x99 */ "I",
    /* 0x9a */ "R",
    /* 0x9b */ "(SP)",
    /* 0x9c */ "HX",
    /* 0x9d */ "LX",
    /* 0x9e */ "HY",
    /* 0x9f */ "LY",
    /* 0xa0 */ "OFF",
    /* 0xa1 */ "ON",
    /* 0xa2 */ "IX",
    /* 0xa3 */ "IY",
    /* 0xa4 */ "(IX)",
    /* 0xa5 */ "(IY)",
    /* 0xa6 */ "(IX",
    /* 0xa7 */ "(IY",
    /* 0xa8 */ "(n",
    /* 0xa9 */ "n",
    /* -------------------Unused------------------- */
    /* 0xaa */ "{aa}",
    /* 0xab */ "{ab}",
    /* 0xac */ "{ac}",
    /* 0xad */ "{ad}",
    /* 0xae */ "{ae}",
    /* 0xaf */ "{af}",
    /* 0xb0 */ "{b0}",
    /* 0xb1 */ "{b1}",
    /* 0xb2 */ "{b2}",
    /* 0xb3 */ "{b3}",
    /* 0xb4 */ "{b4}",
    /* 0xb5 */ "{b5}",
    /* 0xb6 */ "{b6}",
    /* 0xb7 */ "{b7}",
    /* 0xb8 */ "{b8}",
    /* 0xb9 */ "{b9}",
    /* 0xba */ "{ba}",
    /* 0xbb */ "{bb}",
    /* 0xbc */ "{bc}",
    /* 0xbd */ "{bd}",
    /* 0xbe */ "{be}",
    /* 0xbf */ "{bf}",
    /* 0xc0 */ "{c0}",
    /* 0xc1 */ "{c1}",
    /* 0xc2 */ "{c2}",
    /* 0xc3 */ "{c3}",
    /* 0xc4 */ "{c4}",
    /* 0xc5 */ "{c5}",
    /* 0xc6 */ "{c6}",
    /* 0xc7 */ "{c7}",
    /* 0xc8 */ "{c8}",
    /* 0xc9 */ "{c9}",
    /* 0xca */ "{ca}",
    /* 0xcb */ "{cb}",
    /* 0xcc */ "{cc}",
    /* 0xcd */ "{cd}",
    /* 0xce */ "{ce}",
    /* 0xcf */ "{cf}",
    /* 0xd0 */ "{d0}",
    /* 0xd1 */ "{d1}",
    /* 0xd2 */ "{d2}",
    /* 0xd3 */ "{d3}",
    /* 0xd4 */ "{d4}",
    /* 0xd5 */ "{d5}",
    /* 0xd6 */ "{d6}",
    /* 0xd7 */ "{d7}",
    /* 0xd8 */ "{d8}",
    /* 0xd9 */ "{d9}",
    /* 0xda */ "{da}",
    /* 0xdb */ "{db}",
    /* 0xdc */ "{dc}",
    /* 0xdd */ "{dd}",
    /* 0xde */ "{de}",
    /* 0xdf */ "{df}",
    /* 0xe0 */ "{e0}",
    /* 0xe1 */ "{e1}",
    /* 0xe2 */ "{e2}",
    /* 0xe3 */ "{e3}",
    /* 0xe4 */ "{e4}",
    /* 0xe5 */ "{e5}",
    /* 0xe6 */ "{e6}",
    /* 0xe7 */ "{e7}",
    /* 0xe8 */ "{e8}",
    /* 0xe9 */ "{e9}",
    /* 0xea */ "{ea}",
    /* 0xeb */ "{eb}",
    /* 0xec */ "{ec}",
    /* 0xed */ "{ed}",
    /* 0xee */ "{ee}",
    /* 0xef */ "{ef}",
    /* 0xf0 */ "{f0}",
    /* 0xf1 */ "{f1}",
    /* 0xf2 */ "{f2}",
    /* 0xf3 */ "{f3}",
    /* 0xf4 */ "{f4}",
    /* 0xf5 */ "{f5}",
    /* 0xf6 */ "{f6}",
    /* 0xf7 */ "{f7}",
    /* 0xf8 */ "{f8}",
    /* 0xf9 */ "{f9}",
    /* 0xfa */ "{fa}",
    /* 0xfb */ "{fb}",
    /* 0xfc */ "{fc}",
    /* 0xfd */ "{fd}",
    /* 0xfe */ "{fe}",
    /* 0xff */ "{ff}"
  }
};

/*
 * Report a fatal error and terminate the program.
 *
 * Writes "asm6txt: <msg>" followed by a newline to stderr, then exits
 * with status 1.  This function does not return.
 *
 * msg: NUL-terminated description of the problem; it is only read.
 */
void errexit(const char *msg)
{
  fprintf(stderr, "asm6txt: %s\n", msg);
  exit(1);
}

main()
{
  int c, count, i, lim, field, prevc;
  unsigned char len, magic;
  for (;;) {
    count = 0;

    c = getchar();
    if (c == EOF) exit(0);
    len = c;
    /*printf("[%3d]", len);*/
    if (len < 2) errexit("line length too small?");
    count++;

    c = getchar();
    if (c == EOF) errexit("EOF inside line");
    count++;
    magic = c;
    printf("{%02x} ", magic);
    
#define MAXCODE 4
    if ((magic&7) > MAXCODE) errexit("too many code bytes?");
    for (i=0; i<MAXCODE; i++) {
      if (i < (magic&7)) {
	c = getchar();
	if (c == EOF) errexit("EOF inside line");
	printf("%02x", (unsigned char) c);
	count++;
      } else {
	printf("  ");
      }
    }
    printf("\t");
      
    lim = len - count;
    field = 0;
    prevc = 0;
    for (i=0; i<lim; i++) {
      c = getchar();
      if (c == EOF) errexit("EOF inside line");
      count += 1;
      if (prevc != '\t' && prevc != ' ' && (c== '\t' || c==' ')) field++;
      if (c == '\r') {
	/*printf("[%d]", count);*/
	if (len != count) printf("[oops: len=%d, count=%d]", len, count);
	putchar('\n');
      } else {
	if (c > 0x7f) {
	  fputs(expansion[field > 1][c - 0x80], stdout);
	} else {
	  putchar(c);
	}
      }
      prevc = c;
    }
  }
}
