/* Copyright (c) 1998, Timothy Mann <mann@pa.dec.com> */

/* This software may be copied, modified, and used for any purpose
 * without fee, provided that (1) the above copyright notice is
 * retained, and (2) modified versions are clearly marked as having
 * been modified, with the modifier's name and the date included.  */

/* Last modified on Mon Oct 26 11:31:05 PST 1998 by mann */

/* asm6txt.c
 *
 * Usage: asm6txt < foo.asm > foo.txt
 *
 * Decode save files from ASM6 .asm format into human-readable ASCII
 * listings.  This format was used by the ASM6 assembler to save
 * source code with a little bit of compression.  It used a scheme
 * similar to Microsoft Basic, in which a predefined set of commonly
 * used strings are replaced by 8-bit tokens with the high-order bit
 * set, while other characters are saved unchanged in ASCII.  In
 * addition, most of the machine code that the source assembles to is
 * included in the save file.
 *
 * The decoding is not complete: opcodes or operand strings that did
 * not occur in the examples I worked from will not be decoded.
 * Instead, they will show up in the output as hex bytes enclosed in
 * curly braces.  It will usually be easy to determine what the
 * decoding should have been and fill in the missing table entries by
 * looking at the machine code in the left column.  There could also
 * be shortcomings in the existing decoding logic; take the results
 * with a small grain of salt. Let me know of any improvements you
 * make.
 *
 * Example output and explanation:
 *
 * {00}            START
 * {33} 216e31             LD      HL,UINF
 * {33} 116f31             LD      DE,UINF+1
 * {12} 3e20               LD      A,32
 * {01} 77                 LD      (HL),A
 * {33} 014400             LD      BC,68
 * ...
 * {12} 3e16               LD      A,@EXIT
 * {09} ef                 RST     28H             ;OUT
 * {d1} 24         UMODNAM DEFM    '$USERINF/E80',13
 *
 * The initial byte in { } is a byte of flags that appears on each
 * line in the .asm file. The meanings of the 5 high-order bits are
 * unknown. The 3 low-order bits count the number of bytes of machine
 * code for this instruction.
 *
 * Next come the bytes of machine code as recorded in the .asm
 * file. Note that pseudo-ops that generate many bytes seem to have
 * only their first byte recorded in this area.
 *
 * Next comes the important part -- the assembly code.  I don't know
 * what mnemonics were originally used by ASM6, especially for
 * pseudo-ops, so I've filled in mnemonics that should work with EDAS.
 * The details of the set DEFB/DB/DEFM/DM/DEFL/EQU/etc. may be off,
 * but I think they are basically right.  The right code seems to be
 * generated, in my limited tests.  */

#include <stdio.h>
#include <stdlib.h>

/* Token expansion tables.
 *
 * Bytes above 0x7f in a source line are tokens; main() replaces each
 * one with expansion[table][byte - 0x80].  Row 0 is used while the
 * decoder is still in the label/opcode part of the line, row 1 once it
 * has moved on to the operand part (main() selects the row with
 * "field > 1").
 *
 * Each row has 128 entries, one per token value 0x80..0xff, in
 * ascending order; the leading comment on every entry gives the token
 * value it corresponds to.  An entry of the form "{xx}" is a
 * placeholder for a token whose meaning has not been determined, so
 * the raw hex value shows up in the output instead.  Entries marked
 * with "?" are guesses.
 */
char *expansion[2][128] = {
  {
  /* This table is used for the opcode field */
    /* 0x80 */ "{80}",
    /* 0x81 */ "RLCA",
    /* 0x82 */ "{82}",
    /* 0x83 */ "RLA",
    /* 0x84 */ "RRA",
    /* 0x85 */ "{85}",
    /* 0x86 */ "CPL",
    /* 0x87 */ "SCF",
    /* 0x88 */ "CCF",
    /* 0x89 */ "{89}",
    /* 0x8a */ "DI",
    /* 0x8b */ "EI",
    /* 0x8c */ "EXX",
    /* 0x8d */ "NEG",
    /* 0x8e */ "{8e}",
    /* 0x8f */ "{8f}",
    /* 0x90 */ "LDI",
    /* 0x91 */ "{91}",
    /* 0x92 */ "{92}",
    /* 0x93 */ "{93}",
    /* 0x94 */ "LDIR",
    /* 0x95 */ "{95}",
    /* 0x96 */ "{96}",
    /* 0x97 */ "{97}",
    /* 0x98 */ "{98}",
    /* 0x99 */ "{99}",
    /* 0x9a */ "{9a}",
    /* 0x9b */ "{9b}",
    /* 0x9c */ "LDDR",
    /* 0x9d */ "{9d}",
    /* 0x9e */ "{9e}",
    /* 0x9f */ "{9f}",
    /* 0xa0 */ "{a0}",
    /* 0xa1 */ "{a1}",
    /* 0xa2 */ "AND",
    /* 0xa3 */ "CP",
    /* 0xa4 */ "OR",
    /* 0xa5 */ "SUB",
    /* 0xa6 */ "XOR",
    /* 0xa7 */ "RL",
    /* 0xa8 */ "{a8}",
    /* 0xa9 */ "RR",
    /* 0xaa */ "{aa}",
    /* 0xab */ "SLA",
    /* 0xac */ "{ac}",
    /* 0xad */ "{ad}",
    /* 0xae */ "SRL",
    /* 0xaf */ "BIT",
    /* 0xb0 */ "RES",
    /* 0xb1 */ "SET",
    /* 0xb2 */ "{b2}",
    /* 0xb3 */ "SBC",
    /* 0xb4 */ "ADD",
    /* 0xb5 */ "DEC",
    /* 0xb6 */ "INC",
    /* 0xb7 */ "CALL",
    /* 0xb8 */ "RET",
    /* 0xb9 */ "JP",
    /* 0xba */ "RST",
    /* 0xbb */ "IN",
    /* 0xbc */ "OUT",
    /* 0xbd */ "EX",
    /* 0xbe */ "PUSH",
    /* 0xbf */ "POP",
    /* 0xc0 */ "LD",
    /* 0xc1 */ "JR",
    /* 0xc2 */ "DJNZ",
    /* 0xc3 */ "{c3}",
    /* 0xc4 */ "ENDIF", /*?*/
    /* 0xc5 */ "END",
    /* 0xc6 */ "ORG",
    /* 0xc7 */ "EQU",
    /* 0xc8 */ "IF", /*?*/
    /* 0xc9 */ "COM", /*puts a comment in load module, block type 0x1f*/
    /* 0xca */ "{ca}",
    /* 0xcb */ "FILENAME", /*puts module name in load module, block type 5*/
    /* 0xcc */ "*LIST",
    /* 0xcd */ "*GET",
    /* 0xce */ "{ce}",
    /* 0xcf */ "{cf}",
    /* 0xd0 */ "DS",   /*not sure what's the diff between even/odd ones here */
    /* 0xd1 */ "DEFS",
    /* 0xd2 */ "DL",
    /* 0xd3 */ "DEFL",
    /* 0xd4 */ "DM",
    /* 0xd5 */ "DEFM",
    /* 0xd6 */ "DB",
    /* 0xd7 */ "DEFB",
    /* 0xd8 */ "DW",
    /* 0xd9 */ "DEFW",
    /* 0xda */ "{da}",
    /* 0xdb */ "{db}",
    /* 0xdc */ "{dc}",
    /* 0xdd */ "{dd}",
    /* 0xde */ "{de}",
    /* 0xdf */ "{df}",
    /* 0xe0 */ "{e0}",
    /* 0xe1 */ "{e1}",
    /* 0xe2 */ "{e2}",
    /* 0xe3 */ "{e3}",
    /* 0xe4 */ "{e4}",
    /* 0xe5 */ "{e5}",
    /* 0xe6 */ "{e6}",
    /* 0xe7 */ "{e7}",
    /* 0xe8 */ "{e8}",
    /* 0xe9 */ "{e9}",
    /* 0xea */ "{ea}",
    /* 0xeb */ "{eb}",
    /* 0xec */ "{ec}",
    /* 0xed */ "{ed}",
    /* 0xee */ "{ee}",
    /* 0xef */ "{ef}",
    /* 0xf0 */ "{f0}",
    /* 0xf1 */ "{f1}",
    /* 0xf2 */ "{f2}",
    /* 0xf3 */ "{f3}",
    /* 0xf4 */ "{f4}",
    /* 0xf5 */ "{f5}",
    /* 0xf6 */ "{f6}",
    /* 0xf7 */ "{f7}",
    /* 0xf8 */ "{f8}",
    /* 0xf9 */ "{f9}",
    /* 0xfa */ "{fa}",
    /* 0xfb */ "{fb}",
    /* 0xfc */ "{fc}",
    /* 0xfd */ "{fd}",
    /* 0xfe */ "{fe}",
    /* 0xff */ "{ff}"
  }, {
  /* This table is used for the operand field */
    /* 0x80 */ "B",
    /* 0x81 */ "C",
    /* 0x82 */ "D",
    /* 0x83 */ "E",
    /* 0x84 */ "H",
    /* 0x85 */ "L",
    /* 0x86 */ "(HL)",
    /* 0x87 */ "A",
    /* 0x88 */ "BC",
    /* 0x89 */ "DE",
    /* 0x8a */ "HL",
    /* 0x8b */ "SP",
    /* 0x8c */ "(BC)",
    /* 0x8d */ "(DE)",
    /* 0x8e */ "AF",
    /* 0x8f */ "{8f}",
    /* 0x90 */ "NZ",
    /* 0x91 */ "Z",
    /* 0x92 */ "NC",
    /* 0x93 */ "NOT", /*?*/
    /* 0x94 */ "{94}",
    /* 0x95 */ "PE",
    /* 0x96 */ "P",
    /* 0x97 */ "M",
    /* 0x98 */ "{98}",
    /* 0x99 */ "{99}",
    /* 0x9a */ "R",
    /* 0x9b */ "{9b}",
    /* 0x9c */ "{9c}",
    /* 0x9d */ "{9d}",
    /* 0x9e */ "{9e}",
    /* 0x9f */ "{9f}",
    /* 0xa0 */ "OFF",
    /* 0xa1 */ "ON",
    /* 0xa2 */ "IX",
    /* 0xa3 */ "IY",
    /* 0xa4 */ "(IX)",
    /* 0xa5 */ "(IY)",
    /* 0xa6 */ "(IX",
    /* 0xa7 */ "(IY",
    /* 0xa8 */ "{a8}",
    /* 0xa9 */ "{a9}",
    /* 0xaa */ "{aa}",
    /* 0xab */ "{ab}",
    /* 0xac */ "{ac}",
    /* 0xad */ "{ad}",
    /* 0xae */ "{ae}",
    /* 0xaf */ "{af}",
    /* 0xb0 */ "{b0}",
    /* 0xb1 */ "{b1}",
    /* 0xb2 */ "{b2}",
    /* 0xb3 */ "{b3}",
    /* 0xb4 */ "{b4}",
    /* 0xb5 */ "{b5}",
    /* 0xb6 */ "{b6}",
    /* 0xb7 */ "{b7}",
    /* 0xb8 */ "{b8}",
    /* 0xb9 */ "{b9}",
    /* 0xba */ "{ba}",
    /* 0xbb */ "{bb}",
    /* 0xbc */ "{bc}",
    /* 0xbd */ "{bd}",
    /* 0xbe */ "{be}",
    /* 0xbf */ "{bf}",
    /* 0xc0 */ "{c0}",
    /* 0xc1 */ "{c1}",
    /* 0xc2 */ "{c2}",
    /* 0xc3 */ "{c3}",
    /* 0xc4 */ "{c4}",
    /* 0xc5 */ "{c5}",
    /* 0xc6 */ "{c6}",
    /* 0xc7 */ "{c7}",
    /* 0xc8 */ "{c8}",
    /* 0xc9 */ "{c9}",
    /* 0xca */ "{ca}",
    /* 0xcb */ "{cb}",
    /* 0xcc */ "{cc}",
    /* 0xcd */ "{cd}",
    /* 0xce */ "{ce}",
    /* 0xcf */ "{cf}",
    /* 0xd0 */ "{d0}",
    /* 0xd1 */ "{d1}",
    /* 0xd2 */ "{d2}",
    /* 0xd3 */ "{d3}",
    /* 0xd4 */ "{d4}",
    /* 0xd5 */ "{d5}",
    /* 0xd6 */ "{d6}",
    /* 0xd7 */ "{d7}",
    /* 0xd8 */ "{d8}",
    /* 0xd9 */ "{d9}",
    /* 0xda */ "{da}",
    /* 0xdb */ "{db}",
    /* 0xdc */ "{dc}",
    /* 0xdd */ "{dd}",
    /* 0xde */ "{de}",
    /* 0xdf */ "{df}",
    /* 0xe0 */ "{e0}",
    /* 0xe1 */ "{e1}",
    /* 0xe2 */ "{e2}",
    /* 0xe3 */ "{e3}",
    /* 0xe4 */ "{e4}",
    /* 0xe5 */ "{e5}",
    /* 0xe6 */ "{e6}",
    /* 0xe7 */ "{e7}",
    /* 0xe8 */ "{e8}",
    /* 0xe9 */ "{e9}",
    /* 0xea */ "{ea}",
    /* 0xeb */ "{eb}",
    /* 0xec */ "{ec}",
    /* 0xed */ "{ed}",
    /* 0xee */ "{ee}",
    /* 0xef */ "{ef}",
    /* 0xf0 */ "{f0}",
    /* 0xf1 */ "{f1}",
    /* 0xf2 */ "{f2}",
    /* 0xf3 */ "{f3}",
    /* 0xf4 */ "{f4}",
    /* 0xf5 */ "{f5}",
    /* 0xf6 */ "{f6}",
    /* 0xf7 */ "{f7}",
    /* 0xf8 */ "{f8}",
    /* 0xf9 */ "{f9}",
    /* 0xfa */ "{fa}",
    /* 0xfb */ "{fb}",
    /* 0xfc */ "{fc}",
    /* 0xfd */ "{fd}",
    /* 0xfe */ "{fe}",
    /* 0xff */ "{ff}"
  }
};

/* Report a fatal error and terminate the program.
 *
 * Writes "asm6txt: <msg>" and a newline to stderr, then exits with
 * status 1.  This function does not return, so it is declared void
 * rather than relying on the obsolete implicit-int return type.
 *
 * msg: human-readable description of the problem; not modified.
 */
void errexit(const char *msg)
{
  fprintf(stderr, "asm6txt: %s\n", msg);
  exit(1);
}

/* Maximum number of machine-code bytes recorded per line; also the
 * number of two-character hex columns reserved for them in the output
 * so that the source text lines up. */
#define MAXCODE 4

/* Read the next byte of the current record from stdin.  Hitting EOF
 * here means the input was truncated in the middle of a record, which
 * is reported as a fatal error. */
static int next_byte(void)
{
  int c = getchar();
  if (c == EOF) errexit("EOF inside line");
  return c;
}

/* Decode an ASM6 .asm save file from stdin into a listing on stdout.
 *
 * Each record is laid out as:
 *   1 byte   total record length (this byte included)
 *   1 byte   flags; the low 3 bits give the number of code bytes
 *   0..MAXCODE bytes of machine code
 *   the rest: source text, where bytes above 0x7f are tokens and '\r'
 *   ends the line
 *
 * Returns 0 when EOF is reached at a record boundary.  Malformed or
 * truncated input, and failure to write the output, are reported
 * through errexit(), which exits with status 1.
 */
int main(void)
{
  for (;;) {
    int c, i, ncode, lim, field, prevc;
    int count = 0;              /* bytes consumed from this record */
    unsigned char len, magic;

    /* EOF is only legitimate here, between two records. */
    c = getchar();
    if (c == EOF) {
      if (fflush(stdout) == EOF) errexit("error writing output");
      return 0;
    }
    len = (unsigned char) c;
    /*printf("[%3d]", len);*/
    if (len < 2) errexit("line length too small?");
    count++;

    magic = (unsigned char) next_byte();
    count++;
    printf("{%02x} ", (unsigned) magic);

    /* Print the recorded code bytes, padding unused columns with
     * blanks so the source text starts at a fixed position. */
    ncode = magic & 7;
    if (ncode > MAXCODE) errexit("too many code bytes?");
    for (i = 0; i < MAXCODE; i++) {
      if (i < ncode) {
        printf("%02x", (unsigned) next_byte());
        count++;
      } else {
        printf("  ");
      }
    }
    printf("\t");

    /* Whatever is left of the record is source text.  "field" counts
     * transitions from a non-blank to a blank character.  Because
     * prevc starts out as 0, a line that begins with a blank (no
     * label) is already in field 1 when its opcode is reached; a line
     * with a label reaches field 1 after the label.  Tokens seen while
     * field <= 1 are therefore opcodes, later ones operands. */
    lim = len - count;
    field = 0;
    prevc = 0;
    for (i = 0; i < lim; i++) {
      int was_blank = (prevc == '\t' || prevc == ' ');
      int is_blank;

      c = next_byte();
      count += 1;
      is_blank = (c == '\t' || c == ' ');
      if (!was_blank && is_blank) field++;

      if (c == '\r') {
        /* End of line: flag it if the length byte disagrees with the
         * number of bytes actually consumed. */
        /*printf("[%d]", count);*/
        if (len != count) printf("[oops: len=%d, count=%d]", len, count);
        putchar('\n');
      } else if (c > 0x7f) {
        fputs(expansion[field > 1][c - 0x80], stdout);
      } else {
        putchar(c);
      }
      prevc = c;
    }
  }
}

