/*
 * Extract documents from a Model II Scripsit DOCUMENT/CTL file
 */

/********************************************************************

Notes on Model II Scripsit DOCUMENT/CTL file format
---------------------------------------------------

All pointers are 16-bit little-endian block numbers; blocks are 256 bytes,
so the byte offset of a block is (pointer << 8), e.g. "56 00" -> 0x5600

Start of file
0000:0000 47 00 48 00 3d 04 57 00 49 00 c6 00 00 00 04 00 G.H.=.W.I.......
                            |---- |----
                            |     |
                            |     |first dirent
                            |
                            |last dirent

Directory entry
             prev- next- name---------------------------- 
0000:5600 00 50 00 57 00 42 4f 42 2d 46 4f 52 45 57 4f 52 .P.W.BOB-FOREWOR
          --------------       date1-----------------  --
0000:5610 44 20 20 20 20 56 4d 20 35 2f 32 37 2f 39 30 20 D    VM 5/27/90 
          date2---------------
0000:5620 36 2f 20 39 2f 39 31 04 00 52 4f 42 45 52 54 20 6/ 9/91..ROBERT 
0000:5630 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20                 
0000:5640 20 20 20 20 20 20 20 20 20 52 4f 42 45 52 54 20          ROBERT 
0000:5650 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20                 
0000:5660 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20                 
0000:5670 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20                 
0000:5680 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20                 
0000:5690 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20                 
0000:56a0 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20                 
                                           groups--------
0000:56b0 20 20 20 20 20 1c 00 00 00 04 00[58 00]00 00 00      ......X....
          --------...?
0000:56c0 00 00 00 8e d5 53 aa aa 6a ba aa aa aa aa aa aa .....S..j.......
0000:56d0 aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa ................
0000:56e0 aa aa aa aa aa aa aa aa aa aa aa aa aa 0e 09 45 ...............E
0000:56f0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................

First group (descriptor pointers start at offset 4; in additional groups
they start at offset 0).  Each pointer is followed by 2 more bytes.
                      desc        desc        desc
0000:5800 04 01 14 00[59 00]16 00[5d 00]26 00[64 00]36 00 ....Y...].&.d.6.
          desc...
0000:5810[6d 00]46 00 00 00 00 00 00 00 00 00 00 00 00 00 m.F.............


Descriptors
             |back pointer to directory entry
             |
             |     |# of blocks containing valid data
             |     | 
             |     |           |size of last block (may omit a trailing space!)
             |---- |-          |-
0000:5900 02 56 00 01 02 01 00 38 06 05 01 00 00 00 00 00 .V.....8........
0000:5910 5a 00 5b 00 5c 00 00 00 00 00 00 00 00 00 00 00 Z.[.\...........
          |----------------------------------------------
          |block pointers

0000:5d00 02 56 00 04 02 01 00 df 09 18 01 00 00 00 00 00 .V.............
0000:5d10 5e 00 5f 00 60 00 61 00 62 00 63 00 00 00 00 00 ^._.`.a.b.c.....

0000:6400 02 56 00 06 02 01 00 14 04 19 01 00 00 00 00 00 .V..............
0000:6410 65 00 66 00 67 00 68 00 69 00 6a 00 6b 00 6c 00 e.f.g.h.i.j.k.l.

0000:6d00 02 56 00 02 02 01 00 e2 01 09 01 00 00 00 00 00 .........
0000:6d10 6e 00 6f 00 70 00 71 00 00 00 00 00 00 00 00 00 n.o.p.q.........

The last valid block pointed to by a descriptor may be terminated short
of 256 bytes by an 0x8f byte.  Use this, not the size field noted above.

********************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int debug = 0;

/* Offsets from start of document/ctl */
#define DIR_TAIL 0x06
#define DIR_HEAD 0x08

/* Offsets from start of a dirent */
#define DIR_PREV      0x01
#define DIR_NEXT      0x03
#define DIR_NAME      0x05
#define DIR_NAME_LEN  0x10 /*?*/
#define DIR_VM        0x15 /*what is this really?*/
#define DIR_VM_LEN    0x02
#define DIR_DATE1     0x17
#define DIR_DATE2     0x1f
#define DIR_DATE_LEN  0x08
#define DIR_UNK1      0x27
#define DIR_UNK1_LEN  0x02
#define DIR_TEXT1     0x29
#define DIR_TEXT1_LEN 0x20
#define DIR_TEXT2     0x49
#define DIR_TEXT2_LEN 0x6c
#define DIR_UNK2      0xb5
#define DIR_UNK2_LEN  0x06
#define DIR_GROUPP    0xbb
#define DIR_UNK3      0xbd
#define DIR_SIZE      0x100

/* Offsets from start of a group */
#define GRP_PTRS      0x04
#define GRP_PTR_LEN   0x04
#define GRP_SIZE      0x100

/* Offsets from start of a descriptor */
#define DSC_DIRP      0x01  /* 2 byte int */
#define DSC_NBLOCKS   0x03  /* 1 byte int */
#define DSC_NBYTES    0x07  /* 1 byte int */
#define DSC_PTRS      0x10
#define DSC_NEXT      0x1e  /* 2 byte int */

/* Blocks */
#define BLK_SIZE      0x100

/*
 * Read one byte from f, or die trying.
 *
 * Returns the byte read.  If getc() reports EOF (end of file or a read
 * error), prints the current file position to stderr and terminates
 * the program with exit status 1.
 */
unsigned char mustgetc(FILE* f)
{
  int c = getc(f);
  if (c == EOF) {
    /* ftell() returns long, so it needs an 'l' length modifier */
    fprintf(stderr, "unexpected EOF at %06lx\n", (unsigned long) ftell(f));
    exit(1);
  }
  return (unsigned char) c;
}

/*
 * Read exactly count items of size bytes each from f into buf, or die
 * trying.
 *
 * If fread() delivers fewer than count items, prints the current file
 * position to stderr and terminates the program with exit status 1.
 */
void mustfread(void* buf, int size, int count, FILE* f)
{
  /* fread() returns size_t; keep it in that type rather than narrowing */
  size_t got = fread(buf, (size_t) size, (size_t) count, f);
  if (got < (size_t) count) {
    /* ftell() returns long, so it needs an 'l' length modifier */
    fprintf(stderr, "unexpected EOF at %06lx\n", (unsigned long) ftell(f));
    exit(1);
  }
}

/*
 * Return the byte stored at absolute file offset where.
 *
 * Leaves the file position just past that byte.  Terminates the
 * program with exit status 1 if the seek fails (e.g. the input is not
 * seekable) or the offset is at or past end of file.
 */
unsigned char uc_at(FILE* f, int where)
{
  /* Without this check a failed seek would silently read the wrong byte */
  if (fseek(f, where, SEEK_SET) != 0) {
    fprintf(stderr, "cannot seek to %06x\n", (unsigned) where);
    exit(1);
  }
  return mustgetc(f);
}

/*
 * Return the 16-bit little-endian value stored at absolute file offset
 * where (low byte first, then high byte).
 *
 * Leaves the file position just past the two bytes.  Terminates the
 * program with exit status 1 if the seek fails or either byte is at or
 * past end of file.
 */
unsigned short us_at(FILE* f, int where)
{
  unsigned lo, hi;

  /* Without this check a failed seek would silently read the wrong bytes */
  if (fseek(f, where, SEEK_SET) != 0) {
    fprintf(stderr, "cannot seek to %06x\n", (unsigned) where);
    exit(1);
  }

  /*
   * Read the bytes in separate statements: the operands of '+' may be
   * evaluated in either order, so two reads in one expression do not
   * guarantee which byte ends up as the low half.
   */
  lo = mustgetc(f);
  hi = mustgetc(f);
  return (unsigned short) (lo | (hi << 8));
}

/*
 * Print the text fields of the directory entry at byte offset dirp.
 *
 * Output on stdout is four lines: the name, the two date strings
 * separated by a space, the first text field, and the second text
 * field.  Each field is printed up to its fixed length or up to the
 * first NUL byte, whichever comes first.
 */
void print_dirent(FILE* inf, int dirp)
{
  unsigned char dirent[DIR_SIZE];
  const unsigned char *name, *date1, *date2, *text1, *text2;

  /* Pull the whole dirent block into memory in one read */
  fseek(inf, dirp, SEEK_SET);
  mustfread(dirent, 1, DIR_SIZE, inf);

  name  = dirent + DIR_NAME;
  date1 = dirent + DIR_DATE1;
  date2 = dirent + DIR_DATE2;
  text1 = dirent + DIR_TEXT1;
  text2 = dirent + DIR_TEXT2;

  /* The unknown numeric fields are not printed */
  printf("%.*s\n%.*s %.*s\n%.*s\n%.*s\n",
         DIR_NAME_LEN, name,
         DIR_DATE_LEN, date1, DIR_DATE_LEN, date2,
         DIR_TEXT1_LEN, text1,
         DIR_TEXT2_LEN, text2);
}

/*
 * Copy the data blocks listed by one descriptor from inf to outf.
 *
 * inf    input DOCUMENT/CTL file
 * outf   output file receiving the document text
 * dirp   byte offset of the owning directory entry, used only to
 *        cross-check the descriptor's back pointer
 * descp  byte offset of the descriptor
 *
 * All blocks but the last are copied whole.  The last block is copied
 * up to (not including) the first 0x8f byte, or whole if it contains
 * none.  A back-pointer mismatch is reported on stdout but extraction
 * continues.  Terminates the program with exit status 1 on a short
 * read or a failed write.
 */
void extract_desc(FILE* inf, FILE* outf, int dirp, int descp)
{
  int dirbackp;
  int blockp;
  int nblocks, nbytes;
  unsigned char buf[BLK_SIZE];
  int p, n, i;

  if (debug) {
    printf("descriptor at %06x\n", descp);
  }
  dirbackp = us_at(inf, descp + DSC_DIRP) << 8;
  if (dirp != dirbackp) {
    printf("inconsistent back pointer in descriptor at %06x\n", descp);
  }

  nblocks = uc_at(inf, descp + DSC_NBLOCKS);
  /* The size field is read only for the disabled diagnostic below */
  nbytes = uc_at(inf, descp + DSC_NBYTES);
  if (nbytes == 0) {
    nbytes = 256; /*??*/
  }
  p = descp + DSC_PTRS;

  for (i = 1; i <= nblocks; i++) {
    blockp = us_at(inf, p) << 8;
    if (debug) {
      printf("block at %06x", blockp);
    }
    fseek(inf, blockp, SEEK_SET);
    mustfread(buf, 1, BLK_SIZE, inf);
    if (i == nblocks) {
      /* Last block: the 0x8f terminator, not the size field, marks the end */
      for (n=0; n<BLK_SIZE; n++) {
        if (buf[n] == 0x8f) break;
      }
#if 0
      if (debug && n != nbytes) {
        printf(", given size %x, last byte %x", nbytes, buf[n-1]);
      }
#endif
    } else {
      n = BLK_SIZE;
    }
    if (debug) printf(", size %x\n", n);
    /* A short write (e.g. disk full) must not pass unnoticed */
    if (fwrite(buf, 1, (size_t) n, outf) != (size_t) n) {
      fprintf(stderr, "write error extracting block at %06x\n",
              (unsigned) blockp);
      exit(1);
    }
    p += 2;
  }
}

/*
 * Extract every descriptor referenced from one group.
 *
 * groupp is the byte offset of the first descriptor pointer to examine
 * and grp_size is the number of bytes to scan from there.  Pointers
 * are taken every GRP_PTR_LEN bytes; scanning stops at the first zero
 * pointer or at the end of the range.  dirp is passed through to
 * extract_desc.
 */
void extract_group(FILE* inf, FILE* outf, int dirp, int groupp, int grp_size)
{
  const int end = groupp + grp_size;
  int slot = groupp;

  if (debug) {
    printf("group at %06x\n", groupp);
  }

  while (slot < end) {
    int descp = us_at(inf, slot) << 8;

    /* A zero pointer ends the list */
    if (descp == 0) {
      break;
    }
    extract_desc(inf, outf, dirp, descp);
    slot += GRP_PTR_LEN;
  }
}

/*
 * Extract the document whose directory entry is at byte offset dirp.
 *
 * The output file is created in the current directory and named after
 * the dirent's name field with trailing spaces removed.  If it cannot
 * be created, the error is reported on stderr and the document is
 * skipped.  The first group is read via DIR_GROUPP with its descriptor
 * pointers starting at GRP_PTRS; further group pointers are taken from
 * the 2-byte slots that follow, until a zero pointer is found.
 */
void extract_file(FILE* inf, int dirp)
{
  char name[DIR_NAME_LEN+1];
  FILE* outf;
  int groupp, p;

  fseek(inf, dirp + DIR_NAME, SEEK_SET);
  mustfread(name, DIR_NAME_LEN, 1, inf);
  name[DIR_NAME_LEN] = '\0';

  /* Strip the space padding; the p >= 0 bound covers an all-space name */
  for (p = DIR_NAME_LEN - 1; p >= 0 && name[p] == ' '; p--) {
    name[p] = '\0';
  }

  outf = fopen(name, "wb");
  if (outf == NULL) {
    perror(name);
    return;
  }

  groupp = us_at(inf, dirp + DIR_GROUPP) << 8;
  /* Block 0 is the file header, so a zero pointer means "no groups" */
  if (groupp != 0) {
    extract_group(inf, outf, dirp, groupp + GRP_PTRS, GRP_SIZE - GRP_PTRS);
    /* How far the list of extra group pointers can extend is not known */
    for (p = dirp + DIR_GROUPP + 2; /*?*/; p += 2) {
      groupp = us_at(inf, p) << 8;
      if (groupp == 0) break;
      extract_group(inf, outf, dirp, groupp, GRP_SIZE);
    }
  }

  /* Close each document as it is finished; this also flushes its data */
  if (fclose(outf) != 0) {
    perror(name);
  }
}


/*
 * Return the byte offset of the directory entry that follows the one
 * at dirp, i.e. the DIR_NEXT block pointer scaled to bytes.  The
 * result is 0 when the stored pointer is 0.
 */
int next_dirp(FILE* inf, int dirp)
{
  int next_block = us_at(inf, dirp + DIR_NEXT);

  return next_block << 8;
}

/*
 * Usage: prog [-d] [file]
 *
 * Walks the directory list of a DOCUMENT/CTL file, printing each
 * entry's text fields to stdout and extracting each document to a file
 * in the current directory.  With no file argument, stdin is used; the
 * helpers seek on the input, so it needs to be seekable.
 *
 * Returns 0 on success; exits with status 1 on a usage error or if the
 * input file cannot be opened.
 */
int main(int argc, char **argv)
{
  FILE* inf;
  int dirp;
  int argn;

  /* -d flag turns on debug output */
  argn = 1;
  if (argn < argc && strcmp(argv[argn], "-d") == 0) {
    debug = 1;
    argn++;
  }

  /* accept one filename arg, or use stdin if none */
  if (argn < argc) {
    inf = fopen(argv[argn], "rb");
    if (inf == NULL) {
      /* Name the file that failed, not argv[1] (which may be "-d") */
      perror(argv[argn]);
      exit(1);
    }
    argn++;
  } else {
    inf = stdin;
  }

  if (argn < argc) {
    fprintf(stderr, "usage: %s [-d] [file]\n", argv[0]);
    exit(1);
  }

  /* Find first dirent */
  dirp = us_at(inf, DIR_HEAD) << 8;

  /* A zero next-pointer ends the directory list */
  while (dirp) {
    print_dirent(inf, dirp);
    extract_file(inf, dirp);
    dirp = next_dirp(inf, dirp);
    printf("\n");
  }

  return 0;
}
