/* File MSBPCT.C

Author: Robert Weiner, Programming Plus, rweiner@watsun.cc.columbia.edu

Synopsis: Translates a BOO-encoded file (produced by MSBMKB) back into its
original form.

Modification History:
  29-APR-92	Initial Beta Release
		Ideas taken from old msbpct.c (versions before
		01-may-92) and new msbmkb.c
  01-MAY-92		Added files="-", Added -q
  05-MAY-92	Release after outside testing
			Added void usage() proto
			Thanks to Christian Hemsing for OS-9 testing & defs.
			Thanks to Steve Walton for Amiga testing & defs.
  08-MAY-92	Prepare for general release
			Modified _CDECL define, Added uchar defs,
			Fixed up for MSDOS GNU CC
			Use gcc -DMSDOS to compile.
			This MSDOS GCC defines "unix" which doesn't
				help us at all!
  17-MAY-92		Add AtariST defs & Improved __STDC__ check
				from Bruce Moore
			I think I'm going to leave off the old program
			"does output file exist? overwrite y/n?" check.
			Please let me know if you think it's really required.
			Actually, I would like to force the user to
			specify the output file name always since
			embedded output names can be used maliciously.
			Removed string fns so don't need string.h.
			Added Check for ~0 removing non-nulls.
		Next general release now ready... Thanks to those
			listed in the directory below:
  12-JUL-92	Near Final release...??
			Added portability items, cmd line overrides
			ifdef UCHAR, VOID, NOANSI
			Shortened lines to 79 max (got them all?)
			Only thing not done is checking #ifdef NOUCHAR
			and adding any anding off bits which signed
			chars may introduce in unboo().

Beta Testing Information, Supported Systems Directory:
=====================================================================
( Tester / Operating System / O.S. Version / Compiler )

Rob Weiner, rweiner@watsun.cc.columbia.edu:
	MSDOS		5.0		MSC 5.1
	MSDOS		5.0		GCC (DJGPP DOS 386/G++ 1.05)
	VAX/VMS		5.4-2		VAXC 3.2
	SUNOS		4.1
	UNIXPC		3.51
Christian Hemsing, chris@v750.lfm.rwth-aachen.de:
	OS-9
Stephen Walton, swalton@solaria.csun.edu:
	AMIGA				MANX C (defines MCH_AMIGA)
Bruce J. Moore, moorebj@icd.ab.com:
	AtariST TOS/GEMDOS		MWC 3.7

Fun stuff such as my favorite testing shell command is now possible:
	$ for i in *
	do
		echo $i:
		cat $i | msbmkb -q - - | msbpct -q - - | cmp -l - $i
	done

This properly implements the Lasner ~0 fixes.

Synopsis: The en-booer writes out printable text from binary text via a 3
input char to 4 output char conversion (called "triple to quad" conversion).
Since the input text can run out before the last triple can be formed, all
en-booers (msbmkb) would add 1 or 2 nulls to the input stream to complete
the triple such that a valid quad can be output.  Thus the problem where
often a de-booer (msbpct) will create an output file from a boo encoded
file, but the output file is larger than the input file by 1 or 2 nulls.
Charles Lasner documented this problem and offered a fix... For each 1 or 2
extra null pad chars added to the input stream, the en-booer should add a
trailing ~0 to the created boo file.  The ~X null-compression sequence (where
X-'0' is the number of "repeated nulls") assigns no real meaning to "~0": it
would imply ``decode into a series of 0 nulls,'' which is a no-op for "old"
debooers.  Hence ~0 can be used as a flag that the input text had
a "padding null" added to it and then the de-booer can know NOT to add these
padding chars to the output stream.  This allows the en-boo/de-boo programs
to finally always guarantee that you get what you started with after passing
through the en-boo then de-boo process.

Some bugs/facts with the MSBPCT/MSBMKB programs which popped up
or were discovered recently (January through March 1992):
 -	CURRENT msbpct will NOT make a correct output file from
	the boo file THIS msbmkb creates.  It loses or adds a char.
		Comes from improper implementation of Lasner changes.
		Note: CURRENT enbooer with CURRENT unbooer make the
		same mistakes encoding/decoding, hence files come out
		more or less ok.
 -	OLD msbpct will create a proper output file from a boo
	file created from THIS en-booer.
 -	Current msbpct also screws up output column checking and can
	override the max (usually ~0~0 at eof) and undercut the
	standard value.
 -	Current msbpct doesn't correctly implement lasner fixes.
 -	Current msbpct tells of "using an old booer" at times
	it can determine that that statement is meaningless.
 -	Addtl improper implementation of Lasner change yields
	(quite often) an additional 2 nulls in the output file which
	are removed by an additional 2 ~0 sequence... to break even.
	ie. where old & this enbooer at eof writes "~A", the
	current (bad) booer writes "~C~0~0".
(other items not listed).

This new msbpct replaces the old one (msbpct's dated before Mar1992).
Credit should be given to the maintainers of the old msbpct:
	Original by Howie Kaye -- Columbia University 3/11/86
	Robert Weiner of Programming Plus,
	Frank da Cruz of Columbia University,
	Davide P. Cervone of University of Rochester,
	Martin Knoblauch of TH-Darmstadt, Germany,
	John Matthews of U of Delaware,
	L. John Junod of DTNSRDC,
	Christian Hemsing, RWTH Aachen, Germany.

Sorry, this seems a bit slower than previous msbpct.  Not sure why yet.
*/


#include <stdio.h>			/* only header we need */

/*
	Version Dependencies... Give each new special case its own defs:

	Each section below is selected by a compiler/system macro and
	supplies the same set of names:
		SYSTEM		system name printed by the -v option
		EXIT_GOOD	exit status passed to exit() on success
		EXIT_INFO	exit status used by usage()
		EXIT_BAD	exit status used on errors
		FOPEN_ROPTS	fopen() mode argument for the BOO input file
		FOPEN_WOPTS	fopen() mode argument(s) for the decoded
				output file
		YES_PROTOS	when defined, ANSI prototypes are declared
	FOPEN_ROPTS doubles as the "a system was recognised" flag: the
	generic section at the end is used only if it is still undefined.
*/

#ifdef VAX11C				/* VAXC032 */
#define SYSTEM		"VAX/VMS"
#define EXIT_GOOD	1
#define EXIT_INFO	3
#define EXIT_BAD	5
#define FOPEN_ROPTS	"r"
			/* open it VMS/RMS Fixed 512 - VMS Executable Format */
			/* expands to several fopen() arguments, not just one */
#define FOPEN_WOPTS	"wb","ctx=rec","mrs=512","rfm=fix"
#define YES_PROTOS
#endif

#ifdef MSDOS				/* MSC 5.1 */
#define SYSTEM		"MSDOS"
#define EXIT_GOOD	0
#define EXIT_INFO	1
#define EXIT_BAD	2
#define FOPEN_ROPTS	"r"
#define FOPEN_WOPTS	"wb"
#define YES_PROTOS
#endif

#ifdef GEMDOS                           /* AtariST - TOS - MWC v3.7 */
/* NOTE(review): the modes here are binary for reading and text for writing,
   the reverse of the MSDOS section above, although this program reads a
   text BOO file and writes binary data - confirm they are not swapped. */
/* NOTE(review): CASE_CHANGE and CHANGE_LOWER are not used or defined
   anywhere else in this file - presumably a leftover; verify. */
#define SYSTEM          "AtariST/TOS"
#define EXIT_GOOD       0
#define EXIT_INFO       1
#define EXIT_BAD        2
#define FOPEN_ROPTS     "rb"
#define FOPEN_WOPTS     "w"
#define CASE_CHANGE     CHANGE_LOWER    /* lowercase boo file name */
#define YES_PROTOS
#endif

#ifdef OSK
/* NOTE(review): CASE_CHANGE and CHANGE_NONE are not used or defined
   anywhere else in this file - presumably a leftover; verify. */
/* EXIT_INFO and EXIT_BAD share the value 1 on this system */
#define SYSTEM          "OS-9"
#define EXIT_GOOD       0
#define EXIT_INFO       1
#define EXIT_BAD        1
#define FOPEN_ROPTS     "r"
#define FOPEN_WOPTS     "w"
#define CASE_CHANGE     CHANGE_NONE     /* leave filename case sensitive */
/*
#undef  YES_PROTOS                      * default OS9 to noprotos *
*/
#endif

#ifndef FOPEN_ROPTS			/* No system found, use unix defaults */
#define SYSTEM		"UNIX/Amiga/Generic"
#define EXIT_GOOD	0
#define EXIT_INFO	1
#define EXIT_BAD	2
#define FOPEN_ROPTS	"r"
#define FOPEN_WOPTS	"w"
/*
#undef  YES_PROTOS			* default UNIX/generic to noprotos *
*/
#endif

#ifndef NOANSI				/* allow cmd line override to STDC */
#ifdef __STDC__				/* Ansi likes prototypes */
#if __STDC__				/* MWC sets this defined but 0 valued */
#define YES_PROTOS
#endif
#endif /* __STDC__ */
#endif /* NOANSI */

#ifndef VOID				/* allow cmd line override to VOID */
#define VOID void			/* assume system likes void */
#endif

#ifndef _CDECL
#define _CDECL
#endif

#ifndef __DATE__
#define __DATE__ "01-MAY-1992"
#endif

#ifndef __TIME__
#define __TIME__ "00:00:00"
#endif

/*
	Typedefs
*/
#ifndef UCHAR				/* allow cmd line override */
typedef unsigned char uchar;		/* possible portability concern */
#define UCHAR	uchar
#else
#define NOUCHAR		1		/* flag saying cmd line changed uchar */
#endif

/*
	BOO Decoder Defs:
*/
#define unchar(c)	( (c) - '0' )

/*
	Here are the function prototypes...
	If your 'C' don't like prototypes, don't declare YES_PROTOS.

	With YES_PROTOS defined the full ANSI prototypes are used; without
	it only the return types are declared (old style), so arguments
	are not checked by the compiler.

	convert	decode a whole BOO stream from one FILE to another
	get4	read the next quad or ~X sequence from the input
	output	buffered writer for decoded bytes (n==0 flushes)
	unboo	decode 4 BOO characters into 3 bytes
	usage	print the usage text and exit
*/
#ifdef YES_PROTOS
VOID _CDECL convert    (FILE *, FILE *);
int  _CDECL get4       (FILE *, UCHAR *);
VOID _CDECL output     (FILE *, UCHAR *, int);
VOID _CDECL unboo      (UCHAR *, UCHAR *);
VOID usage	       (VOID);
#else
VOID convert    ();
int  get4       ();
VOID output     ();
VOID unboo      ();
VOID usage	();
#endif

/* Program-wide state shared by main() and the decoding routines. */
long count_in = 0;	/* # of characters get4() has read from the input */
long count_out = 0;	/* # of bytes output() has written to the file */
int quiet = 0;		/* non-zero (-q) silences informational messages */

/*
	main - command line driver for the BOO decoder.

	Synopsis:
		MSBPCT [-v] [-q] input_boo_file [output_file_override]

	Leading arguments that start with '-' are options: -v prints
	version information, -q sets the quiet flag, anything else shows
	the usage text.  A lone "-" is not an option; it is a file name
	standing for stdin (as input) or stdout (as output).

	The first line of the input is the file name stored in the BOO
	file.  It names the output file unless a second file argument
	overrides it.  The rest of the input is decoded by convert().

	Never returns normally; the program ends through exit() with
	EXIT_GOOD on success, EXIT_BAD when a file cannot be opened, read
	or closed, and EXIT_INFO (from usage()) for a bad command line.
*/
int main(argc,argv)
int argc;
char **argv;
{
	FILE *fpin, *fpout;
	char outfile[BUFSIZ], *outfilptr;

	/* consume options; each one is shifted out of argv */
	while( argc > 1 && *argv[1]=='-' )
		{
		if( argv[1][1] == '\0' )	/* lone "-" is a file name */
			break;
		switch( argv[1][1] )
			{
			case 'v':		/* version */
				fprintf(stderr,
				"MSBPCT.C, Date=\"%s, %s\", System=\"%s\"\n",
					__DATE__,__TIME__,SYSTEM);
				fprintf(stderr, "\
Email comments to \"rweiner@kermit.columbia.edu\" \
(Rob Weiner/Programming Plus)\
\n");
				fprintf(stderr,"\n");
				break;
			case 'q':		/* quiet */
				quiet=1;
				break;
			default:
				usage();	/* does not return */
			}
		argc--;
		argv++;
		}

	/* what is left must be: program, input [, output override] */
	if( argc < 2 || argc > 3 )
		usage();

	if( argv[1][0]=='-' && argv[1][1]=='\0' )
		{
		fpin = stdin ;
		}
	else if( (fpin = fopen( argv[1] , FOPEN_ROPTS )) == NULL )
		{
		fprintf(stderr,"Error, cannot open input file \"%s\"\n",
			argv[1]);
		exit(EXIT_BAD);
		}

	/* first input line = file name stored by the encoder */
	if( fgets(outfile, BUFSIZ, fpin) == NULL )
		{
		fprintf(stderr,"Error, cannot read boo filename line\n");
		exit(EXIT_BAD);
		}

	/* cut the name at the first \n or \r (done without strlen so
	   that string.h is not needed) */
	outfilptr = outfile ;
	while( *outfilptr && (*outfilptr != '\n') && (*outfilptr != '\r') )
		outfilptr++;
	*outfilptr = '\0' ;
	outfilptr = outfile ;

	if( argc == 3 )		/* override on internally stored filename */
		{
		outfilptr = argv[2];
		if( !quiet )
			{
			fprintf(stderr,
			"BOO Internally stored output filename = \"%s\"\n",
				outfile);
			fprintf(stderr,
			"Command line output filename override = \"%s\"\n",
				outfilptr);
			}
		}

	if( !quiet )
		fprintf(stderr,
		       "Creating Binary File \"%s\" from BOO File \"%s\"...\n",
				outfilptr,argv[1]);

	if( outfilptr[0]=='-' && outfilptr[1]=='\0' )
		{
		fpout = stdout ;
		}
	/* FOPEN_WOPTS may expand to more than one argument (VAX/VMS) */
	else if( (fpout = fopen( outfilptr , FOPEN_WOPTS )) == NULL )
		{
		fprintf(stderr,"Error, cannot open output file \"%s\"\n",
			outfilptr);
		exit(EXIT_BAD);
		}


	convert(fpin,fpout);

	output(fpout,(UCHAR *)"",0);		/* flush output buffering */

	fclose(fpin);

	/* closing flushes the stdio buffer; if that fails the decoded
	   file is incomplete, so it must not be reported as a success */
	if( fclose(fpout) != 0 )
		{
		fprintf(stderr,"Error, cannot close output file \"%s\"\n",
			outfilptr);
		exit(EXIT_BAD);
		}

	if( !quiet )
		{
		fprintf(stderr,"Data bytes in: %ld,  ",  count_in);
		fprintf(stderr,"Data bytes out: %ld,  ", count_out);
		fprintf(stderr,
			"Difference: %ld bytes\n", count_in - count_out);
		}
	exit(EXIT_GOOD);
}

/*
	usage - write the command synopsis to stderr, then end the program
	with the EXIT_INFO status.  This function does not return.
*/
VOID usage()
{
	/* the help text, one output line per entry, null terminated */
	static char *help_text[] = {
"MSBPCT = Decode Ascii BOO Encoded File into Binary File\n",
"Usage: MSBPCT [-v(version) -q(quiet)] input_boo_file [output_file_override]\n",
"              Note: Filenames of '-' are supported for stdin & stdout\n",
		(char *)0
	};
	char **linep;

	for( linep = help_text ; *linep != (char *)0 ; linep++ )
		fputs(*linep, stderr);

	exit(EXIT_INFO);
}

/*
	convert - decode the body of a BOO stream.

	fpin	input stream, positioned just after the file name line
	fpout	stream receiving the decoded bytes (through output())

	Items are fetched with get4().  A quad is decoded with unboo() but
	its 3 bytes are held back until the next item has been seen,
	because trailing "~0" flags say how many padding nulls must be
	dropped from the very last triple.  A "~X" sequence writes X null
	bytes.  Format problems are reported on stderr as warnings and
	decoding carries on.

	The caller still has to flush with output(fpout, ..., 0).
*/
VOID convert(fpin,fpout)		/* convert every 4 chars to 3 */
FILE *fpin, *fpout;
{
	int n;
	int fill_nulls = 0;		/* # of ~0 flags after last data */
	UCHAR inbuf[10], outbuf[10];
	int must_output=0;		/* # bytes of outbuf not yet written */

	/* keep the quad buffer defined whatever get4() hands back */
	for( n = 0 ; n < 4 ; n++ )
		inbuf[n] = '0' ;

	while( (n = get4(fpin,inbuf)) != 0 )
		{
		if( n < 0 )	    /* -n is 1 more than # repeated nulls */
			{
			if( n == -1 )		/* ~0 found */
				{
				fill_nulls++;	/* count #nulls to back up */
				}
			else	{		/* ~X null compression found */
				if( must_output )  /* output last triple */
					{
					output(fpout,outbuf,must_output);
					must_output = 0;
					}
				while( ++n < 0 )
					output(fpout,(UCHAR *)"",1);
				/* ~0 must be after all data */
				fill_nulls = 0 ;
				}
			}
		else	{
			if( must_output )	/* output last triple */
				output(fpout,outbuf,must_output);

			must_output = 3 ;	/* must output last triple */

			if( n < 4 )	/* input ended inside a quad */
				{
				fprintf(stderr,
"WARNING: Detected Invalid Boo Format (Incomplete Quad at End of File)\n");
				/* n chars carry 6*n bits, which is only
				   n-1 complete bytes */
				must_output = n - 1 ;
				/* pad so unboo() never sees garbage */
				while( n < 4 )
					inbuf[n++] = '0' ;
				}

			unboo( inbuf , outbuf );

			/* output these chars the next time around */

			fill_nulls = 0 ;	/* ~0 must be after all data */
			}
		}

	if( fill_nulls > 0 )
		{
		if( !quiet )
			fprintf(stderr,"Fill Nulls = %d\n",fill_nulls);

		/* by definition, if there are ~0, there must be a triple */
		if( must_output < 3 )	/* we expect a triple when see ~0s */
			{
			fprintf(stderr,
"WARNING: Detected Invalid Boo Format (~0 after non-triple)\n");
			fprintf(stderr,
"WARNING: Output File is probably %d nulls greater than original input file\n",
				fill_nulls);
			}
		else	{
			/* drop one padding byte per ~0 flag */
			must_output -= fill_nulls ;

			/* the bytes being dropped should all be nulls */
			if( ((fill_nulls>0) && (outbuf[2]!='\0')) ||
			    ((fill_nulls>1) && (outbuf[1]!='\0')) )
				{
				fprintf(stderr,
"WARNING: Detected Invalid Boo Format (Non-Null Chars Removed by ~0)\n");
				}
			}
		}
	if( must_output > 0 )  /* output last, possibly ~0 reduced, triple */
		output(fpout, outbuf, must_output);
}

/*
	get4 - fetch the next item of the BOO stream.

	fp	input stream
	buf	receives the characters of a quad (room for 4 needed)

	Returns:
		 4	a full quad was stored in buf
		 1..3	end of file was hit after that many quad chars
		 0	end of file, nothing read
		-(X+1)	a "~X" sequence was read, X being the repeat
			count; so -1 stands for "~0"

	Line end characters (\n and \r) carry no data and are skipped; a
	\r is never a legal BOO character, so skipping it lets files with
	CR/LF line ends decode even when they are read untranslated.
	Every character read, skipped or not, is added to count_in.

	A read error, or a character after '~' that is not in the BOO
	repeat range '0'..'~', ends the program with EXIT_BAD.
*/
int get4( fp , buf )	/* return: pos=# read, neg=# nulls + 1 found */
FILE *fp;
UCHAR *buf;
{
	int i=0;		/* amt last read */
	int nulls=0;		/* 1 once the '~' prefix has been seen */
	int c;

	do	{
		if( (c = getc(fp)) == EOF )		/* hit eof */
			{
			if( ferror(fp) )		/* quick check */
				{
				fprintf(stderr,
					"get4(): fread error on input file\n");
				exit(EXIT_BAD);
				}
			if( nulls == 1 )	/* '~' with nothing after */
				fprintf(stderr,
"WARNING: Detected Invalid Boo Format (~ at End of File)\n");
			break;				/* stop */
			}
		count_in++;

		if( c == '\n' || c == '\r' )	/* line ends mean nothing */
			continue;

		if( i == 0 )			/* not in quad yet */
			{
			if( nulls == 1 )	/* this char IS #nulls now */
				{
				/* a count below '0' would come out as 0
				   or positive and be taken for eof or
				   for a quad by the caller */
				if( c < '0' || c > '~' )
					{
					fprintf(stderr,
			"get4(): invalid null repeat count in input file\n");
					exit(EXIT_BAD);
					}
						/* add 1 as a ~0 flag */
				nulls = unchar( c ) + 1 ;
				return( -nulls ); /* got it, return */
				}
			else if( c == '~' ) /* null repeat prefix */
				{
				nulls=1;
				continue;
				}
			}

		i++;				/* count till 4 */
		*buf++ = c ;			/* save chars */
		} while( i <= 3 );
	return(i);
}

/*
	output - buffered writer for the decoded data.

	fp	stream to write to
	s	bytes to add (not read when n <= 0)
	n	> 0  append n bytes from s
		== 0 flush: write whatever is buffered
		< 0  back up: discard the last -n buffered bytes

	Bytes collect in a static buffer of BUFSIZ chars.  The buffer is
	written with one fwrite() when more data arrives than it can
	hold, or on a flush; count_out grows by the amount written.  A
	write error, or a back up request larger than the buffered
	amount, ends the program with EXIT_BAD.

	All space checks compare pointers that stay inside buf (or one
	past its end), so no out of range pointer is ever formed, and any
	n is handled, however large.
*/
VOID output(fp,s,n)		/* output chars, n==0 = flush buffer */
FILE *fp;
UCHAR *s;
int n;
{
	static char buf[BUFSIZ];
	static char *p=buf;		/* next free slot in buf */
	int flush = (n==0) ;
	unsigned count;

	if( n < 0 )			/* ~0 backup */
		{
		if( (p - buf) + n < 0 )	/* ensure there is stuff to delete */
			{
			fprintf(stderr,
	"output(): Error, no chars in buffer to backup output stream\n");
			exit(EXIT_BAD);
			}
		p += n ;		/* backup ptr */
		return;
		}

	do	{
		/* take what fits in the current buffer */
		while( (n > 0) && (p < (buf+sizeof(buf))) )
			{
			*p++ = *s++ ;
			n-- ;
			}

		/* buffer full with data still waiting, or flush wanted */
		if( (n > 0) || flush )
			{
			/* this must be "p-buf,1" ordered here for VMS
			   varying recs to come out right, probably helps
			   fixed 512 too */

			count = p - buf ;
			if( (count>0) &&
			   (fwrite( buf , count , 1 , fp ) != 1) )
				{
				fprintf(stderr,
				"output(): fwrite error on output file\n");
				exit(EXIT_BAD);
				}

			count_out += count ;
			p = buf ;
			}
		} while( n > 0 );	/* don't forget leftovers */
}

VOID unboo( inbuf , outbuf )	/* here is where we unboo 4 into 3 chars */
UCHAR *inbuf, *outbuf;
{
	UCHAR x,y,z,a,b,c,d;

	/* get a,b,c,d the 4 booed bytes */

	a = unchar( *inbuf++ );
	b = unchar( *inbuf++ );
	c = unchar( *inbuf++ );
	d = unchar( *inbuf   );

	/* calc x,y,z the 3 unbooed bytes */
	/* we shouldn't need some of these &ands below,
	   except to make sure input data is still 6 bit */

	x = (a << 2) | ((b >> 4) & 003) ;
	y = (b << 4) | ((c >> 2) & 017) ;
	z = (c << 6) | (d        & 077) ;

	*outbuf++ = x;
	*outbuf++ = y;
	*outbuf   = z;
}

/*
	[EOF]
*/
