/*
 * MP -- Macro and conditional compilation preprocessor for C
 */

/*)BUILD	$(PROGRAM)	= mp
		$(INCLUDE)	= { mpdefs.h mpextr.h }
		$(FILES)	= { mp mpexpr mpsym mputil }
		$(STACK)	= 10000
		$(TKBOPTIONS)	= {
			STACK	= 1024
			TASK	= ...MPC
			ACTFIL	= 14
			UNITS	= 14
		}
*/

#ifdef	DOCUMENTATION

title	mp	Macro Pre-processor
index		Macro Pre-processor for C programs

synopsis

	mp [files]

description

	Mp is a general pre-processor for C programs.  It implements the full
	C pre-processor syntax:
	.lm +16
	.s.i-16;_#line		Ignored (needed for communication with program
	generators such as lex).
	.s.i-16;_#if		Conditional compilation.
	.s.i-16;_#ifdef		Conditional compilation.
	.s.i-16;_#else		Conditional compilation.
	.s.i-16;_#endif		Conditional compilation.
	.s.i-16;_#message	Writes a message on the user terminal.
	.s.i-16;_#define		Define a symbol or macro.
	.s.i-16;_#include	(nested) source files.
	.s.lm -16
	See Kernighan and Ritchie for details of the C source file format.
	.s
	On Decus C, the compiler preprocessor phase may be suppressed by
	invoking the compiler with the "-m" switch:
	.s.nf
		MP file.c	(writes file.mpc)
		XCC file.mpc -m
	.s.f

diagnostics

	.lm +8
	.s.i -8;_... many, should be self-explanatory.
	.lm -8

author

	Robert W. Harper, Jr.
	.s
	Mp was distributed via the Unix user's group.

bugs

#endif

/*
 *	Designed and written by Robert W. Harper, Jr. in fulfillment of the
 *	programming requirements for ICSS-580 Systems Programming taken
 *	during Winter quarter 1978/79, with invaluable assistance from
 *	Mike Lutz, my project advisor, who helped me with some sticky
 *	design issues and originally wrote the expression evaluator.
 *
 *	Version 1.10 - 4/18/79 
 *
 *	Edit log -- document all source changes here (and increment
 *		the edit level!) 
 *	   First release: 4/4/79 RWH 
 *	   Fixed conditional compilation bugs and changed #include paths:
 *		4/7/79 RWH 
 *	   Added support for character constants: 4/10/79 RWH 
 *	   Fixed bug re extraneous <SOH> in front of #include line itself:
 *		4/10/79 RWH 
 *	   Fixed incorrect support of character constants: 4/12/79 RWH 
 *	   Made alpha() and alphanum() macro's instead of functions:
 *		4/12/79 RWH 
 *	   Fixed #ifdef/#ifndef argument length bug: 4/12/79 RWH 
 *	   Rewrote concat() to make it faster: 4/12/79 RWH 
 *	   Allowed white space between '#' and directive: 4/13/79 RWH 
 *	   Modified actual() to propagate actuals through nested calls:
 *		4/13/79 RWH 
 *	   Added _FILENAME and _LINE: 4/18/79 RWH
 *	   Fixed pop_ala() so that works correctly: 4/18/79 RWH
 *
 *	   Added code for decus and vms implementations, also for stdio.
 *		05-Sep-80 MM
 *	   More Decus stuff ... provide same predefined symbols as provided
 *	    by the Decus compiler's preprocessor. Also made '-P' flag
 *	    default to 'on', suppressing <SOH> stuff, for Decus compiler,
 *	    which does not recognize this.  05-Dec-80 RBD
 *
 *				N O T E
 *		It is necessary to compile this preprocessor
 *		on the TARGET operating system for the pre-
 *		defined symbols to be correct.
 *
 *	   Changed directory search list to look for 'lb:' on RSX, and
 *	    'sy:' on RT-11.  05-Dec-80 RBD
 *
 *	   Eliminate leading whitespace in macro expansion. 06-Dec-80 RBD
 *
 *	   Display filespec string on failing #include file opens, instead
 *	     of useless, terse "File not found". 06-Dec-80 RBD
 *
 *	   Suppress call to 'free' on include file open fail for stdio
 *	     version. Fix search list so it looks at "LB:[1,1]" for RSX.
 *	     Bob Denny  29-May-81
 *
 *	   Added automatic insertions of #LINE <line#> p.<page#> <filename>
 *	     for RSX systems.  <SOH> code completely removed.  -P now
 *	     defaults to off.  Introduced default file extensions of .C
 *	     and .MPC.  Made output file default to <input.MPC>.
 *	     Scott Roth, 25-Aug-81
 *
 *	   Merged into "normal" distribution.  #LINE (as defined above)
 *	     is now only available by #define'ing UNIMATION in mpdefs.h.
 *	     Martin Minow, 28-Mar-82
 *
 *	This implementation of the C macro and conditional compilation
 *	preprocessor is designed to bring the currently-distributed version
 *	of the C compiler up to the standards defined in "The C Programming
 *	Language" by Kernighan and Ritchie (sine qua non).  The only
 *	preprocessor feature that remains unimplemented is the "#line"
 *	directive.  This version is implemented as an independent program,
 *	designed to be forked to by "cc", much like the other passes of the
 *	compiler, though it may be used independently of the C compiler for
 *	any similar purpose.  The calling syntax is
 *
 *		"mp [-P] input_file output_file"
 *
 *	where the "-P" switch causes suppression of the <SOH> convention
 *	used by the compiler to flag included files (usually all lines of
 *	source stemming from an include file are preceded by a <SOH>
 *	character so that the compiler's line numbers agree with the source).
 *	NOTE: the '-P' switch is turned on permanently for the Decus version.
 *	(and under the vax native compiler -- MM 28-Mar-82)
 *
 *	All routines are documented with introductory comments preceding the
 *	definition of the function, describing the techniques used therein.
 *	Other terse comments appear within the source code to clarify certain
 *	sticky parts.  The overall design of the processor is line-oriented,
 *	in that it reads a line, processes as necessary and then writes it
 *	out.  Due to lack of clarity in the specification of constant
 *	expressions in the aforementioned text, this processor evaluates
 *	expressions by first running them through the macro expansion routine,
 *	then calling the recursive-descent parser/evaluator.
 *
 *	Note that recursion is used in two major areas of the design -- in
 *	the expression evaluator and in the macro expansion routine.
 *	Inherent in using a recursive solution to a problem is a certain
 *	degree of difficulty that arises when an error condition occurs.
 *	In order to solve this, the processor uses two small assembly-language
 *	routines, envsave() and envreset(), to save the frame pointer prior
 *	to the call of either routine (expand() or expr()); upon encountering
 *	an error, envreset() is called to restore the environment to that of
 *	the top-level caller and force a particular value to be returned.
 *	(envsave and envreset were changed to setjmp() and longjmp() for
 *	compatibility with other stdio libraries).
 *
 *	The code has been fairly thoroughly tested, but inevitably bugs will
 *	appear and complaints about the design will crop up.  If you encounter
 *	any bugs or have any suggestions for enhancements, contact the author
 *	or Mike Lutz at RIT School of Computer Science.
 */

/*
 * Include compile-time constant definitions, external references,
 * and structures.
 */

/*
 * Either DEC target (decus or vms) selects the standard I/O library
 * and marks the build as a Digital one; the rest of this file keys
 * off the 'stdio' and 'Digital' symbols.
 */
#ifdef	decus
#define stdio
#define Digital
#endif

#ifdef	vms
#define	stdio
#define	Digital
#endif

/*
 * With stdio selected <stdio.h> is pulled in; otherwise only a
 * local NULL is supplied here.
 */
#ifdef	stdio
#include <stdio.h>
#else
#define	NULL	(0)
#endif

#ifdef	vms
/*
 * This creates files in vanilla RMS on VMS V2
 */
extern FILE *fdopen();
#define	CREATE(f, m) fdopen(creat(f, 0, "rat=cr", "rfm=var"), m)
#else
/*
 * Everywhere else the output file is created with plain fopen().
 */
#define	CREATE	fopen
#endif

#include "mpdefs.h"
#include "mpextr.h"

/*
 * Setjmp/longjmp stuff
 */
#include	<setjmp.h>
jmp_buf		jump_env;

/*
 * Directives
 */

/*
 * dir_tbl[] holds the directive spellings (NIL-terminated) and
 * dir_type[] holds the matching mnemonics in the same order.
 * NOTE(review): the two look like parallel arrays walked together by
 * dir_find() -- confirm there, and keep both lists in step when a
 * directive is added.
 */
char *dir_tbl[] = { /* Directive name table */
	"define",
	"include",
	"undef",
	"if",
	"ifdef",
	"ifndef",
	"else",
	"endif",
	"message",
	"line",
	NIL
};
int dir_type[] = {	/* Directive mnemonics for 'switch' */
	DEF,
	INCL,
	UNDEF,
	IF,
	IFD,
	IFN,
	ELSE,
	ENDIF,
	MSG,
	LINE,
};

/*
 * err_cnt decides the final status main() hands to exits(): 1 while
 * it is still zero, 0 otherwise.
 */
int err_cnt = 0;		/* Error counter			*/

/*
 * nocomp is forced on by the -p/-P switch in main(); readline() only
 * writes its line-number marker when returning from an include file
 * if nocomp is FALSE.  syn_err is the usage message printed for any
 * command line error; exactly one definition is selected per target.
 */
#ifdef Digital
int nocomp = TRUE;		/* No <SOH> stuff for Decus compiler	*/
#ifdef rsx
char syn_err[] = {"RSX version. Syntax: mpc ifile ofile"};
#else
#ifdef rt11
char syn_err[] = {"RT-11 Version. Syntax: run mp \"ifile ofile\""};
#else
#ifdef	vms
char syn_err[] = { "Vax-11C version. Syntax: mp ifile ofile"};
#else
char syn_err[] = {"Decus compiler assumed. Syntax: mp ifile ofile"};
#endif
#endif
#endif
#else
/*
 * True Unix
 */
int nocomp = FALSE;		/* No-compilation flag			*/
char syn_err[] = {"Syntax: mp [-P] ifile ofile"};
#endif

/*
 * I/O-related stuff
 */

/*
 * Both files are opened in main().  curinfil is replaced while an
 * #include file is being read: non_cond() stacks the old value and
 * readline() pops it back at end of file.
 * NOTE(review): 'struct buf' is declared outside this file --
 * presumably the I/O buffer type of the non-stdio library; confirm.
 */
#ifdef stdio
FILE	*curinfil	= NULL;	/* Pointer to current input file	*/
FILE	*outfil		= NULL;	/* Pointer to output file		*/
#else
struct buf *curinfil	= NIL;	/* Pointer to file buffer for current	*/
				/* input file				*/
struct buf *outfil	= NIL;	/* Current output file buffer pointer	*/
#endif

/*
 * #include
 */

/*
 * inclstk is the stack of suspended input files, backed by inclpdl[].
 * NOTE(review): the initializer presumably reads {capacity, current
 * depth, storage} -- confirm against 'struct stack' in mpdefs.h.
 * NOTE(review): non_cond() pushes the current input file pointer onto
 * this int array and readline() casts the popped value back to a
 * pointer; that only round-trips where a pointer fits in an int.
 */
int inclpdl[INCDEPTH];
struct stack inclstk = {
	INCDEPTH,
	0,
	inclpdl
};
int inclvl = 0;			/* #include nesting level		*/
int inclflag = FALSE;		/* Ugly fix for extraneous <SOH> before	*/
				/* #include line itself			*/
/*
 * Prefixes tried, in order, in front of an #include file name.
 * Entry 0 (the empty prefix) is used only for "..." includes;
 * non_cond() starts at entry 1 for <...> includes.  The list is
 * NIL-terminated.
 */
char *srchlist[] = {
	"",			/* This must be first!			*/
#ifdef Digital			/* Next, look on device 'C:', then ...	*/
	"c:",
	"lb:",			/* RSTS/E, VMS				*/
#ifdef rt11			/* On RT-11, system disk is SY:		*/
	"sy:@",			/* RSTS/E				*/
	"sy:",
#else
#ifdef rsx
	"sy:@",			/* RSTS/E				*/
	"lb:[1,1]",		/* On RSX, default task/library disk LB:*/
#endif
#endif
#else
	"/usr/include/",	/* UNIX					*/
#endif
	NIL			/* This must be last!			*/
};

/*
 * Conditional compilation
 */

/*
 * ifstack saves the (father, self) pair of every open #if level:
 * cond() pushes father then self on #if/#ifdef/#ifndef and pops them
 * in the reverse order on #endif.  Text is passed through only while
 * both flags are TRUE.
 */
int ifpdl[IFDEPTH];
struct stack ifstack = {
	IFDEPTH,
	0,
	ifpdl
};
int father = TRUE;		/* Previous level expansion flag	*/
int self = TRUE;		/* Current level expansion flag		*/

/*
 * Input line processing related data structures
 */

char	line[LINESZ];			/* Current (joined) input line	*/
int	lineno[INCDEPTH];		/* Line number, per include level */
int	in_com = FALSE;			/* TRUE if in a comment		*/
/*
 * fname[] is an array of character arrays, so it is cleared with a
 * plain zero rather than NULL: NULL is a pointer constant and is not
 * a valid initializer for a char where it is defined as ((void *)0).
 */
char	fname[INCDEPTH][40] = { { 0 } };	/* Include file names	*/
int	pageno[INCDEPTH];		/* Include page number		*/
int	linefeeds = 1;			/* Count line feeds as output	*/

struct sym *_line;		/* _LINE macro -- definition is current	*/
				/* line number				*/
char *lp;			/* Line pointer				*/

/*
 * Macro definition and expansion data structures
 */

/*
 * NOTE(review): "ala" presumably stands for "argument list array" --
 * confirm with formal()/actual() in the other source files.
 * def_ala holds up to ALASIZE names of at most MAXIDLEN characters;
 * call_ala holds up to ALASIZE string pointers.
 */
char def_ala[ALASIZE][MAXIDLEN+1];
				/* Definition ala			*/
char *call_ala[ALASIZE];	/* Dynamically-allocatable call ala	*/
char *callpdl[DEFDEPTH];

struct stack callstk = {
	DEFDEPTH,
	0,
	callpdl
};
				/* Call stack - used to process nested	*/
				/* macro calls				*/
/*
 * refstack is backed by refpdl[], an array of DEFDEPTH symbol pointers.
 */
struct sym *refpdl[DEFDEPTH];
struct stack refstack = {
	DEFDEPTH,
	0,
	refpdl
};
				/* Stack for referenced symbols		*/
				/* Stack for referenced symbols		*/

/*
 * Main driver
 */

/*
 * main -- parse the command line, open the input and output files,
 * predefine the built-in symbols and run the line-by-line driver loop.
 *
 *	argc, argv	The usual command line.  "-p"/"-P" sets nocomp;
 *			any other switch is a syntax error.  At most two
 *			non-switch arguments are accepted: the input
 *			file and the output file.
 *
 * In the stdio build the input name gets ".C" appended when it
 * contains no '.', and the output name defaults to the input name
 * with ".MPC" in place of its extension.  The non-stdio build needs
 * both names spelled out.
 *
 * Terminates through exits(): 2 after a usage or file-open error
 * (the non-stdio build uses exit(1) for open failures), otherwise 1
 * when no errors were counted and 0 when err_cnt is non-zero.
 */
main(argc, argv)
char *argv[];
int argc;
{
	register char *tp1, *tp2;
	register int i;
	char name[MAXIDLEN+1];
	char name_buf[50], *f;
	int tvec[2], dir_kind, filecnt;

	/*
	 * Initialize line number stack
	 */
	lineno[0] = 0;
	pageno[0] = 1;

	/*
	 * Scan the arguments; tvec[] remembers the argv index of each
	 * file name.  tvec has room for exactly two entries, so a third
	 * file name must be rejected before it is stored.
	 */
	for (i = 1, filecnt = 0; i < argc; i++)
		if (*argv[i] == '-')
			switch (argv[i][1]) {
			case 'p':
			case 'P':
				nocomp = TRUE;
				continue;
			default:
				puts(syn_err);
				exits(2);
			}
		else
			if (filecnt >= 2) {
				puts(syn_err);
				exits(2);
			}
			else
				tvec[filecnt++] = i;

	if (filecnt == 0) {
		puts(syn_err);
		exits(2);
	}

	/*
	 * name_buf must hold a file name plus a default extension of
	 * up to four characters (".MPC") and the terminator.
	 */
	for (i = 0; i < filecnt; i++)
		if (strlen(argv[tvec[i]]) > sizeof(name_buf) - 5) {
			puts("File name too long");
			exits(2);
		}

#ifdef stdio
	f = strcpy(name_buf, argv[tvec[0]]);
	while (*f != '.' && *f != EOS)
		f++;
	if (*f == EOS)
		strcpy(f, ".C");
	if ((curinfil = fopen(name_buf, "r")) == NULL) {
		perror(name_buf);	/* Report the name really tried	*/
		exits(2);
	}
	if (filecnt == 1)
		*f = EOS;		/* f -> after file name		*/
	else {
		f = strcpy(name_buf, argv[tvec[1]]);
		while (*f != '.' && *f != EOS)
			f++;
	}
	if (*f == EOS)
		strcpy(f, ".MPC");
	if ((outfil = CREATE(name_buf, "w")) == NULL) {
		/*
		 * tvec[1] is not set when the output name was
		 * defaulted, so report name_buf, not argv[tvec[1]].
		 */
		perror(name_buf);
		exits(2);
	}
	*f = EOS;			/* name_buf has out name	*/
#else
	/*
	 * No default output name here: both files must be given,
	 * otherwise tvec[1] would be used uninitialized below.
	 */
	if (filecnt < 2) {
		puts(syn_err);
		exits(2);
	}
	curinfil = get_mem(sizeof *curinfil);
	if (fopen(argv[tvec[0]], curinfil) < 0) {
		puts("Can't open input file");
		exit(1);
	}
	outfil = get_mem(sizeof *outfil);
	if (fcreat(argv[tvec[1]], outfil) < 0) {
		puts("Can't create output file");
		exit(1);
	}
	strcpy(name_buf, argv[tvec[1]]);
	f = name_buf + strlen(name_buf);
#endif
	sym_init();		/* Initialize symbol table */

	/*
	 * _FILENAM is the input file argument wrapped in double
	 * quotes: two quotes plus the terminator make i+3 bytes.
	 */
	i = strlen(argv[tvec[0]]);
	sym_enter("_FILENAM", 0, tp1 = get_mem(i+3));
	*tp1++ = '"';
	strcpy(tp1, argv[tvec[0]]);
	*(tp1+i) = '"';
	*(tp1+i+1) = EOS;

	/*
	 * _LINE starts out as six blanks in quotes (8 chars + EOS).
	 */
	_line = sym_enter("_LINE", 0, tp1 = get_mem(9));
	strcpy(tp1, "\"      \"");

#ifdef Digital
#ifdef decus
	sym_enter("decus",0,"");    /* Do predefines like Decus compiler */
#ifdef	nofpu
	sym_enter("nofpu",0,"");
#endif
	sym_enter("pdp11",0,"");
#endif
#ifdef vax
	sym_enter("vax",0,"");
#endif
#ifdef rsx
	sym_enter("rsx",0,"");
#endif
#ifdef rt11
	sym_enter("rt11",0,"");
#endif
#endif
	/*
	 * NOTE(review): tvec[] is reused here as the buffer handed to
	 * time()/ctime(); this assumes two ints are large enough for
	 * the value time() stores -- confirm on the target.
	 */
	time(&tvec[0]);		/* Get encoded time and date		*/
	tp1 = ctime(&tvec[0]);	/* Convert to text string 		*/
	*(tp1+24) = '"';	/* Replace ending '\n' with a '"'	*/
	tp2 = get_mem(27);	/* Date-time string is 27 characters	*/
	sym_enter("_DATE", 0, tp2);
	*tp2++ = '"';		/* Emit initial double quote */
	strcpy(tp2, tp1);

	/*
	 * Main driving loop of the processor
	 */

	while (readline() != EOF) {
		tp1 = skipblnk(lp);
		if (tp1[0] == '#' && tp1[1] != EOS && !in_com) {
			/*
			 * A non-null directive?
			 */
			lp = skipblnk(&tp1[1]);
			lp = get_id(lp, name);
			lp = skipblnk(lp);
			if ((dir_kind = dir_find(name)) == ERROR)
				printerr("Illegal directive");
			else {
				/*
				 * Ordinary directives are honoured only
				 * while text is being passed through;
				 * conditionals are always tracked.
				 */
				if (father && self) non_cond(dir_kind);
				cond(dir_kind);
			}
		}
		/*
		 * Directive lines and skipped text go out as empty lines.
		 */
		if (*tp1 == '#' || !(father && self)) line[0] = EOS;
		writeline(line);
	}

	if (!empty(&ifstack))		/* Check for dangling if's	*/
		printerr("Unterminated #if");
#ifdef stdio
	fclose(outfil);
#else
	fflush(outfil);			/* Flush output file buffer	*/
	close(outfil->fildes);		/* Close output file		*/
#endif
	exits((err_cnt == 0) ? 1 : 0);
} /* end main() */

/*
 *	Process conditional compilation directives.  This procedure is
 *	always executed, regardless of whether or not we are flushing due
 *	to a false conditional. The general technique used here is as follows.
 *	The variables 'self' and 'father' contain the truth value of the
 *	current if-else-endif clause and the immediately enclosing clause,
 *	respectively.  Whenever an if is encountered, both are stacked,
 *	father &= self (to propagate any false conditionals), and
 *	self = eval(arg).  
 *
 *	Upon encountering an else, self = !self, to reverse the sense of the
 *	current condition.  An endif causes the condition stack to be popped
 *	into father and self, restoring our context to the immediately
 *	enclosing level.
 *
 * Note:
 *	This is a bad algorithm, as it requires a stack (and thus a maximum
 *	nesting level).  A true/false counter arrangement is to be preferred.
 *
 */

/*
 * cond -- track one conditional-compilation directive.
 *
 *	dir_kind	Directive mnemonic; only IF, IFD, IFN, ELSE and
 *			ENDIF have any effect, anything else is ignored.
 *
 * Updates the globals 'father', 'self' and 'ifstack', and advances
 * 'lp' past the symbol of an #ifdef/#ifndef.  Problems are reported
 * through printerr(); nothing is returned.
 */
cond(dir_kind)
register int dir_kind;
{
	char ident[MAXIDLEN+1];		/* Operand of #ifdef/#ifndef	*/

	/*
	 * Opening a level: save both flags, fold the old 'self' into
	 * 'father', then work out the new 'self'.
	 */
	if (dir_kind == IF || dir_kind == IFD || dir_kind == IFN) {
		if ((push(father, &ifstack) == ERROR)
				|| (push(self, &ifstack) == ERROR)) {
			printerr("Maximum #if depth exceeded");
			return;
		}
		father &= self;
		if (!father) {
			/* Enclosing text is skipped, so this level is too */
			self = FALSE;
			return;
		}
		if (dir_kind == IF) {
			self = (expr(lp) != 0);
			return;
		}
		lp = get_id(lp, ident);
		if (dir_kind == IFD)
			self = (lookup(ident) != NIL);
		else
			self = (lookup(ident) == NIL);
		return;
	}

	/*
	 * #else reverses the sense of the current level.
	 */
	if (dir_kind == ELSE) {
		if (*lp != EOS)
			printerr("Extraneous argument to #else");
		if (empty(&ifstack)) {
			printerr("#else without #if");
			return;
		}
		self = !self;
		return;
	}

	/*
	 * #endif restores the flags saved by the matching #if; they
	 * come back in the reverse of the order they were pushed.
	 */
	if (dir_kind == ENDIF) {
		if (*lp != EOS)
			printerr("Extraneous argument");
		if (empty(&ifstack)) {
			printerr("#endif without #if");
			return;
		}
		self = (int)pop(&ifstack);
		father = (int)pop(&ifstack);
	}
	return;
}

/*
 *	Non-conditional directive processing is performed here.  This
 *	procedure is executed only if we are not currently flushing.  The
 *	message and undef directives are implemented in a very straightforward
 *	manner.  The others are a little more complex.  Macro definition is
 *	accomplished in four steps.  First the name of the macro being
 *	defined is picked up; second formal parameters are processed; third,
 *	the actual definition is processed (and index marker substitution is
 *	performed), and finally, the symbol is entered into the symbol table.
 *	Include processing goes as follows.  First the path name and its
 *	delimiter are picked up, then we attempt to find the file by
 *	prepending the paths specified in 'srchlist[]' to it.  If the file
 *	is found, the current input file is stacked (without closing it) and
 *	the included file is set up as the new current input file.
 */

/*
 * non_cond -- carry out one non-conditional directive.
 *
 *	dir_kind	Directive mnemonic: LINE, MSG, UNDEF, DEF or
 *			INCL.  Any other value is ignored.
 *
 * Works on the global line pointer 'lp', which the caller has left
 * on the first non-blank character after the directive name.  A
 * successful #include stacks the current input file, makes the new
 * file current and records its name, line and page number for the new
 * nesting level.  Problems are reported through printerr(); nothing
 * is returned.
 */
non_cond(dir_kind)
int dir_kind;
{
	register int i, t, argcnt;
	char *defptr, *fid, *tp, delim, name[MAXIDLEN+1];
	extern char	*get_def();
#ifdef stdio
	FILE	*tbp;
#else
	struct buf *tbp;
#endif

	switch(dir_kind) {
	case LINE:
		writeline(line);		/* Do nothing		*/
		break;
	case MSG:
		printerr(lp);
		break;
	case UNDEF:
		lp = get_id(lp, name);
		if (sym_del(name) == ERROR)
			printerr("Symbol not defined");
		break;
	case DEF:
		lp = get_id(lp, name);
		if (*name == EOS) {
			printerr("No symbol given");
			break;
		}
		if ((argcnt = formal()) == ERROR) {
			printerr("Illegal argument list");
			break;
		}
		defptr = get_def(lp, argcnt);
		sym_enter(name, argcnt, defptr);
		break;
	case INCL:
		if (inclvl == (INCDEPTH-1)) {
			printerr("Maximum #include depth exceeded");
			break;
		}
		delim = *lp;
		if (delim != '<' && delim != '"') {
			printerr("Illegal file specification delimiter");
			break;
		}
		delim = (delim == '<') ? '>' : '"';
		fid = ++lp;
		while (*lp != delim && *lp != EOS) lp++;
		if (*lp == EOS) {
			printerr("Illegal file specification");
			break;
		}
		*lp = EOS;

#ifndef stdio
		/*
		 * Allocate the I/O node only once the file spec has
		 * been validated, so the error exits above cannot
		 * leak it.
		 */
		tbp = get_mem(sizeof *curinfil);
#endif

		/*
		 * At this point 'fid' points to the null-terminated
		 * file specification.  If the file specification is
		 * enclosed in '"' then the current directory is
		 * first searched for the specified file; if this
		 * fails or if the file spec is delimited by '<'
		 * and '>' then a standard list of paths is searched
		 * (see 'srchlist' array).
		 */

		for (i = (delim == '>') ? 1 : 0; srchlist[i]; i++) {
			tp = concat(srchlist[i], fid);
#ifdef stdio
			if ((tbp = fopen(tp, "r")) != NULL)
				break;
#else
			if ((t = fopen(tp, tbp)) >= 0)
				break;
#endif
		}
#ifdef stdio
		if (tbp == NULL) {
#else
		if (t < 0) {
#endif
			printerr(" ");
			fprintf(stderr, "Failed to open '#include' file");
			fprintf(stderr, "\"%s\" on SY:\n", fid);
			for (i=1;;i++)
			  {
			  if (srchlist[i] == NIL) break;
			  fprintf(stderr," and '%s'",srchlist[i]);
			  }
			fputs(". Sorry.\n",stderr);
#ifndef stdio
			free(tbp);
#endif
			break;
		}
		if (push(curinfil, &inclstk) == ERROR) 
			screech("#include stack overflow (impossible)");
		inclvl++;
		/*
		 * Remember the file name for this nesting level.  The
		 * destination is fname[inclvl] and the source is 'fid';
		 * the copy is cut off at the size of one fname[] entry
		 * so a long file spec cannot overrun it.
		 */
		tp = fname[inclvl];
		for (i = 0; i < (int)sizeof(fname[0]) - 1 && fid[i] != EOS; i++)
			tp[i] = fid[i];
		tp[i] = EOS;
		lineno[inclvl] = 0;
		pageno[inclvl] = 1;
		curinfil = tbp;
		/*
		 * Inhibit <SOH> in front of #include line at top level
		 */
		if (inclvl == 1)
			inclflag = TRUE;
		break;
	} /* end switch */
	return;
} /* end non_cond() */

/*
 *	Read a line of input routine -- loads 'line[]' with the next line
 *	from the current input file.  Line continuation is handled here --
 *	any occurrence of backslash-newline is replaced by a blank and the
 *	next line is tacked on to the current one.
 *
 *	Upon hitting end of file, the include stack is popped to restore
 *	input to the previous level.  If the stack is empty, EOF is returned
 *	(indicating end of file at the top level).
 */

int readline()
{
	register int buf_indx, curch, i;
	int ctn_cnt;			/* Continuation line counter */

	buf_indx = 0, ctn_cnt = 0;
	do {
		while ((curch = getc(curinfil)) != '\n') {
			if (curch == EOF) {	/* Current file exhausted */
#ifdef stdio
				fclose(curinfil);
#else
				close(curinfil->fildes);
				free(curinfil);	/* Release I/O node */
#endif
				if (empty(&inclstk)) {
					/*
					 * Don't lose xyz<EOF>
					 */
					if (buf_indx > 0)
						break;
					else	return(EOF);
				}
				else {
					curinfil =
#ifdef stdio
						(FILE *)
#else
						(struct buf *)
#endif
							pop(&inclstk);
					inclvl--;
					if (!nocomp) {
					   fprintf(outfil,
#ifdef UNIMATION
#ifdef PREPRE
						"#define pg #%d p.%d %s\npg\n",
#else
						"#%d p.%d %s\n",
#endif
						lineno[inclvl]+1,
						pageno[inclvl],
						fname[inclvl]);
#else
						"#d %s\n",
						lineno[inclvl]+1,
						fname[inclvl]);