/*
 * Copyright 1995,96 Thierry Bousch
 * Licensed under the Gnu Public License, Version 2
 *
 * $Id: mainduce.c,v 2.6 1996/09/30 08:29:06 bousch Exp $
 *
 * The main() function of induce, the lexer, and other various things.
 */

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <getopt.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/times.h>
#include <sys/wait.h>
#include <unistd.h>
#include "saml.h"
#include "saml-util.h"
#include "induce.h"
#include "parser.h"

/* Value given to -p/--precision; 0 when the option was not used. */
int floating_precision = 0;

/* Not referenced in this file beyond their definition. */
int no_literals = 0;
int no_rationals = 0;

/* Set to 1 by -T/--trace. */
int trace_mode = 0;

/* Defaults to "stdout is not a terminal"; forced by -q and -v. */
int quiet;

/* ST_APOLY or ST_POLY, chosen from $LITERALS, --dense or --sparse. */
int parsed_poly_type;

/* Memo file name: $INDUCEMEMO, the compiled-in default, or -m FILE. */
const char *memo_file;
const char *curr_memo_file;

/* Non-zero when the rules file name ends in ".m4". */
static int use_m4 = 0;

/* Number of entries currently stored in m4_argv. */
static int m4_argc = 0;

/* Non-zero once -e has created a temporary rules file. */
static int use_tmpfile = 0;

/* Argument vector handed to m4 by parse_file(). */
static const char **m4_argv;

/* Stream read by yylex(). */
static FILE *yyin;

/*
 * Replace a leading "~/" in `name' by the value of $HOME.
 *
 * Returns `name' itself when it does not start with "~/", when HOME is
 * unset, or when memory cannot be obtained; otherwise returns a freshly
 * malloc'ed string which the caller owns.
 */
const char* tilde_expand (const char *name)
{
	const char *homedir;
	char *expanded;
	size_t homelen;

	/* Only the exact prefix "~/" is recognised */
	if (!(name[0] == '~' && name[1] == '/'))
		return name;

	homedir = getenv("HOME");
	if (homedir == NULL)
		return name;

	homelen = strlen(homedir);
	/*
	 * The '~' counted by strlen(name) is dropped from the result,
	 * which leaves exactly one byte for the terminating NUL.
	 */
	expanded = malloc(homelen + strlen(name));
	if (expanded == NULL)
		return name;

	memcpy(expanded, homedir, homelen);
	strcpy(expanded + homelen, name + 1);
	return expanded;
}

/*
 * Search the colon-separated list in $INDUCEFILES (or the compiled-in
 * default list when the variable is unset) for the first readable file.
 *
 * Returns the name of that file, after tilde expansion, or NULL when
 * nothing readable is found or memory runs out. The returned string is
 * heap-allocated and intentionally never released by this function.
 */
static const char* find_rules_file (void)
{
	const char *env = getenv("INDUCEFILES");
	const char *candidate;
	char *list, *tok;

	/*
	 * strtok() writes NULs into its argument. The string returned by
	 * getenv() belongs to the environment and must not be modified
	 * (children such as m4 would inherit a truncated value), so we
	 * always tokenize a private copy.
	 */
	list = strdup(env ? env : _PATH_RULES);
	if (!list)
		return NULL;

	for (tok = strtok(list, ":"); tok; tok = strtok(NULL, ":")) {
		candidate = tilde_expand(tok);
		if (access(candidate, R_OK) == 0) {
			/* An expanded name no longer points into `list' */
			if (candidate != tok)
				free(list);
			return candidate;
		}
		/* Drop the expansion of a rejected element */
		if (candidate != tok)
			free((char *)candidate);
	}
	/* Not found */
	free(list);
	return NULL;
}

/* Wait for `child' to terminate, restarting when interrupted by a signal. */
static void reap_child (int child)
{
	while (waitpid(child, NULL, 0) < 0 && errno == EINTR)
		;
}

/*
 * Parse the rules contained in file `name' (after tilde expansion).
 *
 * When use_m4 is set, the file is first filtered through m4: a child
 * process runs m4 with the accumulated m4_argv and its standard output
 * is read through a pipe. Otherwise the file is read directly ("-"
 * means standard input), and is unlinked right after being opened when
 * it is the temporary file created for -e.
 *
 * Returns the value of yyparse(), or -1 when the input could not be set
 * up.
 */
static int parse_file (const char *name)
{
	char buffer[50];
	int ret, child = 0, fd[2];

	name = tilde_expand(name);
	if (use_m4) {
		if (pipe(fd) < 0) {
			perror("induce: pipe");
			return -1;
		}
		if ((child = fork()) < 0) {
			perror("induce: fork");
			close(fd[0]); close(fd[1]);
			return -1;
		} else if (child == 0) {
			/* This is the child */
			close(fd[0]);
			if (dup2(fd[1],1) < 0) {
			    /* Without the pipe, m4 would write to our stdout */
			    perror("induce: dup2");
			    _exit(-1);
			}
			if (floating_precision) {
			    snprintf(buffer, sizeof buffer,
				"-D__PRECISION__=%d", 16*floating_precision);
			    m4_argv[m4_argc++] = buffer;
			}
			m4_argv[m4_argc++] = name;
			/* The following cast avoids a warning from GCC */
			execv(_PATH_M4, (char **)m4_argv);
			perror("induce: execv");
			/*
			 * _exit(), not exit(): the child must not flush
			 * stdio buffers inherited from the parent.
			 */
			_exit(-1);
		}
		/* This is the parent */
		close(fd[1]);
		yyin = fdopen(fd[0], "r");
	}
	else {
		/* No preprocessing */
		if (strcmp(name, "-") == 0)
		  yyin = stdin;
		else
		  yyin = fopen(name, "r");
		if (use_tmpfile)
		  unlink(name);
	}
	if (yyin == NULL) {
		perror("induce: fopen");
		if (child) {
			/* Release the pipe and collect m4 before giving up */
			close(fd[0]);
			reap_child(child);
		}
		return -1;
	}
	ret = yyparse();
	if (yyin != stdin)
	    fclose(yyin);
	if (child)
	    reap_child(child);
	return ret;
}

/*
 * canonic_name(): put a user-supplied node name into canonical form.
 *
 * A valid name is an identifier (letters, digits and underscores, not
 * starting with a digit) followed by any number of bracketed lists of
 * signed decimal indices. All the lists are merged into a single one,
 * leading zeros and '+' signs are dropped, and any zero becomes "0":
 *     foo[-01,+4][][-0,0,+0]  -->  foo[-1,4,0,0,0]
 *     bar[]                   -->  bar
 * Applying the function to its own output returns the same name.
 *
 * Returns a malloc'ed string, never longer than the input, or NULL if
 * the name is malformed or memory is exhausted.
 */

static unsigned char* canonic_name (const unsigned char *p)
{
	unsigned char *result, *out;
	int nb_indices = 0;

	result = out = malloc(strlen((const char *)p) + 1);
	if (!result)
		return NULL;

	/* The root identifier is copied verbatim */
	if (*p != '_' && !isalpha(*p))
		goto reject;
	while (*p == '_' || isalnum(*p))
		*out++ = *p++;

	/* Whatever follows must be a sequence of [...] groups */
	while (*p != '\0') {
		if (*p++ != '[')
			goto reject;
		if (*p == ']') {
			/* Empty group: contributes nothing */
			p++;
			continue;
		}
		for (;;) {
			int negative = 0;

			/* First index opens the merged list, others extend it */
			*out++ = (nb_indices++ ? ',' : '[');

			/* Optional sign, then at least one digit */
			if (*p == '-' || *p == '+')
				negative = (*p++ == '-');
			if (!isdigit(*p))
				goto reject;
			while (*p == '0')
				p++;
			if (isdigit(*p)) {
				/* Non-zero value: keep only a minus sign */
				if (negative)
					*out++ = '-';
				while (isdigit(*p))
					*out++ = *p++;
			} else {
				/* Zero, whatever its sign or spelling */
				*out++ = '0';
			}

			/* An index ends the group or is followed by another */
			if (*p == ']') {
				p++;
				break;
			}
			if (*p != ',')
				goto reject;
			p++;
		}
	}
	if (nb_indices)
		*out++ = ']';
	*out = '\0';
	return result;

reject:
	free(result);
	return NULL;
}

/* Report the three mnode counters on standard error, on a single line. */
static void mnode_statistics (void)
{
	/* The counters appear in the order announced by the label */
	static const char report[] =
		"Allocated-reserved-freed mnodes = %ld-%ld-%ld\n";

	fprintf(stderr, report,
		nb_mnodes_allocated, nb_mnodes_reserved, nb_mnodes_freed);
}

/*
 * Evaluate the `count' nodes whose canonical names are in `nodes', and
 * print their values on standard output: tab-separated on one line in
 * quiet mode, one "name = value" line per node otherwise. Progress and
 * statistics go to standard error unless quiet is set.
 *
 * `count' may be zero, in which case no vertex is built but the
 * evaluation and reporting steps still run.
 */
static void process_nodes (char **nodes, int count)
{
	int i;
	vertex *v, **vtable;

	/*
	 * The table lives on the heap rather than in a variable-length
	 * array: a VLA of length zero is undefined behaviour, and a long
	 * node list could exhaust the stack. At least one slot is
	 * requested so that the pointer is valid even when count is 0.
	 */
	vtable = calloc(count > 0 ? count : 1, sizeof *vtable);
	assert(vtable != NULL);

	init_vertex_htable(409);
	if (!quiet) fprintf(stderr, "Building dependencies...\n");
	for (i = 0; i < count; i++) {
		v = make_vertex(nodes[i]);
		vtable[i] = v;
		--(v->used);	/* see below */
	}
	if (!quiet) fprintf(stderr, "Computing depth...");
	for (i = 0; i < count; i++) {
		/*
		 * We only need to compute the depth from "final" vertices,
		 * i.e., those which aren't used later in the calculation.
		 * This explains the --(v->used) kludge above: we are only
		 * counting references internal to the graph.
		 *
		 * This optimization is very important when argc is big.
		 */
		v = vtable[i];
		if (v->used == 0)
			find_depth(v, 0);
	}
	/* Now restore the actual values of the reference counter. */
	for (i = 0; i < count; i++)
		++(vtable[i]->used);
	if (!quiet) {
		fprintf(stderr, " done.\n");
		hash_statistics();
		depth_statistics();
		mnode_statistics();
		fprintf(stderr, "Evaluating vertices...\n");
	}
	eval_vertices();
	for (i = 0; i < count; i++) {
		v = vtable[i];
		if (quiet) {
		    if (i) putchar('\t');
		    fputs(mref_string(v->mr), stdout);
		} else
		    printf("%-15s\t= %s\n", v->name, mref_string(v->mr));
		/* Give up the reference taken for the command line */
		if (--(v->used) == 0)
		    free_vertex(v);
	}
	free(vtable);
	if (quiet) putchar('\n');
	fflush(stdout);
	reset_interpreter_stack();
	if (!quiet) {
		mnode_statistics();
		usage_statistics();
	}
}

/*
 * Split `line' into whitespace-separated node names, canonicalize them
 * and hand the valid ones to process_nodes(). Invalid names are
 * reported on standard error and skipped; empty fields are ignored.
 *
 * The buffer `line' is modified: separators are overwritten with NULs.
 */
static void process_line (char *line)
{
	int i, count, count1;
	char c, *p, **nodes1, **nodes, *current, *cname;

	/*
	 * Upper bound on the number of fields. The <ctype.h> functions
	 * require an unsigned char value (or EOF), hence the casts.
	 */
	for (count = 1, p = line; (c = *p); p++)
		if (isspace((unsigned char)c))
			count++;
	/* count >= 1, so these are never zero-sized requests */
	nodes1 = calloc(count, sizeof *nodes1);
	nodes  = calloc(count, sizeof *nodes);
	assert(nodes1 != NULL && nodes != NULL);
	for (i = 0; *line; line = p+1) {
		/* Look for the next space */
		for (p = line; (c = *p); p++)
			if (isspace((unsigned char)c))
				break;
		*p = '\0';
		nodes1[i++] = line;
		if (!c) break;
	}
	count1 = i;
	assert(count1 <= count);
	/* Now sanitize the list (skip empty fields, etc) */
	for (i = count = 0; i < count1; i++) {
		current = nodes1[i];
		if (current[0] == '\0')
			continue;
		cname = (char *)canonic_name((const unsigned char *)current);
		if (!cname)
		  fprintf(stderr, "Invalid nodename `%s', skipped\n",
			current);
		else
		  nodes[count++] = cname;
	}
	process_nodes(nodes, count);
	/* Free the space allocated by canonic_name() */
	for (i = 0; i < count; i++)
		free(nodes[i]);
	free(nodes);
	free(nodes1);
}

/* Version string printed by --version */
#define VERSION "0.50"

/* Short options; a colon marks an option that takes an argument */
static const char sh_opt[] = "D:e:f:m:n:p:qvT";

/*
 * Long options. Those without a short equivalent are given the codes
 * 160 to 163, which main() tests for in its option switch.
 */
static const struct option lg_opt[] = {
	{ "define",	required_argument,	NULL, 'D' },
	{ "nice",	required_argument,	NULL, 'n' },
	{ "precision",	required_argument,	NULL, 'p' },
	{ "file",	required_argument,	NULL, 'f' },
	{ "memo",	required_argument,	NULL, 'm' },
	{ "enter",	required_argument,	NULL, 'e' },
	{ "quiet",	no_argument,		NULL, 'q' },
	{ "verbose",	no_argument,		NULL, 'v' },
	{ "trace",	no_argument,		NULL, 'T' },
	{ "help",	no_argument,		NULL, 160 },
	{ "version",	no_argument,		NULL, 161 },
	{ "sparse",	no_argument,		NULL, 162 },
	{ "dense",	no_argument,		NULL, 163 },
	{ NULL,		0,			NULL, 0 }
};
/*
 * Help text, used as a printf format whose single %s receives the
 * program name. It is written as adjacent string literals with explicit
 * \n and \t escapes: a literal spanning several source lines is an
 * obsolete GCC extension that ISO C compilers reject. The resulting
 * text is unchanged.
 */
static const char Usage[] =
	"Usage: %s [OPTIONS] NODES...\n"
	"Options:\n"
	"  -D, --define SYMBOL[=VALUE]\tDefine a preprocessor symbol\n"
	"  -f, --file FILE\t\tRead rules from another file\n"
	"  -m, --memo FILE\t\tMemoize precious things to another file\n"
	"  -e, --enter TEXT\t\tEnter rules directly on the command line\n"
	"  -n, --nice NUMBER\t\tBe nice to other processes\n"
	"  -p, --precision BLOCKS\tUse reals with 16*BLOCKS bits of precision\n"
	"      --sparse\t\t\tOptimize polynomials for a sparse literal set\n"
	"      --dense\t\t\tor for a dense one (the default)\n"
	"  -q, --quiet\t\t\tDon't print debugging messages, only errors\n"
	"  -v, --verbose\t\t\tPrint debugging messages too\n"
	"  -T, --trace\t\t\tList all the nodes being computed\n"
	"      --help\t\t\tDisplay this summary, and exit\n"
	"      --version\t\t\tDisplay the version number, and exit\n";

int main (int argc, char **argv)
{
	int c, count;
	char *buff, **nodes;
	FILE *fdtmp = NULL;
	char tmp_file[] = _PATH_TMPFILE;
	const char *rules_file = NULL;

	quiet = !isatty(1);
	/* Reserve an array of strings for the command-line of m4 */
	m4_argv = calloc(argc+10, sizeof(char*));
	assert(m4_argv != NULL);
	m4_argv[m4_argc++] = "m4";	/* argv[0] */

	/* Default file for memoizing */
	memo_file = getenv("INDUCEMEMO");
	if (memo_file == NULL || *memo_file == 0)
		memo_file = _PATH_MEMO;
	/* Which representation for polynomials? */
	parsed_poly_type = ST_APOLY;
	buff = getenv("LITERALS");
	if (buff) {
		if (!strcmp(buff, "dense"))
			parsed_poly_type = ST_APOLY;
		if (!strcmp(buff, "sparse"))
			parsed_poly_type = ST_POLY;
	}
	/* Parse the command line */
	while ((c = getopt_long(argc,argv,sh_opt,lg_opt,NULL)) != EOF)
	    switch(c) {
	    case 'D':
	    	/* Define a preprocessor symbol */
	    	buff = alloca(strlen(optarg)+3);
	    	strcpy(buff, "-D");
	    	strcat(buff, optarg);
	    	m4_argv[m4_argc++] = buff;
	    	break;
	    case 'e':
	    	/* Enter a line containing rules */
	    	if (!use_tmpfile) {
	    		use_tmpfile = 1;
	    		fdtmp = fopen(mktemp(tmp_file), "w");
	    		if (!fdtmp) {
	    		    fprintf(stderr,
				"Cannot open temporary file `%s'. Abort.\n",
				tmp_file);
			    exit(1);
			}
		}
		fputs(optarg, fdtmp);
		fputc('\n', fdtmp);
		break;
	    case 'p':
		/* Use real numbers */
		floating_precision = atoi(optarg);
		break;
	    case 'n':
	    	/* Modify niceness */
	    	if (nice(atoi(optarg)) < 0)
	    		perror("nice");
	    	break;
	    case 'q':
	    	/* Quiet mode */
	    	quiet = 1;
	    	break;
	    case 'v':
	    	/* Verbose mode */
	    	quiet = 0;
	    	break;
	    case 'f':
	    	/* Choose another "rules" file */
	    	rules_file = optarg;
	    	break;
	    case 'm':
	    	/* Choose another "memo" file */
	    	memo_file = optarg;
	    	break;
	    case 'T':
	    	/* Enable trace mode */
	    	trace_mode = 1;
	    	break;
	    case 160:
	    	/* Print some help */
	    	printf(Usage, argv[0]);
	    	exit(0);
	    case 161:
	    	/* Print the version number */
	    	puts("Induce " VERSION);
	    	exit(0);
	    case 162:
	    	/* Optimize for sparse literals */
	    	parsed_poly_type = ST_POLY;
	    	break;
	    case 163:
	    	/* Optimize for dense literals */
	    	parsed_poly_type = ST_APOLY;
	    	break;
	    default:
	    	/* Usage error */
	    	fprintf(stderr, Usage, argv[0]);
		exit(2);
	    }
	saml_init();
	/* Export the value of parsed_poly_type into the environment */
	if (parsed_poly_type == ST_APOLY)
		setenv("LITERALS", "dense", 1);
	else /* ST_POLY */
		setenv("LITERALS", "sparse", 1);
	/* And the process ID */
	setenv("SPID", u32toa(getpid()), 1);

	if (use_tmpfile) {
		fclose(fdtmp);
		rules_file = tmp_file;
	}
	else if (rules_file == NULL) {
		/* No filename supplied */
		rules_file = find_rules_file();
		if (rules_file == NULL) {
			fprintf(stderr, "Cannot find the rules. Abort.\n");
			exit(1);
		}
	}
	count = strlen(rules_file);
	if (count >= 3 && strcmp(rules_file+count-3,".m4") == 0) {
		/* The last three chars are ".m4" */
		use_m4 = 1;
	}
	if (parse_file(rules_file) != 0) {
		fprintf(stderr, "Cannot parse file `%s'. Abort.\n",
		    rules_file);
		exit(1);
	}
	free(m4_argv);
	if (optind < argc) {
		/* Get the remaining arguments on the command line */
		nodes = calloc(argc-optind, sizeof(char*));
		assert(nodes != NULL);
		for (count = 0; optind < argc; optind++) {
			char *current = argv[optind];
			char *cname = canonic_name(current);
			if (!cname)
			  fprintf(stderr, "Invalid nodename `%s', skipped\n",
			  	current);
			else
			  nodes[count++] = cname;
		}
		process_nodes(nodes, count);
	} else {
		/* Read the nodes from standard input, line after line */
		gr_string* grs = new_gr_string(0);

		while ((c = getchar()) != EOF) {
			if (c != '\n') {
				grs = grs_append1(grs, c);
				continue;
			}
			/* We have read a whole line */
			grs = grs_append1(grs, 0);
			process_line(grs->s);
			grs->len = 0;
		}
		if (grs->len) {
			/* Incomplete last line */
			grs = grs_append1(grs, 0);
			process_line(grs->s);
		}
	}
	return 0;
}

/*
 * Print on standard error the CPU time consumed so far, in user and
 * system time. The times of waited-for children are added to the line
 * only when at least one of them is non-zero. Nothing is printed if the
 * times cannot be obtained.
 */
void usage_statistics (void)
{
	struct tms t;
	long hz;
	double ticks;

	if (times(&t) == (clock_t)-1)
		return;
	/*
	 * The number of clock ticks per second is a run-time property;
	 * the CLK_TCK macro is obsolete and missing from current systems.
	 * Should the query fail, divide by 1 so that raw tick counts are
	 * shown rather than nonsense.
	 */
	hz = sysconf(_SC_CLK_TCK);
	ticks = (hz > 0) ? (double)hz : 1.0;

	if (t.tms_cutime || t.tms_cstime)
		fprintf(stderr,
		  "CPU usage: %.3fu + %.3fs (self), %.3fu + %.3fs (children)\n",
		  t.tms_utime/ticks, t.tms_stime/ticks,
		  t.tms_cutime/ticks, t.tms_cstime/ticks);
	else
		fprintf(stderr, "CPU usage: %.3fu + %.3fs\n",
		  t.tms_utime/ticks, t.tms_stime/ticks);
}

/*
 * State shared by the routines below, which gather the pieces of a rule
 * as the Bison parser hands them over.
 */

/* Capacity of the index tables */
#define MAX_INDICES 16

/* Names of the indices of the rule being parsed */
static char *table_indices[MAX_INDICES];

/* How many entries of table_indices[] are in use */
static int current_indices = 0;

/* Root name of the rule being parsed */
static const char *rootname = NULL;

/*
 * Look `name' up among the indices of the current rule. Returns its
 * position in table_indices[], or -1 if it is not an index.
 */
static int index_number (const char* name)
{
	int slot = 0;

	while (slot < current_indices) {
		if (!strcmp(table_indices[slot], name))
			return slot;
		slot++;
	}
	/* Not an index of the current rule */
	return -1;
}

/*
 * Record `ident' as the next index of the current rule and return the
 * number assigned to it. The pointer itself is stored, not a copy.
 */
int new_index (char* ident)
{
	int slot;

#ifdef DEBUG_PARSER
	fprintf(stderr, "New index %%%d = `%s'\n", current_indices, ident);
#endif
	assert(current_indices < MAX_INDICES);
	slot = current_indices;
	table_indices[slot] = ident;
	current_indices = slot + 1;
	return slot;
}

/* Index expressions collected for the indexed variable being read */
static const char *table_iexprs[MAX_INDICES];

/* Root name of that indexed variable */
static const char *idxvar_rootname = NULL;

/* Number of entries of table_iexprs[] in use */
static int arity = 0;

/*
 * Begin collecting an indexed variable called `rootname': remember the
 * name and forget any index expression collected so far.
 */
void start_idxvar (const char* rootname)
{
#ifdef DEBUG_PARSER
	fprintf(stderr, "Opening idxvar %s(?)\n", rootname);
#endif
	/* No index expression has been seen yet */
	arity = 0;
	idxvar_rootname = rootname;
}

/* Capacity of the condition table */
#define MAX_COND 16

/* Bytecode of the conditions attached to the rule being parsed */
static const char *conditions[MAX_COND];

/* Number of entries of conditions[] in use */
static int nb_cond = 0;

/* Append the condition `bcode' to those of the rule being parsed. */
void collect_condition (const char* bcode)
{
#ifdef DEBUG_PARSER
	fprintf(stderr, "Condition `%s' collected\n", bcode);
#endif
	assert(nb_cond < MAX_COND);
	conditions[nb_cond] = bcode;
	nb_cond += 1;
}

/* Capacity of the subexpression table */
#define MAX_SEXP 100

/* Indexed variables referenced by the rule being parsed */
static idx_var *subexprs[MAX_SEXP];

/* Number of entries of subexprs[] in use */
static int nbdep = 0;

/*
 * Begin a rule for the root name `ident'. No index may be pending from
 * a previous rule; the dependency and condition counts are reset.
 */
void start_new_rule (const char* ident)
{
#ifdef DEBUG_PARSER
	fprintf(stderr, "Beginning rule for %s(?)\n", ident);
#endif
	rootname = ident;
	assert(current_indices == 0);
	nb_cond = 0;
	nbdep = 0;
}

/*
 * Finish the indexed variable opened by start_idxvar(): bundle its root
 * name and collected index expressions into a new idx_var, register it
 * as the next dependency of the current rule, and return a malloc'ed
 * string made of its dependency number followed by the letter 'p'.
 */
char* collect_idxvar (void)
{
	char label[16];
	idx_var *var;
	int k;

#ifdef DEBUG_PARSER
	fprintf(stderr, "Closing idxvar %s[%d], number %d\n",
		idxvar_rootname, arity, nbdep);
#endif
	/* One bytecode pointer per index follows the fixed part */
	var = malloc(sizeof(idx_var) + arity * sizeof(char*));
	assert(var != NULL);
	var->rootname = idxvar_rootname;
	var->nbind = arity;
	k = 0;
	while (k < arity) {
		var->ibcode[k] = table_iexprs[k];
		k++;
	}

	assert(nbdep < MAX_SEXP);
	sprintf(label, "%dp", nbdep);
	subexprs[nbdep] = var;
	nbdep++;
	return strdup(label);
}

/* Append the index expression `bcode' to the indexed variable being read. */
void collect_iexpr (const char* bcode)
{
	assert(arity < MAX_INDICES);
	table_iexprs[arity] = bcode;
	arity += 1;
}

/*
 * Finish the rule opened by start_new_rule(): build an eval_rule from
 * the root name, the number of indices, the collected dependencies and
 * conditions and the bytecode `bcode', and pass it to insert_rule().
 * The index names recorded by new_index() are then freed and the index
 * table is emptied.
 */
void add_this_rule (const char* bcode)
{
	int i;
	idx_var **dep, *cond;
	eval_rule* current;

#ifdef DEBUG_PARSER
	fprintf(stderr, "End of rule %s(%d), bytecode `%s'\n",
		rootname, current_indices, bcode);
#endif
	current = malloc(sizeof(eval_rule));
	assert(current != NULL);
	current->rootname = rootname;
	current->nbind = current_indices;
	current->nbdep = nbdep;
	/*
	 * calloc() is allowed to return NULL for a zero-sized request,
	 * which would trip the assertion for a rule with no dependency;
	 * always ask for at least one slot.
	 */
	dep = calloc(nbdep > 0 ? nbdep : 1, sizeof(idx_var*));
	assert(dep != NULL);
	for (i = 0; i < nbdep; i++)
		dep[i] = subexprs[i];
	current->dep = dep;
	current->bytecode = bcode;
	/* The conditions are stored as an idx_var without a root name */
	cond = malloc(sizeof(idx_var) + nb_cond * sizeof(char*));
	assert(cond != NULL);
	cond->rootname = NULL;
	cond->nbind = nb_cond;
	for (i = 0; i < nb_cond; i++)
		cond->ibcode[i] = conditions[i];
	current->conditions = cond;
	insert_rule(current);
	/* Now we can free the indices */
	for (i = 0; i < current_indices; i++)
		free(table_indices[i]);
	current_indices = 0;
}

/*
 * Here is the lexical analyzer. It has some "state" and remembers which
 * identifiers correspond to indices -- the type of the return object will
 * depend on this. Similarly, unsigned integers can be "big" or "small".
 *
 * Token text is accumulated in a heap buffer that grows on demand, so
 * that numbers, identifiers and quoted strings of any length are
 * accepted. The buffer is reused from one call to the next.
 */

static char *lex_tok = NULL;	/* text of the token being read */
static size_t lex_tok_cap = 0;	/* bytes allocated for lex_tok */
static size_t lex_tok_len = 0;	/* bytes used, excluding the NUL */

/* Make sure lex_tok can hold at least `need' bytes. */
static void lex_tok_reserve (size_t need)
{
	size_t newcap;
	char *tmp;

	if (need <= lex_tok_cap)
		return;
	newcap = lex_tok_cap ? lex_tok_cap : 64;
	while (newcap < need)
		newcap *= 2;
	tmp = realloc(lex_tok, newcap);
	assert(tmp != NULL);
	lex_tok = tmp;
	lex_tok_cap = newcap;
}

/* Start a new token: lex_tok becomes the empty string. */
static void lex_tok_reset (void)
{
	lex_tok_reserve(1);
	lex_tok_len = 0;
	lex_tok[0] = '\0';
}

/* Append character `c' to lex_tok, keeping it NUL-terminated. */
static void lex_tok_put (int c)
{
	lex_tok_reserve(lex_tok_len + 2);
	lex_tok[lex_tok_len++] = (char)c;
	lex_tok[lex_tok_len] = '\0';
}

/*
 * Return the next token read from yyin, or 0 at end of input.
 *
 * Whitespace and comments (from '#' to the end of the line) are
 * skipped. INTEGER and INDEX store their value in yylval.integer;
 * BIG_INTEGER, IDENTIFIER and QSTRING store a strdup'ed copy of the
 * text in yylval.string. Any other character is returned as itself.
 */
int yylex (void)
{
	int c, d;
	unsigned long number;

	while(1) {
		/* Get the next character */
		if ((c = getc(yyin)) == EOF)
			return 0;
		/* Skip whitespace */
		if (isspace(c))
			continue;
		/* Skip comments */
		if (c == '#') {
			while ((c = getc(yyin)) != EOF && c != '\n')
			    ;
			continue;
		}
		/* Not a comment nor whitespace */
		break;
	}
	if (isdigit(c)) {
		/* This is the beginning of a number */
		lex_tok_reset();
		do {
			lex_tok_put(c);
			c = getc(yyin);
		}
		while (c != EOF && isdigit(c));
		ungetc(c, yyin);
		/*
		 * Keep the full width of strtoul()'s result: narrowing
		 * it to unsigned int would make a huge literal wrap
		 * around and pass for a small one. On overflow strtoul()
		 * returns ULONG_MAX, which also exceeds INT_MAX.
		 */
		number = strtoul(lex_tok, NULL, 10);
		if (number > INT_MAX) {
			yylval.string = strdup(lex_tok);
			return BIG_INTEGER;
		} else {
			yylval.integer = (int)number;
			return INTEGER;
		}
	}
	if (c == '_' || isalpha(c)) {
		/* This is the beginning of an identifier */
		lex_tok_reset();
		do {
			lex_tok_put(c);
			c = getc(yyin);
		}
		while (c != EOF && (isalnum(c) || c == '_'));
		ungetc(c, yyin);
		if (!strcmp(lex_tok,"if") || !strcmp(lex_tok,"when"))
			return TOK_IF;
		if (!strcmp(lex_tok,"precious"))
			return TOK_PRECIOUS;
		if ((d = index_number(lex_tok)) < 0) {
			/* This is not an index */
			yylval.string = strdup(lex_tok);
			return IDENTIFIER;
		} else {
			/* Return the index number */
			yylval.integer = d;
			return INDEX;
		}
	}
	if (c == '"') {
		/* A string between double quotes; EOF also ends it */
		lex_tok_reset();
		while ((c = getc(yyin)) != EOF && c != '"')
			lex_tok_put(c);
		yylval.string = strdup(lex_tok);
		return QSTRING;
	}
	/*
	 * Another character; test for multi-byte tokens. A NUL byte is
	 * excluded explicitly, because strchr() would match it against
	 * the terminator of the set.
	 */
	if (c != '\0' && strchr("><=!:", c)) {
		if ((d = getc(yyin)) != '=') {
			/* Not good */
			ungetc(d, yyin);
		}
		else switch(c) {
		    case '>':	return TOK_GEQ;		/* >= */
		    case '<':	return TOK_LEQ;		/* <= */
		    case ':':				/* := */
		    case '=':	return '=';		/* == */
		    case '!':	return TOK_NEQ;		/* != */
		}
	}
	if (c == '-' || c == '>') {
		if ((d = getc(yyin)) != '>') {
			/* Not good */
			ungetc(d, yyin);
		}
		else switch(c) {
		    case '-':	return TOK_ARROW;	/* -> */
		    case '>':	return TOK_SHIFT;	/* >> */
		}
	}
	if (c == '~') {
		if ((d = getc(yyin)) == '~')
			return TOK_MODULO;		/* ~~ */
		ungetc(d, yyin);
	}
	return c;
}
