/*
 * lexer.c
 *
 * Copyright 1998, 1999 Michael Elizabeth Chastain, <mailto:mec@shout.net>.
 * Licensed under the Gnu Public License, version 2.
 */

#include <sys/types.h>
#include <sys/stat.h>

#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <stdarg.h>

#include "mconfig.h"
#include "parser.tab.h"



/*
 * Stack of input files.
 *
 * Each input file has a buffer which contains the entire file contents,
 * plus a current index into that buffer.  The buffers are permanent --
 * the lexer sets up piece pointers into the buffer and returns those.
 *
 * I have a stack of files because the "source" command does nested file
 * inclusion.
 */

/* One entry on the stack of input sources (a file or an in-memory string). */
typedef struct input_tag {
	/* entry that was current before this one was pushed; NULL at the bottom */
	struct input_tag *input_prev;
	/* name printed in diagnostics: the file name, or "<memory>" */
	char           *name;
	/* first byte of the complete contents of this input */
	char           *buffer;
	/* next byte the lexer will read; buffer <= index <= end */
	const char     *index;
	/* one past the last byte of buffer (the contents are not NUL-terminated) */
	const char     *end;
}               input_type;

/* Top of the input stack; NULL when nothing is left to read. */
static input_type *input_current = NULL;




/*
 * Push a new input file onto the input stack ("source" statement).
 *
 * The entire file is read into a newly grabbed buffer and a new stack
 * entry becomes input_current.  A file containing a null character is
 * rejected through error_exit().
 *
 * Parameters:
 *   file          name of the file to open; must not be NULL.
 *   error_string  may be NULL.  Otherwise *error_string is set to NULL
 *                 on entry and, on a system error, to the string built
 *                 by format_system_error().
 *
 * Return value: 0 for success, -1 for error.
 *
 * NOTE(review): the stack entry and its name are not released on the
 * error path; no release routine for grab_memory() is visible here.
 */

/*
 * Record the current errno against the name n1 and jump to the cleanup
 * code.  Only meaningful inside input_push_file(): it uses that
 * function's error_string parameter and its label_cleanup label.
 */
#define error( n1 )							\
    do									\
    {									\
	if ( error_string != NULL )					\
	    * error_string = format_system_error( errno, (n1), NULL );	\
	goto label_cleanup;						\
    } while (0)

int 
input_push_file(const char *file, const char **error_string)
{
	input_type     *input_new;
	struct stat     stat_buf;
	int             fd = -1;
	int             size;
	int             count;

	if (error_string != NULL)
		*error_string = NULL;

	/* error_internal() is assumed not to return */
	if (file == NULL)
		error_internal("input_push_file: null pointer");

	input_new = grab_memory(sizeof(*input_new));
	input_new->name = grab_memory(strlen(file) + 1);
	strcpy(input_new->name, file);

	fd = open(file, O_RDONLY);
	if (fd < 0)
		error(file);
	if (fstat(fd, &stat_buf) != 0)
		error(file);

	/*
	 * Sizes and offsets are kept in an int here.  Refuse a file whose
	 * size does not survive the conversion instead of reading a
	 * truncated or negative number of bytes.
	 */
	size = stat_buf.st_size;
	if (size < 0 || (off_t) size != stat_buf.st_size) {
		errno = EFBIG;
		error(file);
	}

	/* read() may return short counts, so loop until the buffer is full */
	input_new->buffer = grab_memory(size);
	for (count = 0; count < size;) {
		const int       nbread
		= read(fd, input_new->buffer + count, size - count);
		if (nbread < 0)
			error(file);
		if (nbread == 0)
			error_internal("input_push_file: read returned zero");
		count += nbread;
	}

	/* the lexer hands out pieces of this buffer; it cannot cope with NULs */
	if (memchr(input_new->buffer, '\0', size) != NULL) {
		char           *msg = grab_memory(strlen(file) + 64);
		sprintf(msg, "%s: null character in file", input_new->name);
		error_exit(msg);
	}

	/*
	 * Forget the descriptor before looking at the result of close():
	 * after a failed close() the descriptor must not be closed again
	 * by the cleanup code below.
	 */
	{
		const int       close_status = close(fd);
		fd = -1;
		if (close_status != 0)
			error(file);
	}

	input_new->index = input_new->buffer;
	input_new->end = input_new->buffer + size;
	input_new->input_prev = input_current;
	input_current = input_new;
	return 0;

label_cleanup:

	if (fd >= 0)
		close(fd);
	return -1;
}




/*
 * Push a copy of an in-memory string onto the input stack
 * (e.g. help menus).
 *
 * Exactly len bytes of str are copied into a newly grabbed buffer; the
 * new entry is named "<memory>" and becomes input_current.  A null
 * character anywhere in those len bytes is reported through
 * error_internal().
 */

void 
input_push_string(const char *str, int len)
{
	input_type     *entry;
	char           *copy;

	if (memchr(str, '\0', len) != NULL)
		error_internal("input_push_string: null character in memory");

	entry = grab_memory(sizeof(*entry));
	copy = grab_memory(len);
	memcpy(copy, str, len);

	/* fill in the new entry, then make it the top of the stack */
	entry->input_prev = input_current;
	entry->name = "<memory>";
	entry->buffer = copy;
	entry->index = copy;
	entry->end = copy + len;

	input_current = entry;
}



/*
 * Simple token-matching macro.
 *
 * `string` must be a string literal, because its length is taken with
 * sizeof.  If the unread input starts with `string`, and the match is
 * followed either by the end of the buffer or by a character that is
 * not plain according to map_plain, the macro advances input->index
 * past the match and returns `token` from the enclosing function.
 * Otherwise it does nothing.
 *
 * The point of use must have `input` and `map_plain` in scope.
 */

#define match_token(token, string)					     \
    do									     \
    {									     \
	const size_t match_len_ = sizeof(string) - 1;			     \
	const size_t match_left_ = (size_t) (input->end - input->index);    \
	if ( match_left_ >= match_len_					     \
	&&   memcmp( input->index, (string), match_len_ ) == 0		     \
	&& ( match_left_ == match_len_					     \
	|| ! map_plain[(unsigned char) input->index[match_len_]] ) )	     \
	{								     \
	    input->index += match_len_;					     \
	    return (token);						     \
	}								     \
    } while (0)



/*
 * This is the heart of the lexer.
 *
 * Newlines are individual words.
 * Most non-alphanumeric characters are individual words.
 *
 * I handle simple quoting, but don't push your luck.  Also, note that
 * bash quoting and C quoting are different, and I follow bash quoting.
 * So don't complain about '\''.  I do what bash does.
 */

int 
yylex()
{
	static char     map_plain[256];	/* boolean map of plain chars */
	static const char *open_dquote_point = NULL;	/* open "..." point */

	register char   c;
	input_type     *input;

	/*
         * Initialize map_plain.
         */

	if (map_plain['_'] == 0) {
		unsigned int    uic;

		for (uic = 0; uic < 256; uic++)
			map_plain[uic] = !!isalnum(uic);
		map_plain['_'] = 1;
		map_plain['/'] = 1;
		map_plain['-'] = 1;
		map_plain['+'] = 1;
		map_plain['.'] = 1;
	}
	/*
         * Check for magic cookie which tells me whether I am parsing a
         * Config.in file or a defconfig file.
         */
	if (parser_magic_cookie != 0) {
		int             ret_parser_magic_cookie = parser_magic_cookie;
		parser_magic_cookie = 0;
		return ret_parser_magic_cookie;
	}
	/*
         * Check for end of all input.
         */

	input = input_current;
	if (input_current == NULL)
		return 0;



label_getchar:

	/*
         * Pop current file at end of file
         */

	if (input->index == input->end) {
		if (open_dquote_point != NULL) {
			parser_error("unmatched double quote", open_dquote_point);
			exit(1);/* no complicated recovery; fix your damn
				 * input */
		}
		input_current = input = input->input_prev;
		if (input == NULL)
			return 0;

		goto label_getchar;
	}
	/*
         * Get next character
         */

	yylval.value_input_point = input->index;
	c = *input->index++;



	/*
         * Tokens are very simple inside double quotes.
         */

	if (open_dquote_point != NULL) {
		const char     *index;

		/* dquote is a token */
		if (c == '"') {
			open_dquote_point = NULL;
			return c;
		}
		/* dollar is a token */
		if (c == '$')
			return c;

		/* some backslash sequences are special */
		if (c == '\\') {
			if (input->index < input->end) {
				if (*input->index == '\n') {
					input->index++;
					goto label_getchar;
				}
				if (*input->index == '"' || *input->index == '$') {
					/*
					 * skip backslash but keep extending
					 * token normally
					 */
					input->index++;
				}
			}
		}
		/*
		 * accept characters until a delimiter
		 */

		for (index = input->index; index < input->end; index++) {
			c = *index;
			if (c == '$')
				break;
			if (c == '"')
				break;
			if (c == '\\') {
				if (index[1] == '\n' || index[1] == '"' || index[1] == '$') {
					/* gotta break up the atom here */
					break;
				}
			}
		}

		yylval.value_lexeme.piece.ptr = input->index - 1;
		yylval.value_lexeme.piece.len = index - (input->index - 1);
		yylval.value_lexeme.squote = 0;
		input->index = index;
		return LEXEME;
	}
	/*
         * Fan out based on first character of word.
         */

	switch (c) {
	default:
		break;

	case ' ':
	case '\t':
	case '\f':
	case '\v':
		/* eat white space */
		goto label_getchar;

	case '\n':
	case '!':
	case '$':
	case ';':
	case '=':
	case '[':
	case ']':
		/* single-character tokens */
		return c;

	case '\'':
		{
			const char     *index;

			for (index = input->index; index < input->end; index++) {
				if (*index == '\'')
					break;
			}

			if (index == input->end) {
				parser_error("unmatched single quote", input->index - 1);
				exit(1);
			}
			yylval.value_lexeme.piece.ptr = input->index;
			yylval.value_lexeme.piece.len = index - input->index;
			yylval.value_lexeme.squote = 1;
			input->index = index + 1;	/* eat the close quote */
			return LEXEME;
		}
		break;

	case '#':
		{
			const char     *index;

			for (index = input->index; index < input->end; index++) {
				if (*index == '\n')
					break;
			}

			if (input->index[0] == ' '
			    && index - input->index >= 12
			    && memcmp(index - 11, " is not set", 11) == 0) {
				/* # CONFIG_FOO is not set */
				yylval.value_lexeme.piece.ptr = input->index + 1;
				yylval.value_lexeme.piece.len = index - input->index - 12;
				yylval.value_lexeme.squote = 0;
				input->index = index;
				return ISNOTSET;
			} else {
				/* discard this comment */
				input->index = index;
				goto label_getchar;
			}
		}
		break;

	case '\\':
		if (input->index < input->end) {
			if (*input->index == '\n') {
				input->index++;
				goto label_getchar;
			} else {
				/*
				 * skip backslash but keep extending token
				 * normally
				 */
				input->index++;
			}
		}
		break;

	case '"':
		open_dquote_point = input->index - 1;
		return c;

	case '-':
		match_token(DASH_A, "a");
		match_token(DASH_O, "o");
		break;

	case 'b':
		match_token(ASK_BOOL, "ool");
		break;

	case 'c':
		match_token(CHOICE, "hoice");
		match_token(COMMENT, "omment");
		break;

	case 'd':
		match_token(DEF_BOOL, "efine_bool");
		match_token(DEF_HEX, "efine_hex");
		match_token(DEF_INT, "efine_int");
		match_token(DEF_STRING, "efine_string");
		match_token(DEF_TRISTATE, "efine_tristate");
		match_token(DEP_BOOL, "ep_bool");
		match_token(DEP_MBOOL, "ep_mbool");
		match_token(DEP_HEX, "ep_hex");
		match_token(DEP_INT, "ep_int");
		match_token(DEP_STRING, "ep_string");
		match_token(DEP_TRISTATE, "ep_tristate");
		break;

	case 'e':
		match_token(ENDMENU, "ndmenu");
		match_token(ELSE, "lse");
		break;

	case 'f':
		match_token(FI, "i");
		break;

	case 'h':
		match_token(ASK_HEX, "ex");
		break;

	case 'i':
		match_token(IF, "f");
		match_token(ASK_INT, "nt");
		break;

	case 'm':
		match_token(MAINMENU_OPTION, "ainmenu_option");
		match_token(MAINMENU_NAME, "ainmenu_name");
		break;

	case 'n':
		match_token(NCHOICE, "choice");
		break;

	case 's':
		match_token(SOURCE, "ource");
		match_token(ASK_STRING, "tring");
		break;

	case 't':
		match_token(TEXT, "ext");
		match_token(THEN, "hen");
		match_token(ASK_TRISTATE, "ristate");
		break;

	case 'u':
		match_token(UNSET, "nset");
		break;
	}



	/*
         * A non-plain character is a plain lexeme now.
         */

	if (!map_plain[(unsigned char) c]) {
		yylval.value_lexeme.piece.ptr = input->index - 1;
		yylval.value_lexeme.piece.len = 1;
		yylval.value_lexeme.squote = 0;
		return LEXEME;
	}
	/*
         * Convert a string of plain characters into a plain word.
         */

	{
		const char     *index;

		for (index = input->index; index < input->end; index++) {
			if (!map_plain[(unsigned char) *index])
				break;
		}

		yylval.value_lexeme.piece.ptr = input->index - 1;
		yylval.value_lexeme.piece.len = index - (input->index - 1);
		yylval.value_lexeme.squote = 0;
		input->index = index;
		return LEXEME;
	}
}



/*
 * Report a warning or an error at parse time.
 *
 * parser_warning() and parser_error() below both hand their message to
 * print_diagnostic(), which is defined after them; hence this forward
 * declaration.
 */

static void     print_diagnostic(const char *, const char *, va_list);

/*
 * Report the message s as a parse error at the current read position
 * of the current input, or with no position when no input is current.
 * The name and signature follow the yacc error-hook convention;
 * presumably the generated parser is the caller.
 */
void 
yyerror(const char *s)
{
	const char     *error_point = NULL;

	if (input_current != NULL)
		error_point = input_current->index;

	/*
	 * s is message text, not a format string: pass it as an argument
	 * so that a '%' in it is printed instead of being interpreted.
	 */
	parser_error("%s", error_point, s);
}

/*
 * Print a warning and count it in parser_warning_count.
 *
 * s is a printf-style format consumed together with the variable
 * arguments; it is printed with "warning: " in front of it.
 * warning_point is the input position the warning refers to; it is
 * passed on to print_diagnostic() to work out the file name and line.
 */
void 
parser_warning(const char *s, const char *warning_point,...)
{
	static const char prefix[] = "warning: ";
	va_list         args;
	char           *format;

	/* build the prefixed format; 16 extra bytes cover prefix and NUL */
	format = grab_memory(strlen(s) + 16);
	strcpy(format, prefix);
	strcat(format, s);

	va_start(args, warning_point);
	print_diagnostic(format, warning_point, args);
	va_end(args);

	++parser_warning_count;
}

/*
 * Print an error and count it in parser_error_count.
 *
 * s is a printf-style format consumed together with the variable
 * arguments.  error_point is the input position the error refers to;
 * it is passed on to print_diagnostic() to work out the file name and
 * line.
 */
void 
parser_error(const char *s, const char *error_point,...)
{
	va_list         args;

	va_start(args, error_point);
	print_diagnostic(s, error_point, args);
	va_end(args);

	++parser_error_count;
}

/*
 * Print one diagnostic line of the form "<name>: <line>: <message>".
 *
 * s is a printf-style format and ap its arguments.  error_point is the
 * position the message refers to.  When it lies inside the buffer of
 * the current input, or exactly at its end, the name of that input and
 * the 1-based line number of the position are printed.  Otherwise the
 * line starts with "(no file): -1".
 *
 * Output goes to stdout when argument.mode is mode_syntax, and to
 * stderr otherwise.
 */
static void 
print_diagnostic(const char *s, const char *error_point, va_list ap)
{
	const char     *name = "(no file)";
	int             line = -1;
	FILE           *to = (argument.mode == mode_syntax) ? stdout : stderr;

	/*
	 * error_point == end is a valid position: yyerror() reports the
	 * current read position, which equals end once the last token of
	 * a file has been consumed.
	 */
	if (input_current != NULL
	    && error_point != NULL
	    && error_point >= input_current->buffer
	    && error_point <= input_current->end) {
		const char     *index;

		name = input_current->name;

		/* line number = 1 + number of newlines before the position */
		line = 1;
		for (index = input_current->buffer; index < error_point; index++) {
			if (*index == '\n')
				++line;
		}
	}
	fprintf(to, "%s: %d: ", name, line);
	vfprintf(to, s, ap);
	putc('\n', to);
}
