/* parseconf.c - state machine-driven dynamic configuration file parser

   Copyright (C) 2002  Russell Kroll <rkroll@exploits.org>

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/

/* This is the third take on the Network UPS Tools configuration parser.
 *
 * This time around, the states have been split out into their own
 * functions for readability.  The old version did some evil tricks
 * involving switch(), and was probably quite confusing as a result.
 * This one aims for better readability and a flow that makes sense.
 *
 * Interface:
 *
 * You now call parseconf("file", arghandler, errhandler) and it calls
 * your arghandler function with the number of arguments and an array
 * of pointers to them.  It
 * also calls your errhandler function when an error happens, so there 
 * should no longer be any NUT dependencies in here.  Hopefully this 
 * will prove useful to other programmers who want to parse things this 
 * way.
 *
 * Input vs. Output:
 *
 * What it reads		--> What ends up in each argument
 *
 * this is a line 		--> "this" "is" "a" "line"
 * this "is also" a line	--> "this" "is also" "a" "line"
 * embedded\ space		--> "embedded space"
 * embedded\\backslash		--> "embedded\backslash"
 *
 * Arguments are split by whitespace (isspace()) unless that whitespace
 * occurs inside a "quoted pair like this".
 *
 * You can also escape the double quote (") character.  The backslash
 * also allows you to join lines, allowing you to have logical lines
 * that span physical lines, just like you can do in some shells.
 *
 * Lines normally end with a newline, but reaching EOF will also force 
 * parsing on what's been scanned so far.
 * 
 * Design:
 *
 * Characters are read one at a time to drive the state machine.  
 * As words are completed (by hitting whitespace or ending a "" item),
 * they are committed to the next buffer in the arglist.  realloc is
 * used, so the buffer can grow to handle bigger words.
 *
 * The arglist also grows as necessary with a similar approach.  As a
 * result, you can parse extremely long words and lines with an insane
 * number of elements.
 *
 * Finally, there is argsize, which remembers how long each of the
 * arglist elements is.  This is how we know when to expand them.
 *
 * Error handling:
 *
 * Right now the non-fatal errors are a file that cannot be opened
 * (parseconf returns -1) and the case where the user drops a # into a
 * quoted block, which is ambiguous.  All other errors come from memory
 * allocation failures, and parseconf calls exit() once control returns
 * from the caller's error handler.
 *
 */

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>	
#include <unistd.h>

#include "parseconf.h"

/* possible states
 *
 * Each value selects the handler that parseconf() runs for the next
 * character read from the file:
 *
 * STATE_FINDWORDSTART  - findwordstart(): between words
 * STATE_FINDEOL        - findeol(): discarding the rest of the line
 * STATE_QUOTECOLLECT   - quotecollect(): inside a "quoted" word
 * STATE_QC_LITERAL     - qc_literal(): right after a \ inside quotes
 * STATE_COLLECT        - collect(): inside an unquoted word
 * STATE_COLLECTLITERAL - collectliteral(): right after a \ outside quotes
 */

#define STATE_FINDWORDSTART	1
#define STATE_FINDEOL		2
#define STATE_QUOTECOLLECT	3
#define STATE_QC_LITERAL	4
#define STATE_COLLECT		5
#define STATE_COLLECTLITERAL	6

	/* words of the current logical line; reset_arg_list() only zeroes
	 * the count, so the buffers are kept and reused for later lines */
	static char	**arglist;
	/* allocated size, in bytes, of each arglist[] buffer */
	static int	*argsize;
	/* numargs: words on the current line; maxargs: slots ever allocated */
	static int	numargs, maxargs;

	/* wordbuf: the word being collected; wordptr: where the next
	 * character will be written inside it */
	static char	*wordbuf, *wordptr;
	/* allocated size of wordbuf in bytes */
	static int	wordbufsize;

	/* line counter handed to the error handler; bumped by endofline() */
	static int	linenum;

	/* caller-supplied callbacks installed by parseconf(): user_arg gets
	 * each parsed line, user_err gets each error message */
	static void	(*user_arg)(int numargs, char **arglist);
	static void	(*user_err)(int linenum, char *errtest);

/* format the error and give it to the caller's error handler */
static void report_error(char *errtext)
{
	user_err(linenum, errtext);
}

/* Begin a new logical line: forget the collected words by zeroing the
 * count.  Nothing is freed here, so the arglist buffers stay around
 * for the next line to reuse. */
static void reset_arg_list(void)
{
	numargs = 0;
}

/* Store a copy of val as the next word of the current line.
 *
 * val: NUL-terminated word to append.  It is copied, so the caller
 *      keeps ownership of its own buffer.
 *
 * The slot arrays grow by one entry the first time a line has more
 * words than any earlier line.  Each slot's buffer is kept between
 * lines and only reallocated when a word arrives that does not fit.
 * Allocation failure is reported through report_error() and then
 * exit(1) ends the program.
 */
static void add_arg_word(char *val)
{
	size_t	len = strlen(val);
	int	argpos = numargs;	/* this is where the new value goes */

	numargs++;

	/* when facing more args than ever before, expand the lists */
	if (numargs > maxargs) {
		char	**newlist;
		int	*newsize;

		newlist = realloc(arglist, sizeof(*arglist) * (size_t) numargs);

		if (!newlist) {
			report_error("realloc arglist failed");
			exit(1);
		}

		arglist = newlist;

		/* argsize holds ints, so size by the element type */
		newsize = realloc(argsize, sizeof(*argsize) * (size_t) numargs);

		if (!newsize) {
			report_error("realloc argsize failed");
			exit(1);
		}

		argsize = newsize;
		maxargs = numargs;

		/* ensure sane starting values */
		arglist[argpos] = NULL;
		argsize[argpos] = 0;
	}

	/* grow the slot when the word plus its terminator will not fit */
	if (len >= (size_t) argsize[argpos]) {
		size_t	newlen = len + 1;	/* allow for the trailing NUL */
		char	*newbuf = realloc(arglist[argpos], newlen);

		if (!newbuf) {
			report_error("realloc arglist member failed");
			exit(1);
		}

		/* remember the new buffer and its size */
		arglist[argpos] = newbuf;
		argsize[argpos] = (int) newlen;
	}

	/* copy the word together with its terminator */
	memcpy(arglist[argpos], val, len + 1);
}

/* Append one character to the word being collected.
 *
 * ch: character to store at wordptr; the buffer is kept NUL-terminated.
 *
 * The fill level is taken from wordptr rather than strlen(wordbuf).
 * A NUL byte read from the file would make strlen() stop early, so
 * the capacity check would stop firing while wordptr kept advancing
 * past the end of the allocation.  Using the pointer offset also
 * avoids rescanning the word for every character.
 *
 * Allocation failure is reported via report_error() and then exit(1)
 * ends the program.
 */
static void addchar(char ch)
{
	size_t	used = (size_t) (wordptr - wordbuf);

	/* need room for this character plus the terminator */
	if (used + 2 > (size_t) wordbufsize) {
		char	*newbuf;

		/* grow in chunks so long words don't realloc per character */
		wordbufsize += 16;

		newbuf = realloc(wordbuf, (size_t) wordbufsize);

		if (!newbuf) {
			report_error("realloc wordbuf failed");
			exit(1);
		}

		/* repoint as wordbuf may have moved */
		wordbuf = newbuf;
		wordptr = wordbuf + used;
	}

	*wordptr++ = ch;
	*wordptr = '\0';
}

/* Finish the word in progress: hand the buffered text to the argument
 * list, then rewind the collector so the next word starts out empty. */
static void endofword(void)
{
	add_arg_word(wordbuf);

	/* truncate the buffer and point back at its start */
	wordbuf[0] = '\0';
	wordptr = wordbuf;
}

static void endofline(void)
{
	linenum++;

	/* only call back if there's something to do */
	if ((numargs != 0) && (arglist)) {

		/* send the data back to the caller */
		user_arg(numargs, arglist);
	}

	/* start over for the next line */
	reset_arg_list();
}

/* State handler: look for the beginning of a word.
 *
 * ch: the character just read from the file.
 *
 * Returns the next state for the machine.
 */
static int findwordstart(char ch)
{
	/* newline = the physical line is over, so the logical one is too */
	if (ch == '\n') {
		endofline();

		return STATE_FINDWORDSTART;
	}

	/* the rest of the line is a comment */
	if (ch == '#')
		return STATE_FINDEOL;

	/* space = not in a word yet, so loop back.  ch is a plain char,
	 * which may be negative for bytes above 0x7f; isspace() must not
	 * be handed such a value directly, hence the cast. */
	if (isspace((unsigned char) ch))
		return STATE_FINDWORDSTART;

	/* \ = literal = accept the next char blindly */
	if (ch == '\\')
		return STATE_COLLECTLITERAL;

	/* " = begin word bounded by quotes */
	if (ch == '"')
		return STATE_QUOTECOLLECT;

	/* at this point the word just started */
	addchar(ch);
	return STATE_COLLECT;
}

/* State handler used while skipping the remainder of a line: every
 * character is discarded until a newline shows up. */
static int findeol(char ch)
{
	/* anything but a newline keeps us skipping */
	if (ch != 10)
		return STATE_FINDEOL;

	/* found it: finish this line and go looking for words again */
	endofline();

	return STATE_FINDWORDSTART;
}

/* State handler for the inside of a word bounded by "quotes".
 * Returns the state to use for the next character. */
static int quotecollect(char ch)
{
	int	next = STATE_QUOTECOLLECT;

	switch (ch) {
		case '#':
			/* an unescaped # in here is ambiguous: complain,
			 * keep what was collected, skip the rest of the line */
			report_error("Unbalanced word due to unescaped # in quotes");
			endofword();
			next = STATE_FINDEOL;
			break;

		case '"':
			/* closing quote: the word is complete */
			endofword();
			next = STATE_FINDWORDSTART;
			break;

		case '\\':
			/* literal - its handler comes back to this state */
			next = STATE_QC_LITERAL;
			break;

		default:
			/* ordinary character: store it and stay here */
			addchar(ch);
			break;
	}

	return next;
}

/* State handler for the character that follows a backslash inside
 * quotes.  Whatever happens, collection resumes in quotecollect. */
static int qc_literal(char ch)
{
	/* an escaped newline just joins the lines; all else is stored */
	if (ch != 10)
		addchar(ch);

	return STATE_QUOTECOLLECT;
}

/* State handler: collect characters inside an unquoted word.
 *
 * ch: the character just read from the file.
 *
 * Returns the next state for the machine.
 */
static int collect(char ch)
{
	/* comment means the word is done, and skip to the end of the line */
	if (ch == '#') {
		endofword();

		return STATE_FINDEOL;
	}

	/* newline means the word is done, and the line is done */
	if (ch == '\n') {
		endofword();

		endofline();

		return STATE_FINDWORDSTART;
	}

	/* space means the word is done.  ch is a plain char, which may be
	 * negative for bytes above 0x7f; isspace() must not be handed
	 * such a value directly, hence the cast. */
	if (isspace((unsigned char) ch)) {
		endofword();

		return STATE_FINDWORDSTART;
	}

	/* \ = literal = accept the next char blindly */
	if (ch == '\\')
		return STATE_COLLECTLITERAL;

	/* otherwise store it and come back for more */
	addchar(ch);
	return STATE_COLLECT;
}

/* State handler for the character that follows a backslash outside
 * quotes.  Collection always resumes in the collect state. */
static int collectliteral(char ch)
{
	/* an escaped newline just joins the lines; all else is stored */
	if (ch != 10)
		addchar(ch);

	return STATE_COLLECT;
}

/* clean up memory before going back to the user */
static void free_storage(void)
{
	int	i;

	if (wordbuf)
		free(wordbuf);

	/* clear out the individual words first */
	for (i = 0; i < maxargs; i++)
		free(arglist[i]);

	if (arglist)
		free(arglist);

	if (argsize)
		free(argsize);

	/* put things back to the initial state */
	arglist = NULL;
	argsize = NULL;
	numargs = 0;
	maxargs = 0;
}

/* Parse a configuration file, delivering each logical line to the caller.
 *
 * fn:         path of the file to read.
 * arghandler: called once per non-empty logical line as
 *             handler(int numargs, char **arglist).
 * errhandler: called as handler(int linenum, char *errtext) whenever
 *             an error is reported.
 *
 * Both handlers arrive as void pointers and are stored in the
 * file-scope function pointers without any checking, so they must
 * really have the signatures shown above.
 *
 * The words passed to arghandler live in buffers that are reused for
 * the next line and freed before parseconf() returns, so the handler
 * has to copy anything it wants to keep.
 *
 * Returns 0 when the file was parsed, -1 when it could not be opened.
 * Memory allocation failures are reported and then end the program
 * via exit(1).
 */
int parseconf(char *fn, void *arghandler, void *errhandler)
{
	FILE	*f;
	int	state, ch;

	/* initialize things */
	numargs = 0;
	maxargs = 0;
	linenum = 0;
	arglist = NULL;
	argsize = NULL;
	wordbufsize = 16;

	user_arg = arghandler;
	user_err = errhandler;

	/* calloc returns zeroed memory, so wordbuf starts out as "" */
	wordbuf = calloc((size_t) wordbufsize, 1);

	/* check the allocation before anything touches the buffer */
	if (!wordbuf) {
		report_error("malloc wordbuf failed");
		exit(1);
	}

	/* points at current word - advances with each character */
	wordptr = wordbuf;

	f = fopen(fn, "r");

	if (!f) {
		report_error("Can't open configuration file");

		/* don't leak the word buffer on the failure path */
		free_storage();

		return -1;	/* failed */
	}

	linenum = 1;

	state = STATE_FINDWORDSTART;

	reset_arg_list();

	while ((ch = fgetc(f)) != EOF) {

		switch(state) {
			case STATE_FINDWORDSTART:
				state = findwordstart(ch);
				break;

			case STATE_FINDEOL:
				state = findeol(ch);
				break;

			case STATE_QUOTECOLLECT:
				state = quotecollect(ch);
				break;

			case STATE_QC_LITERAL:
				state = qc_literal(ch);
				break;

			case STATE_COLLECT:
				state = collect(ch);
				break;

			case STATE_COLLECTLITERAL:
				state = collectliteral(ch);
				break;

			default:
				/* no handler returns any other value */
				break;

		}	/* switch */

	}	/* while fgetc */

	fclose(f);

	/* deal with a file that didn't end with a newline: first commit a
	 * word that is still being collected ... */
	if (wordptr != wordbuf)
		endofword();

	/* ... then deliver the line even when its last word had already
	 * been committed (closing quote, trailing blank or comment), which
	 * leaves the word buffer empty but the argument list filled */
	if (numargs != 0)
		endofline();

	/* clean up memory */
	free_storage();

	return 0;	/* OK */
}
