/*
 *  SPL - The SPL Programming Language
 *  Copyright (C) 2004, 2005  Clifford Wolf <clifford@clifford.at>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 *  mod_format_xml.c: Simple module for loading and dumping XML
 */

/**
 * A simple XML parser/dumper module
 *
 * This module implements simple XML parser and dumper functions.
 */

#define _GNU_SOURCE

#include <assert.h>
#include <expat.h>

#include "spl.h"
#include "compat.h"

// backward compatibility for old expat versions
#ifndef XMLCALL
# define XMLCALL
#endif

/*
 * my_strndup(s, n): local replacement for strndup() for systems lacking it.
 *
 * Yields a freshly malloc()ed, NUL-terminated copy of at most `n` characters
 * of `s` (copying stops early at a NUL in `s`). The caller owns the result
 * and must free() it. Yields NULL if the allocation fails.
 *
 * Each argument is evaluated exactly once. The macro relies on the GCC
 * statement-expression extension.
 */
#define my_strndup(s, n)						\
	({								\
		const char *my_strndup_src_ = (s);			\
		size_t my_strndup_len_ = (size_t)(n);			\
		char *my_strndup_dst_ = malloc(my_strndup_len_ + 1);	\
		if (my_strndup_dst_) {					\
			strncpy(my_strndup_dst_, my_strndup_src_,	\
				my_strndup_len_);			\
			my_strndup_dst_[my_strndup_len_] = '\0';	\
		}							\
		my_strndup_dst_;					\
	})

/*
 * Prototypes for the two module entry points defined later in this file.
 * The names are wrapped in SPL_ABI(), which comes from the SPL headers
 * (NOTE(review): presumably it decorates the symbol with an ABI tag -- confirm
 * against spl.h).
 */
extern void SPL_ABI(spl_mod_format_xml_init)(struct spl_vm *vm, struct spl_module *mod, int restore);
extern void SPL_ABI(spl_mod_format_xml_done)(struct spl_vm *vm, struct spl_module *mod);

/* copied from mod_encode_xml.c */

/*
 * Map one character to its predefined XML entity.
 *
 * Returns the entity text for '&', '<', '>', '"' and '\'', or NULL when the
 * character needs no escaping.
 */
static const char *xml_encode_entity(char c)
{
	switch (c) {
		case '&':
			return "&amp;";
		case '<':
			return "&lt;";
		case '>':
			return "&gt;";
		case '"':
			return "&quot;";
		case '\'':
			return "&apos;";
		default:
			return NULL;
	}
}

/*
 * Return a newly malloc()ed copy of `source` in which the characters
 * & < > " ' are replaced by &amp; &lt; &gt; &quot; &apos;. All other
 * characters are copied unchanged.
 *
 * `source` must be a NUL-terminated string. The caller owns the result and
 * must free() it. Returns NULL if the output buffer cannot be allocated.
 *
 * Lengths are tracked as size_t so that very long inputs cannot overflow
 * the length computation and under-allocate the buffer.
 */
static char *xml_encode(const char *source)
{
	size_t target_len = 0;

	/* first pass: compute the exact size of the encoded text */
	for (const char *s = source; *s; s++) {
		const char *entity = xml_encode_entity(*s);
		target_len += entity ? strlen(entity) : 1;
	}

	char *target = malloc(target_len + 1);
	if (!target)
		return NULL;

	/* second pass: emit entities and plain characters */
	char *out = target;
	for (const char *s = source; *s; s++) {
		const char *entity = xml_encode_entity(*s);
		if (entity) {
			size_t entity_len = strlen(entity);
			memcpy(out, entity, entity_len);
			out += entity_len;
		} else {
			*out++ = *s;
		}
	}

	*out = 0;
	return target;
}

/**
 * This function returns a tree of ordered hashes. The keys in the ordered
 * hashes are encoded as follows:
 *
 *	A:<Name>
 *		An attribute to this node.
 *
 *	C<n>
 *		Character data. <n> is counting up from zero.
 *
 *	E<n>:<Name>
 *		A child node (element) in the XML tree. <n> is counting up
 *		from zero. So e.g. "E0:realname" is the 1st child element
 *		of the type "realname".
 *
 *		This is an ordered hash containing attributes, child nodes
 *		and character data again.
 *
 * Because the hash is ordered, it is possible to get the elements in the
 * correct order by using 'foreach' loops or using the 'next' and 'prev'
 * instructions, or directly address the elements. E.g.
 *
 *	var xmldata =
 *	<><?xml version="1.0" ?>
 *		<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
 *		  "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
 *		<html>
 *		  <head>
 *		    <meta name="Author" content="Clifford Wolf" />
 *		    <title>This is a simple test HTML page.</title>
 *		    <link rel="Index" href="index.html" />
 *		  </head>
 *		  <body>
 *		    <h1>Nothing interesting here!</h1>
 *		  </body>
 *		</html>
 *	</>;
 *
 *	var xmltree = format_xml_parse(xmldata);
 *
 *	debug xmltree.["E0:html"].["E0:head"].["E0:title"].["C0"];
 */
// builtin format_xml_parse(xmldata)
/*
 * Implementation of the SPL builtin format_xml_parse(xmldata).
 *
 * The string argument is fed to an expat parser; the expat callbacks build a
 * tree of nodes:
 *   - every element becomes a child "E<n>:<name>" of its parent node,
 *   - every attribute becomes a string child "A:<name>" of its element,
 *   - every run of character data becomes a string child "C<n>"; leading and
 *     trailing blanks, tabs, CR and LF are stripped, and runs that are empty
 *     after stripping are removed again.
 *
 * Returns the root node on success. If the parser cannot be created or the
 * input does not parse, a FormatXmlEx exception with a "description" is
 * raised through spl_clib_exception() and 0 is returned.
 *
 * The helpers below are GCC nested functions; they share the locals `task`,
 * `stack` and `last_is_char` of this function.
 */
static struct spl_node *handler_format_xml_parse(struct spl_task *task, void *data UNUSED)
{
	/* per-parent counter for one element name (or for " chardata") */
	struct stack_el_t {
		char *name;
		int counter;
		struct stack_el_t *next;
	};

	/* one entry per currently open element; the bottom entry is the root */
	struct stack_t {
		struct spl_node *node;
		struct stack_el_t *el_list;
		struct stack_t *next;
	};

	char *xmlfile = spl_clib_get_string(task);

	struct stack_t *stack = 0;

	/* index <n> of the "C<n>" node currently being collected, or -1 */
	int last_is_char = -1;

	/* push a new stack entry holding a fresh node */
	void stack_push()
	{
		struct stack_t *s = malloc(sizeof(struct stack_t));
		s->node = spl_get(0);
		s->el_list = 0;
		s->next = stack;
		stack = s;
	}

	/* drop the top stack entry and its counters (the node is not released) */
	void stack_pop()
	{
		struct stack_t *s = stack;
		struct stack_el_t *e = s->el_list;

		while (e) {
			struct stack_el_t *next = e->next;
			free(e->name); free(e); e = next;
		}

		stack = s->next;
		free(s);
	}

	/*
	 * Add `offset` to the counter kept for `el` in the top stack entry and
	 * return the new value. Counters start at -1, so the first +1 yields 0.
	 */
	int stack_count(const char *el, int offset)
	{
		struct stack_el_t *e = stack->el_list;

		while (e) {
			if ( !strcmp(e->name, el) ) return (e->counter += offset);
			e = e->next;
		}

		e = malloc(sizeof(struct stack_el_t));
		e->next = stack->el_list;
		stack->el_list = e;

		e->name = strdup(el);
		e->counter = -1;

		return (e->counter += offset);
	}

	/* the characters that are stripped from both ends of character data */
	int is_blank_char(char c)
	{
		return c == '\r' || c == '\n' || c == '\t' || c == ' ';
	}

	/*
	 * Finish the character data node that is currently being collected:
	 * strip surrounding whitespace and delete the node if nothing is left.
	 */
	void chardata_cleanup()
	{
		if ( last_is_char < 0 ) return;

		char *id, *id_enc, *t0, *t1;
		my_asprintf(&id, "C%d", last_is_char);
		id_enc = spl_hash_encode(id);

		struct spl_node *n = spl_lookup(task, stack->node, id_enc, 0);
		t0 = spl_get_string(n);

		/*
		 * Cut off trailing whitespace in place. t1 always stays within
		 * [t0, end-of-string], also when the text is whitespace only.
		 */
		t1 = t0 + strlen(t0);
		while (t1 > t0 && is_blank_char(t1[-1]))
			t1--;
		*t1 = 0;

		if (!*t0) {
			/* nothing left: remove the node and give its index back */
			spl_delete(task, stack->node, id_enc);
			stack_count(" chardata", -1);
			free(id_enc); free(id);
			return;
		}

		/* skip leading whitespace; stops at the NUL at the latest */
		for (t1 = t0; is_blank_char(*t1); t1++)
			;

		if (t0 != t1) {
			t1 = strdup(t1);
			spl_set_string(n, t1);
		}

		free(id_enc); free(id);
	}

	/* expat callback: an element was opened */
	void XMLCALL element_start_hdl(void *data UNUSED, const char *el, const char **attr)
	{
		char *id, *id_enc;

		if (last_is_char >= 0)
			chardata_cleanup();

		my_asprintf(&id, "E%d:%s", stack_count(el, +1), el);
		id_enc = spl_hash_encode(id);

		/* the new node becomes a child of the previous top-of-stack node */
		stack_push();
		spl_create(task, stack->next->node, id_enc, stack->node, SPL_CREATE_LOCAL);

		free(id_enc);
		free(id);

		/* attr is a 0-terminated list of name/value pairs */
		for (int i = 0; attr[i]; i += 2) {
			my_asprintf(&id, "A:%s", attr[i]);
			id_enc = spl_hash_encode(id);
			spl_create(task, stack->node, id_enc, SPL_NEW_STRING_DUP(attr[i+1]), SPL_CREATE_LOCAL);
			free(id_enc); free(id);
		}

		last_is_char = -1;
	}

	/* expat callback: an element was closed */
	void XMLCALL element_end_hdl(void *data UNUSED, const char *el UNUSED)
	{
		if (last_is_char >= 0)
			chardata_cleanup();
		stack_pop();
		last_is_char = -1;
	}

	/*
	 * expat callback: character data (`s` is not NUL-terminated). The
	 * first piece creates a new "C<n>" node, further pieces are appended
	 * to it until an element start or end is seen.
	 */
	void XMLCALL chardata_hdl(void *userData UNUSED, const XML_Char *s, int len)
	{
		if ( last_is_char < 0 ) {
			char *id, *id_enc;
			last_is_char = stack_count(" chardata", +1);
			my_asprintf(&id, "C%d", last_is_char);
			id_enc = spl_hash_encode(id);

			spl_create(task, stack->node, id_enc, SPL_NEW_STRING(my_strndup(s, len)), SPL_CREATE_LOCAL);
			free(id_enc); free(id);
		} else {
			char *id, *id_enc, *txt;
			my_asprintf(&id, "C%d", last_is_char);
			id_enc = spl_hash_encode(id);

			struct spl_node *n = spl_lookup(task, stack->node, id_enc, 0);
			my_asprintf(&txt, "%s%.*s", spl_get_string(n), len, s);
			spl_set_string(n, txt);
			free(id_enc); free(id);
		}
	}

	/* the root node of the result tree */
	stack_push();
	spl_set_string(stack->node, strdup(""));

	XML_Parser p = XML_ParserCreate(0);

	if ( !p )
	{
		/* XML_ParserCreate() could not allocate the parser */
		spl_clib_exception(task, "FormatXmlEx", "description",
			SPL_NEW_STRING_DUP("XML parser could not be created (out of memory)"),
			NULL);

		spl_put(task->vm, stack->node);
		stack_pop();

		return 0;
	}

	XML_SetElementHandler(p, element_start_hdl, element_end_hdl);
	XML_SetCharacterDataHandler(p, chardata_hdl);

	if ( XML_Parse(p, xmlfile, strlen(xmlfile), 1) == 0 )
	{
		/*
		 * The line number is not an int in newer expat versions, so it
		 * is converted explicitly to match the %d conversion.
		 */
		spl_clib_exception(task, "FormatXmlEx", "description",
			SPL_NEW_PRINTF("XML Parse error at line %d: %s",
				(int)XML_GetCurrentLineNumber(p),
				XML_ErrorString(XML_GetErrorCode(p))),
			NULL);

		XML_ParserFree(p);

		/* unwind to the root entry, release the partial tree */
		while (stack->next) stack_pop();
		spl_put(task->vm, stack->node);
		stack_pop();

		return 0;
	}

	XML_ParserFree(p);

	struct spl_node *ret = stack->node;
	stack_pop();

	return ret;
}

/**
 * Create an XML text from a data structure such as returned by [[format_xml_parse()]].
 */
// builtin format_xml_dump(xmltree)
/*
 * Implementation of the SPL builtin format_xml_dump(xmltree).
 *
 * Walks the node tree and collects the XML text as a linked list of small
 * strings, which are joined into one buffer at the end. Children whose key
 * starts with 'A' are written as attributes, 'C' as (XML-encoded) character
 * data lines and 'E' as nested elements; all other children are ignored.
 * Nested levels are indented by three spaces each.
 *
 * Returns a new string node holding the XML text, or 0 if no node argument
 * was supplied.
 */
static struct spl_node *handler_format_xml_dump(struct spl_task *task, void *data UNUSED)
{
	/* one piece of output text */
	struct txtlist_t {
		char *text;
		struct txtlist_t *next;
	};

	struct txtlist_t *head = 0, *tail = 0;

	/* sum of the lengths of all pieces collected so far */
	int total_len = 0;

	struct spl_node *tree = spl_clib_get_node(task);
	if (!tree)
		return 0;

	spl_cleanup(task, tree);

	/* append an empty piece to the list and make it the current one */
	void append_chunk()
	{
		struct txtlist_t *chunk = calloc(1, sizeof(struct txtlist_t));
		if (tail)
			tail->next = chunk;
		else
			head = chunk;
		tail = chunk;
	}

	/*
	 * Write the content of `node`. For depth > 0 the caller has already
	 * written "<name"; this function adds the attributes and the closing
	 * ">" of the start tag before writing the children.
	 */
	void emit_node(struct spl_node *node, int depth)
	{
		struct spl_node_sub *sub;

		/* guard against endless recursion on cyclic trees */
		if (depth > 1024) {
			spl_report(SPL_REPORT_RUNTIME, task, "XML Object tree seams to be cyclic!\n");
			return;
		}

		if (depth > 0) {
			for (sub = node->subs_begin; sub; sub = sub->next) {
				if (*sub->key != 'A')
					continue;

				char *decoded = spl_hash_decode(sub->key);
				char *colon = strchr(decoded, ':');

				if (colon) {
					append_chunk();
					char *value = xml_encode(spl_get_string(sub->node));
					total_len += my_asprintf(&tail->text, " %s=\"%s\"", colon+1, value);
					free(value);
				}

				free(decoded);
			}

			append_chunk();
			total_len += my_asprintf(&tail->text, ">\n");
		}

		for (sub = node->subs_begin; sub; sub = sub->next) {
			if (*sub->key == 'C') {
				/* character data: one indented line */
				append_chunk();
				char *value = xml_encode(spl_get_string(sub->node));
				total_len += my_asprintf(&tail->text, "%*s%s\n", depth*3, "", value);
				free(value);
			} else if (*sub->key == 'E') {
				/* child element: start tag, content, end tag */
				char *decoded = spl_hash_decode(sub->key);
				char *colon = strchr(decoded, ':');

				if (colon) {
					append_chunk();
					total_len += my_asprintf(&tail->text, "%*s<%s", depth*3, "", colon+1);
					emit_node(sub->node, depth+1);
					append_chunk();
					total_len += my_asprintf(&tail->text, "%*s</%s>\n", depth*3, "", colon+1);
				}

				free(decoded);
			}
			/* attributes were handled above, anything else is ignored */
		}
	}

	emit_node(tree, 0);

	/* join all pieces into one buffer, releasing the list on the way */
	char *text = malloc(total_len+1);
	int pos = 0;

	while (head) {
		struct txtlist_t *chunk = head;
		head = chunk->next;

		size_t chunk_len = strlen(chunk->text);
		memcpy(text + pos, chunk->text, chunk_len);
		pos += chunk_len;

		free(chunk->text);
		free(chunk);
	}

	assert(pos == total_len);
	text[pos] = 0;

	return SPL_NEW_STRING(text);
}

/**
 * An instance of this object is thrown on XML parser errors.
 */
// object FormatXmlEx

/**
 * A description text describing the error.
 */
// var description;

/*
 * Module initialisation entry point.
 *
 * Registers the builtins format_xml_parse and format_xml_dump with the VM.
 * When `restore` is zero, the SPL declaration of the FormatXmlEx object is
 * evaluated first (NOTE(review): presumably a restored VM already contains
 * that object -- confirm against the module loader).
 */
void SPL_ABI(spl_mod_format_xml_init)(struct spl_vm *vm, struct spl_module *mod, int restore)
{
	if (restore == 0) {
		char *name_copy = strdup(mod->name);
		spl_eval(vm, 0, name_copy, "object FormatXmlEx { }");
	}

	spl_clib_reg(vm, "format_xml_parse", handler_format_xml_parse, 0);
	spl_clib_reg(vm, "format_xml_dump", handler_format_xml_dump, 0);
}

/*
 * Module shutdown entry point. Intentionally empty: this function performs
 * no work for either argument.
 */
void SPL_ABI(spl_mod_format_xml_done)(struct spl_vm *vm UNUSED, struct spl_module *mod UNUSED)
{
}

