/* RCS identification string; left out of the build when `lint' is defined. */
#if !defined(lint)
static char rcs_id[] = "$Id: utils.c,v 1.15 1996/06/18 16:18:33 stefan Exp $";
#endif

/*
** utilities for http-analyze
**
** Copyright 1996 by Stefan Stapelberg, <stefan@rent-a-guru.de>
**
** $Log: utils.c,v $
** Revision 1.15  1996/06/18  16:18:33  stefan
** List of sites now used in cntrycode.c also. Set ishidden
** element of NLIST structure to -1 if item is not hidden.
**
** Revision 1.14  1996/06/18  14:42:40  stefan
** Modified getargs() to allow leading and trailing spaces
** around the first field of an entry in the configuration
** file. Still allow now blanks for the second and optional
** third field. Use tabs to separate fields in entries!
**
** Revision 1.13  1996/06/18  13:32:44  stefan
** Cleaned up readcfg() function.
**
** Revision 1.12  1996/06/18  12:53:17  stefan
** Print a warning if a third value field is required,
** but omitted from a HideURL/HideSys entry in the
** configuration file.
**
** Revision 1.11  1996/06/17  19:11:31  stefan
** Added entry "DefaultMode" for definition of the default
** operation mode (-d or -m) in the configuration file.
** Added code to link URLs with their hidden item so that
** we can generate a detailed list of all URLs under a
** hidden item.
**
** Revision 1.10  1996/06/13  18:31:33  stefan
** Changed BIGSIZE into LBUFSIZE.
**
** Revision 1.9  1996/05/29  05:58:07  stefan
** Modified algorithm to account for hidden items. Introduced
** a hashing list to eliminate duplicates from the hidden
** site/item lists.
**
** Revision 1.8  1996/05/25  16:38:54  stefan
** Added config file entry for list of N last frequently accessed URLs.
**
** Revision 1.7  1996/05/25  01:58:23  stefan
** Introduced new entries in config file: PrivateDir, TopSites, TopURLS.
** Changed handling of Homepage entry in cfg file. Changed data structures
** for hidden items, added a collector algorithm to allow for an accurate
** computation of hidden items (affects kb_saved and unique_urls). Fixed
** a bug where images were not hidden by default as intended. Enhanced
** algorithm for lookup of hidden items. Rewrote setDate() to avoid calls
** to the string conversion functions.
**
** Revision 1.6  1996/05/08  15:42:52  stefan
** Cosmetic changes only.
**
** Revision 1.5  1996/01/28  14:34:57  stefan
** Made it lint-clean.
**
** Revision 1.4  1996/01/28  13:43:34  stefan
** Added image suffixes to the default list of hidden items.
**
** Revision 1.3  1996/01/24  12:36:59  stefan
** Removed /cgi-bin from the default list of ignored items.
**
** Revision 1.2  1996/01/21  16:39:21  stefan
** Removed unused DocumentRoot argument from readcfg().
**
** Revision 1.1  1996/01/17  00:00:00  stefan
** Initial revision
**
**
*/

#include <assert.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <fcntl.h>

#include "defs.h"

/*
** Tables & common messages.
**
** skipmsg	format for prmsg(): file name, line number, offending text
** missing	format for prmsg(): keyword, value of an entry that lacks
**		its third field
** allimg	description under which defHiddenItem() registers the
**		default image suffixes
**
** Like every other message handed to prmsg() in this file, both
** formats end in a newline.
*/
static char skipmsg[] = "Skip invalid entry in %s, line %d: `%s'\n";
static char missing[] = "Missing third value field in `%s' entry for `%s'.\n";
static char allimg[] = "[All images]";

/*
** Lists of hidden sites and hidden items.
**
** Each list is terminated by an entry whose `pfx' member is NULL.
** As file-scope objects both arrays start out zeroed, so an empty
** list already carries its end mark in slot 0. addHiddenSite() and
** addHiddenItem() append entries and stop once the index reaches
** TABSIZE(list)-1, which keeps the last slot free for the end mark.
*/
ITEM_LIST hidden_sites[MAX_HIDDEN_SITES];
ITEM_LIST hidden_items[MAX_HIDDEN_ITEMS];

/* static functions: arguments are (pattern, description) */
static void addHiddenSite(char * const, char * const);
static void addHiddenItem(char * const, char * const);

/*
** Advance `ptr' past any run of blanks and tabs.
** WARNING: the following macro evaluates its argument more than once,
** so pass a plain pointer variable without side effects.
*/
#define SKIPWS(ptr)	while (*(ptr) == '\t' || *(ptr) == ' ') (ptr)++

/*
** Split string into arguments.
** Fields are separated by one or more tabs. The first field may
** also be surrounded by blanks; in all other fields a blank is
** ordinary text and only a tab ends the field.
**
** The string is modified in place: the separator following each
** field is overwritten with '\0' and av[] receives pointers into
** `str'. At most max-1 fields are stored, followed by a NULL end
** mark. Whitespace after the last field does not count as an
** (empty) field of its own.
**
** Returns the number of fields stored in av[].
*/
static int getargs(char *str, char **av, int const max) {
	int idx;

	if (max < 1)
		return 0;		/* no room, not even for the end mark */

	for (idx=0; idx < max-1 && *str != '\0'; idx++) {
		while (*str == '\t' || *str == ' ')
			str++;			/* skip leading whitespace */
		if (*str == '\0')
			break;			/* only trailing whitespace was left */

		av[idx] = str;
		/* a blank ends the first field only, a tab ends any field */
		while (*str != '\0' && *str != '\t' && (idx > 0 || *str != ' '))
			str++;
		if (*str == '\t' || *str == ' ')
			*str++ = '\0';		/* terminate the field */
	}
	av[idx] = NULL;
	return idx;
}

/*
** Read configuration file, set global variables.
**
** Every line that is neither empty nor a comment (first non-blank
** character is `#') is split by getargs() into a keyword, a value
** and an optional third field. The value is copied with strsave()
** and stored according to the keyword:
**
**	ServerName, HTTPLogFile, HTMLDir, PrivateDir, DocTitle
**		set the matching global only if it is still NULL
**	DefaultMode, TopSites, TopURLs, LastURLs
**		set the matching global only if it is still -1
**	HomePage
**		appended to home_page[], at most MAX_HPNAMES names
**	HeadPrefix, HeadSuffix, DocTrailer
**		always stored into html_str[]
**	HideURL, HideSys
**		need the third field; passed to addHiddenItem() and
**		addHiddenSite() together with it
**
** Entries with fewer than two or more than three fields, with an
** empty value or with an unknown keyword are reported through
** prmsg() and skipped.
**
** Returns 0 if the file can't be opened, 1 otherwise. If strsave()
** fails to copy a value, the program is terminated with exit(1).
*/
int readcfg(char * const fname) {
	char lbuf[LBUFSIZE]; 		/* line buffer */
	char *cp, *args[5];		/* entry from configuration file */
	size_t len;			/* length of current line */
	int nargs;			/* number of fields in current entry */
	int lineno;			/* line counter, printed with %d */
	FILE *cfp = fopen(fname, "r");

	if (cfp == NULL)
		return 0;

	for (lineno=1; fgets(lbuf, sizeof lbuf, cfp) != NULL; lineno++) {
		/* a line may come back empty, so test before looking at lbuf[len-1] */
		len = strlen(lbuf);
		if (len > 0 && lbuf[len-1] == '\n')
			lbuf[len-1] = '\0'; 	/* delete trailing newline */

		cp = lbuf;
		SKIPWS(cp);
		if (*cp == '#' || *cp == '\0')
			continue;		/* ignore empty and comment lines */

		/*
		** Split the line into arguments. getargs() cuts lbuf in place,
		** so the message below shows the text up to the end of the
		** first field only. An empty value is rejected here because
		** strsave() answers an empty string with NULL, which would
		** otherwise be mistaken for an allocation failure.
		*/
		nargs = getargs(lbuf, args, TABSIZE(args));
		if (nargs < 2 || nargs > 3 || *args[1] == '\0') {
			prmsg(1, skipmsg, fname, lineno, lbuf);
			continue;		/* skip invalid lines */
		}

		/* save values */
		cp = strsave(args[1]);
		if (cp == NULL) {
			prmsg(2, "Not enough memory for definitions from %s, line %d?!?\n",
				fname, lineno);
			exit(1);
		}

		/*
		** Set variables. Whenever the copy is not stored
		** anywhere, it is released again.
		*/
		if (streq(args[0], "ServerName")) {
			if (!srv_name)
				srv_name = cp;
			else
				free(cp);	/* already set, keep old value */
		} else if (streq(args[0], "HTTPLogFile")) {
			if (!log_file)
				log_file = cp;
			else
				free(cp);
		} else if (streq(args[0], "HomePage")) {
			if (hpnum == MAX_HPNAMES) {
				prmsg(1, "Too many homepage names (max %d), ignore `%s'\n",
					MAX_HPNAMES, cp);
				free(cp);
				continue;
			}
			home_page[hpnum++] = cp;
		} else if (streq(args[0], "HTMLDir")) {
			if (!out_dir)
				out_dir = cp;
			else
				free(cp);
		} else if (streq(args[0], "PrivateDir")) {
			if (!priv_dir)
				priv_dir = cp;
			else
				free(cp);
		} else if (streq(args[0], "DefaultMode")) {
			/* a leading `d' or `D' clears monthly, anything else sets it */
			if (monthly == -1)
				monthly = (*cp == 'd' || *cp == 'D') ? 0 : 1;
			free(cp);
		} else if (streq(args[0], "TopSites")) {
			if (topn_sites == -1)
				topn_sites = (int)strtoul(cp, NULL, 10);
			free(cp);
		} else if (streq(args[0], "TopURLs")) {
			if (topn_urls == -1)
				topn_urls = (int)strtoul(cp, NULL, 10);
			free(cp);
		} else if (streq(args[0], "LastURLs")) {
			if (lastn_urls == -1)
				lastn_urls = (int)strtoul(cp, NULL, 10);
			free(cp);
		} else if (streq(args[0], "DocTitle")) {
			if (!doc_title)
				doc_title = cp;
			else
				free(cp);
		} else if (streq(args[0], "HeadPrefix")) {
			/*
			** NOTE(review): the previous html_str[] value is not
			** released, it is not visible here whether it was
			** allocated dynamically -- confirm.
			*/
			html_str[HTML_HEADPFX] = cp;
		} else if (streq(args[0], "HeadSuffix")) {
			html_str[HTML_HEADSFX] = cp;
		} else if (streq(args[0], "DocTrailer")) {
			html_str[HTML_TRAILER] = cp;
		} else if (streq(args[0], "HideURL")) {
			if (args[2] == NULL) {
				prmsg(1, missing, args[0], args[1]);
				free(cp);
				continue;
			}
			addHiddenItem(cp, args[2]);	/* append to list of hidden items */
		} else if (streq(args[0], "HideSys")) {
			if (args[2] == NULL) {
				prmsg(1, missing, args[0], args[1]);
				free(cp);
				continue;
			}
			addHiddenSite(cp, args[2]);	/* append to list of hidden sites */
		} else {
			prmsg(1, skipmsg, fname, lineno, lbuf);
			free(cp);
		}
	}
	(void) fclose(cfp);
	return 1;
}

/*
** Add a sitename to the list of hidden sites.
*/
static void addHiddenSite(char * const pfx, char * const dsc) {
	static size_t idx = 0;
	size_t len;
	char *tm;

	if (idx == TABSIZE(hidden_sites)-1) {
		prmsg(1, "Table overflow in hidden site list, some sites are ignored.\n");
		return;
	}
	for (tm=pfx, len=0; *tm != '\0'; tm++, len++)
		MKLOWER(tm);		/* convert to lower case */

	if (!len) {
		prmsg(1, "Can't add zero length sitename?!?\n");
		return;
	}
	if (*pfx == '*' || *(pfx+len-1) == '*')
		len--;

	hidden_sites[idx].col = lookupItem(hstab, TABSIZE(hstab), dsc);
	if (hidden_sites[idx].col == NULL) {
		prmsg(2, "Not enough memory to hide site `%s' under `%s'?!?\n", pfx, dsc);
		return;
	}
	hidden_sites[idx].len = len;		/* save prefix */
	hidden_sites[idx].pfx = pfx;
	hidden_sites[++idx].pfx = NULL;		/* end mark */
	return;
}

/*
** Add an item to the list of hidden items.
**
** `pfx' is stored by reference (it is not copied and, other than
** in addHiddenSite(), not converted to lower case); `dsc' is handed
** to lookupItem() to find the collector entry the item is counted
** under. If the pattern starts or ends with `*', the stored length
** is one less than the length of the pattern. Nothing is added if
** the table is full, if the pattern is empty or if lookupItem()
** fails; the messages for these cases speak of items, not of sites.
*/
static void addHiddenItem(char * const pfx, char * const dsc) {
	static size_t idx = 0;		/* first unused slot in hidden_items */
	size_t len = strlen(pfx);

	if (idx == TABSIZE(hidden_items)-1) {
		prmsg(1, "Table overflow in hidden item list, some items are ignored.\n");
		return;
	}
	if (!len) {
		prmsg(1, "Can't add zero length item?!?\n");
		return;
	}
	if (*pfx == '*' || *(pfx+len-1) == '*')
		len--;				/* don't count the wildcard */

	hidden_items[idx].col = lookupItem(hitab, TABSIZE(hitab), dsc);
	if (hidden_items[idx].col == NULL) {
		prmsg(2, "Not enough memory to hide item `%s' under `%s'?!?\n", pfx, dsc);
		return;
	}

	hidden_items[idx].len = len;		/* save prefix */
	hidden_items[idx].pfx = pfx;
	hidden_items[++idx].pfx = NULL;		/* end mark */
	return;
}

/*
** Append various image suffixes as defaults to the list of
** hidden items, all of them collected under the `allimg'
** description.
**
** NOTE(review): the defaults are meant to be overridable by
** redefinitions in the configuration file; whether they are
** depends on the order in which the caller runs this function
** and readcfg() -- confirm.
*/
void defHiddenItem(void) {
	static char * const img_suffix[] = {
		"*.gif", "*.ief", "*.jpg", "*.jpeg", "*.pcd", "*.rgb",
		"*.xbm", "*.xpm", "*.xwd", "*.tiff", "*.tif"
	};
	size_t i;

	for (i = 0; i < sizeof img_suffix / sizeof img_suffix[0]; i++)
		addHiddenItem(img_suffix[i], allimg);
	return;
}

/*
** Check for hidden item, collect data.
**
** `which' selects the table: non-zero for hidden_items, zero for
** hidden_sites. The table is searched in order and the first
** matching entry wins:
**
**	pattern begins with `*'	the end of np->str must equal the text
**				behind the `*' (np->len-len > 0 required)
**	pattern ends with `*'	np->str must begin with the text in
**				front of the `*'
**	otherwise		np->str must equal the pattern
**
** On a match whose collector has a non-NULL `str', the count, nomod
** and bytes values of `np' are added to the collector, both are
** stamped with the table index and 1 is returned. In all other
** cases np->ishidden is set to -1 and 0 is returned.
*/
int isHiddenItem(int const which, NLIST * const np) {
	ITEM_LIST * const tab = (which ? hidden_items : hidden_sites);
	register ITEM_LIST *ent;

	for (ent = tab; ent->pfx != NULL; ent++) {
		if (*ent->pfx == '*') {			/* suffix pattern */
			if (np->len-ent->len > 0 &&
			    strcmp(np->str+(np->len-ent->len), ent->pfx+1) == 0)
				break;
		} else if (ent->pfx[ent->len] == '*') {	/* prefix pattern */
			if (np->len >= ent->len &&
			    strncmp(np->str, ent->pfx, (size_t)ent->len) == 0)
				break;
		} else if (np->len == ent->len &&	/* exact match */
			strcmp(np->str, ent->pfx) == 0)
			break;
	}

	/* no entry found, or the entry has no usable collector */
	if (ent->pfx == NULL || ent->col->str == NULL) {
		np->ishidden = -1;
		return 0;
	}

	ent->col->count += np->count;
	ent->col->nomod += np->nomod;
	ent->col->bytes += np->bytes;

	np->ishidden = (short)(ent - tab);	/* stamp it */
	ent->col->ishidden = np->ishidden;
	return 1;
}

/*
** Initialize list of hidden items.
*/
void initHiddenItems(void) {
	register size_t idx = 0;

	for (idx=0; hidden_sites[idx].pfx != NULL; idx++) {
		hidden_sites[idx].col->ishidden = -1;
		hidden_sites[idx].col->nomod = 0L;
		hidden_sites[idx].col->count = 0L;
		hidden_sites[idx].col->bytes = 0.0;
	}
	for (idx=0; hidden_items[idx].pfx != NULL; idx++) {
		hidden_items[idx].col->ishidden = -1;
		hidden_items[idx].col->nomod = 0L;
		hidden_items[idx].col->count = 0L;
		hidden_items[idx].col->bytes = 0.0;
	}
	return;
}

/*
** Our own version of the strdup() function, built from the
** ANSI C library only.
**
** Returns a malloc()ed copy of `cp' which the caller has to
** free(). Unlike strdup(), an empty string is not copied:
** the result is NULL for "" as well as for a failed malloc().
** `cp' itself must not be NULL.
*/
char *strsave(char *cp) {
	size_t const len = strlen(cp);
	char *copy;

	if (len == 0)
		return NULL;		/* nothing to save */

	copy = malloc(len+1);
	if (copy != NULL)
		(void) memcpy(copy, cp, len+1);	/* includes the '\0' */
	return copy;
}

/*
** Value of the two decimal digits at dp[0] and dp[1].
** The characters are not checked for being digits.
*/
static int sd_twodigits(char const *dp) {
	return (dp[0]-'0') * 10 + (dp[1]-'0');
}

/*
** Parse a date string in format DD/MMM/YYYY:HH:MM:SS
** and fill in the elements of the LOGTIME structure.
**
** Characters cp[0] to cp[19] are read without a check of the
** string length. Returns 0 if the day is greater than 31 or if
** cp[2] is not a `/'; hour, min, sec and mday have already been
** overwritten in this case. Returns 1 otherwise.
**
** The month is stored zero-based (Jan = 0 ... Dec = 11). It is
** derived from as few letters as possible: unknown month names
** are not detected, they end up as one of the twelve months.
*/

int setDate(LOGTIME * const tp, char const cp[]) {
	char const first = cp[3];	/* 1st letter of month, upper case */
	char const second = cp[4];	/* 2nd letter of month */
	char const third = cp[5];	/* 3rd letter of month */

	tp->hour = (u_short) sd_twodigits(cp+12);
	tp->min = (u_short) sd_twodigits(cp+15);
	tp->sec = (u_short) sd_twodigits(cp+18);

	tp->mday = (u_short) sd_twodigits(cp);
	if (tp->mday > 31 || cp[2] != '/')
		return 0;

	tp->year = (u_short) (sd_twodigits(cp+7) * 100 + sd_twodigits(cp+9));

	if (second == 'a') {		/* jan, mar, may */
		if (third == 'n')
			tp->mon = 0;
		else if (third == 'r')
			tp->mon = 2;
		else
			tp->mon = 4;
	} else if (second == 'u') {	/* jun, jul, aug */
		if (third == 'n')
			tp->mon = 5;
		else if (third == 'l')
			tp->mon = 6;
		else
			tp->mon = 7;
	} else if (second == 'e') {	/* feb, sep, dec */
		if (first == 'F')
			tp->mon = 1;
		else if (first == 'S')
			tp->mon = 8;
		else
			tp->mon = 11;
	} else {			/* apr, oct, nov */
		if (first == 'A')
			tp->mon = 3;
		else if (first == 'O')
			tp->mon = 9;
		else
			tp->mon = 10;
	}
	return 1;
}
