/*
** Country table management for http-analyze.
**
** Copyright  1996-1999 by Stefan Stapelberg, <stefan@rent-a-guru.de>
**
** $Id: cntrycode.c,v 2.4 1999/10/30 09:40:18 stefan Stab $
**
*/

#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>		/* for `va_list' (in defs.h) */
#include <string.h>
#include <ctype.h>
#include <sys/types.h>

#if defined(unix)
# include <unistd.h>
#else
# if defined(WIN32)
#  include <winsock.h>		/* for the u_int, etc. types */
# endif

# if defined(WIN32) || defined(NETWARE)
#  include <direct.h>		/* for other windows/watcom stuff */
#  include <io.h>		/* for the F_OK, etc. symbolic constants */
# endif
#endif

#include "config.h"
#include "defs.h"
#include "cntrycode.h"

/*
** Country table.
** The first element is used for unresolved IP numbers (CC_UNRES).
** The following fields contain mock domains set by AddDomain
** (starting at index 1 up to cc_cntry). The remaining fields are
** used for the "official" two- to six-letter TLDs (up to cc_max).
**
** Layout of the table:
**	[0]			unresolved (CC_UNRES)
**	[1 .. cc_cntry-1]	mock domains, kept in insertion order
**	[cc_cntry .. cc_max-1]	TLD entries, sorted by code in addCountry
*/
#define CC_UNRES	0

static COUNTRY *country = NULL;	/* the table itself, grown by insertCountry */
static size_t cc_cntry = 0;	/* index of the first TLD entry */
static size_t cc_max   = 0;	/* number of entries currently in use */
static size_t cc_avail = 0;	/* number of entries allocated */
static u_int noTLD  = 0;	/* set by initTLD("none"): skip the TLD lookup */

/*
** Sort country list by codes.
*/
static int sort_by_ccode(const void *e1, const void *e2) {
	return ((COUNTRY *)e1)->code - ((COUNTRY *)e2)->code;
}

/*
** Pack the characters of a top-level domain into a numeric code.
** The string is consumed from left to right; for each character the
** code collected so far is shifted left by five bits and the value
** of the (lowercased) character relative to 'a', plus one, is or'ed in.
** An empty string yields zero.
*/
static u_long cntryCode(char *dom) {
	u_long packed = 0L;
	char ch;

	while ((ch = *dom++) != '\0') {
		packed <<= 5L;
		packed |= (is_upper(ch) ? to_lower(ch) : ch) - 'a' + 1;
	}
	return packed;
}

/*
** Append an entry to the country table, growing the table if needed.
**
**	pfx	domain prefix of a mock domain or NULL for a TLD entry;
**		the pointer is stored as is, the string is not copied
**	name	display name; the pointer is stored as is
**	code	numeric code of the top-level domain
**
** Returns the new number of entries in use (always non-zero) or zero
** if the table could not be grown. On failure the existing table and
** all counters are left untouched, so a later call may try again.
*/
static int insertCountry(char *const pfx, char *const name, u_long const code) {
	enum { CC_GROW = 200 };		/* entries added per growth step */

	if (cc_max == cc_avail) {
		size_t const want = cc_avail + CC_GROW;
		/* realloc(NULL, n) behaves like malloc(n), so this covers the
		 * initial allocation as well. Keep the result in a temporary:
		 * on failure the old block is still valid and must not be lost. */
		COUNTRY *const grown = realloc(country, want*sizeof(COUNTRY));

		if (!grown) {
			prmsg(1, GETMSG(408, "Can't grow country table from %u to %u bytes\n"),
				(u_int)(cc_avail*sizeof(COUNTRY)), (u_int)(want*sizeof(COUNTRY)));
			return 0;
		}
		country = grown;
		cc_avail = want;
	}
	country[cc_max].pfx  = pfx;
	country[cc_max].plen = !pfx ? 0 : strlen(pfx);
	country[cc_max].name = name;
	country[cc_max].code = code;
	CLEAR_CNT(country[cc_max].cnt);
	return (int)++cc_max;
}

/*
** Add a mock domain (AddDomain) to the country list.
**
**	pfx	domain prefix, e.g. ".dtag.de"; stored by reference,
**		so it must stay valid as long as the table is in use
**	name	display name; a private copy is made with strsave
**
** The table is created on the first call, even if pfx or name is NULL.
** The code of the entry is computed from the text following the last
** dot of pfx. It is zero if pfx contains no dot or if the only dot is
** the very first character.
*/
void addTLD(char *const pfx, char *const name) {
	char *dot, *s1;
	u_long cc_code;

	if (!cc_avail) {		/* create table */
		if (!insertCountry(NULL, GETMSG(409, "Unresolved"), 0L))
			return;
		cc_cntry = cc_max;
	}

	if (!pfx || !name)
		return;

	if (!(s1=strsave(name))) {
		prmsg(1, GETMSG(410, "AddDomain: not enough memory for `%s (%s)'\n"),
			pfx, name);
		return;
	}

	/* A dot in the first position does not count as a separator.
	 * strrchr also copes with an empty prefix, where stepping a
	 * pointer backwards from the end would leave the string. */
	dot = strrchr(pfx, '.');
	cc_code = (dot == NULL || dot == pfx) ? 0L : cntryCode(dot+1);

	/* mock domains precede the TLD entries, so move the boundary */
	/* NOTE(review): s1 is not released if insertCountry fails; confirm
	 * how strsave allocates before adding a free() here. */
	if (insertCountry(pfx, s1, cc_code))
		cc_cntry = cc_max;
	return;
}

/*
** Read the list of valid top-level domains from the given file.
**
** Empty lines and lines starting with `#' are skipped. Every other
** line is split by getargs and must yield exactly two fields: the
** TLD (an optional leading dot is ignored, 2 to 6 characters) and
** the name to display for it. Invalid lines are reported and skipped.
** Reading stops at end of file or as soon as memory runs out; the
** file is closed in either case.
*/
static void readTLDFile(char *const tld) {
	FILE *tfp = fopen(tld, "r");
	char *s1, lbuf[1024], *args[5];
	size_t len;

	if (!tfp) {
		prmsg(1, GETMSG(411, "Couldn't open TLD file `%s' for reading\n"), tld);
		return;
	}

	while (fgets(lbuf, sizeof lbuf, tfp) != NULL) {
		/* delete trailing newline; unlike indexing with strlen()-1
		 * this is safe even if the buffer holds an empty string */
		lbuf[strcspn(lbuf, "\n")] = '\0';

		if (*lbuf == '\0' || *lbuf == '#')
			continue;		/* skip empty and comment lines */

		/* split the line into arguments */
		if (getargs(lbuf, args, TABSIZE(args)) != 2) {
			prmsg(1, GETMSG(412, "Invalid entry in TLD file (missing tabs?): %s\n"), lbuf);
			continue;
		}
		if (*args[0] == '.')
			args[0]++;

		len = strlen(args[0]);
		if (len < 2 || len > 6) {
			prmsg(1, GETMSG(413, "TLD `%s' for `%s' too short/long\n"),
				args[0], args[1]);
			continue;
		}
		if (!(s1 = strsave(args[1]))) {
			prmsg(1, GETMSG(414, "TLDFile: Not enough memory for `%s (%s)'\n"),
				args[0], args[1]);
			break;			/* leave through fclose below */
		}
		/* NOTE(review): s1 is not released if insertCountry fails; confirm
		 * how strsave allocates before adding a free() here. */
		if (!insertCountry(NULL, s1, cntryCode(args[0])))
			break;
	}
	(void) fclose(tfp);
	return;
}

/*
** Set up the list of countries.
**
** The table is created with its "Unresolved" entry if it does not
** exist yet. After that, one of three things happens:
**	tld is "none"	the TLD lookup is switched off (noTLD)
**	tld is a name	the TLD list is read from that file
**	tld is NULL	the built-in table def_country is used; if a
**			message catalog is compiled in, each name is
**			looked up there first, using the entry's plen
**			field as message number
*/
void initTLD(char *const tld) {
	size_t idx;

	if (cc_avail == 0) {			/* allocate initial memory */
		if (!insertCountry(NULL, GETMSG(409, "Unresolved"), 0L))
			return;
		cc_cntry = cc_max;
	}

	if (tld != NULL) {
		if (streq(tld, "none"))		/* suppress TLD list */
			noTLD = 1;
		else				/* read TLD list from file */
			readTLDFile(tld);
		return;
	}

	/* no file given: install the built-in defaults */
	idx = 0;
	while (idx < TABSIZE(def_country)) {
#if defined(USE_XPGCAT)
		def_country[idx].name = catgets(catFD,
				NL_SETD, def_country[idx].plen, def_country[idx].name);
#elif defined(USE_SVR4CAT)
		char msgid[SMALLSIZE];
		(void) snprintf(msgid, sizeof msgid, ":%d", def_country[idx].plen);
		def_country[idx].name = gettxt(msgid, def_country[idx].name);
#endif
		if (!insertCountry(NULL, def_country[idx].name, def_country[idx].code))
			break;
		idx++;
	}
	return;
}

/*
** Locate the top-level domain in the `len' bytes starting at `str'.
** Returns a pointer to the byte following the last dot which is not
** the very first byte of the string. Returns NULL if there is no such
** dot or if more than six bytes follow it. Only offsets inside the
** string are used, so short and empty strings are handled safely.
*/
static char *cc_findTLD(char *const str, size_t const len) {
	size_t pos = len;

	while (pos > 1) {
		if (str[--pos] == '.') {
			pos++;
			return (len-pos > 6) ? NULL : str+pos;
		}
	}
	return NULL;
}

/*
** Add the given hostnames to the country list. Try to determine
** the country by looking at the top-level domain.
** For HideSys expressions with a wildcard suffix,
** use the optional domain set in the directive.
**
**	hp	table of hide entries, indexed by the `ishidden'
**		value of a hostname entry
**	uc	counter which is added to the "unresolved" entry
**	np	list of hostname entries to account for
**	num	number of elements in np
**
** Each hostname is first compared against the mock domains. If none
** matches, it is credited to the TLD entry with the same code or, if
** there is no such entry or the TLD lookup is suppressed, to the
** "unresolved" entry.
**
** Returns the number of entries in the country table, or zero if
** the table has not been set up.
*/
u_int addCountry(HIDE_TAB *const hp, COUNTER *const uc, NLIST **const np, size_t const num) {
	char *bp, *tld;
	size_t idx, cur, len;
	u_long cc_code;

	if (!country)		/* sanity check */
		return 0;

	/* save total unresolved hostnames */
	UPDATE_CNT(country[CC_UNRES].cnt, *uc);

	/* sort TLD part of country table */
	qsort((void *)&country[cc_cntry], cc_max-cc_cntry, sizeof(COUNTRY), sort_by_ccode);

	for (cur=0; cur < num; cur++) {
		int ndx = (int)np[cur]->ishidden;

		if (ndx >= 0 && ndx < hp->t_start && hp->tab[ndx].pfx != NULL) {
			/* hidden name: count it under the collecting name */
			if ((len = hp->tab[ndx].col->len) == 0)
				continue;

			bp = hp->tab[ndx].col->str;
			if ((tld = hp->tab[ndx].sref) == NULL) {
				/* no domain given, derive the TLD from the prefix */
				tld = cc_findTLD(hp->tab[ndx].pfx, (size_t)hp->tab[ndx].len);
				if (tld != NULL && (*tld == '*' || is_digit(*tld)))
					tld = NULL;
			}
		} else {
			bp = np[cur]->str;
			if ((len = np[cur]->len) == 0)
				continue;

			tld = cc_findTLD(bp, len);
		}
		cc_code = !tld ? 0L : cntryCode(tld);

		/* mock domains take precedence over the TLD list;
		 * a mock domain with code zero matches any TLD */
		if (cc_cntry > 1) {
			for (idx=1; idx < cc_cntry; idx++) {
				if ((!country[idx].code || (cc_code == country[idx].code)) &&
				    len == country[idx].plen && streq(bp, country[idx].pfx)) {
					UPDATE_CNT(country[idx].cnt, np[cur]->cnt);
					break;
				}
			}
			if (idx < cc_cntry)
				continue;
		}

		if (noTLD) {		/* suppress country list */
			idx = CC_UNRES;
			UPDATE_CNT(country[idx].cnt, np[cur]->cnt);
			continue;
		}

		idx = CC_UNRES;
		if (tld) {		/* lookup country table using binary search */
			/* The bounds are inclusive. low never drops below
			 * cc_cntry, which is at least 1 once the table
			 * exists, so `mid-1' cannot wrap around. */
			size_t low = cc_cntry, high = cc_max-1;

			while (low <= high) {
				size_t const mid = low + (high-low) / 2;

				if (cc_code < country[mid].code)
					high = mid-1;
				else if (cc_code > country[mid].code)
					low = mid+1;
				else {
					idx = mid;
					break;
				}
			}
		}

		if (idx == CC_UNRES && verbose)
			prmsg(1, GETMSG(415, "Invalid TLD, add `%s' to unresolved\n"), bp);

		UPDATE_CNT(country[idx].cnt, np[cur]->cnt);
	}
	return cc_max;
}

/*
** Print all mock domains.
*/
void prMockDom(FILE *const cfp) {
	size_t idx;

	if (cc_cntry < 2) {
		(void) fputs(
			"#AddDomain\t.myname.com\t\tMYCOMPANY\n"
			"#AddDomain\t.compuserve.com\t\tCompuserve\n"
			"#AddDomain\t.dtag.de\t\tT-Online\n"
			"#AddDomain\t.aol.com\t\tAOL\n", cfp);
		return;
	}
	for (idx=1; idx < cc_cntry; idx++)
		(void) fprintf(cfp, "AddDomain\t%s\t%s\n",
			country[idx].pfx, country[idx].name);
	return;
}

/*
** Retrieve next country from list, reset list pointer if `reset' is TRUE.
*/
COUNTRY *nextCountry(int const reset) {
	static size_t next = 0;
	COUNTRY *cp;

	if (reset) {
		next = 0;
		return NULL;
	}
	for (cp=NULL; !cp && (next < cc_max); next++)
		if (country[next].cnt.hits)
			cp = &country[next];
	return cp;
}

/*
** Reset the counters of all entries in the country table and
** rewind the nextCountry iteration. Prefixes, names and codes
** of the entries are left as they are.
*/
void clearCountry(void) {
	size_t left = cc_max;

	while (left-- > 0)		/* only the counters are wiped */
		CLEAR_CNT(country[left].cnt);

	(void) nextCountry(1);		/* reset index */
	return;
}

