/*
 * tkCtext.c --
 *
 *	This file contains conversion functions between
 *	wchar and STRING/COMPOUND_TEXT encoding.
 *
 * Copyright 1988,1993 Software Research Associates, Inc.
 * Permission to use, copy, modify, and distribute this software and its
 * documentation for any purpose and without fee is hereby granted, provided
 * that the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of Software Research Associates not be
 * used in advertising or publicity pertaining to distribution of the
 * software without specific, written prior permission.  Software Research
 * Associates makes no representations about the suitability of this software
 * for any purpose.  It is provided "as is" without express or implied
 * warranty.
 */

/*
 * Revision-control identification string for this file.  It is compiled
 * in unless "lint" is defined; nothing in the visible code reads it.
 */
#ifndef lint
static char rcsid[] = "$Header: /home/m-hirano/cvsroot/tcltk/tk8/generic/tkCtext.c,v 1.4 1998/12/05 08:37:17 m-hirano Exp $";
#endif

#ifdef KANJI

#include "tkPort.h"
#include "tkInt.h"

/*
 * Character set flags. Each character set is specified
 * with its Final Character and these flags.
 *
 * CS96 -	indicates that the character set is 96 charset.
 *		otherwise 94.
 * MBCS -	indicates that the character set is a multibyte
 *		charset.
 *
 * A character set identifier, as kept in the g0/g1 state variables of
 * the converters below, is the final byte of the designating escape
 * sequence OR-ed with zero or more of these flags (for example
 * CS96|'A' or MBCS|'B').  Both flags lie above the low eight bits, so
 * they never collide with the final byte itself.
 */
#define CS96	0x100
#define MBCS	0x200

/*
 * Forward declarations of the procedures that are local to this file.
 * _ANSI_ARGS_ is not defined here; presumably it comes from the Tcl/Tk
 * headers included above.
 */
static int	convJWStoCT _ANSI_ARGS_((wchar *wstr, int len,
					 unsigned char *xstr));
static int	convCTtoJWS _ANSI_ARGS_((unsigned char *xstr, int len,
					 wchar *wstr));
static unsigned char	*getesc _ANSI_ARGS_((unsigned char *str, int len));
static unsigned char	*getcsi _ANSI_ARGS_((unsigned char *str, int len));

/*
 *--------------------------------------------------------------
 *
 * Tk_WStrToString --
 *
 *	Build a STRING-encoded copy of a wchar string.  Only the
 *	characters that have neither bit 0x8000 nor bit 0x0080 set
 *	(the G0/ASCII ones) are kept, each reduced to its low seven
 *	bits; every other character is silently dropped.
 *
 *	"n" is the number of characters to examine in "ws".  A
 *	negative value means "ws" is NUL-terminated and its length
 *	is to be counted here.
 *
 * Results:
 *	A freshly allocated (ckalloc), NUL-terminated string that
 *	the caller must free, or NULL when no character of the
 *	input survives the conversion (this includes empty input).
 *
 * Side effects:
 *	None.
 *
 *--------------------------------------------------------------
 */

char *
Tk_WStrToString(ws, n)
wchar *ws;
int n;
{
    char *result;
    int kept;		/* number of characters that will be copied */
    int src;
    int dst;

    /* A negative count means "measure the NUL-terminated input". */
    if (n < 0) {
	n = 0;
	while (ws[n] != 0) {
	    n++;
	}
    }

    /* First pass: find out how much room the result needs. */
    kept = 0;
    for (src = 0; src < n; src++) {
	if (!(ws[src] & 0x8080)) {
	    kept++;
	}
    }
    if (kept < 1) {
	return NULL;
    }

    result = ckalloc((unsigned int)kept + 1);

    /* Second pass: copy the surviving characters. */
    dst = 0;
    for (src = 0; src < n; src++) {
	if (!(ws[src] & 0x8080)) {
	    result[dst] = ws[src] & 0x7f;
	    dst++;
	}
    }
    result[dst] = '\0';

    return result;
}

/*
 *--------------------------------------------------------------
 *
 * Tk_WStrToCtext --
 *
 *	Produce a Compound Text version of a wchar string.
 *
 *	"n" is the number of characters of "ws" to convert; a
 *	negative value means "ws" is NUL-terminated.
 *
 *	When __WIN32__ is defined the work is delegated to
 *	Tcl_DecodeSJIS() on a NUL-terminated private copy of the
 *	input (presumably yielding Shift-JIS rather than real
 *	Compound Text -- confirm against that routine).  Otherwise
 *	convJWStoCT() is called twice: once to size the result and
 *	once to fill it.
 *
 * Results:
 *	A freshly allocated (ckalloc) string that the caller must
 *	free.  In the non-Windows build NULL is returned when the
 *	converted length is not positive.
 *
 * Side effects:
 *	None.
 *
 *--------------------------------------------------------------
 */

char *
Tk_WStrToCtext(ws, n)
wchar *ws;
int n;
{
#ifdef __WIN32__
    wchar *copy;
    char *ct;
    int len;

    if (n < 0) {
	n = Tcl_WStrlen(ws);
    }

    /* Work on a NUL-terminated copy holding exactly n characters. */
    copy = (wchar *)ckalloc(sizeof(wchar) * (n + 1));
    Tcl_WStrncpy(copy, ws, n);
    copy[n] = 0;

    /* A NULL destination only reports the length that is needed. */
    len = Tcl_DecodeSJIS(copy, NULL);
    ct = ckalloc((unsigned int)len + 1);
    Tcl_DecodeSJIS(copy, ct);

    ckfree((char *) copy);
    return ct;
#else
    char *ct;
    int len;

    /* Sizing pass: nothing is written when the output is NULL. */
    len = convJWStoCT(ws, n, (unsigned char *)NULL);
    if (len <= 0) {
	return NULL;
    }

    ct = ckalloc((unsigned int)len + 1);
    (void)convJWStoCT(ws, n, (unsigned char *)ct);
    return ct;
#endif /* __WIN32__ */
}

/*
 *--------------------------------------------------------------
 *
 * Tk_CtextToWStr --
 *
 *	Produce a wchar version of a Compound Text string.
 *	Characters that cannot be represented are dropped.  Since
 *	STRING is a subset of Compound Text, the same routine also
 *	serves to convert STRING data.
 *
 *	"n" is the number of bytes of "ct" to convert; a negative
 *	value means "ct" is NUL-terminated.
 *
 *	When __WIN32__ is defined the work is delegated to
 *	Tcl_EncodeSJIS() on a NUL-terminated private copy of the
 *	input (presumably treating it as Shift-JIS -- confirm
 *	against that routine).  Otherwise convCTtoJWS() is called
 *	twice: once to size the result and once to fill it.
 *
 * Results:
 *	A freshly allocated (ckalloc) wchar string that the caller
 *	must free.  In the non-Windows build NULL is returned when
 *	the sizing pass reports an error or a length that is not
 *	positive.
 *
 * Side effects:
 *	None.
 *
 *--------------------------------------------------------------
 */

wchar *
Tk_CtextToWStr(ct, n)
char *ct;
int n;
{
#ifdef __WIN32__
    char *copy;
    wchar *ws;
    int len;

    if (n < 0) {
	n = strlen(ct);
    }

    /* Work on a NUL-terminated copy holding at most n bytes. */
    copy = ckalloc(n + 1);
    strncpy(copy, ct, n);
    copy[n] = 0;

    /* A NULL destination only reports the length that is needed. */
    len = Tcl_EncodeSJIS(copy, NULL);
    ws = (wchar *)ckalloc(sizeof(wchar) * (unsigned int)(len + 1));
    Tcl_EncodeSJIS(copy, ws);

    ckfree(copy);
    return ws;
#else
    wchar *ws;
    int len;

    /* Sizing pass: nothing is written when the output is NULL. */
    len = convCTtoJWS((unsigned char *)ct, n, (wchar *)NULL);
    if (len <= 0) {
	return (wchar *)NULL;
    }

    ws = (wchar *)ckalloc(sizeof(wchar) * (unsigned int)(len + 1));
    (void)convCTtoJWS((unsigned char *)ct, n, ws);
    return ws;
#endif /* __WIN32__ */
}

/*
 *--------------------------------------------------------------
 *
 * convJWStoCT --
 *
 *	Encode a Japanese wide character string (type wchar) as
 *	Compound Text.
 *
 *	G0 always carries ASCII.  G1 starts out as the Compound
 *	Text default (ISO8859-1 right half) and is redesignated on
 *	demand to Kana (ESC ) I) or Kanji (ESC $ ) B).  If G1 was
 *	changed, it is set back to the default (ESC - A) at the end.
 *
 *	"len" is the number of characters in "wstr"; a negative
 *	value means "wstr" is NUL-terminated.
 *
 * Results:
 *	The number of bytes in the encoded string, not counting
 *	the terminating NUL byte.  If "xstr" is not NULL the
 *	encoded string plus a NUL byte is stored there; the caller
 *	must supply enough room.  With "xstr" NULL nothing is
 *	written, which makes a first call with NULL a convenient
 *	way of finding out how much storage to allocate.
 *
 * Side effects:
 *	None.
 *
 *--------------------------------------------------------------
 */

static int
convJWStoCT(wstr, len, xstr)
wchar *wstr;
int len;
unsigned char *xstr;
{
    int designated = CS96|'A';	/* charset currently designated to G1 */
    int total = 0;		/* bytes produced so far */
    int plane;			/* the two "high" bits of a character */
    int ch;
    int i;

    if (len < 0) {
	len = 0;
	while (wstr[len] != 0) {
	    len++;
	}
    }

    for (i = 0; i < len; i++) {
	ch = wstr[i];
	plane = ch & 0x8080;

	if (plane == 0) {
	    /*
	     * ASCII, C0 or DEL.  TAB and NL are the only control
	     * characters that are passed on.
	     */
	    if (ch < ' ' || ch == 0x7f) {
		if (ch != '\t' && ch != '\n') {
		    continue;
		}
		if (xstr) {
		    *xstr++ = ch;
		}
	    } else if (xstr) {
		*xstr++ = ch & 0x7f;
	    }
	    total++;
	} else if (plane == 0x80) {
	    /*
	     * Kana (JIS-X0201 right half) or C1.  Anything outside
	     * 0xa0 - 0xfe is dropped.
	     */
	    if (ch < 0xa0 || ch > 0xfe) {
		continue;
	    }
	    if (designated != 'I') {
		if (xstr) {
		    xstr[0] = '\033';
		    xstr[1] = ')';
		    xstr[2] = 'I';
		    xstr += 3;
		}
		total += 3;
		designated = 'I';
	    }
	    if (xstr) {
		*xstr++ = ch & 0xff;
	    }
	    total++;
	} else if (plane == 0x8080) {
	    /*
	     * Kanji (JIS-X0208): two bytes per character.
	     */
	    if (designated != (MBCS|'B')) {
		if (xstr) {
		    xstr[0] = '\033';
		    xstr[1] = '$';
		    xstr[2] = ')';
		    xstr[3] = 'B';
		    xstr += 4;
		}
		total += 4;
		designated = MBCS|'B';
	    }
	    if (xstr) {
		xstr[0] = (ch >> 8) & 0xff;
		xstr[1] = ch & 0xff;
		xstr += 2;
	    }
	    total += 2;
	}
	/* Remaining case (0x8000): G3, undefined -- ignored. */
    }

    /*
     * Leave G1 the way Compound Text expects to find it.
     */
    if (designated != (CS96|'A')) {
	if (xstr) {
	    xstr[0] = '\033';
	    xstr[1] = '-';
	    xstr[2] = 'A';
	    xstr += 3;
	}
	total += 3;
    }

    if (xstr) {
	*xstr = '\0';
    }
    return total;
}

/*
 * getesc -- scan the remainder of an escape sequence.
 *
 *	"str" points at the first byte to examine and "len" is the number
 *	of bytes available there.  Any run of intermediate bytes
 *	(02/00 - 02/15) is skipped; it must be followed by one final byte
 *	(03/00 - 07/14).
 *
 *	Returns a pointer to the byte after the final byte, or NULL if
 *	the input runs out or the final byte is out of range.
 */
static unsigned char *
getesc(str, len)
unsigned char *str;
int len;
{
    int remaining;
    int final;

    /* step over the intermediate bytes */
    for (remaining = len; remaining > 0; remaining--, str++) {
	if (*str < 0x20 || *str > 0x2f) {
	    break;
	}
    }

    /* a final byte must still be available ... */
    if (remaining < 1) {
	return (unsigned char *)NULL;
    }

    /* ... and it must lie within 03/00 - 07/14 */
    final = *str;
    if (final < 0x30 || final > 0x7e) {
	return (unsigned char *)NULL;
    }
    return str + 1;
}

/*
 * getcsi -- scan the remainder of a CSI sequence.
 *
 *	"str" points at the first byte to examine and "len" is the number
 *	of bytes available there.  A run of parameter bytes
 *	(03/00 - 03/15) and then a run of intermediate bytes
 *	(02/00 - 02/15) are skipped; one final byte (04/00 - 07/14) must
 *	follow.
 *
 *	Returns a pointer to the byte after the final byte, or NULL if
 *	the input runs out or the final byte is out of range.
 */
static unsigned char *
getcsi(str, len)
unsigned char *str;
int len;
{
    int remaining = len;
    int final;

    /* step over the parameter bytes */
    while (remaining > 0 && *str >= 0x30 && *str <= 0x3f) {
	remaining--;
	str++;
    }

    /* step over the intermediate bytes */
    while (remaining > 0 && *str >= 0x20 && *str <= 0x2f) {
	remaining--;
	str++;
    }

    /* a final byte must still be available ... */
    if (remaining < 1) {
	return (unsigned char *)NULL;
    }

    /* ... and it must lie within 04/00 - 07/14 */
    final = *str;
    if (final < 0x40 || final > 0x7e) {
	return (unsigned char *)NULL;
    }
    return str + 1;
}

/*
 *--------------------------------------------------------------
 *
 * convCTtoJWS --
 *
 *	Convert Compound Text string to Japanese wide character
 *	string (type wchar) and returns its length.
 *
 *	"len" is the number of bytes in "xstr"; a negative value
 *	means "xstr" is NUL-terminated and strlen() is used.
 *
 *	Characters are kept only when the character set currently
 *	designated for them is 'B', 'J', 'I' or MBCS|'B'; characters
 *	of any other set are consumed but produce no output.  NL,
 *	TAB and SPACE are always passed through.
 *
 * Results:
 *	The return value is the length (the number of characters)
 *	of the converted string	(excluding trailing NUL character),
 *	or -1 if the input is malformed: an ill-formed or truncated
 *	CSI/escape sequence, a truncated extended segment, an
 *	illegal C0 or C1 byte, a truncated multibyte character, or
 *	a character in a multibyte set whose final byte is below
 *	04/00.
 *	The converted string is written in the area specified
 *	by wstr.  It is the caller's responsibility to allocate
 *	appropriate size of memory for it.
 *	If wstr is NULL, the converted string is not written,
 *	only the length of the string is returned.  So calling
 *	with wstr NULL is useful for determining the size of
 *	required storage.
 *
 * Side effects:
 *	None. 
 *
 *--------------------------------------------------------------
 */

static int
convCTtoJWS(xstr, len, wstr)
unsigned char *xstr;
int len;
wchar *wstr;
{
    int c;
    int nskip;			/* byte count of an extended segment */
    int n = 0;			/* number of wide characters produced */
    int g0, g1, gs;		/* charsets in G0/G1, and the one selected
				 * for the current character */
    unsigned char *xstr1;	/* first byte after a control sequence */
    
    if (len < 0) len = strlen((char *)xstr);
    
    /*
     * set initial state:
     *	G0(GL): ASCII
     *	G1(GR): ISO8859-1 right half
     */
    g0 = 'B';
    g1 = CS96|'A';
    
    while (len-- > 0) {
	switch (c = *xstr++) {
	case '\n': case '\t': case ' ':	/* NL, TAB, SPACE */
	    if (wstr) *wstr++ = c;
	    n++;
	    break;
	case 0x9b:		/* CSI */
	    /*
	     * CSI sequence: in the form of CSI {P} {I} F
	     * where
	     *           parameter P: 03/00 - 03/15
	     *   intermediate char I: 02/00 - 02/15
	     *          final char F: 04/00 - 07/14
	     *
	     * currently, only directionality is defined by
	     * the Compound Text standard. since Japanese
	     * doesn't need directionality to be specified,
	     * ignore all the CSI sequences.
	     */
	    xstr1 = getcsi(xstr, len);
	    if (xstr1 == NULL) return -1;	/* Error */
	    /* account for the bytes the sequence occupied */
	    len -= xstr1 - xstr;
	    xstr = xstr1;
	    break;
	case '\033':		/* ESC */
	    /*
	     * escape sequence: in the form of ESC {I} F
	     * where
	     *   intermediate char I: 02/00 - 02/15
	     *          final char F: 03/00 - 07/14
	     *
	     * currently following sequences are defined.
	     *   + standard character set
	     *      ESC-(-F  ESC-)-F  ESC---F	-- single byte
	     *      ESC-$-(-F  ESC-$-)-F	-- multi-byte
	     *   + non-standard character set
	     *      ESC-%-/-[0123]
	     */
	    xstr1 = getesc(xstr, len);
	    if (xstr1 == NULL) return -1;	/* Error */
	    /* account for the bytes the sequence occupied */
	    len -= xstr1 - xstr;
	    /*
	     * Dispatch on the number of bytes that follow ESC.
	     * Sequences of any other length are skipped without
	     * changing the state.
	     */
	    switch (xstr1 - xstr) {
	    case 2:		/* ESC - I - F */
		/* after the increment xstr points at the final byte */
		switch (*xstr++) {
		case '(': g0 = *xstr; break;	/* 94 CS -> G0 */
		case ')': g1 = *xstr; break;	/* 94 CS -> G1 */
		case '-': g1 = *xstr|CS96; break; /* 96 CS -> G1 */
		}
		break;
	    case 3:		/* ESC - I - I - F */
		switch (*xstr++) {
		case '$':	/* Multi-Byte Character Set */
		    switch (*xstr++) {
		    case '(': g0 = *xstr|MBCS; break; /* 94 MBCS -> G0 */
		    case ')': g1 = *xstr|MBCS; break; /* 94 MBCS -> G1 */
		    case '-': g1 = *xstr|CS96|MBCS; break; /* 96 MBCS -> G1 */
		    }
		    break;
		case '%':
		    if (*xstr++ != '/') break;	/* unknown sequence */
		    /* private encoding. skip. */
		    /*
		     * The two bytes after the final byte hold the
		     * segment length: the low seven bits of the first
		     * times 128, plus the low seven bits of the second.
		     */
		    len -= 2;
		    if (len < 0) return -1;
		    nskip = (*xstr1 & 0x7f) * 128 + (*(xstr1 + 1) & 0x7f);
		    if ((len -= nskip) < 0) return -1;
		    /* step over the length bytes and the segment */
		    xstr1 += nskip + 2;
		    break;
		}
		break;
	    }
	    xstr = xstr1;
	    break;
	default:
	    if (!(c & 0x60)) return -1;	/* illegal C0 or C1 character */

	    /* the high bit selects GR (G1) or GL (G0) */
	    gs = (c & 0x80) ? g1 : g0;
	    c &= 0x7f;
	    if (gs & MBCS) {
		/*
		 * The final byte of the designation determines the
		 * number of bytes per character.  The cases fall
		 * through on purpose so that each one reads one
		 * more trailing byte into c.
		 */
		switch (gs & 0x70) {
		case 0x70:	/* 4byte/char */
		    if (--len < 0) return -1;
		    c = (c << 8) | (*xstr++ & 0x7f);
		    /* FALLTHRU */
		case 0x60:	/* 3byte/char */
		    if (--len < 0) return -1;
		    c = (c << 8) | (*xstr++ & 0x7f);
		    /* FALLTHRU */
		case 0x50:	/* 2byte/char */
		case 0x40:	/* 2byte/char */
		    if (--len < 0) return -1;
		    c = (c << 8) | (*xstr++ & 0x7f);
		    break;
		default:
		    return -1;
		}
	    }
	    /*
	     * Only these character sets are converted; characters
	     * of any other set are dropped.  'B' is ASCII, 'I' is
	     * Kana and MBCS|'B' is Kanji; 'J' is presumably JIS
	     * X0201 Roman (not stated in this file -- confirm).
	     */
	    if (gs == 'B' || gs == 'J' || gs == 'I' || gs == (MBCS|'B')) {
		if (wstr) {
		    switch (gs) {
		    case MBCS|'B':	*wstr++ = c | 0x8080; break;
		    case 'I':		*wstr++ = c | 0x80; break;
		    default:		*wstr++ = c; break;
		    }
		}
		n++;
	    }
	    break;
	}
    }
    if (wstr) *wstr = 0;
    return n;
}

#endif /* KANJI */
