/*
 * KAKASI (Kanji Kana Simple inversion program)
 * $Header: kakasi.c,v 2.2 92/08/02 takahasi Exp $
 * Copyright (C) 1992
 * Hironobu Takahashi (takahasi@tiny.or.jp)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with KAKASI, see the file COPYING.  If not, write to the Free
 * Software Foundation Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
/* $Log:	kakasi.c,v $
 * Revision 2.1  92/07/19  09:27:49  takahasi
 * *** empty log message ***
 * 
 * Revision 2.0  92/07/18  15:17:12  takahasi
*/

/* Revision identification string; it is printed as the first line of the
   usage message.  The "@(#)" prefix is the marker conventionally searched
   for by what(1). */
static char RCS_ID[] = "@(#) $Header: kakasi.c,v 2.2 92/08/02 takahasi Exp $ written by H. Takahashi";

#include <stdio.h>
#include "kakasi.h"

/* Number of elements in the character and string buffers of this file. */
#define KAKASIBUF 256

/* Option settings; each one is changed only by the command line parser. */
int romaji_type = HEPBURN;	/* -rk switches this to KUNREI */
int romaji_capitalize = 0;	/* -C: upcase the first letter of a kanji reading */
int romaji_upcase = 0;		/* -U: upcase every letter of a kanji reading */
int heiki_mode = 0;		/* -p */
int bunkatu_mode = 0;		/* -s: enables put_separator() */
int furigana_mode = 0;		/* -f: print the source kanji, then the reading in [] */
int cr_eat_mode = 0;		/* -c: allow cr_eat_string characters inside a run */
int flush_mode = 0;		/* -u: fflush(stdout) after each piece of output */

/* Nonzero while the run being converted is kanji (set in main()). */
int kanji_digest;
/* Separator state: cleared in main(), advanced from 1 to 2 by
   put_separator().  NOTE(review): presumably set to 1 and consumed by
   the output routine, which is not in this file -- confirm. */
int separator_out;
/* Characters that may be skipped inside a run when cr_eat_mode is on. */
char cr_eat_string[KAKASIBUF];
/* Conversion result: filled by the converter called from digest() and
   printed by digest_out(). */
Character n[KAKASIBUF];

/* Terminal code types, defined elsewhere.  The type is spelled out because
   implicit int declarations are no longer valid C. */
extern int input_term_type, output_term_type;

/*
 * Duplicate the terminated character sequence c into r.
 *
 * Entries are copied one field at a time, up to and including the
 * terminator, which is the first copied entry whose type is OTHER and
 * whose c1 is 0.
 */
static void digest_start_copy(c, r)
     Character *c;
     Character *r;
{
    int idx = 0;
    int at_end;

    do {
        r[idx].type = c[idx].type;
        r[idx].c1 = c[idx].c1;
        r[idx].c2 = c[idx].c2;
        at_end = (r[idx].type == OTHER) && (r[idx].c1 == 0);
        ++idx;
    } while (!at_end);
}

/*
 * Request a separator at a boundary between runs.
 *
 * Has an effect only when bunkatu_mode is set and separator_out is 1,
 * in which case separator_out becomes 2.
 * NOTE(review): the code that acts on the value 2 is not in this file.
 */
static void put_separator()
{
    if (!bunkatu_mode)
        return;
    if (separator_out == 1)
        separator_out = 2;
}

/*
 * Output every character of results with putkanji(), stopping at the
 * first entry whose c1 is '\0' (which is not output).
 */
static void putchars(results)
     Character *results;
{
    Character *cur;

    for (cur = results; cur->c1 != '\0'; ++cur)
        putkanji(cur);
}

static void digest_out(c, ret)
     Character *c;
     int ret;
{
    Character sep, *ptr;
    int i;
 
    if (kanji_digest) {
	put_separator();
	if (romaji_capitalize) {
	    if ((n[0].type == ASCII) || (n[0].type == JISROMAN))
		if (('a' <= n[0].c1) && (n[0].c1 <= 'z'))
		    n[0].c1 = n[0].c1 - 0x20;
	} else if (romaji_upcase) {
	    for (ptr = n; ptr->c1 != '\0'; ++ptr) {
		if ((ptr->type == ASCII) || (ptr->type == JISROMAN))
		    if (('a' <= ptr->c1) && (ptr->c1 <= 'z'))
			ptr->c1 = ptr->c1 - 0x20;
	    }
	}
    }

    if (kanji_digest)
	if (max_hindo != NULL)
	    (*max_hindo) ++;

    if ((kanji_digest) && (furigana_mode)) {
	for (i = 0; i < ret; ++ i)
	    putkanji(c+i);
	sep.type = OTHER;
	sep.c1 = '[';
	putkanji(&sep);
	putchars(n);
	sep.c1 = ']';
	putkanji(&sep);
    } else {
	putchars(n);
    }
    if (flush_mode) fflush(stdout);
}

/*
 * Convert the run of characters at the head of c and print the result.
 *
 * c     pending input characters, terminated by an entry with c1 == '\0'
 * clen  number of entries of the run currently stored in c
 * r     working copy of c; it also receives any skipped cr_eat_string
 *       characters, so rlen >= clen
 * rlen  number of entries currently stored in r
 * type  character type that continues the run
 * proc  converter, called as (*proc)(c, n); it fills the global buffer n
 *
 * The converter's return value is used as a count of source characters
 * covered by n, with 0 treated as 1.  A negative value makes this
 * function try to lengthen the run: one more character is read and, if
 * it has the wanted type (or is a skippable cr_eat_string character),
 * appended before the conversion is retried.  When the run cannot be
 * lengthened, the character is pushed back and the magnitude of the
 * count is used.
 *
 * After printing, the consumed characters of the given type are dropped
 * from r and the remainder is copied back into c.
 *
 * Returns rlen minus the number of characters consumed; main() stores
 * this as the new number of pending characters.
 */
static int digest(c, clen, r, rlen, type, proc)
     Character *c;
     int clen;
     Character *r;
     int rlen;
     int type;
     int (*proc)();
{
    int ret, i, j, k;
    Character new;
    char *p;

    ret = (* proc)(c, n);
    if (ret == 0) ret = 1;

    if (ret < 0) {
        /* Lengthening stores into entries rlen and rlen+1 (and clen+1,
           which is never larger), so both must lie inside the
           KAKASIBUF-element buffers. */
        if (rlen < KAKASIBUF - 1) {
            getkanji(&new);
            if(new.type == type) {
                r[rlen].type = c[clen].type = type;
                r[rlen].c1 = c[clen].c1 = new.c1;
                r[rlen].c2 = c[clen].c2 = new.c2;
                r[rlen+1].type = c[clen+1].type = OTHER;
                r[rlen+1].c1 = c[clen+1].c1 = '\0';
                return digest(c, clen+1, r, rlen+1, type, proc);
            } else if (cr_eat_mode) {
                if ((new.type == ASCII) || (new.type == JISROMAN) || (new.type == OTHER)) {
                    for (p = cr_eat_string; *p != '\0'; ++ p) {
                        /* Compare as a byte value; plain char may be
                           signed and would sign-extend. */
                        if ((unsigned char)(*p) == new.c1) {
                            /* Skipped characters go to r only, so the
                               converter never sees them. */
                            r[rlen].type = new.type;
                            r[rlen].c1 = new.c1;
                            r[rlen].c2 = new.c2;
                            r[rlen+1].type = OTHER;
                            r[rlen+1].c1 = '\0';
                            return digest(c, clen, r, rlen+1, type, proc);
                        }
                    }
                }
            }
            ungetkanji(&new);
        }
        /* Either the run cannot be lengthened or the buffers are full:
           accept what has been converted so far.  The count must be made
           positive in both cases. */
        ret = -ret;
    }

    digest_out(c, ret);

    /* Rebuild c from r, leaving out the first ret entries of the given
       type; entries of other types (skipped characters) are kept. */
    k = ret;
    j = 0;
    for (i = 0;; ++ i) {
        if ((r[i].type == type) && (k > 0)) {
            -- k;
        } else {
            c[j].type = r[i].type;
            c[j].c1 = r[i].c1;
            c[j].c2 = r[i].c2;
            if (c[j].c1 == '\0')
                break;
            ++ j;
        }
    }
    return rlen - ret;
}

/*
 * Remove the first s entries of the terminated sequence c by moving the
 * remaining entries, terminator included, to the front.
 *
 * Each field is assigned separately instead of assigning the whole
 * structure, as a precaution for compilers without structure assignment.
 */
static void digest_shift(c, s)
     Character *c;
     int s;
{
    Character *dst = c;
    Character *src = c + s;

    for (;; ++dst, ++src) {
        dst->type = src->type;
        dst->c1 = src->c1;
        dst->c2 = src->c2;
        if (src->c1 == '\0')
            return;
    }
}

int main(argc, argv)
     int argc;
     char **argv;
{
    Character c[KAKASIBUF], r[KAKASIBUF];
    int clen, ptype, pctype;
    static int (*proc[8])()={NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL};
    /* ASCII, JISROMAN, KATAKANA, GRAPHIC, ѵ, ѥ, Ѥ,  */

    while(--argc > 0) {
	++ argv;
	if ((*argv)[0] != '-') break;
	switch((*argv)[1]) {
	  case 'a':
	    switch((*argv)[2]) {
	      case 'j':	proc[0] = a2j; break;
	      case 'E':	proc[0] = a2E; break;
	      default:  proc[0] = NULL;
	    }
	    break;
	  case 'j':
	    switch((*argv)[2]) {
	      case 'a':	proc[1] = j2a; break;
	      case 'E':	proc[1] = j2E; break;
	      default:  proc[1] = NULL;
	    }
	    break;
	  case 'g':
	    switch((*argv)[2]) {
	      case 'a':	proc[2] = g2a; break;
	      case 'j':	proc[2] = g2j; break;
	      case 'E':	proc[2] = g2E; break;
	      default:  proc[2] = NULL;
	    }
	    break;
	  case 'k':
	    switch((*argv)[2]) {
	      case 'a':	proc[3] = k2a; break;
	      case 'j':	proc[3] = k2j; break;
	      case 'K':	proc[3] = k2K; break;
	      case 'H':	proc[3] = k2H; break;
	      default:  proc[3] = NULL;
	    }
	    break;
	  case 'E':
	    switch((*argv)[2]) {
	      case 'a':	proc[4] = E2a; break;
	      case 'j':	proc[4] = E2j; break;
	      default:  proc[4] = NULL;
	    }
	    break;
	  case 'K':
	    switch((*argv)[2]) {
	      case 'a':	proc[5] = K2a; break;
	      case 'j':	proc[5] = K2j; break;
	      case 'k':	proc[5] = K2k; break;
	      case 'H':	proc[5] = K2H; break;
	      default:  proc[5] = NULL;
	    }
	    break;
	  case 'H':
	    switch((*argv)[2]) {
	      case 'a':	proc[6] = H2a; break;
	      case 'j':	proc[6] = H2j; break;
	      case 'k':	proc[6] = H2k; break;
	      case 'K':	proc[6] = H2K; break;
	      default:  proc[6] = NULL;
	    }
	    break;
	  case 'J':
	    switch((*argv)[2]) {
	      case 'a':	proc[7] = J2a; break;
	      case 'j':	proc[7] = J2j; break;
	      case 'k':	proc[7] = J2k; break;
	      case 'K':	proc[7] = J2K; break;
	      case 'H':	proc[7] = J2H; break;
	      default:  proc[7] = NULL;
	    }
	    break;
	  case 'i':
	    if ((*argv)[2] != '\0')
		set_input_term(term_type_str((*argv)+2));
	    else
		if (argc > 0) {
		    -- argc;
		    set_input_term(term_type_str(*(++ argv)));
		}
	    break;
	  case 'o':
	    if ((*argv)[2] != '\0')
		set_output_term(term_type_str((*argv)+2));
	    else
		if (argc > 1) {
		    -- argc;
		    set_output_term(term_type_str(*(++ argv)));
		}
	    break;
	  case 'r':
	    if ((*argv)[2] == 'k')
		romaji_type = KUNREI;
	    break;
	  case 'p':
	    heiki_mode = 1;
	    break;
	  case 's':
	    bunkatu_mode = 1;
	    break;
	  case 'f':
	    furigana_mode = 1;
	    break;
	  case 'c':
	    cr_eat_mode = 1;
		sprintf(cr_eat_string, "\011\012\015 %s", (*argv)+2);
	    break;
	  case 'C':
	    romaji_capitalize = 1;
	    break;
	  case 'U':
	    romaji_upcase = 1;
	    break;
	  case 'u':
	    flush_mode = 1;
	    break;
	  case '?':
	  default:
	    fprintf(stderr, "%s\n", RCS_ID);
	    fprintf(stderr, "\n");
	    fprintf(stderr, "Usage: kakasi -a[jE] -j[aE] -g[ajE] -k[ajKH] -E[aj] -K[ajkH] -H[ajkK] -J[ajkKH]\n");
	    fprintf(stderr, "              -i{oldjis,newjis,dec,euc,sjis} -o{oldjis,newjis,dec,euc,sjis}\n");
	    fprintf(stderr, "              -r{hepburn,kunrei} -p -s -f -c\"chars\"  [jisyo1, jisyo2,,,]\n");
	    fprintf(stderr, "\n");
	    fprintf(stderr, "       Character Sets:\n");
	    fprintf(stderr, "       a: ascii        j: jisroman   g: graphic    k: kana (j,k     defined in jisx0201)\n");
	    fprintf(stderr, "       E: kigou        K: katakana   H: hiragana   J: kanji(E,K,H,J defined in jisx0208)\n");
	    fprintf(stderr, "\n");
	    fprintf(stderr, "       Options:\n");
	    fprintf(stderr, "       -i: input coding system   -o output coding system\n");
	    fprintf(stderr, "       -r: romaji conversion system\n");
	    fprintf(stderr, "       -p: list all readings (with -J option)\n");
	    fprintf(stderr, "       -s: insert separate characters (with -J option)\n");
	    fprintf(stderr, "       -f: furigana mode (with -J option)\n");
	    fprintf(stderr, "       -c: skip chars within jukugo (with -J option: default TAB CR LF BLANK)\n");
	    fprintf(stderr, "       -C: romaji Capitalize (with -Ja or -Jj option)\n");
	    fprintf(stderr, "       -U: romaji Upcase     (with -Ja or -Jj option)\n");
	    fprintf(stderr, "       -u: call fflush() after 1 character output\n");
	    exit(1);
	}
    }

    if ((input_term_type != UNKNOWN) && (output_term_type == UNKNOWN))
	set_output_term(input_term_type);

    if (proc[7] == NULL)
      proc[7] = J2H;

    if (proc[7] != NULL) {
	init_jisyo();
	init_kanwa();
	for (; argc > 0; -- argc)
	    add_jisyo(*(argv ++));
    }

    pctype = OTHER;
    separator_out = 0;
    for(;;) {
	getkanji(c);
	if ((c[0].type == OTHER) && (c[0].c1 == 0xff)) break;
	c[1].type = OTHER;
	c[1].c1 = '\0';
	clen = 1;
	while (clen > 0) {
	    kanji_digest = 0;
	    switch (c[0].type) {
	      case ASCII:
	      case JISROMAN:
	      case GRAPHIC:
	      case KATAKANA:
		if ((c[0].type != OTHER) && (c[0].type != pctype)) {
		    put_separator();
		    pctype = c[0].type;
		}
		if ((*proc[(int)(c[0].type)]) == NULL) {
		    putkanji(c); digest_shift(c, 1); -- clen;
		    if (flush_mode) fflush(stdout);
		} else {
		    digest_start_copy(c, r);
		    clen = digest(c, clen, r, clen, (int)(c[0].type), *proc[(int)(c[0].type)]);
		}
		break;
	      case JIS83:
		if (c[0].c1 >= 0xb0) {
		    ptype = 7;
		    kanji_digest = 1;
    		} else if (c[0].c1 == 0xa4) {
		    ptype = 6;
		} else if (c[0].c1 == 0xa5) {
		    ptype = 5;
		} else if ((c[0].c1 == 0xa1) && (c[0].c2 == 0xbc)) {
		    ptype = 5;
		} else {
		    ptype = 4;
		}
		if (ptype != pctype) {
		    put_separator();
		    pctype = ptype;
		}
		if ((*proc[ptype]) == NULL) {
		    putkanji(c); digest_shift(c, 1); -- clen;
		    if (flush_mode) fflush(stdout);
		} else {
		    digest_start_copy(c, r);
		    clen = digest(c, clen, r, clen, JIS83, *proc[ptype]);
		}
		break;
	      default:
		putkanji(c); digest_shift(c, 1); -- clen;
		if (flush_mode) fflush(stdout);
	    }
	}
    }
    printout_hindo();
    return 0;
}
