//
//   Convert MB file to plain ASCII format
// 
//   by C.W.Huang    12-13 May 1998
//                   27-31 July 1998 (original b2a)
//        updated    19 August 1999
//

// Usage
//
//        mb2a              read from stdin and write to stdout
//              -f  file    read from the specified file
//              -o  file    write to the specified file
//              -d          decoding
//              -e          encoding (default)
//              -s  seq     two-character escape sequence (default "@=")
//              -c  char    charset (big5 or gb; default big5)
//
// Compile
//
//        gcc -lstdc++ -o mb2a mb2a.cc
//

// 0  Big5
// 1  GBK
// 2  ...?
int charset=0;

//
// No use here!! Refer to later...
// 
char escape='@';
char *escapeseq="@=";
const char endchar=' ';


struct scope {
	unsigned int begin, end;
};

// scope big5scope[]={
// 	{ 0xa440, 0xc67e },
// 	{ 0xc940, 0xf9fe },
// 	{ 0xa140, 0xa3c7 }
// };


#include <stdlib.h>
#include <string.h>
#include <fstream.h>


bool isBig5Code(int hicode, int locode)
{
	return !((locode < 0x40 || locode > 0xFE || (locode > 0x7E && locode < 0xA1)) ||
 			 (hicode < 0xA1 || hicode > 0xF9 || (hicode > 0xC6 && hicode < 0xC9)));
}

bool isGBKCode(int hicode, int locode)
{
	return ((hicode >= 0x81 && hicode <= 0xfe) &&
                ((locode >= 0x40 && locode <= 0x7e) || (locode >= 0x80 && locode <= 0xfe)));
}

typedef bool (*isMBChar)(int, int);

isMBChar ismbchar[] = {
	isBig5Code,
	isGBKCode
};


void Encoding(istream& ifs, ostream& ofs)
{
	ofs << hex;
	unsigned char ch=ifs.get();
	while (ifs.good()) {
		if (ch == '\n') {
			ofs.put('\n');
		} else if (ch == escape) {
			ofs.put(escape);
			ofs.put(escape);
		} else {
			unsigned char cl=ifs.get();
			if (ifs.eof()) {
				ofs.put(ch);
				break;
			}
			if (ismbchar[charset](ch, cl)) {
				unsigned int code=((unsigned int)ch << 8) + cl;
				ofs << escapeseq << code << endchar;
			} else {
				ofs.put(ch);
				ch=cl;
				continue;
			}
		}
		ch=ifs.get();
	}
}

void Decoding(istream& ifs, ostream& ofs)
{
	ifs >> hex;
	while (ifs.good()) {
		unsigned char ch=ifs.get();
		if (!ifs.good())
			break;
		if (ch == escape) {
			unsigned char cl=ifs.get();
			if (cl == escapeseq[1]) {
				unsigned int code;
				ifs >> code;
				ofs.put((unsigned char)(code>>8));
				ofs.put((unsigned char)code);
			     // the endchar may be eaten sometimes
				if ((cl=ifs.get()) != endchar)
					ofs.put(cl);
                        } else {
				if (cl != escape)
					ofs.put(ch);
				ofs.put(cl);
			}
		} else
			ofs.put(ch);
	}	
}

int main(int argc, char *argv[])
{
	void (*convert)(istream&, ostream&) = Encoding;
	istream *pifs = &cin;
	ostream *pofs = &cout;
	
	fstream ifs, ofs;

	char esc[]="@=";
	escapeseq=esc;

	for (int i=1; i<argc; i++) {
		switch (argv[i][1]) {
			case 's':
				memcpy(escapeseq, argv[++i], 2);
				escape=escapeseq[0];
				break;

			case 'd':
				convert = Decoding;
				break;

			case 'e':
				convert = Encoding;
				break;

			case 'f':
				ifs.open(argv[++i], ios::in);
				pifs=&ifs;
				break;

			case 'o':
				ofs.open(argv[++i], ios::out);
				pofs=&ofs;
				break;

			case 'c':
				i++;
				if (strncasecmp(argv[i], "big5", 4)==0) {
					charset=0;
					break;
				} else if (strncasecmp(argv[i], "gb", 2)==0) {
					charset=1;
					break;
				}

			default:
				cerr << "Unknown options!\n";
				exit(1);
		}
	}

	convert(*pifs, *pofs);
	return 0;
}
