12 #include "../stdafx.h" 
   13 #include "../core/endian_func.hpp" 
   14 #include "../string_func.h" 
   15 #include "../table/control_codes.h" 
   20 #include "../table/strgen_tables.h" 
   22 #include "../safeguards.h" 
   28 const char *
_file = 
"(unknown file)"; 
 
   30 int _errors, _warnings, _show_todo;
 
   34 static const CmdStruct *ParseCommandString(
const char **str, 
char *param, 
int *argno, 
int *casei);
 
   43     caseidx(caseidx), string(
stredup(string)), next(next)
 
   63     hash_next(0), index(index), line(line), translated_case(NULL)
 
  122   for (; *s != 
'\0'; s++) hash = 
ROL(hash, 3) ^ *s;
 
  152     if (strcmp(ls->
name, s) == 0) 
return ls;
 
  166   for (; *s != 
'\0'; s++) {
 
  167     hash = 
ROL(hash, 3) ^ *s;
 
  168     hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
 
  192       hash ^= i * 0x717239;
 
  193       hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
 
  197       while ((cs = ParseCommandString(&s, buf, &argno, &casei)) != NULL) {
 
  200         hash ^= (cs - _cmd_structs) * 0x1234567;
 
  201         hash = (hash & 1 ? hash >> 1 ^ 0xF00BAA4 : hash >> 1);
 
  220 static const char *_cur_ident;
 
  235 static int _cur_argidx;
 
  256     } 
else if (value < 0x800) {
 
  257       *this->
Append() = 0xC0 + 
GB(value,  6, 5);
 
  258       *this->
Append() = 0x80 + 
GB(value,  0, 6);
 
  259     } 
else if (value < 0x10000) {
 
  260       *this->
Append() = 0xE0 + 
GB(value, 12, 4);
 
  261       *this->
Append() = 0x80 + 
GB(value,  6, 6);
 
  262       *this->
Append() = 0x80 + 
GB(value,  0, 6);
 
  263     } 
else if (value < 0x110000) {
 
  264       *this->
Append() = 0xF0 + 
GB(value, 18, 3);
 
  265       *this->
Append() = 0x80 + 
GB(value, 12, 6);
 
  266       *this->
Append() = 0x80 + 
GB(value,  6, 6);
 
  267       *this->
Append() = 0x80 + 
GB(value,  0, 6);
 
  269       strgen_warning(
"Invalid unicode value U+0x%X", value);
 
  274 size_t Utf8Validate(
const char *s)
 
  281   } 
else if (
GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) {
 
  283     c = 
GB(s[0], 0, 5) << 6 | 
GB(s[1], 0, 6);
 
  284     if (c >= 0x80) 
return 2;
 
  285   } 
else if (
GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
 
  287     c = 
GB(s[0], 0, 4) << 12 | 
GB(s[1], 0, 6) << 6 | 
GB(s[2], 0, 6);
 
  288     if (c >= 0x800) 
return 3;
 
  289   } 
else if (
GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
 
  291     c = 
GB(s[0], 0, 3) << 18 | 
GB(s[1], 0, 6) << 12 | 
GB(s[2], 0, 6) << 6 | 
GB(s[3], 0, 6);
 
  292     if (c >= 0x10000 && c <= 0x10FFFF) 
return 4;
 
  299 void EmitSingleChar(
Buffer *buffer, 
char *buf, 
int value)
 
  301   if (*buf != 
'\0') strgen_warning(
"Ignoring trailing letters in command");
 
  312 bool ParseRelNum(
char **buf, 
int *value, 
int *offset)
 
  314   const char *s = *buf;
 
  318   while (*s == 
' ' || *s == 
'\t') s++;
 
  323   int v = strtol(s, &end, 0);
 
  324   if (end == s) 
return false;
 
  330   if (offset != NULL && *end == 
':') {
 
  333     *offset = strtol(s, &end, 0);
 
  334     if (end == s) 
return false;
 
  341 char *ParseWord(
char **buf)
 
  345   while (*s == 
' ' || *s == 
'\t') s++;
 
  346   if (*s == 
'\0') 
return NULL;
 
  352       if (*s == 
'\0') 
break;
 
  363       if (*s == 
'\0') 
break;
 
  364       if (*s == 
' ' || *s == 
'\t') {
 
  376 static int TranslateArgumentIdx(
int arg, 
int offset = 0);
 
  378 static void EmitWordList(
Buffer *buffer, 
const char * 
const *words, uint nw)
 
  381   for (uint i = 0; i < nw; i++) buffer->
AppendByte((byte)strlen(words[i]) + 1);
 
  382   for (uint i = 0; i < nw; i++) {
 
  383     for (uint j = 0; words[i][j] != 
'\0'; j++) buffer->
AppendByte(words[i][j]);
 
  388 void EmitPlural(
Buffer *buffer, 
char *buf, 
int value)
 
  390   int argidx = _cur_argidx;
 
  397   if (!ParseRelNum(&buf, &argidx, &offset)) argidx--;
 
  399   const CmdStruct *cmd = _cur_pcs.cmd[argidx];
 
  402     if (cmd == NULL || cmd->default_plural_offset < 0) {
 
  403       strgen_fatal(
"Command '%s' has no (default) plural position", cmd == NULL ? 
"<empty>" : cmd->cmd);
 
  405     offset = cmd->default_plural_offset;
 
  410     words[nw] = ParseWord(&buf);
 
  411     if (words[nw] == NULL) 
break;
 
  415     strgen_fatal(
"%s: No plural words", _cur_ident);
 
  418   if (expected != nw) {
 
  420       strgen_fatal(
"%s: Invalid number of plural forms. Expecting %d, found %d.", _cur_ident,
 
  423       if ((_show_todo & 2) != 0) strgen_warning(
"'%s' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident);
 
  427         for (; nw < expected; nw++) {
 
  428           words[nw] = words[nw - 1];
 
  436   buffer->
AppendByte(TranslateArgumentIdx(argidx, offset));
 
  437   EmitWordList(buffer, words, nw);
 
  441 void EmitGender(
Buffer *buffer, 
char *buf, 
int value)
 
  443   int argidx = _cur_argidx;
 
  452     if (nw >= 
MAX_NUM_GENDERS) strgen_fatal(
"G argument '%s' invalid", buf);
 
  462     if (!ParseRelNum(&buf, &argidx, &offset)) {}
 
  464     const CmdStruct *cmd = _cur_pcs.cmd[argidx];
 
  465     if (cmd == NULL || (cmd->flags & 
C_GENDER) == 0) {
 
  466       strgen_fatal(
"Command '%s' can't have a gender", cmd == NULL ? 
"<empty>" : cmd->cmd);
 
  470       words[nw] = ParseWord(&buf);
 
  471       if (words[nw] == NULL) 
break;
 
  473     if (nw != _lang.
num_genders) strgen_fatal(
"Bad # of arguments for gender command");
 
  475     assert(
IsInsideBS(cmd->value, SCC_CONTROL_START, UINT8_MAX));
 
  477     buffer->
AppendByte(TranslateArgumentIdx(argidx, offset));
 
  478     EmitWordList(buffer, words, nw);
 
  482 static const CmdStruct *FindCmd(
const char *s, 
int len)
 
  484   for (
const CmdStruct *cs = _cmd_structs; cs != 
endof(_cmd_structs); cs++) {
 
  485     if (strncmp(cs->cmd, s, len) == 0 && cs->cmd[len] == 
'\0') 
return cs;
 
  490 static uint ResolveCaseName(
const char *str, 
size_t len)
 
  495   memcpy(case_str, str, len);
 
  496   case_str[len] = 
'\0';
 
  499   if (case_idx >= 
MAX_NUM_CASES) strgen_fatal(
"Invalid case-name '%s'", case_str);
 
  506 static const CmdStruct *ParseCommandString(
const char **str, 
char *param, 
int *argno, 
int *casei)
 
  508   const char *s = *str, *start;
 
  515   for (; *s != 
'{'; s++) {
 
  516     if (*s == 
'\0') 
return NULL;
 
  520   if (*s >= 
'0' && *s <= 
'9') {
 
  523     *argno = strtoul(s, &end, 0);
 
  524     if (*end != 
':') strgen_fatal(
"missing arg #");
 
  532   } 
while (c != 
'}' && c != 
' ' && c != 
'=' && c != 
'.' && c != 0);
 
  534   const CmdStruct *cmd = FindCmd(start, s - start - 1);
 
  536     strgen_error(
"Undefined command '%.*s'", (
int)(s - start - 1), start);
 
  541     const char *casep = s;
 
  543     if (!(cmd->flags & 
C_CASE)) {
 
  544       strgen_fatal(
"Command '%s' can't have a case", cmd->cmd);
 
  549     } 
while (c != 
'}' && c != 
' ' && c != 
'\0');
 
  550     *casei = ResolveCaseName(casep, s - casep - 1);
 
  554     strgen_error(
"Missing } from command '%s'", start);
 
  567         strgen_error(
"Missing } from command '%s'", start);
 
  589     data(data), file(
stredup(file)), master(master), translation(translation)
 
  606   memset(p, 0, 
sizeof(*p));
 
  610     const CmdStruct *ar = ParseCommandString(&s, param, &argno, &casei);
 
  612     if (ar == NULL) 
break;
 
  615     if (argno != -1 && ar->consumes == 0) strgen_fatal(
"Non consumer param can't have a paramindex");
 
  618       if (argno != -1) argidx = argno;
 
  619       if (argidx < 0 || (uint)argidx >= 
lengthof(p->cmd)) strgen_fatal(
"invalid param idx %d", argidx);
 
  620       if (p->cmd[argidx] != NULL && p->cmd[argidx] != ar) strgen_fatal(
"duplicate param idx %d", argidx);
 
  622       p->cmd[argidx++] = ar;
 
  624       if (p->np >= 
lengthof(p->pairs)) strgen_fatal(
"too many commands in string, max " PRINTF_SIZE, 
lengthof(p->pairs));
 
  625       p->pairs[p->np].a = ar;
 
  626       p->pairs[p->np].v = param[0] != 
'\0' ? 
stredup(param) : 
"";
 
  635   if (a == NULL) 
return NULL;
 
  637   if (strcmp(a->cmd, 
"STRING1") == 0 ||
 
  638       strcmp(a->cmd, 
"STRING2") == 0 ||
 
  639       strcmp(a->cmd, 
"STRING3") == 0 ||
 
  640       strcmp(a->cmd, 
"STRING4") == 0 ||
 
  641       strcmp(a->cmd, 
"STRING5") == 0 ||
 
  642       strcmp(a->cmd, 
"STRING6") == 0 ||
 
  643       strcmp(a->cmd, 
"STRING7") == 0 ||
 
  644       strcmp(a->cmd, 
"RAW_STRING") == 0) {
 
  645     return FindCmd(
"STRING", 6);
 
  652 static bool CheckCommandsMatch(
char *a, 
char *b, 
const char *name)
 
  664   ExtractCommandString(&templ, b, 
true);
 
  665   ExtractCommandString(&lang, a, 
true);
 
  668   if (templ.np != lang.np) {
 
  669     strgen_warning(
"%s: template string and language string have a different # of commands", name);
 
  673   for (uint i = 0; i < templ.np; i++) {
 
  676     for (uint j = 0; j < lang.np; j++) {
 
  677       if (templ.pairs[i].a == lang.pairs[j].a &&
 
  678           strcmp(templ.pairs[i].v, lang.pairs[j].v) == 0) {
 
  680         lang.pairs[j].a = NULL;
 
  687       strgen_warning(
"%s: command '%s' exists in template file but not in language file", name, templ.pairs[i].a->cmd);
 
  694   for (uint i = 0; i < 
lengthof(templ.cmd); i++) {
 
  695     if (TranslateCmdForCompare(templ.cmd[i]) != lang.cmd[i]) {
 
  696       strgen_warning(
"%s: Param idx #%d '%s' doesn't match with template command '%s'", name, i,
 
  697         lang.cmd[i]  == NULL ? 
"<empty>" : TranslateCmdForCompare(lang.cmd[i])->cmd,
 
  698         templ.cmd[i] == NULL ? 
"<empty>" : templ.cmd[i]->cmd);
 
  706 void StringReader::HandleString(
char *str)
 
  709     if (str[1] == 
'#' && str[2] != 
'#') this->
HandlePragma(str + 2);
 
  714   if (*str == 
';' || *str == 
' ' || *str == 
'\0') 
return;
 
  716   char *s = strchr(str, 
':');
 
  718     strgen_error(
"Line has no ':' delimiter");
 
  725   for (t = s; t > str && (t[-1] == 
' ' || t[-1] == 
'\t'); t--) {}
 
  731   for (tmp = s; *tmp != 
'\0';) {
 
  732     size_t len = Utf8Validate(tmp);
 
  733     if (len == 0) strgen_fatal(
"Invalid UTF-8 sequence in '%s'", s);
 
  739         (c >= 0xE000 && c <= 0xF8FF) || 
 
  740         (c >= 0xFFF0 && c <= 0xFFFF)) { 
 
  741       strgen_fatal(
"Unwanted UTF-8 character U+%04X in sequence '%s'", c, s);
 
  749   char *casep = strchr(str, 
'.');
 
  750   if (casep != NULL) *casep++ = 
'\0';
 
  757       strgen_error(
"Cases in the base translation are not supported.");
 
  762       strgen_error(
"String name '%s' is used multiple times", str);
 
  766     if (this->
data.
strings[this->data.next_string_id] != NULL) {
 
  767       strgen_error(
"String ID 0x%X for '%s' already in use by '%s'", this->
data.
next_string_id, str, this->data.strings[this->data.next_string_id]->name);
 
  775       strgen_warning(
"String name '%s' does not exist in master file", str);
 
  780       strgen_error(
"String name '%s' is used multiple times", str);
 
  785     if (!CheckCommandsMatch(s, ent->
english, str)) 
return;
 
  801   if (!memcmp(str, 
"plural ", 7)) {
 
  804       strgen_fatal(
"Invalid pluralform %d", _lang.
plural_form);
 
  807     strgen_fatal(
"unknown pragma '%s'", str);
 
  811 static void rstrip(
char *buf)
 
  813   size_t i = strlen(buf);
 
  814   while (i > 0 && (buf[i - 1] == 
'\r' || buf[i - 1] == 
'\n' || buf[i - 1] == 
' ')) i--;
 
  821   _warnings = _errors = 0;
 
  835     this->HandleString(buf);
 
  857 static int TranslateArgumentIdx(
int argidx, 
int offset)
 
  861   if (argidx < 0 || (uint)argidx >= 
lengthof(_cur_pcs.cmd)) {
 
  862     strgen_fatal(
"invalid argidx %d", argidx);
 
  864   const CmdStruct *cs = _cur_pcs.cmd[argidx];
 
  865   if (cs != NULL && cs->consumes <= offset) {
 
  866     strgen_fatal(
"invalid argidx offset %d:%d", argidx, offset);
 
  869   if (_cur_pcs.cmd[argidx] == NULL) {
 
  870     strgen_fatal(
"no command for this argidx %d", argidx);
 
  873   for (
int i = sum = 0; i < argidx; i++) {
 
  876     sum += (cs != NULL) ? cs->consumes : 1;
 
  882 static void PutArgidxCommand(
Buffer *buffer)
 
  885   buffer->
AppendByte(TranslateArgumentIdx(_cur_argidx));
 
  889 static void PutCommandString(
Buffer *buffer, 
const char *str)
 
  893   while (*str != 
'\0') {
 
  903     const CmdStruct *cs = ParseCommandString(&str, param, &argno, &casei);
 
  904     if (cs == NULL) 
break;
 
  912     if (cs->consumes > 0) {
 
  914       if (argno != -1 && argno != _cur_argidx) {
 
  916         PutArgidxCommand(buffer);
 
  920       cs = _cur_pcs.cmd[_cur_argidx++];
 
  922         strgen_fatal(
"%s: No argument exists at position %d", _cur_ident, _cur_argidx - 1);
 
  926     cs->proc(buffer, param, cs->value);
 
  938   if (length >= 0x4000) {
 
  939     strgen_fatal(
"string too long");
 
  942   if (length >= 0xC0) {
 
  943     buffer[offs++] = (length >> 8) | 0xC0;
 
  945   buffer[offs++] = length & 0xFF;
 
  946   this->
Write((byte*)buffer, offs);
 
  956   for (
size_t tab = 0; tab < data.
tabs; tab++) {
 
  960     _lang.
offsets[tab] = TO_LE16(n);
 
  962     for (uint j = 0; j != in_use[tab]; j++) {
 
  976   for (
size_t tab = 0; tab < data.
tabs; tab++) {
 
  977     for (uint j = 0; j != in_use[tab]; j++) {
 
  988       _cur_ident = ls->
name;
 
  992       if (_show_todo > 0 && ls->
translated == NULL) {
 
  993         if ((_show_todo & 2) != 0) {
 
  994           strgen_warning(
"'%s' is untranslated", ls->
name);
 
  996         if ((_show_todo & 1) != 0) {
 
  997           const char *s = 
"<TODO> ";
 
 1003       ExtractCommandString(&_cur_pcs, ls->
english, 
false);
 
 1015       if (casep != NULL) {
 
 1025         for (num = 0, c = casep; c; c = c->
next) num++;
 
 1029         for (c = casep; c != NULL; c = c->
next) {
 
 1032           uint pos = buffer.
Length();
 
 1036           PutCommandString(&buffer, c->
string);
 
 1039           uint size = buffer.
Length() - (pos + 2);
 
 1040           buffer[pos + 0] = 
GB(size, 8, 8);
 
 1041           buffer[pos + 1] = 
GB(size, 0, 8);
 
 1045       if (cmdp != NULL) PutCommandString(&buffer, cmdp);