Add: INR currency (#8136)
[openttd-github.git] / src / strgen / strgen_base.cpp
blob7e43471b2606c6bd12a1536ff1b932e600963991
1 /*
2 * This file is part of OpenTTD.
3 * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
4 * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
5 * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
6 */
8 /** @file strgen_base.cpp Tool to create computer readable (stand-alone) translation files. */
10 #include "../stdafx.h"
11 #include "../core/endian_func.hpp"
12 #include "../string_func.h"
13 #include "../table/control_codes.h"
15 #include "strgen.h"
18 #include "../table/strgen_tables.h"
20 #include "../safeguards.h"
22 /* Compiles a list of strings into a compiled string list */
24 static bool _translated; ///< Whether the current language is not the master language
25 static bool _translation; ///< Is the current file actually a translation or not
26 const char *_file = "(unknown file)"; ///< The filename of the input, so we can refer to it in errors/warnings
27 int _cur_line; ///< The current line we're parsing in the input file
28 int _errors, _warnings, _show_todo;
29 LanguagePackHeader _lang; ///< Header information about a language.
31 static const ptrdiff_t MAX_COMMAND_PARAM_SIZE = 100; ///< Maximum size of every command block, not counting the name of the command itself
32 static const CmdStruct *ParseCommandString(const char **str, char *param, int *argno, int *casei);
34 /**
35 * Create a new case.
36 * @param caseidx The index of the case.
37 * @param string The translation of the case.
38 * @param next The next chained case.
40 Case::Case(int caseidx, const char *string, Case *next) :
41 caseidx(caseidx), string(stredup(string)), next(next)
45 /** Free everything we allocated. */
46 Case::~Case()
48 free(this->string);
49 delete this->next;
52 /**
53 * Create a new string.
54 * @param name The name of the string.
55 * @param english The english "translation" of the string.
56 * @param index The index in the string table.
57 * @param line The line this string was found on.
59 LangString::LangString(const char *name, const char *english, size_t index, int line) :
60 name(stredup(name)), english(stredup(english)), translated(nullptr),
61 hash_next(0), index(index), line(line), translated_case(nullptr)
65 /** Free everything we allocated. */
66 LangString::~LangString()
68 free(this->name);
69 free(this->english);
70 free(this->translated);
71 delete this->translated_case;
74 /** Free all data related to the translation. */
75 void LangString::FreeTranslation()
77 free(this->translated);
78 this->translated = nullptr;
80 delete this->translated_case;
81 this->translated_case = nullptr;
84 /**
85 * Create a new string data container.
86 * @param tabs The maximum number of strings.
88 StringData::StringData(size_t tabs) : tabs(tabs), max_strings(tabs * TAB_SIZE)
90 this->strings = CallocT<LangString *>(max_strings);
91 this->hash_heads = CallocT<size_t>(max_strings);
92 this->next_string_id = 0;
95 /** Free everything we allocated. */
96 StringData::~StringData()
98 for (size_t i = 0; i < this->max_strings; i++) delete this->strings[i];
99 free(this->strings);
100 free(this->hash_heads);
103 /** Free all data related to the translation. */
104 void StringData::FreeTranslation()
106 for (size_t i = 0; i < this->max_strings; i++) {
107 LangString *ls = this->strings[i];
108 if (ls != nullptr) ls->FreeTranslation();
113 * Create a hash of the string for finding them back quickly.
114 * @param s The string to hash.
115 * @return The hashed string.
117 uint StringData::HashStr(const char *s) const
119 uint hash = 0;
120 for (; *s != '\0'; s++) hash = ROL(hash, 3) ^ *s;
121 return hash % this->max_strings;
125 * Add a newly created LangString.
126 * @param s The name of the string.
127 * @param ls The string to add.
129 void StringData::Add(const char *s, LangString *ls)
131 uint hash = this->HashStr(s);
132 ls->hash_next = this->hash_heads[hash];
133 /* Off-by-one for hash find. */
134 this->hash_heads[hash] = ls->index + 1;
135 this->strings[ls->index] = ls;
139 * Find a LangString based on the string name.
140 * @param s The string name to search on.
141 * @return The LangString or nullptr if it is not known.
143 LangString *StringData::Find(const char *s)
145 size_t idx = this->hash_heads[this->HashStr(s)];
147 while (idx-- > 0) {
148 LangString *ls = this->strings[idx];
150 if (strcmp(ls->name, s) == 0) return ls;
151 idx = ls->hash_next;
153 return nullptr;
157 * Create a compound hash.
158 * @param hash The hash to add the string hash to.
159 * @param s The string hash.
160 * @return The new hash.
162 uint StringData::VersionHashStr(uint hash, const char *s) const
164 for (; *s != '\0'; s++) {
165 hash = ROL(hash, 3) ^ *s;
166 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
168 return hash;
172 * Make a hash of the file to get a unique "version number"
173 * @return The version number.
175 uint StringData::Version() const
177 uint hash = 0;
179 for (size_t i = 0; i < this->max_strings; i++) {
180 const LangString *ls = this->strings[i];
182 if (ls != nullptr) {
183 const CmdStruct *cs;
184 const char *s;
185 char buf[MAX_COMMAND_PARAM_SIZE];
186 int argno;
187 int casei;
189 s = ls->name;
190 hash ^= i * 0x717239;
191 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
192 hash = this->VersionHashStr(hash, s + 1);
194 s = ls->english;
195 while ((cs = ParseCommandString(&s, buf, &argno, &casei)) != nullptr) {
196 if (cs->flags & C_DONTCOUNT) continue;
198 hash ^= (cs - _cmd_structs) * 0x1234567;
199 hash = (hash & 1 ? hash >> 1 ^ 0xF00BAA4 : hash >> 1);
204 return hash;
208 * Count the number of tab elements that are in use.
209 * @param tab The tab to count the elements of.
211 uint StringData::CountInUse(uint tab) const
213 int i;
214 for (i = TAB_SIZE; --i >= 0;) if (this->strings[(tab * TAB_SIZE) + i] != nullptr) break;
215 return i + 1;
218 static const char *_cur_ident;
220 struct CmdPair {
221 const CmdStruct *a;
222 const char *v;
225 struct ParsedCommandStruct {
226 uint np;
227 CmdPair pairs[32];
228 const CmdStruct *cmd[32]; // ordered by param #
231 /* Used when generating some advanced commands. */
232 static ParsedCommandStruct _cur_pcs;
233 static int _cur_argidx;
235 /** The buffer for writing a single string. */
236 struct Buffer : std::vector<byte> {
238 * Convenience method for adding a byte.
239 * @param value The value to add.
241 void AppendByte(byte value)
243 this->push_back(value);
247 * Add an Unicode character encoded in UTF-8 to the buffer.
248 * @param value The character to add.
250 void AppendUtf8(uint32 value)
252 if (value < 0x80) {
253 this->push_back(value);
254 } else if (value < 0x800) {
255 this->push_back(0xC0 + GB(value, 6, 5));
256 this->push_back(0x80 + GB(value, 0, 6));
257 } else if (value < 0x10000) {
258 this->push_back(0xE0 + GB(value, 12, 4));
259 this->push_back(0x80 + GB(value, 6, 6));
260 this->push_back(0x80 + GB(value, 0, 6));
261 } else if (value < 0x110000) {
262 this->push_back(0xF0 + GB(value, 18, 3));
263 this->push_back(0x80 + GB(value, 12, 6));
264 this->push_back(0x80 + GB(value, 6, 6));
265 this->push_back(0x80 + GB(value, 0, 6));
266 } else {
267 strgen_warning("Invalid unicode value U+0x%X", value);
/**
 * Check the UTF-8 sequence at the start of a string.
 * Sequences that encode a value which fits in a shorter form are rejected,
 * as are four byte sequences above U+10FFFF.
 * @param s The NUL terminated string to inspect.
 * @return The length in bytes of the leading sequence, or 0 when it is invalid.
 */
size_t Utf8Validate(const char *s)
{
	const auto is_tail = [](char ch) { return (static_cast<unsigned char>(ch) & 0xC0) == 0x80; };
	const auto low_bits = [](char ch, uint32_t mask) -> uint32_t { return static_cast<unsigned char>(ch) & mask; };

	const unsigned char lead = static_cast<unsigned char>(s[0]);

	/* 1 byte */
	if ((lead & 0x80) == 0) return 1;

	/* 2 bytes */
	if ((lead & 0xE0) == 0xC0 && is_tail(s[1])) {
		const uint32_t c = low_bits(s[0], 0x1F) << 6 | low_bits(s[1], 0x3F);
		return c >= 0x80 ? 2 : 0;
	}

	/* 3 bytes */
	if ((lead & 0xF0) == 0xE0 && is_tail(s[1]) && is_tail(s[2])) {
		const uint32_t c = low_bits(s[0], 0x0F) << 12 | low_bits(s[1], 0x3F) << 6 | low_bits(s[2], 0x3F);
		return c >= 0x800 ? 3 : 0;
	}

	/* 4 bytes */
	if ((lead & 0xF8) == 0xF0 && is_tail(s[1]) && is_tail(s[2]) && is_tail(s[3])) {
		const uint32_t c = low_bits(s[0], 0x07) << 18 | low_bits(s[1], 0x3F) << 12 | low_bits(s[2], 0x3F) << 6 | low_bits(s[3], 0x3F);
		return (c >= 0x10000 && c <= 0x10FFFF) ? 4 : 0;
	}

	return 0;
}
297 void EmitSingleChar(Buffer *buffer, char *buf, int value)
299 if (*buf != '\0') strgen_warning("Ignoring trailing letters in command");
300 buffer->AppendUtf8(value);
304 /* The plural specifier looks like
305 * {NUM} {PLURAL -1 passenger passengers} then it picks either passenger/passengers depending on the count in NUM */
307 /* This is encoded like
308 * CommandByte <ARG#> <NUM> {Length of each string} {each string} */
/**
 * Parse an argument reference of the form "[+]number[:offset]".
 * A number with a '+' prefix or a negative number is added to *value;
 * any other number replaces *value.
 * @param buf    The text to parse; advanced past the parsed part on success.
 * @param value  The value to update.
 * @param offset Receives the number after ':' when present; may be nullptr to not accept an offset.
 * @return True when a number was parsed, false when a number or the offset after ':' is missing.
 */
bool ParseRelNum(char **buf, int *value, int *offset)
{
	const char *cursor = *buf;
	while (*cursor == ' ' || *cursor == '\t') cursor++;

	const bool relative = (*cursor == '+');
	if (relative) cursor++;

	char *end;
	const int number = strtol(cursor, &end, 0);
	if (end == cursor) return false;

	*value = (relative || number < 0) ? *value + number : number;

	if (offset != nullptr && *end == ':') {
		/* Take the Nth within */
		cursor = end + 1;
		*offset = strtol(cursor, &end, 0);
		if (end == cursor) return false;
	}

	*buf = end;
	return true;
}
/**
 * Parse out the next word.
 * A word is either enclosed in double quotes or runs up to the next space or
 * tab. The terminator is overwritten with NUL, so the input gets modified.
 * @param buf The text to parse; advanced past the word when one is found.
 * @return The word, or nullptr when only whitespace is left.
 */
char *ParseWord(char **buf)
{
	char *cursor = *buf;
	while (*cursor == ' ' || *cursor == '\t') cursor++;
	if (*cursor == '\0') return nullptr;

	const bool quoted = (*cursor == '"');
	if (quoted) cursor++;
	char *word = cursor;

	for (; *cursor != '\0'; cursor++) {
		const bool at_end = quoted ? (*cursor == '"') : (*cursor == ' ' || *cursor == '\t');
		if (at_end) {
			*cursor++ = '\0';
			break;
		}
	}

	*buf = cursor;
	return word;
}
373 /* Forward declaration */
374 static int TranslateArgumentIdx(int arg, int offset = 0);
376 static void EmitWordList(Buffer *buffer, const char * const *words, uint nw)
378 buffer->AppendByte(nw);
379 for (uint i = 0; i < nw; i++) buffer->AppendByte((byte)strlen(words[i]) + 1);
380 for (uint i = 0; i < nw; i++) {
381 for (uint j = 0; words[i][j] != '\0'; j++) buffer->AppendByte(words[i][j]);
382 buffer->AppendByte(0);
386 void EmitPlural(Buffer *buffer, char *buf, int value)
388 int argidx = _cur_argidx;
389 int offset = -1;
390 int expected = _plural_forms[_lang.plural_form].plural_count;
391 const char **words = AllocaM(const char *, max(expected, MAX_PLURALS));
392 int nw = 0;
394 /* Parse out the number, if one exists. Otherwise default to prev arg. */
395 if (!ParseRelNum(&buf, &argidx, &offset)) argidx--;
397 const CmdStruct *cmd = _cur_pcs.cmd[argidx];
398 if (offset == -1) {
399 /* Use default offset */
400 if (cmd == nullptr || cmd->default_plural_offset < 0) {
401 strgen_fatal("Command '%s' has no (default) plural position", cmd == nullptr ? "<empty>" : cmd->cmd);
403 offset = cmd->default_plural_offset;
406 /* Parse each string */
407 for (nw = 0; nw < MAX_PLURALS; nw++) {
408 words[nw] = ParseWord(&buf);
409 if (words[nw] == nullptr) break;
412 if (nw == 0) {
413 strgen_fatal("%s: No plural words", _cur_ident);
416 if (expected != nw) {
417 if (_translated) {
418 strgen_fatal("%s: Invalid number of plural forms. Expecting %d, found %d.", _cur_ident,
419 expected, nw);
420 } else {
421 if ((_show_todo & 2) != 0) strgen_warning("'%s' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident);
422 if (nw > expected) {
423 nw = expected;
424 } else {
425 for (; nw < expected; nw++) {
426 words[nw] = words[nw - 1];
432 buffer->AppendUtf8(SCC_PLURAL_LIST);
433 buffer->AppendByte(_lang.plural_form);
434 buffer->AppendByte(TranslateArgumentIdx(argidx, offset));
435 EmitWordList(buffer, words, nw);
439 void EmitGender(Buffer *buffer, char *buf, int value)
441 int argidx = _cur_argidx;
442 int offset = 0;
443 uint nw;
445 if (buf[0] == '=') {
446 buf++;
448 /* This is a {G=DER} command */
449 nw = _lang.GetGenderIndex(buf);
450 if (nw >= MAX_NUM_GENDERS) strgen_fatal("G argument '%s' invalid", buf);
452 /* now nw contains the gender index */
453 buffer->AppendUtf8(SCC_GENDER_INDEX);
454 buffer->AppendByte(nw);
455 } else {
456 const char *words[MAX_NUM_GENDERS];
458 /* This is a {G 0 foo bar two} command.
459 * If no relative number exists, default to +0 */
460 if (!ParseRelNum(&buf, &argidx, &offset)) {}
462 const CmdStruct *cmd = _cur_pcs.cmd[argidx];
463 if (cmd == nullptr || (cmd->flags & C_GENDER) == 0) {
464 strgen_fatal("Command '%s' can't have a gender", cmd == nullptr ? "<empty>" : cmd->cmd);
467 for (nw = 0; nw < MAX_NUM_GENDERS; nw++) {
468 words[nw] = ParseWord(&buf);
469 if (words[nw] == nullptr) break;
471 if (nw != _lang.num_genders) strgen_fatal("Bad # of arguments for gender command");
473 assert(IsInsideBS(cmd->value, SCC_CONTROL_START, UINT8_MAX));
474 buffer->AppendUtf8(SCC_GENDER_LIST);
475 buffer->AppendByte(TranslateArgumentIdx(argidx, offset));
476 EmitWordList(buffer, words, nw);
480 static const CmdStruct *FindCmd(const char *s, int len)
482 for (const CmdStruct *cs = _cmd_structs; cs != endof(_cmd_structs); cs++) {
483 if (strncmp(cs->cmd, s, len) == 0 && cs->cmd[len] == '\0') return cs;
485 return nullptr;
488 static uint ResolveCaseName(const char *str, size_t len)
490 /* First get a clean copy of only the case name, then resolve it. */
491 char case_str[CASE_GENDER_LEN];
492 len = min(lengthof(case_str) - 1, len);
493 memcpy(case_str, str, len);
494 case_str[len] = '\0';
496 uint8 case_idx = _lang.GetCaseIndex(case_str);
497 if (case_idx >= MAX_NUM_CASES) strgen_fatal("Invalid case-name '%s'", case_str);
498 return case_idx + 1;
502 /* returns nullptr on eof
503 * else returns command struct */
504 static const CmdStruct *ParseCommandString(const char **str, char *param, int *argno, int *casei)
506 const char *s = *str, *start;
507 char c;
509 *argno = -1;
510 *casei = -1;
512 /* Scan to the next command, exit if there's no next command. */
513 for (; *s != '{'; s++) {
514 if (*s == '\0') return nullptr;
516 s++; // Skip past the {
518 if (*s >= '0' && *s <= '9') {
519 char *end;
521 *argno = strtoul(s, &end, 0);
522 if (*end != ':') strgen_fatal("missing arg #");
523 s = end + 1;
526 /* parse command name */
527 start = s;
528 do {
529 c = *s++;
530 } while (c != '}' && c != ' ' && c != '=' && c != '.' && c != 0);
532 const CmdStruct *cmd = FindCmd(start, s - start - 1);
533 if (cmd == nullptr) {
534 strgen_error("Undefined command '%.*s'", (int)(s - start - 1), start);
535 return nullptr;
538 if (c == '.') {
539 const char *casep = s;
541 if (!(cmd->flags & C_CASE)) {
542 strgen_fatal("Command '%s' can't have a case", cmd->cmd);
545 do {
546 c = *s++;
547 } while (c != '}' && c != ' ' && c != '\0');
548 *casei = ResolveCaseName(casep, s - casep - 1);
551 if (c == '\0') {
552 strgen_error("Missing } from command '%s'", start);
553 return nullptr;
557 if (c != '}') {
558 if (c == '=') s--;
559 /* copy params */
560 start = s;
561 for (;;) {
562 c = *s++;
563 if (c == '}') break;
564 if (c == '\0') {
565 strgen_error("Missing } from command '%s'", start);
566 return nullptr;
568 if (s - start == MAX_COMMAND_PARAM_SIZE) error("param command too long");
569 *param++ = c;
572 *param = '\0';
574 *str = s;
576 return cmd;
580 * Prepare reading.
581 * @param data The data to fill during reading.
582 * @param file The file we are reading.
583 * @param master Are we reading the master file?
584 * @param translation Are we reading a translation?
586 StringReader::StringReader(StringData &data, const char *file, bool master, bool translation) :
587 data(data), file(stredup(file)), master(master), translation(translation)
591 /** Make sure the right reader gets freed. */
592 StringReader::~StringReader()
594 free(file);
597 static void ExtractCommandString(ParsedCommandStruct *p, const char *s, bool warnings)
599 char param[MAX_COMMAND_PARAM_SIZE];
600 int argno;
601 int argidx = 0;
602 int casei;
604 memset(p, 0, sizeof(*p));
606 for (;;) {
607 /* read until next command from a. */
608 const CmdStruct *ar = ParseCommandString(&s, param, &argno, &casei);
610 if (ar == nullptr) break;
612 /* Sanity checking */
613 if (argno != -1 && ar->consumes == 0) strgen_fatal("Non consumer param can't have a paramindex");
615 if (ar->consumes) {
616 if (argno != -1) argidx = argno;
617 if (argidx < 0 || (uint)argidx >= lengthof(p->cmd)) strgen_fatal("invalid param idx %d", argidx);
618 if (p->cmd[argidx] != nullptr && p->cmd[argidx] != ar) strgen_fatal("duplicate param idx %d", argidx);
620 p->cmd[argidx++] = ar;
621 } else if (!(ar->flags & C_DONTCOUNT)) { // Ignore some of them
622 if (p->np >= lengthof(p->pairs)) strgen_fatal("too many commands in string, max " PRINTF_SIZE, lengthof(p->pairs));
623 p->pairs[p->np].a = ar;
624 p->pairs[p->np].v = param[0] != '\0' ? stredup(param) : "";
625 p->np++;
631 static const CmdStruct *TranslateCmdForCompare(const CmdStruct *a)
633 if (a == nullptr) return nullptr;
635 if (strcmp(a->cmd, "STRING1") == 0 ||
636 strcmp(a->cmd, "STRING2") == 0 ||
637 strcmp(a->cmd, "STRING3") == 0 ||
638 strcmp(a->cmd, "STRING4") == 0 ||
639 strcmp(a->cmd, "STRING5") == 0 ||
640 strcmp(a->cmd, "STRING6") == 0 ||
641 strcmp(a->cmd, "STRING7") == 0 ||
642 strcmp(a->cmd, "RAW_STRING") == 0) {
643 return FindCmd("STRING", 6);
646 return a;
650 static bool CheckCommandsMatch(char *a, char *b, const char *name)
652 /* If we're not translating, i.e. we're compiling the base language,
653 * it is pointless to do all these checks as it'll always be correct.
654 * After all, all checks are based on the base language.
656 if (!_translation) return true;
658 ParsedCommandStruct templ;
659 ParsedCommandStruct lang;
660 bool result = true;
662 ExtractCommandString(&templ, b, true);
663 ExtractCommandString(&lang, a, true);
665 /* For each string in templ, see if we find it in lang */
666 if (templ.np != lang.np) {
667 strgen_warning("%s: template string and language string have a different # of commands", name);
668 result = false;
671 for (uint i = 0; i < templ.np; i++) {
672 /* see if we find it in lang, and zero it out */
673 bool found = false;
674 for (uint j = 0; j < lang.np; j++) {
675 if (templ.pairs[i].a == lang.pairs[j].a &&
676 strcmp(templ.pairs[i].v, lang.pairs[j].v) == 0) {
677 /* it was found in both. zero it out from lang so we don't find it again */
678 lang.pairs[j].a = nullptr;
679 found = true;
680 break;
684 if (!found) {
685 strgen_warning("%s: command '%s' exists in template file but not in language file", name, templ.pairs[i].a->cmd);
686 result = false;
690 /* if we reach here, all non consumer commands match up.
691 * Check if the non consumer commands match up also. */
692 for (uint i = 0; i < lengthof(templ.cmd); i++) {
693 if (TranslateCmdForCompare(templ.cmd[i]) != lang.cmd[i]) {
694 strgen_warning("%s: Param idx #%d '%s' doesn't match with template command '%s'", name, i,
695 lang.cmd[i] == nullptr ? "<empty>" : TranslateCmdForCompare(lang.cmd[i])->cmd,
696 templ.cmd[i] == nullptr ? "<empty>" : templ.cmd[i]->cmd);
697 result = false;
701 return result;
704 void StringReader::HandleString(char *str)
706 if (*str == '#') {
707 if (str[1] == '#' && str[2] != '#') this->HandlePragma(str + 2);
708 return;
711 /* Ignore comments & blank lines */
712 if (*str == ';' || *str == ' ' || *str == '\0') return;
714 char *s = strchr(str, ':');
715 if (s == nullptr) {
716 strgen_error("Line has no ':' delimiter");
717 return;
720 char *t;
721 /* Trim spaces.
722 * After this str points to the command name, and s points to the command contents */
723 for (t = s; t > str && (t[-1] == ' ' || t[-1] == '\t'); t--) {}
724 *t = 0;
725 s++;
727 /* Check string is valid UTF-8 */
728 const char *tmp;
729 for (tmp = s; *tmp != '\0';) {
730 size_t len = Utf8Validate(tmp);
731 if (len == 0) strgen_fatal("Invalid UTF-8 sequence in '%s'", s);
733 WChar c;
734 Utf8Decode(&c, tmp);
735 if (c <= 0x001F || // ASCII control character range
736 c == 0x200B || // Zero width space
737 (c >= 0xE000 && c <= 0xF8FF) || // Private range
738 (c >= 0xFFF0 && c <= 0xFFFF)) { // Specials range
739 strgen_fatal("Unwanted UTF-8 character U+%04X in sequence '%s'", c, s);
742 tmp += len;
745 /* Check if the string has a case..
746 * The syntax for cases is IDENTNAME.case */
747 char *casep = strchr(str, '.');
748 if (casep != nullptr) *casep++ = '\0';
750 /* Check if this string already exists.. */
751 LangString *ent = this->data.Find(str);
753 if (this->master) {
754 if (casep != nullptr) {
755 strgen_error("Cases in the base translation are not supported.");
756 return;
759 if (ent != nullptr) {
760 strgen_error("String name '%s' is used multiple times", str);
761 return;
764 if (this->data.strings[this->data.next_string_id] != nullptr) {
765 strgen_error("String ID 0x" PRINTF_SIZEX " for '%s' already in use by '%s'", this->data.next_string_id, str, this->data.strings[this->data.next_string_id]->name);
766 return;
769 /* Allocate a new LangString */
770 this->data.Add(str, new LangString(str, s, this->data.next_string_id++, _cur_line));
771 } else {
772 if (ent == nullptr) {
773 strgen_warning("String name '%s' does not exist in master file", str);
774 return;
777 if (ent->translated && casep == nullptr) {
778 strgen_error("String name '%s' is used multiple times", str);
779 return;
782 /* make sure that the commands match */
783 if (!CheckCommandsMatch(s, ent->english, str)) return;
785 if (casep != nullptr) {
786 ent->translated_case = new Case(ResolveCaseName(casep, strlen(casep)), s, ent->translated_case);
787 } else {
788 ent->translated = stredup(s);
789 /* If the string was translated, use the line from the
790 * translated language so errors in the translated file
791 * are properly referenced to. */
792 ent->line = _cur_line;
797 void StringReader::HandlePragma(char *str)
799 if (!memcmp(str, "plural ", 7)) {
800 _lang.plural_form = atoi(str + 7);
801 if (_lang.plural_form >= lengthof(_plural_forms)) {
802 strgen_fatal("Invalid pluralform %d", _lang.plural_form);
804 } else {
805 strgen_fatal("unknown pragma '%s'", str);
/**
 * Cut carriage returns, newlines and spaces from the end of a string.
 * @param buf The string to strip in place.
 */
static void rstrip(char *buf)
{
	char *end = buf + strlen(buf);
	while (end != buf) {
		const char last = end[-1];
		if (last != '\r' && last != '\n' && last != ' ') break;
		end--;
	}
	*end = '\0';
}
816 void StringReader::ParseFile()
818 char buf[2048];
819 _warnings = _errors = 0;
821 _translation = this->master || this->translation;
822 _file = this->file;
824 /* For each new file we parse, reset the genders, and language codes. */
825 MemSetT(&_lang, 0);
826 strecpy(_lang.digit_group_separator, ",", lastof(_lang.digit_group_separator));
827 strecpy(_lang.digit_group_separator_currency, ",", lastof(_lang.digit_group_separator_currency));
828 strecpy(_lang.digit_decimal_separator, ".", lastof(_lang.digit_decimal_separator));
830 _cur_line = 1;
831 while (this->data.next_string_id < this->data.max_strings && this->ReadLine(buf, lastof(buf)) != nullptr) {
832 rstrip(buf);
833 this->HandleString(buf);
834 _cur_line++;
837 if (this->data.next_string_id == this->data.max_strings) {
838 strgen_error("Too many strings, maximum allowed is " PRINTF_SIZE, this->data.max_strings);
843 * Write the header information.
844 * @param data The data about the string.
846 void HeaderWriter::WriteHeader(const StringData &data)
848 int last = 0;
849 for (size_t i = 0; i < data.max_strings; i++) {
850 if (data.strings[i] != nullptr) {
851 this->WriteStringID(data.strings[i]->name, (int)i);
852 last = (int)i;
856 this->WriteStringID("STR_LAST_STRINGID", last);
859 static int TranslateArgumentIdx(int argidx, int offset)
861 int sum;
863 if (argidx < 0 || (uint)argidx >= lengthof(_cur_pcs.cmd)) {
864 strgen_fatal("invalid argidx %d", argidx);
866 const CmdStruct *cs = _cur_pcs.cmd[argidx];
867 if (cs != nullptr && cs->consumes <= offset) {
868 strgen_fatal("invalid argidx offset %d:%d", argidx, offset);
871 if (_cur_pcs.cmd[argidx] == nullptr) {
872 strgen_fatal("no command for this argidx %d", argidx);
875 for (int i = sum = 0; i < argidx; i++) {
876 const CmdStruct *cs = _cur_pcs.cmd[i];
878 sum += (cs != nullptr) ? cs->consumes : 1;
881 return sum + offset;
884 static void PutArgidxCommand(Buffer *buffer)
886 buffer->AppendUtf8(SCC_ARG_INDEX);
887 buffer->AppendByte(TranslateArgumentIdx(_cur_argidx));
891 static void PutCommandString(Buffer *buffer, const char *str)
893 _cur_argidx = 0;
895 while (*str != '\0') {
896 /* Process characters as they are until we encounter a { */
897 if (*str != '{') {
898 buffer->AppendByte(*str++);
899 continue;
902 char param[MAX_COMMAND_PARAM_SIZE];
903 int argno;
904 int casei;
905 const CmdStruct *cs = ParseCommandString(&str, param, &argno, &casei);
906 if (cs == nullptr) break;
908 if (casei != -1) {
909 buffer->AppendUtf8(SCC_SET_CASE); // {SET_CASE}
910 buffer->AppendByte(casei);
913 /* For params that consume values, we need to handle the argindex properly */
914 if (cs->consumes > 0) {
915 /* Check if we need to output a move-param command */
916 if (argno != -1 && argno != _cur_argidx) {
917 _cur_argidx = argno;
918 PutArgidxCommand(buffer);
921 /* Output the one from the master string... it's always accurate. */
922 cs = _cur_pcs.cmd[_cur_argidx++];
923 if (cs == nullptr) {
924 strgen_fatal("%s: No argument exists at position %d", _cur_ident, _cur_argidx - 1);
928 cs->proc(buffer, param, cs->value);
933 * Write the length as a simple gamma.
934 * @param length The number to write.
936 void LanguageWriter::WriteLength(uint length)
938 char buffer[2];
939 int offs = 0;
940 if (length >= 0x4000) {
941 strgen_fatal("string too long");
944 if (length >= 0xC0) {
945 buffer[offs++] = (length >> 8) | 0xC0;
947 buffer[offs++] = length & 0xFF;
948 this->Write((byte*)buffer, offs);
952 * Actually write the language.
953 * @param data The data about the string.
955 void LanguageWriter::WriteLang(const StringData &data)
957 uint *in_use = AllocaM(uint, data.tabs);
958 for (size_t tab = 0; tab < data.tabs; tab++) {
959 uint n = data.CountInUse((uint)tab);
961 in_use[tab] = n;
962 _lang.offsets[tab] = TO_LE16(n);
964 for (uint j = 0; j != in_use[tab]; j++) {
965 const LangString *ls = data.strings[(tab * TAB_SIZE) + j];
966 if (ls != nullptr && ls->translated == nullptr) _lang.missing++;
970 _lang.ident = TO_LE32(LanguagePackHeader::IDENT);
971 _lang.version = TO_LE32(data.Version());
972 _lang.missing = TO_LE16(_lang.missing);
973 _lang.winlangid = TO_LE16(_lang.winlangid);
975 this->WriteHeader(&_lang);
976 Buffer buffer;
978 for (size_t tab = 0; tab < data.tabs; tab++) {
979 for (uint j = 0; j != in_use[tab]; j++) {
980 const LangString *ls = data.strings[(tab * TAB_SIZE) + j];
981 const Case *casep;
982 const char *cmdp;
984 /* For undefined strings, just set that it's an empty string */
985 if (ls == nullptr) {
986 this->WriteLength(0);
987 continue;
990 _cur_ident = ls->name;
991 _cur_line = ls->line;
993 /* Produce a message if a string doesn't have a translation. */
994 if (_show_todo > 0 && ls->translated == nullptr) {
995 if ((_show_todo & 2) != 0) {
996 strgen_warning("'%s' is untranslated", ls->name);
998 if ((_show_todo & 1) != 0) {
999 const char *s = "<TODO> ";
1000 while (*s != '\0') buffer.AppendByte(*s++);
1004 /* Extract the strings and stuff from the english command string */
1005 ExtractCommandString(&_cur_pcs, ls->english, false);
1007 if (ls->translated_case != nullptr || ls->translated != nullptr) {
1008 casep = ls->translated_case;
1009 cmdp = ls->translated;
1010 } else {
1011 casep = nullptr;
1012 cmdp = ls->english;
1015 _translated = cmdp != ls->english;
1017 if (casep != nullptr) {
1018 const Case *c;
1019 uint num;
1021 /* Need to output a case-switch.
1022 * It has this format
1023 * <0x9E> <NUM CASES> <CASE1> <LEN1> <STRING1> <CASE2> <LEN2> <STRING2> <CASE3> <LEN3> <STRING3> <STRINGDEFAULT>
1024 * Each LEN is printed using 2 bytes in big endian order. */
1025 buffer.AppendUtf8(SCC_SWITCH_CASE);
1026 /* Count the number of cases */
1027 for (num = 0, c = casep; c; c = c->next) num++;
1028 buffer.AppendByte(num);
1030 /* Write each case */
1031 for (c = casep; c != nullptr; c = c->next) {
1032 buffer.AppendByte(c->caseidx);
1033 /* Make some space for the 16-bit length */
1034 uint pos = (uint)buffer.size();
1035 buffer.AppendByte(0);
1036 buffer.AppendByte(0);
1037 /* Write string */
1038 PutCommandString(&buffer, c->string);
1039 buffer.AppendByte(0); // terminate with a zero
1040 /* Fill in the length */
1041 uint size = (uint)buffer.size() - (pos + 2);
1042 buffer[pos + 0] = GB(size, 8, 8);
1043 buffer[pos + 1] = GB(size, 0, 8);
1047 if (cmdp != nullptr) PutCommandString(&buffer, cmdp);
1049 this->WriteLength((uint)buffer.size());
1050 this->Write(buffer.data(), buffer.size());
1051 buffer.clear();