Update: Translations from eints
[openttd-github.git] / src / strgen / strgen_base.cpp
blob7ccd88d0062a25bbd26dc5309e8938338e70fd55
1 /*
2 * This file is part of OpenTTD.
3 * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
4 * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
5 * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
6 */
8 /** @file strgen_base.cpp Tool to create computer readable (stand-alone) translation files. */
10 #include "../stdafx.h"
11 #include "../core/alloc_func.hpp"
12 #include "../core/endian_func.hpp"
13 #include "../core/mem_func.hpp"
14 #include "../error_func.h"
15 #include "../string_func.h"
16 #include "../table/control_codes.h"
18 #include "strgen.h"
21 #include "../table/strgen_tables.h"
23 #include "../safeguards.h"
25 /* Compiles a list of strings into a compiled string list */
27 static bool _translated; ///< Whether the current language is not the master language
28 static bool _translation; ///< Is the current file actually a translation or not
29 const char *_file = "(unknown file)"; ///< The filename of the input, so we can refer to it in errors/warnings
30 int _cur_line; ///< The current line we're parsing in the input file
31 int _errors, _warnings, _show_todo;
32 LanguagePackHeader _lang; ///< Header information about a language.
34 static const CmdStruct *ParseCommandString(const char **str, std::string &param, int *argno, int *casei);
36 /**
37 * Create a new case.
38 * @param caseidx The index of the case.
39 * @param string The translation of the case.
41 Case::Case(int caseidx, const std::string &string) :
42 caseidx(caseidx), string(string)
46 /**
47 * Create a new string.
48 * @param name The name of the string.
49 * @param english The english "translation" of the string.
50 * @param index The index in the string table.
51 * @param line The line this string was found on.
53 LangString::LangString(const std::string &name, const std::string &english, size_t index, int line) :
54 name(name), english(english), index(index), line(line)
58 /** Free all data related to the translation. */
59 void LangString::FreeTranslation()
61 this->translated.clear();
62 this->translated_cases.clear();
65 /**
66 * Create a new string data container.
67 * @param tabs The maximum number of strings.
69 StringData::StringData(size_t tabs) : tabs(tabs), max_strings(tabs * TAB_SIZE)
71 this->strings.resize(max_strings);
72 this->next_string_id = 0;
75 /** Free all data related to the translation. */
76 void StringData::FreeTranslation()
78 for (size_t i = 0; i < this->max_strings; i++) {
79 LangString *ls = this->strings[i].get();
80 if (ls != nullptr) ls->FreeTranslation();
84 /**
85 * Add a newly created LangString.
86 * @param s The name of the string.
87 * @param ls The string to add.
89 void StringData::Add(std::unique_ptr<LangString> ls)
91 this->name_to_string[ls->name] = ls.get();
92 this->strings[ls->index].swap(ls);
95 /**
96 * Find a LangString based on the string name.
97 * @param s The string name to search on.
98 * @return The LangString or nullptr if it is not known.
100 LangString *StringData::Find(const std::string_view s)
102 auto it = this->name_to_string.find(s);
103 if (it == this->name_to_string.end()) return nullptr;
105 return it->second;
109 * Create a compound hash.
110 * @param hash The hash to add the string hash to.
111 * @param s The string hash.
112 * @return The new hash.
114 uint StringData::VersionHashStr(uint hash, const char *s) const
116 for (; *s != '\0'; s++) {
117 hash = std::rotl(hash, 3) ^ *s;
118 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
120 return hash;
124 * Make a hash of the file to get a unique "version number"
125 * @return The version number.
127 uint StringData::Version() const
129 uint hash = 0;
131 for (size_t i = 0; i < this->max_strings; i++) {
132 const LangString *ls = this->strings[i].get();
134 if (ls != nullptr) {
135 const CmdStruct *cs;
136 const char *s;
137 std::string buf;
138 int argno;
139 int casei;
141 s = ls->name.c_str();
142 hash ^= i * 0x717239;
143 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
144 hash = this->VersionHashStr(hash, s + 1);
146 s = ls->english.c_str();
147 while ((cs = ParseCommandString(&s, buf, &argno, &casei)) != nullptr) {
148 if (cs->flags & C_DONTCOUNT) continue;
150 hash ^= (cs - _cmd_structs) * 0x1234567;
151 hash = (hash & 1 ? hash >> 1 ^ 0xF00BAA4 : hash >> 1);
156 return hash;
160 * Count the number of tab elements that are in use.
161 * @param tab The tab to count the elements of.
163 uint StringData::CountInUse(uint tab) const
165 int i;
166 for (i = TAB_SIZE; --i >= 0;) if (this->strings[(tab * TAB_SIZE) + i] != nullptr) break;
167 return i + 1;
170 static const char *_cur_ident;
172 /* Used when generating some advanced commands. */
173 static ParsedCommandStruct _cur_pcs;
174 static int _cur_argidx;
176 /** The buffer for writing a single string. */
177 struct Buffer : std::vector<uint8_t> {
179 * Convenience method for adding a byte.
180 * @param value The value to add.
182 void AppendByte(uint8_t value)
184 this->push_back(value);
188 * Add an Unicode character encoded in UTF-8 to the buffer.
189 * @param value The character to add.
191 void AppendUtf8(uint32_t value)
193 if (value < 0x80) {
194 this->push_back(value);
195 } else if (value < 0x800) {
196 this->push_back(0xC0 + GB(value, 6, 5));
197 this->push_back(0x80 + GB(value, 0, 6));
198 } else if (value < 0x10000) {
199 this->push_back(0xE0 + GB(value, 12, 4));
200 this->push_back(0x80 + GB(value, 6, 6));
201 this->push_back(0x80 + GB(value, 0, 6));
202 } else if (value < 0x110000) {
203 this->push_back(0xF0 + GB(value, 18, 3));
204 this->push_back(0x80 + GB(value, 12, 6));
205 this->push_back(0x80 + GB(value, 6, 6));
206 this->push_back(0x80 + GB(value, 0, 6));
207 } else {
208 StrgenWarning("Invalid unicode value U+0x{:X}", value);
213 size_t Utf8Validate(const char *s)
215 uint32_t c;
217 if (!HasBit(s[0], 7)) {
218 /* 1 byte */
219 return 1;
220 } else if (GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) {
221 /* 2 bytes */
222 c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6);
223 if (c >= 0x80) return 2;
224 } else if (GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
225 /* 3 bytes */
226 c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
227 if (c >= 0x800) return 3;
228 } else if (GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
229 /* 4 bytes */
230 c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
231 if (c >= 0x10000 && c <= 0x10FFFF) return 4;
234 return 0;
238 void EmitSingleChar(Buffer *buffer, char *buf, int value)
240 if (*buf != '\0') StrgenWarning("Ignoring trailing letters in command");
241 buffer->AppendUtf8(value);
245 /* The plural specifier looks like
246 * {NUM} {PLURAL -1 passenger passengers} then it picks either passenger/passengers depending on the count in NUM */
248 /* This is encoded like
249 * CommandByte <ARG#> <NUM> {Length of each string} {each string} */
/**
 * Parse an (optionally relative) number with an optional ":offset" suffix.
 *
 * A number that has a leading '+' or is negative is added to *value, any other
 * number replaces *value. When \a offset is given and the number is directly
 * followed by ':', the number after the colon is stored in *offset.
 *
 * @param buf Pointer to the text to parse; moved past the parsed text on success only.
 * @param value The value to update.
 * @param offset Where to store the offset; may be nullptr when no offset is wanted.
 * @return True when a number was parsed. False when there is no number, or when
 *         the ':' is not followed by a number (in which case *value has already been updated).
 */
bool ParseRelNum(char **buf, int *value, int *offset)
{
	const char *cursor = *buf;
	while (*cursor == ' ' || *cursor == '\t') cursor++;

	const bool explicit_relative = (*cursor == '+');
	if (explicit_relative) cursor++;

	char *stop = nullptr;
	const int number = std::strtol(cursor, &stop, 0);
	if (stop == cursor) return false;

	*value = (explicit_relative || number < 0) ? *value + number : number;

	if (offset != nullptr && *stop == ':') {
		/* Take the Nth within */
		const char *offset_text = stop + 1;
		*offset = std::strtol(offset_text, &stop, 0);
		if (stop == offset_text) return false;
	}

	*buf = stop;
	return true;
}
/**
 * Parse out the next word.
 *
 * A word is either enclosed in double quotes or runs up to the next space or
 * tab. The character ending the word is overwritten with a NUL, so the input
 * is modified in place.
 *
 * @param buf Pointer to the text to parse; moved past the word unless nullptr is returned.
 * @return The word, or nullptr when only whitespace is left.
 */
char *ParseWord(char **buf)
{
	char *cursor = *buf;
	while (*cursor == ' ' || *cursor == '\t') cursor++;
	if (*cursor == '\0') return nullptr;

	const bool quoted = (*cursor == '"');
	if (quoted) cursor++; // The word starts after the opening quote.
	char *word = cursor;

	/* A quoted word ends at the next quote, an unquoted one at whitespace. */
	const auto ends_word = [quoted](char c) {
		return quoted ? c == '"' : (c == ' ' || c == '\t');
	};
	while (*cursor != '\0' && !ends_word(*cursor)) cursor++;

	/* Terminate the word, unless it already runs to the end of the input. */
	if (*cursor != '\0') *cursor++ = '\0';

	*buf = cursor;
	return word;
}
314 /* Forward declaration */
315 static int TranslateArgumentIdx(int arg, int offset = 0);
317 static void EmitWordList(Buffer *buffer, const std::vector<const char *> &words, uint nw)
319 /* Maximum word length in bytes, excluding trailing NULL. */
320 constexpr uint MAX_WORD_LENGTH = UINT8_MAX - 2;
322 buffer->AppendByte(nw);
323 for (uint i = 0; i < nw; i++) {
324 size_t len = strlen(words[i]) + 1;
325 if (len >= UINT8_MAX) StrgenFatal("WordList {}/{} string '{}' too long, max bytes {}", i + 1, nw, words[i], MAX_WORD_LENGTH);
326 buffer->AppendByte(static_cast<uint8_t>(len));
328 for (uint i = 0; i < nw; i++) {
329 for (uint j = 0; words[i][j] != '\0'; j++) buffer->AppendByte(words[i][j]);
330 buffer->AppendByte(0);
334 void EmitPlural(Buffer *buffer, char *buf, int)
336 int argidx = _cur_argidx;
337 int offset = -1;
338 int expected = _plural_forms[_lang.plural_form].plural_count;
339 std::vector<const char *> words(std::max(expected, MAX_PLURALS), nullptr);
340 int nw = 0;
342 /* Parse out the number, if one exists. Otherwise default to prev arg. */
343 if (!ParseRelNum(&buf, &argidx, &offset)) argidx--;
345 const CmdStruct *cmd = _cur_pcs.consuming_commands[argidx];
346 if (offset == -1) {
347 /* Use default offset */
348 if (cmd == nullptr || cmd->default_plural_offset < 0) {
349 StrgenFatal("Command '{}' has no (default) plural position", cmd == nullptr ? "<empty>" : cmd->cmd);
351 offset = cmd->default_plural_offset;
354 /* Parse each string */
355 for (nw = 0; nw < MAX_PLURALS; nw++) {
356 words[nw] = ParseWord(&buf);
357 if (words[nw] == nullptr) break;
360 if (nw == 0) {
361 StrgenFatal("{}: No plural words", _cur_ident);
364 if (expected != nw) {
365 if (_translated) {
366 StrgenFatal("{}: Invalid number of plural forms. Expecting {}, found {}.", _cur_ident,
367 expected, nw);
368 } else {
369 if ((_show_todo & 2) != 0) StrgenWarning("'{}' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident);
370 if (nw > expected) {
371 nw = expected;
372 } else {
373 for (; nw < expected; nw++) {
374 words[nw] = words[nw - 1];
380 buffer->AppendUtf8(SCC_PLURAL_LIST);
381 buffer->AppendByte(_lang.plural_form);
382 buffer->AppendByte(TranslateArgumentIdx(argidx, offset));
383 EmitWordList(buffer, words, nw);
386 void EmitGender(Buffer *buffer, char *buf, int)
388 int argidx = _cur_argidx;
389 int offset = 0;
390 uint nw;
392 if (buf[0] == '=') {
393 buf++;
395 /* This is a {G=DER} command */
396 nw = _lang.GetGenderIndex(buf);
397 if (nw >= MAX_NUM_GENDERS) StrgenFatal("G argument '{}' invalid", buf);
399 /* now nw contains the gender index */
400 buffer->AppendUtf8(SCC_GENDER_INDEX);
401 buffer->AppendByte(nw);
402 } else {
403 std::vector<const char *> words(MAX_NUM_GENDERS, nullptr);
405 /* This is a {G 0 foo bar two} command.
406 * If no relative number exists, default to +0 */
407 ParseRelNum(&buf, &argidx, &offset);
409 const CmdStruct *cmd = _cur_pcs.consuming_commands[argidx];
410 if (cmd == nullptr || (cmd->flags & C_GENDER) == 0) {
411 StrgenFatal("Command '{}' can't have a gender", cmd == nullptr ? "<empty>" : cmd->cmd);
414 for (nw = 0; nw < MAX_NUM_GENDERS; nw++) {
415 words[nw] = ParseWord(&buf);
416 if (words[nw] == nullptr) break;
418 if (nw != _lang.num_genders) StrgenFatal("Bad # of arguments for gender command");
420 assert(IsInsideBS(cmd->value, SCC_CONTROL_START, UINT8_MAX));
421 buffer->AppendUtf8(SCC_GENDER_LIST);
422 buffer->AppendByte(TranslateArgumentIdx(argidx, offset));
423 EmitWordList(buffer, words, nw);
427 static const CmdStruct *FindCmd(const char *s, int len)
429 for (const auto &cs : _cmd_structs) {
430 if (strncmp(cs.cmd, s, len) == 0 && cs.cmd[len] == '\0') return &cs;
432 return nullptr;
435 static uint ResolveCaseName(const char *str, size_t len)
437 /* First get a clean copy of only the case name, then resolve it. */
438 char case_str[CASE_GENDER_LEN];
439 len = std::min(lengthof(case_str) - 1, len);
440 memcpy(case_str, str, len);
441 case_str[len] = '\0';
443 uint8_t case_idx = _lang.GetCaseIndex(case_str);
444 if (case_idx >= MAX_NUM_CASES) StrgenFatal("Invalid case-name '{}'", case_str);
445 return case_idx + 1;
449 /* returns nullptr on eof
450 * else returns command struct */
451 static const CmdStruct *ParseCommandString(const char **str, std::string &param, int *argno, int *casei)
453 const char *s = *str, *start;
454 char c;
456 *argno = -1;
457 *casei = -1;
459 /* Scan to the next command, exit if there's no next command. */
460 for (; *s != '{'; s++) {
461 if (*s == '\0') return nullptr;
463 s++; // Skip past the {
465 if (*s >= '0' && *s <= '9') {
466 char *end;
468 *argno = std::strtoul(s, &end, 0);
469 if (*end != ':') StrgenFatal("missing arg #");
470 s = end + 1;
473 /* parse command name */
474 start = s;
475 do {
476 c = *s++;
477 } while (c != '}' && c != ' ' && c != '=' && c != '.' && c != 0);
479 const CmdStruct *cmd = FindCmd(start, s - start - 1);
480 if (cmd == nullptr) {
481 std::string command(start, s - start - 1);
482 StrgenError("Undefined command '{}'", command);
483 return nullptr;
486 if (c == '.') {
487 const char *casep = s;
489 if (!(cmd->flags & C_CASE)) {
490 StrgenFatal("Command '{}' can't have a case", cmd->cmd);
493 do {
494 c = *s++;
495 } while (c != '}' && c != ' ' && c != '\0');
496 *casei = ResolveCaseName(casep, s - casep - 1);
499 if (c == '\0') {
500 StrgenError("Missing }} from command '{}'", start);
501 return nullptr;
505 if (c != '}') {
506 if (c == '=') s--;
507 /* copy params */
508 start = s;
509 for (;;) {
510 c = *s++;
511 if (c == '}') break;
512 if (c == '\0') {
513 StrgenError("Missing }} from command '{}'", start);
514 return nullptr;
516 param += c;
520 *str = s;
522 return cmd;
526 * Prepare reading.
527 * @param data The data to fill during reading.
528 * @param file The file we are reading.
529 * @param master Are we reading the master file?
530 * @param translation Are we reading a translation?
532 StringReader::StringReader(StringData &data, const std::string &file, bool master, bool translation) :
533 data(data), file(file), master(master), translation(translation)
537 ParsedCommandStruct ExtractCommandString(const char *s, bool)
539 int argno;
540 int argidx = 0;
541 int casei;
543 ParsedCommandStruct p;
545 for (;;) {
546 /* read until next command from a. */
547 std::string param;
548 const CmdStruct *ar = ParseCommandString(&s, param, &argno, &casei);
550 if (ar == nullptr) break;
552 /* Sanity checking */
553 if (argno != -1 && ar->consumes == 0) StrgenFatal("Non consumer param can't have a paramindex");
555 if (ar->consumes) {
556 if (argno != -1) argidx = argno;
557 if (argidx < 0 || (uint)argidx >= p.consuming_commands.max_size()) StrgenFatal("invalid param idx {}", argidx);
558 if (p.consuming_commands[argidx] != nullptr && p.consuming_commands[argidx] != ar) StrgenFatal("duplicate param idx {}", argidx);
560 p.consuming_commands[argidx++] = ar;
561 } else if (!(ar->flags & C_DONTCOUNT)) { // Ignore some of them
562 p.non_consuming_commands.emplace_back(CmdPair{ar, std::move(param)});
566 return p;
570 const CmdStruct *TranslateCmdForCompare(const CmdStruct *a)
572 if (a == nullptr) return nullptr;
574 if (strcmp(a->cmd, "STRING1") == 0 ||
575 strcmp(a->cmd, "STRING2") == 0 ||
576 strcmp(a->cmd, "STRING3") == 0 ||
577 strcmp(a->cmd, "STRING4") == 0 ||
578 strcmp(a->cmd, "STRING5") == 0 ||
579 strcmp(a->cmd, "STRING6") == 0 ||
580 strcmp(a->cmd, "STRING7") == 0 ||
581 strcmp(a->cmd, "RAW_STRING") == 0) {
582 return FindCmd("STRING", 6);
585 return a;
589 static bool CheckCommandsMatch(const char *a, const char *b, const char *name)
591 /* If we're not translating, i.e. we're compiling the base language,
592 * it is pointless to do all these checks as it'll always be correct.
593 * After all, all checks are based on the base language.
595 if (!_translation) return true;
597 bool result = true;
599 ParsedCommandStruct templ = ExtractCommandString(b, true);
600 ParsedCommandStruct lang = ExtractCommandString(a, true);
602 /* For each string in templ, see if we find it in lang */
603 if (templ.non_consuming_commands.max_size() != lang.non_consuming_commands.max_size()) {
604 StrgenWarning("{}: template string and language string have a different # of commands", name);
605 result = false;
608 for (auto &templ_nc : templ.non_consuming_commands) {
609 /* see if we find it in lang, and zero it out */
610 bool found = false;
611 for (auto &lang_nc : lang.non_consuming_commands) {
612 if (templ_nc.cmd == lang_nc.cmd && templ_nc.param == lang_nc.param) {
613 /* it was found in both. zero it out from lang so we don't find it again */
614 lang_nc.cmd = nullptr;
615 found = true;
616 break;
620 if (!found) {
621 StrgenWarning("{}: command '{}' exists in template file but not in language file", name, templ_nc.cmd->cmd);
622 result = false;
626 /* if we reach here, all non consumer commands match up.
627 * Check if the non consumer commands match up also. */
628 for (uint i = 0; i < templ.consuming_commands.max_size(); i++) {
629 if (TranslateCmdForCompare(templ.consuming_commands[i]) != lang.consuming_commands[i]) {
630 StrgenWarning("{}: Param idx #{} '{}' doesn't match with template command '{}'", name, i,
631 lang.consuming_commands[i] == nullptr ? "<empty>" : TranslateCmdForCompare(lang.consuming_commands[i])->cmd,
632 templ.consuming_commands[i] == nullptr ? "<empty>" : templ.consuming_commands[i]->cmd);
633 result = false;
637 return result;
640 void StringReader::HandleString(char *str)
642 if (*str == '#') {
643 if (str[1] == '#' && str[2] != '#') this->HandlePragma(str + 2);
644 return;
647 /* Ignore comments & blank lines */
648 if (*str == ';' || *str == ' ' || *str == '\0') return;
650 char *s = strchr(str, ':');
651 if (s == nullptr) {
652 StrgenError("Line has no ':' delimiter");
653 return;
656 char *t;
657 /* Trim spaces.
658 * After this str points to the command name, and s points to the command contents */
659 for (t = s; t > str && (t[-1] == ' ' || t[-1] == '\t'); t--) {}
660 *t = 0;
661 s++;
663 /* Check string is valid UTF-8 */
664 const char *tmp;
665 for (tmp = s; *tmp != '\0';) {
666 size_t len = Utf8Validate(tmp);
667 if (len == 0) StrgenFatal("Invalid UTF-8 sequence in '{}'", s);
669 char32_t c;
670 Utf8Decode(&c, tmp);
671 if (c <= 0x001F || // ASCII control character range
672 c == 0x200B || // Zero width space
673 (c >= 0xE000 && c <= 0xF8FF) || // Private range
674 (c >= 0xFFF0 && c <= 0xFFFF)) { // Specials range
675 StrgenFatal("Unwanted UTF-8 character U+{:04X} in sequence '{}'", (int)c, s);
678 tmp += len;
681 /* Check if the string has a case..
682 * The syntax for cases is IDENTNAME.case */
683 char *casep = strchr(str, '.');
684 if (casep != nullptr) *casep++ = '\0';
686 /* Check if this string already exists.. */
687 LangString *ent = this->data.Find(str);
689 if (this->master) {
690 if (casep != nullptr) {
691 StrgenError("Cases in the base translation are not supported.");
692 return;
695 if (ent != nullptr) {
696 StrgenError("String name '{}' is used multiple times", str);
697 return;
700 if (this->data.strings[this->data.next_string_id] != nullptr) {
701 StrgenError("String ID 0x{:X} for '{}' already in use by '{}'", this->data.next_string_id, str, this->data.strings[this->data.next_string_id]->name);
702 return;
705 /* Allocate a new LangString */
706 this->data.Add(std::make_unique<LangString>(str, s, this->data.next_string_id++, _cur_line));
707 } else {
708 if (ent == nullptr) {
709 StrgenWarning("String name '{}' does not exist in master file", str);
710 return;
713 if (!ent->translated.empty() && casep == nullptr) {
714 StrgenError("String name '{}' is used multiple times", str);
715 return;
718 /* make sure that the commands match */
719 if (!CheckCommandsMatch(s, ent->english.c_str(), str)) return;
721 if (casep != nullptr) {
722 ent->translated_cases.emplace_back(ResolveCaseName(casep, strlen(casep)), s);
723 } else {
724 ent->translated = s;
725 /* If the string was translated, use the line from the
726 * translated language so errors in the translated file
727 * are properly referenced to. */
728 ent->line = _cur_line;
733 void StringReader::HandlePragma(char *str)
735 if (!memcmp(str, "plural ", 7)) {
736 _lang.plural_form = atoi(str + 7);
737 if (_lang.plural_form >= lengthof(_plural_forms)) {
738 StrgenFatal("Invalid pluralform {}", _lang.plural_form);
740 } else {
741 StrgenFatal("unknown pragma '{}'", str);
/**
 * Remove trailing carriage returns, newlines and spaces from a string.
 * Other whitespace, such as tabs, is left alone.
 * @param str The string to strip, in place.
 */
static void StripTrailingWhitespace(std::string &str)
{
	const std::string::size_type last_kept = str.find_last_not_of("\r\n ");
	if (last_kept == std::string::npos) {
		/* Nothing but strippable characters (or nothing at all). */
		str.clear();
		return;
	}

	str.resize(last_kept + 1);
}
750 void StringReader::ParseFile()
752 _warnings = _errors = 0;
754 _translation = this->translation;
755 _file = this->file.c_str();
757 /* Abusing _show_todo to replace "warning" with "info" for translations. */
758 _show_todo &= 3;
759 if (!this->translation) _show_todo |= 4;
761 /* For each new file we parse, reset the genders, and language codes. */
762 MemSetT(&_lang, 0);
763 strecpy(_lang.digit_group_separator, ",");
764 strecpy(_lang.digit_group_separator_currency, ",");
765 strecpy(_lang.digit_decimal_separator, ".");
767 _cur_line = 1;
768 while (this->data.next_string_id < this->data.max_strings) {
769 std::optional<std::string> line = this->ReadLine();
770 if (!line.has_value()) return;
772 StripTrailingWhitespace(line.value());
773 this->HandleString(line.value().data());
774 _cur_line++;
777 if (this->data.next_string_id == this->data.max_strings) {
778 StrgenError("Too many strings, maximum allowed is {}", this->data.max_strings);
783 * Write the header information.
784 * @param data The data about the string.
786 void HeaderWriter::WriteHeader(const StringData &data)
788 int last = 0;
789 for (size_t i = 0; i < data.max_strings; i++) {
790 if (data.strings[i] != nullptr) {
791 this->WriteStringID(data.strings[i]->name, (int)i);
792 last = (int)i;
796 this->WriteStringID("STR_LAST_STRINGID", last);
799 static int TranslateArgumentIdx(int argidx, int offset)
801 int sum;
803 if (argidx < 0 || (uint)argidx >= _cur_pcs.consuming_commands.max_size()) {
804 StrgenFatal("invalid argidx {}", argidx);
806 const CmdStruct *cs = _cur_pcs.consuming_commands[argidx];
807 if (cs != nullptr && cs->consumes <= offset) {
808 StrgenFatal("invalid argidx offset {}:{}", argidx, offset);
811 if (_cur_pcs.consuming_commands[argidx] == nullptr) {
812 StrgenFatal("no command for this argidx {}", argidx);
815 for (int i = sum = 0; i < argidx; i++) {
816 cs = _cur_pcs.consuming_commands[i];
818 sum += (cs != nullptr) ? cs->consumes : 1;
821 return sum + offset;
824 static void PutArgidxCommand(Buffer *buffer)
826 buffer->AppendUtf8(SCC_ARG_INDEX);
827 buffer->AppendByte(TranslateArgumentIdx(_cur_argidx));
831 static void PutCommandString(Buffer *buffer, const char *str)
833 _cur_argidx = 0;
835 while (*str != '\0') {
836 /* Process characters as they are until we encounter a { */
837 if (*str != '{') {
838 buffer->AppendByte(*str++);
839 continue;
842 std::string param;
843 int argno;
844 int casei;
845 const CmdStruct *cs = ParseCommandString(&str, param, &argno, &casei);
846 if (cs == nullptr) break;
848 if (casei != -1) {
849 buffer->AppendUtf8(SCC_SET_CASE); // {SET_CASE}
850 buffer->AppendByte(casei);
853 /* For params that consume values, we need to handle the argindex properly */
854 if (cs->consumes > 0) {
855 /* Check if we need to output a move-param command */
856 if (argno != -1 && argno != _cur_argidx) {
857 _cur_argidx = argno;
858 PutArgidxCommand(buffer);
861 /* Output the one from the master string... it's always accurate. */
862 cs = _cur_pcs.consuming_commands[_cur_argidx++];
863 if (cs == nullptr) {
864 StrgenFatal("{}: No argument exists at position {}", _cur_ident, _cur_argidx - 1);
868 cs->proc(buffer, param.data(), cs->value);
873 * Write the length as a simple gamma.
874 * @param length The number to write.
876 void LanguageWriter::WriteLength(uint length)
878 char buffer[2];
879 int offs = 0;
880 if (length >= 0x4000) {
881 StrgenFatal("string too long");
884 if (length >= 0xC0) {
885 buffer[offs++] = (length >> 8) | 0xC0;
887 buffer[offs++] = length & 0xFF;
888 this->Write((uint8_t*)buffer, offs);
892 * Actually write the language.
893 * @param data The data about the string.
895 void LanguageWriter::WriteLang(const StringData &data)
897 std::vector<uint> in_use;
898 for (size_t tab = 0; tab < data.tabs; tab++) {
899 uint n = data.CountInUse((uint)tab);
901 in_use.push_back(n);
902 _lang.offsets[tab] = TO_LE16(n);
904 for (uint j = 0; j != in_use[tab]; j++) {
905 const LangString *ls = data.strings[(tab * TAB_SIZE) + j].get();
906 if (ls != nullptr && ls->translated.empty()) _lang.missing++;
910 _lang.ident = TO_LE32(LanguagePackHeader::IDENT);
911 _lang.version = TO_LE32(data.Version());
912 _lang.missing = TO_LE16(_lang.missing);
913 _lang.winlangid = TO_LE16(_lang.winlangid);
915 this->WriteHeader(&_lang);
916 Buffer buffer;
918 for (size_t tab = 0; tab < data.tabs; tab++) {
919 for (uint j = 0; j != in_use[tab]; j++) {
920 const LangString *ls = data.strings[(tab * TAB_SIZE) + j].get();
921 const std::string *cmdp;
923 /* For undefined strings, just set that it's an empty string */
924 if (ls == nullptr) {
925 this->WriteLength(0);
926 continue;
929 _cur_ident = ls->name.c_str();
930 _cur_line = ls->line;
932 /* Produce a message if a string doesn't have a translation. */
933 if (_show_todo > 0 && ls->translated.empty()) {
934 if ((_show_todo & 2) != 0) {
935 StrgenWarning("'{}' is untranslated", ls->name);
937 if ((_show_todo & 1) != 0) {
938 const char *s = "<TODO> ";
939 while (*s != '\0') buffer.AppendByte(*s++);
943 /* Extract the strings and stuff from the english command string */
944 _cur_pcs = ExtractCommandString(ls->english.c_str(), false);
946 if (!ls->translated_cases.empty() || !ls->translated.empty()) {
947 cmdp = &ls->translated;
948 } else {
949 cmdp = &ls->english;
952 _translated = cmdp != &ls->english;
954 if (!ls->translated_cases.empty()) {
955 /* Need to output a case-switch.
956 * It has this format
957 * <0x9E> <NUM CASES> <CASE1> <LEN1> <STRING1> <CASE2> <LEN2> <STRING2> <CASE3> <LEN3> <STRING3> <STRINGDEFAULT>
958 * Each LEN is printed using 2 bytes in big endian order. */
959 buffer.AppendUtf8(SCC_SWITCH_CASE);
960 buffer.AppendByte((uint8_t)ls->translated_cases.size());
962 /* Write each case */
963 for (const Case &c : ls->translated_cases) {
964 buffer.AppendByte(c.caseidx);
965 /* Make some space for the 16-bit length */
966 uint pos = (uint)buffer.size();
967 buffer.AppendByte(0);
968 buffer.AppendByte(0);
969 /* Write string */
970 PutCommandString(&buffer, c.string.c_str());
971 buffer.AppendByte(0); // terminate with a zero
972 /* Fill in the length */
973 uint size = (uint)buffer.size() - (pos + 2);
974 buffer[pos + 0] = GB(size, 8, 8);
975 buffer[pos + 1] = GB(size, 0, 8);
979 if (!cmdp->empty()) PutCommandString(&buffer, cmdp->c_str());
981 this->WriteLength((uint)buffer.size());
982 this->Write(buffer.data(), buffer.size());
983 buffer.clear();