2 * This file is part of OpenTTD.
3 * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
4 * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
5 * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
8 /** @file strgen_base.cpp Tool to create computer readable (stand-alone) translation files. */
10 #include "../stdafx.h"
11 #include "../core/endian_func.hpp"
12 #include "../string_func.h"
13 #include "../table/control_codes.h"
18 #include "../table/strgen_tables.h"
20 #include "../safeguards.h"
22 /* Compiles a list of strings into a compiled string list */
24 static bool _translated
; ///< Whether the current language is not the master language
25 static bool _translation
; ///< Is the current file actually a translation or not
26 const char *_file
= "(unknown file)"; ///< The filename of the input, so we can refer to it in errors/warnings
27 int _cur_line
; ///< The current line we're parsing in the input file
28 int _errors
, _warnings
, _show_todo
;
29 LanguagePackHeader _lang
; ///< Header information about a language.
31 static const ptrdiff_t MAX_COMMAND_PARAM_SIZE
= 100; ///< Maximum size of every command block, not counting the name of the command itself
32 static const CmdStruct
*ParseCommandString(const char **str
, char *param
, int *argno
, int *casei
);
36 * @param caseidx The index of the case.
37 * @param string The translation of the case.
38 * @param next The next chained case.
40 Case::Case(int caseidx
, const char *string
, Case
*next
) :
41 caseidx(caseidx
), string(stredup(string
)), next(next
)
45 /** Free everything we allocated. */
53 * Create a new string.
54 * @param name The name of the string.
55 * @param english The english "translation" of the string.
56 * @param index The index in the string table.
57 * @param line The line this string was found on.
59 LangString::LangString(const char *name
, const char *english
, size_t index
, int line
) :
60 name(stredup(name
)), english(stredup(english
)), translated(nullptr),
61 hash_next(0), index(index
), line(line
), translated_case(nullptr)
65 /** Free everything we allocated. */
66 LangString::~LangString()
70 free(this->translated
);
71 delete this->translated_case
;
74 /** Free all data related to the translation. */
75 void LangString::FreeTranslation()
77 free(this->translated
);
78 this->translated
= nullptr;
80 delete this->translated_case
;
81 this->translated_case
= nullptr;
85 * Create a new string data container.
86 * @param tabs The maximum number of strings.
88 StringData::StringData(size_t tabs
) : tabs(tabs
), max_strings(tabs
* TAB_SIZE
)
90 this->strings
= CallocT
<LangString
*>(max_strings
);
91 this->hash_heads
= CallocT
<size_t>(max_strings
);
92 this->next_string_id
= 0;
95 /** Free everything we allocated. */
96 StringData::~StringData()
98 for (size_t i
= 0; i
< this->max_strings
; i
++) delete this->strings
[i
];
100 free(this->hash_heads
);
103 /** Free all data related to the translation. */
104 void StringData::FreeTranslation()
106 for (size_t i
= 0; i
< this->max_strings
; i
++) {
107 LangString
*ls
= this->strings
[i
];
108 if (ls
!= nullptr) ls
->FreeTranslation();
113 * Create a hash of the string for finding them back quickly.
114 * @param s The string to hash.
115 * @return The hashed string.
117 uint
StringData::HashStr(const char *s
) const
120 for (; *s
!= '\0'; s
++) hash
= ROL(hash
, 3) ^ *s
;
121 return hash
% this->max_strings
;
125 * Add a newly created LangString.
126 * @param s The name of the string.
127 * @param ls The string to add.
129 void StringData::Add(const char *s
, LangString
*ls
)
131 uint hash
= this->HashStr(s
);
132 ls
->hash_next
= this->hash_heads
[hash
];
133 /* Off-by-one for hash find. */
134 this->hash_heads
[hash
] = ls
->index
+ 1;
135 this->strings
[ls
->index
] = ls
;
139 * Find a LangString based on the string name.
140 * @param s The string name to search on.
141 * @return The LangString or nullptr if it is not known.
143 LangString
*StringData::Find(const char *s
)
145 size_t idx
= this->hash_heads
[this->HashStr(s
)];
148 LangString
*ls
= this->strings
[idx
];
150 if (strcmp(ls
->name
, s
) == 0) return ls
;
157 * Create a compound hash.
158 * @param hash The hash to add the string hash to.
159 * @param s The string hash.
160 * @return The new hash.
162 uint
StringData::VersionHashStr(uint hash
, const char *s
) const
164 for (; *s
!= '\0'; s
++) {
165 hash
= ROL(hash
, 3) ^ *s
;
166 hash
= (hash
& 1 ? hash
>> 1 ^ 0xDEADBEEF : hash
>> 1);
172 * Make a hash of the file to get a unique "version number"
173 * @return The version number.
175 uint
StringData::Version() const
179 for (size_t i
= 0; i
< this->max_strings
; i
++) {
180 const LangString
*ls
= this->strings
[i
];
185 char buf
[MAX_COMMAND_PARAM_SIZE
];
190 hash
^= i
* 0x717239;
191 hash
= (hash
& 1 ? hash
>> 1 ^ 0xDEADBEEF : hash
>> 1);
192 hash
= this->VersionHashStr(hash
, s
+ 1);
195 while ((cs
= ParseCommandString(&s
, buf
, &argno
, &casei
)) != nullptr) {
196 if (cs
->flags
& C_DONTCOUNT
) continue;
198 hash
^= (cs
- _cmd_structs
) * 0x1234567;
199 hash
= (hash
& 1 ? hash
>> 1 ^ 0xF00BAA4 : hash
>> 1);
208 * Count the number of tab elements that are in use.
209 * @param tab The tab to count the elements of.
211 uint
StringData::CountInUse(uint tab
) const
214 for (i
= TAB_SIZE
; --i
>= 0;) if (this->strings
[(tab
* TAB_SIZE
) + i
] != nullptr) break;
218 static const char *_cur_ident
;
225 struct ParsedCommandStruct
{
228 const CmdStruct
*cmd
[32]; // ordered by param #
231 /* Used when generating some advanced commands. */
232 static ParsedCommandStruct _cur_pcs
;
233 static int _cur_argidx
;
235 /** The buffer for writing a single string. */
236 struct Buffer
: std::vector
<byte
> {
238 * Convenience method for adding a byte.
239 * @param value The value to add.
241 void AppendByte(byte value
)
243 this->push_back(value
);
247 * Add an Unicode character encoded in UTF-8 to the buffer.
248 * @param value The character to add.
250 void AppendUtf8(uint32 value
)
253 this->push_back(value
);
254 } else if (value
< 0x800) {
255 this->push_back(0xC0 + GB(value
, 6, 5));
256 this->push_back(0x80 + GB(value
, 0, 6));
257 } else if (value
< 0x10000) {
258 this->push_back(0xE0 + GB(value
, 12, 4));
259 this->push_back(0x80 + GB(value
, 6, 6));
260 this->push_back(0x80 + GB(value
, 0, 6));
261 } else if (value
< 0x110000) {
262 this->push_back(0xF0 + GB(value
, 18, 3));
263 this->push_back(0x80 + GB(value
, 12, 6));
264 this->push_back(0x80 + GB(value
, 6, 6));
265 this->push_back(0x80 + GB(value
, 0, 6));
267 strgen_warning("Invalid unicode value U+0x%X", value
);
272 size_t Utf8Validate(const char *s
)
276 if (!HasBit(s
[0], 7)) {
279 } else if (GB(s
[0], 5, 3) == 6 && IsUtf8Part(s
[1])) {
281 c
= GB(s
[0], 0, 5) << 6 | GB(s
[1], 0, 6);
282 if (c
>= 0x80) return 2;
283 } else if (GB(s
[0], 4, 4) == 14 && IsUtf8Part(s
[1]) && IsUtf8Part(s
[2])) {
285 c
= GB(s
[0], 0, 4) << 12 | GB(s
[1], 0, 6) << 6 | GB(s
[2], 0, 6);
286 if (c
>= 0x800) return 3;
287 } else if (GB(s
[0], 3, 5) == 30 && IsUtf8Part(s
[1]) && IsUtf8Part(s
[2]) && IsUtf8Part(s
[3])) {
289 c
= GB(s
[0], 0, 3) << 18 | GB(s
[1], 0, 6) << 12 | GB(s
[2], 0, 6) << 6 | GB(s
[3], 0, 6);
290 if (c
>= 0x10000 && c
<= 0x10FFFF) return 4;
297 void EmitSingleChar(Buffer
*buffer
, char *buf
, int value
)
299 if (*buf
!= '\0') strgen_warning("Ignoring trailing letters in command");
300 buffer
->AppendUtf8(value
);
304 /* The plural specifier looks like
305 * {NUM} {PLURAL -1 passenger passengers} then it picks either passenger/passengers depending on the count in NUM */
307 /* This is encoded like
308 * CommandByte <ARG#> <NUM> {Length of each string} {each string} */
310 bool ParseRelNum(char **buf
, int *value
, int *offset
)
312 const char *s
= *buf
;
316 while (*s
== ' ' || *s
== '\t') s
++;
321 int v
= strtol(s
, &end
, 0);
322 if (end
== s
) return false;
328 if (offset
!= nullptr && *end
== ':') {
329 /* Take the Nth within */
331 *offset
= strtol(s
, &end
, 0);
332 if (end
== s
) return false;
338 /* Parse out the next word, or nullptr */
339 char *ParseWord(char **buf
)
343 while (*s
== ' ' || *s
== '\t') s
++;
344 if (*s
== '\0') return nullptr;
348 /* parse until next " or NUL */
350 if (*s
== '\0') break;
358 /* proceed until whitespace or NUL */
361 if (*s
== '\0') break;
362 if (*s
== ' ' || *s
== '\t') {
/* Forward declaration */
static int TranslateArgumentIdx(int arg, int offset = 0);
376 static void EmitWordList(Buffer
*buffer
, const char * const *words
, uint nw
)
378 buffer
->AppendByte(nw
);
379 for (uint i
= 0; i
< nw
; i
++) buffer
->AppendByte((byte
)strlen(words
[i
]) + 1);
380 for (uint i
= 0; i
< nw
; i
++) {
381 for (uint j
= 0; words
[i
][j
] != '\0'; j
++) buffer
->AppendByte(words
[i
][j
]);
382 buffer
->AppendByte(0);
386 void EmitPlural(Buffer
*buffer
, char *buf
, int value
)
388 int argidx
= _cur_argidx
;
390 int expected
= _plural_forms
[_lang
.plural_form
].plural_count
;
391 const char **words
= AllocaM(const char *, max(expected
, MAX_PLURALS
));
394 /* Parse out the number, if one exists. Otherwise default to prev arg. */
395 if (!ParseRelNum(&buf
, &argidx
, &offset
)) argidx
--;
397 const CmdStruct
*cmd
= _cur_pcs
.cmd
[argidx
];
399 /* Use default offset */
400 if (cmd
== nullptr || cmd
->default_plural_offset
< 0) {
401 strgen_fatal("Command '%s' has no (default) plural position", cmd
== nullptr ? "<empty>" : cmd
->cmd
);
403 offset
= cmd
->default_plural_offset
;
406 /* Parse each string */
407 for (nw
= 0; nw
< MAX_PLURALS
; nw
++) {
408 words
[nw
] = ParseWord(&buf
);
409 if (words
[nw
] == nullptr) break;
413 strgen_fatal("%s: No plural words", _cur_ident
);
416 if (expected
!= nw
) {
418 strgen_fatal("%s: Invalid number of plural forms. Expecting %d, found %d.", _cur_ident
,
421 if ((_show_todo
& 2) != 0) strgen_warning("'%s' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident
);
425 for (; nw
< expected
; nw
++) {
426 words
[nw
] = words
[nw
- 1];
432 buffer
->AppendUtf8(SCC_PLURAL_LIST
);
433 buffer
->AppendByte(_lang
.plural_form
);
434 buffer
->AppendByte(TranslateArgumentIdx(argidx
, offset
));
435 EmitWordList(buffer
, words
, nw
);
439 void EmitGender(Buffer
*buffer
, char *buf
, int value
)
441 int argidx
= _cur_argidx
;
448 /* This is a {G=DER} command */
449 nw
= _lang
.GetGenderIndex(buf
);
450 if (nw
>= MAX_NUM_GENDERS
) strgen_fatal("G argument '%s' invalid", buf
);
452 /* now nw contains the gender index */
453 buffer
->AppendUtf8(SCC_GENDER_INDEX
);
454 buffer
->AppendByte(nw
);
456 const char *words
[MAX_NUM_GENDERS
];
458 /* This is a {G 0 foo bar two} command.
459 * If no relative number exists, default to +0 */
460 if (!ParseRelNum(&buf
, &argidx
, &offset
)) {}
462 const CmdStruct
*cmd
= _cur_pcs
.cmd
[argidx
];
463 if (cmd
== nullptr || (cmd
->flags
& C_GENDER
) == 0) {
464 strgen_fatal("Command '%s' can't have a gender", cmd
== nullptr ? "<empty>" : cmd
->cmd
);
467 for (nw
= 0; nw
< MAX_NUM_GENDERS
; nw
++) {
468 words
[nw
] = ParseWord(&buf
);
469 if (words
[nw
] == nullptr) break;
471 if (nw
!= _lang
.num_genders
) strgen_fatal("Bad # of arguments for gender command");
473 assert(IsInsideBS(cmd
->value
, SCC_CONTROL_START
, UINT8_MAX
));
474 buffer
->AppendUtf8(SCC_GENDER_LIST
);
475 buffer
->AppendByte(TranslateArgumentIdx(argidx
, offset
));
476 EmitWordList(buffer
, words
, nw
);
480 static const CmdStruct
*FindCmd(const char *s
, int len
)
482 for (const CmdStruct
*cs
= _cmd_structs
; cs
!= endof(_cmd_structs
); cs
++) {
483 if (strncmp(cs
->cmd
, s
, len
) == 0 && cs
->cmd
[len
] == '\0') return cs
;
488 static uint
ResolveCaseName(const char *str
, size_t len
)
490 /* First get a clean copy of only the case name, then resolve it. */
491 char case_str
[CASE_GENDER_LEN
];
492 len
= min(lengthof(case_str
) - 1, len
);
493 memcpy(case_str
, str
, len
);
494 case_str
[len
] = '\0';
496 uint8 case_idx
= _lang
.GetCaseIndex(case_str
);
497 if (case_idx
>= MAX_NUM_CASES
) strgen_fatal("Invalid case-name '%s'", case_str
);
502 /* returns nullptr on eof
503 * else returns command struct */
504 static const CmdStruct
*ParseCommandString(const char **str
, char *param
, int *argno
, int *casei
)
506 const char *s
= *str
, *start
;
512 /* Scan to the next command, exit if there's no next command. */
513 for (; *s
!= '{'; s
++) {
514 if (*s
== '\0') return nullptr;
516 s
++; // Skip past the {
518 if (*s
>= '0' && *s
<= '9') {
521 *argno
= strtoul(s
, &end
, 0);
522 if (*end
!= ':') strgen_fatal("missing arg #");
526 /* parse command name */
530 } while (c
!= '}' && c
!= ' ' && c
!= '=' && c
!= '.' && c
!= 0);
532 const CmdStruct
*cmd
= FindCmd(start
, s
- start
- 1);
533 if (cmd
== nullptr) {
534 strgen_error("Undefined command '%.*s'", (int)(s
- start
- 1), start
);
539 const char *casep
= s
;
541 if (!(cmd
->flags
& C_CASE
)) {
542 strgen_fatal("Command '%s' can't have a case", cmd
->cmd
);
547 } while (c
!= '}' && c
!= ' ' && c
!= '\0');
548 *casei
= ResolveCaseName(casep
, s
- casep
- 1);
552 strgen_error("Missing } from command '%s'", start
);
565 strgen_error("Missing } from command '%s'", start
);
568 if (s
- start
== MAX_COMMAND_PARAM_SIZE
) error("param command too long");
581 * @param data The data to fill during reading.
582 * @param file The file we are reading.
583 * @param master Are we reading the master file?
584 * @param translation Are we reading a translation?
586 StringReader::StringReader(StringData
&data
, const char *file
, bool master
, bool translation
) :
587 data(data
), file(stredup(file
)), master(master
), translation(translation
)
591 /** Make sure the right reader gets freed. */
592 StringReader::~StringReader()
597 static void ExtractCommandString(ParsedCommandStruct
*p
, const char *s
, bool warnings
)
599 char param
[MAX_COMMAND_PARAM_SIZE
];
604 memset(p
, 0, sizeof(*p
));
607 /* read until next command from a. */
608 const CmdStruct
*ar
= ParseCommandString(&s
, param
, &argno
, &casei
);
610 if (ar
== nullptr) break;
612 /* Sanity checking */
613 if (argno
!= -1 && ar
->consumes
== 0) strgen_fatal("Non consumer param can't have a paramindex");
616 if (argno
!= -1) argidx
= argno
;
617 if (argidx
< 0 || (uint
)argidx
>= lengthof(p
->cmd
)) strgen_fatal("invalid param idx %d", argidx
);
618 if (p
->cmd
[argidx
] != nullptr && p
->cmd
[argidx
] != ar
) strgen_fatal("duplicate param idx %d", argidx
);
620 p
->cmd
[argidx
++] = ar
;
621 } else if (!(ar
->flags
& C_DONTCOUNT
)) { // Ignore some of them
622 if (p
->np
>= lengthof(p
->pairs
)) strgen_fatal("too many commands in string, max " PRINTF_SIZE
, lengthof(p
->pairs
));
623 p
->pairs
[p
->np
].a
= ar
;
624 p
->pairs
[p
->np
].v
= param
[0] != '\0' ? stredup(param
) : "";
631 static const CmdStruct
*TranslateCmdForCompare(const CmdStruct
*a
)
633 if (a
== nullptr) return nullptr;
635 if (strcmp(a
->cmd
, "STRING1") == 0 ||
636 strcmp(a
->cmd
, "STRING2") == 0 ||
637 strcmp(a
->cmd
, "STRING3") == 0 ||
638 strcmp(a
->cmd
, "STRING4") == 0 ||
639 strcmp(a
->cmd
, "STRING5") == 0 ||
640 strcmp(a
->cmd
, "STRING6") == 0 ||
641 strcmp(a
->cmd
, "STRING7") == 0 ||
642 strcmp(a
->cmd
, "RAW_STRING") == 0) {
643 return FindCmd("STRING", 6);
650 static bool CheckCommandsMatch(char *a
, char *b
, const char *name
)
652 /* If we're not translating, i.e. we're compiling the base language,
653 * it is pointless to do all these checks as it'll always be correct.
654 * After all, all checks are based on the base language.
656 if (!_translation
) return true;
658 ParsedCommandStruct templ
;
659 ParsedCommandStruct lang
;
662 ExtractCommandString(&templ
, b
, true);
663 ExtractCommandString(&lang
, a
, true);
665 /* For each string in templ, see if we find it in lang */
666 if (templ
.np
!= lang
.np
) {
667 strgen_warning("%s: template string and language string have a different # of commands", name
);
671 for (uint i
= 0; i
< templ
.np
; i
++) {
672 /* see if we find it in lang, and zero it out */
674 for (uint j
= 0; j
< lang
.np
; j
++) {
675 if (templ
.pairs
[i
].a
== lang
.pairs
[j
].a
&&
676 strcmp(templ
.pairs
[i
].v
, lang
.pairs
[j
].v
) == 0) {
677 /* it was found in both. zero it out from lang so we don't find it again */
678 lang
.pairs
[j
].a
= nullptr;
685 strgen_warning("%s: command '%s' exists in template file but not in language file", name
, templ
.pairs
[i
].a
->cmd
);
690 /* if we reach here, all non consumer commands match up.
691 * Check if the non consumer commands match up also. */
692 for (uint i
= 0; i
< lengthof(templ
.cmd
); i
++) {
693 if (TranslateCmdForCompare(templ
.cmd
[i
]) != lang
.cmd
[i
]) {
694 strgen_warning("%s: Param idx #%d '%s' doesn't match with template command '%s'", name
, i
,
695 lang
.cmd
[i
] == nullptr ? "<empty>" : TranslateCmdForCompare(lang
.cmd
[i
])->cmd
,
696 templ
.cmd
[i
] == nullptr ? "<empty>" : templ
.cmd
[i
]->cmd
);
704 void StringReader::HandleString(char *str
)
707 if (str
[1] == '#' && str
[2] != '#') this->HandlePragma(str
+ 2);
711 /* Ignore comments & blank lines */
712 if (*str
== ';' || *str
== ' ' || *str
== '\0') return;
714 char *s
= strchr(str
, ':');
716 strgen_error("Line has no ':' delimiter");
722 * After this str points to the command name, and s points to the command contents */
723 for (t
= s
; t
> str
&& (t
[-1] == ' ' || t
[-1] == '\t'); t
--) {}
727 /* Check string is valid UTF-8 */
729 for (tmp
= s
; *tmp
!= '\0';) {
730 size_t len
= Utf8Validate(tmp
);
731 if (len
== 0) strgen_fatal("Invalid UTF-8 sequence in '%s'", s
);
735 if (c
<= 0x001F || // ASCII control character range
736 c
== 0x200B || // Zero width space
737 (c
>= 0xE000 && c
<= 0xF8FF) || // Private range
738 (c
>= 0xFFF0 && c
<= 0xFFFF)) { // Specials range
739 strgen_fatal("Unwanted UTF-8 character U+%04X in sequence '%s'", c
, s
);
745 /* Check if the string has a case..
746 * The syntax for cases is IDENTNAME.case */
747 char *casep
= strchr(str
, '.');
748 if (casep
!= nullptr) *casep
++ = '\0';
750 /* Check if this string already exists.. */
751 LangString
*ent
= this->data
.Find(str
);
754 if (casep
!= nullptr) {
755 strgen_error("Cases in the base translation are not supported.");
759 if (ent
!= nullptr) {
760 strgen_error("String name '%s' is used multiple times", str
);
764 if (this->data
.strings
[this->data
.next_string_id
] != nullptr) {
765 strgen_error("String ID 0x" PRINTF_SIZEX
" for '%s' already in use by '%s'", this->data
.next_string_id
, str
, this->data
.strings
[this->data
.next_string_id
]->name
);
769 /* Allocate a new LangString */
770 this->data
.Add(str
, new LangString(str
, s
, this->data
.next_string_id
++, _cur_line
));
772 if (ent
== nullptr) {
773 strgen_warning("String name '%s' does not exist in master file", str
);
777 if (ent
->translated
&& casep
== nullptr) {
778 strgen_error("String name '%s' is used multiple times", str
);
782 /* make sure that the commands match */
783 if (!CheckCommandsMatch(s
, ent
->english
, str
)) return;
785 if (casep
!= nullptr) {
786 ent
->translated_case
= new Case(ResolveCaseName(casep
, strlen(casep
)), s
, ent
->translated_case
);
788 ent
->translated
= stredup(s
);
789 /* If the string was translated, use the line from the
790 * translated language so errors in the translated file
791 * are properly referenced to. */
792 ent
->line
= _cur_line
;
797 void StringReader::HandlePragma(char *str
)
799 if (!memcmp(str
, "plural ", 7)) {
800 _lang
.plural_form
= atoi(str
+ 7);
801 if (_lang
.plural_form
>= lengthof(_plural_forms
)) {
802 strgen_fatal("Invalid pluralform %d", _lang
.plural_form
);
805 strgen_fatal("unknown pragma '%s'", str
);
/**
 * Remove trailing carriage returns, newlines and spaces from a string, in place.
 * Other whitespace, such as tabs, is left alone.
 * @param buf The string to strip.
 */
static void rstrip(char *buf)
{
	size_t i = strlen(buf);
	while (i > 0 && (buf[i - 1] == '\r' || buf[i - 1] == '\n' || buf[i - 1] == ' ')) i--;
	/* Cut the string off right after the last character that is kept. */
	buf[i] = '\0';
}
816 void StringReader::ParseFile()
819 _warnings
= _errors
= 0;
821 _translation
= this->master
|| this->translation
;
824 /* For each new file we parse, reset the genders, and language codes. */
826 strecpy(_lang
.digit_group_separator
, ",", lastof(_lang
.digit_group_separator
));
827 strecpy(_lang
.digit_group_separator_currency
, ",", lastof(_lang
.digit_group_separator_currency
));
828 strecpy(_lang
.digit_decimal_separator
, ".", lastof(_lang
.digit_decimal_separator
));
831 while (this->data
.next_string_id
< this->data
.max_strings
&& this->ReadLine(buf
, lastof(buf
)) != nullptr) {
833 this->HandleString(buf
);
837 if (this->data
.next_string_id
== this->data
.max_strings
) {
838 strgen_error("Too many strings, maximum allowed is " PRINTF_SIZE
, this->data
.max_strings
);
843 * Write the header information.
844 * @param data The data about the string.
846 void HeaderWriter::WriteHeader(const StringData
&data
)
849 for (size_t i
= 0; i
< data
.max_strings
; i
++) {
850 if (data
.strings
[i
] != nullptr) {
851 this->WriteStringID(data
.strings
[i
]->name
, (int)i
);
856 this->WriteStringID("STR_LAST_STRINGID", last
);
859 static int TranslateArgumentIdx(int argidx
, int offset
)
863 if (argidx
< 0 || (uint
)argidx
>= lengthof(_cur_pcs
.cmd
)) {
864 strgen_fatal("invalid argidx %d", argidx
);
866 const CmdStruct
*cs
= _cur_pcs
.cmd
[argidx
];
867 if (cs
!= nullptr && cs
->consumes
<= offset
) {
868 strgen_fatal("invalid argidx offset %d:%d", argidx
, offset
);
871 if (_cur_pcs
.cmd
[argidx
] == nullptr) {
872 strgen_fatal("no command for this argidx %d", argidx
);
875 for (int i
= sum
= 0; i
< argidx
; i
++) {
876 const CmdStruct
*cs
= _cur_pcs
.cmd
[i
];
878 sum
+= (cs
!= nullptr) ? cs
->consumes
: 1;
884 static void PutArgidxCommand(Buffer
*buffer
)
886 buffer
->AppendUtf8(SCC_ARG_INDEX
);
887 buffer
->AppendByte(TranslateArgumentIdx(_cur_argidx
));
891 static void PutCommandString(Buffer
*buffer
, const char *str
)
895 while (*str
!= '\0') {
896 /* Process characters as they are until we encounter a { */
898 buffer
->AppendByte(*str
++);
902 char param
[MAX_COMMAND_PARAM_SIZE
];
905 const CmdStruct
*cs
= ParseCommandString(&str
, param
, &argno
, &casei
);
906 if (cs
== nullptr) break;
909 buffer
->AppendUtf8(SCC_SET_CASE
); // {SET_CASE}
910 buffer
->AppendByte(casei
);
913 /* For params that consume values, we need to handle the argindex properly */
914 if (cs
->consumes
> 0) {
915 /* Check if we need to output a move-param command */
916 if (argno
!= -1 && argno
!= _cur_argidx
) {
918 PutArgidxCommand(buffer
);
921 /* Output the one from the master string... it's always accurate. */
922 cs
= _cur_pcs
.cmd
[_cur_argidx
++];
924 strgen_fatal("%s: No argument exists at position %d", _cur_ident
, _cur_argidx
- 1);
928 cs
->proc(buffer
, param
, cs
->value
);
933 * Write the length as a simple gamma.
934 * @param length The number to write.
936 void LanguageWriter::WriteLength(uint length
)
940 if (length
>= 0x4000) {
941 strgen_fatal("string too long");
944 if (length
>= 0xC0) {
945 buffer
[offs
++] = (length
>> 8) | 0xC0;
947 buffer
[offs
++] = length
& 0xFF;
948 this->Write((byte
*)buffer
, offs
);
952 * Actually write the language.
953 * @param data The data about the string.
955 void LanguageWriter::WriteLang(const StringData
&data
)
957 uint
*in_use
= AllocaM(uint
, data
.tabs
);
958 for (size_t tab
= 0; tab
< data
.tabs
; tab
++) {
959 uint n
= data
.CountInUse((uint
)tab
);
962 _lang
.offsets
[tab
] = TO_LE16(n
);
964 for (uint j
= 0; j
!= in_use
[tab
]; j
++) {
965 const LangString
*ls
= data
.strings
[(tab
* TAB_SIZE
) + j
];
966 if (ls
!= nullptr && ls
->translated
== nullptr) _lang
.missing
++;
970 _lang
.ident
= TO_LE32(LanguagePackHeader::IDENT
);
971 _lang
.version
= TO_LE32(data
.Version());
972 _lang
.missing
= TO_LE16(_lang
.missing
);
973 _lang
.winlangid
= TO_LE16(_lang
.winlangid
);
975 this->WriteHeader(&_lang
);
978 for (size_t tab
= 0; tab
< data
.tabs
; tab
++) {
979 for (uint j
= 0; j
!= in_use
[tab
]; j
++) {
980 const LangString
*ls
= data
.strings
[(tab
* TAB_SIZE
) + j
];
984 /* For undefined strings, just set that it's an empty string */
986 this->WriteLength(0);
990 _cur_ident
= ls
->name
;
991 _cur_line
= ls
->line
;
993 /* Produce a message if a string doesn't have a translation. */
994 if (_show_todo
> 0 && ls
->translated
== nullptr) {
995 if ((_show_todo
& 2) != 0) {
996 strgen_warning("'%s' is untranslated", ls
->name
);
998 if ((_show_todo
& 1) != 0) {
999 const char *s
= "<TODO> ";
1000 while (*s
!= '\0') buffer
.AppendByte(*s
++);
1004 /* Extract the strings and stuff from the english command string */
1005 ExtractCommandString(&_cur_pcs
, ls
->english
, false);
1007 if (ls
->translated_case
!= nullptr || ls
->translated
!= nullptr) {
1008 casep
= ls
->translated_case
;
1009 cmdp
= ls
->translated
;
1015 _translated
= cmdp
!= ls
->english
;
1017 if (casep
!= nullptr) {
1021 /* Need to output a case-switch.
1022 * It has this format
1023 * <0x9E> <NUM CASES> <CASE1> <LEN1> <STRING1> <CASE2> <LEN2> <STRING2> <CASE3> <LEN3> <STRING3> <STRINGDEFAULT>
1024 * Each LEN is printed using 2 bytes in big endian order. */
1025 buffer
.AppendUtf8(SCC_SWITCH_CASE
);
1026 /* Count the number of cases */
1027 for (num
= 0, c
= casep
; c
; c
= c
->next
) num
++;
1028 buffer
.AppendByte(num
);
1030 /* Write each case */
1031 for (c
= casep
; c
!= nullptr; c
= c
->next
) {
1032 buffer
.AppendByte(c
->caseidx
);
1033 /* Make some space for the 16-bit length */
1034 uint pos
= (uint
)buffer
.size();
1035 buffer
.AppendByte(0);
1036 buffer
.AppendByte(0);
1038 PutCommandString(&buffer
, c
->string
);
1039 buffer
.AppendByte(0); // terminate with a zero
1040 /* Fill in the length */
1041 uint size
= (uint
)buffer
.size() - (pos
+ 2);
1042 buffer
[pos
+ 0] = GB(size
, 8, 8);
1043 buffer
[pos
+ 1] = GB(size
, 0, 8);
1047 if (cmdp
!= nullptr) PutCommandString(&buffer
, cmdp
);
1049 this->WriteLength((uint
)buffer
.size());
1050 this->Write(buffer
.data(), buffer
.size());