/*
 * This file is part of OpenTTD.
 * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
 * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
 */

/** @file strgen_base.cpp Tool to create computer readable (stand-alone) translation files. */
12 #include "../stdafx.h"
13 #include "../core/endian_func.hpp"
14 #include "../string_func.h"
15 #include "../table/control_codes.h"
20 #include "../table/strgen_tables.h"
22 #include "../safeguards.h"
24 /* Compiles a list of strings into a compiled string list */
26 static bool _translated
; ///< Whether the current language is not the master language
27 static bool _translation
; ///< Is the current file actually a translation or not
28 const char *_file
= "(unknown file)"; ///< The filename of the input, so we can refer to it in errors/warnings
29 int _cur_line
; ///< The current line we're parsing in the input file
30 int _errors
, _warnings
, _show_todo
;
31 LanguagePackHeader _lang
; ///< Header information about a language.
33 static const ptrdiff_t MAX_COMMAND_PARAM_SIZE
= 100; ///< Maximum size of every command block, not counting the name of the command itself
34 static const CmdStruct
*ParseCommandString(const char **str
, char *param
, int *argno
, int *casei
);
38 * @param caseidx The index of the case.
39 * @param string The translation of the case.
40 * @param next The next chained case.
42 Case::Case(int caseidx
, const char *string
, Case
*next
) :
43 caseidx(caseidx
), string(stredup(string
)), next(next
)
47 /** Free everything we allocated. */
55 * Create a new string.
56 * @param name The name of the string.
57 * @param english The english "translation" of the string.
58 * @param index The index in the string table.
59 * @param line The line this string was found on.
61 LangString::LangString(const char *name
, const char *english
, int index
, int line
) :
62 name(stredup(name
)), english(stredup(english
)), translated(NULL
),
63 hash_next(0), index(index
), line(line
), translated_case(NULL
)
67 /** Free everything we allocated. */
68 LangString::~LangString()
72 free(this->translated
);
73 delete this->translated_case
;
76 /** Free all data related to the translation. */
77 void LangString::FreeTranslation()
79 free(this->translated
);
80 this->translated
= NULL
;
82 delete this->translated_case
;
83 this->translated_case
= NULL
;
87 * Create a new string data container.
88 * @param max_strings The maximum number of strings.
90 StringData::StringData(size_t tabs
) : tabs(tabs
), max_strings(tabs
* TAB_SIZE
)
92 this->strings
= CallocT
<LangString
*>(max_strings
);
93 this->hash_heads
= CallocT
<uint16
>(max_strings
);
94 this->next_string_id
= 0;
97 /** Free everything we allocated. */
98 StringData::~StringData()
100 for (size_t i
= 0; i
< this->max_strings
; i
++) delete this->strings
[i
];
102 free(this->hash_heads
);
105 /** Free all data related to the translation. */
106 void StringData::FreeTranslation()
108 for (size_t i
= 0; i
< this->max_strings
; i
++) {
109 LangString
*ls
= this->strings
[i
];
110 if (ls
!= NULL
) ls
->FreeTranslation();
115 * Create a hash of the string for finding them back quickly.
116 * @param s The string to hash.
117 * @return The hashed string.
119 uint
StringData::HashStr(const char *s
) const
122 for (; *s
!= '\0'; s
++) hash
= ROL(hash
, 3) ^ *s
;
123 return hash
% this->max_strings
;
127 * Add a newly created LangString.
128 * @param s The name of the string.
129 * @param ls The string to add.
131 void StringData::Add(const char *s
, LangString
*ls
)
133 uint hash
= this->HashStr(s
);
134 ls
->hash_next
= this->hash_heads
[hash
];
135 /* Off-by-one for hash find. */
136 this->hash_heads
[hash
] = ls
->index
+ 1;
137 this->strings
[ls
->index
] = ls
;
141 * Find a LangString based on the string name.
142 * @param s The string name to search on.
143 * @return The LangString or NULL if it is not known.
145 LangString
*StringData::Find(const char *s
)
147 int idx
= this->hash_heads
[this->HashStr(s
)];
150 LangString
*ls
= this->strings
[idx
];
152 if (strcmp(ls
->name
, s
) == 0) return ls
;
159 * Create a compound hash.
160 * @param hash The hash to add the string hash to.
161 * @param s The string hash.
162 * @return The new hash.
164 uint
StringData::VersionHashStr(uint hash
, const char *s
) const
166 for (; *s
!= '\0'; s
++) {
167 hash
= ROL(hash
, 3) ^ *s
;
168 hash
= (hash
& 1 ? hash
>> 1 ^ 0xDEADBEEF : hash
>> 1);
174 * Make a hash of the file to get a unique "version number"
175 * @return The version number.
177 uint
StringData::Version() const
181 for (size_t i
= 0; i
< this->max_strings
; i
++) {
182 const LangString
*ls
= this->strings
[i
];
187 char buf
[MAX_COMMAND_PARAM_SIZE
];
192 hash
^= i
* 0x717239;
193 hash
= (hash
& 1 ? hash
>> 1 ^ 0xDEADBEEF : hash
>> 1);
194 hash
= this->VersionHashStr(hash
, s
+ 1);
197 while ((cs
= ParseCommandString(&s
, buf
, &argno
, &casei
)) != NULL
) {
198 if (cs
->flags
& C_DONTCOUNT
) continue;
200 hash
^= (cs
- _cmd_structs
) * 0x1234567;
201 hash
= (hash
& 1 ? hash
>> 1 ^ 0xF00BAA4 : hash
>> 1);
210 * Count the number of tab elements that are in use.
211 * @param tab The tab to count the elements of.
213 uint
StringData::CountInUse(uint tab
) const
216 for (i
= TAB_SIZE
; --i
>= 0;) if (this->strings
[(tab
* TAB_SIZE
) + i
] != NULL
) break;
static const char *_cur_ident; ///< Name of the string currently being written; used in error and warning messages
227 struct ParsedCommandStruct
{
230 const CmdStruct
*cmd
[32]; // ordered by param #
233 /* Used when generating some advanced commands. */
234 static ParsedCommandStruct _cur_pcs
;
235 static int _cur_argidx
;
237 /** The buffer for writing a single string. */
238 struct Buffer
: SmallVector
<byte
, 256> {
240 * Convenience method for adding a byte.
241 * @param value The value to add.
243 void AppendByte(byte value
)
245 *this->Append() = value
;
249 * Add an Unicode character encoded in UTF-8 to the buffer.
250 * @param value The character to add.
252 void AppendUtf8(uint32 value
)
255 *this->Append() = value
;
256 } else if (value
< 0x800) {
257 *this->Append() = 0xC0 + GB(value
, 6, 5);
258 *this->Append() = 0x80 + GB(value
, 0, 6);
259 } else if (value
< 0x10000) {
260 *this->Append() = 0xE0 + GB(value
, 12, 4);
261 *this->Append() = 0x80 + GB(value
, 6, 6);
262 *this->Append() = 0x80 + GB(value
, 0, 6);
263 } else if (value
< 0x110000) {
264 *this->Append() = 0xF0 + GB(value
, 18, 3);
265 *this->Append() = 0x80 + GB(value
, 12, 6);
266 *this->Append() = 0x80 + GB(value
, 6, 6);
267 *this->Append() = 0x80 + GB(value
, 0, 6);
269 strgen_warning("Invalid unicode value U+0x%X", value
);
274 size_t Utf8Validate(const char *s
)
278 if (!HasBit(s
[0], 7)) {
281 } else if (GB(s
[0], 5, 3) == 6 && IsUtf8Part(s
[1])) {
283 c
= GB(s
[0], 0, 5) << 6 | GB(s
[1], 0, 6);
284 if (c
>= 0x80) return 2;
285 } else if (GB(s
[0], 4, 4) == 14 && IsUtf8Part(s
[1]) && IsUtf8Part(s
[2])) {
287 c
= GB(s
[0], 0, 4) << 12 | GB(s
[1], 0, 6) << 6 | GB(s
[2], 0, 6);
288 if (c
>= 0x800) return 3;
289 } else if (GB(s
[0], 3, 5) == 30 && IsUtf8Part(s
[1]) && IsUtf8Part(s
[2]) && IsUtf8Part(s
[3])) {
291 c
= GB(s
[0], 0, 3) << 18 | GB(s
[1], 0, 6) << 12 | GB(s
[2], 0, 6) << 6 | GB(s
[3], 0, 6);
292 if (c
>= 0x10000 && c
<= 0x10FFFF) return 4;
299 void EmitSingleChar(Buffer
*buffer
, char *buf
, int value
)
301 if (*buf
!= '\0') strgen_warning("Ignoring trailing letters in command");
302 buffer
->AppendUtf8(value
);
306 /* The plural specifier looks like
307 * {NUM} {PLURAL -1 passenger passengers} then it picks either passenger/passengers depending on the count in NUM */
309 /* This is encoded like
310 * CommandByte <ARG#> <NUM> {Length of each string} {each string} */
312 bool ParseRelNum(char **buf
, int *value
, int *offset
)
314 const char *s
= *buf
;
318 while (*s
== ' ' || *s
== '\t') s
++;
323 int v
= strtol(s
, &end
, 0);
324 if (end
== s
) return false;
330 if (offset
!= NULL
&& *end
== ':') {
331 /* Take the Nth within */
333 *offset
= strtol(s
, &end
, 0);
334 if (end
== s
) return false;
340 /* Parse out the next word, or NULL */
341 char *ParseWord(char **buf
)
345 while (*s
== ' ' || *s
== '\t') s
++;
346 if (*s
== '\0') return NULL
;
350 /* parse until next " or NUL */
352 if (*s
== '\0') break;
360 /* proceed until whitespace or NUL */
363 if (*s
== '\0') break;
364 if (*s
== ' ' || *s
== '\t') {
/* Forward declaration */
static int TranslateArgumentIdx(int arg, int offset = 0);
378 static void EmitWordList(Buffer
*buffer
, const char * const *words
, uint nw
)
380 buffer
->AppendByte(nw
);
381 for (uint i
= 0; i
< nw
; i
++) buffer
->AppendByte((byte
)strlen(words
[i
]) + 1);
382 for (uint i
= 0; i
< nw
; i
++) {
383 for (uint j
= 0; words
[i
][j
] != '\0'; j
++) buffer
->AppendByte(words
[i
][j
]);
384 buffer
->AppendByte(0);
388 void EmitPlural(Buffer
*buffer
, char *buf
, int value
)
390 int argidx
= _cur_argidx
;
392 int expected
= _plural_forms
[_lang
.plural_form
].plural_count
;
393 const char **words
= AllocaM(const char *, max(expected
, MAX_PLURALS
));
396 /* Parse out the number, if one exists. Otherwise default to prev arg. */
397 if (!ParseRelNum(&buf
, &argidx
, &offset
)) argidx
--;
399 const CmdStruct
*cmd
= _cur_pcs
.cmd
[argidx
];
401 /* Use default offset */
402 if (cmd
== NULL
|| cmd
->default_plural_offset
< 0) {
403 strgen_fatal("Command '%s' has no (default) plural position", cmd
== NULL
? "<empty>" : cmd
->cmd
);
405 offset
= cmd
->default_plural_offset
;
408 /* Parse each string */
409 for (nw
= 0; nw
< MAX_PLURALS
; nw
++) {
410 words
[nw
] = ParseWord(&buf
);
411 if (words
[nw
] == NULL
) break;
415 strgen_fatal("%s: No plural words", _cur_ident
);
418 if (expected
!= nw
) {
420 strgen_fatal("%s: Invalid number of plural forms. Expecting %d, found %d.", _cur_ident
,
423 if ((_show_todo
& 2) != 0) strgen_warning("'%s' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident
);
427 for (; nw
< expected
; nw
++) {
428 words
[nw
] = words
[nw
- 1];
434 buffer
->AppendUtf8(SCC_PLURAL_LIST
);
435 buffer
->AppendByte(_lang
.plural_form
);
436 buffer
->AppendByte(TranslateArgumentIdx(argidx
, offset
));
437 EmitWordList(buffer
, words
, nw
);
441 void EmitGender(Buffer
*buffer
, char *buf
, int value
)
443 int argidx
= _cur_argidx
;
450 /* This is a {G=DER} command */
451 nw
= _lang
.GetGenderIndex(buf
);
452 if (nw
>= MAX_NUM_GENDERS
) strgen_fatal("G argument '%s' invalid", buf
);
454 /* now nw contains the gender index */
455 buffer
->AppendUtf8(SCC_GENDER_INDEX
);
456 buffer
->AppendByte(nw
);
458 const char *words
[MAX_NUM_GENDERS
];
460 /* This is a {G 0 foo bar two} command.
461 * If no relative number exists, default to +0 */
462 if (!ParseRelNum(&buf
, &argidx
, &offset
)) {}
464 const CmdStruct
*cmd
= _cur_pcs
.cmd
[argidx
];
465 if (cmd
== NULL
|| (cmd
->flags
& C_GENDER
) == 0) {
466 strgen_fatal("Command '%s' can't have a gender", cmd
== NULL
? "<empty>" : cmd
->cmd
);
469 for (nw
= 0; nw
< MAX_NUM_GENDERS
; nw
++) {
470 words
[nw
] = ParseWord(&buf
);
471 if (words
[nw
] == NULL
) break;
473 if (nw
!= _lang
.num_genders
) strgen_fatal("Bad # of arguments for gender command");
475 assert(IsInsideBS(cmd
->value
, SCC_CONTROL_START
, UINT8_MAX
));
476 buffer
->AppendUtf8(SCC_GENDER_LIST
);
477 buffer
->AppendByte(TranslateArgumentIdx(argidx
, offset
));
478 EmitWordList(buffer
, words
, nw
);
482 static const CmdStruct
*FindCmd(const char *s
, int len
)
484 for (const CmdStruct
*cs
= _cmd_structs
; cs
!= endof(_cmd_structs
); cs
++) {
485 if (strncmp(cs
->cmd
, s
, len
) == 0 && cs
->cmd
[len
] == '\0') return cs
;
490 static uint
ResolveCaseName(const char *str
, size_t len
)
492 /* First get a clean copy of only the case name, then resolve it. */
493 char case_str
[CASE_GENDER_LEN
];
494 len
= min(lengthof(case_str
) - 1, len
);
495 memcpy(case_str
, str
, len
);
496 case_str
[len
] = '\0';
498 uint8 case_idx
= _lang
.GetCaseIndex(case_str
);
499 if (case_idx
>= MAX_NUM_CASES
) strgen_fatal("Invalid case-name '%s'", case_str
);
504 /* returns NULL on eof
505 * else returns command struct */
506 static const CmdStruct
*ParseCommandString(const char **str
, char *param
, int *argno
, int *casei
)
508 const char *s
= *str
, *start
;
514 /* Scan to the next command, exit if there's no next command. */
515 for (; *s
!= '{'; s
++) {
516 if (*s
== '\0') return NULL
;
518 s
++; // Skip past the {
520 if (*s
>= '0' && *s
<= '9') {
523 *argno
= strtoul(s
, &end
, 0);
524 if (*end
!= ':') strgen_fatal("missing arg #");
528 /* parse command name */
532 } while (c
!= '}' && c
!= ' ' && c
!= '=' && c
!= '.' && c
!= 0);
534 const CmdStruct
*cmd
= FindCmd(start
, s
- start
- 1);
536 strgen_error("Undefined command '%.*s'", (int)(s
- start
- 1), start
);
541 const char *casep
= s
;
543 if (!(cmd
->flags
& C_CASE
)) {
544 strgen_fatal("Command '%s' can't have a case", cmd
->cmd
);
549 } while (c
!= '}' && c
!= ' ' && c
!= '\0');
550 *casei
= ResolveCaseName(casep
, s
- casep
- 1);
554 strgen_error("Missing } from command '%s'", start
);
567 strgen_error("Missing } from command '%s'", start
);
570 if (s
- start
== MAX_COMMAND_PARAM_SIZE
) error("param command too long");
583 * @param data The data to fill during reading.
584 * @param file The file we are reading.
585 * @param master Are we reading the master file?
586 * @param translation Are we reading a translation?
588 StringReader::StringReader(StringData
&data
, const char *file
, bool master
, bool translation
) :
589 data(data
), file(stredup(file
)), master(master
), translation(translation
)
593 /** Make sure the right reader gets freed. */
594 StringReader::~StringReader()
599 static void ExtractCommandString(ParsedCommandStruct
*p
, const char *s
, bool warnings
)
601 char param
[MAX_COMMAND_PARAM_SIZE
];
606 memset(p
, 0, sizeof(*p
));
609 /* read until next command from a. */
610 const CmdStruct
*ar
= ParseCommandString(&s
, param
, &argno
, &casei
);
612 if (ar
== NULL
) break;
614 /* Sanity checking */
615 if (argno
!= -1 && ar
->consumes
== 0) strgen_fatal("Non consumer param can't have a paramindex");
618 if (argno
!= -1) argidx
= argno
;
619 if (argidx
< 0 || (uint
)argidx
>= lengthof(p
->cmd
)) strgen_fatal("invalid param idx %d", argidx
);
620 if (p
->cmd
[argidx
] != NULL
&& p
->cmd
[argidx
] != ar
) strgen_fatal("duplicate param idx %d", argidx
);
622 p
->cmd
[argidx
++] = ar
;
623 } else if (!(ar
->flags
& C_DONTCOUNT
)) { // Ignore some of them
624 if (p
->np
>= lengthof(p
->pairs
)) strgen_fatal("too many commands in string, max " PRINTF_SIZE
, lengthof(p
->pairs
));
625 p
->pairs
[p
->np
].a
= ar
;
626 p
->pairs
[p
->np
].v
= param
[0] != '\0' ? stredup(param
) : "";
633 static const CmdStruct
*TranslateCmdForCompare(const CmdStruct
*a
)
635 if (a
== NULL
) return NULL
;
637 if (strcmp(a
->cmd
, "STRING1") == 0 ||
638 strcmp(a
->cmd
, "STRING2") == 0 ||
639 strcmp(a
->cmd
, "STRING3") == 0 ||
640 strcmp(a
->cmd
, "STRING4") == 0 ||
641 strcmp(a
->cmd
, "STRING5") == 0 ||
642 strcmp(a
->cmd
, "STRING6") == 0 ||
643 strcmp(a
->cmd
, "STRING7") == 0 ||
644 strcmp(a
->cmd
, "RAW_STRING") == 0) {
645 return FindCmd("STRING", 6);
652 static bool CheckCommandsMatch(char *a
, char *b
, const char *name
)
654 /* If we're not translating, i.e. we're compiling the base language,
655 * it is pointless to do all these checks as it'll always be correct.
656 * After all, all checks are based on the base language.
658 if (!_translation
) return true;
660 ParsedCommandStruct templ
;
661 ParsedCommandStruct lang
;
664 ExtractCommandString(&templ
, b
, true);
665 ExtractCommandString(&lang
, a
, true);
667 /* For each string in templ, see if we find it in lang */
668 if (templ
.np
!= lang
.np
) {
669 strgen_warning("%s: template string and language string have a different # of commands", name
);
673 for (uint i
= 0; i
< templ
.np
; i
++) {
674 /* see if we find it in lang, and zero it out */
676 for (uint j
= 0; j
< lang
.np
; j
++) {
677 if (templ
.pairs
[i
].a
== lang
.pairs
[j
].a
&&
678 strcmp(templ
.pairs
[i
].v
, lang
.pairs
[j
].v
) == 0) {
679 /* it was found in both. zero it out from lang so we don't find it again */
680 lang
.pairs
[j
].a
= NULL
;
687 strgen_warning("%s: command '%s' exists in template file but not in language file", name
, templ
.pairs
[i
].a
->cmd
);
692 /* if we reach here, all non consumer commands match up.
693 * Check if the non consumer commands match up also. */
694 for (uint i
= 0; i
< lengthof(templ
.cmd
); i
++) {
695 if (TranslateCmdForCompare(templ
.cmd
[i
]) != lang
.cmd
[i
]) {
696 strgen_warning("%s: Param idx #%d '%s' doesn't match with template command '%s'", name
, i
,
697 lang
.cmd
[i
] == NULL
? "<empty>" : TranslateCmdForCompare(lang
.cmd
[i
])->cmd
,
698 templ
.cmd
[i
] == NULL
? "<empty>" : templ
.cmd
[i
]->cmd
);
706 void StringReader::HandleString(char *str
)
709 if (str
[1] == '#' && str
[2] != '#') this->HandlePragma(str
+ 2);
713 /* Ignore comments & blank lines */
714 if (*str
== ';' || *str
== ' ' || *str
== '\0') return;
716 char *s
= strchr(str
, ':');
718 strgen_error("Line has no ':' delimiter");
724 * After this str points to the command name, and s points to the command contents */
725 for (t
= s
; t
> str
&& (t
[-1] == ' ' || t
[-1] == '\t'); t
--) {}
729 /* Check string is valid UTF-8 */
731 for (tmp
= s
; *tmp
!= '\0';) {
732 size_t len
= Utf8Validate(tmp
);
733 if (len
== 0) strgen_fatal("Invalid UTF-8 sequence in '%s'", s
);
737 if (c
<= 0x001F || // ASCII control character range
738 c
== 0x200B || // Zero width space
739 (c
>= 0xE000 && c
<= 0xF8FF) || // Private range
740 (c
>= 0xFFF0 && c
<= 0xFFFF)) { // Specials range
741 strgen_fatal("Unwanted UTF-8 character U+%04X in sequence '%s'", c
, s
);
747 /* Check if the string has a case..
748 * The syntax for cases is IDENTNAME.case */
749 char *casep
= strchr(str
, '.');
750 if (casep
!= NULL
) *casep
++ = '\0';
752 /* Check if this string already exists.. */
753 LangString
*ent
= this->data
.Find(str
);
757 strgen_error("Cases in the base translation are not supported.");
762 strgen_error("String name '%s' is used multiple times", str
);
766 if (this->data
.strings
[this->data
.next_string_id
] != NULL
) {
767 strgen_error("String ID 0x%X for '%s' already in use by '%s'", this->data
.next_string_id
, str
, this->data
.strings
[this->data
.next_string_id
]->name
);
771 /* Allocate a new LangString */
772 this->data
.Add(str
, new LangString(str
, s
, this->data
.next_string_id
++, _cur_line
));
775 strgen_warning("String name '%s' does not exist in master file", str
);
779 if (ent
->translated
&& casep
== NULL
) {
780 strgen_error("String name '%s' is used multiple times", str
);
784 /* make sure that the commands match */
785 if (!CheckCommandsMatch(s
, ent
->english
, str
)) return;
788 ent
->translated_case
= new Case(ResolveCaseName(casep
, strlen(casep
)), s
, ent
->translated_case
);
790 ent
->translated
= stredup(s
);
791 /* If the string was translated, use the line from the
792 * translated language so errors in the translated file
793 * are properly referenced to. */
794 ent
->line
= _cur_line
;
799 void StringReader::HandlePragma(char *str
)
801 if (!memcmp(str
, "plural ", 7)) {
802 _lang
.plural_form
= atoi(str
+ 7);
803 if (_lang
.plural_form
>= lengthof(_plural_forms
)) {
804 strgen_fatal("Invalid pluralform %d", _lang
.plural_form
);
807 strgen_fatal("unknown pragma '%s'", str
);
/**
 * Strip trailing carriage returns, newlines and spaces from a string, in place.
 * @param buf The zero-terminated string to strip.
 */
static void rstrip(char *buf)
{
	size_t i = strlen(buf);
	while (i > 0 && (buf[i - 1] == '\r' || buf[i - 1] == '\n' || buf[i - 1] == ' ')) i--;
	/* Cut the string off right after the last character that is kept. */
	buf[i] = '\0';
}
818 void StringReader::ParseFile()
821 _warnings
= _errors
= 0;
823 _translation
= this->master
|| this->translation
;
826 /* For each new file we parse, reset the genders, and language codes. */
828 strecpy(_lang
.digit_group_separator
, ",", lastof(_lang
.digit_group_separator
));
829 strecpy(_lang
.digit_group_separator_currency
, ",", lastof(_lang
.digit_group_separator_currency
));
830 strecpy(_lang
.digit_decimal_separator
, ".", lastof(_lang
.digit_decimal_separator
));
833 while (this->ReadLine(buf
, lastof(buf
)) != NULL
) {
835 this->HandleString(buf
);
841 * Write the header information.
842 * @param data The data about the string.
844 void HeaderWriter::WriteHeader(const StringData
&data
)
847 for (size_t i
= 0; i
< data
.max_strings
; i
++) {
848 if (data
.strings
[i
] != NULL
) {
849 this->WriteStringID(data
.strings
[i
]->name
, (int)i
);
854 this->WriteStringID("STR_LAST_STRINGID", last
);
857 static int TranslateArgumentIdx(int argidx
, int offset
)
861 if (argidx
< 0 || (uint
)argidx
>= lengthof(_cur_pcs
.cmd
)) {
862 strgen_fatal("invalid argidx %d", argidx
);
864 const CmdStruct
*cs
= _cur_pcs
.cmd
[argidx
];
865 if (cs
!= NULL
&& cs
->consumes
<= offset
) {
866 strgen_fatal("invalid argidx offset %d:%d", argidx
, offset
);
869 if (_cur_pcs
.cmd
[argidx
] == NULL
) {
870 strgen_fatal("no command for this argidx %d", argidx
);
873 for (int i
= sum
= 0; i
< argidx
; i
++) {
874 const CmdStruct
*cs
= _cur_pcs
.cmd
[i
];
876 sum
+= (cs
!= NULL
) ? cs
->consumes
: 1;
882 static void PutArgidxCommand(Buffer
*buffer
)
884 buffer
->AppendUtf8(SCC_ARG_INDEX
);
885 buffer
->AppendByte(TranslateArgumentIdx(_cur_argidx
));
889 static void PutCommandString(Buffer
*buffer
, const char *str
)
893 while (*str
!= '\0') {
894 /* Process characters as they are until we encounter a { */
896 buffer
->AppendByte(*str
++);
900 char param
[MAX_COMMAND_PARAM_SIZE
];
903 const CmdStruct
*cs
= ParseCommandString(&str
, param
, &argno
, &casei
);
904 if (cs
== NULL
) break;
907 buffer
->AppendUtf8(SCC_SET_CASE
); // {SET_CASE}
908 buffer
->AppendByte(casei
);
911 /* For params that consume values, we need to handle the argindex properly */
912 if (cs
->consumes
> 0) {
913 /* Check if we need to output a move-param command */
914 if (argno
!= -1 && argno
!= _cur_argidx
) {
916 PutArgidxCommand(buffer
);
919 /* Output the one from the master string... it's always accurate. */
920 cs
= _cur_pcs
.cmd
[_cur_argidx
++];
922 strgen_fatal("%s: No argument exists at position %d", _cur_ident
, _cur_argidx
- 1);
926 cs
->proc(buffer
, param
, cs
->value
);
931 * Write the length as a simple gamma.
932 * @param length The number to write.
934 void LanguageWriter::WriteLength(uint length
)
938 if (length
>= 0x4000) {
939 strgen_fatal("string too long");
942 if (length
>= 0xC0) {
943 buffer
[offs
++] = (length
>> 8) | 0xC0;
945 buffer
[offs
++] = length
& 0xFF;
946 this->Write((byte
*)buffer
, offs
);
950 * Actually write the language.
951 * @param data The data about the string.
953 void LanguageWriter::WriteLang(const StringData
&data
)
955 uint
*in_use
= AllocaM(uint
, data
.tabs
);
956 for (size_t tab
= 0; tab
< data
.tabs
; tab
++) {
957 uint n
= data
.CountInUse((uint
)tab
);
960 _lang
.offsets
[tab
] = TO_LE16(n
);
962 for (uint j
= 0; j
!= in_use
[tab
]; j
++) {
963 const LangString
*ls
= data
.strings
[(tab
* TAB_SIZE
) + j
];
964 if (ls
!= NULL
&& ls
->translated
== NULL
) _lang
.missing
++;
968 _lang
.ident
= TO_LE32(LanguagePackHeader::IDENT
);
969 _lang
.version
= TO_LE32(data
.Version());
970 _lang
.missing
= TO_LE16(_lang
.missing
);
971 _lang
.winlangid
= TO_LE16(_lang
.winlangid
);
973 this->WriteHeader(&_lang
);
976 for (size_t tab
= 0; tab
< data
.tabs
; tab
++) {
977 for (uint j
= 0; j
!= in_use
[tab
]; j
++) {
978 const LangString
*ls
= data
.strings
[(tab
* TAB_SIZE
) + j
];
982 /* For undefined strings, just set that it's an empty string */
984 this->WriteLength(0);
988 _cur_ident
= ls
->name
;
989 _cur_line
= ls
->line
;
991 /* Produce a message if a string doesn't have a translation. */
992 if (_show_todo
> 0 && ls
->translated
== NULL
) {
993 if ((_show_todo
& 2) != 0) {
994 strgen_warning("'%s' is untranslated", ls
->name
);
996 if ((_show_todo
& 1) != 0) {
997 const char *s
= "<TODO> ";
998 while (*s
!= '\0') buffer
.AppendByte(*s
++);
1002 /* Extract the strings and stuff from the english command string */
1003 ExtractCommandString(&_cur_pcs
, ls
->english
, false);
1005 if (ls
->translated_case
!= NULL
|| ls
->translated
!= NULL
) {
1006 casep
= ls
->translated_case
;
1007 cmdp
= ls
->translated
;
1013 _translated
= cmdp
!= ls
->english
;
1015 if (casep
!= NULL
) {
1019 /* Need to output a case-switch.
1020 * It has this format
1021 * <0x9E> <NUM CASES> <CASE1> <LEN1> <STRING1> <CASE2> <LEN2> <STRING2> <CASE3> <LEN3> <STRING3> <STRINGDEFAULT>
1022 * Each LEN is printed using 2 bytes in big endian order. */
1023 buffer
.AppendUtf8(SCC_SWITCH_CASE
);
1024 /* Count the number of cases */
1025 for (num
= 0, c
= casep
; c
; c
= c
->next
) num
++;
1026 buffer
.AppendByte(num
);
1028 /* Write each case */
1029 for (c
= casep
; c
!= NULL
; c
= c
->next
) {
1030 buffer
.AppendByte(c
->caseidx
);
1031 /* Make some space for the 16-bit length */
1032 uint pos
= buffer
.Length();
1033 buffer
.AppendByte(0);
1034 buffer
.AppendByte(0);
1036 PutCommandString(&buffer
, c
->string
);
1037 buffer
.AppendByte(0); // terminate with a zero
1038 /* Fill in the length */
1039 uint size
= buffer
.Length() - (pos
+ 2);
1040 buffer
[pos
+ 0] = GB(size
, 8, 8);
1041 buffer
[pos
+ 1] = GB(size
, 0, 8);
1045 if (cmdp
!= NULL
) PutCommandString(&buffer
, cmdp
);
1047 this->WriteLength(buffer
.Length());
1048 this->Write(buffer
.Begin(), buffer
.Length());