1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
26 #include <sal/types.h>
27 #include <rtl/ustring.hxx>
// Forward declarations of the three table generators defined further down.
// Each one reads dictionary text from sfp and writes generated C++ source
// text to cfp.
31 static void make_hhc_char(FILE *sfp
, FILE *cfp
);
32 static void make_stc_char(FILE *sfp
, FILE *cfp
);
33 static void make_stc_word(FILE *sfp
, FILE *cfp
);
// Program entry point.
//   argv[1]  name of the generator to run: "hhc_char", "stc_char" or "stc_word"
//   argv[2]  dictionary source file, opened for binary reading
//   argv[3]  C++ source file to generate, opened for binary writing
// The fixed file header is written first, then the selected generator appends
// its tables inside an extern "C" block.
37 SAL_IMPLEMENT_MAIN_WITH_ARGS(argc
, argv
)
// All three arguments are mandatory; bail out if any is missing.
41 if (argc
< 4) exit(-1);
44 sfp
= fopen(argv
[2], "rb"); // open the source file for read;
// Report the path that was actually handed to fopen() above (argv[2]);
// argv[1] is only the generator name and would make the message misleading.
47 fprintf(stderr
, "Opening the dictionary source file %s for reading failed: %s\n", argv
[2], strerror(errno
));
51 // create the C source file to write
52 cfp
= fopen(argv
[3], "wb");
55 fprintf(stderr
, "Opening %s for writing failed: %s\n", argv
[3], strerror(errno
));
// Fixed preamble of the generated file: copyright banner, "do not edit"
// warning, the includes the generated code needs, and the opening of the
// extern "C" block that wraps everything the generators emit.
60 fprintf(cfp
, " * Copyright(c) 1999 - 2000, Sun Microsystems, Inc.\n");
61 fprintf(cfp
, " * All Rights Reserved.\n");
62 fprintf(cfp
, " */\n\n");
63 fprintf(cfp
, "/* !!!The file is generated automatically. DO NOT edit the file manually!!! */\n\n");
64 fprintf(cfp
, "#include <sal/types.h>\n");
65 fprintf(cfp
, "#include <textconversion.hxx>\n");
66 fprintf(cfp
, "\nextern \"C\" {\n");
// Dispatch on the generator name; an unrecognised name runs no generator.
68 if (strcmp(argv
[1], "hhc_char") == 0)
69 make_hhc_char(sfp
, cfp
);
70 else if (strcmp(argv
[1], "stc_char") == 0)
71 make_stc_char(sfp
, cfp
);
72 else if (strcmp(argv
[1], "stc_word") == 0)
73 make_stc_word(sfp
, cfp
);
83 // Hangul/Hanja character conversion
// Reads "Hangul:Hanja" lines from sfp and writes to cfp:
//   - Hangul2HanjaData:  all Hanja code units, concatenated in input order
//   - Hangul2HanjaIndex: one { Hangul, offset, count } record per input line
//   - Hanja2HangulIndex / Hanja2HangulData: a two-level reverse lookup table
//   - the getXxx() accessor functions returning those arrays
84 void make_hhc_char(FILE *sfp
, FILE *cfp
)
86 sal_Int32 count
, address
, i
, j
, k
;
// Reverse map indexed by Hanja code unit; 0 means "no Hangul recorded".
87 sal_Unicode Hanja2HangulData
[0x10000];
88 for (i
= 0; i
< 0x10000; i
++) {
89 Hanja2HangulData
[i
] = 0;
// One row per input line: [0] Hangul code unit, [1] value of `address` when
// the line is read, [2] number of Hanja on the line. Capacity is 10000 rows.
91 sal_uInt16 Hangul2HanjaData
[10000][3];
93 // generate main dict. data array
94 fprintf(cfp
, "\nstatic const sal_Unicode Hangul2HanjaData[] = {");
// NOTE(review): the declaration of Cstr and the initialisation of count and
// address are not visible in this excerpt.
99 while (fgets(Cstr
, 1024, sfp
)) {
100 // input file is in UTF-8 encoding (Hangul:Hanja)
101 // don't convert last new line character to Ostr.
// NOTE(review): strlen(Cstr) - 1 assumes every line, including the last one,
// ends in a newline - confirm against the dictionary files.
102 OUString
Ostr(Cstr
, strlen(Cstr
) - 1, RTL_TEXTENCODING_UTF8
);
103 sal_Int32 len
= Ostr
.getLength();
// Record the index row for this line; len - 2 excludes the Hangul at index 0
// and the character at index 1 (presumably the ':' separator).
105 Hangul2HanjaData
[count
][0] = Ostr
[0];
106 Hangul2HanjaData
[count
][1] = sal::static_int_cast
<sal_uInt16
>( address
);
107 Hangul2HanjaData
[count
][2] = sal::static_int_cast
<sal_uInt16
>( len
- 2 );
// Every code unit from index 2 on is a Hanja: remember its Hangul in the
// reverse map and append it to the emitted data array, 16 values per row.
110 for (i
= 2; i
< len
; i
++) {
111 Hanja2HangulData
[Ostr
[i
]] = Ostr
[0];
112 if (address
++ % 16 == 0)
113 fprintf(cfp
, "\n\t");
114 fprintf(cfp
, "0x%04x, ", Ostr
[i
]);
117 fprintf(cfp
, "\n};\n");
// Emit the collected { Hangul, offset, count } rows.
119 fprintf(cfp
, "\nstatic const i18npool::Hangul_Index Hangul2HanjaIndex[] = {\n");
120 for (i
= 0; i
< count
; i
++)
121 fprintf(cfp
, "\t{ 0x%04x, 0x%04x, 0x%02x },\n",
122 Hangul2HanjaData
[i
][0],
123 Hangul2HanjaData
[i
][1],
124 Hangul2HanjaData
[i
][2]);
125 fprintf(cfp
, "};\n");
// First level of the reverse lookup: one entry per block of 0x100 code units
// (i*0x10 + j is the block number). The emitted value is address*0x100 for a
// block where the k scan ended below 0x100, otherwise 0xFFFF.
// NOTE(review): the statement that ends the k loop early and the reset of
// address before this loop are not visible in this excerpt.
127 fprintf(cfp
, "\nstatic const sal_uInt16 Hanja2HangulIndex[] = {");
130 for (i
= 0; i
< 0x10; i
++) {
131 fprintf(cfp
, "\n\t");
132 for (j
= 0; j
< 0x10; j
++) {
133 for (k
= 0; k
< 0x100; k
++) {
134 if (Hanja2HangulData
[((i
*0x10)+j
)*0x100+k
] != 0)
139 sal::static_int_cast
< unsigned long >(
140 k
< 0x100 ? (address
++)*0x100 : 0xFFFF));
143 fprintf(cfp
, "\n};\n");
// Second level of the reverse lookup: the 0x100 entries of block i, written
// 16 per row, with unmapped entries written as 0xFFFF.
// NOTE(review): the condition that selects which blocks are written is not
// visible in this excerpt.
145 fprintf(cfp
, "\nstatic const sal_Unicode Hanja2HangulData[] = {");
147 for (i
= 0; i
< 0x100; i
++) {
148 for (j
= 0; j
< 0x100; j
++) {
149 if (Hanja2HangulData
[i
*0x100+j
] != 0)
153 for (j
= 0; j
< 0x10; j
++) {
154 fprintf(cfp
, "\n\t");
155 for (k
= 0; k
< 0x10; k
++) {
156 sal_Unicode c
= Hanja2HangulData
[((i
*0x10+j
)*0x10)+k
];
157 fprintf(cfp
, "0x%04x, ", c
? c
: 0xFFFF);
162 fprintf(cfp
, "\n};\n");
164 // create function to return arrays
165 fprintf (cfp
, "\tconst sal_Unicode* getHangul2HanjaData() { return Hangul2HanjaData; }\n");
166 fprintf (cfp
, "\tconst i18npool::Hangul_Index* getHangul2HanjaIndex() { return Hangul2HanjaIndex; }\n");
167 fprintf (cfp
, "\tsal_Int16 getHangul2HanjaIndexCount() { return sizeof(Hangul2HanjaIndex) / sizeof(i18npool::Hangul_Index); }\n");
168 fprintf (cfp
, "\tconst sal_uInt16* getHanja2HangulIndex() { return Hanja2HangulIndex; }\n");
169 fprintf (cfp
, "\tconst sal_Unicode* getHanja2HangulData() { return Hanja2HangulData; }\n");
172 // Simplified/Traditional Chinese character conversion
// Reads "SChinese:TChinese" lines from sfp and writes to cfp three two-level
// lookup tables (an Index array plus a Data array each) named S2T, S2V and
// T2S, followed by the getSTC_Char*() accessor functions.
173 void make_stc_char(FILE *sfp
, FILE *cfp
)
175 sal_Int32 address
, i
, j
, k
;
// Three direct maps indexed by source code unit; 0 means "no mapping".
176 sal_Unicode SChinese2TChineseData
[0x10000];
177 sal_Unicode SChinese2VChineseData
[0x10000];
178 sal_Unicode TChinese2SChineseData
[0x10000];
179 for (i
= 0; i
< 0x10000; i
++) {
180 SChinese2TChineseData
[i
] = 0;
181 SChinese2VChineseData
[i
] = 0;
182 TChinese2SChineseData
[i
] = 0;
// NOTE(review): the declaration of Cstr is not visible in this excerpt.
186 while (fgets(Cstr
, 1024, sfp
)) {
187 // input file is in UTF-8 encoding (SChinese:TChinese)
188 // don't convert last new line character to Ostr.
// NOTE(review): strlen(Cstr) - 1 assumes every line, including the last one,
// ends in a newline - confirm against the dictionary files.
189 OUString
Ostr(Cstr
, strlen(Cstr
) - 1, RTL_TEXTENCODING_UTF8
);
190 sal_Int32 len
= Ostr
.getLength();
// Ostr[0] is the source character and Ostr[2] the first target character.
// NOTE(review): the if/else that chooses between the S2V-only assignment and
// the S2T assignment (which also fills S2V when it is still empty) is not
// visible in this excerpt.
192 SChinese2VChineseData
[Ostr
[0]] = Ostr
[2];
194 SChinese2TChineseData
[Ostr
[0]] = Ostr
[2];
195 if (SChinese2VChineseData
[Ostr
[0]] == 0)
196 SChinese2VChineseData
[Ostr
[0]] = Ostr
[2];
// Every code unit from index 2 on maps back to the source character.
198 for (i
= 2; i
< len
; i
++)
199 TChinese2SChineseData
[Ostr
[i
]] = Ostr
[0];
// --- S2T ---
// First level: one entry per block of 0x100 code units (i*0x10 + j is the
// block number); address*0x100 where the k scan ended below 0x100, else 0xFFFF.
// NOTE(review): the statement that ends the k loop early and the reset of
// address before each index loop are not visible in this excerpt.
202 fprintf(cfp
, "\nstatic const sal_uInt16 STC_CharIndex_S2T[] = {");
205 for (i
= 0; i
< 0x10; i
++) {
206 fprintf(cfp
, "\n\t");
207 for (j
= 0; j
< 0x10; j
++) {
208 for (k
= 0; k
< 0x100; k
++) {
209 if (SChinese2TChineseData
[((i
*0x10)+j
)*0x100+k
] != 0)
214 sal::static_int_cast
< unsigned long >(
215 k
< 0x100 ? (address
++)*0x100 : 0xFFFF));
218 fprintf(cfp
, "\n};\n");
// Second level: the 0x100 entries of block i, 16 per row, unmapped entries
// written as 0xFFFF.
// NOTE(review): the condition selecting which blocks are written is not
// visible in this excerpt.
220 fprintf(cfp
, "\nstatic const sal_Unicode STC_CharData_S2T[] = {");
222 for (i
= 0; i
< 0x100; i
++) {
223 for (j
= 0; j
< 0x100; j
++) {
224 if (SChinese2TChineseData
[i
*0x100+j
] != 0)
228 for (j
= 0; j
< 0x10; j
++) {
229 fprintf(cfp
, "\n\t");
230 for (k
= 0; k
< 0x10; k
++) {
231 sal_Unicode c
= SChinese2TChineseData
[((i
*0x10+j
)*0x10)+k
];
232 fprintf(cfp
, "0x%04x, ", c
? c
: 0xFFFF);
237 fprintf(cfp
, "\n};\n");
// --- S2V --- same two-level layout, built from SChinese2VChineseData.
239 fprintf(cfp
, "\nstatic const sal_uInt16 STC_CharIndex_S2V[] = {");
242 for (i
= 0; i
< 0x10; i
++) {
243 fprintf(cfp
, "\n\t");
244 for (j
= 0; j
< 0x10; j
++) {
245 for (k
= 0; k
< 0x100; k
++) {
246 if (SChinese2VChineseData
[((i
*0x10)+j
)*0x100+k
] != 0)
251 sal::static_int_cast
< unsigned long >(
252 k
< 0x100 ? (address
++)*0x100 : 0xFFFF));
255 fprintf(cfp
, "\n};\n");
257 fprintf(cfp
, "\nstatic const sal_Unicode STC_CharData_S2V[] = {");
259 for (i
= 0; i
< 0x100; i
++) {
260 for (j
= 0; j
< 0x100; j
++) {
261 if (SChinese2VChineseData
[i
*0x100+j
] != 0)
265 for (j
= 0; j
< 0x10; j
++) {
266 fprintf(cfp
, "\n\t");
267 for (k
= 0; k
< 0x10; k
++) {
268 sal_Unicode c
= SChinese2VChineseData
[((i
*0x10+j
)*0x10)+k
];
269 fprintf(cfp
, "0x%04x, ", c
? c
: 0xFFFF);
274 fprintf(cfp
, "\n};\n");
// --- T2S --- same two-level layout, built from TChinese2SChineseData.
276 fprintf(cfp
, "\nstatic const sal_uInt16 STC_CharIndex_T2S[] = {");
279 for (i
= 0; i
< 0x10; i
++) {
280 fprintf(cfp
, "\n\t");
281 for (j
= 0; j
< 0x10; j
++) {
282 for (k
= 0; k
< 0x100; k
++) {
283 if (TChinese2SChineseData
[((i
*0x10)+j
)*0x100+k
] != 0)
288 sal::static_int_cast
< unsigned long >(
289 k
< 0x100 ? (address
++)*0x100 : 0xFFFF));
292 fprintf(cfp
, "\n};\n");
294 fprintf(cfp
, "\nstatic const sal_Unicode STC_CharData_T2S[] = {");
296 for (i
= 0; i
< 0x100; i
++) {
297 for (j
= 0; j
< 0x100; j
++) {
298 if (TChinese2SChineseData
[i
*0x100+j
] != 0)
302 for (j
= 0; j
< 0x10; j
++) {
303 fprintf(cfp
, "\n\t");
304 for (k
= 0; k
< 0x10; k
++) {
305 sal_Unicode c
= TChinese2SChineseData
[((i
*0x10+j
)*0x10)+k
];
306 fprintf(cfp
, "0x%04x, ", c
? c
: 0xFFFF);
311 fprintf(cfp
, "\n};\n");
313 // create function to return arrays
314 fprintf (cfp
, "\tconst sal_uInt16* getSTC_CharIndex_S2T() { return STC_CharIndex_S2T; }\n");
315 fprintf (cfp
, "\tconst sal_Unicode* getSTC_CharData_S2T() { return STC_CharData_S2T; }\n");
316 fprintf (cfp
, "\tconst sal_uInt16* getSTC_CharIndex_S2V() { return STC_CharIndex_S2V; }\n");
317 fprintf (cfp
, "\tconst sal_Unicode* getSTC_CharData_S2V() { return STC_CharData_S2V; }\n");
318 fprintf (cfp
, "\tconst sal_uInt16* getSTC_CharIndex_T2S() { return STC_CharIndex_T2S; }\n");
319 fprintf (cfp
, "\tconst sal_Unicode* getSTC_CharData_T2S() { return STC_CharData_T2S; }\n");
// qsort() comparator for Index records (passed to qsort() by make_stc_word).
// Records are ordered by their len field first; records of equal len are
// ordered by the first position at which their data sequences differ.
// NOTE(review): the Index type and this function's return statement are not
// visible in this excerpt.
333 static int Index_comp(const void* s1
, const void* s2
)
335 Index
const *p1
= static_cast<Index
const *>(s1
), *p2
= static_cast<Index
const *>(s2
);
// A non-zero length difference decides the order and skips the loop below.
336 int result
= p1
->len
- p2
->len
;
// Equal lengths: walk both data sequences until an element differs.
337 for (int i
= 0; result
== 0 && i
< p1
->len
; i
++)
338 result
= *(p1
->data
+i
) - *(p2
->data
+i
);
343 // Simplified/Traditional Chinese word conversion
// Reads word pairs from sfp, one per line, with the two words separated by
// '=', '>' or '<'. Writes to cfp the packed word data (STC_WordData), sorted
// entry and per-length index tables for the S2T and T2S directions, and the
// getSTC_Word*() accessor functions.
// NOTE(review): the if/else framing around the "table present" and "table
// empty" variants of the emitted accessors is not visible in this excerpt.
344 void make_stc_word(FILE *sfp
, FILE *cfp
)
346 sal_Int32 count
, i
, length
;
// Packed storage: each accepted line is copied here with the separator
// replaced by 0 and one extra 0 appended after it.
347 sal_Unicode STC_WordData
[0x10000];
// Entry tables; each record holds the offset (address), length (len) and
// pointer (data) of one word inside STC_WordData.
348 std::vector
<Index
> STC_WordEntry_S2T(0x10000);
349 std::vector
<Index
> STC_WordEntry_T2S(0x10000);
350 sal_Int32 count_S2T
= 0, count_T2S
= 0;
351 sal_Int32 line
= 0, char_total
= 0;
// NOTE(review): the declaration of Cstr and the increment of line are not
// visible in this excerpt.
354 while (fgets(Cstr
, 1024, sfp
)) {
355 // input file is in UTF-8 encoding (SChinese:TChinese)
356 // don't convert last new line character to Ostr.
// NOTE(review): strlen(Cstr) - 1 assumes every line, including the last one,
// ends in a newline - confirm against the dictionary files.
357 OUString
Ostr(Cstr
, strlen(Cstr
) - 1, RTL_TEXTENCODING_UTF8
);
358 sal_Int32 len
= Ostr
.getLength();
// Offsets are later stored as sal_uInt16, so the packed data (this line plus
// its trailing 0) must not grow past 0xFFFF.
359 if (char_total
+ len
+ 1 > 0xFFFF) {
360 fprintf(stderr
, "Word Dictionary stc_word.dic is too big (line %" SAL_PRIdINT32
")", line
);
// Find the separator. The || chain stops at the first match, so at most one
// of eq / gt / lt ends up positive and sep holds its position.
363 sal_Int32 sep
=-1, eq
=-1, gt
=-1, lt
=-1;
364 if (((sep
= eq
= Ostr
.indexOf('=')) > 0) ||
365 ((sep
= gt
= Ostr
.indexOf('>')) > 0) ||
366 ((sep
= lt
= Ostr
.indexOf('<')) > 0)) {
// '=' or '>': the word before the separator becomes an S2T entry.
368 if (eq
> 0 || gt
> 0) {
369 STC_WordEntry_S2T
[count_S2T
].address
= sal::static_int_cast
<sal_uInt16
>( char_total
);
370 STC_WordEntry_S2T
[count_S2T
].len
= sep
;
371 STC_WordEntry_S2T
[count_S2T
++].data
= &STC_WordData
[char_total
];
// '=' or '<': the word after the separator becomes a T2S entry.
373 if (eq
> 0 || lt
> 0) {
374 STC_WordEntry_T2S
[count_T2S
].address
= sal::static_int_cast
<sal_uInt16
>( char_total
+ sep
+ 1 );
375 STC_WordEntry_T2S
[count_T2S
].len
= len
- sep
- 1;
376 STC_WordEntry_T2S
[count_T2S
++].data
= &STC_WordData
[char_total
+ sep
+ 1];
// Copy the line into the packed data, writing 0 in place of the separator
// and a terminating 0 after the second word.
378 for (i
= 0; i
< len
; i
++)
379 STC_WordData
[char_total
++] = (i
== sep
) ? 0 : Ostr
[i
];
380 STC_WordData
[char_total
++] = 0;
382 fprintf(stderr
, "Invalid entry in stc_word.dic (line %" SAL_PRIdINT64
")", sal_Int64(line
));
// Emit the packed word data, 32 values per row, plus its element count.
388 if (char_total
> 0) {
389 fprintf(cfp
, "\nstatic const sal_Unicode STC_WordData[] = {");
390 for (i
= 0; i
< char_total
; i
++) {
391 if (i
% 32 == 0) fprintf(cfp
, "\n\t");
392 fprintf(cfp
, "0x%04x, ", STC_WordData
[i
]);
394 fprintf(cfp
, "\n};\n");
396 fprintf(cfp
, "\nstatic sal_Int32 STC_WordData_Count = %" SAL_PRIdINT32
";\n", sal_Int32(char_total
));
398 // create function to return arrays
399 fprintf (cfp
, "\tconst sal_Unicode* getSTC_WordData(sal_Int32& count) { count = STC_WordData_Count; return STC_WordData; }\n");
401 fprintf (cfp
, "\tconst sal_Unicode* getSTC_WordData(sal_Int32& count) { count = 0; return NULL; }\n");
// Scratch table reused for both directions: STC_WordIndex[n] receives the
// position in the sorted entry table at which words of length n start.
// NOTE(review): the table has 0x100 slots and is indexed up to the longest
// word length plus one; confirm word lengths cannot reach 0xFF.
404 sal_uInt16 STC_WordIndex
[0x100];
// --- S2T --- sort entries with Index_comp (by length, then by content).
// NOTE(review): the initialisation of count and length before the loop below
// is not visible in this excerpt.
407 qsort(STC_WordEntry_S2T
.data(), count_S2T
, sizeof(Index
), Index_comp
);
409 fprintf(cfp
, "\nstatic const sal_uInt16 STC_WordEntry_S2T[] = {");
412 for (i
= 0; i
< count_S2T
; i
++) {
413 if (i
% 32 == 0) fprintf(cfp
, "\n\t");
414 fprintf(cfp
, "0x%04x, ", STC_WordEntry_S2T
[i
].address
);
// Length changed: fill every index slot up to the new length with i.
415 if (STC_WordEntry_S2T
[i
].len
!= length
) {
416 length
= STC_WordEntry_S2T
[i
].len
;
417 while (count
<= length
)
418 STC_WordIndex
[count
++] = sal::static_int_cast
<sal_uInt16
>(i
);
421 fprintf(cfp
, "\n};\n");
// Closing slot: i equals count_S2T here, i.e. one past the last entry.
422 STC_WordIndex
[count
++] = sal::static_int_cast
<sal_uInt16
>(i
);
424 fprintf(cfp
, "\nstatic const sal_uInt16 STC_WordIndex_S2T[] = {");
425 for (i
= 0; i
< count
; i
++) {
426 if (i
% 16 == 0) fprintf(cfp
, "\n\t");
427 fprintf(cfp
, "0x%04x, ", STC_WordIndex
[i
]);
429 fprintf(cfp
, "\n};\n");
// The emitted count is the last length seen in the sorted loop, not the
// number of index slots written.
431 fprintf(cfp
, "\nstatic sal_Int32 STC_WordIndex_S2T_Count = %" SAL_PRIdINT64
";\n", sal_Int64(length
));
432 fprintf (cfp
, "\tconst sal_uInt16* getSTC_WordEntry_S2T() { return STC_WordEntry_S2T; }\n");
433 fprintf (cfp
, "\tconst sal_uInt16* getSTC_WordIndex_S2T(sal_Int32& count) { count = STC_WordIndex_S2T_Count; return STC_WordIndex_S2T; }\n");
435 fprintf (cfp
, "\tconst sal_uInt16* getSTC_WordEntry_S2T() { return NULL; }\n");
436 fprintf (cfp
, "\tconst sal_uInt16* getSTC_WordIndex_S2T(sal_Int32& count) { count = 0; return NULL; }\n");
// --- T2S --- same procedure for the T2S entries.
440 qsort(STC_WordEntry_T2S
.data(), count_T2S
, sizeof(Index
), Index_comp
);
442 fprintf(cfp
, "\nstatic const sal_uInt16 STC_WordEntry_T2S[] = {");
445 for (i
= 0; i
< count_T2S
; i
++) {
446 if (i
% 32 == 0) fprintf(cfp
, "\n\t");
447 fprintf(cfp
, "0x%04x, ", STC_WordEntry_T2S
[i
].address
);
448 if (STC_WordEntry_T2S
[i
].len
!= length
) {
449 length
= STC_WordEntry_T2S
[i
].len
;
450 while (count
<= length
)
451 STC_WordIndex
[count
++] = sal::static_int_cast
<sal_uInt16
>(i
);
454 STC_WordIndex
[count
++] = sal::static_int_cast
<sal_uInt16
>(i
);
455 fprintf(cfp
, "\n};\n");
457 fprintf(cfp
, "\nstatic const sal_uInt16 STC_WordIndex_T2S[] = {");
458 for (i
= 0; i
< count
; i
++) {
459 if (i
% 16 == 0) fprintf(cfp
, "\n\t");
460 fprintf(cfp
, "0x%04x, ", STC_WordIndex
[i
]);
462 fprintf(cfp
, "\n};\n");
464 fprintf(cfp
, "\nstatic sal_Int32 STC_WordIndex_T2S_Count = %" SAL_PRIdINT64
";\n\n", sal_Int64(length
));
465 fprintf (cfp
, "\tconst sal_uInt16* getSTC_WordEntry_T2S() { return STC_WordEntry_T2S; }\n");
466 fprintf (cfp
, "\tconst sal_uInt16* getSTC_WordIndex_T2S(sal_Int32& count) { count = STC_WordIndex_T2S_Count; return STC_WordIndex_T2S; }\n");
468 fprintf (cfp
, "\tconst sal_uInt16* getSTC_WordEntry_T2S() { return NULL; }\n");
469 fprintf (cfp
, "\tconst sal_uInt16* getSTC_WordIndex_T2S(sal_Int32& count) { count = 0; return NULL; }\n");
473 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */