1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2000, 2010 Oracle and/or its affiliates.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * This file is part of OpenOffice.org.
11 * OpenOffice.org is free software: you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser General Public License version 3
13 * only, as published by the Free Software Foundation.
15 * OpenOffice.org is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License version 3 for more details
19 * (a copy is included in the LICENSE file that accompanied this code).
21 * You should have received a copy of the GNU Lesser General Public License
22 * version 3 along with OpenOffice.org. If not, see
23 * <http://www.openoffice.org/license.html>
24 * for a copy of the LGPLv3 License.
26 ************************************************************************/
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_i18npool.hxx"
35 #include <sal/types.h>
36 #include <rtl/strbuf.hxx>
37 #include <rtl/ustring.hxx>
39 using namespace ::rtl
;
// Program entry point, declared through SAL_IMPLEMENT_MAIN_WITH_ARGS.
// Reads the dictionary source file named by argv[1] line by line and writes
// a generated C source file to argv[2].  The generated file defines the
// arrays dataArea, lenArray, index1, index2 and existMark and one accessor
// function per array inside an extern C block.
// NOTE(review): this listing looks like an extracted source view; the number
// at the start of each statement is a line number with gaps, and several
// statements (for example the declarations of sfp, cfp, str, prev and bit,
// and all closing braces) are not visible here.  The comments below describe
// only what the visible statements do; confirm against the full file.
43 SAL_IMPLEMENT_MAIN_WITH_ARGS(argc
, argv
)
// argv[1] and argv[2] are both used below, so exit with -1 when argc < 3.
47 if (argc
< 3) exit(-1);
49 sfp
= fopen(argv
[1], "rb"); // open the source file for read;
// NOTE(review): presumably printed only when the fopen above failed; the
// guarding check is not visible in this listing.
52 printf("Open the dictionary source file failed.");
56 // create the C source file to write
57 cfp
= fopen(argv
[2], "wb");
// NOTE(review): presumably printed only when the output file could not be
// created; the guarding check is not visible in this listing.
60 printf("Can't create the C source file.");
// Fixed preamble of the generated file: copyright banner, generated-file
// notice, the sal/types.h include and the opening of the extern C block.
65 fprintf(cfp
, " * Copyright(c) 1999 - 2000, Sun Microsystems, Inc.\n");
66 fprintf(cfp
, " * All Rights Reserved.\n");
67 fprintf(cfp
, " */\n\n");
68 fprintf(cfp
, "/* !!!The file is generated automatically. DONOT edit the file manually!!! */\n\n");
69 fprintf(cfp
, "#include <sal/types.h>\n\n");
70 fprintf(cfp
, "extern \"C\" {\n");
// count, i and j are scratch counters reused by every section below.
72 sal_Int32 count
, i
, j
;
// lenArray is a heap buffer grown with realloc (lenArrayLen is its capacity,
// lenArrayCount the number of entries stored, lenArrayCurr the value stored
// next).  charArray has 0x10000 slots and is indexed by a sal_Unicode value.
73 sal_Int32 lenArrayCurr
= 0, lenArrayCount
= 0, lenArrayLen
= 0, *lenArray
= NULL
, charArray
[0x10000];
// exist has one flag per sal_Unicode value; flags are set to sal_True below
// for every character that occurs in a processed line.
74 sal_Bool exist
[0x10000];
// Walks all 0x10000 table slots.
// NOTE(review): the loop body is not visible here; presumably it
// initialises exist and charArray - confirm.
75 for (i
= 0; i
< 0x10000; i
++) {
80 // generate main dict. data array
81 fprintf(cfp
, "static const sal_Unicode dataArea[] = {");
// First character of the most recently recorded word group.
83 sal_Unicode current
= 0;
// One fgets call per iteration, reading at most 1024 bytes into str.
85 while (fgets(str
, 1024, sfp
)) {
86 // input file is in UTF-8 encoding
87 // don't convert last new line character to Ostr.
// NOTE(review): strlen(str) - 1 assumes the line ends with a newline; a final
// line without one would lose its last byte.
88 OUString
Ostr((const sal_Char
*)str
, strlen(str
) - 1, RTL_TEXTENCODING_UTF8
);
89 const sal_Unicode
*u
= Ostr
.getStr();
91 sal_Int32 len
= Ostr
.getLength();
// Advance i by one code point inside Ostr; if that already equals len the
// line holds a single character.
// NOTE(review): i must hold the start index before this call; no reset of i
// is visible in this listing.
94 Ostr
.iterateCodePoints(&i
, 1);
95 if (len
== i
) continue; // skip one character word
// Diagnostic line on stdout showing *u, current, count and lenArrayCount.
// NOTE(review): the condition guarding this printf is not visible here.
99 printf("u %x, current %x, count %d, lenArrayCount %d\n", *u
, current
,
100 sal::static_int_cast
<int>(count
), sal::static_int_cast
<int>(lenArrayCount
));
// Remember, under the character current, how many lenArray entries exist so
// far, i.e. the lenArray index of the next entry to be stored.
102 charArray
[current
] = lenArrayCount
;
// Grow lenArray by 1000 entries when at most one free slot is left.
// NOTE(review): the realloc result is not checked for NULL.
105 if (lenArrayLen
<= lenArrayCount
+1)
106 lenArray
= (sal_Int32
*) realloc(lenArray
, (lenArrayLen
+= 1000) * sizeof(sal_Int32
));
// Append the current value of lenArrayCurr as the entry for this line.
107 lenArray
[lenArrayCount
++] = lenArrayCurr
;
// Flag the first character as present.
109 exist
[u
[0]] = sal_True
;
// Remaining characters: flag each as present and write it to dataArea as a
// hex literal; a line break is emitted each time count is a multiple of 0x10.
110 for (i
= 1; i
< len
; i
++) { // start from second character,
111 exist
[u
[i
]] = sal_True
; // since the first character is captured in charArray.
113 if ((count
++) % 0x10 == 0)
114 fprintf(cfp
, "\n\t");
115 fprintf(cfp
, "0x%04x, ", u
[i
]);
118 lenArray
[lenArrayCount
++] = lenArrayCurr
; // store last ending pointer
// Store the final entry count in the slot after the last recorded character.
// NOTE(review): current+1 would index past charArray (0x10000 slots) if
// current can reach 0xFFFF.
119 charArray
[current
+1] = lenArrayCount
;
// Close the dataArea initializer.
120 fprintf(cfp
, "\n};\n");
// Emit lenArray: one leading zero entry followed by every stored value.
123 fprintf(cfp
, "static const sal_Int32 lenArray[] = {\n\t");
125 fprintf(cfp
, "0x%x, ", 0); // insert one slot for skipping 0 in index2 array.
126 for (i
= 0; i
< lenArrayCount
; i
++) {
127 fprintf(cfp
, "0x%lx, ", static_cast<long unsigned int>(lenArray
[i
]));
// NOTE(review): line-break output; the condition that guards it is not
// visible in this listing.
130 fprintf(cfp
, "\n\t");
133 fprintf(cfp
, "\n};\n");
137 // generate index1 array
138 fprintf (cfp
, "static const sal_Int16 index1[] = {\n\t");
// set[i] keeps the value written to index1 for block i so that the index2
// section below can test it.
139 sal_Int16 set
[0x100];
// One index1 entry per block of 0x100 consecutive characters.
141 for (i
= 0; i
< 0x100; i
++) {
// Scan the block for a non-zero charArray entry.
// NOTE(review): the statement executed on a hit is not visible here;
// presumably it leaves the loop so that j < 0x100 afterwards - confirm.
142 for (j
= 0; j
< 0x100; j
++)
143 if (charArray
[(i
*0x100) + j
] != 0)
// When j < 0x100 the block receives the next value of count (count is then
// incremented); otherwise it receives 0xff.  The same value is stored in
// set[i] and printed.
146 fprintf(cfp
, "0x%02x, ", set
[i
] = (j
< 0x100 ? sal::static_int_cast
<sal_Int16
>(count
++) : 0xff));
// Line break after every 0x10 entries.
147 if ((i
+1) % 0x10 == 0)
148 fprintf (cfp
, "\n\t");
150 fprintf (cfp
, "};\n");
152 // generate index2 array
153 fprintf (cfp
, "static const sal_Int32 index2[] = {\n\t");
// Only blocks whose index1 value is not 0xff contribute entries to index2.
155 for (i
= 0; i
< 0x100; i
++) {
156 if (set
[i
] != 0xff) {
157 for (j
= 0; j
< 0x100; j
++) {
// k starts as the character index for block i, offset j.
158 sal_Int32 k
= (i
*0x100) + j
;
// When prev is non-zero and this character has no charArray entry, move k
// forward to look for the next non-zero charArray entry.
// NOTE(review): the statement executed on a hit is not visible here.
159 if (prev
!= 0 && charArray
[k
] == 0) {
160 for (k
++; k
< 0x10000; k
++)
161 if (charArray
[k
] != 0)
// prev tracks the charArray value of the character at block i, offset j.
164 prev
= charArray
[(i
*0x100) + j
];
// Value expression of the index2 entry: charArray[k] + 1 when k is still in
// range, otherwise 0.
// NOTE(review): the start of this output statement is not visible here.
167 sal::static_int_cast
< unsigned long >(
168 k
< 0x10000 ? charArray
[k
] + 1 : 0));
// Line break after every 0x10 entries.
169 if ((j
+1) % 0x10 == 0)
170 fprintf (cfp
, "\n\t");
172 fprintf (cfp
, "\n\t");
175 fprintf (cfp
, "\n};\n");
177 // generate existMark array
179 fprintf (cfp
, "static const sal_uInt8 existMark[] = {\n\t");
// One output byte per group of eight exist flags.
// NOTE(review): with the bound 0x1FFF the highest flag tested is
// exist[0xFFF7]; the last eight entries of exist are never visited.
180 for (i
= 0; i
< 0x1FFF; i
++) {
// Test the eight flags of group i.
// NOTE(review): the statement that updates bit on a hit is not visible here.
182 for (j
= 0; j
< 8; j
++)
183 if (exist
[i
* 8 + j
])
// Write the byte for this group.
185 fprintf(cfp
, "0x%02x, ", bit
);
// NOTE(review): line-break output; the condition that guards it is not
// visible in this listing.
188 fprintf(cfp
, "\n\t");
191 fprintf (cfp
, "\n};\n");
193 // create function to return arrays
// Each generated accessor returns a pointer to one of the arrays above.
194 fprintf (cfp
, "\tconst sal_uInt8* getExistMark() { return existMark; }\n");
195 fprintf (cfp
, "\tconst sal_Int16* getIndex1() { return index1; }\n");
196 fprintf (cfp
, "\tconst sal_Int32* getIndex2() { return index2; }\n");
197 fprintf (cfp
, "\tconst sal_Int32* getLenArray() { return lenArray; }\n");
198 fprintf (cfp
, "\tconst sal_Unicode* getDataArea() { return dataArea; }\n");
// Close the extern C block opened in the preamble.
199 fprintf (cfp
, "}\n");