merge the formfield patch from ooo-build
[ooovba.git] / i18npool / source / breakiterator / gendict.cxx
blobb70de16fda750f06f3aab3ffe01a36c9c5ec12c7
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: gendict.cxx,v $
10 * $Revision: 1.12 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_i18npool.hxx"
34 #include <stdio.h>
35 #include <string.h>
36 #include <stdlib.h>
37 #include <sal/main.h>
38 #include <sal/types.h>
39 #include <rtl/strbuf.hxx>
40 #include <rtl/ustring.hxx>
42 using namespace ::rtl;
44 /* Main Procedure */
46 SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
48 FILE *sfp, *cfp;
50 if (argc < 3) exit(-1);
52 sfp = fopen(argv[1], "rb"); // open the source file for read;
53 if (sfp == NULL)
55 printf("Open the dictionary source file failed.");
56 return -1;
59 // create the C source file to write
60 cfp = fopen(argv[2], "wb");
61 if (cfp == NULL) {
62 fclose(sfp);
63 printf("Can't create the C source file.");
64 return -1;
67 fprintf(cfp, "/*\n");
68 fprintf(cfp, " * Copyright(c) 1999 - 2000, Sun Microsystems, Inc.\n");
69 fprintf(cfp, " * All Rights Reserved.\n");
70 fprintf(cfp, " */\n\n");
71 fprintf(cfp, "/* !!!The file is generated automatically. DONOT edit the file manually!!! */\n\n");
72 fprintf(cfp, "#include <sal/types.h>\n\n");
73 fprintf(cfp, "extern \"C\" {\n");
75 sal_Int32 count, i, j;
76 sal_Int32 lenArrayCurr = 0, lenArrayCount = 0, lenArrayLen = 0, *lenArray = NULL, charArray[0x10000];
77 sal_Bool exist[0x10000];
78 for (i = 0; i < 0x10000; i++) {
79 exist[i] = sal_False;
80 charArray[i] = 0;
83 // generate main dict. data array
84 fprintf(cfp, "static const sal_Unicode dataArea[] = {");
85 sal_Char str[1024];
86 sal_Unicode current = 0;
87 count = 0;
88 while (fgets(str, 1024, sfp)) {
89 // input file is in UTF-8 encoding
90 // don't convert last new line character to Ostr.
91 OUString Ostr((const sal_Char *)str, strlen(str) - 1, RTL_TEXTENCODING_UTF8);
92 const sal_Unicode *u = Ostr.getStr();
94 sal_Int32 len = Ostr.getLength();
96 i=0;
97 Ostr.iterateCodePoints(&i, 1);
98 if (len == i) continue; // skip one character word
100 if (*u != current) {
101 if (*u < current)
102 printf("u %x, current %x, count %d, lenArrayCount %d\n", *u, current,
103 sal::static_int_cast<int>(count), sal::static_int_cast<int>(lenArrayCount));
104 current = *u;
105 charArray[current] = lenArrayCount;
108 if (lenArrayLen <= lenArrayCount+1)
109 lenArray = (sal_Int32*) realloc(lenArray, (lenArrayLen += 1000) * sizeof(sal_Int32));
110 lenArray[lenArrayCount++] = lenArrayCurr;
112 exist[u[0]] = sal_True;
113 for (i = 1; i < len; i++) { // start from second character,
114 exist[u[i]] = sal_True; // since the first character is captured in charArray.
115 lenArrayCurr++;
116 if ((count++) % 0x10 == 0)
117 fprintf(cfp, "\n\t");
118 fprintf(cfp, "0x%04x, ", u[i]);
121 lenArray[lenArrayCount++] = lenArrayCurr; // store last ending pointer
122 charArray[current+1] = lenArrayCount;
123 fprintf(cfp, "\n};\n");
125 // generate lenArray
126 fprintf(cfp, "static const sal_Int32 lenArray[] = {\n\t");
127 count = 1;
128 fprintf(cfp, "0x%x, ", 0); // insert one slat for skipping 0 in index2 array.
129 for (i = 0; i < lenArrayCount; i++) {
130 fprintf(cfp, "0x%lx, ", static_cast<long unsigned int>(lenArray[i]));
131 if (count == 0xf) {
132 count = 0;
133 fprintf(cfp, "\n\t");
134 } else count++;
136 fprintf(cfp, "\n};\n");
138 free(lenArray);
140 // generate index1 array
141 fprintf (cfp, "static const sal_Int16 index1[] = {\n\t");
142 sal_Int16 set[0x100];
143 count = 0;
144 for (i = 0; i < 0x100; i++) {
145 for (j = 0; j < 0x100; j++)
146 if (charArray[(i*0x100) + j] != 0)
147 break;
149 fprintf(cfp, "0x%02x, ", set[i] = (j < 0x100 ? sal::static_int_cast<sal_Int16>(count++) : 0xff));
150 if ((i+1) % 0x10 == 0)
151 fprintf (cfp, "\n\t");
153 fprintf (cfp, "};\n");
155 // generate index2 array
156 fprintf (cfp, "static const sal_Int32 index2[] = {\n\t");
157 sal_Int32 prev = 0;
158 for (i = 0; i < 0x100; i++) {
159 if (set[i] != 0xff) {
160 for (j = 0; j < 0x100; j++) {
161 sal_Int32 k = (i*0x100) + j;
162 if (prev != 0 && charArray[k] == 0) {
163 for (k++; k < 0x10000; k++)
164 if (charArray[k] != 0)
165 break;
167 prev = charArray[(i*0x100) + j];
168 fprintf(
169 cfp, "0x%lx, ",
170 sal::static_int_cast< unsigned long >(
171 k < 0x10000 ? charArray[k] + 1 : 0));
172 if ((j+1) % 0x10 == 0)
173 fprintf (cfp, "\n\t");
175 fprintf (cfp, "\n\t");
178 fprintf (cfp, "\n};\n");
180 // generate existMark array
181 count = 0;
182 fprintf (cfp, "static const sal_uInt8 existMark[] = {\n\t");
183 for (i = 0; i < 0x1FFF; i++) {
184 sal_uInt8 bit = 0;
185 for (j = 0; j < 8; j++)
186 if (exist[i * 8 + j])
187 bit |= 1 << j;
188 fprintf(cfp, "0x%02x, ", bit);
189 if (count == 0xf) {
190 count = 0;
191 fprintf(cfp, "\n\t");
192 } else count++;
194 fprintf (cfp, "\n};\n");
196 // create function to return arrays
197 fprintf (cfp, "\tconst sal_uInt8* getExistMark() { return existMark; }\n");
198 fprintf (cfp, "\tconst sal_Int16* getIndex1() { return index1; }\n");
199 fprintf (cfp, "\tconst sal_Int32* getIndex2() { return index2; }\n");
200 fprintf (cfp, "\tconst sal_Int32* getLenArray() { return lenArray; }\n");
201 fprintf (cfp, "\tconst sal_Unicode* getDataArea() { return dataArea; }\n");
202 fprintf (cfp, "}\n");
204 fclose(sfp);
205 fclose(cfp);
207 return 0;
208 } // End of main