Update ooo320-m1
[ooovba.git] / setup_native / source / ulfconv / ulfconv.cxx
blob28ecabb91061dff1aedb5f74236cab57ec2d0db2
/*************************************************************************
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * Copyright 2008 by Sun Microsystems, Inc.
 *
 * OpenOffice.org - a multi-platform office productivity suite
 *
 * $RCSfile: ulfconv.cxx,v $
 * $Revision: 1.10 $
 *
 * This file is part of OpenOffice.org.
 *
 * OpenOffice.org is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 3
 * only, as published by the Free Software Foundation.
 *
 * OpenOffice.org is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License version 3 for more details
 * (a copy is included in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU Lesser General Public License
 * version 3 along with OpenOffice.org.  If not, see
 * <http://www.openoffice.org/license.html>
 * for a copy of the LGPLv3 License.
 *
 ************************************************************************/
31 #include <stdlib.h>
32 #include <stdio.h>
33 #include <fcntl.h>
34 #include <errno.h>
35 #include <string.h>
36 #include <unistd.h>
37 #include <ctype.h>
38 #include <sal/alloca.h>
40 #include <rtl/ustring.hxx>
42 #include <map>
43 #include <string>
45 /*****************************************************************************
46 * typedefs
47 *****************************************************************************/
49 typedef std::map< const std::string, rtl_TextEncoding > EncodingMap;
51 struct _pair {
52 const char *key;
53 rtl_TextEncoding value;
56 static int _pair_compare (const char *key, const _pair *pair);
57 static const _pair* _pair_search (const char *key, const _pair *base, unsigned int member );
60 const _pair _ms_encoding_list[] = {
61 { "0", RTL_TEXTENCODING_UTF8 },
62 { "1250", RTL_TEXTENCODING_MS_1250 },
63 { "1251", RTL_TEXTENCODING_MS_1251 },
64 { "1252", RTL_TEXTENCODING_MS_1252 },
65 { "1253", RTL_TEXTENCODING_MS_1253 },
66 { "1254", RTL_TEXTENCODING_MS_1254 },
67 { "1255", RTL_TEXTENCODING_MS_1255 },
68 { "1256", RTL_TEXTENCODING_MS_1256 },
69 { "1257", RTL_TEXTENCODING_MS_1257 },
70 { "1258", RTL_TEXTENCODING_MS_1258 },
71 { "874", RTL_TEXTENCODING_MS_874 },
72 { "932", RTL_TEXTENCODING_MS_932 },
73 { "936", RTL_TEXTENCODING_MS_936 },
74 { "949", RTL_TEXTENCODING_MS_949 },
75 { "950", RTL_TEXTENCODING_MS_950 }
/*****************************************************************************
 * fgets replacement that works with unix line ends on Windows
 *
 * Reads characters from fp into s until a '\n' has been stored, EOF is hit,
 * or n-1 characters have been stored, then NUL-terminates s.
 *
 * Returns s when at least one character was stored, NULL otherwise (this
 * includes n <= 1, in which case nothing is read from fp).
 *****************************************************************************/

char * my_fgets(char *s, int n, FILE *fp)
{
    int len = 0;

    while ( len < n - 1 )
    {
        const int ch = getc(fp);
        if ( ch == EOF )
            break;

        s[len] = (char) ch;
        ++len;

        /* the newline is kept in the buffer, as with fgets */
        if ( s[len - 1] == '\n' )
            break;
    }

    if ( len <= 0 )
        return NULL;

    s[len] = '\0';
    return s;
}
113 /*****************************************************************************
114 * compare function for binary search
115 *****************************************************************************/
117 static int
118 _pair_compare (const char *key, const _pair *pair)
120 int result = rtl_str_compareIgnoreAsciiCase( key, pair->key );
121 return result;
124 /*****************************************************************************
125 * binary search on encoding tables
126 *****************************************************************************/
128 static const _pair*
129 _pair_search (const char *key, const _pair *base, unsigned int member )
131 unsigned int lower = 0;
132 unsigned int upper = member;
133 unsigned int current;
134 int comparison;
136 /* check for validity of input */
137 if ( (key == NULL) || (base == NULL) || (member == 0) )
138 return NULL;
140 /* binary search */
141 while ( lower < upper )
143 current = (lower + upper) / 2;
144 comparison = _pair_compare( key, base + current );
145 if (comparison < 0)
146 upper = current;
147 else
148 if (comparison > 0)
149 lower = current + 1;
150 else
151 return base + current;
154 return NULL;
158 /************************************************************************
159 * read_encoding_table
160 ************************************************************************/
162 void read_encoding_table(char * file, EncodingMap& aEncodingMap)
164 FILE * fp = fopen(file, "r");
165 if ( ! fp ) {
166 fprintf(stderr, "ulfconv: %s %s\n", file, strerror(errno));
167 exit(2);
170 char buffer[512];
171 while ( NULL != my_fgets(buffer, sizeof(buffer), fp) ) {
173 // strip comment lines
174 if ( buffer[0] == '#' )
175 continue;
177 // find end of language string
178 char * cp;
179 for ( cp = buffer; ! isspace(*cp); cp++ )
181 *cp = '\0';
183 // find start of codepage string
184 for ( ++cp; isspace(*cp); ++cp )
186 char * codepage = cp;
188 // find end of codepage string
189 for ( ++cp; ! isspace(*cp); ++cp )
191 *cp = '\0';
193 // find the correct mapping for codepage
194 const unsigned int members = sizeof( _ms_encoding_list ) / sizeof( _pair );
195 const _pair *encoding = _pair_search( codepage, _ms_encoding_list, members );
197 if ( encoding != NULL ) {
198 const std::string language(buffer);
199 aEncodingMap.insert( EncodingMap::value_type(language, encoding->value) );
204 /************************************************************************
205 * print_legacy_mixed
206 ************************************************************************/
208 void print_legacy_mixed(
209 FILE * ostream,
210 const rtl::OUString& aString,
211 const std::string& language,
212 EncodingMap& aEncodingMap)
214 EncodingMap::iterator iter = aEncodingMap.find(language);
216 if ( iter != aEncodingMap.end() ) {
217 fputs(OUStringToOString(aString, iter->second).getStr(), ostream);
218 } else {
219 fprintf(stderr, "ulfconv: WARNING: no legacy encoding found for %s\n", language.c_str());
223 /************************************************************************
224 * print_java_style
225 ************************************************************************/
227 void print_java_style(FILE * ostream, const rtl::OUString& aString)
229 int imax = aString.getLength();
230 for (int i = 0; i < imax; i++) {
231 sal_Unicode uc = aString[i];
232 if ( uc < 128 ) {
233 fprintf(ostream, "%c", (char) uc);
234 } else {
235 fprintf(ostream, "\\u%2.2x%2.2x", uc >> 8, uc & 0xFF );
240 /************************************************************************
241 * main
242 ************************************************************************/
244 int main( int argc, char * const argv[] )
246 EncodingMap aEncodingMap;
248 FILE *istream = stdin;
249 FILE *ostream = stdout;
251 char *outfile = NULL;
253 int errflg = 0;
254 int argi;
256 for( argi=1; argi < argc; argi++ )
258 if( argv[argi][0] == '-' && argv[argi][2] == '\0' )
260 switch(argv[argi][1]) {
261 case 'o':
262 if (argi+1 >= argc || argv[argi+1][0] == '-')
264 fprintf(stderr, "Option -%c requires an operand\n", argv[argi][1]);
265 errflg++;
266 break;
269 ++argi;
270 outfile = argv[argi];
271 break;
272 case 't':
273 if (argi+1 >= argc || argv[argi+1][0] == '-')
275 fprintf(stderr, "Option -%c requires an operand\n", argv[argi][1]);
276 errflg++;
277 break;
280 read_encoding_table(argv[++argi], aEncodingMap);
281 break;
282 default:
283 fprintf(stderr, "Unrecognized option: -%c\n", argv[argi][1]);
284 errflg++;
287 else
289 break;
293 if (errflg) {
294 fprintf(stderr, "Usage: ulfconv [-o <output file>] [-t <encoding table>] [<ulf file>]\n");
295 exit(2);
298 /* assign input file to stdin */
299 if ( argi < argc )
301 istream = fopen(argv[argi], "r");
302 if ( istream == NULL ) {
303 fprintf(stderr, "ulfconv: %s : %s\n", argv[argi], strerror(errno));
304 exit(2);
308 /* open output file if any */
309 if ( outfile )
311 ostream = fopen(outfile, "w");
312 if ( ostream == NULL ) {
313 fprintf(stderr, "ulfconv: %s : %s\n", outfile, strerror(errno));
314 exit(2);
318 /* read line by line from stdin */
319 char buffer[65536];
320 while ( NULL != fgets(buffer, sizeof(buffer), istream) ) {
322 /* only handle lines containing " = " */
323 char * cp = strstr(buffer, " = \"");
324 if ( cp ) {
325 rtl::OUString aString;
327 /* find end of lang string */
328 int n;
329 for ( n=0; ! isspace(buffer[n]); n++ )
332 std::string line = buffer;
333 std::string lang(line, 0, n);
335 cp += 4;
336 rtl_string2UString( &aString.pData, cp, strrchr(cp, '\"') - cp,
337 RTL_TEXTENCODING_UTF8, OSTRING_TO_OUSTRING_CVTFLAGS );
339 fprintf(ostream, "%s = \"", lang.c_str());
341 if ( aEncodingMap.empty() ) {
342 print_java_style(ostream, aString);
343 } else {
344 print_legacy_mixed(ostream, aString, lang, aEncodingMap);
347 fprintf(ostream, "\"\n");
350 } else {
351 fputs(buffer, ostream);