Version 3.6.0.4, tag libreoffice-3.6.0.4
[LibreOffice.git] / setup_native / source / ulfconv / ulfconv.cxx
blob1643b330d776c9c3b361de6157e5472a86342a59
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*************************************************************************
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * Copyright 2000, 2010 Oracle and/or its affiliates.
8 * OpenOffice.org - a multi-platform office productivity suite
10 * This file is part of OpenOffice.org.
12 * OpenOffice.org is free software: you can redistribute it and/or modify
13 * it under the terms of the GNU Lesser General Public License version 3
14 * only, as published by the Free Software Foundation.
16 * OpenOffice.org is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser General Public License version 3 for more details
20 * (a copy is included in the LICENSE file that accompanied this code).
22 * You should have received a copy of the GNU Lesser General Public License
23 * version 3 along with OpenOffice.org. If not, see
24 * <http://www.openoffice.org/license.html>
25 * for a copy of the LGPLv3 License.
27 ************************************************************************/
29 #include <stdlib.h>
30 #include <stdio.h>
31 #include <fcntl.h>
32 #include <errno.h>
33 #include <string.h>
34 #include <unistd.h>
35 #include <ctype.h>
36 #include <sal/alloca.h>
37 #include <sal/macros.h>
39 #include <rtl/ustring.hxx>
41 #include <map>
42 #include <string>
44 /*****************************************************************************
45 * typedefs
46 *****************************************************************************/
48 typedef std::map< const std::string, rtl_TextEncoding > EncodingMap;
50 struct _pair {
51 const char *key;
52 rtl_TextEncoding value;
55 static int _pair_compare (const char *key, const _pair *pair);
56 static const _pair* _pair_search (const char *key, const _pair *base, unsigned int member );
59 const _pair _ms_encoding_list[] = {
60 { "0", RTL_TEXTENCODING_UTF8 },
61 { "1250", RTL_TEXTENCODING_MS_1250 },
62 { "1251", RTL_TEXTENCODING_MS_1251 },
63 { "1252", RTL_TEXTENCODING_MS_1252 },
64 { "1253", RTL_TEXTENCODING_MS_1253 },
65 { "1254", RTL_TEXTENCODING_MS_1254 },
66 { "1255", RTL_TEXTENCODING_MS_1255 },
67 { "1256", RTL_TEXTENCODING_MS_1256 },
68 { "1257", RTL_TEXTENCODING_MS_1257 },
69 { "1258", RTL_TEXTENCODING_MS_1258 },
70 { "874", RTL_TEXTENCODING_MS_874 },
71 { "932", RTL_TEXTENCODING_MS_932 },
72 { "936", RTL_TEXTENCODING_MS_936 },
73 { "949", RTL_TEXTENCODING_MS_949 },
74 { "950", RTL_TEXTENCODING_MS_950 }
/*****************************************************************************
 * fgets replacement that works with Unix line ends on Windows
 *
 * Reads at most n-1 characters from fp into s.  Reading stops after the
 * first '\n' (which is stored) or at end of file; the result is always
 * NUL-terminated.
 *
 * Returns s, or NULL if no character could be read (end of file, n too
 * small to hold any character, or a NULL buffer/stream was passed).
 *****************************************************************************/

char * my_fgets(char *s, int n, FILE *fp)
{
    /* nothing sensible can be done without a buffer or a stream */
    if( s == NULL || fp == NULL )
        return NULL;

    int i = 0;
    while( i < n-1 )
    {
        int c = getc(fp);

        if( c == EOF )
            break;

        s[i++] = (char) c;

        /* the newline is kept, like fgets does */
        if( c == '\n' )
            break;
    }

    if( i == 0 )
        return NULL;

    s[i] = '\0';
    return s;
}
112 /*****************************************************************************
113 * compare function for binary search
114 *****************************************************************************/
116 static int
117 _pair_compare (const char *key, const _pair *pair)
119 int result = rtl_str_compareIgnoreAsciiCase( key, pair->key );
120 return result;
123 /*****************************************************************************
124 * binary search on encoding tables
125 *****************************************************************************/
127 static const _pair*
128 _pair_search (const char *key, const _pair *base, unsigned int member )
130 unsigned int lower = 0;
131 unsigned int upper = member;
132 unsigned int current;
133 int comparison;
135 /* check for validity of input */
136 if ( (key == NULL) || (base == NULL) || (member == 0) )
137 return NULL;
139 /* binary search */
140 while ( lower < upper )
142 current = (lower + upper) / 2;
143 comparison = _pair_compare( key, base + current );
144 if (comparison < 0)
145 upper = current;
146 else
147 if (comparison > 0)
148 lower = current + 1;
149 else
150 return base + current;
153 return NULL;
157 /************************************************************************
158 * read_encoding_table
159 ************************************************************************/
161 void read_encoding_table(char * file, EncodingMap& aEncodingMap)
163 FILE * fp = fopen(file, "r");
164 if ( ! fp ) {
165 fprintf(stderr, "ulfconv: %s %s\n", file, strerror(errno));
166 exit(2);
169 char buffer[512];
170 while ( NULL != my_fgets(buffer, sizeof(buffer), fp) ) {
172 // strip comment lines
173 if ( buffer[0] == '#' )
174 continue;
176 // find end of language string
177 char * cp;
178 for ( cp = buffer; ! isspace(*cp); cp++ )
180 *cp = '\0';
182 // find start of codepage string
183 for ( ++cp; isspace(*cp); ++cp )
185 char * codepage = cp;
187 // find end of codepage string
188 for ( ++cp; ! isspace(*cp); ++cp )
190 *cp = '\0';
192 // find the correct mapping for codepage
193 const unsigned int members = SAL_N_ELEMENTS( _ms_encoding_list );
194 const _pair *encoding = _pair_search( codepage, _ms_encoding_list, members );
196 if ( encoding != NULL ) {
197 const std::string language(buffer);
198 aEncodingMap.insert( EncodingMap::value_type(language, encoding->value) );
202 fclose(fp);
205 /************************************************************************
206 * print_legacy_mixed
207 ************************************************************************/
209 void print_legacy_mixed(
210 FILE * ostream,
211 const rtl::OUString& aString,
212 const std::string& language,
213 EncodingMap& aEncodingMap)
215 EncodingMap::iterator iter = aEncodingMap.find(language);
217 if ( iter != aEncodingMap.end() ) {
218 fputs(OUStringToOString(aString, iter->second).getStr(), ostream);
219 } else {
220 fprintf(stderr, "ulfconv: WARNING: no legacy encoding found for %s\n", language.c_str());
224 /************************************************************************
225 * print_java_style
226 ************************************************************************/
228 void print_java_style(FILE * ostream, const rtl::OUString& aString)
230 int imax = aString.getLength();
231 for (int i = 0; i < imax; i++) {
232 sal_Unicode uc = aString[i];
233 if ( uc < 128 ) {
234 fprintf(ostream, "%c", (char) uc);
235 } else {
236 fprintf(ostream, "\\u%2.2x%2.2x", uc >> 8, uc & 0xFF );
241 /************************************************************************
242 * main
243 ************************************************************************/
245 int main( int argc, char * const argv[] )
247 EncodingMap aEncodingMap;
249 FILE *istream = stdin;
250 FILE *ostream = stdout;
252 char *outfile = NULL;
254 int errflg = 0;
255 int argi;
257 for( argi=1; argi < argc; argi++ )
259 if( argv[argi][0] == '-' && argv[argi][2] == '\0' )
261 switch(argv[argi][1]) {
262 case 'o':
263 if (argi+1 >= argc || argv[argi+1][0] == '-')
265 fprintf(stderr, "Option -%c requires an operand\n", argv[argi][1]);
266 errflg++;
267 break;
270 ++argi;
271 outfile = argv[argi];
272 break;
273 case 't':
274 if (argi+1 >= argc || argv[argi+1][0] == '-')
276 fprintf(stderr, "Option -%c requires an operand\n", argv[argi][1]);
277 errflg++;
278 break;
281 read_encoding_table(argv[++argi], aEncodingMap);
282 break;
283 default:
284 fprintf(stderr, "Unrecognized option: -%c\n", argv[argi][1]);
285 errflg++;
288 else
290 break;
294 if (errflg) {
295 fprintf(stderr, "Usage: ulfconv [-o <output file>] [-t <encoding table>] [<ulf file>]\n");
296 exit(2);
299 /* assign input file to stdin */
300 if ( argi < argc )
302 istream = fopen(argv[argi], "r");
303 if ( istream == NULL ) {
304 fprintf(stderr, "ulfconv: %s : %s\n", argv[argi], strerror(errno));
305 exit(2);
309 /* open output file if any */
310 if ( outfile )
312 ostream = fopen(outfile, "w");
313 if ( ostream == NULL ) {
314 fprintf(stderr, "ulfconv: %s : %s\n", outfile, strerror(errno));
315 fclose(istream);
316 exit(2);
320 /* read line by line from stdin */
321 char buffer[65536];
322 while ( NULL != fgets(buffer, sizeof(buffer), istream) ) {
324 /* only handle lines containing " = " */
325 char * cp = strstr(buffer, " = \"");
326 if ( cp ) {
327 rtl::OUString aString;
329 /* find end of lang string */
330 int n;
331 for ( n=0; ! isspace(buffer[n]); n++ )
334 std::string line = buffer;
335 std::string lang(line, 0, n);
337 cp += 4;
338 rtl_string2UString( &aString.pData, cp, strrchr(cp, '\"') - cp,
339 RTL_TEXTENCODING_UTF8, OSTRING_TO_OUSTRING_CVTFLAGS );
341 fprintf(ostream, "%s = \"", lang.c_str());
343 if ( aEncodingMap.empty() ) {
344 print_java_style(ostream, aString);
345 } else {
346 print_legacy_mixed(ostream, aString, lang, aEncodingMap);
349 fprintf(ostream, "\"\n");
352 } else {
353 fputs(buffer, ostream);
357 fclose(ostream);
358 fclose(istream);
361 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */