1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*************************************************************************
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * Copyright 2000, 2010 Oracle and/or its affiliates.
8 * OpenOffice.org - a multi-platform office productivity suite
10 * This file is part of OpenOffice.org.
12 * OpenOffice.org is free software: you can redistribute it and/or modify
13 * it under the terms of the GNU Lesser General Public License version 3
14 * only, as published by the Free Software Foundation.
16 * OpenOffice.org is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser General Public License version 3 for more details
20 * (a copy is included in the LICENSE file that accompanied this code).
22 * You should have received a copy of the GNU Lesser General Public License
23 * version 3 along with OpenOffice.org. If not, see
24 * <http://www.openoffice.org/license.html>
25 * for a copy of the LGPLv3 License.
27 ************************************************************************/
36 #include <sal/alloca.h>
37 #include <sal/macros.h>
39 #include <rtl/ustring.hxx>
44 /*****************************************************************************
46 *****************************************************************************/
/* Associates a language identifier string with the rtl_TextEncoding to use
 * for that language; filled by read_encoding_table and queried by
 * print_legacy_mixed. */
48 typedef std::map
< const std::string
, rtl_TextEncoding
> EncodingMap
;
52 rtl_TextEncoding value
;
/* Forward declarations of the helpers used to look a key up in a table of
 * _pair entries. */
55 static int _pair_compare (const char *key
, const _pair
*pair
);
/* base points at the first table entry; member is the number of entries
 * (read_encoding_table passes SAL_N_ELEMENTS of the table). */
56 static const _pair
* _pair_search (const char *key
, const _pair
*base
, unsigned int member
);
/* Code page strings and the rtl text encoding each one selects.
 * read_encoding_table hands this table to _pair_search, which repeatedly
 * probes the midpoint of the remaining range, so the keys are listed in
 * ascending string order ("0" < "1250" < ... < "1258" < "874" < ... < "950").
 * Keep that order when adding entries. */
59 const _pair _ms_encoding_list
[] = {
60 { "0", RTL_TEXTENCODING_UTF8
},
61 { "1250", RTL_TEXTENCODING_MS_1250
},
62 { "1251", RTL_TEXTENCODING_MS_1251
},
63 { "1252", RTL_TEXTENCODING_MS_1252
},
64 { "1253", RTL_TEXTENCODING_MS_1253
},
65 { "1254", RTL_TEXTENCODING_MS_1254
},
66 { "1255", RTL_TEXTENCODING_MS_1255
},
67 { "1256", RTL_TEXTENCODING_MS_1256
},
68 { "1257", RTL_TEXTENCODING_MS_1257
},
69 { "1258", RTL_TEXTENCODING_MS_1258
},
70 { "874", RTL_TEXTENCODING_MS_874
},
71 { "932", RTL_TEXTENCODING_MS_932
},
72 { "936", RTL_TEXTENCODING_MS_936
},
73 { "949", RTL_TEXTENCODING_MS_949
},
74 { "950", RTL_TEXTENCODING_MS_950
}
78 /*****************************************************************************
79 * fgets that works with Unix line ends on Windows
80 *****************************************************************************/
/* fgets-style reader taking a destination buffer s, its size n and the input
 * stream fp; read_encoding_table calls it to fill its line buffer and stops
 * when it returns NULL.
 * NOTE(review): only the signature and the loop header are visible in this
 * listing; the loop body and the return paths are missing, so the exact
 * line-end handling and return values cannot be confirmed from here. */
82 char * my_fgets(char *s
, int n
, FILE *fp
)
// visits at most n-1 positions; presumably the last slot is kept for the terminator -- confirm
85 for( i
=0; i
< n
-1; i
++ )
112 /*****************************************************************************
113 * compare function for binary search
114 *****************************************************************************/
/* Compares the search key with the key stored in one table entry.
 * NOTE(review): the return statement is not visible in this listing;
 * presumably `result` is returned unchanged -- confirm. */
117 _pair_compare (const char *key
, const _pair
*pair
)
// the comparison ignores ASCII case differences between the two keys
119 int result
= rtl_str_compareIgnoreAsciiCase( key
, pair
->key
);
123 /*****************************************************************************
124 * binary search on encoding tables
125 *****************************************************************************/
/* Searches the `member` entries starting at `base` for `key`, probing the
 * midpoint of the range [lower, upper) on each pass and comparing with
 * _pair_compare. A hit is returned as a pointer to the matching entry.
 * NOTE(review): the statements that move lower/upper after a comparison and
 * the value returned for rejected input or a miss are not visible in this
 * listing (read_encoding_table tests the result against NULL). */
128 _pair_search (const char *key
, const _pair
*base
, unsigned int member
)
// search range is [lower, upper), initially the whole table
130 unsigned int lower
= 0;
131 unsigned int upper
= member
;
132 unsigned int current
;
135 /* check for validity of input */
136 if ( (key
== NULL
) || (base
== NULL
) || (member
== 0) )
// loop while the range still contains at least one candidate
140 while ( lower
< upper
)
// probe the middle entry of the remaining range
142 current
= (lower
+ upper
) / 2;
143 comparison
= _pair_compare( key
, base
+ current
);
// the probed entry is the result
150 return base
+ current
;
157 /************************************************************************
158 * read_encoding_table
159 ************************************************************************/
/* Loads the encoding table named by `file` into aEncodingMap.
 *
 * The file is read buffer by buffer with my_fgets. For each buffer the code
 * scans a language string, then whitespace, then a code page string, looks
 * the code page up in _ms_encoding_list and, when it is found, inserts
 * (language, encoding) into aEncodingMap.
 *
 * NOTE(review): braces, the declaration of `buffer`, the guard around the
 * error message, the statements executed for '#' lines and any string
 * termination between the scans are missing from this listing; the notes
 * below cover only the visible statements. */
161 void read_encoding_table(char * file
, EncodingMap
& aEncodingMap
)
163 FILE * fp
= fopen(file
, "r");
// prints the file name with the system error text; presumably guarded by a
// failed-open check that is not shown
165 fprintf(stderr
, "ulfconv: %s %s\n", file
, strerror(errno
));
// keep reading until my_fgets reports that nothing more was read
170 while ( NULL
!= my_fgets(buffer
, sizeof(buffer
), fp
) ) {
172 // strip comment lines
173 if ( buffer
[0] == '#' )
176 // find end of language string
// NOTE(review): *cp is a plain char, while isspace expects an unsigned char
// value or EOF. The scan also does not stop at the terminating '\0' (which is
// not whitespace), so a buffer without any whitespace would be overrun unless
// lines not shown prevent it.
178 for ( cp
= buffer
; ! isspace(*cp
); cp
++ )
182 // find start of codepage string
183 for ( ++cp
; isspace(*cp
); ++cp
)
185 char * codepage
= cp
;
187 // find end of codepage string
188 for ( ++cp
; ! isspace(*cp
); ++cp
)
192 // find the correct mapping for codepage
193 const unsigned int members
= SAL_N_ELEMENTS( _ms_encoding_list
);
194 const _pair
*encoding
= _pair_search( codepage
, _ms_encoding_list
, members
);
// only code pages present in the table produce a map entry
196 if ( encoding
!= NULL
) {
// builds the key from the start of buffer; presumably buffer was terminated
// at the end of the language string in a line not shown -- confirm
197 const std::string
language(buffer
);
// std::map::insert keeps the existing entry if this language was seen before
198 aEncodingMap
.insert( EncodingMap::value_type(language
, encoding
->value
) );
205 /************************************************************************
206 * print_legacy_mixed
207 ************************************************************************/
/* Looks `language` up in aEncodingMap and, when an entry exists, writes
 * aString to ostream converted to that entry's encoding.
 * NOTE(review): the declaration of `ostream` is missing from this listing;
 * main passes four arguments with the stream first, so it is presumably the
 * first parameter. The warning below is presumably the branch taken when no
 * entry exists (the else is not visible) -- confirm. */
209 void print_legacy_mixed(
211 const rtl::OUString
& aString
,
212 const std::string
& language
,
213 EncodingMap
& aEncodingMap
)
215 EncodingMap::iterator iter
= aEncodingMap
.find(language
);
// entry found: iter->second is the target encoding for this language
217 if ( iter
!= aEncodingMap
.end() ) {
218 fputs(OUStringToOString(aString
, iter
->second
).getStr(), ostream
);
// diagnostic goes to stderr, not to the output stream
220 fprintf(stderr
, "ulfconv: WARNING: no legacy encoding found for %s\n", language
.c_str());
224 /************************************************************************
225 * print_java_style
226 ************************************************************************/
/* Writes aString to ostream one sal_Unicode at a time. Two output forms are
 * visible: the unit written as a single char, and a backslash-u escape made
 * of the unit's high byte and low byte, each as two lowercase hex digits.
 * NOTE(review): the condition that selects between the two forms is missing
 * from this listing. */
228 void print_java_style(FILE * ostream
, const rtl::OUString
& aString
)
230 int imax
= aString
.getLength();
231 for (int i
= 0; i
< imax
; i
++) {
232 sal_Unicode uc
= aString
[i
];
// literal form: the unit is narrowed to char and written as is
234 fprintf(ostream
, "%c", (char) uc
);
// escaped form: high byte first, then low byte
236 fprintf(ostream
, "\\u%2.2x%2.2x", uc
>> 8, uc
& 0xFF );
241 /************************************************************************
242 * main
243 ************************************************************************/
/* Entry point. Per the usage text printed below:
 *   ulfconv [-o <output file>] [-t <encoding table>] [<ulf file>]
 *
 * Input defaults to stdin and output to stdout. Each input line is searched
 * for the marker  = "  ; for such a line the leading language token is
 * re-emitted, followed by the quoted text converted from UTF-8 and printed
 * with print_java_style when no encoding table was loaded, or with
 * print_legacy_mixed otherwise.
 *
 * NOTE(review): braces, case labels, else branches, several declarations
 * (argi, buffer, n) and all exit paths are missing from this listing; the
 * notes below describe only the statements that are visible. */
245 int main( int argc
, char * const argv
[] )
247 EncodingMap aEncodingMap
;
249 FILE *istream
= stdin
;
250 FILE *ostream
= stdout
;
252 char *outfile
= NULL
;
257 for( argi
=1; argi
< argc
; argi
++ )
// an option is exactly two characters: '-' plus one letter.
// NOTE(review): for an argument that is just "-", argv[argi][2] is read one
// past the terminating '\0'.
259 if( argv
[argi
][0] == '-' && argv
[argi
][2] == '\0' )
261 switch(argv
[argi
][1]) {
// the operand must exist and must not itself start with '-'
263 if (argi
+1 >= argc
|| argv
[argi
+1][0] == '-')
265 fprintf(stderr
, "Option -%c requires an operand\n", argv
[argi
][1]);
// NOTE(review): presumably argi was advanced to the operand in a line not
// shown; otherwise this would store the option itself -- confirm
271 outfile
= argv
[argi
];
274 if (argi
+1 >= argc
|| argv
[argi
+1][0] == '-')
276 fprintf(stderr
, "Option -%c requires an operand\n", argv
[argi
][1]);
// the next argument names the encoding table file (presumably the -t case)
281 read_encoding_table(argv
[++argi
], aEncodingMap
);
284 fprintf(stderr
, "Unrecognized option: -%c\n", argv
[argi
][1]);
295 fprintf(stderr
, "Usage: ulfconv [-o <output file>] [-t <encoding table>] [<ulf file>]\n");
299 /* use the named input file instead of stdin */
302 istream
= fopen(argv
[argi
], "r");
303 if ( istream
== NULL
) {
304 fprintf(stderr
, "ulfconv: %s : %s\n", argv
[argi
], strerror(errno
));
309 /* open output file if any */
312 ostream
= fopen(outfile
, "w");
313 if ( ostream
== NULL
) {
314 fprintf(stderr
, "ulfconv: %s : %s\n", outfile
, strerror(errno
));
320 /* read line by line from the input stream (stdin unless a file was opened above) */
322 while ( NULL
!= fgets(buffer
, sizeof(buffer
), istream
) ) {
324 /* only handle lines containing " = " */
// the marker searched for also includes the opening double quote
325 char * cp
= strstr(buffer
, " = \"");
327 rtl::OUString aString
;
329 /* find end of lang string */
// NOTE(review): buffer[n] is a plain char; isspace expects an unsigned char value or EOF
331 for ( n
=0; ! isspace(buffer
[n
]); n
++ )
// lang is the first n characters of the line
334 std::string line
= buffer
;
335 std::string
lang(line
, 0, n
);
// converts the bytes from cp up to (not including) the last '"' after cp
// from UTF-8 into aString.
// NOTE(review): cp presumably has been advanced past the marker in a line
// not shown -- confirm
338 rtl_string2UString( &aString
.pData
, cp
, strrchr(cp
, '\"') - cp
,
339 RTL_TEXTENCODING_UTF8
, OSTRING_TO_OUSTRING_CVTFLAGS
);
341 fprintf(ostream
, "%s = \"", lang
.c_str());
// an empty map means no encoding table entries were loaded
343 if ( aEncodingMap
.empty() ) {
344 print_java_style(ostream
, aString
);
// presumably the else branch: a table was loaded, use the legacy encoding
346 print_legacy_mixed(ostream
, aString
, lang
, aEncodingMap
);
// closes the quote opened above and ends the line
349 fprintf(ostream
, "\"\n");
// presumably the branch for lines without the marker: copied through unchanged
353 fputs(buffer
, ostream
);
361 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */