1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: gsiconv.cxx,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_transex3.hxx"
34 #include <tools/fsys.hxx>
35 #include <tools/stream.hxx>
38 #include "utf8conv.hxx"
40 #define GSI_FILE_UNKNOWN 0x0000
41 #define GSI_FILE_OLDSTYLE 0x0001
42 #define GSI_FILE_L10NFRAMEWORK 0x0002
44 /*****************************************************************************/
// GetGSIFileType: classifies a GSI stream by looking at its first non-empty
// line. The current stream offset is captured with Tell() and the stream is
// rewound to its beginning before any line is read.
// Returns one of the GSI_FILE_* constants: a first line containing the "($$)"
// delimiter selects GSI_FILE_OLDSTYLE; GSI_FILE_L10NFRAMEWORK is the other
// outcome; GSI_FILE_UNKNOWN is the initial value.
// NOTE(review): this listing appears to have lost lines. No braces are shown,
// sLine is never declared, the two nFileType assignments below are not
// separated by a visible else, nPos is never used again and no return is
// shown. Presumably the pristine file restores the saved offset and returns
// nFileType - confirm before compiling.
45 USHORT
GetGSIFileType( SvStream
&rStream
)
46 /*****************************************************************************/
// Result used when the format cannot be determined.
48 USHORT nFileType
= GSI_FILE_UNKNOWN
;
// Remember where the caller left the stream, then start from the top.
50 ULONG
nPos( rStream
.Tell());
51 rStream
.Seek( STREAM_SEEK_TO_BEGIN
);
// Skip leading empty lines; stops at the first line with content or at EOF.
54 while( !rStream
.IsEof() && !sLine
.Len())
55 rStream
.ReadLine( sLine
);
// Old-style files are recognised by the "($$)" field delimiter.
58 if( sLine
.Search( "($$)" ) != STRING_NOTFOUND
)
59 nFileType
= GSI_FILE_OLDSTYLE
;
// NOTE(review): presumably the else-branch of the test above - TODO confirm.
61 nFileType
= GSI_FILE_L10NFRAMEWORK
;
69 /*****************************************************************************/
// GetGSILineId: derives the identifier of one GSI line; main() compares the
// identifiers of consecutive lines when splitting a file.
//   rLine     - the GSI line to inspect (not modified).
//   nFileType - GSI_FILE_OLDSTYLE or GSI_FILE_L10NFRAMEWORK.
// Old style: "($$)" delimiters are replaced by tabs and field 0 is taken.
// L10n framework: tab-separated fields 0, 1, 4 and 5 are appended in order.
// NOTE(review): this listing appears to have lost lines. The declaration of
// sId, its initial value in the old-style case (presumably rLine), the break
// statements, any separator between the appended fields and the return are
// not shown - confirm against the pristine file.
70 ByteString
GetGSILineId( const ByteString
&rLine
, USHORT nFileType
)
71 /*****************************************************************************/
74 switch ( nFileType
) {
75 case GSI_FILE_OLDSTYLE
:
// Normalise the old "($$)" delimiter to a tab so GetToken can be used.
77 sId
.SearchAndReplaceAll( "($$)", "\t" );
78 sId
= sId
.GetToken( 0, '\t' );
81 case GSI_FILE_L10NFRAMEWORK
:
// The identifier is assembled from fields 0, 1, 4 and 5 of the line.
82 sId
= rLine
.GetToken( 0, '\t' );
84 sId
+= rLine
.GetToken( 1, '\t' );
86 sId
+= rLine
.GetToken( 4, '\t' );
88 sId
+= rLine
.GetToken( 5, '\t' );
94 /*****************************************************************************/
// GetGSILineLangId: extracts the language-id field of one GSI line.
//   rLine     - the GSI line to inspect (not modified).
//   nFileType - GSI_FILE_OLDSTYLE or GSI_FILE_L10NFRAMEWORK.
// Old style: "($$)" delimiters are replaced by tabs and field 2 is taken.
// L10n framework: tab-separated field 9 is taken.
// NOTE(review): this listing appears to have lost lines. The declaration of
// sLangId, its initial value in the old-style case (presumably rLine), the
// break statements and the return are not shown - confirm against the
// pristine file.
95 ByteString
GetGSILineLangId( const ByteString
&rLine
, USHORT nFileType
)
96 /*****************************************************************************/
99 switch ( nFileType
) {
100 case GSI_FILE_OLDSTYLE
:
// Normalise the old "($$)" delimiter to a tab so GetToken can be used.
102 sLangId
.SearchAndReplaceAll( "($$)", "\t" );
103 sLangId
= sLangId
.GetToken( 2, '\t' );
106 case GSI_FILE_L10NFRAMEWORK
:
107 sLangId
= rLine
.GetToken( 9, '\t' );
113 /*****************************************************************************/
// ConvertGSILine: converts the text of one GSI line to or from UTF-8.
//   bToUTF8   - direction flag; main() passes true for the "-t" switch.
//   rLine     - the line to convert (passed by non-const reference).
//   nEncoding - the non-UTF-8 encoding handed to UTF8Converter.
//   nFileType - GSI_FILE_OLDSTYLE or GSI_FILE_L10NFRAMEWORK.
// Old style: the whole line is run through UTF8Converter.
// L10n framework: the line is walked field by field and only the fields with
// index 10 to 13 are converted; every field is appended to sConverted.
// NOTE(review): this listing appears to have lost lines. The tests on bToUTF8
// that choose between ConvertToUTF8 and ConvertFromUTF8, any tab re-inserted
// between fields, the assignment of sConverted back to rLine and the break
// statements are not shown - confirm against the pristine file.
114 void ConvertGSILine( BOOL bToUTF8
, ByteString
&rLine
,
115 rtl_TextEncoding nEncoding
, USHORT nFileType
)
116 /*****************************************************************************/
118 switch ( nFileType
) {
119 case GSI_FILE_OLDSTYLE
:
// Whole-line conversion; presumably only one of the two calls runs per call.
121 rLine
= UTF8Converter::ConvertToUTF8( rLine
, nEncoding
);
123 rLine
= UTF8Converter::ConvertFromUTF8( rLine
, nEncoding
);
126 case GSI_FILE_L10NFRAMEWORK
: {
// Accumulates the rebuilt line.
127 ByteString sConverted
;
128 for ( USHORT i
= 0; i
< rLine
.GetTokenCount( '\t' ); i
++ ) {
129 ByteString sToken
= rLine
.GetToken( i
, '\t' );
// Only fields 10..13 are converted; all other fields are copied unchanged.
130 if (( i
> 9 ) && ( i
< 14 )) {
132 sToken
= UTF8Converter::ConvertToUTF8( sToken
, nEncoding
);
134 sToken
= UTF8Converter::ConvertFromUTF8( sToken
, nEncoding
);
138 sConverted
+= sToken
;
146 /*****************************************************************************/
148 /*****************************************************************************/
// Usage text: prints the program banner, the command-line syntax, the
// accepted charset names and the accepted language ids to stdout.
// NOTE(review): the header of the enclosing function is not present in this
// listing (it would sit between the two comment bars above), so its name and
// signature cannot be confirmed from here.
// NOTE(review): the charset parser in main() also accepts MS_1257 and UTF8,
// which the "Allowed charsets" list below does not mention.
150 fprintf( stdout
, "\n" );
151 fprintf( stdout
, "gsiconv (c)1999 by StarOffice Entwicklungs GmbH\n" );
152 fprintf( stdout
, "===============================================\n" );
153 fprintf( stdout
, "\n" );
154 fprintf( stdout
, "gsiconv converts strings in GSI-Files (Gutschmitt Interface) from or to UTF-8\n" );
155 fprintf( stdout
, "\n" );
// Command-line syntax and switches.
156 fprintf( stdout
, "Syntax: gsiconv (-t|-f langid charset)|(-p n) filename\n" );
157 fprintf( stdout
, "Switches: -t => conversion from charset to UTF-8\n" );
158 fprintf( stdout
, " -f => conversion from UTF-8 to charset\n" );
159 fprintf( stdout
, " -p n => creates several files with ca. n lines\n" );
160 fprintf( stdout
, "\n" );
// Charset names for the charset argument.
161 fprintf( stdout
, "Allowed charsets:\n" );
162 fprintf( stdout
, " MS_932 => Japanese\n" );
163 fprintf( stdout
, " MS_936 => Chinese Simplified\n" );
164 fprintf( stdout
, " MS_949 => Korean\n" );
165 fprintf( stdout
, " MS_950 => Chinese Traditional\n" );
166 fprintf( stdout
, " MS_1250 => East Europe\n" );
167 fprintf( stdout
, " MS_1251 => Cyrillic\n" );
168 fprintf( stdout
, " MS_1252 => West Europe\n" );
169 fprintf( stdout
, " MS_1253 => Greek\n" );
170 fprintf( stdout
, " MS_1254 => Turkish\n" );
171 fprintf( stdout
, " MS_1255 => Hebrew\n" );
172 fprintf( stdout
, " MS_1256 => Arabic\n" );
173 fprintf( stdout
, "\n" );
// Numeric language ids for the langid argument.
174 fprintf( stdout
, "Allowed langids:\n" );
175 fprintf( stdout
, " 1 => ENGLISH_US\n" );
176 fprintf( stdout
, " 3 => PORTUGUESE \n" );
177 fprintf( stdout
, " 4 => GERMAN_DE (new german style)\n" );
178 fprintf( stdout
, " 7 => RUSSIAN\n" );
179 fprintf( stdout
, " 30 => GREEK\n" );
180 fprintf( stdout
, " 31 => DUTCH\n" );
181 fprintf( stdout
, " 33 => FRENCH\n" );
182 fprintf( stdout
, " 34 => SPANISH\n" );
183 fprintf( stdout
, " 35 => FINNISH\n" );
184 fprintf( stdout
, " 36 => HUNGARIAN\n" );
185 fprintf( stdout
, " 39 => ITALIAN\n" );
186 fprintf( stdout
, " 42 => CZECH\n" );
187 fprintf( stdout
, " 44 => ENGLISH (UK)\n" );
188 fprintf( stdout
, " 45 => DANISH\n" );
189 fprintf( stdout
, " 46 => SWEDISH\n" );
190 fprintf( stdout
, " 47 => NORWEGIAN\n" );
191 fprintf( stdout
, " 49 => GERMAN (old german style)\n" );
192 fprintf( stdout
, " 55 => PORTUGUESE_BRAZILIAN\n" );
193 fprintf( stdout
, " 81 => JAPANESE\n" );
194 fprintf( stdout
, " 82 => KOREAN\n" );
195 fprintf( stdout
, " 86 => CHINESE_SIMPLIFIED\n" );
196 fprintf( stdout
, " 88 => CHINESE_TRADITIONAL\n" );
197 fprintf( stdout
, " 90 => TURKISH\n" );
198 fprintf( stdout
, " 96 => ARABIC\n" );
199 fprintf( stdout
, " 97 => HEBREW\n" );
200 fprintf( stdout
, "\n" );
203 /*****************************************************************************/
// main: command-line entry point of gsiconv. Two modes are visible:
//   -p n filename             split the GSI file into several output files of
//                             roughly n lines each;
//   -t|-f langid charset file convert the lines of one language to (-t) or
//                             from (-f) UTF-8 and print every line to stdout.
// Fix in this revision: in the -t/-f mode the "not found" and "could not
// open" messages printed argv[3] (the charset) although the file that is
// checked and opened is argv[4]; they now print argv[4].
// NOTE(review): this listing appears to have lost lines. The #else/#endif of
// the signature switch, the braces, the reactions to the error conditions,
// several declarations (sGSILine, sOldId, nLine), the line counter updates
// and the return are not shown - confirm against the pristine file.
204 #if defined(UNX) || defined(OS2)
205 int main( int argc
, char *argv
[] )
207 int _cdecl
main( int argc
, char *argv
[] )
209 /*****************************************************************************/
// Only 3 or 4 arguments after the program name are accepted; the reaction to
// any other count is not visible in this listing.
211 if (( argc
!= 5 ) && ( argc
!= 4 )) {
// ---- split mode: argv[2] is the line count, argv[3] is the GSI file ----
217 if ( ByteString( argv
[ 1 ] ) == "-p" ) {
219 DirEntry aSource
= DirEntry( String( argv
[ 3 ], RTL_TEXTENCODING_ASCII_US
));
220 if ( !aSource
.Exists()) {
221 fprintf( stderr
, "\nERROR: GSI-File %s not found!\n\n", ByteString( argv
[ 3 ] ).GetBuffer());
// Output files are named after the source: base name and extension are kept.
225 DirEntry
aOutput( aSource
);
227 String sBase
= aOutput
.GetBase();
228 String sExt
= aOutput
.GetExtension();
230 String
sGSI( argv
[ 3 ], RTL_TEXTENCODING_ASCII_US
);
231 SvFileStream
aGSI( sGSI
, STREAM_STD_READ
);
232 if ( !aGSI
.IsOpen()) {
233 fprintf( stderr
, "\nERROR: Could not open GSI-File %s!\n\n", ByteString( argv
[ 3 ] ).GetBuffer());
237 USHORT
nFileType( GetGSIFileType( aGSI
));
239 ULONG nMaxLines
= (ULONG
) ByteString( argv
[ 2 ] ).ToInt64();
// NOTE(review): the condition guarding this message is not shown; presumably
// it rejects a line count of 0 - TODO confirm.
241 fprintf( stderr
, "\nERROR: Linecount must be at least 1!\n\n" );
// Running number appended to the base name of each output file.
248 ULONG nOutputFile
= 1;
// First output name: <base>_<number>, then "." (the extension is presumably
// appended on a line missing from this listing).
250 String
sOutput( sBase
);
251 sOutput
+= String( "_", RTL_TEXTENCODING_ASCII_US
);
252 sOutput
+= String::CreateFromInt64( nOutputFile
);
254 sOutput
+= String( ".", RTL_TEXTENCODING_ASCII_US
);
259 aOutput
.SetName( sOutput
);
260 SvFileStream
aOutputStream( aOutput
.GetFull(), STREAM_STD_WRITE
| STREAM_TRUNC
);
262 while ( !aGSI
.IsEof()) {
264 aGSI
.ReadLine( sGSILine
);
265 ByteString
sId( GetGSILineId( sGSILine
, nFileType
));
// Start a new output file once the line budget is reached, but only when the
// id changes, so lines sharing an id are not split across two files.
269 if (( nLine
>= nMaxLines
) && ( sId
!= sOldId
)) {
270 aOutputStream
.Close();
// Report the file that was just finished.
272 ByteString
sText( aOutput
.GetFull(), gsl_getSystemTextEncoding());
274 sText
+= ByteString::CreateFromInt64( nLine
);
275 sText
+= " lines written.";
277 fprintf( stdout
, "%s\n", sText
.GetBuffer());
// Name of the next output file, built the same way as the first one.
278 String
sOutput1( sBase
);
279 sOutput1
+= String( "_", RTL_TEXTENCODING_ASCII_US
);
280 sOutput1
+= String::CreateFromInt64( nOutputFile
);
282 sOutput1
+= String( ".", RTL_TEXTENCODING_ASCII_US
);
287 aOutput
.SetName( sOutput1
);
289 aOutputStream
.Open( aOutput
.GetFull(), STREAM_STD_WRITE
| STREAM_TRUNC
);
// Every input line is copied to the currently open output file.
293 aOutputStream
.WriteLine( sGSILine
);
299 aOutputStream
.Close();
// NOTE(review): this summary for the last file is built but no fprintf for it
// is visible, and it uses RTL_TEXTENCODING_ASCII_US where the in-loop report
// uses gsl_getSystemTextEncoding() - confirm whether both are intended.
301 ByteString
sText( aOutput
.GetFull(), RTL_TEXTENCODING_ASCII_US
);
303 sText
+= ByteString::CreateFromInt64( nLine
);
304 sText
+= " lines written.";
// ---- convert mode: argv[2] = langid, argv[3] = charset, argv[4] = file ----
312 if ( ByteString( argv
[ 1 ] ) == "-t" || ByteString( argv
[ 1 ] ) == "-f" ) {
// NOTE(review): declared without an initial value and no final else is
// visible below; if no charset name matches, nEncoding would be used
// uninitialised unless a dropped branch handles that case - TODO confirm.
313 rtl_TextEncoding nEncoding
;
315 ByteString
sCurLangId( argv
[ 2 ] );
// The charset name is matched case-insensitively (upper-cased first).
317 ByteString
sCharset( argv
[ 3 ] );
318 sCharset
.ToUpperAscii();
320 if ( sCharset
== "MS_932" ) nEncoding
= RTL_TEXTENCODING_MS_932
;
321 else if ( sCharset
== "MS_936" ) nEncoding
= RTL_TEXTENCODING_MS_936
;
322 else if ( sCharset
== "MS_949" ) nEncoding
= RTL_TEXTENCODING_MS_949
;
323 else if ( sCharset
== "MS_950" ) nEncoding
= RTL_TEXTENCODING_MS_950
;
324 else if ( sCharset
== "MS_1250" ) nEncoding
= RTL_TEXTENCODING_MS_1250
;
325 else if ( sCharset
== "MS_1251" ) nEncoding
= RTL_TEXTENCODING_MS_1251
;
326 else if ( sCharset
== "MS_1252" ) nEncoding
= RTL_TEXTENCODING_MS_1252
;
327 else if ( sCharset
== "MS_1253" ) nEncoding
= RTL_TEXTENCODING_MS_1253
;
328 else if ( sCharset
== "MS_1254" ) nEncoding
= RTL_TEXTENCODING_MS_1254
;
329 else if ( sCharset
== "MS_1255" ) nEncoding
= RTL_TEXTENCODING_MS_1255
;
330 else if ( sCharset
== "MS_1256" ) nEncoding
= RTL_TEXTENCODING_MS_1256
;
331 else if ( sCharset
== "MS_1257" ) nEncoding
= RTL_TEXTENCODING_MS_1257
;
332 else if ( sCharset
== "UTF8" ) nEncoding
= RTL_TEXTENCODING_UTF8
;
// In this mode the GSI file is the fourth argument, so diagnostics must name
// argv[4] (argv[3] is the charset).
339 DirEntry aSource
= DirEntry( String( argv
[ 4 ], RTL_TEXTENCODING_ASCII_US
));
340 if ( !aSource
.Exists()) {
341 fprintf( stderr
, "\nERROR: GSI-File %s not found!\n\n", ByteString( argv
[ 4 ] ).GetBuffer());
345 String
sGSI( argv
[ 4 ], RTL_TEXTENCODING_ASCII_US
);
346 SvFileStream
aGSI( sGSI
, STREAM_STD_READ
);
347 if ( !aGSI
.IsOpen()) {
348 fprintf( stderr
, "\nERROR: Could not open GSI-File %s!\n\n", ByteString( argv
[ 4 ] ).GetBuffer());
351 USHORT
nFileType( GetGSIFileType( aGSI
));
// Every line is echoed to stdout; only lines whose language id equals the
// requested one are converted first.
354 while ( !aGSI
.IsEof()) {
356 aGSI
.ReadLine( sGSILine
);
357 ByteString
sLangId( GetGSILineLangId( sGSILine
, nFileType
));
358 if ( sLangId
== sCurLangId
)
359 ConvertGSILine(( ByteString( argv
[ 1 ] ) == "-t" ), sGSILine
, nEncoding
, nFileType
);
361 fprintf( stdout
, "%s\n", sGSILine
.GetBuffer());