merge the formfield patch from ooo-build
[ooovba.git] / transex3 / source / gsiconv.cxx
blob80d1ad61988066741c06602c2adff68c4875f28d
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: gsiconv.cxx,v $
10 * $Revision: 1.8 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_transex3.hxx"
33 #include <stdio.h>
34 #include <tools/fsys.hxx>
35 #include <tools/stream.hxx>
37 // local includes
38 #include "utf8conv.hxx"
40 #define GSI_FILE_UNKNOWN 0x0000
41 #define GSI_FILE_OLDSTYLE 0x0001
42 #define GSI_FILE_L10NFRAMEWORK 0x0002
44 /*****************************************************************************/
45 USHORT GetGSIFileType( SvStream &rStream )
46 /*****************************************************************************/
48 USHORT nFileType = GSI_FILE_UNKNOWN;
50 ULONG nPos( rStream.Tell());
51 rStream.Seek( STREAM_SEEK_TO_BEGIN );
53 ByteString sLine;
54 while( !rStream.IsEof() && !sLine.Len())
55 rStream.ReadLine( sLine );
57 if( sLine.Len()) {
58 if( sLine.Search( "($$)" ) != STRING_NOTFOUND )
59 nFileType = GSI_FILE_OLDSTYLE;
60 else
61 nFileType = GSI_FILE_L10NFRAMEWORK;
64 rStream.Seek( nPos );
66 return nFileType;
69 /*****************************************************************************/
70 ByteString GetGSILineId( const ByteString &rLine, USHORT nFileType )
71 /*****************************************************************************/
73 ByteString sId;
74 switch ( nFileType ) {
75 case GSI_FILE_OLDSTYLE:
76 sId = rLine;
77 sId.SearchAndReplaceAll( "($$)", "\t" );
78 sId = sId.GetToken( 0, '\t' );
79 break;
81 case GSI_FILE_L10NFRAMEWORK:
82 sId = rLine.GetToken( 0, '\t' );
83 sId += "\t";
84 sId += rLine.GetToken( 1, '\t' );
85 sId += "\t";
86 sId += rLine.GetToken( 4, '\t' );
87 sId += "\t";
88 sId += rLine.GetToken( 5, '\t' );
89 break;
91 return sId;
94 /*****************************************************************************/
95 ByteString GetGSILineLangId( const ByteString &rLine, USHORT nFileType )
96 /*****************************************************************************/
98 ByteString sLangId;
99 switch ( nFileType ) {
100 case GSI_FILE_OLDSTYLE:
101 sLangId = rLine;
102 sLangId.SearchAndReplaceAll( "($$)", "\t" );
103 sLangId = sLangId.GetToken( 2, '\t' );
104 break;
106 case GSI_FILE_L10NFRAMEWORK:
107 sLangId = rLine.GetToken( 9, '\t' );
108 break;
110 return sLangId;
113 /*****************************************************************************/
114 void ConvertGSILine( BOOL bToUTF8, ByteString &rLine,
115 rtl_TextEncoding nEncoding, USHORT nFileType )
116 /*****************************************************************************/
118 switch ( nFileType ) {
119 case GSI_FILE_OLDSTYLE:
120 if ( bToUTF8 )
121 rLine = UTF8Converter::ConvertToUTF8( rLine, nEncoding );
122 else
123 rLine = UTF8Converter::ConvertFromUTF8( rLine, nEncoding );
124 break;
126 case GSI_FILE_L10NFRAMEWORK: {
127 ByteString sConverted;
128 for ( USHORT i = 0; i < rLine.GetTokenCount( '\t' ); i++ ) {
129 ByteString sToken = rLine.GetToken( i, '\t' );
130 if (( i > 9 ) && ( i < 14 )) {
131 if( bToUTF8 )
132 sToken = UTF8Converter::ConvertToUTF8( sToken, nEncoding );
133 else
134 sToken = UTF8Converter::ConvertFromUTF8( sToken, nEncoding );
136 if ( i )
137 sConverted += "\t";
138 sConverted += sToken;
140 rLine = sConverted;
142 break;
/*****************************************************************************/
void Help()
/*****************************************************************************/
{
    // Prints the usage text of the tool to stdout, one table entry per
    // output line. None of the lines contains a format specifier, so they
    // are written verbatim.
    static const char * const aUsageLines[] = {
        "\n",
        "gsiconv (c)1999 by StarOffice Entwicklungs GmbH\n",
        "===============================================\n",
        "\n",
        "gsiconv converts strings in GSI-Files (Gutschmitt Interface) from or to UTF-8\n",
        "\n",
        "Syntax: gsiconv (-t|-f langid charset)|(-p n) filename\n",
        "Switches: -t => conversion from charset to UTF-8\n",
        " -f => conversion from UTF-8 to charset\n",
        " -p n => creates several files with ca. n lines\n",
        "\n",
        "Allowed charsets:\n",
        " MS_932 => Japanese\n",
        " MS_936 => Chinese Simplified\n",
        " MS_949 => Korean\n",
        " MS_950 => Chinese Traditional\n",
        " MS_1250 => East Europe\n",
        " MS_1251 => Cyrillic\n",
        " MS_1252 => West Europe\n",
        " MS_1253 => Greek\n",
        " MS_1254 => Turkish\n",
        " MS_1255 => Hebrew\n",
        " MS_1256 => Arabic\n",
        "\n",
        "Allowed langids:\n",
        " 1 => ENGLISH_US\n",
        " 3 => PORTUGUESE \n",
        " 4 => GERMAN_DE (new german style)\n",
        " 7 => RUSSIAN\n",
        " 30 => GREEK\n",
        " 31 => DUTCH\n",
        " 33 => FRENCH\n",
        " 34 => SPANISH\n",
        " 35 => FINNISH\n",
        " 36 => HUNGARIAN\n",
        " 39 => ITALIAN\n",
        " 42 => CZECH\n",
        " 44 => ENGLISH (UK)\n",
        " 45 => DANISH\n",
        " 46 => SWEDISH\n",
        " 47 => NORWEGIAN\n",
        " 49 => GERMAN (old german style)\n",
        " 55 => PORTUGUESE_BRAZILIAN\n",
        " 81 => JAPANESE\n",
        " 82 => KOREAN\n",
        " 86 => CHINESE_SIMPLIFIED\n",
        " 88 => CHINESE_TRADITIONAL\n",
        " 90 => TURKISH\n",
        " 96 => ARABIC\n",
        " 97 => HEBREW\n",
        "\n"
    };

    const unsigned int nLineCount = sizeof( aUsageLines ) / sizeof( aUsageLines[ 0 ] );
    for ( unsigned int nIdx = 0; nIdx < nLineCount; ++nIdx )
        fputs( aUsageLines[ nIdx ], stdout );
}
203 /*****************************************************************************/
204 #if defined(UNX) || defined(OS2)
205 int main( int argc, char *argv[] )
206 #else
207 int _cdecl main( int argc, char *argv[] )
208 #endif
209 /*****************************************************************************/
211 if (( argc != 5 ) && ( argc != 4 )) {
212 Help();
213 exit ( 0 );
216 if ( argc == 4 ) {
217 if ( ByteString( argv[ 1 ] ) == "-p" ) {
219 DirEntry aSource = DirEntry( String( argv[ 3 ], RTL_TEXTENCODING_ASCII_US ));
220 if ( !aSource.Exists()) {
221 fprintf( stderr, "\nERROR: GSI-File %s not found!\n\n", ByteString( argv[ 3 ] ).GetBuffer());
222 exit ( 2 );
225 DirEntry aOutput( aSource );
227 String sBase = aOutput.GetBase();
228 String sExt = aOutput.GetExtension();
230 String sGSI( argv[ 3 ], RTL_TEXTENCODING_ASCII_US );
231 SvFileStream aGSI( sGSI, STREAM_STD_READ );
232 if ( !aGSI.IsOpen()) {
233 fprintf( stderr, "\nERROR: Could not open GSI-File %s!\n\n", ByteString( argv[ 3 ] ).GetBuffer());
234 exit ( 3 );
237 USHORT nFileType( GetGSIFileType( aGSI ));
239 ULONG nMaxLines = (ULONG) ByteString( argv[ 2 ] ).ToInt64();
240 if ( !nMaxLines ) {
241 fprintf( stderr, "\nERROR: Linecount must be at least 1!\n\n" );
242 exit ( 3 );
245 ByteString sGSILine;
246 ByteString sOldId;
247 ULONG nLine = 0;
248 ULONG nOutputFile = 1;
250 String sOutput( sBase );
251 sOutput += String( "_", RTL_TEXTENCODING_ASCII_US );
252 sOutput += String::CreateFromInt64( nOutputFile );
253 if ( sExt.Len()) {
254 sOutput += String( ".", RTL_TEXTENCODING_ASCII_US );
255 sOutput += sExt;
257 nOutputFile ++;
259 aOutput.SetName( sOutput );
260 SvFileStream aOutputStream( aOutput.GetFull(), STREAM_STD_WRITE | STREAM_TRUNC );
262 while ( !aGSI.IsEof()) {
264 aGSI.ReadLine( sGSILine );
265 ByteString sId( GetGSILineId( sGSILine, nFileType ));
267 nLine++;
269 if (( nLine >= nMaxLines ) && ( sId != sOldId )) {
270 aOutputStream.Close();
272 ByteString sText( aOutput.GetFull(), gsl_getSystemTextEncoding());
273 sText += " with ";
274 sText += ByteString::CreateFromInt64( nLine );
275 sText += " lines written.";
277 fprintf( stdout, "%s\n", sText.GetBuffer());
278 String sOutput1( sBase );
279 sOutput1 += String( "_", RTL_TEXTENCODING_ASCII_US );
280 sOutput1 += String::CreateFromInt64( nOutputFile );
281 if ( sExt.Len()) {
282 sOutput1 += String( ".", RTL_TEXTENCODING_ASCII_US );
283 sOutput1 += sExt;
285 nOutputFile ++;
287 aOutput.SetName( sOutput1 );
289 aOutputStream.Open( aOutput.GetFull(), STREAM_STD_WRITE | STREAM_TRUNC );
290 nLine = 0;
293 aOutputStream.WriteLine( sGSILine );
295 sOldId = sId;
298 aGSI.Close();
299 aOutputStream.Close();
301 ByteString sText( aOutput.GetFull(), RTL_TEXTENCODING_ASCII_US );
302 sText += " with ";
303 sText += ByteString::CreateFromInt64( nLine );
304 sText += " lines written.";
306 else {
307 Help();
308 exit( 1 );
311 else {
312 if ( ByteString( argv[ 1 ] ) == "-t" || ByteString( argv[ 1 ] ) == "-f" ) {
313 rtl_TextEncoding nEncoding;
315 ByteString sCurLangId( argv[ 2 ] );
317 ByteString sCharset( argv[ 3 ] );
318 sCharset.ToUpperAscii();
320 if ( sCharset == "MS_932" ) nEncoding = RTL_TEXTENCODING_MS_932;
321 else if ( sCharset == "MS_936" ) nEncoding = RTL_TEXTENCODING_MS_936;
322 else if ( sCharset == "MS_949" ) nEncoding = RTL_TEXTENCODING_MS_949;
323 else if ( sCharset == "MS_950" ) nEncoding = RTL_TEXTENCODING_MS_950;
324 else if ( sCharset == "MS_1250" ) nEncoding = RTL_TEXTENCODING_MS_1250;
325 else if ( sCharset == "MS_1251" ) nEncoding = RTL_TEXTENCODING_MS_1251;
326 else if ( sCharset == "MS_1252" ) nEncoding = RTL_TEXTENCODING_MS_1252;
327 else if ( sCharset == "MS_1253" ) nEncoding = RTL_TEXTENCODING_MS_1253;
328 else if ( sCharset == "MS_1254" ) nEncoding = RTL_TEXTENCODING_MS_1254;
329 else if ( sCharset == "MS_1255" ) nEncoding = RTL_TEXTENCODING_MS_1255;
330 else if ( sCharset == "MS_1256" ) nEncoding = RTL_TEXTENCODING_MS_1256;
331 else if ( sCharset == "MS_1257" ) nEncoding = RTL_TEXTENCODING_MS_1257;
332 else if ( sCharset == "UTF8" ) nEncoding = RTL_TEXTENCODING_UTF8;
334 else {
335 Help();
336 exit ( 1 );
339 DirEntry aSource = DirEntry( String( argv[ 4 ], RTL_TEXTENCODING_ASCII_US ));
340 if ( !aSource.Exists()) {
341 fprintf( stderr, "\nERROR: GSI-File %s not found!\n\n", ByteString( argv[ 3 ] ).GetBuffer());
342 exit ( 2 );
345 String sGSI( argv[ 4 ], RTL_TEXTENCODING_ASCII_US );
346 SvFileStream aGSI( sGSI, STREAM_STD_READ );
347 if ( !aGSI.IsOpen()) {
348 fprintf( stderr, "\nERROR: Could not open GSI-File %s!\n\n", ByteString( argv[ 3 ] ).GetBuffer());
349 exit ( 3 );
351 USHORT nFileType( GetGSIFileType( aGSI ));
353 ByteString sGSILine;
354 while ( !aGSI.IsEof()) {
356 aGSI.ReadLine( sGSILine );
357 ByteString sLangId( GetGSILineLangId( sGSILine, nFileType ));
358 if ( sLangId == sCurLangId )
359 ConvertGSILine(( ByteString( argv[ 1 ] ) == "-t" ), sGSILine, nEncoding, nFileType );
361 fprintf( stdout, "%s\n", sGSILine.GetBuffer());
364 aGSI.Close();
366 else {
367 Help();
368 exit( 1 );
371 return 0;