1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: txtconv.cxx,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_transex3.hxx"
34 #include <tools/fsys.hxx>
35 #include <tools/stream.hxx>
38 #include "utf8conv.hxx"
// Declared here and called from main() on each input line; the definition is
// not in this file. Presumably rewrites rString in place, replacing half-width
// katakana with full-width forms (per the HW2FW usage text) -- confirm against
// the definition.
// NOTE(review): "Halfwitdh" is a misspelling of "Halfwidth". The name must
// match the external definition, so it cannot be corrected here alone.
40 extern void ConvertHalfwitdhToFullwidth( String
& rString
);
42 /*****************************************************************************/
44 /*****************************************************************************/
46 fprintf( stdout
, "\n" );
47 fprintf( stdout
, "txtconv (c)2001 by StarOffice Entwicklungs GmbH\n" );
48 fprintf( stdout
, "===============================================\n" );
49 fprintf( stdout
, "\n" );
50 fprintf( stdout
, "txtconv converts textfiles from or to UTF-8\n" );
51 fprintf( stdout
, "\n" );
52 fprintf( stdout
, "Syntax: txtconv -t|-f charset filename (destinationfile)\n" );
53 fprintf( stdout
, "Switches: -t => conversion from charset to UTF-8\n" );
54 fprintf( stdout
, " -f => conversion from UTF-8 to charset\n" );
55 fprintf( stdout
, "\n" );
56 fprintf( stdout
, "Allowed charsets:\n" );
57 fprintf( stdout
, " MS_932 => Japanese\n" );
58 fprintf( stdout
, " MS_936 => Chinese Simplified\n" );
59 fprintf( stdout
, " MS_949 => Korean\n" );
60 fprintf( stdout
, " MS_950 => Chinese Traditional\n" );
61 fprintf( stdout
, " MS_1250 => East Europe\n" );
62 fprintf( stdout
, " MS_1251 => Cyrillic\n" );
63 fprintf( stdout
, " MS_1252 => West Europe\n" );
64 fprintf( stdout
, " MS_1253 => Greek\n" );
65 fprintf( stdout
, " MS_1254 => Turkish\n" );
66 fprintf( stdout
, " MS_1255 => Hebrew\n" );
67 fprintf( stdout
, " MS_1256 => Arabic\n" );
68 fprintf( stdout
, " HW2FW => Only with -t, converts half to full width katakana" );
69 fprintf( stdout
, "\n" );
72 /*****************************************************************************/
73 #if defined(UNX) || defined(OS2)
74 int main( int argc
, char *argv
[] )
76 int _cdecl
main( int argc
, char *argv
[] )
78 /*****************************************************************************/
80 if (( argc
!= 4 ) && ( argc
!= 5 )) {
85 if ( ByteString( argv
[ 1 ] ) == "-t" || ByteString( argv
[ 1 ] ) == "-f" ) {
86 rtl_TextEncoding nEncoding
= RTL_TEXTENCODING_MS_1252
;
90 ByteString
sCharset( argv
[ 2 ] );
91 sCharset
.ToUpperAscii();
93 if ( sCharset
== "MS_932" ) nEncoding
= RTL_TEXTENCODING_MS_932
;
94 else if ( sCharset
== "MS_936" ) nEncoding
= RTL_TEXTENCODING_MS_936
;
95 else if ( sCharset
== "MS_949" ) nEncoding
= RTL_TEXTENCODING_MS_949
;
96 else if ( sCharset
== "MS_950" ) nEncoding
= RTL_TEXTENCODING_MS_950
;
97 else if ( sCharset
== "MS_1250" ) nEncoding
= RTL_TEXTENCODING_MS_1250
;
98 else if ( sCharset
== "MS_1251" ) nEncoding
= RTL_TEXTENCODING_MS_1251
;
99 else if ( sCharset
== "MS_1252" ) nEncoding
= RTL_TEXTENCODING_MS_1252
;
100 else if ( sCharset
== "MS_1253" ) nEncoding
= RTL_TEXTENCODING_MS_1253
;
101 else if ( sCharset
== "MS_1254" ) nEncoding
= RTL_TEXTENCODING_MS_1254
;
102 else if ( sCharset
== "MS_1255" ) nEncoding
= RTL_TEXTENCODING_MS_1255
;
103 else if ( sCharset
== "MS_1256" ) nEncoding
= RTL_TEXTENCODING_MS_1256
;
104 else if ( sCharset
== "MS_1257" ) nEncoding
= RTL_TEXTENCODING_MS_1257
;
105 else if (( sCharset
== "HW2FW" ) && ( ByteString( argv
[ 1 ] ) == "-t" )) bHW2FW
= TRUE
;
112 DirEntry aSource
= DirEntry( String( argv
[ 3 ], RTL_TEXTENCODING_ASCII_US
));
113 if ( !aSource
.Exists()) {
114 fprintf( stderr
, "\nERROR: File %s not found!\n\n", ByteString( argv
[ 3 ] ).GetBuffer());
119 SvFileStream aOutput
;
121 sOutput
= String( argv
[ 4 ], RTL_TEXTENCODING_ASCII_US
);
122 aOutput
.Open( sOutput
, STREAM_STD_WRITE
| STREAM_TRUNC
);
123 if ( !aOutput
.IsOpen()) {
124 fprintf( stderr
, "\nERROR: Could not open output file %s!\n\n", argv
[ 4 ]);
129 String
sGSI( argv
[ 3 ], RTL_TEXTENCODING_ASCII_US
);
130 SvFileStream
aGSI( sGSI
, STREAM_STD_READ
);
131 if ( !aGSI
.IsOpen()) {
132 fprintf( stderr
, "\nERROR: Could not open input file %s!\n\n", argv
[ 3 ]);
137 while ( !aGSI
.IsEof()) {
139 aGSI
.ReadLine( sGSILine
);
141 String
sConverter( sGSILine
, RTL_TEXTENCODING_UTF8
);
142 ConvertHalfwitdhToFullwidth( sConverter
);
143 sGSILine
= ByteString( sConverter
, RTL_TEXTENCODING_UTF8
);
146 if ( ByteString( argv
[ 1 ] ) == "-t" )
147 sGSILine
= UTF8Converter::ConvertToUTF8( sGSILine
, nEncoding
);
149 sGSILine
= UTF8Converter::ConvertFromUTF8( sGSILine
, nEncoding
);
152 if ( aOutput
.IsOpen())
153 aOutput
.WriteLine( sGSILine
);
155 fprintf( stdout
, "%s\n", sGSILine
.GetBuffer());
159 if ( aOutput
.IsOpen())