1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: strcvt.cxx,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // no include "precompiled_tools.hxx" because this is included in other cxx files.
33 // -----------------------------------------------------------------------
// Replaces the data held by this ByteString with the result of converting a
// sal_Unicode buffer: rtl_uString2String() fills a new string-data pointer
// (initialised to NULL), the old mpData is released, and the new pointer is
// stored in mpData.
//   eTextEncoding - target encoding handed to rtl_uString2String()
//   nCvtFlags     - conversion flags handed to rtl_uString2String()
// NOTE(review): this listing is a lossy extraction. The leading numbers are the
// original line numbers; lines 38 and 41 (presumably the opening brace and the
// pUniStr/nUniLen arguments) are absent -- confirm against the pristine file.
35 void ByteString::ImplUpdateStringFromUniString(
36 const sal_Unicode
* pUniStr
, sal_Size nUniLen
,
37 rtl_TextEncoding eTextEncoding
, sal_uInt32 nCvtFlags
)
39 ByteStringData
* pNewStringData
= NULL
;
40 rtl_uString2String( (rtl_String
**)(&pNewStringData
),
42 eTextEncoding
, nCvtFlags
);
// Drop the reference to the previous data before adopting the converted one.
43 STRING_RELEASE((STRING_TYPE
*)mpData
);
44 mpData
= pNewStringData
;
47 // =======================================================================
// Constructs a ByteString from a complete UniString: the source characters
// (rUniStr.mpData->maStr, rUniStr.mpData->mnLen code units) are converted with
// rtl_uString2String() directly into mpData.
//   eTextEncoding - target encoding for the conversion
//   nCvtFlags     - conversion flags passed through unchanged
// NOTE(review): original lines 50, 53-54 and 58 are missing from this
// extraction (presumably braces and an initialisation of mpData) -- confirm.
49 ByteString::ByteString( const UniString
& rUniStr
, rtl_TextEncoding eTextEncoding
, sal_uInt32 nCvtFlags
)
51 DBG_CTOR( ByteString
, DbgCheckByteString
);
52 DBG_CHKOBJ( &rUniStr
, UniString
, DbgCheckUniString
);
55 rtl_uString2String( (rtl_String
**)(&mpData
),
56 rUniStr
.mpData
->maStr
, rUniStr
.mpData
->mnLen
,
57 eTextEncoding
, nCvtFlags
);
60 // -----------------------------------------------------------------------
// Constructs a ByteString from a section of a UniString, starting at nPos and
// covering nLen code units; the section (rUniStr.mpData->maStr+nPos, nLen) is
// converted with rtl_uString2String() into mpData.
//   nPos          - start index inside rUniStr
//   nLen          - requested length; replaced by the remaining length
//                   (mnLen - nPos) in the visible assignment below
//   eTextEncoding - target encoding for the conversion
//   nCvtFlags     - conversion flags passed through unchanged
// NOTE(review): original lines 64, 70-72, 75 and 77-79 are missing from this
// extraction, so the body of the nPos check and the condition guarding the
// length clamp cannot be seen here -- confirm against the pristine file.
62 ByteString::ByteString( const UniString
& rUniStr
, xub_StrLen nPos
, xub_StrLen nLen
,
63 rtl_TextEncoding eTextEncoding
, sal_uInt32 nCvtFlags
)
65 DBG_CTOR( ByteString
, DbgCheckByteString
);
66 DBG_CHKOBJ( &rUniStr
, UniString
, DbgCheckUniString
);
68 // Determine the string length
69 if ( nPos
> rUniStr
.mpData
->mnLen
)
73 // Correct the length if necessary
74 sal_Int32 nMaxLen
= rUniStr
.mpData
->mnLen
-nPos
;
76 nLen
= static_cast< xub_StrLen
>(nMaxLen
);
80 rtl_uString2String( (rtl_String
**)(&mpData
),
81 rUniStr
.mpData
->maStr
+nPos
, nLen
,
82 eTextEncoding
, nCvtFlags
);
85 // -----------------------------------------------------------------------
// Constructs a ByteString from a sal_Unicode buffer whose length is obtained
// from ImplStringLen( pUniStr ); the characters are converted with
// rtl_uString2String() into mpData.
//   pUniStr       - source characters; a debug assertion reports a NULL pointer
//   eTextEncoding - target encoding for the conversion
//   nCvtFlags     - conversion flags passed through unchanged
// NOTE(review): original lines 89, 92-93 and 97 are missing from this
// extraction (presumably braces and an initialisation of mpData) -- confirm.
87 ByteString::ByteString( const sal_Unicode
* pUniStr
,
88 rtl_TextEncoding eTextEncoding
, sal_uInt32 nCvtFlags
)
90 DBG_CTOR( ByteString
, DbgCheckByteString
);
91 DBG_ASSERT( pUniStr
, "ByteString::ByteString() - pUniStr is NULL" );
94 rtl_uString2String( (rtl_String
**)(&mpData
),
95 pUniStr
, ImplStringLen( pUniStr
),
96 eTextEncoding
, nCvtFlags
);
99 // -----------------------------------------------------------------------
// Constructs a ByteString from a sal_Unicode buffer with an explicit length.
// When nLen equals STRING_LEN the length is taken from ImplStringLen( pUniStr )
// instead. The characters are converted with rtl_uString2String() into mpData.
//   pUniStr       - source characters; a debug assertion reports a NULL pointer
//   nLen          - number of code units, or STRING_LEN to have it computed
//   eTextEncoding - target encoding for the conversion
//   nCvtFlags     - conversion flags passed through unchanged
// NOTE(review): original lines 103, 106, 109-110, 112 and 114 are missing from
// this extraction; line 112 presumably carried the pUniStr/nLen arguments of
// the conversion call -- confirm against the pristine file.
101 ByteString::ByteString( const sal_Unicode
* pUniStr
, xub_StrLen nLen
,
102 rtl_TextEncoding eTextEncoding
, sal_uInt32 nCvtFlags
)
104 DBG_CTOR( ByteString
, DbgCheckByteString
);
105 DBG_ASSERT( pUniStr
, "ByteString::ByteString() - pUniStr is NULL" );
107 if ( nLen
== STRING_LEN
)
108 nLen
= ImplStringLen( pUniStr
);
111 rtl_uString2String( (rtl_String
**)(&mpData
),
113 eTextEncoding
, nCvtFlags
);
116 // =======================================================================
// Identity table of all 256 byte values: entry i holds the value i. It is
// passed (cast to const sal_Char*) as the 256-byte source buffer to
// rtl_convertTextToUnicode() when the per-encoding lookup tables are built in
// ImplGet1ByteUnicodeTab() and ImplGet1ByteConvertTab().
// NOTE(review): the braces of the initializer (original lines 119 and 152) are
// missing from this extraction.
118 static sal_uChar aImplByteTab
[256] =
120 0, 1, 2, 3, 4, 5, 6, 7,
121 8, 9, 10, 11, 12, 13, 14, 15,
122 16, 17, 18, 19, 20, 21, 22, 23,
123 24, 25, 26, 27, 28, 29, 30, 31,
124 32, 33, 34, 35, 36, 37, 38, 39,
125 40, 41, 42, 43, 44, 45, 46, 47,
126 48, 49, 50, 51, 52, 53, 54, 55,
127 56, 57, 58, 59, 60, 61, 62, 63,
128 64, 65, 66, 67, 68, 69, 70, 71,
129 72, 73, 74, 75, 76, 77, 78, 79,
130 80, 81, 82, 83, 84, 85, 86, 87,
131 88, 89, 90, 91, 92, 93, 94, 95,
132 96, 97, 98, 99, 100, 101, 102, 103,
133 104, 105, 106, 107, 108, 109, 110, 111,
134 112, 113, 114, 115, 116, 117, 118, 119,
135 120, 121, 122, 123, 124, 125, 126, 127,
136 128, 129, 130, 131, 132, 133, 134, 135,
137 136, 137, 138, 139, 140, 141, 142, 143,
138 144, 145, 146, 147, 148, 149, 150, 151,
139 152, 153, 154, 155, 156, 157, 158, 159,
140 160, 161, 162, 163, 164, 165, 166, 167,
141 168, 169, 170, 171, 172, 173, 174, 175,
142 176, 177, 178, 179, 180, 181, 182, 183,
143 184, 185, 186, 187, 188, 189, 190, 191,
144 192, 193, 194, 195, 196, 197, 198, 199,
145 200, 201, 202, 203, 204, 205, 206, 207,
146 208, 209, 210, 211, 212, 213, 214, 215,
147 216, 217, 218, 219, 220, 221, 222, 223,
148 224, 225, 226, 227, 228, 229, 230, 231,
149 232, 233, 234, 235, 236, 237, 238, 239,
150 240, 241, 242, 243, 244, 245, 246, 247,
151 248, 249, 250, 251, 252, 253, 254, 255
154 // =======================================================================
// One cached byte-to-Unicode table for a single text encoding. Entries are
// chained through mpNext; the chain head is TOOLSINDATA::mpFirstUniTabData
// (see ImplGet1ByteUnicodeTab()).
// NOTE(review): the struct's braces (original lines 157 and 161) are missing
// from this extraction.
156 struct Impl1ByteUnicodeTabData
// Encoding this table was built for.
158 rtl_TextEncoding meTextEncoding
;
// Unicode value for each of the 256 byte values.
159 sal_Unicode maUniTab
[256];
// Next cached entry in the chain.
160 Impl1ByteUnicodeTabData
* mpNext
;
163 // -----------------------------------------------------------------------
// One cached pair of byte-to-byte conversion tables for a source/destination
// encoding pair. Entries are chained through mpNext; the chain head is
// TOOLSINDATA::mpFirstConvertTabData (see ImplGet1ByteConvertTab()).
// NOTE(review): the struct's braces (original lines 166 and 172) are missing
// from this extraction.
165 struct Impl1ByteConvertTabData
// Encoding the input bytes are interpreted in.
167 rtl_TextEncoding meSrcTextEncoding
;
// Encoding the output bytes are produced in.
168 rtl_TextEncoding meDestTextEncoding
;
// Table filled using RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0.
169 sal_uChar maConvertTab
[256];
// Table filled using RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE.
170 sal_uChar maRepConvertTab
[256];
// Next cached entry in the chain.
171 Impl1ByteConvertTabData
* mpNext
;
174 // =======================================================================
// Returns the 256-entry byte-to-Unicode table for eTextEncoding.
// A cached Impl1ByteUnicodeTabData entry whose meTextEncoding matches is
// returned directly. Otherwise, if rtl_getTextEncodingInfo() reports
// MaximumCharSize == 1, a new entry is allocated, filled by converting the
// identity table aImplByteTab with rtl_convertTextToUnicode(), stored as the
// new chain head in pToolsData->mpFirstUniTabData, and its maUniTab returned.
// NOTE(review): many lines are missing from this extraction (178, 180, 182,
// 184-186, 189-191, 198, 204-206, 210, 218-220, 223-228). In particular:
//  - the two pToolsData declarations below are presumably alternative
//    preprocessor branches whose directives were dropped;
//  - the loop over the chain, the handling of a failed conversion and the
//    fall-through return value are not visible.
// Confirm all of these against the pristine file.
176 sal_Unicode
* ImplGet1ByteUnicodeTab( rtl_TextEncoding eTextEncoding
)
179 TOOLSINDATA
* pToolsData
= ImplGetToolsInData();
181 TOOLSINDATA
* pToolsData
= 0x0;
183 Impl1ByteUnicodeTabData
* pTab
= pToolsData
->mpFirstUniTabData
;
// Cache hit: hand out the existing table.
187 if ( pTab
->meTextEncoding
== eTextEncoding
)
188 return pTab
->maUniTab
;
192 // get TextEncodingInfo
193 rtl_TextEncodingInfo aTextEncInfo
;
194 aTextEncInfo
.StructSize
= sizeof( aTextEncInfo
);
195 rtl_getTextEncodingInfo( eTextEncoding
, &aTextEncInfo
);
// A table is only built when the encoding's maximum character size is 1.
197 if ( aTextEncInfo
.MaximumCharSize
== 1 )
199 pTab
= new Impl1ByteUnicodeTabData
;
200 pTab
->meTextEncoding
= eTextEncoding
;
201 pTab
->mpNext
= pToolsData
->mpFirstUniTabData
;
203 rtl_TextToUnicodeConverter hConverter
;
207 hConverter
= rtl_createTextToUnicodeConverter( eTextEncoding
);
// Convert all 256 byte values in one call.
208 nDestChars
= rtl_convertTextToUnicode( hConverter
, 0,
209 (const sal_Char
*)aImplByteTab
, 256,
211 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE
|
212 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT
|
213 RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT
,
214 &nInfo
, &nSrcBytes
);
215 rtl_destroyTextToUnicodeConverter( hConverter
);
// Checks whether the conversion consumed and produced exactly 256 units.
217 if ( (nSrcBytes
!= 256) || (nDestChars
!= 256) )
// Publish the new entry as the head of the chain.
221 pToolsData
->mpFirstUniTabData
= pTab
;
222 return pTab
->maUniTab
;
229 // -----------------------------------------------------------------------
// Returns a 256-entry byte-to-byte table that maps bytes of eSrcTextEncoding to
// bytes of eDestTextEncoding; either maRepConvertTab or maConvertTab of the
// matching Impl1ByteConvertTabData entry is returned.
// A cached entry with the same source/destination pair is reused. Otherwise,
// if both encodings report MaximumCharSize == 1, a new entry is built: the
// identity table aImplByteTab is converted to Unicode with the source
// encoding, then back to bytes with the destination encoding -- once with
// RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0 into maConvertTab and once with
// RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE into maRepConvertTab -- and the
// entry becomes the new chain head in pToolsData->mpFirstConvertTabData.
// NOTE(review): many lines are missing from this extraction (e.g. 233-235,
// 237, 239, 241-243, 246-247, 249, 251-254, 273-277, 282, 289-291, 294, 300,
// 302, 308, 311-313, 315, 317, 319-325). In particular the third parameter
// (presumably the replace flag that selects between the two tables), the
// loop over the chain, the Unicode buffer arguments of the conversion calls
// and all failure handling are not visible. The two pToolsData declarations
// are presumably alternative preprocessor branches. Confirm against the
// pristine file.
231 static sal_uChar
* ImplGet1ByteConvertTab( rtl_TextEncoding eSrcTextEncoding
,
232 rtl_TextEncoding eDestTextEncoding
,
236 TOOLSINDATA
* pToolsData
= ImplGetToolsInData();
238 TOOLSINDATA
* pToolsData
= 0x0;
240 Impl1ByteConvertTabData
* pTab
= pToolsData
->mpFirstConvertTabData
;
// Cache hit requires both the source and the destination encoding to match.
244 if ( (pTab
->meSrcTextEncoding
== eSrcTextEncoding
) &&
245 (pTab
->meDestTextEncoding
== eDestTextEncoding
) )
248 return pTab
->maRepConvertTab
;
250 return pTab
->maConvertTab
;
255 // get TextEncodingInfo
256 rtl_TextEncodingInfo aTextEncInfo1
;
257 aTextEncInfo1
.StructSize
= sizeof( aTextEncInfo1
);
258 rtl_getTextEncodingInfo( eSrcTextEncoding
, &aTextEncInfo1
);
259 rtl_TextEncodingInfo aTextEncInfo2
;
260 aTextEncInfo2
.StructSize
= sizeof( aTextEncInfo2
);
261 rtl_getTextEncodingInfo( eDestTextEncoding
, &aTextEncInfo2
);
// Tables are only built when both encodings have a maximum character size of 1.
263 if ( (aTextEncInfo1
.MaximumCharSize
== 1) &&
264 (aTextEncInfo2
.MaximumCharSize
== 1) )
266 pTab
= new Impl1ByteConvertTabData
;
267 pTab
->meSrcTextEncoding
= eSrcTextEncoding
;
268 pTab
->meDestTextEncoding
= eDestTextEncoding
;
269 pTab
->mpNext
= pToolsData
->mpFirstConvertTabData
;
271 rtl_TextToUnicodeConverter hConverter
;
272 rtl_UnicodeToTextConverter hConverter2
;
278 sal_Unicode aTempBuf
[256];
// Step 1: source bytes -> Unicode.
279 hConverter
= rtl_createTextToUnicodeConverter( eSrcTextEncoding
);
280 nDestChars
= rtl_convertTextToUnicode( hConverter
, 0,
281 (const sal_Char
*)aImplByteTab
, 256,
283 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT
|
284 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT
|
285 RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT
,
286 &nInfo
, &nSrcBytes
);
287 rtl_destroyTextToUnicodeConverter( hConverter
);
288 if ( (nSrcBytes
!= 256) || (nDestChars
!= 256) )
// Step 2: Unicode -> destination bytes, undefined characters mapped to 0.
292 hConverter2
= rtl_createUnicodeToTextConverter( eDestTextEncoding
);
293 nDestBytes
= rtl_convertUnicodeToText( hConverter2
, 0,
295 (sal_Char
*)pTab
->maConvertTab
, 256,
296 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0
|
297 RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT
,
298 &nInfo
, &nSrcChars
);
// NOTE(review): this test combines two '==' comparisons with '||', whereas the
// other checks in this function use '!=' with '||'; '&&' may have been
// intended here -- confirm before relying on it.
299 if ( (nDestBytes
== 256) || (nSrcChars
== 256) )
// Step 3: same conversion again, this time with replacement characters.
301 nDestBytes
= rtl_convertUnicodeToText( hConverter2
, 0,
303 (sal_Char
*)pTab
->maRepConvertTab
, 256,
304 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT
|
305 RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT
|
306 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE
,
307 &nInfo
, &nSrcChars
);
309 rtl_destroyUnicodeToTextConverter( hConverter2
);
310 if ( (nDestBytes
!= 256) || (nSrcChars
!= 256) )
// Publish the new entry as the head of the chain.
314 pToolsData
->mpFirstConvertTabData
= pTab
;
316 return pTab
->maRepConvertTab
;
318 return pTab
->maConvertTab
;
326 // =======================================================================
// Tears down both cached table chains held in TOOLSINDATA: each chain is
// walked via mpNext (the successor is saved before advancing) and the chain
// heads mpFirstUniTabData and mpFirstConvertTabData are reset to NULL.
// NOTE(review): original lines 329-330, 332, 334, 337-338, 340, 342, 344, 348,
// 350, 352 and 354-355 are missing from this extraction; the statements that
// free each entry (presumably at lines 340 and 350) and the first loop
// header are not visible. The two pToolsData declarations are presumably
// alternative preprocessor branches. Confirm against the pristine file.
328 void ImplDeleteCharTabData()
331 TOOLSINDATA
* pToolsData
= ImplGetToolsInData();
333 TOOLSINDATA
* pToolsData
= 0x0;
335 Impl1ByteUnicodeTabData
* pTempUniTab
;
336 Impl1ByteUnicodeTabData
* pUniTab
= pToolsData
->mpFirstUniTabData
;
// Remember the successor before moving on to it.
339 pTempUniTab
= pUniTab
->mpNext
;
341 pUniTab
= pTempUniTab
;
343 pToolsData
->mpFirstUniTabData
= NULL
;
345 Impl1ByteConvertTabData
* pTempConvertTab
;
346 Impl1ByteConvertTabData
* pConvertTab
= pToolsData
->mpFirstConvertTabData
;
347 while ( pConvertTab
)
// Remember the successor before moving on to it.
349 pTempConvertTab
= pConvertTab
->mpNext
;
351 pConvertTab
= pTempConvertTab
;
353 pToolsData
->mpFirstConvertTabData
= NULL
;
356 // =======================================================================
// Converts the bytes of this string from encoding eSource to encoding eTarget.
// Two paths are visible:
//  - a table path: a byte table is requested from ImplGet1ByteConvertTab() and
//    each byte of mpData->maStr is looked up in it;
//  - a Unicode path: the string is converted to a temporary sal_Unicode buffer
//    with rtl_convertTextToUnicode() and then written back through
//    ImplUpdateStringFromUniString() using eTarget.
//   bReplace - forwarded to ImplGet1ByteConvertTab(); not evaluated on the
//              Unicode path (see the comment further down)
// NOTE(review): original lines 360, 362-363, 365-366, 369-370, 372-379,
// 381-384, 390 and 409-412 are missing from this extraction; the condition
// that selects between the two paths, the per-character loop, the
// destination-buffer arguments and the release of pTempBuf are not visible.
// Confirm against the pristine file.
358 void ByteString::ImplStringConvert(
359 rtl_TextEncoding eSource
, rtl_TextEncoding eTarget
, BOOL bReplace
)
361 sal_uChar
* pConvertTab
= ImplGet1ByteConvertTab( eSource
, eTarget
, bReplace
);
364 char* pStr
= mpData
->maStr
;
// Look up the converted value of the current byte.
367 sal_uChar c
= (sal_uChar
)*pStr
;
368 sal_uChar cConv
= pConvertTab
[c
];
// ImplCopyStringData() is called and its result becomes the new write position.
371 pStr
= ImplCopyStringData( pStr
);
// NOTE(review): the handle is declared as rtl_UnicodeToTextConverter but is
// created, used and destroyed through the TextToUnicode API; the declared
// type looks wrong (rtl_TextToUnicodeConverter expected) -- confirm.
380 rtl_UnicodeToTextConverter hSrcConverter
= rtl_createTextToUnicodeConverter( eSource
);
385 sal_Unicode
* pTempBuf
;
// The temporary buffer holds one sal_Unicode per source byte.
386 nTempLen
= mpData
->mnLen
;
387 pTempBuf
= new sal_Unicode
[nTempLen
];
388 nDestChars
= rtl_convertTextToUnicode( hSrcConverter
, 0,
389 mpData
->maStr
, mpData
->mnLen
,
391 RTL_TEXTTOUNICODE_FLAGS_FLUSH
|
392 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE
|
393 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT
|
394 RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT
,
395 &nInfo
, &nSrcBytes
);
396 rtl_destroyTextToUnicodeConverter( hSrcConverter
);
397 // bReplace is not evaluated here, because no replacement representation
398 // is possible for multi-byte text encodings anyway. Since the string
399 // can change in length anyway, no attention is otherwise paid to
400 // keeping the length unchanged either.
401 ImplUpdateStringFromUniString( pTempBuf
, nDestChars
, eTarget
,
402 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT
|
403 RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT
|
404 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE
|
405 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR
|
406 RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0
|
407 RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE
|
408 RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE
);
413 // =======================================================================
// Converts this string in place from encoding eSource to encoding eTarget and
// returns a ByteString reference. Four guard conditions are tested before
// ImplStringConvert() is called: either encoding is
// RTL_TEXTENCODING_DONTKNOW, both encodings are equal, or one side is
// RTL_TEXTENCODING_SYMBOL while the other is neither UTF-7 nor UTF-8.
//   bReplace - forwarded to ImplStringConvert()
// NOTE(review): original lines 416, 418, 421-422, 425-426, 432, 436-437 and
// 440-443 are missing from this extraction, so the statement executed by
// each guard (presumably an early return) and the final return are not
// visible -- confirm against the pristine file.
415 ByteString
& ByteString::Convert( rtl_TextEncoding eSource
, rtl_TextEncoding eTarget
, BOOL bReplace
)
417 DBG_CHKTHIS( ByteString
, DbgCheckByteString
);
419 // rtl_TextEncoding Dontknow cannot be converted
420 if ( (eSource
== RTL_TEXTENCODING_DONTKNOW
) || (eTarget
== RTL_TEXTENCODING_DONTKNOW
) )
423 // If source and target are identical, no conversion is needed
424 if ( eSource
== eTarget
)
427 // Convert rtl_TextEncoding Symbol only to Unicode or from Unicode; otherwise
428 // we want to keep the character codes
429 if ( (eSource
== RTL_TEXTENCODING_SYMBOL
) &&
430 (eTarget
!= RTL_TEXTENCODING_UTF7
) &&
431 (eTarget
!= RTL_TEXTENCODING_UTF8
) )
433 if ( (eTarget
== RTL_TEXTENCODING_SYMBOL
) &&
434 (eSource
!= RTL_TEXTENCODING_UTF7
) &&
435 (eSource
!= RTL_TEXTENCODING_UTF8
) )
438 // Convert the character set
439 ImplStringConvert( eSource
, eTarget
, bReplace
);
444 // =======================================================================
// Converts a single character c from encoding eSource to encoding eTarget.
// The same four guard conditions as in the string overload are tested
// (DONTKNOW on either side, identical encodings, SYMBOL paired with a
// non-UTF-7/UTF-8 encoding). The visible conversion path looks the byte up in
// the table returned by ImplGet1ByteConvertTab() and returns the entry.
// NOTE(review): original lines 448-449, 452-453, 456-457, 463, 467-468, 470
// and 472-475 are missing from this extraction; the last parameter
// (presumably bReplace, which is used below), the statement executed by each
// guard and the handling of a missing table are not visible -- confirm.
446 char ByteString::Convert( char c
,
447 rtl_TextEncoding eSource
, rtl_TextEncoding eTarget
,
450 // TextEncoding Dontknow cannot be converted
451 if ( (eSource
== RTL_TEXTENCODING_DONTKNOW
) || (eTarget
== RTL_TEXTENCODING_DONTKNOW
) )
454 // If source and target are identical, no conversion is needed
455 if ( eSource
== eTarget
)
458 // Convert TextEncoding Symbol only to Unicode or from Unicode; otherwise
459 // we want to keep the character codes
460 if ( (eSource
== RTL_TEXTENCODING_SYMBOL
) &&
461 (eTarget
!= RTL_TEXTENCODING_UTF7
) &&
462 (eTarget
!= RTL_TEXTENCODING_UTF8
) )
464 if ( (eTarget
== RTL_TEXTENCODING_SYMBOL
) &&
465 (eSource
!= RTL_TEXTENCODING_UTF7
) &&
466 (eSource
!= RTL_TEXTENCODING_UTF8
) )
469 sal_uChar
* pConvertTab
= ImplGet1ByteConvertTab( eSource
, eTarget
, bReplace
);
// Index with the unsigned byte value so that values above 127 stay in range.
471 return (char)pConvertTab
[(sal_uChar
)c
];
476 // =======================================================================
// Converts a single character c in encoding eTextEncoding to a sal_Unicode
// value by delegating to the pointer/length overload of ConvertToUnicode().
// NOTE(review): original lines 479-480 and 482 are missing from this
// extraction; the declaration of nLen (presumably initialised to 1) is not
// visible -- confirm against the pristine file.
478 sal_Unicode
ByteString::ConvertToUnicode( char c
, rtl_TextEncoding eTextEncoding
)
481 return ConvertToUnicode( &c
, &nLen
, eTextEncoding
);
484 // -----------------------------------------------------------------------
// Converts a single sal_Unicode value c to a char in encoding eTextEncoding by
// delegating to the buffer overload of ConvertFromUnicode(), which writes into
// the local buffer aBuf and reports the number of bytes in nLen.
//   bReplace - forwarded to the buffer overload
// NOTE(review): original lines 487-489 and 491-495 are missing from this
// extraction; the declarations of nLen and aBuf and the computation of the
// returned char from nLen/aBuf are not visible -- confirm.
486 char ByteString::ConvertFromUnicode( sal_Unicode c
, rtl_TextEncoding eTextEncoding
, BOOL bReplace
)
490 nLen
= ConvertFromUnicode( c
, aBuf
, sizeof( aBuf
), eTextEncoding
, bReplace
);
497 // -----------------------------------------------------------------------
// Converts the byte sequence at pChar (*pLen bytes on input) in encoding
// eTextEncoding to a sal_Unicode value using rtl_convertTextToUnicode() with
// the FLUSH flag set. RTL_TEXTENCODING_DONTKNOW is tested up front, and the
// result is evaluated depending on whether exactly one character was produced.
// NOTE(review): original lines 500, 503-504, 506-508, 513, 520 and 522-532 are
// missing from this extraction; the early-return value, the destination
// arguments of the conversion call, any update of *pLen and the returned
// values are not visible -- confirm against the pristine file.
499 sal_Unicode
ByteString::ConvertToUnicode( const char* pChar
, sal_Size
* pLen
, rtl_TextEncoding eTextEncoding
)
501 // TextEncoding Dontknow is not converted
502 if ( eTextEncoding
== RTL_TEXTENCODING_DONTKNOW
)
505 rtl_TextToUnicodeConverter hConverter
;
509 sal_Unicode nConvChar
;
510 hConverter
= rtl_createTextToUnicodeConverter( eTextEncoding
);
511 nDestChars
= rtl_convertTextToUnicode( hConverter
, 0,
512 (const sal_Char
*)pChar
, *pLen
,
514 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT
|
515 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT
|
516 RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT
|
517 RTL_TEXTTOUNICODE_FLAGS_FLUSH
,
518 &nInfo
, &nSrcBytes
);
519 rtl_destroyTextToUnicodeConverter( hConverter
);
// Checks whether the conversion yielded exactly one character.
521 if ( nDestChars
== 1 )
533 // -----------------------------------------------------------------------
// Converts the single sal_Unicode value c to encoding eTextEncoding, writing
// at most nBufLen bytes to pBuf via rtl_convertUnicodeToText(); the return type
// is sal_Size. RTL_TEXTENCODING_DONTKNOW is tested up front.
// The flag word always contains NONSPACING_IGNORE, CONTROL_IGNORE and FLUSH;
// further flags (UNDEFINED_DEFAULT/INVALID_DEFAULT, UNDEFINED_REPLACE,
// UNDEFINED_REPLACESTR, or UNDEFINED_0/INVALID_0) are OR-ed in below.
// NOTE(review): original lines 536-537, 540-541, 543-545, 550-551, 555,
// 557-559, 562-563, 566, 568 and 571-573 are missing from this extraction;
// the last parameter, the conditions selecting each group of flags, the
// source arguments of the conversion call and the return statement are not
// visible -- confirm against the pristine file.
535 sal_Size
ByteString::ConvertFromUnicode( sal_Unicode c
, char* pBuf
, sal_Size nBufLen
, rtl_TextEncoding eTextEncoding
,
538 // TextEncoding Dontknow is not converted
539 if ( eTextEncoding
== RTL_TEXTENCODING_DONTKNOW
)
542 rtl_UnicodeToTextConverter hConverter
;
546 sal_Unicode cUni
= c
;
547 sal_uInt32 nFlags
= RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE
|
548 RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE
|
549 RTL_UNICODETOTEXT_FLAGS_FLUSH
;
552 nFlags
|= RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT
|
553 RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT
;
554 nFlags
|= RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE
;
556 nFlags
|= RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR
;
560 nFlags
|= RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0
|
561 RTL_UNICODETOTEXT_FLAGS_INVALID_0
;
564 hConverter
= rtl_createUnicodeToTextConverter( eTextEncoding
);
565 nDestBytes
= rtl_convertUnicodeToText( hConverter
, 0,
567 (sal_Char
*)pBuf
, nBufLen
,
569 &nInfo
, &nSrcChars
);
570 rtl_destroyUnicodeToTextConverter( hConverter
);
574 // =======================================================================
// Constructs a ByteString from an rtl::OString. When the source length is
// below STRING_MAXLEN, the OString's data pointer is adopted directly
// (reinterpreted as ByteStringData*) and STRING_ACQUIRE is applied to it.
// STRING_NEW on mpData is the other visible path; going by the diagnostic
// text, it yields a zero-length string for over-long input.
// NOTE(review): original lines 577-578, 580, 583, 585, 588-590 and 592-593 are
// missing from this extraction (presumably braces and the 'else') -- confirm.
576 ByteString::ByteString( const rtl::OString
& rStr
)
579 DBG_CTOR( ByteString
, DbgCheckByteString
);
581 OSL_ENSURE(rStr
.pData
->length
< STRING_MAXLEN
,
582 "Overflowing rtl::OString -> ByteString cut to zero length");
584 if (rStr
.pData
->length
< STRING_MAXLEN
)
// Adopt the OString's buffer instead of copying the characters.
586 mpData
= reinterpret_cast< ByteStringData
* >(const_cast< rtl::OString
& >(rStr
).pData
);
587 STRING_ACQUIRE((STRING_TYPE
*)mpData
);
591 STRING_NEW((STRING_TYPE
**)&mpData
);
595 // -----------------------------------------------------------------------
// Assigns an rtl::OString to this ByteString. When the source length is below
// STRING_MAXLEN, the current mpData is released, the OString's data pointer is
// adopted (reinterpreted as ByteStringData*) and STRING_ACQUIRE is applied to
// it. STRING_NEW on mpData is the other visible path; going by the diagnostic
// text, it yields a zero-length string for over-long input.
// NOTE(review): original lines 598, 600, 603, 605 and 609-611 are missing from
// this extraction and the listing ends inside this function, so the 'else',
// any release on the second path and the return statement are not visible --
// confirm against the pristine file.
597 ByteString
& ByteString::Assign( const rtl::OString
& rStr
)
599 DBG_CHKTHIS( ByteString
, DbgCheckByteString
);
601 OSL_ENSURE(rStr
.pData
->length
< STRING_MAXLEN
,
602 "Overflowing rtl::OString -> ByteString cut to zero length");
604 if (rStr
.pData
->length
< STRING_MAXLEN
)
// Release the old data first, then adopt the OString's buffer.
606 STRING_RELEASE((STRING_TYPE
*)mpData
);
607 mpData
= reinterpret_cast< ByteStringData
* >(const_cast< rtl::OString
& >(rStr
).pData
);
608 STRING_ACQUIRE((STRING_TYPE
*)mpData
);
612 STRING_NEW((STRING_TYPE
**)&mpData
);