update credits
[LibreOffice.git] / sal / rtl / ustring.cxx
blobbdebd897a71b514649d4d600f9d040c5481a0a71
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include "sal/config.h"
22 #ifdef _MSC_VER
23 #pragma warning(disable:4738) // storing 32-bit float result in memory, possible loss of performance
24 #endif
26 #include <cassert>
27 #include <cstdlib>
29 #include <osl/diagnose.h>
30 #include <osl/interlck.h>
31 #include <rtl/alloc.h>
32 #include <osl/mutex.h>
33 #include <osl/doublecheckedlocking.h>
34 #include <rtl/tencinfo.h>
36 #include <string.h>
37 #include <sal/alloca.h>
38 #include <sal/log.hxx>
40 #include "hash.hxx"
41 #include "strimp.hxx"
42 #include "surrogates.hxx"
43 #include <rtl/ustring.h>
45 #include "rtl/math.h"
46 #include "rtl/tencinfo.h"
48 /* ======================================================================= */
50 /* static data to be referenced by all empty strings
51 * the refCount is predefined to 1 and must never become 0 !
53 static rtl_uString const aImplEmpty_rtl_uString =
55 (sal_Int32) (SAL_STRING_INTERN_FLAG|SAL_STRING_STATIC_FLAG|1), /*sal_Int32 refCount; */
56 0, /*sal_Int32 length; */
57 { 0 } /*sal_Unicode buffer[1];*/
60 /* ======================================================================= */
/* Configuration macros defined ahead of the strtmpl.cxx include further
   down; presumably they parameterise that shared template code for the
   UTF-16 string type -- confirm against strtmpl.cxx. */
/* Character (code unit) type of this string flavour. */
62 #define IMPL_RTL_STRCODE sal_Unicode
/* Identity mapping of a character value. */
63 #define IMPL_RTL_USTRCODE( c ) (c)
/* Builds rtl_ustr_<n> function names. */
64 #define IMPL_RTL_STRNAME( n ) rtl_ustr_ ## n
/* Builds rtl_uString_<n> function names (used directly in this file, e.g.
   IMPL_RTL_STRINGNAME( new ) and IMPL_RTL_STRINGNAME( ImplAlloc )). */
66 #define IMPL_RTL_STRINGNAME( n ) rtl_uString_ ## n
/* String data structure of this flavour. */
67 #define IMPL_RTL_STRINGDATA rtl_uString
/* The static empty-string instance defined above. */
68 #define IMPL_RTL_EMPTYSTRING aImplEmpty_rtl_uString
/* Defined without a value; presumably switches on intern-pool support in
   the template code -- TODO confirm. */
69 #define IMPL_RTL_INTERN
/* Forward declaration; the definition follows later in this file. */
70 static void internRelease (rtl_uString *pThis);
72 /* ======================================================================= */
74 /* Include String/UString template code */
76 #include "strtmpl.cxx"
78 sal_Int32 rtl_ustr_indexOfAscii_WithLength(
79 sal_Unicode const * str, sal_Int32 len,
80 char const * subStr, sal_Int32 subLen) SAL_THROW_EXTERN_C()
82 if (subLen > 0 && subLen <= len) {
83 sal_Int32 i;
84 for (i = 0; i <= len - subLen; ++i) {
85 if (rtl_ustr_asciil_reverseEquals_WithLength(
86 str + i, subStr, subLen))
88 return i;
92 return -1;
95 sal_Int32 rtl_ustr_lastIndexOfAscii_WithLength(
96 sal_Unicode const * str, sal_Int32 len,
97 char const * subStr, sal_Int32 subLen) SAL_THROW_EXTERN_C()
99 if (subLen > 0 && subLen <= len) {
100 sal_Int32 i;
101 for (i = len - subLen; i >= 0; --i) {
102 if (rtl_ustr_asciil_reverseEquals_WithLength(
103 str + i, subStr, subLen))
105 return i;
109 return -1;
112 sal_Int32 SAL_CALL rtl_ustr_valueOfFloat(sal_Unicode * pStr, float f)
113 SAL_THROW_EXTERN_C()
115 rtl_uString * pResult = NULL;
116 sal_Int32 nLen;
117 rtl_math_doubleToUString(
118 &pResult, 0, 0, f, rtl_math_StringFormat_G,
119 RTL_USTR_MAX_VALUEOFFLOAT - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0,
120 0, sal_True);
121 nLen = pResult->length;
122 OSL_ASSERT(nLen < RTL_USTR_MAX_VALUEOFFLOAT);
123 memcpy(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Unicode));
124 rtl_uString_release(pResult);
125 return nLen;
128 sal_Int32 SAL_CALL rtl_ustr_valueOfDouble(sal_Unicode * pStr, double d)
129 SAL_THROW_EXTERN_C()
131 rtl_uString * pResult = NULL;
132 sal_Int32 nLen;
133 rtl_math_doubleToUString(
134 &pResult, 0, 0, d, rtl_math_StringFormat_G,
135 RTL_USTR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0,
136 0, sal_True);
137 nLen = pResult->length;
138 OSL_ASSERT(nLen < RTL_USTR_MAX_VALUEOFDOUBLE);
139 memcpy(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Unicode));
140 rtl_uString_release(pResult);
141 return nLen;
144 float SAL_CALL rtl_ustr_toFloat(sal_Unicode const * pStr) SAL_THROW_EXTERN_C()
146 return (float) rtl_math_uStringToDouble(pStr,
147 pStr + rtl_ustr_getLength(pStr),
148 '.', 0, 0, 0);
151 double SAL_CALL rtl_ustr_toDouble(sal_Unicode const * pStr) SAL_THROW_EXTERN_C()
153 return rtl_math_uStringToDouble(pStr, pStr + rtl_ustr_getLength(pStr), '.',
154 0, 0, 0);
157 /* ======================================================================= */
159 sal_Int32 SAL_CALL rtl_ustr_ascii_compare( const sal_Unicode* pStr1,
160 const sal_Char* pStr2 )
161 SAL_THROW_EXTERN_C()
163 sal_Int32 nRet;
164 while ( ((nRet = ((sal_Int32)(*pStr1))-
165 ((sal_Int32)((unsigned char)(*pStr2)))) == 0) &&
166 *pStr2 )
168 /* Check ASCII range */
169 SAL_WARN_IF( ((unsigned char)*pStr2) > 127, "rtl.string",
170 "rtl_ustr_ascii_compare - Found char > 127" );
171 pStr1++;
172 pStr2++;
175 return nRet;
178 /* ----------------------------------------------------------------------- */
180 sal_Int32 SAL_CALL rtl_ustr_ascii_compare_WithLength( const sal_Unicode* pStr1,
181 sal_Int32 nStr1Len,
182 const sal_Char* pStr2 )
183 SAL_THROW_EXTERN_C()
185 sal_Int32 nRet = 0;
186 while( ((nRet = (nStr1Len ? (sal_Int32)(*pStr1) : 0)-
187 ((sal_Int32)((unsigned char)(*pStr2)))) == 0) &&
188 nStr1Len && *pStr2 )
190 /* Check ASCII range */
191 SAL_WARN_IF( ((unsigned char)*pStr2) > 127, "rtl.string",
192 "rtl_ustr_ascii_compare_WithLength - Found char > 127" );
193 pStr1++;
194 pStr2++;
195 nStr1Len--;
198 return nRet;
201 /* ----------------------------------------------------------------------- */
203 sal_Int32 SAL_CALL rtl_ustr_ascii_shortenedCompare_WithLength( const sal_Unicode* pStr1,
204 sal_Int32 nStr1Len,
205 const sal_Char* pStr2,
206 sal_Int32 nShortenedLength )
207 SAL_THROW_EXTERN_C()
209 const sal_Unicode* pStr1End = pStr1 + nStr1Len;
210 sal_Int32 nRet;
211 while ( (nShortenedLength > 0) &&
212 (pStr1 < pStr1End) && *pStr2 )
214 /* Check ASCII range */
215 SAL_WARN_IF( ((unsigned char)*pStr2) > 127, "rtl.string",
216 "rtl_ustr_ascii_shortenedCompare_WithLength - Found char > 127" );
218 nRet = ((sal_Int32)*pStr1)-
219 ((sal_Int32)(unsigned char)*pStr2);
220 if ( nRet != 0 )
221 return nRet;
223 nShortenedLength--;
224 pStr1++;
225 pStr2++;
228 if ( nShortenedLength <= 0 )
229 return 0;
231 if ( *pStr2 )
233 OSL_ENSURE( pStr1 == pStr1End, "pStr1 == pStr1End failed" );
234 // first is a substring of the second string => less (negative value)
235 nRet = -1;
237 else
239 // greater or equal
240 nRet = pStr1End - pStr1;
243 return nRet;
246 /* ----------------------------------------------------------------------- */
248 sal_Int32 SAL_CALL rtl_ustr_asciil_reverseCompare_WithLength( const sal_Unicode* pStr1,
249 sal_Int32 nStr1Len,
250 const sal_Char* pStr2,
251 sal_Int32 nStr2Len )
252 SAL_THROW_EXTERN_C()
254 const sal_Unicode* pStr1Run = pStr1+nStr1Len;
255 const sal_Char* pStr2Run = pStr2+nStr2Len;
256 sal_Int32 nRet;
257 while ( (pStr1 < pStr1Run) && (pStr2 < pStr2Run) )
259 /* Check ASCII range */
260 SAL_WARN_IF( ((unsigned char)*pStr2) > 127, "rtl.string",
261 "rtl_ustr_asciil_reverseCompare_WithLength - Found char > 127" );
262 pStr1Run--;
263 pStr2Run--;
264 nRet = ((sal_Int32)*pStr1Run)-((sal_Int32)*pStr2Run);
265 if ( nRet )
266 return nRet;
269 return nStr1Len - nStr2Len;
272 /* ----------------------------------------------------------------------- */
274 sal_Bool SAL_CALL rtl_ustr_asciil_reverseEquals_WithLength( const sal_Unicode* pStr1,
275 const sal_Char* pStr2,
276 sal_Int32 nStrLen )
277 SAL_THROW_EXTERN_C()
279 const sal_Unicode* pStr1Run = pStr1+nStrLen;
280 const sal_Char* pStr2Run = pStr2+nStrLen;
281 while ( pStr1 < pStr1Run )
283 /* Check ASCII range */
284 SAL_WARN_IF( ((unsigned char)*pStr2) > 127, "rtl.string",
285 "rtl_ustr_asciil_reverseEquals_WithLength - Found char > 127" );
286 pStr1Run--;
287 pStr2Run--;
288 if( *pStr1Run != (sal_Unicode)*pStr2Run )
289 return sal_False;
292 return sal_True;
295 /* ----------------------------------------------------------------------- */
297 sal_Int32 SAL_CALL rtl_ustr_ascii_compareIgnoreAsciiCase( const sal_Unicode* pStr1,
298 const sal_Char* pStr2 )
299 SAL_THROW_EXTERN_C()
301 sal_Int32 nRet;
302 sal_Int32 c1;
303 sal_Int32 c2;
306 /* Check ASCII range */
307 SAL_WARN_IF( ((unsigned char)*pStr2) > 127, "rtl.string",
308 "rtl_ustr_ascii_compareIgnoreAsciiCase - Found char > 127" );
309 /* If character between 'A' and 'Z', than convert it to lowercase */
310 c1 = (sal_Int32)*pStr1;
311 c2 = (sal_Int32)((unsigned char)*pStr2);
312 if ( (c1 >= 65) && (c1 <= 90) )
313 c1 += 32;
314 if ( (c2 >= 65) && (c2 <= 90) )
315 c2 += 32;
316 nRet = c1-c2;
317 if ( nRet != 0 )
318 return nRet;
320 pStr1++;
321 pStr2++;
323 while ( c2 );
325 return 0;
328 /* ----------------------------------------------------------------------- */
330 sal_Int32 SAL_CALL rtl_ustr_ascii_compareIgnoreAsciiCase_WithLength( const sal_Unicode* pStr1,
331 sal_Int32 nStr1Len,
332 const sal_Char* pStr2 )
333 SAL_THROW_EXTERN_C()
335 sal_Int32 nRet;
336 sal_Int32 c1;
337 sal_Int32 c2;
340 /* Check ASCII range */
341 SAL_WARN_IF( ((unsigned char)*pStr2) > 127, "rtl.string",
342 "rtl_ustr_ascii_compareIgnoreAsciiCase_WithLength - Found char > 127" );
343 if ( !nStr1Len )
344 return *pStr2 == '\0' ? 0 : -1;
346 /* If character between 'A' and 'Z', than convert it to lowercase */
347 c1 = (sal_Int32)*pStr1;
348 c2 = (sal_Int32)((unsigned char)*pStr2);
349 if ( (c1 >= 65) && (c1 <= 90) )
350 c1 += 32;
351 if ( (c2 >= 65) && (c2 <= 90) )
352 c2 += 32;
353 nRet = c1-c2;
354 if ( nRet != 0 )
355 return nRet;
357 pStr1++;
358 pStr2++;
359 nStr1Len--;
361 while( c2 );
363 return 0;
366 sal_Int32 rtl_ustr_ascii_compareIgnoreAsciiCase_WithLengths(
367 sal_Unicode const * first, sal_Int32 firstLen,
368 char const * second, sal_Int32 secondLen) SAL_THROW_EXTERN_C()
370 sal_Int32 i;
371 sal_Int32 len = firstLen < secondLen ? firstLen : secondLen;
372 for (i = 0; i < len; ++i) {
373 /* Check ASCII range */
374 SAL_WARN_IF( ((unsigned char)*second) > 127, "rtl.string",
375 "rtl_ustr_ascii_compareIgnoreAsciiCase_WithLengths - Found char > 127" );
376 sal_Int32 c1 = *first++;
377 sal_Int32 c2 = (unsigned char) *second++;
378 sal_Int32 d;
379 if (c1 >= 65 && c1 <= 90) {
380 c1 += 32;
382 if (c2 >= 65 && c2 <= 90) {
383 c2 += 32;
385 d = c1 - c2;
386 if (d != 0) {
387 return d;
390 return firstLen - secondLen;
393 /* ----------------------------------------------------------------------- */
395 sal_Int32 SAL_CALL rtl_ustr_ascii_shortenedCompareIgnoreAsciiCase_WithLength( const sal_Unicode* pStr1,
396 sal_Int32 nStr1Len,
397 const sal_Char* pStr2,
398 sal_Int32 nShortenedLength )
399 SAL_THROW_EXTERN_C()
401 const sal_Unicode* pStr1End = pStr1 + nStr1Len;
402 sal_Int32 nRet;
403 sal_Int32 c1;
404 sal_Int32 c2;
405 while ( (nShortenedLength > 0) &&
406 (pStr1 < pStr1End) && *pStr2 )
408 /* Check ASCII range */
409 SAL_WARN_IF( ((unsigned char)*pStr2) > 127, "rtl.string",
410 "rtl_ustr_ascii_shortenedCompareIgnoreAsciiCase_WithLength - Found char > 127" );
412 /* If character between 'A' and 'Z', than convert it to lowercase */
413 c1 = (sal_Int32)*pStr1;
414 c2 = (sal_Int32)((unsigned char)*pStr2);
415 if ( (c1 >= 65) && (c1 <= 90) )
416 c1 += 32;
417 if ( (c2 >= 65) && (c2 <= 90) )
418 c2 += 32;
419 nRet = c1-c2;
420 if ( nRet != 0 )
421 return nRet;
423 nShortenedLength--;
424 pStr1++;
425 pStr2++;
428 if ( nShortenedLength <= 0 )
429 return 0;
431 if ( *pStr2 )
433 OSL_ENSURE( pStr1 == pStr1End, "pStr1 == pStr1End failed" );
434 // first is a substring of the second string => less (negative value)
435 nRet = -1;
437 else
439 // greater or equal
440 nRet = pStr1End - pStr1;
443 return nRet;
446 /* ----------------------------------------------------------------------- */
448 void SAL_CALL rtl_uString_newFromAscii( rtl_uString** ppThis,
449 const sal_Char* pCharStr )
450 SAL_THROW_EXTERN_C()
452 sal_Int32 nLen;
454 if ( pCharStr )
456 const sal_Char* pTempStr = pCharStr;
457 while( *pTempStr )
458 pTempStr++;
459 nLen = pTempStr-pCharStr;
461 else
462 nLen = 0;
464 if ( !nLen )
466 IMPL_RTL_STRINGNAME( new )( ppThis );
467 return;
470 if ( *ppThis )
471 IMPL_RTL_STRINGNAME( release )( *ppThis );
473 *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen );
474 OSL_ASSERT(*ppThis != NULL);
475 if ( (*ppThis) )
477 IMPL_RTL_STRCODE* pBuffer = (*ppThis)->buffer;
480 /* Check ASCII range */
481 SAL_WARN_IF( ((unsigned char)*pCharStr) > 127, "rtl.string",
482 "rtl_uString_newFromAscii - Found char > 127" );
484 *pBuffer = *pCharStr;
485 pBuffer++;
486 pCharStr++;
488 while ( *pCharStr );
491 RTL_LOG_STRING_NEW( *ppThis );
494 void SAL_CALL rtl_uString_newFromCodePoints(
495 rtl_uString ** newString, sal_uInt32 const * codePoints,
496 sal_Int32 codePointCount) SAL_THROW_EXTERN_C()
498 sal_Int32 n;
499 sal_Int32 i;
500 sal_Unicode * p;
501 OSL_ASSERT(
502 newString != NULL &&
503 (codePoints != NULL || codePointCount == 0) &&
504 codePointCount >= 0);
505 if (codePointCount == 0) {
506 rtl_uString_new(newString);
507 return;
509 if (*newString != NULL) {
510 rtl_uString_release(*newString);
512 n = codePointCount;
513 for (i = 0; i < codePointCount; ++i) {
514 OSL_ASSERT(codePoints[i] <= 0x10FFFF);
515 if (codePoints[i] >= 0x10000) {
516 ++n;
519 /* Builds on the assumption that sal_Int32 uses 32 bit two's complement
520 representation with wrap around (the necessary number of UTF-16 code
521 units will be no larger than 2 * SAL_MAX_INT32, represented as
522 sal_Int32 -2): */
523 if (n < 0) {
524 *newString = NULL;
525 return;
527 *newString = rtl_uString_ImplAlloc(n);
528 if (*newString == NULL) {
529 return;
531 p = (*newString)->buffer;
532 for (i = 0; i < codePointCount; ++i) {
533 sal_uInt32 c = codePoints[i];
534 if (c < 0x10000) {
535 *p++ = (sal_Unicode) c;
536 } else {
537 c -= 0x10000;
538 *p++ = (sal_Unicode) ((c >> 10) | SAL_RTL_FIRST_HIGH_SURROGATE);
539 *p++ = (sal_Unicode) ((c & 0x3FF) | SAL_RTL_FIRST_LOW_SURROGATE);
542 RTL_LOG_STRING_NEW( *newString );
545 /* ======================================================================= */
547 static int rtl_ImplGetFastUTF8UnicodeLen( const sal_Char* pStr, sal_Int32 nLen )
549 int n;
550 sal_uChar c;
551 const sal_Char* pEndStr;
553 n = 0;
554 pEndStr = pStr+nLen;
555 while ( pStr < pEndStr )
557 c = (sal_uChar)*pStr;
559 if ( !(c & 0x80) )
560 pStr++;
561 else if ( (c & 0xE0) == 0xC0 )
562 pStr += 2;
563 else if ( (c & 0xF0) == 0xE0 )
564 pStr += 3;
565 else if ( (c & 0xF8) == 0xF0 )
566 pStr += 4;
567 else if ( (c & 0xFC) == 0xF8 )
568 pStr += 5;
569 else if ( (c & 0xFE) == 0xFC )
570 pStr += 6;
571 else
572 pStr++;
574 n++;
577 return n;
580 /* ----------------------------------------------------------------------- */
582 static void rtl_string2UString_status( rtl_uString** ppThis,
583 const sal_Char* pStr,
584 sal_Int32 nLen,
585 rtl_TextEncoding eTextEncoding,
586 sal_uInt32 nCvtFlags,
587 sal_uInt32 *pInfo )
589 OSL_ENSURE(nLen == 0 || rtl_isOctetTextEncoding(eTextEncoding),
590 "rtl_string2UString_status() - Wrong TextEncoding" );
592 if ( !nLen )
594 rtl_uString_new( ppThis );
595 if (pInfo != NULL) {
596 *pInfo = 0;
599 else
601 if ( *ppThis )
602 IMPL_RTL_STRINGNAME( release )( *ppThis );
604 /* Optimization for US-ASCII */
605 if ( eTextEncoding == RTL_TEXTENCODING_ASCII_US )
607 IMPL_RTL_STRCODE* pBuffer;
608 *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen );
609 if (*ppThis == NULL) {
610 if (pInfo != NULL) {
611 *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
612 RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
614 return;
616 pBuffer = (*ppThis)->buffer;
619 /* Check ASCII range */
620 SAL_WARN_IF( ((unsigned char)*pStr) > 127, "rtl.string",
621 "rtl_string2UString_status() - Found char > 127 and RTL_TEXTENCODING_ASCII_US is specified" );
623 *pBuffer = *pStr;
624 pBuffer++;
625 pStr++;
626 nLen--;
628 while ( nLen );
629 if (pInfo != NULL) {
630 *pInfo = 0;
633 else
635 rtl_uString* pTemp;
636 rtl_uString* pTemp2 = NULL;
637 rtl_TextToUnicodeConverter hConverter;
638 sal_uInt32 nInfo;
639 sal_Size nSrcBytes;
640 sal_Size nDestChars;
641 sal_Size nNewLen;
643 /* Optimization for UTF-8 - we try to calculate the exact length */
644 /* For all other encoding we try the maximum - and reallocate
645 the buffer if needed */
646 if ( eTextEncoding == RTL_TEXTENCODING_UTF8 )
648 nNewLen = rtl_ImplGetFastUTF8UnicodeLen( pStr, nLen );
649 /* Includes the string only ASCII, then we could copy
650 the buffer faster */
651 if ( nNewLen == (sal_Size)nLen )
653 IMPL_RTL_STRCODE* pBuffer;
654 *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen );
655 if (*ppThis == NULL)
657 if (pInfo != NULL) {
658 *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
659 RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
661 return;
663 pBuffer = (*ppThis)->buffer;
666 /* Check ASCII range */
667 SAL_WARN_IF( ((unsigned char)*pStr) > 127, "rtl.string",
668 "rtl_string2UString_status() - UTF8 test encoding is wrong" );
670 *pBuffer = *pStr;
671 pBuffer++;
672 pStr++;
673 nLen--;
675 while ( nLen );
676 if (pInfo != NULL) {
677 *pInfo = 0;
679 RTL_LOG_STRING_NEW( *ppThis );
680 return;
683 else
684 nNewLen = nLen;
686 nCvtFlags |= RTL_TEXTTOUNICODE_FLAGS_FLUSH;
687 hConverter = rtl_createTextToUnicodeConverter( eTextEncoding );
689 pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen );
690 if (pTemp == NULL) {
691 if (pInfo != NULL) {
692 *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
693 RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
695 return;
697 nDestChars = rtl_convertTextToUnicode( hConverter, 0,
698 pStr, nLen,
699 pTemp->buffer, nNewLen,
700 nCvtFlags,
701 &nInfo, &nSrcBytes );
703 /* Buffer not big enough, try again with enough space */
704 /* Shouldn't be the case, but if we get textencoding which
705 could results in more unicode characters we have this
706 code here. Could be the case for apple encodings */
707 while ( nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL )
709 rtl_freeMemory( pTemp );
710 nNewLen += 8;
711 pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen );
712 if (pTemp == NULL) {
713 if (pInfo != NULL) {
714 *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
715 RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
717 return;
719 nDestChars = rtl_convertTextToUnicode( hConverter, 0,
720 pStr, nLen,
721 pTemp->buffer, nNewLen,
722 nCvtFlags,
723 &nInfo, &nSrcBytes );
726 if (pInfo)
727 *pInfo = nInfo;
729 /* Set the buffer to the correct size or if there is too
730 much overhead, reallocate to the correct size */
731 if ( nNewLen > nDestChars+8 )
733 pTemp2 = IMPL_RTL_STRINGNAME( ImplAlloc )( nDestChars );
735 if (pTemp2 != NULL)
737 rtl_str_ImplCopy(pTemp2->buffer, pTemp->buffer, nDestChars);
738 rtl_freeMemory(pTemp);
739 pTemp = pTemp2;
741 else
743 pTemp->length = nDestChars;
744 pTemp->buffer[nDestChars] = 0;
747 rtl_destroyTextToUnicodeConverter( hConverter );
748 *ppThis = pTemp;
750 /* Results the conversion in an empty buffer -
751 create an empty string */
752 if ( pTemp && !nDestChars )
753 rtl_uString_new( ppThis );
756 RTL_LOG_STRING_NEW( *ppThis );
759 void SAL_CALL rtl_string2UString( rtl_uString** ppThis,
760 const sal_Char* pStr,
761 sal_Int32 nLen,
762 rtl_TextEncoding eTextEncoding,
763 sal_uInt32 nCvtFlags ) SAL_THROW_EXTERN_C()
765 rtl_string2UString_status( ppThis, pStr, nLen, eTextEncoding,
766 nCvtFlags, NULL );
769 /* ----------------------------------------------------------------------- */
/* Passed on to rtl_str_hash_intern. With CAN_RETURN,
   rtl_ustring_intern_internal frees the candidate string itself when the
   pool hands back a different string. */
enum StrLifecycle {
    CANNOT_RETURN = 0,
    CAN_RETURN = 1
};
776 static oslMutex
777 getInternMutex()
779 static oslMutex pPoolGuard = NULL;
780 if( !pPoolGuard )
782 oslMutex pGlobalGuard;
783 pGlobalGuard = *osl_getGlobalMutex();
784 osl_acquireMutex( pGlobalGuard );
785 if( !pPoolGuard )
787 oslMutex p = osl_createMutex();
788 OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER();
789 pPoolGuard = p;
791 osl_releaseMutex( pGlobalGuard );
793 else
795 OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER();
798 return pPoolGuard;
801 /* returns true if we found a dup in the pool */
802 static void rtl_ustring_intern_internal( rtl_uString ** newStr,
803 rtl_uString * str,
804 StrLifecycle can_return )
806 oslMutex pPoolMutex;
808 pPoolMutex = getInternMutex();
810 osl_acquireMutex( pPoolMutex );
812 *newStr = rtl_str_hash_intern (str, can_return);
814 osl_releaseMutex( pPoolMutex );
816 if( can_return && *newStr != str )
817 { /* we dupped, then found a match */
818 rtl_freeMemory( str );
822 void SAL_CALL rtl_uString_intern( rtl_uString ** newStr,
823 rtl_uString * str) SAL_THROW_EXTERN_C()
825 if (SAL_STRING_IS_INTERN(str))
827 IMPL_RTL_AQUIRE( str );
828 *newStr = str;
830 else
832 rtl_uString *pOrg = *newStr;
833 *newStr = NULL;
834 rtl_ustring_intern_internal( newStr, str, CANNOT_RETURN );
835 if (pOrg)
836 rtl_uString_release (pOrg);
840 static int rtl_canGuessUOutputLength( int len, rtl_TextEncoding eTextEncoding )
842 // FIXME: Maybe we should use a bit flag in the higher bits of the
843 // eTextEncoding value itself to determine the encoding type. But if we
844 // do, be sure to mask the value in certain places that expect the values
845 // to be numbered serially from 0 and up. One such place is
846 // Impl_getTextEncodingData().
848 switch ( eTextEncoding )
850 // 1 to 1 (with no zero elements)
851 case RTL_TEXTENCODING_IBM_437:
852 case RTL_TEXTENCODING_IBM_850:
853 case RTL_TEXTENCODING_IBM_860:
854 case RTL_TEXTENCODING_IBM_861:
855 case RTL_TEXTENCODING_IBM_863:
856 case RTL_TEXTENCODING_IBM_865:
857 return len;
858 break;
860 return 0;
863 void SAL_CALL rtl_uString_internConvert( rtl_uString ** newStr,
864 const sal_Char * str,
865 sal_Int32 len,
866 rtl_TextEncoding eTextEncoding,
867 sal_uInt32 convertFlags,
868 sal_uInt32 * pInfo )
869 SAL_THROW_EXTERN_C()
871 rtl_uString *scratch;
873 if (*newStr)
875 rtl_uString_release (*newStr);
876 *newStr = NULL;
879 if ( len < 256 )
880 { // try various optimisations
881 sal_Int32 ulen;
882 if ( len < 0 )
883 len = strlen( str );
884 if ( eTextEncoding == RTL_TEXTENCODING_ASCII_US )
886 int i;
887 rtl_uString *pScratch;
888 pScratch = static_cast< rtl_uString * >(
889 alloca(sizeof (rtl_uString) + len * sizeof (IMPL_RTL_STRCODE)));
890 for (i = 0; i < len; i++)
892 /* Check ASCII range */
893 SAL_WARN_IF( ((unsigned char)str[i]) > 127, "rtl.string",
894 "rtl_ustring_internConvert() - Found char > 127 and RTL_TEXTENCODING_ASCII_US is specified" );
895 pScratch->buffer[i] = str[i];
897 pScratch->length = len;
898 rtl_ustring_intern_internal( newStr, pScratch, CANNOT_RETURN );
899 return;
901 else if ( (ulen = rtl_canGuessUOutputLength(len, eTextEncoding)) != 0 )
903 rtl_uString *pScratch;
904 rtl_TextToUnicodeConverter hConverter;
905 sal_Size nSrcBytes;
906 sal_uInt32 nInfo;
908 pScratch = static_cast< rtl_uString * >(
909 alloca(
910 sizeof (rtl_uString) + ulen * sizeof (IMPL_RTL_STRCODE)));
912 hConverter = rtl_createTextToUnicodeConverter( eTextEncoding );
913 rtl_convertTextToUnicode(
914 hConverter, 0, str, len, pScratch->buffer, ulen, convertFlags, &nInfo, &nSrcBytes );
915 rtl_destroyTextToUnicodeConverter( hConverter );
917 if (pInfo)
918 *pInfo = nInfo;
920 pScratch->length = ulen;
921 rtl_ustring_intern_internal( newStr, pScratch, CANNOT_RETURN );
922 return;
925 /* FIXME: we want a nice UTF-8 / alloca shortcut here */
928 scratch = NULL;
929 rtl_string2UString_status( &scratch, str, len, eTextEncoding, convertFlags,
930 pInfo );
931 if (!scratch) {
932 return;
934 rtl_ustring_intern_internal( newStr, scratch, CAN_RETURN );
937 static void
938 internRelease (rtl_uString *pThis)
940 oslMutex pPoolMutex;
942 rtl_uString *pFree = NULL;
943 if ( SAL_STRING_REFCOUNT(
944 osl_atomic_decrement( &(pThis->refCount) ) ) == 0)
946 pPoolMutex = getInternMutex();
947 osl_acquireMutex( pPoolMutex );
949 rtl_str_hash_remove (pThis);
951 /* May have been separately acquired */
952 if ( SAL_STRING_REFCOUNT(
953 osl_atomic_increment( &(pThis->refCount) ) ) == 1 )
955 /* we got the last ref */
956 pFree = pThis;
958 else /* very unusual */
960 internRelease (pThis);
963 osl_releaseMutex( pPoolMutex );
965 if (pFree)
966 rtl_freeMemory (pFree);
969 sal_uInt32 SAL_CALL rtl_uString_iterateCodePoints(
970 rtl_uString const * string, sal_Int32 * indexUtf16,
971 sal_Int32 incrementCodePoints)
973 sal_Int32 n;
974 sal_Unicode cu;
975 sal_uInt32 cp;
976 OSL_ASSERT(string != NULL && indexUtf16 != NULL);
977 n = *indexUtf16;
978 OSL_ASSERT(n >= 0 && n <= string->length);
979 while (incrementCodePoints < 0) {
980 OSL_ASSERT(n > 0);
981 cu = string->buffer[--n];
982 if (SAL_RTL_IS_LOW_SURROGATE(cu) && n != 0 &&
983 SAL_RTL_IS_HIGH_SURROGATE(string->buffer[n - 1]))
985 --n;
987 ++incrementCodePoints;
989 OSL_ASSERT(n >= 0 && n < string->length);
990 cu = string->buffer[n];
991 if (SAL_RTL_IS_HIGH_SURROGATE(cu) && string->length - n >= 2 &&
992 SAL_RTL_IS_LOW_SURROGATE(string->buffer[n + 1]))
994 cp = SAL_RTL_COMBINE_SURROGATES(cu, string->buffer[n + 1]);
995 } else {
996 cp = cu;
998 while (incrementCodePoints > 0) {
999 OSL_ASSERT(n < string->length);
1000 cu = string->buffer[n++];
1001 if (SAL_RTL_IS_HIGH_SURROGATE(cu) && n != string->length &&
1002 SAL_RTL_IS_LOW_SURROGATE(string->buffer[n]))
1004 ++n;
1006 --incrementCodePoints;
1008 OSL_ASSERT(n >= 0 && n <= string->length);
1009 *indexUtf16 = n;
1010 return cp;
1013 sal_Bool rtl_convertStringToUString(
1014 rtl_uString ** target, char const * source, sal_Int32 length,
1015 rtl_TextEncoding encoding, sal_uInt32 flags) SAL_THROW_EXTERN_C()
1017 sal_uInt32 info;
1018 rtl_string2UString_status(target, source, length, encoding, flags, &info);
1019 return (sal_Bool) ((info & RTL_TEXTTOUNICODE_INFO_ERROR) == 0);
1022 void rtl_uString_newReplaceFirst(
1023 rtl_uString ** newStr, rtl_uString * str, rtl_uString const * from,
1024 rtl_uString const * to, sal_Int32 * index) SAL_THROW_EXTERN_C()
1026 assert(str != 0);
1027 assert(index != 0);
1028 assert(*index >= 0 && *index <= str->length);
1029 assert(from != 0);
1030 assert(to != 0);
1031 sal_Int32 i = rtl_ustr_indexOfStr_WithLength(
1032 str->buffer + *index, str->length - *index, from->buffer, from->length);
1033 if (i == -1) {
1034 rtl_uString_assign(newStr, str);
1035 } else {
1036 assert(i <= str->length - *index);
1037 i += *index;
1038 assert(from->length <= str->length);
1039 if (str->length - from->length > SAL_MAX_INT32 - to->length) {
1040 std::abort();
1042 sal_Int32 n = str->length - from->length + to->length;
1043 rtl_uString_acquire(str); // in case *newStr == str
1044 rtl_uString_new_WithLength(newStr, n);
1045 if (n != 0) {
1046 (*newStr)->length = n;
1047 assert(i >= 0 && i < str->length);
1048 memcpy(
1049 (*newStr)->buffer, str->buffer, i * sizeof (sal_Unicode));
1050 memcpy(
1051 (*newStr)->buffer + i, to->buffer,
1052 to->length * sizeof (sal_Unicode));
1053 memcpy(
1054 (*newStr)->buffer + i + to->length,
1055 str->buffer + i + from->length,
1056 (str->length - i - from->length) * sizeof (sal_Unicode));
1058 rtl_uString_release(str);
1060 *index = i;
1063 void rtl_uString_newReplaceFirstAsciiL(
1064 rtl_uString ** newStr, rtl_uString * str, char const * from,
1065 sal_Int32 fromLength, rtl_uString const * to, sal_Int32 * index)
1066 SAL_THROW_EXTERN_C()
1068 assert(str != 0);
1069 assert(index != 0);
1070 assert(*index >= 0 && *index <= str->length);
1071 assert(fromLength >= 0);
1072 assert(to != 0);
1073 sal_Int32 i = rtl_ustr_indexOfAscii_WithLength(
1074 str->buffer + *index, str->length - *index, from, fromLength);
1075 if (i == -1) {
1076 rtl_uString_assign(newStr, str);
1077 } else {
1078 assert(i <= str->length - *index);
1079 i += *index;
1080 assert(fromLength <= str->length);
1081 if (str->length - fromLength > SAL_MAX_INT32 - to->length) {
1082 std::abort();
1084 sal_Int32 n = str->length - fromLength + to->length;
1085 rtl_uString_acquire(str); // in case *newStr == str
1086 if (n != 0) {
1087 rtl_uString_new_WithLength(newStr, n);
1088 (*newStr)->length = n;
1089 assert(i >= 0 && i < str->length);
1090 memcpy(
1091 (*newStr)->buffer, str->buffer, i * sizeof (sal_Unicode));
1092 memcpy(
1093 (*newStr)->buffer + i, to->buffer,
1094 to->length * sizeof (sal_Unicode));
1095 memcpy(
1096 (*newStr)->buffer + i + to->length,
1097 str->buffer + i + fromLength,
1098 (str->length - i - fromLength) * sizeof (sal_Unicode));
1100 rtl_uString_release(str);
1102 *index = i;
1105 void rtl_uString_newReplaceFirstAsciiLAsciiL(
1106 rtl_uString ** newStr, rtl_uString * str, char const * from,
1107 sal_Int32 fromLength, char const * to, sal_Int32 toLength,
1108 sal_Int32 * index) SAL_THROW_EXTERN_C()
1110 assert(str != 0);
1111 assert(index != 0);
1112 assert(*index >= 0 && *index <= str->length);
1113 assert(fromLength >= 0);
1114 assert(to != 0);
1115 assert(toLength >= 0);
1116 sal_Int32 i = rtl_ustr_indexOfAscii_WithLength(
1117 str->buffer + *index, str->length - *index, from, fromLength);
1118 if (i == -1) {
1119 rtl_uString_assign(newStr, str);
1120 } else {
1121 assert(i <= str->length - *index);
1122 i += *index;
1123 assert(fromLength <= str->length);
1124 if (str->length - fromLength > SAL_MAX_INT32 - toLength) {
1125 std::abort();
1127 sal_Int32 n = str->length - fromLength + toLength;
1128 rtl_uString_acquire(str); // in case *newStr == str
1129 if (n != 0) {
1130 rtl_uString_new_WithLength(newStr, n);
1131 (*newStr)->length = n;
1132 assert(i >= 0 && i < str->length);
1133 memcpy(
1134 (*newStr)->buffer, str->buffer, i * sizeof (sal_Unicode));
1135 for (sal_Int32 j = 0; j != toLength; ++j) {
1136 assert(static_cast< unsigned char >(to[j]) <= 0x7F);
1137 (*newStr)->buffer[i + j] = to[j];
1139 memcpy(
1140 (*newStr)->buffer + i + toLength,
1141 str->buffer + i + fromLength,
1142 (str->length - i - fromLength) * sizeof (sal_Unicode));
1144 rtl_uString_release(str);
1146 *index = i;
1149 void rtl_uString_newReplaceAll(
1150 rtl_uString ** newStr, rtl_uString * str, rtl_uString const * from,
1151 rtl_uString const * to) SAL_THROW_EXTERN_C()
1153 rtl_uString_newReplaceAllFromIndex( newStr, str, from, to, 0 );
1156 void rtl_uString_newReplaceAllFromIndex(
1157 rtl_uString ** newStr, rtl_uString * str, rtl_uString const * from,
1158 rtl_uString const * to, sal_Int32 fromIndex) SAL_THROW_EXTERN_C()
1160 assert(to != 0);
1161 assert(fromIndex >= 0 && fromIndex <= str->length);
1162 rtl_uString_assign(newStr, str);
1163 for (sal_Int32 i = fromIndex;; i += to->length) {
1164 rtl_uString_newReplaceFirst(newStr, *newStr, from, to, &i);
1165 if (i == -1) {
1166 break;
1171 void rtl_uString_newReplaceAllAsciiL(
1172 rtl_uString ** newStr, rtl_uString * str, char const * from,
1173 sal_Int32 fromLength, rtl_uString const * to) SAL_THROW_EXTERN_C()
1175 assert(to != 0);
1176 rtl_uString_assign(newStr, str);
1177 for (sal_Int32 i = 0;; i += to->length) {
1178 rtl_uString_newReplaceFirstAsciiL(
1179 newStr, *newStr, from, fromLength, to, &i);
1180 if (i == -1) {
1181 break;
1186 void rtl_uString_newReplaceAllAsciiLAsciiL(
1187 rtl_uString ** newStr, rtl_uString * str, char const * from,
1188 sal_Int32 fromLength, char const * to, sal_Int32 toLength)
1189 SAL_THROW_EXTERN_C()
1191 assert(toLength >= 0);
1192 rtl_uString_assign(newStr, str);
1193 for (sal_Int32 i = 0;; i += toLength) {
1194 rtl_uString_newReplaceFirstAsciiLAsciiL(
1195 newStr, *newStr, from, fromLength, to, toLength, &i);
1196 if (i == -1) {
1197 break;
1202 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */