update dev300-m58
[ooovba.git] / sal / rtl / source / ustring.c
blobb545ff48bbac21f7ffe2041699d6b690bf8ad9b5
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: ustring.c,v $
10 * $Revision: 1.31 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
30 #if defined(_MSC_VER) && (_MSC_VER >= 1400)
31 #pragma warning(disable:4738) // storing 32-bit float result in memory, possible loss of performance
32 #endif
34 #include <rtl/memory.h>
35 #include <osl/diagnose.h>
36 #include <osl/interlck.h>
37 #include <rtl/alloc.h>
38 #include <osl/mutex.h>
39 #include <osl/doublecheckedlocking.h>
40 #include <rtl/tencinfo.h>
42 #include <string.h>
43 #include <sal/alloca.h>
45 #include "hash.h"
46 #include "strimp.h"
47 #include "surrogates.h"
48 #include <rtl/ustring.h>
50 #include "rtl/math.h"
51 #include "rtl/tencinfo.h"
53 /* ======================================================================= */
55 /* static data to be referenced by all empty strings
56 * the refCount is predefined to 1 and must never become 0 !
58 static rtl_uString const aImplEmpty_rtl_uString =
60 (sal_Int32) (SAL_STRING_INTERN_FLAG|SAL_STRING_STATIC_FLAG|1), /*sal_Int32 refCount; */
61 0, /*sal_Int32 length; */
62 { 0 } /*sal_Unicode buffer[1];*/
65 /* ======================================================================= */
67 #define IMPL_RTL_STRCODE sal_Unicode
68 #define IMPL_RTL_USTRCODE( c ) (c)
69 #define IMPL_RTL_STRNAME( n ) rtl_ustr_ ## n
71 #define IMPL_RTL_STRINGNAME( n ) rtl_uString_ ## n
72 #define IMPL_RTL_STRINGDATA rtl_uString
73 #define IMPL_RTL_EMPTYSTRING aImplEmpty_rtl_uString
74 #define IMPL_RTL_INTERN
75 static void internRelease (rtl_uString *pThis);
77 /* ======================================================================= */
79 /* Include String/UString template code */
81 #include "strtmpl.c"
83 sal_Int32 rtl_ustr_indexOfAscii_WithLength(
84 sal_Unicode const * str, sal_Int32 len,
85 char const * subStr, sal_Int32 subLen)
87 if (subLen > 0 && subLen <= len) {
88 sal_Int32 i;
89 for (i = 0; i <= len - subLen; ++i) {
90 if (rtl_ustr_asciil_reverseEquals_WithLength(
91 str + i, subStr, subLen))
93 return i;
97 return -1;
100 sal_Int32 rtl_ustr_lastIndexOfAscii_WithLength(
101 sal_Unicode const * str, sal_Int32 len,
102 char const * subStr, sal_Int32 subLen)
104 if (subLen > 0 && subLen <= len) {
105 sal_Int32 i;
106 for (i = len - subLen; i >= 0; --i) {
107 if (rtl_ustr_asciil_reverseEquals_WithLength(
108 str + i, subStr, subLen))
110 return i;
114 return -1;
117 sal_Int32 SAL_CALL rtl_ustr_valueOfFloat(sal_Unicode * pStr, float f)
119 rtl_uString * pResult = NULL;
120 sal_Int32 nLen;
121 rtl_math_doubleToUString(
122 &pResult, 0, 0, f, rtl_math_StringFormat_G,
123 RTL_USTR_MAX_VALUEOFFLOAT - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0,
124 0, sal_True);
125 nLen = pResult->length;
126 OSL_ASSERT(nLen < RTL_USTR_MAX_VALUEOFFLOAT);
127 rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Unicode));
128 rtl_uString_release(pResult);
129 return nLen;
132 sal_Int32 SAL_CALL rtl_ustr_valueOfDouble(sal_Unicode * pStr, double d)
134 rtl_uString * pResult = NULL;
135 sal_Int32 nLen;
136 rtl_math_doubleToUString(
137 &pResult, 0, 0, d, rtl_math_StringFormat_G,
138 RTL_USTR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0,
139 0, sal_True);
140 nLen = pResult->length;
141 OSL_ASSERT(nLen < RTL_USTR_MAX_VALUEOFDOUBLE);
142 rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Unicode));
143 rtl_uString_release(pResult);
144 return nLen;
147 float SAL_CALL rtl_ustr_toFloat(sal_Unicode const * pStr)
149 return (float) rtl_math_uStringToDouble(pStr,
150 pStr + rtl_ustr_getLength(pStr),
151 '.', 0, 0, 0);
154 double SAL_CALL rtl_ustr_toDouble(sal_Unicode const * pStr)
156 return rtl_math_uStringToDouble(pStr, pStr + rtl_ustr_getLength(pStr), '.',
157 0, 0, 0);
160 /* ======================================================================= */
162 sal_Int32 SAL_CALL rtl_ustr_ascii_compare( const sal_Unicode* pStr1,
163 const sal_Char* pStr2 )
165 sal_Int32 nRet;
166 while ( ((nRet = ((sal_Int32)(*pStr1))-
167 ((sal_Int32)((unsigned char)(*pStr2)))) == 0) &&
168 *pStr2 )
170 pStr1++;
171 pStr2++;
174 return nRet;
177 /* ----------------------------------------------------------------------- */
179 sal_Int32 SAL_CALL rtl_ustr_ascii_compare_WithLength( const sal_Unicode* pStr1,
180 sal_Int32 nStr1Len,
181 const sal_Char* pStr2 )
183 sal_Int32 nRet = 0;
184 while( ((nRet = (nStr1Len ? (sal_Int32)(*pStr1) : 0)-
185 ((sal_Int32)((unsigned char)(*pStr2)))) == 0) &&
186 nStr1Len && *pStr2 )
188 pStr1++;
189 pStr2++;
190 nStr1Len--;
193 return nRet;
196 /* ----------------------------------------------------------------------- */
198 sal_Int32 SAL_CALL rtl_ustr_ascii_shortenedCompare_WithLength( const sal_Unicode* pStr1,
199 sal_Int32 nStr1Len,
200 const sal_Char* pStr2,
201 sal_Int32 nShortenedLength )
203 const sal_Unicode* pStr1End = pStr1 + nStr1Len;
204 sal_Int32 nRet;
205 while ( (nShortenedLength > 0) &&
206 (pStr1 < pStr1End) && *pStr2 )
208 /* Check ASCII range */
209 OSL_ENSURE( (*pStr2 & 0x80) == 0, "Found ASCII char > 127");
211 nRet = ((sal_Int32)*pStr1)-
212 ((sal_Int32)(unsigned char)*pStr2);
213 if ( nRet != 0 )
214 return nRet;
216 nShortenedLength--;
217 pStr1++;
218 pStr2++;
221 if ( nShortenedLength <= 0 )
222 return 0;
224 if ( *pStr2 )
226 OSL_ENSURE( pStr1 == pStr1End, "pStr1 == pStr1End failed" );
227 // first is a substring of the second string => less (negative value)
228 nRet = -1;
230 else
232 // greater or equal
233 nRet = pStr1End - pStr1;
236 return nRet;
239 /* ----------------------------------------------------------------------- */
241 sal_Int32 SAL_CALL rtl_ustr_asciil_reverseCompare_WithLength( const sal_Unicode* pStr1,
242 sal_Int32 nStr1Len,
243 const sal_Char* pStr2,
244 sal_Int32 nStr2Len )
246 const sal_Unicode* pStr1Run = pStr1+nStr1Len;
247 const sal_Char* pStr2Run = pStr2+nStr2Len;
248 sal_Int32 nRet;
249 while ( (pStr1 < pStr1Run) && (pStr2 < pStr2Run) )
251 pStr1Run--;
252 pStr2Run--;
253 nRet = ((sal_Int32)*pStr1Run)-((sal_Int32)*pStr2Run);
254 if ( nRet )
255 return nRet;
258 return nStr1Len - nStr2Len;
261 /* ----------------------------------------------------------------------- */
263 sal_Bool SAL_CALL rtl_ustr_asciil_reverseEquals_WithLength( const sal_Unicode* pStr1,
264 const sal_Char* pStr2,
265 sal_Int32 nStrLen )
267 const sal_Unicode* pStr1Run = pStr1+nStrLen;
268 const sal_Char* pStr2Run = pStr2+nStrLen;
269 while ( pStr1 < pStr1Run )
271 pStr1Run--;
272 pStr2Run--;
273 if( *pStr1Run != (sal_Unicode)*pStr2Run )
274 return sal_False;
277 return sal_True;
280 /* ----------------------------------------------------------------------- */
282 sal_Int32 SAL_CALL rtl_ustr_ascii_compareIgnoreAsciiCase( const sal_Unicode* pStr1,
283 const sal_Char* pStr2 )
285 sal_Int32 nRet;
286 sal_Int32 c1;
287 sal_Int32 c2;
290 /* If character between 'A' and 'Z', than convert it to lowercase */
291 c1 = (sal_Int32)*pStr1;
292 c2 = (sal_Int32)((unsigned char)*pStr2);
293 if ( (c1 >= 65) && (c1 <= 90) )
294 c1 += 32;
295 if ( (c2 >= 65) && (c2 <= 90) )
296 c2 += 32;
297 nRet = c1-c2;
298 if ( nRet != 0 )
299 return nRet;
301 pStr1++;
302 pStr2++;
304 while ( c2 );
306 return 0;
309 /* ----------------------------------------------------------------------- */
311 sal_Int32 SAL_CALL rtl_ustr_ascii_compareIgnoreAsciiCase_WithLength( const sal_Unicode* pStr1,
312 sal_Int32 nStr1Len,
313 const sal_Char* pStr2 )
315 sal_Int32 nRet;
316 sal_Int32 c1;
317 sal_Int32 c2;
320 if ( !nStr1Len )
321 return *pStr2 == '\0' ? 0 : -1;
323 /* If character between 'A' and 'Z', than convert it to lowercase */
324 c1 = (sal_Int32)*pStr1;
325 c2 = (sal_Int32)((unsigned char)*pStr2);
326 if ( (c1 >= 65) && (c1 <= 90) )
327 c1 += 32;
328 if ( (c2 >= 65) && (c2 <= 90) )
329 c2 += 32;
330 nRet = c1-c2;
331 if ( nRet != 0 )
332 return nRet;
334 pStr1++;
335 pStr2++;
336 nStr1Len--;
338 while( c2 );
340 return 0;
343 sal_Int32 rtl_ustr_ascii_compareIgnoreAsciiCase_WithLengths(
344 sal_Unicode const * first, sal_Int32 firstLen,
345 char const * second, sal_Int32 secondLen)
347 sal_Int32 i;
348 sal_Int32 len = firstLen < secondLen ? firstLen : secondLen;
349 for (i = 0; i < len; ++i) {
350 sal_Int32 c1 = *first++;
351 sal_Int32 c2 = (unsigned char) *second++;
352 sal_Int32 d;
353 if (c1 >= 65 && c1 <= 90) {
354 c1 += 32;
356 if (c2 >= 65 && c2 <= 90) {
357 c2 += 32;
359 d = c1 - c2;
360 if (d != 0) {
361 return d;
364 return firstLen - secondLen;
367 /* ----------------------------------------------------------------------- */
369 sal_Int32 SAL_CALL rtl_ustr_ascii_shortenedCompareIgnoreAsciiCase_WithLength( const sal_Unicode* pStr1,
370 sal_Int32 nStr1Len,
371 const sal_Char* pStr2,
372 sal_Int32 nShortenedLength )
374 const sal_Unicode* pStr1End = pStr1 + nStr1Len;
375 sal_Int32 nRet;
376 sal_Int32 c1;
377 sal_Int32 c2;
378 while ( (nShortenedLength > 0) &&
379 (pStr1 < pStr1End) && *pStr2 )
381 /* Check ASCII range */
382 OSL_ENSURE( (*pStr2 & 0x80) == 0, "Found ASCII char > 127");
384 /* If character between 'A' and 'Z', than convert it to lowercase */
385 c1 = (sal_Int32)*pStr1;
386 c2 = (sal_Int32)((unsigned char)*pStr2);
387 if ( (c1 >= 65) && (c1 <= 90) )
388 c1 += 32;
389 if ( (c2 >= 65) && (c2 <= 90) )
390 c2 += 32;
391 nRet = c1-c2;
392 if ( nRet != 0 )
393 return nRet;
395 nShortenedLength--;
396 pStr1++;
397 pStr2++;
400 if ( nShortenedLength <= 0 )
401 return 0;
403 if ( *pStr2 )
405 OSL_ENSURE( pStr1 == pStr1End, "pStr1 == pStr1End failed" );
406 // first is a substring of the second string => less (negative value)
407 nRet = -1;
409 else
411 // greater or equal
412 nRet = pStr1End - pStr1;
415 return nRet;
418 /* ----------------------------------------------------------------------- */
420 void SAL_CALL rtl_uString_newFromAscii( rtl_uString** ppThis,
421 const sal_Char* pCharStr )
423 sal_Int32 nLen;
425 if ( pCharStr )
427 const sal_Char* pTempStr = pCharStr;
428 while( *pTempStr )
429 pTempStr++;
430 nLen = pTempStr-pCharStr;
432 else
433 nLen = 0;
435 if ( !nLen )
437 IMPL_RTL_STRINGNAME( new )( ppThis );
438 return;
441 if ( *ppThis )
442 IMPL_RTL_STRINGNAME( release )( *ppThis );
444 *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen );
445 OSL_ASSERT(*ppThis != NULL);
446 if ( (*ppThis) )
448 IMPL_RTL_STRCODE* pBuffer = (*ppThis)->buffer;
451 /* Check ASCII range */
452 OSL_ENSURE( ((unsigned char)*pCharStr) <= 127,
453 "rtl_uString_newFromAscii() - Found ASCII char > 127" );
455 *pBuffer = *pCharStr;
456 pBuffer++;
457 pCharStr++;
459 while ( *pCharStr );
463 void SAL_CALL rtl_uString_newFromCodePoints(
464 rtl_uString ** newString, sal_uInt32 const * codePoints,
465 sal_Int32 codePointCount)
467 sal_Int32 n;
468 sal_Int32 i;
469 sal_Unicode * p;
470 OSL_ASSERT(
471 newString != NULL &&
472 (codePoints != NULL || codePointCount == 0) &&
473 codePointCount >= 0);
474 if (codePointCount == 0) {
475 rtl_uString_new(newString);
476 return;
478 if (*newString != NULL) {
479 rtl_uString_release(*newString);
481 n = codePointCount;
482 for (i = 0; i < codePointCount; ++i) {
483 OSL_ASSERT(codePoints[i] <= 0x10FFFF);
484 if (codePoints[i] >= 0x10000) {
485 ++n;
488 /* Builds on the assumption that sal_Int32 uses 32 bit two's complement
489 representation with wrap around (the necessary number of UTF-16 code
490 units will be no larger than 2 * SAL_MAX_INT32, represented as
491 sal_Int32 -2): */
492 if (n < 0) {
493 *newString = NULL;
494 return;
496 *newString = rtl_uString_ImplAlloc(n);
497 if (*newString == NULL) {
498 return;
500 p = (*newString)->buffer;
501 for (i = 0; i < codePointCount; ++i) {
502 sal_uInt32 c = codePoints[i];
503 if (c < 0x10000) {
504 *p++ = (sal_Unicode) c;
505 } else {
506 c -= 0x10000;
507 *p++ = (sal_Unicode) ((c >> 10) | SAL_RTL_FIRST_HIGH_SURROGATE);
508 *p++ = (sal_Unicode) ((c & 0x3FF) | SAL_RTL_FIRST_LOW_SURROGATE);
513 /* ======================================================================= */
515 static int rtl_ImplGetFastUTF8UnicodeLen( const sal_Char* pStr, sal_Int32 nLen )
517 int n;
518 sal_uChar c;
519 const sal_Char* pEndStr;
521 n = 0;
522 pEndStr = pStr+nLen;
523 while ( pStr < pEndStr )
525 c = (sal_uChar)*pStr;
527 if ( !(c & 0x80) )
528 pStr++;
529 else if ( (c & 0xE0) == 0xC0 )
530 pStr += 2;
531 else if ( (c & 0xF0) == 0xE0 )
532 pStr += 3;
533 else if ( (c & 0xF8) == 0xF0 )
534 pStr += 4;
535 else if ( (c & 0xFC) == 0xF8 )
536 pStr += 5;
537 else if ( (c & 0xFE) == 0xFC )
538 pStr += 6;
539 else
540 pStr++;
542 n++;
545 return n;
548 /* ----------------------------------------------------------------------- */
550 static void rtl_string2UString_status( rtl_uString** ppThis,
551 const sal_Char* pStr,
552 sal_Int32 nLen,
553 rtl_TextEncoding eTextEncoding,
554 sal_uInt32 nCvtFlags,
555 sal_uInt32 *pInfo )
557 OSL_ENSURE(rtl_isOctetTextEncoding(eTextEncoding),
558 "rtl_string2UString_status() - Wrong TextEncoding" );
560 if ( !nLen )
561 rtl_uString_new( ppThis );
562 else
564 if ( *ppThis )
565 IMPL_RTL_STRINGNAME( release )( *ppThis );
567 /* Optimization for US-ASCII */
568 if ( eTextEncoding == RTL_TEXTENCODING_ASCII_US )
570 IMPL_RTL_STRCODE* pBuffer;
571 *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen );
572 if (*ppThis == NULL) {
573 if (pInfo != NULL) {
574 *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
575 RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
577 return;
579 pBuffer = (*ppThis)->buffer;
582 /* Check ASCII range */
583 OSL_ENSURE( ((unsigned char)*pStr) <= 127,
584 "rtl_string2UString_status() - Found char > 127 and RTL_TEXTENCODING_ASCII_US is specified" );
586 *pBuffer = *pStr;
587 pBuffer++;
588 pStr++;
589 nLen--;
591 while ( nLen );
593 else
595 rtl_uString* pTemp;
596 rtl_uString* pTemp2 = NULL;
597 rtl_TextToUnicodeConverter hConverter;
598 sal_uInt32 nInfo;
599 sal_Size nSrcBytes;
600 sal_Size nDestChars;
601 sal_Size nNewLen;
603 /* Optimization for UTF-8 - we try to calculate the exact length */
604 /* For all other encoding we try the maximum - and reallocate
605 the buffer if needed */
606 if ( eTextEncoding == RTL_TEXTENCODING_UTF8 )
608 nNewLen = rtl_ImplGetFastUTF8UnicodeLen( pStr, nLen );
609 /* Includes the string only ASCII, then we could copy
610 the buffer faster */
611 if ( nNewLen == (sal_Size)nLen )
613 IMPL_RTL_STRCODE* pBuffer;
614 *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen );
615 if (*ppThis == NULL)
617 if (pInfo != NULL) {
618 *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
619 RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
621 return;
623 pBuffer = (*ppThis)->buffer;
626 /* Check ASCII range */
627 OSL_ENSURE( ((unsigned char)*pStr) <= 127,
628 "rtl_string2UString_status() - UTF8 test encoding is wrong" );
630 *pBuffer = *pStr;
631 pBuffer++;
632 pStr++;
633 nLen--;
635 while ( nLen );
636 if (pInfo != NULL) {
637 *pInfo = 0;
639 return;
642 else
643 nNewLen = nLen;
645 nCvtFlags |= RTL_TEXTTOUNICODE_FLAGS_FLUSH;
646 hConverter = rtl_createTextToUnicodeConverter( eTextEncoding );
648 pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen );
649 if (pTemp == NULL) {
650 if (pInfo != NULL) {
651 *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
652 RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
654 return;
656 nDestChars = rtl_convertTextToUnicode( hConverter, 0,
657 pStr, nLen,
658 pTemp->buffer, nNewLen,
659 nCvtFlags,
660 &nInfo, &nSrcBytes );
662 /* Buffer not big enough, try again with enough space */
663 /* Shouldn't be the case, but if we get textencoding which
664 could results in more unicode characters we have this
665 code here. Could be the case for apple encodings */
666 while ( nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL )
668 rtl_freeMemory( pTemp );
669 nNewLen += 8;
670 pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen );
671 if (pTemp == NULL) {
672 if (pInfo != NULL) {
673 *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
674 RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
676 return;
678 nDestChars = rtl_convertTextToUnicode( hConverter, 0,
679 pStr, nLen,
680 pTemp->buffer, nNewLen,
681 nCvtFlags,
682 &nInfo, &nSrcBytes );
685 if (pInfo)
686 *pInfo = nInfo;
688 /* Set the buffer to the correct size or if there is too
689 much overhead, reallocate to the correct size */
690 if ( nNewLen > nDestChars+8 )
692 pTemp2 = IMPL_RTL_STRINGNAME( ImplAlloc )( nDestChars );
694 if (pTemp2 != NULL)
696 rtl_str_ImplCopy(pTemp2->buffer, pTemp->buffer, nDestChars);
697 rtl_freeMemory(pTemp);
698 pTemp = pTemp2;
700 else
702 pTemp->length = nDestChars;
703 pTemp->buffer[nDestChars] = 0;
706 rtl_destroyTextToUnicodeConverter( hConverter );
707 *ppThis = pTemp;
709 /* Results the conversion in an empty buffer -
710 create an empty string */
711 if ( pTemp && !nDestChars )
712 rtl_uString_new( ppThis );
717 void SAL_CALL rtl_string2UString( rtl_uString** ppThis,
718 const sal_Char* pStr,
719 sal_Int32 nLen,
720 rtl_TextEncoding eTextEncoding,
721 sal_uInt32 nCvtFlags )
723 rtl_string2UString_status( ppThis, pStr, nLen, eTextEncoding,
724 nCvtFlags, NULL );
727 /* ----------------------------------------------------------------------- */
/* Tells rtl_ustring_intern_internal() how to treat the string handed in.
 * With CAN_RETURN that function frees the passed string when the pool
 * answers with a different one; with CANNOT_RETURN it never frees it.
 * The value is also forwarded to rtl_str_hash_intern().
 */
typedef enum {
    CANNOT_RETURN = 0,
    CAN_RETURN    = 1
} StrLifecycle;
734 static oslMutex
735 getInternMutex()
737 static oslMutex pPoolGuard = NULL;
738 if( !pPoolGuard )
740 oslMutex pGlobalGuard;
741 pGlobalGuard = *osl_getGlobalMutex();
742 osl_acquireMutex( pGlobalGuard );
743 if( !pPoolGuard )
745 oslMutex p = osl_createMutex();
746 OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER();
747 pPoolGuard = p;
749 osl_releaseMutex( pGlobalGuard );
751 else
753 OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER();
756 return pPoolGuard;
759 static StringHashTable *pInternPool = NULL;
761 /* returns true if we found a dup in the pool */
762 static void rtl_ustring_intern_internal( rtl_uString ** newStr,
763 rtl_uString * str,
764 StrLifecycle can_return )
766 oslMutex pPoolMutex;
768 pPoolMutex = getInternMutex();
770 osl_acquireMutex( pPoolMutex );
772 if (!pInternPool)
773 pInternPool = rtl_str_hash_new (1024);
774 *newStr = rtl_str_hash_intern (pInternPool, str, can_return);
776 osl_releaseMutex( pPoolMutex );
778 if( can_return && *newStr != str )
779 { /* we dupped, then found a match */
780 rtl_freeMemory( str );
784 void SAL_CALL rtl_uString_intern( rtl_uString ** newStr,
785 rtl_uString * str)
787 if (SAL_STRING_IS_INTERN(str))
789 IMPL_RTL_AQUIRE( str );
790 *newStr = str;
792 else
794 rtl_uString *pOrg = *newStr;
795 *newStr = NULL;
796 rtl_ustring_intern_internal( newStr, str, CANNOT_RETURN );
797 if (pOrg)
798 rtl_uString_release (pOrg);
802 void SAL_CALL rtl_uString_internConvert( rtl_uString ** newStr,
803 const sal_Char * str,
804 sal_Int32 len,
805 rtl_TextEncoding eTextEncoding,
806 sal_uInt32 convertFlags,
807 sal_uInt32 * pInfo )
809 rtl_uString *scratch;
811 if (*newStr)
813 rtl_uString_release (*newStr);
814 *newStr = NULL;
817 if ( len < 256 )
818 { // try various optimisations
819 if ( len < 0 )
820 len = strlen( str );
821 if ( eTextEncoding == RTL_TEXTENCODING_ASCII_US )
823 int i;
824 rtl_uString *pScratch;
825 pScratch = alloca( sizeof( rtl_uString )
826 + len * sizeof (IMPL_RTL_STRCODE ) );
827 for (i = 0; i < len; i++)
829 /* Check ASCII range */
830 OSL_ENSURE( ((unsigned char)str[i]) <= 127,
831 "rtl_ustring_internConvert() - Found char > 127 and RTL_TEXTENCODING_ASCII_US is specified" );
832 pScratch->buffer[i] = str[i];
834 pScratch->length = len;
835 rtl_ustring_intern_internal( newStr, pScratch, CANNOT_RETURN );
836 return;
838 /* FIXME: we want a nice UTF-8 / alloca shortcut here */
841 scratch = NULL;
842 rtl_string2UString_status( &scratch, str, len, eTextEncoding, convertFlags,
843 pInfo );
844 if (!scratch) {
845 return;
847 rtl_ustring_intern_internal( newStr, scratch, CAN_RETURN );
850 static void
851 internRelease (rtl_uString *pThis)
853 oslMutex pPoolMutex;
855 rtl_uString *pFree = NULL;
856 if ( SAL_STRING_REFCOUNT(
857 osl_decrementInterlockedCount( &(pThis->refCount) ) ) == 0)
859 pPoolMutex = getInternMutex();
860 osl_acquireMutex( pPoolMutex );
862 rtl_str_hash_remove (pInternPool, pThis);
864 /* May have been separately acquired */
865 if ( SAL_STRING_REFCOUNT(
866 osl_incrementInterlockedCount( &(pThis->refCount) ) ) == 1 )
868 /* we got the last ref */
869 pFree = pThis;
871 else /* very unusual */
873 internRelease (pThis);
876 osl_releaseMutex( pPoolMutex );
878 if (pFree)
879 rtl_freeMemory (pFree);
882 sal_uInt32 SAL_CALL rtl_uString_iterateCodePoints(
883 rtl_uString const * string, sal_Int32 * indexUtf16,
884 sal_Int32 incrementCodePoints)
886 sal_Int32 n;
887 sal_Unicode cu;
888 sal_uInt32 cp;
889 OSL_ASSERT(string != NULL && indexUtf16 != NULL);
890 n = *indexUtf16;
891 OSL_ASSERT(n >= 0 && n <= string->length);
892 while (incrementCodePoints < 0) {
893 OSL_ASSERT(n > 0);
894 cu = string->buffer[--n];
895 if (SAL_RTL_IS_LOW_SURROGATE(cu) && n != 0 &&
896 SAL_RTL_IS_HIGH_SURROGATE(string->buffer[n - 1]))
898 --n;
900 ++incrementCodePoints;
902 OSL_ASSERT(n >= 0 && n < string->length);
903 cu = string->buffer[n];
904 if (SAL_RTL_IS_HIGH_SURROGATE(cu) && string->length - n >= 2 &&
905 SAL_RTL_IS_LOW_SURROGATE(string->buffer[n + 1]))
907 cp = SAL_RTL_COMBINE_SURROGATES(cu, string->buffer[n + 1]);
908 } else {
909 cp = cu;
911 while (incrementCodePoints > 0) {
912 OSL_ASSERT(n < string->length);
913 cu = string->buffer[n++];
914 if (SAL_RTL_IS_HIGH_SURROGATE(cu) && n != string->length &&
915 SAL_RTL_IS_LOW_SURROGATE(string->buffer[n]))
917 ++n;
919 --incrementCodePoints;
921 OSL_ASSERT(n >= 0 && n <= string->length);
922 *indexUtf16 = n;
923 return cp;
926 sal_Bool rtl_convertStringToUString(
927 rtl_uString ** target, char const * source, sal_Int32 length,
928 rtl_TextEncoding encoding, sal_uInt32 flags) SAL_THROW_EXTERN_C()
930 sal_uInt32 info;
931 rtl_string2UString_status(target, source, length, encoding, flags, &info);
932 return (sal_Bool) ((info & RTL_TEXTTOUNICODE_INFO_ERROR) == 0);