1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <sal/config.h>
27 #include <rtl/ustring.hxx>
28 #include <rtl/ustrbuf.hxx>
29 #include <rtl/string.hxx>
30 #include <rtl/strbuf.hxx>
31 #include <sal/types.h>
33 #include <comphelper/string.hxx>
34 #include <comphelper/stl_types.hxx>
36 #include <com/sun/star/i18n/BreakIterator.hpp>
37 #include <com/sun/star/i18n/CharType.hpp>
38 #include <com/sun/star/i18n/Collator.hpp>
41 namespace comphelper
{ namespace string
{
// Shared implementation behind the stripStart() overloads, parameterised on
// the string type T and the character type C.
// Advances index i while i < rIn.getLength(), testing rIn[i] against cRemove.
// NOTE(review): presumably returns rIn without its leading run of cRemove
// characters -- confirm against the full body.
45 template <typename T
, typename C
> T
tmpl_stripStart(const T
&rIn
,
53 while (i
< rIn
.getLength())
55 if (rIn
[i
] != cRemove
)
// OString overload of stripStart: forwards rIn and c to
// tmpl_stripStart<OString, sal_Char> and returns its result.
64 OString
stripStart(const OString
&rIn
, sal_Char c
)
66 return tmpl_stripStart
<OString
, sal_Char
>(rIn
, c
);
// OUString overload of stripStart: forwards rIn and c to
// tmpl_stripStart<OUString, sal_Unicode> and returns its result.
69 OUString
stripStart(const OUString
&rIn
, sal_Unicode c
)
71 return tmpl_stripStart
<OUString
, sal_Unicode
>(rIn
, c
);
// Shared implementation behind the stripEnd() overloads, parameterised on
// the string type T and the character type C.
// Index i starts at rIn.getLength(); the character just before it, rIn[i-1],
// is tested against cRemove. The result is rIn.copy(0, i).
// NOTE(review): presumably i is moved backwards past a trailing run of
// cRemove characters before the copy -- confirm against the full body.
76 template <typename T
, typename C
> T
tmpl_stripEnd(const T
&rIn
,
82 sal_Int32 i
= rIn
.getLength();
86 if (rIn
[i
-1] != cRemove
)
91 return rIn
.copy(0, i
);
// OString overload of stripEnd: forwards rIn and c to
// tmpl_stripEnd<OString, sal_Char> and returns its result.
95 OString
stripEnd(const OString
&rIn
, sal_Char c
)
97 return tmpl_stripEnd
<OString
, sal_Char
>(rIn
, c
);
// OUString overload of stripEnd: forwards rIn and c to
// tmpl_stripEnd<OUString, sal_Unicode> and returns its result.
100 OUString
stripEnd(const OUString
&rIn
, sal_Unicode c
)
102 return tmpl_stripEnd
<OUString
, sal_Unicode
>(rIn
, c
);
// OString overload of strip: applies stripStart(rIn, c) first and then
// stripEnd(..., c) to that result, so both ends are processed with the same
// character c.
105 OString
strip(const OString
&rIn
, sal_Char c
)
107 return stripEnd(stripStart(rIn
, c
), c
);
// OUString overload of strip: applies stripStart(rIn, c) first and then
// stripEnd(..., c) to that result, so both ends are processed with the same
// character c.
110 OUString
strip(const OUString
&rIn
, sal_Unicode c
)
112 return stripEnd(stripStart(rIn
, c
), c
);
// Shared implementation behind the getTokenCount() overloads, parameterised
// on the string type T and the character type C.
// The counter nTokCount starts at 1 and the loop visits every index of rIn.
// NOTE(review): presumably the counter is bumped once per separator
// character found -- confirm against the full body.
117 template <typename T
, typename C
> sal_Int32
tmpl_getTokenCount(const T
&rIn
,
120 // Empty String: TokenCount by Definition is 0
124 sal_Int32 nTokCount
= 1;
125 for (sal_Int32 i
= 0; i
< rIn
.getLength(); ++i
)
// OString overload of getTokenCount: forwards rIn and the separator cTok to
// tmpl_getTokenCount<OString, sal_Char> and returns its result.
134 sal_Int32
getTokenCount(const OString
&rIn
, sal_Char cTok
)
136 return tmpl_getTokenCount
<OString
, sal_Char
>(rIn
, cTok
);
// OUString overload of getTokenCount: forwards rIn and the separator cTok to
// tmpl_getTokenCount<OUString, sal_Unicode> and returns its result.
139 sal_Int32
getTokenCount(const OUString
&rIn
, sal_Unicode cTok
)
141 return tmpl_getTokenCount
<OUString
, sal_Unicode
>(rIn
, cTok
);
// Converts a string of decimal digits, from any of the Unicode digit blocks
// listed below, into an unsigned number.
// The string is read one code point at a time (iterateCodePoints advances i),
// and each step folds a digit into the total as result = result * 10 + value.
// After the ASCII test, the else-if chain checks lower bounds from the
// highest code point down to the lowest, so the first branch that matches
// identifies the digit block the code point belongs to.
// NOTE(review): presumably each branch sets value to c minus the code point
// of that block's zero digit -- confirm against the full body.
// NOTE(review): the first test (c <= 0x0039) has no lower bound, so it also
// matches code points below U+0030; verify callers only pass digit runs.
// NOTE(review): result is a sal_uInt32 and no range check is visible here, so
// long digit runs presumably wrap around -- confirm.
144 sal_uInt32
decimalStringToNumber(
145 OUString
const & str
)
147 sal_uInt32 result
= 0;
148 for( sal_Int32 i
= 0 ; i
< str
.getLength() ; )
150 sal_uInt32 c
= str
.iterateCodePoints(&i
);
151 sal_uInt32 value
= 0;
152 if( c
<= 0x0039) // ASCII decimal digits, most common
154 else if( c
>= 0x1D7F6 ) // mathematical monospace digits
156 else if( c
>= 0x1D7EC ) // mathematical sans-serif bold digits
158 else if( c
>= 0x1D7E2 ) // mathematical sans-serif digits
160 else if( c
>= 0x1D7D8 ) // mathematical double-struck digits
162 else if( c
>= 0x1D7CE ) // mathematical bold digits
164 else if( c
>= 0x11066 ) // brahmi digits
166 else if( c
>= 0x104A0 ) // osmanya digits
168 else if( c
>= 0xFF10 ) // fullwidth digits
170 else if( c
>= 0xABF0 ) // meetei mayek digits
172 else if( c
>= 0xAA50 ) // cham digits
174 else if( c
>= 0xA9D0 ) // javanese digits
176 else if( c
>= 0xA900 ) // kayah li digits
178 else if( c
>= 0xA8D0 ) // saurashtra digits
180 else if( c
>= 0xA620 ) // vai digits
182 else if( c
>= 0x1C50 ) // ol chiki digits
184 else if( c
>= 0x1C40 ) // lepcha digits
186 else if( c
>= 0x1BB0 ) // sundanese digits
188 else if( c
>= 0x1B50 ) // balinese digits
190 else if( c
>= 0x1A90 ) // tai tham tham digits
192 else if( c
>= 0x1A80 ) // tai tham hora digits
194 else if( c
>= 0x19D0 ) // new tai lue digits
196 else if( c
>= 0x1946 ) // limbu digits
198 else if( c
>= 0x1810 ) // mongolian digits
200 else if( c
>= 0x17E0 ) // khmer digits
202 else if( c
>= 0x1090 ) // myanmar shan digits
204 else if( c
>= 0x1040 ) // myanmar digits
206 else if( c
>= 0x0F20 ) // tibetan digits
208 else if( c
>= 0x0ED0 ) // lao digits
210 else if( c
>= 0x0E50 ) // thai digits
212 else if( c
>= 0x0D66 ) // malayalam digits
214 else if( c
>= 0x0CE6 ) // kannada digits
216 else if( c
>= 0x0C66 ) // telugu digits
218 else if( c
>= 0x0BE6 ) // tamil digits
220 else if( c
>= 0x0B66 ) // odia digits
222 else if( c
>= 0x0AE6 ) // gujarati digits
224 else if( c
>= 0x0A66 ) // gurmukhi digits
226 else if( c
>= 0x09E6 ) // bengali digits
228 else if( c
>= 0x0966 ) // devanagari digits
230 else if( c
>= 0x07C0 ) // nko digits
232 else if( c
>= 0x06F0 ) // extended arabic-indic digits
234 else if( c
>= 0x0660 ) // arabic-indic digits
236 result
= result
* 10 + value
;
// Brings ::com::sun::star into scope so the definitions that follow can refer
// to uno::, i18n:: and lang:: without the full prefix.
241 using namespace ::com::sun::star
;
243 // convert between a sequence of strings and a comma-separated string
// Sequence-to-string direction: feeds the elements of i_rSeq through
// ::comphelper::intersperse into an OUStringBufferAppender wrapping buf, with
// ", " (comma followed by a space) as the separator, and returns the buffer
// contents via makeStringAndClear().
245 OUString
convertCommaSeparated(
246 uno::Sequence
< OUString
> const& i_rSeq
)
249 ::comphelper::intersperse(
250 i_rSeq
.begin(), i_rSeq
.end(), ::comphelper::OUStringBufferAppender(buf
), OUString( ", " ));
251 return buf
.makeStringAndClear();
// String-to-sequence direction: pulls tokens out of i_rString with
// getToken(0, ',', idx), where idx is the running position that getToken
// updates, gathers them in the local vector vec, and then copies vec into a
// uno::Sequence created with vec.size() elements.
// NOTE(review): whether tokens are trimmed or empty tokens dropped before
// being stored cannot be told from the lines shown -- confirm.
254 uno::Sequence
< OUString
>
255 convertCommaSeparated( OUString
const& i_rString
)
257 std::vector
< OUString
> vec
;
261 i_rString
.getToken(0, static_cast<sal_Unicode
> (','), idx
);
267 uno::Sequence
< OUString
> kws(vec
.size());
268 std::copy(vec
.begin(), vec
.end(), kws
.begin());
// Compares rLHS with rRHS chunk by chunk so that embedded digit runs are
// ordered by numeric value instead of character by character.
//
// Each pass of the loop handles one non-digit chunk followed by one digit
// chunk on both sides:
//  - the non-digit chunk runs from n?HSLastNonDigitPos to the position
//    returned by rBI->nextCharBlock(..., DECIMAL_DIGIT_NUMBER) and is
//    compared with rCollator->compareSubstring;
//  - the digit chunk runs from there to the position returned by
//    rBI->endOfCharBlock(..., DECIMAL_DIGIT_NUMBER), is converted with
//    decimalStringToNumber, and yields -1 or 1 depending on nLHS < nRHS.
// The loop continues while either side still has characters left before its
// first-digit position reaches the string length.
//
// NOTE(review): presumably returns nRet as soon as a chunk differs and 0 when
// all chunks match -- confirm against the full body.
// NOTE(review): digit chunks are narrowed to sal_uInt32, so very long digit
// runs presumably compare by a wrapped value -- confirm.
// NOTE(review): rCollator and rBI are dereferenced with no null check visible
// in these lines.
273 sal_Int32
compareNatural( const OUString
& rLHS
, const OUString
& rRHS
,
274 const uno::Reference
< i18n::XCollator
> &rCollator
,
275 const uno::Reference
< i18n::XBreakIterator
> &rBI
,
276 const lang::Locale
&rLocale
)
280 sal_Int32 nLHSLastNonDigitPos
= 0;
281 sal_Int32 nRHSLastNonDigitPos
= 0;
282 sal_Int32 nLHSFirstDigitPos
= 0;
283 sal_Int32 nRHSFirstDigitPos
= 0;
285 while (nLHSFirstDigitPos
< rLHS
.getLength() || nRHSFirstDigitPos
< rRHS
.getLength())
287 sal_Int32 nLHSChunkLen
;
288 sal_Int32 nRHSChunkLen
;
290 //Compare non digit block as normal strings
291 nLHSFirstDigitPos
= rBI
->nextCharBlock(rLHS
, nLHSLastNonDigitPos
,
292 rLocale
, i18n::CharType::DECIMAL_DIGIT_NUMBER
);
293 nRHSFirstDigitPos
= rBI
->nextCharBlock(rRHS
, nRHSLastNonDigitPos
,
294 rLocale
, i18n::CharType::DECIMAL_DIGIT_NUMBER
);
// A result of -1 is replaced by the string length, so the non-digit chunk
// then extends to the end of the string.
295 if (nLHSFirstDigitPos
== -1)
296 nLHSFirstDigitPos
= rLHS
.getLength();
297 if (nRHSFirstDigitPos
== -1)
298 nRHSFirstDigitPos
= rRHS
.getLength();
299 nLHSChunkLen
= nLHSFirstDigitPos
- nLHSLastNonDigitPos
;
300 nRHSChunkLen
= nRHSFirstDigitPos
- nRHSLastNonDigitPos
;
302 nRet
= rCollator
->compareSubstring(rLHS
, nLHSLastNonDigitPos
,
303 nLHSChunkLen
, rRHS
, nRHSLastNonDigitPos
, nRHSChunkLen
);
307 //Compare digit block as one number vs another
308 nLHSLastNonDigitPos
= rBI
->endOfCharBlock(rLHS
, nLHSFirstDigitPos
,
309 rLocale
, i18n::CharType::DECIMAL_DIGIT_NUMBER
);
310 nRHSLastNonDigitPos
= rBI
->endOfCharBlock(rRHS
, nRHSFirstDigitPos
,
311 rLocale
, i18n::CharType::DECIMAL_DIGIT_NUMBER
);
// Same -1 handling as above: the digit chunk then extends to the end of the
// string.
312 if (nLHSLastNonDigitPos
== -1)
313 nLHSLastNonDigitPos
= rLHS
.getLength();
314 if (nRHSLastNonDigitPos
== -1)
315 nRHSLastNonDigitPos
= rRHS
.getLength();
316 nLHSChunkLen
= nLHSLastNonDigitPos
- nLHSFirstDigitPos
;
317 nRHSChunkLen
= nRHSLastNonDigitPos
- nRHSFirstDigitPos
;
319 //To-Do: Possibly scale down those unicode codepoints that relate to
320 //numbers outside of the normal 0-9 range, e.g. see GetLocalizedChar in
323 sal_uInt32 nLHS
= comphelper::string::decimalStringToNumber(rLHS
.copy(nLHSFirstDigitPos
, nLHSChunkLen
));
324 sal_uInt32 nRHS
= comphelper::string::decimalStringToNumber(rRHS
.copy(nRHSFirstDigitPos
, nRHSChunkLen
));
328 nRet
= (nLHS
< nRHS
) ? -1 : 1;
// Constructor: stores rLocale in m_aLocale, creates the collator from
// rContext and loads the default collator for that locale (second argument
// 0), then creates the break iterator from the same context.
336 NaturalStringSorter::NaturalStringSorter(
337 const uno::Reference
< uno::XComponentContext
> &rContext
,
338 const lang::Locale
&rLocale
) : m_aLocale(rLocale
)
340 m_xCollator
= i18n::Collator::create( rContext
);
341 m_xCollator
->loadDefaultCollator(m_aLocale
, 0);
342 m_xBI
= i18n::BreakIterator::create( rContext
);
// Template parameters: OPER is a predicate taking a sal_Unicode and returning
// bool; T is the string type. The loop applies OPER to rString[i] for every
// index below rString.getLength().
347 //do OPER on each element of the string, return false
348 //if any OPER is false, true otherwise
349 template <bool (*OPER
)(sal_Unicode
), typename T
>
350 bool tmpl_is_OPER_AsciiString(const T
&rString
)
352 for (sal_Int32 i
= 0; i
< rString
.getLength(); ++i
)
354 if (!OPER(rString
[i
]))
// OString overload: runs tmpl_is_OPER_AsciiString with isdigitAscii as the
// per-element predicate and returns its result.
361 bool isdigitAsciiString(const OString
&rString
)
363 return tmpl_is_OPER_AsciiString
<isdigitAscii
>(rString
);
// OUString overload: runs tmpl_is_OPER_AsciiString with isdigitAscii as the
// per-element predicate and returns its result.
366 bool isdigitAsciiString(const OUString
&rString
)
368 return tmpl_is_OPER_AsciiString
<isdigitAscii
>(rString
);
// Shared implementation behind the reverseString() overloads: T is the
// string type and O the matching buffer type.
// Index i starts at rIn.getLength(); elements are appended to sBuf as
// rIn[--i], i.e. taken from the back of rIn, and the buffer contents are
// returned via makeStringAndClear().
// NOTE(review): elements are reversed one at a time via operator[], so for
// OUString a surrogate pair would presumably come out in swapped order --
// confirm whether callers care.
373 template <typename T
, typename O
> T
tmpl_reverseString(const T
&rIn
)
378 sal_Int32 i
= rIn
.getLength();
381 sBuf
.append(rIn
[--i
]);
382 return sBuf
.makeStringAndClear();
// OUString overload of reverseString: forwards rStr to
// tmpl_reverseString<OUString, OUStringBuffer> and returns its result.
386 OUString
reverseString(const OUString
&rStr
)
388 return tmpl_reverseString
<OUString
, OUStringBuffer
>(rStr
);
// OString overload of reverseString: forwards rStr to
// tmpl_reverseString<OString, OStringBuffer> and returns its result.
391 OString
reverseString(const OString
&rStr
)
393 return tmpl_reverseString
<OString
, OStringBuffer
>(rStr
);
// Searches rIn, starting at index nPos, for any of the characters in pChars.
// pChars is walked until a zero element is reached (the inner loop runs
// while *pChar is non-zero), so it must be zero-terminated.
// For each position i the character c = rIn[i] is checked against that list.
// NOTE(review): presumably returns the index of the first match and -1 when
// there is none -- confirm against the full body.
// NOTE(review): no check of nPos against 0 is visible in these lines.
396 sal_Int32
indexOfAny(OUString
const& rIn
,
397 sal_Unicode
const*const pChars
, sal_Int32
const nPos
)
399 for (sal_Int32 i
= nPos
; i
< rIn
.getLength(); ++i
)
401 sal_Unicode
const c
= rIn
[i
];
402 for (sal_Unicode
const* pChar
= pChars
; *pChar
; ++pChar
)
// Replaces one cTok-separated token of rIn with rNewToken.
// The result is rIn.replaceAt(nFirstChar, i-nFirstChar, rNewToken):
// nFirstChar (initially 0) marks where the replaced span starts and i where
// it ends. pStr and nLen hold the character data and length of rIn for the
// scan.
// NOTE(review): presumably nToken is the zero-based index of the token to
// replace and the scan stops once the running count nTok exceeds it (see the
// nTok > nToken branch) -- confirm against the full body.
// NOTE(review): what is returned when rIn has fewer than nToken + 1 tokens
// cannot be told from the lines shown.
413 OUString
setToken(const OUString
& rIn
, sal_Int32 nToken
, sal_Unicode cTok
,
414 const OUString
& rNewToken
)
416 const sal_Unicode
* pStr
= rIn
.getStr();
417 sal_Int32 nLen
= rIn
.getLength();
419 sal_Int32 nFirstChar
= 0;
422 // Determine token position and length
425 // Increase token count if match
432 else if (nTok
> nToken
)
441 return rIn
.replaceAt(nFirstChar
, i
-nFirstChar
, rNewToken
);
447 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */