1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include "sal/config.h"
27 #include <rtl/ustring.hxx>
28 #include <rtl/ustrbuf.hxx>
29 #include <rtl/string.hxx>
30 #include <rtl/strbuf.hxx>
31 #include <sal/types.h>
33 #include <comphelper/string.hxx>
34 #include <comphelper/stlunosequence.hxx>
35 #include <comphelper/stl_types.hxx>
37 #include <com/sun/star/i18n/BreakIterator.hpp>
38 #include <com/sun/star/i18n/CharType.hpp>
39 #include <com/sun/star/i18n/Collator.hpp>
42 namespace comphelper
{ namespace string
{
// Shared implementation behind the stripStart() overloads, which instantiate
// it as <OString, sal_Char> and <OUString, sal_Unicode>. T is the string
// type, C the matching character type.
//
// Visible in this excerpt: a loop that runs while i is smaller than
// rIn.getLength(), and a test of rIn[i] against cRemove.
// NOTE(review): the declaration of the second parameter (cRemove), the
// initialisation of i, the action taken when an element differs from cRemove
// and the return statement are not visible here. Presumably the result is
// rIn without its leading run of cRemove - confirm against the full source.
46 template <typename T
, typename C
> T
tmpl_stripStart(const T
&rIn
,
// Loop bounded by the length of the input string.
54 while (i
< rIn
.getLength())
// Detects an element that is not the character to remove.
56 if (rIn
[i
] != cRemove
)
// OString overload of stripStart().
// rIn is the input string, c the character handed on as the one to remove.
// Returns whatever tmpl_stripStart<OString, sal_Char>(rIn, c) returns.
65 OString
stripStart(const OString
&rIn
, sal_Char c
)
67 return tmpl_stripStart
<OString
, sal_Char
>(rIn
, c
);
// OUString overload of stripStart().
// rIn is the input string, c the character handed on as the one to remove.
// Returns whatever tmpl_stripStart<OUString, sal_Unicode>(rIn, c) returns.
70 OUString
stripStart(const OUString
&rIn
, sal_Unicode c
)
72 return tmpl_stripStart
<OUString
, sal_Unicode
>(rIn
, c
);
// Shared implementation behind the stripEnd() overloads, which instantiate it
// as <OString, sal_Char> and <OUString, sal_Unicode>. T is the string type,
// C the matching character type.
//
// Visible in this excerpt: i starts at rIn.getLength(), the element at i-1
// is tested against cRemove, and the function returns rIn.copy(0, i).
// NOTE(review): the declaration of the second parameter (cRemove), the loop
// that moves i and the action taken when the test succeeds are not visible
// here. Presumably i is walked backwards past a trailing run of cRemove -
// confirm against the full source.
77 template <typename T
, typename C
> T
tmpl_stripEnd(const T
&rIn
,
// Start one past the last element.
83 sal_Int32 i
= rIn
.getLength();
// Inspect the element just before position i.
87 if (rIn
[i
-1] != cRemove
)
// Result is the part of rIn selected by copy(0, i).
92 return rIn
.copy(0, i
);
// OString overload of stripEnd().
// rIn is the input string, c the character handed on as the one to remove.
// Returns whatever tmpl_stripEnd<OString, sal_Char>(rIn, c) returns.
96 OString
stripEnd(const OString
&rIn
, sal_Char c
)
98 return tmpl_stripEnd
<OString
, sal_Char
>(rIn
, c
);
// OUString overload of stripEnd().
// rIn is the input string, c the character handed on as the one to remove.
// Returns whatever tmpl_stripEnd<OUString, sal_Unicode>(rIn, c) returns.
101 OUString
stripEnd(const OUString
&rIn
, sal_Unicode c
)
103 return tmpl_stripEnd
<OUString
, sal_Unicode
>(rIn
, c
);
// OString overload of strip().
// Applies stripStart(rIn, c) first and then stripEnd(..., c) to that result,
// using the same character c for both calls, and returns the outcome.
106 OString
strip(const OString
&rIn
, sal_Char c
)
108 return stripEnd(stripStart(rIn
, c
), c
);
// OUString overload of strip().
// Applies stripStart(rIn, c) first and then stripEnd(..., c) to that result,
// using the same character c for both calls, and returns the outcome.
111 OUString
strip(const OUString
&rIn
, sal_Unicode c
)
113 return stripEnd(stripStart(rIn
, c
), c
);
// Shared implementation behind the getTokenCount() overloads, which
// instantiate it as <OString, sal_Char> and <OUString, sal_Unicode>.
// Returns a sal_Int32 token count for rIn.
//
// Visible in this excerpt: the counter starts at 1 and a loop visits every
// index of rIn from 0 up to getLength() - 1. The existing comment states that
// an empty string yields a count of 0.
// NOTE(review): the declaration of the second parameter, the empty-string
// check itself, the loop body and the return statement are not visible here.
// Presumably the counter is bumped for each separator found - confirm.
118 template <typename T
, typename C
> sal_Int32
tmpl_getTokenCount(const T
&rIn
,
121 // Empty String: TokenCount by Definition is 0
// The count starts at 1, not 0.
125 sal_Int32 nTokCount
= 1;
126 for (sal_Int32 i
= 0; i
< rIn
.getLength(); ++i
)
// OString overload of getTokenCount().
// rIn is the input string, cTok the token character handed on.
// Returns whatever tmpl_getTokenCount<OString, sal_Char>(rIn, cTok) returns.
135 sal_Int32
getTokenCount(const OString
&rIn
, sal_Char cTok
)
137 return tmpl_getTokenCount
<OString
, sal_Char
>(rIn
, cTok
);
// OUString overload of getTokenCount().
// rIn is the input string, cTok the token character handed on.
// Returns whatever tmpl_getTokenCount<OUString, sal_Unicode>(rIn, cTok)
// returns.
140 sal_Int32
getTokenCount(const OUString
&rIn
, sal_Unicode cTok
)
142 return tmpl_getTokenCount
<OUString
, sal_Unicode
>(rIn
, cTok
);
// Converts a string of decimal digits into a sal_uInt32.
//
// The string is walked one Unicode code point at a time: iterateCodePoints()
// is handed &i and the for header has no increment expression of its own.
// Each code point c is matched against a ladder of digit blocks; after the
// ASCII test the blocks are ordered from the highest code point downwards.
// The running total is then updated as result * 10 + value.
//
// NOTE(review): the assignment to `value` in each branch and the return
// statement are not visible in this excerpt (presumably value is c minus the
// first code point of the matched block, and result is returned - confirm).
// NOTE(review), from the conditions that are visible:
//  - the first test (c <= 0x0039) accepts every code point up to U+0039, not
//    only '0'..'9';
//  - every other test checks a lower bound only, so a code point lying
//    between two blocks is caught by the nearest block below it;
//  - no overflow check on the sal_uInt32 accumulator is visible.
145 sal_uInt32
decimalStringToNumber(
146 OUString
const & str
)
148 sal_uInt32 result
= 0;
// No increment clause: i is passed by address to iterateCodePoints() below.
149 for( sal_Int32 i
= 0 ; i
< str
.getLength() ; )
151 sal_uInt32 c
= str
.iterateCodePoints(&i
);
// Digit value of c; stays 0 unless a branch below assigns it.
152 sal_uInt32 value
= 0;
153 if( c
<= 0x0039) // ASCII decimal digits, most common
155 else if( c
>= 0x1D7F6 ) // mathematical monospace digits
157 else if( c
>= 0x1D7EC ) // mathematical sans-serif bold digits
159 else if( c
>= 0x1D7E2 ) // mathematical sans-serif digits
161 else if( c
>= 0x1D7D8 ) // mathematical double-struck digits
163 else if( c
>= 0x1D7CE ) // mathematical bold digits
165 else if( c
>= 0x11066 ) // brahmi digits
167 else if( c
>= 0x104A0 ) // osmanya digits
169 else if( c
>= 0xFF10 ) // fullwidth digits
171 else if( c
>= 0xABF0 ) // meetei mayek digits
173 else if( c
>= 0xAA50 ) // cham digits
175 else if( c
>= 0xA9D0 ) // javanese digits
177 else if( c
>= 0xA900 ) // kayah li digits
179 else if( c
>= 0xA8D0 ) // saurashtra digits
181 else if( c
>= 0xA620 ) // vai digits
183 else if( c
>= 0x1C50 ) // ol chiki digits
185 else if( c
>= 0x1C40 ) // lepcha digits
187 else if( c
>= 0x1BB0 ) // sundanese digits
189 else if( c
>= 0x1B50 ) // balinese digits
191 else if( c
>= 0x1A90 ) // tai tham tham digits
193 else if( c
>= 0x1A80 ) // tai tham hora digits
195 else if( c
>= 0x19D0 ) // new tai lue digits
197 else if( c
>= 0x1946 ) // limbu digits
199 else if( c
>= 0x1810 ) // mongolian digits
201 else if( c
>= 0x17E0 ) // khmer digits
203 else if( c
>= 0x1090 ) // myanmar shan digits
205 else if( c
>= 0x1040 ) // myanmar digits
207 else if( c
>= 0x0F20 ) // tibetan digits
209 else if( c
>= 0x0ED0 ) // lao digits
211 else if( c
>= 0x0E50 ) // thai digits
213 else if( c
>= 0x0D66 ) // malayalam digits
215 else if( c
>= 0x0CE6 ) // kannada digits
217 else if( c
>= 0x0C66 ) // telugu digits
219 else if( c
>= 0x0BE6 ) // tamil digits
221 else if( c
>= 0x0B66 ) // oriya digits
223 else if( c
>= 0x0AE6 ) // gujarati digits
225 else if( c
>= 0x0A66 ) // gurmukhi digits
227 else if( c
>= 0x09E6 ) // bengali digits
229 else if( c
>= 0x0966 ) // devanagari digit
231 else if( c
>= 0x07C0 ) // nko digits
233 else if( c
>= 0x06F0 ) // extended arabic-indic digits
235 else if( c
>= 0x0660 ) // arabic-indic digits
// Shift the digits collected so far one decimal place and add the new one.
237 result
= result
* 10 + value
;
242 using namespace ::com::sun::star
;
244 // Convert between a sequence of strings and a comma-separated string
// Builds a single OUString from the OUString elements of i_rSeq.
//
// The element range [stl_begin(i_rSeq), stl_end(i_rSeq)) is passed to
// ::comphelper::intersperse together with an OUStringBufferAppender that
// writes into buf; the function returns buf.makeStringAndClear().
// NOTE(review): the declaration of buf and the final (separator) argument of
// the intersperse call are not visible in this excerpt. Given the function
// name the separator is presumably comma-based - confirm.
246 OUString
convertCommaSeparated(
247 uno::Sequence
< OUString
> const& i_rSeq
)
250 ::comphelper::intersperse(
251 ::comphelper::stl_begin(i_rSeq
), ::comphelper::stl_end(i_rSeq
),
252 ::comphelper::OUStringBufferAppender(buf
),
254 return buf
.makeStringAndClear();
// Splits i_rString at ',' characters into a uno::Sequence of OUString.
//
// Tokens are obtained with i_rString.getToken(0, ',', idx) and gathered in
// the std::vector vec; afterwards a sequence kws with vec.size() elements is
// created and filled from vec with std::copy.
// NOTE(review): the declaration of idx, the loop around getToken(), any
// post-processing of a token before it is stored in vec, and the return
// statement are not visible in this excerpt (presumably kws is returned -
// confirm).
257 uno::Sequence
< OUString
>
258 convertCommaSeparated( OUString
const& i_rString
)
// Temporary growable store for the tokens.
260 std::vector
< OUString
> vec
;
// Token index 0 relative to idx; idx is passed so getToken() can update it.
264 i_rString
.getToken(0, static_cast<sal_Unicode
> (','), idx
);
// Sequence sized to hold exactly the collected tokens.
270 uno::Sequence
< OUString
> kws(vec
.size());
271 std::copy(vec
.begin(), vec
.end(), stl_begin(kws
));
// Compares rLHS with rRHS in "natural" order: both strings are processed as
// alternating chunks, where non-digit chunks are compared as text and digit
// chunks are compared as numbers.
//
// rLHS, rRHS  the two strings to compare
// rCollator   collator whose compareSubstring() compares the text chunks
// rBI         break iterator used to locate DECIMAL_DIGIT_NUMBER blocks
// rLocale     locale handed to the break iterator calls
//
// Per loop iteration, as far as this excerpt shows:
//  1. nextCharBlock() locates the start of the next digit block in each
//     string (-1 is replaced by the string length). The text between the
//     end of the previous digit block and that position is compared with
//     rCollator->compareSubstring(); the result is stored in nRet.
//  2. endOfCharBlock() locates the end of that digit block (-1 is again
//     replaced by the string length). Both digit chunks are converted with
//     decimalStringToNumber() into nLHS and nRHS.
// The loop runs while either first-digit position lies before the end of
// its string.
//
// NOTE(review): the declaration of nRet, the checks that leave the loop
// early, the comparison of nLHS with nRHS and the return statement are not
// visible here; the last comment indicates the result is reduced to
// -1, 0 or 1.
// NOTE(review): digit chunks are held in sal_uInt32, so a digit run whose
// value does not fit that type presumably wraps - confirm.
276 sal_Int32
compareNatural( const OUString
& rLHS
, const OUString
& rRHS
,
277 const uno::Reference
< i18n::XCollator
> &rCollator
,
278 const uno::Reference
< i18n::XBreakIterator
> &rBI
,
279 const lang::Locale
&rLocale
)
// Despite the name, after step 2 these hold the end of the last digit block,
// i.e. where the next non-digit chunk starts.
283 sal_Int32 nLHSLastNonDigitPos
= 0;
284 sal_Int32 nRHSLastNonDigitPos
= 0;
// Start of the current digit block in each string.
285 sal_Int32 nLHSFirstDigitPos
= 0;
286 sal_Int32 nRHSFirstDigitPos
= 0;
288 while (nLHSFirstDigitPos
< rLHS
.getLength() || nRHSFirstDigitPos
< rRHS
.getLength())
290 sal_Int32 nLHSChunkLen
;
291 sal_Int32 nRHSChunkLen
;
293 //Compare non digit block as normal strings
294 nLHSFirstDigitPos
= rBI
->nextCharBlock(rLHS
, nLHSLastNonDigitPos
,
295 rLocale
, i18n::CharType::DECIMAL_DIGIT_NUMBER
);
296 nRHSFirstDigitPos
= rBI
->nextCharBlock(rRHS
, nRHSLastNonDigitPos
,
297 rLocale
, i18n::CharType::DECIMAL_DIGIT_NUMBER
);
// -1 is mapped to the end of the string, so the text chunk runs to the end.
298 if (nLHSFirstDigitPos
== -1)
299 nLHSFirstDigitPos
= rLHS
.getLength();
300 if (nRHSFirstDigitPos
== -1)
301 nRHSFirstDigitPos
= rRHS
.getLength();
302 nLHSChunkLen
= nLHSFirstDigitPos
- nLHSLastNonDigitPos
;
303 nRHSChunkLen
= nRHSFirstDigitPos
- nRHSLastNonDigitPos
;
305 nRet
= rCollator
->compareSubstring(rLHS
, nLHSLastNonDigitPos
,
306 nLHSChunkLen
, rRHS
, nRHSLastNonDigitPos
, nRHSChunkLen
);
310 //Compare digit block as one number vs another
311 nLHSLastNonDigitPos
= rBI
->endOfCharBlock(rLHS
, nLHSFirstDigitPos
,
312 rLocale
, i18n::CharType::DECIMAL_DIGIT_NUMBER
);
313 nRHSLastNonDigitPos
= rBI
->endOfCharBlock(rRHS
, nRHSFirstDigitPos
,
314 rLocale
, i18n::CharType::DECIMAL_DIGIT_NUMBER
);
// -1 is mapped to the end of the string, so the digit chunk runs to the end.
315 if (nLHSLastNonDigitPos
== -1)
316 nLHSLastNonDigitPos
= rLHS
.getLength();
317 if (nRHSLastNonDigitPos
== -1)
318 nRHSLastNonDigitPos
= rRHS
.getLength();
319 nLHSChunkLen
= nLHSLastNonDigitPos
- nLHSFirstDigitPos
;
320 nRHSChunkLen
= nRHSLastNonDigitPos
- nRHSFirstDigitPos
;
322 //To-Do: Possibly scale down those unicode codepoints that relate to
323 //numbers outside of the normal 0-9 range, e.g. see GetLocalizedChar in
// Numeric value of each digit chunk.
326 sal_uInt32 nLHS
= comphelper::string::decimalStringToNumber(rLHS
.copy(nLHSFirstDigitPos
, nLHSChunkLen
));
327 sal_uInt32 nRHS
= comphelper::string::decimalStringToNumber(rRHS
.copy(nRHSFirstDigitPos
, nRHSChunkLen
));
334 //Squeeze these down to -1, 0, 1 in case it gets casted to a StringCompare
// Constructs a NaturalStringSorter for the given locale.
//
// rContext  component context handed to the Collator and BreakIterator
//           factories
// rLocale   copied into m_aLocale
//
// The constructor creates m_xCollator via i18n::Collator::create(), loads
// the default collator for m_aLocale with an options value of 0, and creates
// m_xBI via i18n::BreakIterator::create().
343 NaturalStringSorter::NaturalStringSorter(
344 const uno::Reference
< uno::XComponentContext
> &rContext
,
345 const lang::Locale
&rLocale
) : m_aLocale(rLocale
)
347 m_xCollator
= i18n::Collator::create( rContext
);
348 m_xCollator
->loadDefaultCollator(m_aLocale
, 0);
349 m_xBI
= i18n::BreakIterator::create( rContext
);
// Shared implementation behind the isdigitAsciiString() overloads.
// OPER is a compile-time function pointer of type bool (*)(sal_Unicode) and
// T is the string type. The loop visits every index of rString and tests
// the negated result of OPER on the element at that index.
// NOTE(review): the return statements are not visible in this excerpt; the
// contract below comes from the existing comment.
354 //do OPER on each element of the string, return false
355 //if any OPER is false, true otherwise
356 template <bool (*OPER
)(sal_Unicode
), typename T
>
357 bool tmpl_is_OPER_AsciiString(const T
&rString
)
359 for (sal_Int32 i
= 0; i
< rString
.getLength(); ++i
)
// True when the predicate rejects the current element.
361 if (!OPER(rString
[i
]))
// OString overload of isdigitAsciiString().
// Returns the result of tmpl_is_OPER_AsciiString instantiated with the
// isdigitAscii predicate and applied to rString.
368 bool isdigitAsciiString(const OString
&rString
)
370 return tmpl_is_OPER_AsciiString
<isdigitAscii
>(rString
);
// OUString overload of isdigitAsciiString().
// Returns the result of tmpl_is_OPER_AsciiString instantiated with the
// isdigitAscii predicate and applied to rString.
373 bool isdigitAsciiString(const OUString
&rString
)
375 return tmpl_is_OPER_AsciiString
<isdigitAscii
>(rString
);
// Shared implementation behind the reverseString() overloads, which
// instantiate it as <OUString, OUStringBuffer> and <OString, OStringBuffer>.
// T is the string type; O is the second template argument (a buffer type in
// both instantiations).
//
// Visible in this excerpt: i starts at rIn.getLength(), elements are appended
// to sBuf as rIn[--i], and the function returns sBuf.makeStringAndClear().
// NOTE(review): the declaration of sBuf (presumably of type O) and the loop
// header that bounds --i are not visible here - confirm against the full
// source.
380 template <typename T
, typename O
> T
tmpl_reverseString(const T
&rIn
)
// Start one past the last element.
385 sal_Int32 i
= rIn
.getLength();
// Pre-decrement: step back first, then append that element.
388 sBuf
.append(rIn
[--i
]);
389 return sBuf
.makeStringAndClear();
// OUString overload of reverseString().
// Returns whatever tmpl_reverseString<OUString, OUStringBuffer>(rStr)
// returns.
393 OUString
reverseString(const OUString
&rStr
)
395 return tmpl_reverseString
<OUString
, OUStringBuffer
>(rStr
);
// OString overload of reverseString().
// Returns whatever tmpl_reverseString<OString, OStringBuffer>(rStr) returns.
398 OString
reverseString(const OString
&rStr
)
400 return tmpl_reverseString
<OString
, OStringBuffer
>(rStr
);
// Scans rIn, starting at index nPos, against the characters listed in pChars.
//
// rIn     string to scan
// pChars  array of characters to look for; the inner loop stops at the first
//         zero element, so the array must be zero-terminated
// nPos    index at which the scan starts
//
// The outer loop visits each index from nPos up to getLength() - 1 and reads
// the character c at that index; the inner loop walks pChars for each c.
// NOTE(review): the comparison of c with *pChar and the return statements
// are not visible in this excerpt. Presumably the index of the first match
// is returned, with a sentinel when nothing matches - confirm.
403 sal_Int32
indexOfAny(OUString
const& rIn
,
404 sal_Unicode
const*const pChars
, sal_Int32
const nPos
)
406 for (sal_Int32 i
= nPos
; i
< rIn
.getLength(); ++i
)
408 sal_Unicode
const c
= rIn
[i
];
// *pChar as the loop condition: iteration ends at the terminating zero.
409 for (sal_Unicode
const* pChar
= pChars
; *pChar
; ++pChar
)
422 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */