bump product version to 5.0.4.1
[LibreOffice.git] / comphelper / source / misc / string.cxx
blob98391741a13d87d9139be6c48f75be38a1659129
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <sal/config.h>
22 #include <cstddef>
23 #include <string.h>
24 #include <vector>
25 #include <algorithm>
27 #include <rtl/ustring.hxx>
28 #include <rtl/ustrbuf.hxx>
29 #include <rtl/string.hxx>
30 #include <rtl/strbuf.hxx>
31 #include <sal/types.h>
33 #include <comphelper/string.hxx>
34 #include <comphelper/stl_types.hxx>
36 #include <com/sun/star/i18n/BreakIterator.hpp>
37 #include <com/sun/star/i18n/CharType.hpp>
38 #include <com/sun/star/i18n/Collator.hpp>
41 namespace comphelper { namespace string {
43 namespace
45 template <typename T, typename C> T tmpl_stripStart(const T &rIn,
46 const C cRemove)
48 if (rIn.isEmpty())
49 return rIn;
51 sal_Int32 i = 0;
53 while (i < rIn.getLength())
55 if (rIn[i] != cRemove)
56 break;
57 ++i;
60 return rIn.copy(i);
64 OString stripStart(const OString &rIn, sal_Char c)
66 return tmpl_stripStart<OString, sal_Char>(rIn, c);
69 OUString stripStart(const OUString &rIn, sal_Unicode c)
71 return tmpl_stripStart<OUString, sal_Unicode>(rIn, c);
74 namespace
76 template <typename T, typename C> T tmpl_stripEnd(const T &rIn,
77 const C cRemove)
79 if (rIn.isEmpty())
80 return rIn;
82 sal_Int32 i = rIn.getLength();
84 while (i > 0)
86 if (rIn[i-1] != cRemove)
87 break;
88 --i;
91 return rIn.copy(0, i);
95 OString stripEnd(const OString &rIn, sal_Char c)
97 return tmpl_stripEnd<OString, sal_Char>(rIn, c);
100 OUString stripEnd(const OUString &rIn, sal_Unicode c)
102 return tmpl_stripEnd<OUString, sal_Unicode>(rIn, c);
105 OString strip(const OString &rIn, sal_Char c)
107 return stripEnd(stripStart(rIn, c), c);
110 OUString strip(const OUString &rIn, sal_Unicode c)
112 return stripEnd(stripStart(rIn, c), c);
115 namespace
117 template <typename T, typename C> sal_Int32 tmpl_getTokenCount(const T &rIn,
118 C cTok)
120 // Empty String: TokenCount by Definition is 0
121 if (rIn.isEmpty())
122 return 0;
124 sal_Int32 nTokCount = 1;
125 for (sal_Int32 i = 0; i < rIn.getLength(); ++i)
127 if (rIn[i] == cTok)
128 ++nTokCount;
130 return nTokCount;
134 sal_Int32 getTokenCount(const OString &rIn, sal_Char cTok)
136 return tmpl_getTokenCount<OString, sal_Char>(rIn, cTok);
139 sal_Int32 getTokenCount(const OUString &rIn, sal_Unicode cTok)
141 return tmpl_getTokenCount<OUString, sal_Unicode>(rIn, cTok);
144 sal_uInt32 decimalStringToNumber(
145 OUString const & str )
147 sal_uInt32 result = 0;
148 for( sal_Int32 i = 0 ; i < str.getLength() ; )
150 sal_uInt32 c = str.iterateCodePoints(&i);
151 sal_uInt32 value = 0;
152 if( c <= 0x0039) // ASCII decimal digits, most common
153 value = c - 0x0030;
154 else if( c >= 0x1D7F6 ) // mathematical monospace digits
155 value = c - 0x1D7F6;
156 else if( c >= 0x1D7EC ) // mathematical sans-serif bold digits
157 value = c - 0x1D7EC;
158 else if( c >= 0x1D7E2 ) // mathematical sans-serif digits
159 value = c - 0x1D7E2;
160 else if( c >= 0x1D7D8 ) // mathematical double-struck digits
161 value = c - 0x1D7D8;
162 else if( c >= 0x1D7CE ) // mathematical bold digits
163 value = c - 0x1D7CE;
164 else if( c >= 0x11066 ) // brahmi digits
165 value = c - 0x11066;
166 else if( c >= 0x104A0 ) // osmanya digits
167 value = c - 0x104A0;
168 else if( c >= 0xFF10 ) // fullwidth digits
169 value = c - 0xFF10;
170 else if( c >= 0xABF0 ) // meetei mayek digits
171 value = c - 0xABF0;
172 else if( c >= 0xAA50 ) // cham digits
173 value = c - 0xAA50;
174 else if( c >= 0xA9D0 ) // javanese digits
175 value = c - 0xA9D0;
176 else if( c >= 0xA900 ) // kayah li digits
177 value = c - 0xA900;
178 else if( c >= 0xA8D0 ) // saurashtra digits
179 value = c - 0xA8D0;
180 else if( c >= 0xA620 ) // vai digits
181 value = c - 0xA620;
182 else if( c >= 0x1C50 ) // ol chiki digits
183 value = c - 0x1C50;
184 else if( c >= 0x1C40 ) // lepcha digits
185 value = c - 0x1C40;
186 else if( c >= 0x1BB0 ) // sundanese digits
187 value = c - 0x1BB0;
188 else if( c >= 0x1B50 ) // balinese digits
189 value = c - 0x1B50;
190 else if( c >= 0x1A90 ) // tai tham tham digits
191 value = c - 0x1A90;
192 else if( c >= 0x1A80 ) // tai tham hora digits
193 value = c - 0x1A80;
194 else if( c >= 0x19D0 ) // new tai lue digits
195 value = c - 0x19D0;
196 else if( c >= 0x1946 ) // limbu digits
197 value = c - 0x1946;
198 else if( c >= 0x1810 ) // mongolian digits
199 value = c - 0x1810;
200 else if( c >= 0x17E0 ) // khmer digits
201 value = c - 0x17E0;
202 else if( c >= 0x1090 ) // myanmar shan digits
203 value = c - 0x1090;
204 else if( c >= 0x1040 ) // myanmar digits
205 value = c - 0x1040;
206 else if( c >= 0x0F20 ) // tibetan digits
207 value = c - 0x0F20;
208 else if( c >= 0x0ED0 ) // lao digits
209 value = c - 0x0ED0;
210 else if( c >= 0x0E50 ) // thai digits
211 value = c - 0x0E50;
212 else if( c >= 0x0D66 ) // malayalam digits
213 value = c - 0x0D66;
214 else if( c >= 0x0CE6 ) // kannada digits
215 value = c - 0x0CE6;
216 else if( c >= 0x0C66 ) // telugu digits
217 value = c - 0x0C66;
218 else if( c >= 0x0BE6 ) // tamil digits
219 value = c - 0x0BE6;
220 else if( c >= 0x0B66 ) // odia digits
221 value = c - 0x0B66;
222 else if( c >= 0x0AE6 ) // gujarati digits
223 value = c - 0x0AE6;
224 else if( c >= 0x0A66 ) // gurmukhi digits
225 value = c - 0x0A66;
226 else if( c >= 0x09E6 ) // bengali digits
227 value = c - 0x09E6;
228 else if( c >= 0x0966 ) // devanagari digit
229 value = c - 0x0966;
230 else if( c >= 0x07C0 ) // nko digits
231 value = c - 0x07C0;
232 else if( c >= 0x06F0 ) // extended arabic-indic digits
233 value = c - 0x06F0;
234 else if( c >= 0x0660 ) // arabic-indic digits
235 value = c - 0x0660;
236 result = result * 10 + value;
238 return result;
241 using namespace ::com::sun::star;
243 // convert between sequence of string and comma separated string
245 OUString convertCommaSeparated(
246 uno::Sequence< OUString > const& i_rSeq)
248 OUStringBuffer buf;
249 ::comphelper::intersperse(
250 i_rSeq.begin(), i_rSeq.end(), ::comphelper::OUStringBufferAppender(buf), OUString( ", " ));
251 return buf.makeStringAndClear();
254 uno::Sequence< OUString >
255 convertCommaSeparated( OUString const& i_rString )
257 std::vector< OUString > vec;
258 sal_Int32 idx = 0;
259 do {
260 OUString kw =
261 i_rString.getToken(0, static_cast<sal_Unicode> (','), idx);
262 kw = kw.trim();
263 if (!kw.isEmpty()) {
264 vec.push_back(kw);
266 } while (idx >= 0);
267 uno::Sequence< OUString > kws(vec.size());
268 std::copy(vec.begin(), vec.end(), kws.begin());
269 return kws;
273 sal_Int32 compareNatural( const OUString & rLHS, const OUString & rRHS,
274 const uno::Reference< i18n::XCollator > &rCollator,
275 const uno::Reference< i18n::XBreakIterator > &rBI,
276 const lang::Locale &rLocale )
278 sal_Int32 nRet = 0;
280 sal_Int32 nLHSLastNonDigitPos = 0;
281 sal_Int32 nRHSLastNonDigitPos = 0;
282 sal_Int32 nLHSFirstDigitPos = 0;
283 sal_Int32 nRHSFirstDigitPos = 0;
285 while (nLHSFirstDigitPos < rLHS.getLength() || nRHSFirstDigitPos < rRHS.getLength())
287 sal_Int32 nLHSChunkLen;
288 sal_Int32 nRHSChunkLen;
290 //Compare non digit block as normal strings
291 nLHSFirstDigitPos = rBI->nextCharBlock(rLHS, nLHSLastNonDigitPos,
292 rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
293 nRHSFirstDigitPos = rBI->nextCharBlock(rRHS, nRHSLastNonDigitPos,
294 rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
295 if (nLHSFirstDigitPos == -1)
296 nLHSFirstDigitPos = rLHS.getLength();
297 if (nRHSFirstDigitPos == -1)
298 nRHSFirstDigitPos = rRHS.getLength();
299 nLHSChunkLen = nLHSFirstDigitPos - nLHSLastNonDigitPos;
300 nRHSChunkLen = nRHSFirstDigitPos - nRHSLastNonDigitPos;
302 nRet = rCollator->compareSubstring(rLHS, nLHSLastNonDigitPos,
303 nLHSChunkLen, rRHS, nRHSLastNonDigitPos, nRHSChunkLen);
304 if (nRet != 0)
305 break;
307 //Compare digit block as one number vs another
308 nLHSLastNonDigitPos = rBI->endOfCharBlock(rLHS, nLHSFirstDigitPos,
309 rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
310 nRHSLastNonDigitPos = rBI->endOfCharBlock(rRHS, nRHSFirstDigitPos,
311 rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
312 if (nLHSLastNonDigitPos == -1)
313 nLHSLastNonDigitPos = rLHS.getLength();
314 if (nRHSLastNonDigitPos == -1)
315 nRHSLastNonDigitPos = rRHS.getLength();
316 nLHSChunkLen = nLHSLastNonDigitPos - nLHSFirstDigitPos;
317 nRHSChunkLen = nRHSLastNonDigitPos - nRHSFirstDigitPos;
319 //To-Do: Possibly scale down those unicode codepoints that relate to
320 //numbers outside of the normal 0-9 range, e.g. see GetLocalizedChar in
321 //vcl
323 sal_uInt32 nLHS = comphelper::string::decimalStringToNumber(rLHS.copy(nLHSFirstDigitPos, nLHSChunkLen));
324 sal_uInt32 nRHS = comphelper::string::decimalStringToNumber(rRHS.copy(nRHSFirstDigitPos, nRHSChunkLen));
326 if (nLHS != nRHS)
328 nRet = (nLHS < nRHS) ? -1 : 1;
329 break;
333 return nRet;
336 NaturalStringSorter::NaturalStringSorter(
337 const uno::Reference< uno::XComponentContext > &rContext,
338 const lang::Locale &rLocale) : m_aLocale(rLocale)
340 m_xCollator = i18n::Collator::create( rContext );
341 m_xCollator->loadDefaultCollator(m_aLocale, 0);
342 m_xBI = i18n::BreakIterator::create( rContext );
345 namespace
347 //do OPER on each element of the string, return false
348 //if any OPER is false, true otherwise
349 template <bool (*OPER)(sal_Unicode), typename T>
350 bool tmpl_is_OPER_AsciiString(const T &rString)
352 for (sal_Int32 i = 0; i < rString.getLength(); ++i)
354 if (!OPER(rString[i]))
355 return false;
357 return true;
361 bool isdigitAsciiString(const OString &rString)
363 return tmpl_is_OPER_AsciiString<isdigitAscii>(rString);
366 bool isdigitAsciiString(const OUString &rString)
368 return tmpl_is_OPER_AsciiString<isdigitAscii>(rString);
371 namespace
373 template <typename T, typename O> T tmpl_reverseString(const T &rIn)
375 if (rIn.isEmpty())
376 return rIn;
378 sal_Int32 i = rIn.getLength();
379 O sBuf(i);
380 while (i)
381 sBuf.append(rIn[--i]);
382 return sBuf.makeStringAndClear();
386 OUString reverseString(const OUString &rStr)
388 return tmpl_reverseString<OUString, OUStringBuffer>(rStr);
391 OString reverseString(const OString &rStr)
393 return tmpl_reverseString<OString, OStringBuffer>(rStr);
396 sal_Int32 indexOfAny(OUString const& rIn,
397 sal_Unicode const*const pChars, sal_Int32 const nPos)
399 for (sal_Int32 i = nPos; i < rIn.getLength(); ++i)
401 sal_Unicode const c = rIn[i];
402 for (sal_Unicode const* pChar = pChars; *pChar; ++pChar)
404 if (c == *pChar)
406 return i;
410 return -1;
413 OUString setToken(const OUString& rIn, sal_Int32 nToken, sal_Unicode cTok,
414 const OUString& rNewToken)
416 const sal_Unicode* pStr = rIn.getStr();
417 sal_Int32 nLen = rIn.getLength();
418 sal_Int32 nTok = 0;
419 sal_Int32 nFirstChar = 0;
420 sal_Int32 i = 0;
422 // Determine token position and length
423 while ( i < nLen )
425 // Increase token count if match
426 if (*pStr == cTok)
428 ++nTok;
430 if (nTok == nToken)
431 nFirstChar = i+1;
432 else if (nTok > nToken)
433 break;
436 ++pStr,
437 ++i;
440 if (nTok >= nToken)
441 return rIn.replaceAt(nFirstChar, i-nFirstChar, rNewToken);
442 return rIn;
447 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */