Avoid potential negative array index access to cached text.
[LibreOffice.git] / include / comphelper / string.hxx
blobbab63f06f0343766e95936cb42f0930b4e4be3f3
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
19 #pragma once
21 #include <sal/config.h>
23 #include <algorithm>
24 #include <vector>
25 #include <comphelper/comphelperdllapi.h>
26 #include <sal/types.h>
27 #include <rtl/strbuf.hxx>
28 #include <rtl/ustrbuf.hxx>
29 #include <com/sun/star/uno/Sequence.h>
30 #include <com/sun/star/uno/Reference.hxx>
32 #include <com/sun/star/lang/Locale.hpp>
34 namespace com::sun::star::i18n { class XBreakIterator; }
35 namespace com::sun::star::i18n { class XCollator; }
36 namespace com::sun::star::uno { class XComponentContext; }
38 // OUString helper functions that are not widespread or mature enough to
39 // go into the stable URE API:
40 namespace comphelper::string {
42 /** Removes all occurrences of a character from within the source string
44 @param rIn The input OUStringBuffer
45 @param c The character to be removed
47 @return The resulting OUStringBuffer
49 inline OUStringBuffer& remove(OUStringBuffer &rIn,
50 sal_Unicode c)
52 sal_Int32 index = 0;
53 while (true)
55 if (index >= rIn.getLength())
56 break;
57 index = rIn.indexOf(c, index);
58 if (index == -1)
59 break;
60 rIn.remove(index, 1);
62 return rIn;
65 /** Strips occurrences of a character from the start of the source string
67 @param rIn The input OString
68 @param c The character to be stripped from the start
70 @return The resulting OString
72 COMPHELPER_DLLPUBLIC OString stripStart(const OString& rIn,
73 char c);
74 COMPHELPER_DLLPUBLIC std::string_view stripStart(std::string_view rIn,
75 char c);
77 /** Strips occurrences of a character from the start of the source string
79 @param rIn The input OUString
80 @param c The character to be stripped from the start
82 @return The resulting OUString
84 COMPHELPER_DLLPUBLIC OUString stripStart(const OUString& rIn,
85 sal_Unicode c);
86 COMPHELPER_DLLPUBLIC std::u16string_view stripStart(std::u16string_view rIn,
87 sal_Unicode c);
89 /** Strips occurrences of a character from the end of the source string
91 @param rIn The input OString
92 @param c The character to be stripped from the end
94 @return The resulting OString
96 COMPHELPER_DLLPUBLIC OString stripEnd(const OString& rIn,
97 char c);
98 COMPHELPER_DLLPUBLIC std::string_view stripEnd(std::string_view rIn,
99 char c);
101 /** Strips occurrences of a character from the end of the source string
103 @param rIn The input OUString
104 @param c The character to be stripped from the end
106 @return The resulting OUString
108 COMPHELPER_DLLPUBLIC OUString stripEnd(const OUString& rIn,
109 sal_Unicode c);
110 COMPHELPER_DLLPUBLIC std::u16string_view stripEnd(std::u16string_view rIn,
111 sal_Unicode c);
113 /** Strips occurrences of a character from the start and end of the source string
115 @param rIn The input OString
116 @param c The character to be stripped from the start and end
118 @return The resulting OString
120 COMPHELPER_DLLPUBLIC OString strip(const OString& rIn,
121 char c);
122 COMPHELPER_DLLPUBLIC std::string_view strip(std::string_view rIn,
123 char c);
125 /** Strips occurrences of a character from the start and end of the source string
127 @param rIn The input OUString
128 @param c The character to be stripped from the start and end
130 @return The resulting OUString
132 COMPHELPER_DLLPUBLIC OUString strip(const OUString& rIn,
133 sal_Unicode c);
134 COMPHELPER_DLLPUBLIC std::u16string_view strip(std::u16string_view rIn,
135 sal_Unicode c);
137 /** Returns number of tokens in an OString
139 @param rIn the input OString
140 @param cTok the character which separates the tokens.
141 @return the number of tokens
143 COMPHELPER_DLLPUBLIC sal_Int32 getTokenCount(std::string_view rIn, char cTok);
145 /** Returns number of tokens in an OUString
147 @param rIn the input OUString
148 @param cTok the character which separates the tokens.
149 @return the number of tokens
151 COMPHELPER_DLLPUBLIC sal_Int32 getTokenCount(std::u16string_view rIn, sal_Unicode cTok);
153 /** Reverse an OUString's UTF-16 code units.
155 @param rStr the input string
156 @return the reversed input
158 COMPHELPER_DLLPUBLIC OUString reverseString(std::u16string_view rStr);
160 /** Reverse an OUString's Unicode code points.
162 COMPHELPER_DLLPUBLIC OUString reverseCodePoints(OUString const & str);
165 namespace detail
167 template<typename B> B& truncateToLength(B& rBuffer, sal_Int32 nLen)
169 if (nLen < rBuffer.getLength())
170 rBuffer.setLength(nLen);
171 return rBuffer;
175 /** Truncate a buffer to a given length.
177 If the StringBuffer has more characters than nLength it will be truncated
178 on the right to nLength characters.
180 Has no effect if the StringBuffer is <= nLength
182 @param rBuf StringBuffer to operate on
183 @param nLength Length to truncate the buffer to
185 @return rBuf;
187 inline OUStringBuffer& truncateToLength(
188 OUStringBuffer& rBuffer, sal_Int32 nLength)
190 return detail::truncateToLength(rBuffer, nLength);
193 namespace detail
195 template<typename B, typename U> B& padToLength(B& rBuffer, sal_Int32 nLen, U cFill)
197 const sal_Int32 nPadLen = nLen - rBuffer.getLength();
198 if (nPadLen > 0)
199 std::fill_n(rBuffer.appendUninitialized(nPadLen), nPadLen, cFill);
200 return rBuffer;
204 /** Pad a buffer to a given length using a given char.
206 If the StringBuffer has less characters than nLength it will be expanded on
207 the right to nLength characters, with the expansion filled using cFill.
209 Has no effect if the StringBuffer is >= nLength
211 @param rBuf StringBuffer to operate on
212 @param nLength Length to pad the buffer to
213 @param cFill character to fill expansion with
215 @return rBuf;
217 inline OStringBuffer& padToLength(
218 OStringBuffer& rBuffer, sal_Int32 nLength,
219 char cFill = '\0')
221 return detail::padToLength(rBuffer, nLength, cFill);
224 inline OUStringBuffer& padToLength(
225 OUStringBuffer& rBuffer, sal_Int32 nLength,
226 sal_Unicode cFill = '\0')
228 return detail::padToLength(rBuffer, nLength, cFill);
231 /** Similar to OUString::replaceAt, but for an OUStringBuffer.
233 Replace n = count characters
234 from position index in this string with newStr.
236 COMPHELPER_DLLPUBLIC void replaceAt(OUStringBuffer& rIn, sal_Int32 index, sal_Int32 count, std::u16string_view newStr );
238 /** Replace a token in a string
239 @param rIn OUString in which the token is to be replaced
240 @param nToken which nToken to replace
241 @param cTok token delimiter
242 @param rNewToken replacement token
244 @return original string with token nToken replaced by rNewToken
246 COMPHELPER_DLLPUBLIC OUString setToken(const OUString& rIn, sal_Int32 nToken, sal_Unicode cTok,
247 std::u16string_view rNewToken);
249 /** Find any of a list of code units in the string.
250 @param rIn OUString to search
251 @param pChars 0-terminated array of sal_Unicode code units to search for
252 @param nPos start position
254 @return position of first occurrence of any of the elements of pChars
255 or -1 if none of the code units occur in the string
257 COMPHELPER_DLLPUBLIC sal_Int32 indexOfAny(std::u16string_view rIn,
258 sal_Unicode const*const pChars, sal_Int32 const nPos);
260 /** Remove any of a list of code units in the string.
261 @param rIn OUString to search
262 @param pChars 0-terminated array of sal_Unicode code units to search for
264 @return OUString that has all of the pChars code units removed
266 COMPHELPER_DLLPUBLIC OUString removeAny(std::u16string_view rIn,
267 sal_Unicode const*const pChars);
269 /** Convert a sequence of strings to a single comma separated string.
271 Note that no escaping of commas or anything fancy is done.
273 @param i_rSeq A list of strings to be concatenated.
275 @return A single string containing the concatenation of the given
276 list, interspersed with the string ", ".
278 COMPHELPER_DLLPUBLIC OUString convertCommaSeparated(
279 css::uno::Sequence< OUString > const & i_rSeq);
281 /// Return a string which is the concatenation of the strings in the sequence.
282 COMPHELPER_DLLPUBLIC OString join(std::string_view rSeparator, const std::vector<OString>& rSequence);
284 /** Convert a decimal string to a number.
286 The string must be base-10, no sign but can contain any
287 codepoint listed in the "Number, Decimal Digit" Unicode
288 category.
290 No verification is made about the validity of the string;
291 passing a string that contains anything other than decimal
292 digit code points gives unspecified results.
294 If your string is guaranteed to contain only ASCII digits,
295 use OUString::toInt32 instead.
297 @param str The string to convert containing only decimal
298 digit codepoints.
300 @return The value of the string as a sal_uInt32.
302 COMPHELPER_DLLPUBLIC sal_uInt32 decimalStringToNumber(
303 std::u16string_view str );
305 COMPHELPER_DLLPUBLIC std::vector<OUString>
306 split(std::u16string_view rString, const sal_Unicode cSeparator);
308 /** Convert a single comma separated string to a sequence of strings.
310 Note that no escaping of commas or anything fancy is done.
312 @param i_rString A string containing comma-separated words.
314 @return A sequence of strings resulting from splitting the given
315 string at ',' tokens and stripping whitespace.
317 COMPHELPER_DLLPUBLIC css::uno::Sequence< OUString >
318 convertCommaSeparated( std::u16string_view i_rString );
321 Compares two strings using natural order.
323 For non-digit characters, the comparison uses the same algorithm as
324 rtl_str_compare. When a number is encountered during the comparison,
325 natural order is used. Thus, Heading 10 will be considered as greater
326 than Heading 2. Numerical comparison is done using decimal representation.
328 Beware that "MyString 001" and "MyString 1" will be considered as equal
329 since leading zeros are meaningless.
331 @param rLHS, rRHS the two strings to be compared.
332 @return 0 - if both strings are equal
333 < 0 - if rLHS is less than rRHS
334 > 0 - if rLHS is greater than rRHS
336 COMPHELPER_DLLPUBLIC sal_Int32 compareNatural( const OUString &rLHS, const OUString &rRHS,
337 const css::uno::Reference< css::i18n::XCollator > &rCollator,
338 const css::uno::Reference< css::i18n::XBreakIterator > &rBI,
339 const css::lang::Locale &rLocale );
341 class COMPHELPER_DLLPUBLIC NaturalStringSorter
343 private:
344 css::lang::Locale const m_aLocale;
345 css::uno::Reference< css::i18n::XCollator > m_xCollator;
346 css::uno::Reference< css::i18n::XBreakIterator > m_xBI;
347 public:
348 NaturalStringSorter(
349 const css::uno::Reference< css::uno::XComponentContext > &rContext,
350 css::lang::Locale aLocale);
351 sal_Int32 compare(const OUString &rLHS, const OUString &rRHS) const
353 return compareNatural(rLHS, rRHS, m_xCollator, m_xBI, m_aLocale);
355 const css::lang::Locale& getLocale() const { return m_aLocale; }
358 /** Determine if an OString contains solely ASCII numeric digits
360 @param rString An OString
362 @return false if string contains any characters outside
363 the ASCII '0'-'9' range
364 true otherwise, including for empty string
366 COMPHELPER_DLLPUBLIC bool isdigitAsciiString(std::string_view rString);
368 /** Determine if an OUString contains solely ASCII numeric digits
370 @param rString An OUString
372 @return false if string contains any characters outside
373 the ASCII '0'-'9' range
374 true otherwise, including for empty string
376 COMPHELPER_DLLPUBLIC bool isdigitAsciiString(std::u16string_view rString);
378 /** Sanitize an OUString to not have invalid surrogates
380 @param rString An OUString
382 @return same string if no surrogates or surrogates are valid.
383 Otherwise the string truncated to the valid sequence.
385 COMPHELPER_DLLPUBLIC OUString sanitizeStringSurrogates(const OUString& rString);
387 } // namespace comphelper::string
389 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */