Version 5.2.6.1, tag libreoffice-5.2.6.1
[LibreOffice.git] / include / rtl / character.hxx
blobba3088efdeda04a28765bbcc0170bbbce35b7785
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #ifndef INCLUDED_RTL_CHARACTER_HXX
21 #define INCLUDED_RTL_CHARACTER_HXX
23 #include <sal/config.h>
25 #include <cassert>
27 #include <sal/types.h>
29 namespace rtl
32 /** Check for Unicode code point.
34 @param code An integer.
36 @return True if code is a Unicode code point.
38 @since LibreOffice 5.2
40 inline bool isUnicodeCodePoint(sal_uInt32 code)
42 return code <= 0x10FFFF;
45 /** Check for ASCII character.
47 @param code A Unicode code point.
49 @return True if code is an ASCII character (0x00--0x7F).
51 @since LibreOffice 4.1
53 inline bool isAscii(sal_uInt32 code)
55 assert(isUnicodeCodePoint(code));
56 return code <= 0x7F;
59 /** Check for ASCII lower case character.
61 @param code A Unicode code point.
63 @return True if code is an ASCII lower case alphabetic character (ASCII
64 'a'--'z').
66 @since LibreOffice 4.1
68 inline bool isAsciiLowerCase(sal_uInt32 code)
70 assert(isUnicodeCodePoint(code));
71 return code >= 'a' && code <= 'z';
74 /** Check for ASCII upper case character.
76 @param code A Unicode code point.
78 @return True if code is an ASCII upper case alphabetic character (ASCII
79 'A'--'Z').
81 @since LibreOffice 4.1
83 inline bool isAsciiUpperCase(sal_uInt32 code)
85 assert(isUnicodeCodePoint(code));
86 return code >= 'A' && code <= 'Z';
89 /** Check for ASCII alphabetic character.
91 @param code A Unicode code point.
93 @return True if code is an ASCII alphabetic character (ASCII 'A'--'Z' or
94 'a'--'z').
96 @since LibreOffice 4.1
98 inline bool isAsciiAlpha(sal_uInt32 code)
100 assert(isUnicodeCodePoint(code));
101 return isAsciiLowerCase(code) || isAsciiUpperCase(code);
104 /** Check for ASCII digit character.
106 @param code A Unicode code point.
108 @return True if code is an ASCII (decimal) digit character (ASCII
109 '0'--'9').
111 @since LibreOffice 4.1
113 inline bool isAsciiDigit(sal_uInt32 code)
115 assert(isUnicodeCodePoint(code));
116 return code >= '0' && code <= '9';
119 /** Check for ASCII alphanumeric character.
121 @param code A Unicode code point.
123 @return True if code is an ASCII alphanumeric character (ASCII '0'--'9',
124 'A'--'Z', or 'a'--'z').
126 @since LibreOffice 4.1
128 inline bool isAsciiAlphanumeric(sal_uInt32 code)
130 assert(isUnicodeCodePoint(code));
131 return isAsciiDigit(code) || isAsciiAlpha(code);
134 /** Check for ASCII canonic hexadecimal digit character.
136 @param code A Unicode code point.
138 @return True if code is an ASCII canonic (i.e., upper case) hexadecimal
139 digit character (ASCII '0'--'9' or 'A'--'F').
141 @since LibreOffice 4.1
143 inline bool isAsciiCanonicHexDigit(sal_uInt32 code)
145 assert(isUnicodeCodePoint(code));
146 return isAsciiDigit(code) || (code >= 'A' && code <= 'F');
149 /** Check for ASCII hexadecimal digit character.
151 @param code A Unicode code point.
153 @return True if code is an ASCII hexadecimal digit character (ASCII
154 '0'--'9', 'A'--'F', or 'a'--'f').
156 @since LibreOffice 4.1
158 inline bool isAsciiHexDigit(sal_uInt32 code)
160 assert(isUnicodeCodePoint(code));
161 return isAsciiCanonicHexDigit(code) || (code >= 'a' && code <= 'f');
164 /** Check for ASCII octal digit character.
166 @param code A Unicode code point.
168 @return True if code is an ASCII octal digit character (ASCII '0'--'7').
170 @since LibreOffice 5.0
172 inline bool isAsciiOctalDigit(sal_uInt32 code)
174 assert(isUnicodeCodePoint(code));
175 return code >= '0' && code <= '7';
179 /** Convert a character, if ASCII, to upper case.
181 @param code A Unicode code point.
183 @return code converted to ASCII upper case.
185 @since LibreOffice 4.2
187 inline sal_uInt32 toAsciiUpperCase(sal_uInt32 code)
189 assert(isUnicodeCodePoint(code));
190 return isAsciiLowerCase(code) ? code - 32 : code;
193 /** Convert a character, if ASCII, to lower case.
195 @param code A Unicode code point.
197 @return code converted to ASCII lower case.
199 @since LibreOffice 4.2
201 inline sal_uInt32 toAsciiLowerCase(sal_uInt32 code)
203 assert(isUnicodeCodePoint(code));
204 return isAsciiUpperCase(code) ? code + 32 : code;
207 /** Compare two characters ignoring ASCII case.
209 @param code1 A Unicode code point.
211 @param code2 A unicode code point.
213 @return 0 if both code points are equal,
214 < 0 if code1 is less than code2,
215 > 0 if code1 is greater than code2.
217 @since LibreOffice 4.2
219 inline sal_Int32 compareIgnoreAsciiCase(sal_uInt32 code1, sal_uInt32 code2)
221 assert(isUnicodeCodePoint(code1));
222 assert(isUnicodeCodePoint(code2));
223 return static_cast<sal_Int32>(toAsciiLowerCase(code1))
224 - static_cast<sal_Int32>(toAsciiLowerCase(code2));
227 /// @cond INTERNAL
228 namespace detail {
230 sal_uInt32 const surrogatesHighFirst = 0xD800;
231 sal_uInt32 const surrogatesHighLast = 0xDBFF;
232 sal_uInt32 const surrogatesLowFirst = 0xDC00;
233 sal_uInt32 const surrogatesLowLast = 0xDFFF;
236 /// @endcond
238 /** Check for high surrogate.
240 @param code A Unicode code point.
242 @return True if code is a high surrogate code point (0xD800--0xDBFF).
244 @since LibreOffice 5.0
246 inline bool isHighSurrogate(sal_uInt32 code) {
247 assert(isUnicodeCodePoint(code));
248 return code >= detail::surrogatesHighFirst
249 && code <= detail::surrogatesHighLast;
252 /** Check for low surrogate.
254 @param code A Unicode code point.
256 @return True if code is a low surrogate code point (0xDC00--0xDFFF).
258 @since LibreOffice 5.0
260 inline bool isLowSurrogate(sal_uInt32 code) {
261 assert(isUnicodeCodePoint(code));
262 return code >= detail::surrogatesLowFirst
263 && code <= detail::surrogatesLowLast;
266 /** Get high surrogate half of a non-BMP Unicode code point.
268 @param code A non-BMP Unicode code point.
270 @return The UTF-16 high surrogate half for the give code point.
272 @since LibreOffice 5.0
274 inline sal_Unicode getHighSurrogate(sal_uInt32 code) {
275 assert(isUnicodeCodePoint(code));
276 assert(code >= 0x10000);
277 return static_cast<sal_Unicode>(((code - 0x10000) >> 10) | detail::surrogatesHighFirst);
280 /** Get low surrogate half of a non-BMP Unicode code point.
282 @param code A non-BMP Unicode code point.
284 @return The UTF-16 low surrogate half for the give code point.
286 @since LibreOffice 5.0
288 inline sal_Unicode getLowSurrogate(sal_uInt32 code) {
289 assert(isUnicodeCodePoint(code));
290 assert(code >= 0x10000);
291 return static_cast<sal_Unicode>(((code - 0x10000) & 0x3FF) | detail::surrogatesLowFirst);
294 /** Combine surrogates to form a code point.
296 @param high A high surrogate code point.
298 @param low A low surrogate code point.
300 @return The code point represented by the surrogate pair.
302 @since LibreOffice 5.0
304 inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low) {
305 assert(isHighSurrogate(high));
306 assert(isLowSurrogate(low));
307 return ((high - detail::surrogatesHighFirst) << 10)
308 + (low - detail::surrogatesLowFirst) + 0x10000;
313 #endif
315 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */