/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #ifndef INCLUDED_RTL_CHARACTER_HXX
21 #define INCLUDED_RTL_CHARACTER_HXX
23 #include <sal/config.h>
27 #include <sal/types.h>
32 /** Check for Unicode code point.
34 @param code An integer.
36 @return True if code is a Unicode code point.
38 @since LibreOffice 5.2
40 inline bool isUnicodeCodePoint(sal_uInt32 code
)
42 return code
<= 0x10FFFF;
45 /** Check for ASCII character.
47 @param code A Unicode code point.
49 @return True if code is an ASCII character (0x00--0x7F).
51 @since LibreOffice 4.1
53 inline bool isAscii(sal_uInt32 code
)
55 assert(isUnicodeCodePoint(code
));
59 /** Check for ASCII lower case character.
61 @param code A Unicode code point.
63 @return True if code is an ASCII lower case alphabetic character (ASCII
66 @since LibreOffice 4.1
68 inline bool isAsciiLowerCase(sal_uInt32 code
)
70 assert(isUnicodeCodePoint(code
));
71 return code
>= 'a' && code
<= 'z';
74 /** Check for ASCII upper case character.
76 @param code A Unicode code point.
78 @return True if code is an ASCII upper case alphabetic character (ASCII
81 @since LibreOffice 4.1
83 inline bool isAsciiUpperCase(sal_uInt32 code
)
85 assert(isUnicodeCodePoint(code
));
86 return code
>= 'A' && code
<= 'Z';
89 /** Check for ASCII alphabetic character.
91 @param code A Unicode code point.
93 @return True if code is an ASCII alphabetic character (ASCII 'A'--'Z' or
96 @since LibreOffice 4.1
98 inline bool isAsciiAlpha(sal_uInt32 code
)
100 assert(isUnicodeCodePoint(code
));
101 return isAsciiLowerCase(code
) || isAsciiUpperCase(code
);
104 /** Check for ASCII digit character.
106 @param code A Unicode code point.
108 @return True if code is an ASCII (decimal) digit character (ASCII
111 @since LibreOffice 4.1
113 inline bool isAsciiDigit(sal_uInt32 code
)
115 assert(isUnicodeCodePoint(code
));
116 return code
>= '0' && code
<= '9';
119 /** Check for ASCII alphanumeric character.
121 @param code A Unicode code point.
123 @return True if code is an ASCII alphanumeric character (ASCII '0'--'9',
124 'A'--'Z', or 'a'--'z').
126 @since LibreOffice 4.1
128 inline bool isAsciiAlphanumeric(sal_uInt32 code
)
130 assert(isUnicodeCodePoint(code
));
131 return isAsciiDigit(code
) || isAsciiAlpha(code
);
134 /** Check for ASCII canonic hexadecimal digit character.
136 @param code A Unicode code point.
138 @return True if code is an ASCII canonic (i.e., upper case) hexadecimal
139 digit character (ASCII '0'--'9' or 'A'--'F').
141 @since LibreOffice 4.1
143 inline bool isAsciiCanonicHexDigit(sal_uInt32 code
)
145 assert(isUnicodeCodePoint(code
));
146 return isAsciiDigit(code
) || (code
>= 'A' && code
<= 'F');
149 /** Check for ASCII hexadecimal digit character.
151 @param code A Unicode code point.
153 @return True if code is an ASCII hexadecimal digit character (ASCII
154 '0'--'9', 'A'--'F', or 'a'--'f').
156 @since LibreOffice 4.1
158 inline bool isAsciiHexDigit(sal_uInt32 code
)
160 assert(isUnicodeCodePoint(code
));
161 return isAsciiCanonicHexDigit(code
) || (code
>= 'a' && code
<= 'f');
164 /** Check for ASCII octal digit character.
166 @param code A Unicode code point.
168 @return True if code is an ASCII octal digit character (ASCII '0'--'7').
170 @since LibreOffice 5.0
172 inline bool isAsciiOctalDigit(sal_uInt32 code
)
174 assert(isUnicodeCodePoint(code
));
175 return code
>= '0' && code
<= '7';
179 /** Convert a character, if ASCII, to upper case.
181 @param code A Unicode code point.
183 @return code converted to ASCII upper case.
185 @since LibreOffice 4.2
187 inline sal_uInt32
toAsciiUpperCase(sal_uInt32 code
)
189 assert(isUnicodeCodePoint(code
));
190 return isAsciiLowerCase(code
) ? code
- 32 : code
;
193 /** Convert a character, if ASCII, to lower case.
195 @param code A Unicode code point.
197 @return code converted to ASCII lower case.
199 @since LibreOffice 4.2
201 inline sal_uInt32
toAsciiLowerCase(sal_uInt32 code
)
203 assert(isUnicodeCodePoint(code
));
204 return isAsciiUpperCase(code
) ? code
+ 32 : code
;
207 /** Compare two characters ignoring ASCII case.
209 @param code1 A Unicode code point.
211 @param code2 A unicode code point.
213 @return 0 if both code points are equal,
214 < 0 if code1 is less than code2,
215 > 0 if code1 is greater than code2.
217 @since LibreOffice 4.2
219 inline sal_Int32
compareIgnoreAsciiCase(sal_uInt32 code1
, sal_uInt32 code2
)
221 assert(isUnicodeCodePoint(code1
));
222 assert(isUnicodeCodePoint(code2
));
223 return static_cast<sal_Int32
>(toAsciiLowerCase(code1
))
224 - static_cast<sal_Int32
>(toAsciiLowerCase(code2
));
230 sal_uInt32
const surrogatesHighFirst
= 0xD800;
231 sal_uInt32
const surrogatesHighLast
= 0xDBFF;
232 sal_uInt32
const surrogatesLowFirst
= 0xDC00;
233 sal_uInt32
const surrogatesLowLast
= 0xDFFF;
238 /** Check for high surrogate.
240 @param code A Unicode code point.
242 @return True if code is a high surrogate code point (0xD800--0xDBFF).
244 @since LibreOffice 5.0
246 inline bool isHighSurrogate(sal_uInt32 code
) {
247 assert(isUnicodeCodePoint(code
));
248 return code
>= detail::surrogatesHighFirst
249 && code
<= detail::surrogatesHighLast
;
252 /** Check for low surrogate.
254 @param code A Unicode code point.
256 @return True if code is a low surrogate code point (0xDC00--0xDFFF).
258 @since LibreOffice 5.0
260 inline bool isLowSurrogate(sal_uInt32 code
) {
261 assert(isUnicodeCodePoint(code
));
262 return code
>= detail::surrogatesLowFirst
263 && code
<= detail::surrogatesLowLast
;
266 /** Get high surrogate half of a non-BMP Unicode code point.
268 @param code A non-BMP Unicode code point.
270 @return The UTF-16 high surrogate half for the give code point.
272 @since LibreOffice 5.0
274 inline sal_Unicode
getHighSurrogate(sal_uInt32 code
) {
275 assert(isUnicodeCodePoint(code
));
276 assert(code
>= 0x10000);
277 return static_cast<sal_Unicode
>(((code
- 0x10000) >> 10) | detail::surrogatesHighFirst
);
280 /** Get low surrogate half of a non-BMP Unicode code point.
282 @param code A non-BMP Unicode code point.
284 @return The UTF-16 low surrogate half for the give code point.
286 @since LibreOffice 5.0
288 inline sal_Unicode
getLowSurrogate(sal_uInt32 code
) {
289 assert(isUnicodeCodePoint(code
));
290 assert(code
>= 0x10000);
291 return static_cast<sal_Unicode
>(((code
- 0x10000) & 0x3FF) | detail::surrogatesLowFirst
);
294 /** Combine surrogates to form a code point.
296 @param high A high surrogate code point.
298 @param low A low surrogate code point.
300 @return The code point represented by the surrogate pair.
302 @since LibreOffice 5.0
304 inline sal_uInt32
combineSurrogates(sal_uInt32 high
, sal_uInt32 low
) {
305 assert(isHighSurrogate(high
));
306 assert(isLowSurrogate(low
));
307 return ((high
- detail::surrogatesHighFirst
) << 10)
308 + (low
- detail::surrogatesLowFirst
) + 0x10000;
315 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */