BPicture: Fix archive constructor.
[haiku.git] / src / kits / locale / UnicodeChar.cpp
blobda476c3aa3e418e7abd24180db76f2ddd112e965
1 /*
2 * Copyright 2003, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
3 * Distributed under the terms of the MIT License.
5 * Authors:
6 * Axel Dörfler, axeld@pinc-software.de
7 * Siarzhuk Zharski, zharik@gmx.li
9 */
12 #include <UnicodeChar.h>
14 #include <unicode/uchar.h>
15 #include <unicode/utf8.h>
// Default constructor. Most of the static helpers in this file create a
// temporary through it and discard it before calling into ICU.
// NOTE(review): the constructor body is not visible in this listing.
18 BUnicodeChar::BUnicodeChar()
23 // Returns the general category value for the code point.
24 int8
25 BUnicodeChar::Type(uint32 c)
27 BUnicodeChar();
28 return u_charType(c);
32 // Determines whether the specified code point is a letter character.
33 // True for general categories "L" (letters).
34 bool
35 BUnicodeChar::IsAlpha(uint32 c)
37 BUnicodeChar();
38 return u_isalpha(c);
42 // Determines whether the specified code point is an alphanumeric character
43 // (letter or digit).
44 // True for characters with general categories
45 // "L" (letters) and "Nd" (decimal digit numbers).
46 bool
47 BUnicodeChar::IsAlNum(uint32 c)
49 BUnicodeChar();
50 return u_isalnum(c);
54 // Check if a code point has the Lowercase Unicode property (UCHAR_LOWERCASE).
55 bool
56 BUnicodeChar::IsLower(uint32 c)
58 BUnicodeChar();
59 return u_isULowercase(c);
63 // Check if a code point has the Uppercase Unicode property (UCHAR_UPPERCASE).
64 bool
65 BUnicodeChar::IsUpper(uint32 c)
67 BUnicodeChar();
68 return u_isUUppercase(c);
72 // Determines whether the specified code point is a titlecase letter.
73 // True for general category "Lt" (titlecase letter).
74 bool
75 BUnicodeChar::IsTitle(uint32 c)
77 BUnicodeChar();
78 return u_istitle(c);
82 // Determines whether the specified code point is a digit character.
83 // True for characters with general category "Nd" (decimal digit numbers).
84 // Beginning with Unicode 4, this is the same as
85 // testing for the Numeric_Type of Decimal.
86 bool
87 BUnicodeChar::IsDigit(uint32 c)
89 BUnicodeChar();
90 return u_isdigit(c);
94 // Determines whether the specified code point is a hexadecimal digit.
95 // This is equivalent to u_digit(c, 16)>=0.
96 // True for characters with general category "Nd" (decimal digit numbers)
97 // as well as Latin letters a-f and A-F in both ASCII and Fullwidth ASCII.
98 // (That is, for letters with code points
99 // 0041..0046, 0061..0066, FF21..FF26, FF41..FF46.)
100 bool
101 BUnicodeChar::IsHexDigit(uint32 c)
103 BUnicodeChar();
104 return u_isxdigit(c);
108 // Determines whether the specified code point is "defined",
109 // which usually means that it is assigned a character.
110 // True for general categories other than "Cn" (other, not assigned),
111 // i.e., true for all code points mentioned in UnicodeData.txt.
112 bool
113 BUnicodeChar::IsDefined(uint32 c)
115 BUnicodeChar();
116 return u_isdefined(c);
120 // Determines whether the specified code point is a base character.
121 // True for general categories "L" (letters), "N" (numbers),
122 // "Mc" (spacing combining marks), and "Me" (enclosing marks).
123 bool
124 BUnicodeChar::IsBase(uint32 c)
126 BUnicodeChar();
127 return u_isbase(c);
131 // Determines whether the specified code point is a control character
132 // (as defined by this function).
133 // A control character is one of the following:
134 // - ISO 8-bit control character (U+0000..U+001f and U+007f..U+009f)
135 // - U_CONTROL_CHAR (Cc)
136 // - U_FORMAT_CHAR (Cf)
137 // - U_LINE_SEPARATOR (Zl)
138 // - U_PARAGRAPH_SEPARATOR (Zp)
139 bool
140 BUnicodeChar::IsControl(uint32 c)
142 BUnicodeChar();
143 return u_iscntrl(c);
147 // Determines whether the specified code point is a punctuation character.
148 // True for characters with general categories "P" (punctuation).
149 bool
150 BUnicodeChar::IsPunctuation(uint32 c)
152 BUnicodeChar();
153 return u_ispunct(c);
157 // Determine if the specified code point is a space character according to Java.
158 // True for characters with general categories "Z" (separators),
159 // which does not include control codes (e.g., TAB or Line Feed).
160 bool
161 BUnicodeChar::IsSpace(uint32 c)
163 BUnicodeChar();
164 return u_isJavaSpaceChar(c);
168 // Determines if the specified code point is a whitespace character
169 // A character is considered to be a whitespace character if and only
170 // if it satisfies one of the following criteria:
171 // - It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"),
172 // but is not also a non-breaking space (U+00A0 NBSP or U+2007 Figure Space
173 // or U+202F Narrow NBSP).
174 // - It is U+0009 HORIZONTAL TABULATION.
175 // - It is U+000A LINE FEED.
176 // - It is U+000B VERTICAL TABULATION.
177 // - It is U+000C FORM FEED.
178 // - It is U+000D CARRIAGE RETURN.
179 // - It is U+001C FILE SEPARATOR.
180 // - It is U+001D GROUP SEPARATOR.
181 // - It is U+001E RECORD SEPARATOR.
182 // - It is U+001F UNIT SEPARATOR.
183 bool
184 BUnicodeChar::IsWhitespace(uint32 c)
186 BUnicodeChar();
187 return u_isWhitespace(c);
191 // Determines whether the specified code point is a printable character.
192 // True for general categories other than "C" (controls).
193 bool
194 BUnicodeChar::IsPrintable(uint32 c)
196 BUnicodeChar();
197 return u_isprint(c);
201 // #pragma mark -
203 uint32
204 BUnicodeChar::ToLower(uint32 c)
206 BUnicodeChar();
207 return u_tolower(c);
211 uint32
212 BUnicodeChar::ToUpper(uint32 c)
214 BUnicodeChar();
215 return u_toupper(c);
219 uint32
220 BUnicodeChar::ToTitle(uint32 c)
222 BUnicodeChar();
223 return u_totitle(c);
227 int32
228 BUnicodeChar::DigitValue(uint32 c)
230 BUnicodeChar();
231 return u_digit(c, 10);
235 unicode_east_asian_width
236 BUnicodeChar::EastAsianWidth(uint32 c)
238 return (unicode_east_asian_width)u_getIntPropertyValue(c,
239 UCHAR_EAST_ASIAN_WIDTH);
243 void
244 BUnicodeChar::ToUTF8(uint32 c, char** out)
246 int i = 0;
247 U8_APPEND_UNSAFE(*out, i, c);
248 *out += i;
252 uint32
253 BUnicodeChar::FromUTF8(const char** in)
255 int i = 0;
256 uint32 c = 0;
257 U8_NEXT_UNSAFE(*in, i, c);
258 *in += i;
260 return c;
264 size_t
265 BUnicodeChar::UTF8StringLength(const char* string)
267 size_t len = 0;
268 while (*string) {
269 FromUTF8(&string);
270 len++;
272 return len;
276 size_t
277 BUnicodeChar::UTF8StringLength(const char* string, size_t maxLength)
279 size_t len = 0;
280 while (len < maxLength && *string) {
281 FromUTF8(&string);
282 len++;
284 return len;