/*
 * Copyright © 2011,2012,2014  Google, Inc.
 *
 *  This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Google Author(s): Behdad Esfahbod
 */
27 #ifndef HB_UTF_PRIVATE_HH
28 #define HB_UTF_PRIVATE_HH
30 #include "hb-private.hh"
/*
 * Per-encoding text-decoding helpers, selected by the code-unit type T.
 *
 * Only declared here; the behavior lives in the specializations for
 * uint8_t, uint16_t and uint32_t.  The `validate` flag defaults to true;
 * of the specializations in this header, only the uint32_t one is
 * parameterized on it.
 */
template <typename T, bool validate=true> struct hb_utf_t;
38 struct hb_utf_t
<uint8_t, true>
40 static inline const uint8_t *
41 next (const uint8_t *text
,
43 hb_codepoint_t
*unicode
,
44 hb_codepoint_t replacement
)
46 /* Written to only accept well-formed sequences.
47 * Based on ideas from ICU's U8_NEXT.
48 * Generates one "replacement" for each ill-formed byte. */
50 hb_codepoint_t c
= *text
++;
54 if (hb_in_range (c
, 0xC2u
, 0xDFu
)) /* Two-byte */
57 if (likely (text
< end
&&
58 (t1
= text
[0] - 0x80u
) <= 0x3Fu
))
60 c
= ((c
&0x1Fu
)<<6) | t1
;
66 else if (hb_in_range (c
, 0xE0u
, 0xEFu
)) /* Three-byte */
69 if (likely (1 < end
- text
&&
70 (t1
= text
[0] - 0x80u
) <= 0x3Fu
&&
71 (t2
= text
[1] - 0x80u
) <= 0x3Fu
))
73 c
= ((c
&0xFu
)<<12) | (t1
<<6) | t2
;
74 if (unlikely (c
< 0x0800u
|| hb_in_range (c
, 0xD800u
, 0xDFFFu
)))
81 else if (hb_in_range (c
, 0xF0u
, 0xF4u
)) /* Four-byte */
83 unsigned int t1
, t2
, t3
;
84 if (likely (2 < end
- text
&&
85 (t1
= text
[0] - 0x80u
) <= 0x3Fu
&&
86 (t2
= text
[1] - 0x80u
) <= 0x3Fu
&&
87 (t3
= text
[2] - 0x80u
) <= 0x3Fu
))
89 c
= ((c
&0x7u
)<<18) | (t1
<<12) | (t2
<<6) | t3
;
90 if (unlikely (!hb_in_range (c
, 0x10000u
, 0x10FFFFu
)))
105 *unicode
= replacement
;
109 static inline const uint8_t *
110 prev (const uint8_t *text
,
111 const uint8_t *start
,
112 hb_codepoint_t
*unicode
,
113 hb_codepoint_t replacement
)
115 const uint8_t *end
= text
--;
116 while (start
< text
&& (*text
& 0xc0) == 0x80 && end
- text
< 4)
119 if (likely (next (text
, end
, unicode
, replacement
) == end
))
122 *unicode
= replacement
;
126 static inline unsigned int
127 strlen (const uint8_t *text
)
129 return ::strlen ((const char *) text
);
137 struct hb_utf_t
<uint16_t, true>
139 static inline const uint16_t *
140 next (const uint16_t *text
,
142 hb_codepoint_t
*unicode
,
143 hb_codepoint_t replacement
)
145 hb_codepoint_t c
= *text
++;
147 if (likely (!hb_in_range (c
, 0xD800u
, 0xDFFFu
)))
153 if (likely (hb_in_range (c
, 0xD800u
, 0xDBFFu
)))
155 /* High-surrogate in c */
157 if (text
< end
&& ((l
= *text
), likely (hb_in_range (l
, 0xDC00u
, 0xDFFFu
))))
159 /* Low-surrogate in l */
160 *unicode
= (c
<< 10) + l
- ((0xD800u
<< 10) - 0x10000u
+ 0xDC00u
);
166 /* Lonely / out-of-order surrogate. */
167 *unicode
= replacement
;
171 static inline const uint16_t *
172 prev (const uint16_t *text
,
173 const uint16_t *start
,
174 hb_codepoint_t
*unicode
,
175 hb_codepoint_t replacement
)
177 const uint16_t *end
= text
--;
178 hb_codepoint_t c
= *text
;
180 if (likely (!hb_in_range (c
, 0xD800u
, 0xDFFFu
)))
186 if (likely (start
< text
&& hb_in_range (c
, 0xDC00u
, 0xDFFFu
)))
189 if (likely (next (text
, end
, unicode
, replacement
) == end
))
192 *unicode
= replacement
;
197 static inline unsigned int
198 strlen (const uint16_t *text
)
209 template <bool validate
>
210 struct hb_utf_t
<uint32_t, validate
>
212 static inline const uint32_t *
213 next (const uint32_t *text
,
214 const uint32_t *end HB_UNUSED
,
215 hb_codepoint_t
*unicode
,
216 hb_codepoint_t replacement
)
218 hb_codepoint_t c
= *text
++;
219 if (validate
&& unlikely (c
> 0x10FFFFu
|| hb_in_range (c
, 0xD800u
, 0xDFFFu
)))
225 *unicode
= replacement
;
229 static inline const uint32_t *
230 prev (const uint32_t *text
,
231 const uint32_t *start HB_UNUSED
,
232 hb_codepoint_t
*unicode
,
233 hb_codepoint_t replacement
)
235 next (text
- 1, text
, unicode
, replacement
);
239 static inline unsigned int
240 strlen (const uint32_t *text
)
249 #endif /* HB_UTF_PRIVATE_HH */