1 /* $NetBSD: normalize.c,v 1.1.1.2 2014/04/24 12:45:56 pettai Exp $ */
4 * Copyright (c) 2004 Kungliga Tekniska Högskolan
5 * (Royal Institute of Technology, Stockholm, Sweden).
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the Institute nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
46 #include <krb5/roken.h>
48 #include "normalize_table.h"
51 translation_cmp(const void *key
, const void *data
)
53 const struct translation
*t1
= (const struct translation
*)key
;
54 const struct translation
*t2
= (const struct translation
*)data
;
56 return t1
->key
- t2
->key
;
59 enum { s_base
= 0xAC00};
60 enum { s_count
= 11172};
61 enum { l_base
= 0x1100};
63 enum { v_base
= 0x1161};
65 enum { t_base
= 0x11A7};
67 enum { n_count
= v_count
* t_count
};
70 hangul_decomp(const uint32_t *in
, size_t in_len
,
71 uint32_t *out
, size_t *out_len
)
78 if (u
< s_base
|| u
>= s_base
+ s_count
)
81 l
= l_base
+ s_index
/ n_count
;
82 v
= v_base
+ (s_index
% n_count
) / t_count
;
83 t
= t_base
+ s_index
% t_count
;
88 return WIND_ERR_OVERRUN
;
98 hangul_composition(const uint32_t *in
, size_t in_len
)
102 if (in
[0] >= l_base
&& in
[0] < l_base
+ l_count
) {
103 unsigned l_index
= in
[0] - l_base
;
106 if (in
[1] < v_base
|| in
[1] >= v_base
+ v_count
)
108 v_index
= in
[1] - v_base
;
109 return (l_index
* v_count
+ v_index
) * t_count
+ s_base
;
110 } else if (in
[0] >= s_base
&& in
[0] < s_base
+ s_count
) {
111 unsigned s_index
= in
[0] - s_base
;
114 if (s_index
% t_count
!= 0)
116 if (in
[1] < t_base
|| in
[1] >= t_base
+ t_count
)
118 t_index
= in
[1] - t_base
;
119 return in
[0] + t_index
;
125 compat_decomp(const uint32_t *in
, size_t in_len
,
126 uint32_t *out
, size_t *out_len
)
131 for (i
= 0; i
< in_len
; ++i
) {
132 struct translation ts
= {in
[i
]};
133 size_t sub_len
= *out_len
- o
;
136 ret
= hangul_decomp(in
+ i
, in_len
- i
,
139 if (ret
== WIND_ERR_OVERRUN
)
143 void *s
= bsearch(&ts
,
144 _wind_normalize_table
,
145 _wind_normalize_table_size
,
146 sizeof(_wind_normalize_table
[0]),
149 const struct translation
*t
= (const struct translation
*)s
;
151 ret
= compat_decomp(_wind_normalize_val_table
+ t
->val_offset
,
159 return WIND_ERR_OVERRUN
;
170 swap_char(uint32_t * a
, uint32_t * b
)
178 /* Unicode 5.2.0 D109 Canonical Ordering for a sequence of code points
179 * that all have Canonical_Combining_Class > 0 */
181 canonical_reorder_sequence(uint32_t * a
, size_t len
)
188 for (i
= 1; i
< len
; i
++) {
191 _wind_combining_class(a
[j
]) < _wind_combining_class(a
[j
-1]);
193 swap_char(&a
[j
], &a
[j
-1]);
198 canonical_reorder(uint32_t *tmp
, size_t tmp_len
)
202 for (i
= 0; i
< tmp_len
; ++i
) {
203 int cc
= _wind_combining_class(tmp
[i
]);
207 j
< tmp_len
&& _wind_combining_class(tmp
[j
]);
210 canonical_reorder_sequence(&tmp
[i
], j
- i
);
217 find_composition(const uint32_t *in
, unsigned in_len
)
219 unsigned short canon_index
= 0;
223 cur
= hangul_composition(in
, in_len
);
228 const struct canon_node
*c
= &_wind_canon_table
[canon_index
];
238 if (i
< c
->next_start
|| i
>= c
->next_end
)
242 _wind_canon_next_table
[c
->next_offset
+ i
- c
->next_start
];
243 if (canon_index
!= 0) {
244 cur
= (cur
<< 4) & 0xFFFFF;
247 } while (canon_index
!= 0);
252 combine(const uint32_t *in
, size_t in_len
,
253 uint32_t *out
, size_t *out_len
)
260 for (i
= 0; i
< in_len
;) {
261 while (i
< in_len
&& _wind_combining_class(in
[i
]) != 0) {
266 return WIND_ERR_OVERRUN
;
276 v
[0] = out
[ostarter
];
279 cc
= _wind_combining_class(in
[i
]);
280 if (old_cc
!= cc
&& (comb
= find_composition(v
, 2))) {
281 out
[ostarter
] = comb
;
282 } else if (cc
== 0) {
286 return WIND_ERR_OVERRUN
;
299 _wind_stringprep_normalize(const uint32_t *in
, size_t in_len
,
300 uint32_t *out
, size_t *out_len
)
311 tmp_len
= in_len
* 4;
312 if (tmp_len
< MAX_LENGTH_CANON
)
313 tmp_len
= MAX_LENGTH_CANON
;
314 tmp
= malloc(tmp_len
* sizeof(uint32_t));
318 ret
= compat_decomp(in
, in_len
, tmp
, &tmp_len
);
323 canonical_reorder(tmp
, tmp_len
);
324 ret
= combine(tmp
, tmp_len
, out
, out_len
);