Sync usage with man page.
[netbsd-mini2440.git] / external / bsd / bind / dist / contrib / idn / idnkit-1.0-src / lib / race.c
blobcd402bd58d0133d453ad2a230732f524071be1e6
1 /* $NetBSD$ */
3 #ifndef lint
4 static char *rcsid = "Id: race.c,v 1.1.1.1 2003/06/04 00:26:07 marka Exp";
5 #endif
7 /*
8 * Copyright (c) 2000,2001,2002 Japan Network Information Center.
9 * All rights reserved.
11 * By using this file, you agree to the terms and conditions set forth bellow.
13 * LICENSE TERMS AND CONDITIONS
15 * The following License Terms and Conditions apply, unless a different
16 * license is obtained from Japan Network Information Center ("JPNIC"),
17 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
18 * Chiyoda-ku, Tokyo 101-0047, Japan.
20 * 1. Use, Modification and Redistribution (including distribution of any
21 * modified or derived work) in source and/or binary forms is permitted
22 * under this License Terms and Conditions.
24 * 2. Redistribution of source code must retain the copyright notices as they
25 * appear in each source code file, this License Terms and Conditions.
27 * 3. Redistribution in binary form must reproduce the Copyright Notice,
28 * this License Terms and Conditions, in the documentation and/or other
29 * materials provided with the distribution. For the purposes of binary
30 * distribution the "Copyright Notice" refers to the following language:
31 * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
33 * 4. The name of JPNIC may not be used to endorse or promote products
34 * derived from this Software without specific prior written approval of
35 * JPNIC.
37 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
38 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
40 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
41 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
42 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
43 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
44 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
45 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
46 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
47 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
50 #include <config.h>
52 #include <stddef.h>
53 #include <stdlib.h>
54 #include <string.h>
56 #include <idn/result.h>
57 #include <idn/assert.h>
58 #include <idn/logmacro.h>
59 #include <idn/converter.h>
60 #include <idn/ucs4.h>
61 #include <idn/debug.h>
62 #include <idn/race.h>
63 #include <idn/util.h>
/*
 * ACE prefix that marks a RACE-encoded label.  A build may supply its
 * own value by defining IDN_RACE_PREFIX beforehand.
 */
#ifndef IDN_RACE_PREFIX
#define IDN_RACE_PREFIX		"bq--"
#endif

/*
 * Special octets of the compressed form:
 *   RACE_2OCTET_MODE  header octet used when the data is not compressed
 *   RACE_ESCAPE       escape octet inside compressed data
 *   RACE_ESCAPE_2ND   octet following RACE_ESCAPE that stands for 0xff
 */
#define RACE_2OCTET_MODE	0xd8
#define RACE_ESCAPE		0xff
#define RACE_ESCAPE_2ND		0x99

#define RACE_BUF_SIZE		128	/* more than enough */

/*
 * Unicode surrogate pair.
 */
#define SURROGATE_BASE		0x10000
#define SURROGATE_H_OFF		0xd800
#define SURROGATE_L_OFF		0xdc00

/* Range tests for the two halves of a surrogate pair. */
#define IS_SURROGATE_HIGH(v)	((v) >= 0xd800 && (v) <= 0xdbff)
#define IS_SURROGATE_LOW(v)	((v) >= 0xdc00 && (v) <= 0xdfff)

/* Split a code point at or above SURROGATE_BASE into its two halves. */
#define SURROGATE_HIGH(v) \
	(SURROGATE_H_OFF + (((v) - SURROGATE_BASE) >> 10))
#define SURROGATE_LOW(v) \
	(SURROGATE_L_OFF + ((v) & 0x3ff))

/* Join a high/low surrogate pair back into a single code point. */
#define COMBINE_SURROGATE(h, l) \
	(SURROGATE_BASE + (((h) - SURROGATE_H_OFF) << 10) + \
	 ((l) - SURROGATE_L_OFF))
/*
 * Compression type.
 *
 * get_compress_mode() returns one of these values and
 * race_compress_encode() takes one as its 'compress_mode' argument.
 */
enum {
	compress_one = 0,	/* all characters are in a single row */
	compress_two,		/* row 0 and another row */
	compress_none		/* nope */
};
96 static idn_result_t race_decode_decompress(const char *from,
97 unsigned short *buf,
98 size_t buflen);
99 static idn_result_t race_compress_encode(const unsigned short *p,
100 int compress_mode,
101 char *to, size_t tolen);
102 static int get_compress_mode(unsigned short *p);
104 idn_result_t
105 idn__race_decode(idn_converter_t ctx, void *privdata,
106 const char *from, unsigned long *to, size_t tolen) {
107 unsigned short *buf = NULL;
108 size_t prefixlen = strlen(IDN_RACE_PREFIX);
109 size_t fromlen;
110 size_t buflen;
111 idn_result_t r;
113 assert(ctx != NULL);
115 TRACE(("idn__race_decode(from=\"%s\", tolen=%d)\n",
116 idn__debug_xstring(from, 50), (int)tolen));
118 if (!idn__util_asciihaveaceprefix(from, IDN_RACE_PREFIX)) {
119 if (*from == '\0') {
120 r = idn_ucs4_utf8toucs4(from, to, tolen);
121 goto ret;
123 r = idn_invalid_encoding;
124 goto ret;
126 from += prefixlen;
127 fromlen = strlen(from);
130 * Allocate sufficient buffer.
132 buflen = fromlen + 1;
133 buf = malloc(sizeof(*buf) * buflen);
134 if (buf == NULL) {
135 r = idn_nomemory;
136 goto ret;
140 * Decode base32 and decompress.
142 r = race_decode_decompress(from, buf, buflen);
143 if (r != idn_success)
144 goto ret;
147 * Now 'buf' points the decompressed string, which must contain
148 * UTF-16 characters.
152 * Convert to UCS4.
154 r = idn_ucs4_utf16toucs4(buf, to, tolen);
155 if (r != idn_success)
156 goto ret;
158 ret:
159 free(buf);
160 if (r == idn_success) {
161 TRACE(("idn__race_decode(): succcess (to=\"%s\")\n",
162 idn__debug_ucs4xstring(to, 50)));
163 } else {
164 TRACE(("idn__race_decode(): %s\n", idn_result_tostring(r)));
166 return (r);
169 static idn_result_t
170 race_decode_decompress(const char *from, unsigned short *buf, size_t buflen)
172 unsigned short *p = buf;
173 unsigned int bitbuf = 0;
174 int bitlen = 0;
175 int i, j;
176 size_t len;
178 while (*from != '\0') {
179 int c = *from++;
180 int x;
182 if ('a' <= c && c <= 'z')
183 x = c - 'a';
184 else if ('A' <= c && c <= 'Z')
185 x = c - 'A';
186 else if ('2' <= c && c <= '7')
187 x = c - '2' + 26;
188 else
189 return (idn_invalid_encoding);
191 bitbuf = (bitbuf << 5) + x;
192 bitlen += 5;
193 if (bitlen >= 8) {
194 *p++ = (bitbuf >> (bitlen - 8)) & 0xff;
195 bitlen -= 8;
198 len = p - buf;
201 * Now 'buf' holds the decoded string.
205 * Decompress.
207 if (buf[0] == RACE_2OCTET_MODE) {
208 if ((len - 1) % 2 != 0)
209 return (idn_invalid_encoding);
210 for (i = 1, j = 0; i < len; i += 2, j++)
211 buf[j] = (buf[i] << 8) + buf[i + 1];
212 len = j;
213 } else {
214 unsigned short c = buf[0] << 8; /* higher octet */
216 for (i = 1, j = 0; i < len; j++) {
217 if (buf[i] == RACE_ESCAPE) {
218 if (i + 1 >= len)
219 return (idn_invalid_encoding);
220 else if (buf[i + 1] == RACE_ESCAPE_2ND)
221 buf[j] = c | 0xff;
222 else
223 buf[j] = buf[i + 1];
224 i += 2;
226 } else if (buf[i] == 0x99 && c == 0x00) {
228 * The RACE specification says this is error.
230 return (idn_invalid_encoding);
232 } else {
233 buf[j] = c | buf[i++];
236 len = j;
238 buf[len] = '\0';
240 return (idn_success);
243 idn_result_t
244 idn__race_encode(idn_converter_t ctx, void *privdata,
245 const unsigned long *from, char *to, size_t tolen) {
246 char *to_org = to;
247 unsigned short *p, *buf = NULL;
248 size_t prefixlen = strlen(IDN_RACE_PREFIX);
249 size_t buflen;
250 size_t fromlen;
251 idn_result_t r;
252 int compress_mode;
254 assert(ctx != NULL);
256 TRACE(("idn__race_encode(from=\"%s\", tolen=%d)\n",
257 idn__debug_ucs4xstring(from, 50), (int)tolen));
259 if (*from == '\0') {
260 r = idn_ucs4_ucs4toutf8(from, to, tolen);
261 goto ret;
262 } else if (idn__util_ucs4haveaceprefix(from, IDN_RACE_PREFIX)) {
263 r = idn_prohibited;
264 goto ret;
267 if (tolen < prefixlen) {
268 r = idn_buffer_overflow;
269 goto ret;
271 memcpy(to, IDN_RACE_PREFIX, prefixlen);
272 to += prefixlen;
273 tolen -= prefixlen;
275 fromlen = idn_ucs4_strlen(from);
276 buflen = fromlen * 2 + 2;
279 * Convert to UTF-16.
280 * Preserve space for a character at the top of the buffer.
282 for (;;) {
283 unsigned short *new_buf;
285 new_buf = realloc(buf, sizeof(*buf) * buflen);
286 if (new_buf == NULL) {
287 r = idn_nomemory;
288 goto ret;
290 buf = new_buf;
292 r = idn_ucs4_ucs4toutf16(from, buf + 1, buflen - 1);
293 if (r == idn_success)
294 break;
295 else if (r != idn_buffer_overflow)
296 goto ret;
298 buflen = fromlen * 2 + 2;
300 p = buf + 1;
303 * Now 'p' contains UTF-16 encoded string.
307 * Check U+0099.
308 * RACE doesn't permit U+0099 in an input string.
310 for (p = buf + 1; *p != '\0'; p++) {
311 if (*p == 0x0099) {
312 r = idn_invalid_encoding;
313 goto ret;
318 * Compress, encode in base-32 and output.
320 compress_mode = get_compress_mode(buf + 1);
321 r = race_compress_encode(buf, compress_mode, to, tolen);
323 ret:
324 free(buf);
325 if (r == idn_success) {
326 TRACE(("idn__race_encode(): succcess (to=\"%s\")\n",
327 idn__debug_xstring(to_org, 50)));
328 } else {
329 TRACE(("idn__race_encode(): %s\n", idn_result_tostring(r)));
331 return (r);
334 static idn_result_t
335 race_compress_encode(const unsigned short *p, int compress_mode,
336 char *to, size_t tolen)
338 unsigned long bitbuf = *p++; /* bit stream buffer */
339 int bitlen = 8; /* # of bits in 'bitbuf' */
341 while (*p != '\0' || bitlen > 0) {
342 unsigned int c = *p;
344 if (c == '\0') {
345 /* End of data. Flush. */
346 bitbuf <<= (5 - bitlen);
347 bitlen = 5;
348 } else if (compress_mode == compress_none) {
349 /* Push 16 bit data. */
350 bitbuf = (bitbuf << 16) | c;
351 bitlen += 16;
352 p++;
353 } else {/* compress_mode == compress_one/compress_two */
354 /* Push 8 or 16 bit data. */
355 if (compress_mode == compress_two &&
356 (c & 0xff00) == 0) {
357 /* Upper octet is zero (and not U1). */
358 bitbuf = (bitbuf << 16) | 0xff00 | c;
359 bitlen += 16;
360 } else if ((c & 0xff) == 0xff) {
361 /* Lower octet is 0xff. */
362 bitbuf = (bitbuf << 16) |
363 (RACE_ESCAPE << 8) | RACE_ESCAPE_2ND;
364 bitlen += 16;
365 } else {
366 /* Just output lower octet. */
367 bitbuf = (bitbuf << 8) | (c & 0xff);
368 bitlen += 8;
370 p++;
374 * Output bits in 'bitbuf' in 5-bit unit.
376 while (bitlen >= 5) {
377 int x;
379 /* Get top 5 bits. */
380 x = (bitbuf >> (bitlen - 5)) & 0x1f;
381 bitlen -= 5;
383 /* Encode. */
384 if (x < 26)
385 x += 'a';
386 else
387 x = (x - 26) + '2';
389 if (tolen < 1)
390 return (idn_buffer_overflow);
392 *to++ = x;
393 tolen--;
397 if (tolen <= 0)
398 return (idn_buffer_overflow);
400 *to = '\0';
401 return (idn_success);
404 static int
405 get_compress_mode(unsigned short *p) {
406 int zero = 0;
407 unsigned int upper = 0;
408 unsigned short *modepos = p - 1;
410 while (*p != '\0') {
411 unsigned int hi = *p++ & 0xff00;
413 if (hi == 0) {
414 zero++;
415 } else if (hi == upper) {
417 } else if (upper == 0) {
418 upper = hi;
419 } else {
420 *modepos = RACE_2OCTET_MODE;
421 return (compress_none);
424 *modepos = upper >> 8;
425 if (upper > 0 && zero > 0)
426 return (compress_two);
427 else
428 return (compress_one);