/* Revision-control identification string for this file (race.c, revision 1.1.1.1). */
4 static char *rcsid
= "Id: race.c,v 1.1.1.1 2003/06/04 00:26:07 marka Exp";
8 * Copyright (c) 2000,2001,2002 Japan Network Information Center.
11 * By using this file, you agree to the terms and conditions set forth bellow.
13 * LICENSE TERMS AND CONDITIONS
15 * The following License Terms and Conditions apply, unless a different
16 * license is obtained from Japan Network Information Center ("JPNIC"),
17 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
18 * Chiyoda-ku, Tokyo 101-0047, Japan.
20 * 1. Use, Modification and Redistribution (including distribution of any
21 * modified or derived work) in source and/or binary forms is permitted
22 * under this License Terms and Conditions.
24 * 2. Redistribution of source code must retain the copyright notices as they
25 * appear in each source code file, this License Terms and Conditions.
27 * 3. Redistribution in binary form must reproduce the Copyright Notice,
28 * this License Terms and Conditions, in the documentation and/or other
29 * materials provided with the distribution. For the purposes of binary
30 * distribution the "Copyright Notice" refers to the following language:
31 * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
33 * 4. The name of JPNIC may not be used to endorse or promote products
34 * derived from this Software without specific prior written approval of
37 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
38 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
40 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
41 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
42 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
43 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
44 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
45 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
46 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
47 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
56 #include <idn/result.h>
57 #include <idn/assert.h>
58 #include <idn/logmacro.h>
59 #include <idn/converter.h>
61 #include <idn/debug.h>
/*
 * ACE prefix that marks a RACE-encoded name.  It is only defined here when
 * the build has not already supplied its own value.  The decoder tests for
 * it on the input and the encoder copies it to the front of the output.
 */
65 #ifndef IDN_RACE_PREFIX
66 #define IDN_RACE_PREFIX "bq--"
/*
 * Mode octet meaning "no compression": every character is stored as two
 * octets.  get_compress_mode() writes it and race_decode_decompress()
 * compares the first decoded octet against it.
 */
68 #define RACE_2OCTET_MODE 0xd8
/*
 * Escape octet used inside compressed data, and the octet that follows it
 * when the pair stands for a character whose lower octet is 0xff.
 */
69 #define RACE_ESCAPE 0xff
70 #define RACE_ESCAPE_2ND 0x99
72 #define RACE_BUF_SIZE 128 /* more than enough */
/*
 * Helpers for UTF-16 surrogate pairs.  All of them are plain macros, so an
 * argument with side effects may be evaluated more than once
 * (IS_SURROGATE_HIGH and IS_SURROGATE_LOW each use 'v' twice).
 */
75 * Unicode surrogate pair.
/* Range tests: high surrogates are 0xd800..0xdbff, low are 0xdc00..0xdfff. */
77 #define IS_SURROGATE_HIGH(v) (0xd800 <= (v) && (v) <= 0xdbff)
78 #define IS_SURROGATE_LOW(v) (0xdc00 <= (v) && (v) <= 0xdfff)
/*
 * Split a code point 'v' into its surrogate halves: the high half is
 * 0xd800 plus the bits of (v - 0x10000) above the low ten, the low half is
 * 0xdc00 plus the low ten bits of v.
 */
79 #define SURROGATE_HIGH(v) (SURROGATE_H_OFF + (((v) - 0x10000) >> 10))
80 #define SURROGATE_LOW(v) (SURROGATE_L_OFF + ((v) & 0x3ff))
81 #define SURROGATE_BASE 0x10000
82 #define SURROGATE_H_OFF 0xd800
83 #define SURROGATE_L_OFF 0xdc00
/* Inverse of the split above: rebuild the code point from a high/low pair. */
84 #define COMBINE_SURROGATE(h, l) \
85 (SURROGATE_BASE + (((h)-SURROGATE_H_OFF)<<10) + ((l)-SURROGATE_L_OFF))
/*
 * Compression mode identifiers.  get_compress_mode() returns one of these
 * and race_compress_encode() branches on the value.
 * NOTE(review): the line that opens this list and the one that closes it
 * are not visible in this view.
 */
91 compress_one
, /* all characters are in a single row */
92 compress_two
, /* row 0 and another row */
93 compress_none
/* nope */
/*
 * Forward declarations of the file-local helpers defined further down.
 * NOTE(review): parts of the first two parameter lists are not visible in
 * this view; the definitions below show the full signatures.
 */
96 static idn_result_t
race_decode_decompress(const char *from
,
99 static idn_result_t
race_compress_encode(const unsigned short *p
,
101 char *to
, size_t tolen
);
102 static int get_compress_mode(unsigned short *p
);
/*
 * idn__race_decode -- decode a RACE-encoded string into 'to'.
 *
 *   ctx, privdata  converter context arguments (no visible line uses them)
 *   from           NUL-terminated input string
 *   to, tolen      output buffer and its capacity
 *
 * Visible steps: test the input for IDN_RACE_PREFIX, allocate a temporary
 * buffer of 16-bit units, run race_decode_decompress() into it, then
 * convert that buffer with idn_ucs4_utf16toucs4().  The outcome is traced
 * either as success or as the textual form of the result code.
 *
 * NOTE(review): several statements of this function (local declarations,
 * the prefix skip, the allocation check, cleanup and the return) are not
 * visible in this view; the comments below describe only what is shown.
 */
105 idn__race_decode(idn_converter_t ctx
, void *privdata
,
106 const char *from
, unsigned long *to
, size_t tolen
) {
107 unsigned short *buf
= NULL
;
108 size_t prefixlen
= strlen(IDN_RACE_PREFIX
);
115 TRACE(("idn__race_decode(from=\"%s\", tolen=%d)\n",
116 idn__debug_xstring(from
, 50), (int)tolen
));
/*
 * Input that lacks the RACE prefix: two results are visible, a direct
 * UTF-8 conversion or idn_invalid_encoding.
 * NOTE(review): the condition choosing between them is not visible here.
 */
118 if (!idn__util_asciihaveaceprefix(from
, IDN_RACE_PREFIX
)) {
120 r
= idn_ucs4_utf8toucs4(from
, to
, tolen
);
123 r
= idn_invalid_encoding
;
127 fromlen
= strlen(from
);
130 * Allocate sufficient buffer.
/* One 16-bit element per input character, plus one extra element. */
132 buflen
= fromlen
+ 1;
133 buf
= malloc(sizeof(*buf
) * buflen
);
140 * Decode base32 and decompress.
142 r
= race_decode_decompress(from
, buf
, buflen
);
143 if (r
!= idn_success
)
147 * Now 'buf' points the decompressed string, which must contain
/* Convert the 16-bit units in 'buf' into the caller's output buffer. */
154 r
= idn_ucs4_utf16toucs4(buf
, to
, tolen
);
155 if (r
!= idn_success
)
/* Trace the final result: the decoded string on success, else the error name. */
160 if (r
== idn_success
) {
161 TRACE(("idn__race_decode(): succcess (to=\"%s\")\n",
162 idn__debug_ucs4xstring(to
, 50)));
164 TRACE(("idn__race_decode(): %s\n", idn_result_tostring(r
)));
/*
 * race_decode_decompress -- base-32 decode 'from' and expand the result.
 *
 * The decoded octets are first stored one per element in 'buf'.  They are
 * then rewritten in place as 16-bit units, using the first octet (buf[0])
 * as the mode: RACE_2OCTET_MODE means the rest is pairs of octets, any
 * other value is the upper octet shared by the compressed characters.
 *
 * Returns idn_invalid_encoding for a character outside the accepted
 * alphabet or for malformed payload, idn_success otherwise.
 *
 * NOTE(review): several statements (local declarations, the bit-count
 * bookkeeping, length computation, termination of the result) are not
 * visible in this view, and no visible line uses 'buflen'.
 */
170 race_decode_decompress(const char *from
, unsigned short *buf
, size_t buflen
)
172 unsigned short *p
= buf
;
173 unsigned int bitbuf
= 0;
178 while (*from
!= '\0') {
/*
 * Accepted alphabet: letters of either case and the digits '2'..'7'.
 * Anything else is rejected as an invalid encoding.
 */
182 if ('a' <= c
&& c
<= 'z')
184 else if ('A' <= c
&& c
<= 'Z')
186 else if ('2' <= c
&& c
<= '7')
189 return (idn_invalid_encoding
);
/* Append the value 'x' for this character below the bits already held. */
191 bitbuf
= (bitbuf
<< 5) + x
;
/* Store the 8 bits that start at bit position (bitlen - 8) as the next octet. */
194 *p
++ = (bitbuf
>> (bitlen
- 8)) & 0xff;
201 * Now 'buf' holds the decoded string.
/* Two-octet mode: the payload after the mode octet must be whole pairs. */
207 if (buf
[0] == RACE_2OCTET_MODE
) {
208 if ((len
- 1) % 2 != 0)
209 return (idn_invalid_encoding
);
/* Combine each pair, first octet as the upper half, writing from buf[0] on. */
210 for (i
= 1, j
= 0; i
< len
; i
+= 2, j
++)
211 buf
[j
] = (buf
[i
] << 8) + buf
[i
+ 1];
/* Compressed mode: buf[0] supplies the upper octet for ordinary octets. */
214 unsigned short c
= buf
[0] << 8; /* higher octet */
216 for (i
= 1, j
= 0; i
< len
; j
++) {
/*
 * RACE_ESCAPE starts a two-octet sequence; the octet after it is
 * inspected next.
 * NOTE(review): the guard for the idn_invalid_encoding return below and
 * the values stored for an escape sequence are not visible here.
 */
217 if (buf
[i
] == RACE_ESCAPE
) {
219 return (idn_invalid_encoding
);
220 else if (buf
[i
+ 1] == RACE_ESCAPE_2ND
)
/* An unescaped 0x99 octet combined with a zero upper octet is rejected. */
226 } else if (buf
[i
] == 0x99 && c
== 0x00) {
228 * The RACE specification says this is error.
230 return (idn_invalid_encoding
);
/* Ordinary octet: merge it with the shared upper octet. */
233 buf
[j
] = c
| buf
[i
++];
240 return (idn_success
);
/*
 * idn__race_encode -- encode the string 'from' as RACE into 'to'.
 *
 *   ctx, privdata  converter context arguments (no visible line uses them)
 *   from           NUL-terminated input string
 *   to, tolen      output buffer and its capacity
 *
 * Visible steps: copy IDN_RACE_PREFIX to the output (idn_buffer_overflow if
 * it does not fit), convert the input to UTF-16 in a heap buffer whose
 * first element is left free, reject input as idn_invalid_encoding in the
 * scan loop, pick a compression mode with get_compress_mode(), then call
 * race_compress_encode() to produce the output.
 *
 * NOTE(review): several statements (local declarations, the conditions of
 * the first branches, pointer advances, cleanup and the return) are not
 * visible in this view; the comments below describe only what is shown.
 */
244 idn__race_encode(idn_converter_t ctx
, void *privdata
,
245 const unsigned long *from
, char *to
, size_t tolen
) {
247 unsigned short *p
, *buf
= NULL
;
248 size_t prefixlen
= strlen(IDN_RACE_PREFIX
);
256 TRACE(("idn__race_encode(from=\"%s\", tolen=%d)\n",
257 idn__debug_ucs4xstring(from
, 50), (int)tolen
));
/*
 * NOTE(review): the condition under which the input is passed straight
 * through as UTF-8, and the body of the "already has the ACE prefix"
 * branch, are not visible here.
 */
260 r
= idn_ucs4_ucs4toutf8(from
, to
, tolen
);
262 } else if (idn__util_ucs4haveaceprefix(from
, IDN_RACE_PREFIX
)) {
/* The prefix must fit in the output before it is copied there. */
267 if (tolen
< prefixlen
) {
268 r
= idn_buffer_overflow
;
271 memcpy(to
, IDN_RACE_PREFIX
, prefixlen
);
/* Size of the UTF-16 work buffer: two elements per input character, plus two. */
275 fromlen
= idn_ucs4_strlen(from
);
276 buflen
= fromlen
* 2 + 2;
/*
 * The conversion below writes from buf + 1, leaving buf[0] free.
 * get_compress_mode(buf + 1) later stores the mode octet in that slot.
 */
280 * Preserve space for a character at the top of the buffer.
283 unsigned short *new_buf
;
/* The realloc result goes to a temporary, so 'buf' still holds the old pointer on failure. */
285 new_buf
= realloc(buf
, sizeof(*buf
) * buflen
);
286 if (new_buf
== NULL
) {
292 r
= idn_ucs4_ucs4toutf16(from
, buf
+ 1, buflen
- 1);
293 if (r
== idn_success
)
295 else if (r
!= idn_buffer_overflow
)
/*
 * NOTE(review): this is the same expression as the initial size above, so
 * as far as the visible code shows the buffer does not grow on a retry.
 */
298 buflen
= fromlen
* 2 + 2;
303 * Now 'p' contains UTF-16 encoded string.
308 * RACE doesn't permit U+0099 in an input string.
/* Scan the UTF-16 string, starting after the reserved first element. */
310 for (p
= buf
+ 1; *p
!= '\0'; p
++) {
312 r
= idn_invalid_encoding
;
318 * Compress, encode in base-32 and output.
/* 'buf' (including the mode octet now in buf[0]) is handed to the encoder. */
320 compress_mode
= get_compress_mode(buf
+ 1);
321 r
= race_compress_encode(buf
, compress_mode
, to
, tolen
);
/* Trace the final result: the encoded string on success, else the error name. */
325 if (r
== idn_success
) {
326 TRACE(("idn__race_encode(): succcess (to=\"%s\")\n",
327 idn__debug_xstring(to_org
, 50)));
329 TRACE(("idn__race_encode(): %s\n", idn_result_tostring(r
)));
/*
 * race_compress_encode -- compress a string of 16-bit units and emit it in
 * 5-bit groups.
 *
 *   p              input; its first element is consumed as an 8-bit value
 *                  (the caller passes the mode octet there), the rest is a
 *                  NUL-terminated string of 16-bit units
 *   compress_mode  compress_one, compress_two or compress_none
 *   to, tolen      output buffer and its capacity
 *
 * Returns idn_buffer_overflow when the output does not fit, idn_success
 * otherwise.
 *
 * NOTE(review): several statements (the fetch of 'c', updates of 'bitlen',
 * the mapping of each 5-bit value to an output character, and the guards
 * for the overflow returns) are not visible in this view.
 */
335 race_compress_encode(const unsigned short *p
, int compress_mode
,
336 char *to
, size_t tolen
)
/* The bit buffer starts out holding the first element as 8 bits. */
338 unsigned long bitbuf
= *p
++; /* bit stream buffer */
339 int bitlen
= 8; /* # of bits in 'bitbuf' */
/* Run until the input is used up and no bits are left waiting. */
341 while (*p
!= '\0' || bitlen
> 0) {
345 /* End of data. Flush. */
/* Shift left by (5 - bitlen) to pad the remaining bits out to a full group. */
346 bitbuf
<<= (5 - bitlen
);
348 } else if (compress_mode
== compress_none
) {
349 /* Push 16 bit data. */
350 bitbuf
= (bitbuf
<< 16) | c
;
353 } else {/* compress_mode == compress_one/compress_two */
354 /* Push 8 or 16 bit data. */
355 if (compress_mode
== compress_two
&&
357 /* Upper octet is zero (and not U1). */
358 bitbuf
= (bitbuf
<< 16) | 0xff00 | c
;
360 } else if ((c
& 0xff) == 0xff) {
361 /* Lower octet is 0xff. */
362 bitbuf
= (bitbuf
<< 16) |
363 (RACE_ESCAPE
<< 8) | RACE_ESCAPE_2ND
;
366 /* Just output lower octet. */
367 bitbuf
= (bitbuf
<< 8) | (c
& 0xff);
374 * Output bits in 'bitbuf' in 5-bit unit.
376 while (bitlen
>= 5) {
379 /* Get top 5 bits. */
380 x
= (bitbuf
>> (bitlen
- 5)) & 0x1f;
390 return (idn_buffer_overflow
);
398 return (idn_buffer_overflow
);
401 return (idn_success
);
/*
 * get_compress_mode -- choose the compression mode for the string of
 * 16-bit units at 'p' and record the mode octet.
 *
 * The mode octet is written to the element immediately before 'p'
 * (p[-1]), so the caller must pass a pointer that has a writable element
 * in front of it.
 *
 * Returns compress_none (mode octet RACE_2OCTET_MODE), compress_two or
 * compress_one (mode octet is the upper octet held in 'upper').
 *
 * NOTE(review): the loop header, the declaration and updates of 'zero',
 * the assignment to 'upper' and some branch lines are not visible in this
 * view.
 */
405 get_compress_mode(unsigned short *p
) {
407 unsigned int upper
= 0;
/* Slot just before the string; the mode octet is stored here. */
408 unsigned short *modepos
= p
- 1;
/* Upper octet of the current unit, kept in bits 8..15. */
411 unsigned int hi
= *p
++ & 0xff00;
415 } else if (hi
== upper
) {
417 } else if (upper
== 0) {
/*
 * NOTE(review): the line introducing this path is not visible; presumably
 * it is reached when 'hi' differs from a non-zero 'upper' already recorded.
 */
420 *modepos
= RACE_2OCTET_MODE
;
421 return (compress_none
);
/* Record the upper octet held in 'upper' as the mode octet. */
424 *modepos
= upper
>> 8;
/* compress_two needs a non-zero 'upper' and a positive 'zero'; otherwise compress_one. */
425 if (upper
> 0 && zero
> 0)
426 return (compress_two
);
428 return (compress_one
);