Sync usage with man page.
[netbsd-mini2440.git] / external / bsd / bind / dist / contrib / idn / idnkit-1.0-src / lib / punycode.c
blob7a5b42ca780935018280032f4d5a6af824e0d2e0
1 /* $NetBSD$ */
3 #ifndef lint
4 static char *rcsid = "Id: punycode.c,v 1.1.1.1 2003/06/04 00:26:06 marka Exp";
5 #endif
7 /*
8 * Copyright (c) 2001,2002 Japan Network Information Center.
9 * All rights reserved.
11 * By using this file, you agree to the terms and conditions set forth bellow.
13 * LICENSE TERMS AND CONDITIONS
15 * The following License Terms and Conditions apply, unless a different
16 * license is obtained from Japan Network Information Center ("JPNIC"),
17 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
18 * Chiyoda-ku, Tokyo 101-0047, Japan.
20 * 1. Use, Modification and Redistribution (including distribution of any
21 * modified or derived work) in source and/or binary forms is permitted
22 * under this License Terms and Conditions.
24 * 2. Redistribution of source code must retain the copyright notices as they
25 * appear in each source code file, this License Terms and Conditions.
27 * 3. Redistribution in binary form must reproduce the Copyright Notice,
28 * this License Terms and Conditions, in the documentation and/or other
29 * materials provided with the distribution. For the purposes of binary
30 * distribution the "Copyright Notice" refers to the following language:
31 * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved."
33 * 4. The name of JPNIC may not be used to endorse or promote products
34 * derived from this Software without specific prior written approval of
35 * JPNIC.
37 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
38 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
40 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
41 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
42 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
43 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
44 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
45 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
46 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
47 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
50 #include <config.h>
52 #include <stddef.h>
53 #include <stdlib.h>
54 #include <string.h>
56 #include <idn/result.h>
57 #include <idn/assert.h>
58 #include <idn/logmacro.h>
59 #include <idn/converter.h>
60 #include <idn/ucs4.h>
61 #include <idn/debug.h>
62 #include <idn/punycode.h>
63 #include <idn/util.h>
66 * Although draft-ietf-idn-punycode-00.txt doesn't specify the ACE
67 * signature, we have to choose one. In order to prevent the converted
68 * name from beginning with a hyphen, we should choose a prefix rather
69 * than a suffix.
71 #ifndef IDN_PUNYCODE_PREFIX
72 #define IDN_PUNYCODE_PREFIX "xn--"
73 #endif
75 #define INVALID_UCS 0x80000000
76 #define MAX_UCS 0x10FFFF
79 * As the draft states, it is possible that `delta' may overflow during
80 * the encoding. The upper bound of 'delta' is:
81 * <# of chars. of input string> + <max. difference in code point> *
82 * <# of chars. of input string + 1>
83 * For this value not to be greater than 0xffffffff (since the calculation
84 * is done using unsigned long, which is at least 32bit long), the maxmum
85 * input string size is about 3850 characters, which is long enough for
86 * a domain label...
88 #define PUNYCODE_MAXINPUT 3800
91 * Parameters.
93 #define PUNYCODE_BASE 36
94 #define PUNYCODE_TMIN 1
95 #define PUNYCODE_TMAX 26
96 #define PUNYCODE_SKEW 38
97 #define PUNYCODE_DAMP 700
98 #define PUNYCODE_INITIAL_BIAS 72
99 #define PUNYCODE_INITIAL_N 0x80
101 static int punycode_getwc(const char *s, size_t len,
102 int bias, unsigned long *vp);
103 static int punycode_putwc(char *s, size_t len,
104 unsigned long delta, int bias);
105 static int punycode_update_bias(unsigned long delta,
106 size_t npoints, int first);
108 idn_result_t
109 idn__punycode_decode(idn_converter_t ctx, void *privdata,
110 const char *from, unsigned long *to, size_t tolen) {
111 unsigned long *to_org = to;
112 unsigned long c, idx;
113 size_t prefixlen = strlen(IDN_PUNYCODE_PREFIX);
114 size_t fromlen;
115 size_t uidx, fidx, ucslen;
116 int first, bias;
117 idn_result_t r;
119 assert(ctx != NULL);
121 TRACE(("idn__punycode_decode(from=\"%s\", tolen=%d)\n",
122 idn__debug_xstring(from, 50), (int)tolen));
124 if (!idn__util_asciihaveaceprefix(from, IDN_PUNYCODE_PREFIX)) {
125 if (*from == '\0') {
126 r = idn_ucs4_utf8toucs4(from, to, tolen);
127 goto ret;
129 r = idn_invalid_encoding;
130 goto ret;
132 from += prefixlen;
133 fromlen = strlen(from);
136 * Find the last delimiter, and copy the characters
137 * before it verbatim.
139 ucslen = 0;
140 for (fidx = fromlen; fidx > 0; fidx--) {
141 if (from[fidx - 1] == '-') {
142 if (tolen < fidx) {
143 r = idn_buffer_overflow;
144 goto ret;
146 for (uidx = 0; uidx < fidx - 1; uidx++) {
147 to[uidx] = from[uidx];
149 ucslen = uidx;
150 break;
154 first = 1;
155 bias = PUNYCODE_INITIAL_BIAS;
156 c = PUNYCODE_INITIAL_N;
157 idx = 0;
158 while (fidx < fromlen) {
159 int len;
160 unsigned long delta;
161 int i;
163 len = punycode_getwc(from + fidx, fromlen - fidx, bias, &delta);
164 if (len == 0) {
165 r = idn_invalid_encoding;
166 goto ret;
168 fidx += len;
170 bias = punycode_update_bias(delta, ucslen + 1, first);
171 first = 0;
172 idx += delta;
173 c += idx / (ucslen + 1);
174 uidx = idx % (ucslen + 1);
176 /* Insert 'c' at uidx. */
177 if (tolen-- <= 0) {
178 r = idn_buffer_overflow;
179 goto ret;
181 for (i = ucslen; i > uidx; i--)
182 to[i] = to[i - 1];
183 to[uidx] = c;
185 ucslen++;
186 idx = uidx + 1;
189 /* Terminate with NUL. */
190 if (tolen <= 0) {
191 r = idn_buffer_overflow;
192 goto ret;
194 to[ucslen] = '\0';
195 r = idn_success;
197 ret:
198 if (r == idn_success) {
199 TRACE(("idn__punycode_decode(): succcess (to=\"%s\")\n",
200 idn__debug_ucs4xstring(to_org, 50)));
201 } else {
202 TRACE(("idn__punycode_decode(): %s\n", idn_result_tostring(r)));
204 return (r);
207 idn_result_t
208 idn__punycode_encode(idn_converter_t ctx, void *privdata,
209 const unsigned long *from, char *to, size_t tolen) {
210 char *to_org = to;
211 unsigned long cur_code, next_code, delta;
212 size_t prefixlen = strlen(IDN_PUNYCODE_PREFIX);
213 size_t fromlen;
214 size_t ucsdone;
215 size_t toidx;
216 int uidx, bias, first;
217 idn_result_t r;
219 assert(ctx != NULL);
221 TRACE(("idn__punycode_encode(from=\"%s\", tolen=%d)\n",
222 idn__debug_ucs4xstring(from, 50), (int)tolen));
224 if (*from == '\0') {
225 r = idn_ucs4_ucs4toutf8(from, to, tolen);
226 goto ret;
227 } else if (idn__util_ucs4haveaceprefix(from, IDN_PUNYCODE_PREFIX)) {
228 r = idn_prohibited;
229 goto ret;
232 if (tolen < prefixlen) {
233 r = idn_buffer_overflow;
234 goto ret;
236 memcpy(to, IDN_PUNYCODE_PREFIX, prefixlen);
237 to += prefixlen;
238 tolen -= prefixlen;
240 fromlen = idn_ucs4_strlen(from);
243 * If the input string is too long (actually too long to be sane),
244 * return failure in order to prevent possible overflow.
246 if (fromlen > PUNYCODE_MAXINPUT) {
247 ERROR(("idn__punycode_encode(): "
248 "the input string is too long to convert Punycode\n",
249 idn__debug_ucs4xstring(from, 50)));
250 r = idn_failure;
251 goto ret;
254 ucsdone = 0; /* number of characters processed */
255 toidx = 0;
258 * First, pick up basic code points and copy them to 'to'.
260 for (uidx = 0; uidx < fromlen; uidx++) {
261 if (from[uidx] < 0x80) {
262 if (toidx >= tolen) {
263 r = idn_buffer_overflow;
264 goto ret;
266 to[toidx++] = from[uidx];
267 ucsdone++;
272 * If there are any basic code points, output a delimiter
273 * (hyphen-minus).
275 if (toidx > 0) {
276 if (toidx >= tolen) {
277 r = idn_buffer_overflow;
278 goto ret;
280 to[toidx++] = '-';
281 to += toidx;
282 tolen -= toidx;
286 * Then encode non-basic characters.
288 first = 1;
289 cur_code = PUNYCODE_INITIAL_N;
290 bias = PUNYCODE_INITIAL_BIAS;
291 delta = 0;
292 while (ucsdone < fromlen) {
293 int limit = -1, rest;
296 * Find the smallest code point equal to or greater
297 * than 'cur_code'. Also remember the index of the
298 * last occurence of the code point.
300 for (next_code = MAX_UCS, uidx = fromlen - 1;
301 uidx >= 0; uidx--) {
302 if (from[uidx] >= cur_code && from[uidx] < next_code) {
303 next_code = from[uidx];
304 limit = uidx;
307 /* There must be such code point. */
308 assert(limit >= 0);
310 delta += (next_code - cur_code) * (ucsdone + 1);
311 cur_code = next_code;
314 * Scan the input string again, and encode characters
315 * whose code point is 'cur_code'. Use 'limit' to avoid
316 * unnecessary scan.
318 for (uidx = 0, rest = ucsdone; uidx <= limit; uidx++) {
319 if (from[uidx] < cur_code) {
320 delta++;
321 rest--;
322 } else if (from[uidx] == cur_code) {
323 int sz = punycode_putwc(to, tolen, delta, bias);
324 if (sz == 0) {
325 r = idn_buffer_overflow;
326 goto ret;
328 to += sz;
329 tolen -= sz;
330 ucsdone++;
331 bias = punycode_update_bias(delta, ucsdone,
332 first);
333 delta = 0;
334 first = 0;
337 delta += rest + 1;
338 cur_code++;
342 * Terminate with NUL.
344 if (tolen <= 0) {
345 r = idn_buffer_overflow;
346 goto ret;
348 *to = '\0';
349 r = idn_success;
351 ret:
352 if (r == idn_success) {
353 TRACE(("idn__punycode_encode(): succcess (to=\"%s\")\n",
354 idn__debug_xstring(to_org, 50)));
355 } else {
356 TRACE(("idn__punycode_encode(): %s\n", idn_result_tostring(r)));
358 return (r);
361 static int
362 punycode_getwc(const char *s, size_t len, int bias, unsigned long *vp) {
363 size_t orglen = len;
364 unsigned long v = 0, w = 1;
365 int k;
367 for (k = PUNYCODE_BASE - bias; len > 0; k += PUNYCODE_BASE) {
368 int c = *s++;
369 int t = (k < PUNYCODE_TMIN) ? PUNYCODE_TMIN :
370 (k > PUNYCODE_TMAX) ? PUNYCODE_TMAX : k;
372 len--;
373 if ('a' <= c && c <= 'z')
374 c = c - 'a';
375 else if ('A' <= c && c <= 'Z')
376 c = c - 'A';
377 else if ('0' <= c && c <= '9')
378 c = c - '0' + 26;
379 else
380 c = -1;
382 if (c < 0)
383 return (0); /* invalid character */
385 v += c * w;
387 if (c < t) {
388 *vp = v;
389 return (orglen - len);
392 w *= (PUNYCODE_BASE - t);
395 return (0); /* final character missing */
398 static int
399 punycode_putwc(char *s, size_t len, unsigned long delta, int bias) {
400 const char *punycode_base36 = "abcdefghijklmnopqrstuvwxyz0123456789";
401 int k;
402 char *sorg = s;
404 for (k = PUNYCODE_BASE - bias; 1; k += PUNYCODE_BASE) {
405 int t = (k < PUNYCODE_TMIN) ? PUNYCODE_TMIN :
406 (k > PUNYCODE_TMAX) ? PUNYCODE_TMAX : k;
408 if (delta < t)
409 break;
410 if (len < 1)
411 return (0);
412 *s++ = punycode_base36[t + ((delta - t) % (PUNYCODE_BASE - t))];
413 len--;
414 delta = (delta - t) / (PUNYCODE_BASE - t);
416 if (len < 1)
417 return (0);
418 *s++ = punycode_base36[delta];
419 return (s - sorg);
422 static int
423 punycode_update_bias(unsigned long delta, size_t npoints, int first) {
424 int k = 0;
426 delta /= first ? PUNYCODE_DAMP : 2;
427 delta += delta / npoints;
429 while (delta > ((PUNYCODE_BASE - PUNYCODE_TMIN) * PUNYCODE_TMAX) / 2) {
430 delta /= PUNYCODE_BASE - PUNYCODE_TMIN;
431 k++;
433 return (PUNYCODE_BASE * k +
434 (((PUNYCODE_BASE - PUNYCODE_TMIN + 1) * delta) /
435 (delta + PUNYCODE_SKEW)));