Bug 460926 A11y hierachy is broken on Ubuntu 8.10 (GNOME 2.24), r=Evan.Yan sr=roc
[wine-gecko.git] / security / nss / lib / util / utf8.c
blob7b26f48287b318d9b0997237e62c74ce68b994d1
1 /* ***** BEGIN LICENSE BLOCK *****
2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 * The contents of this file are subject to the Mozilla Public License Version
5 * 1.1 (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 * http://www.mozilla.org/MPL/
9 * Software distributed under the License is distributed on an "AS IS" basis,
10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11 * for the specific language governing rights and limitations under the
12 * License.
14 * The Original Code is the Netscape security libraries.
16 * The Initial Developer of the Original Code is
17 * Netscape Communications Corporation.
18 * Portions created by the Initial Developer are Copyright (C) 1994-2000
19 * the Initial Developer. All Rights Reserved.
21 * Contributor(s):
22 * John Gardiner Myers <jgmyers@speakeasy.net>
24 * Alternatively, the contents of this file may be used under the terms of
25 * either the GNU General Public License Version 2 or later (the "GPL"), or
26 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
36 * ***** END LICENSE BLOCK ***** */
#ifdef DEBUG
/* Revision-control identification string; compiled in only when DEBUG is defined. */
static const char CVS_ID[] =
    "@(#) $RCSfile: utf8.c,v $ $Revision: 1.13 $ $Date: 2008/10/05 20:59:26 $";
#endif /* DEBUG */
42 #include "seccomon.h"
43 #include "secport.h"
45 #ifdef TEST_UTF8
46 #include <assert.h>
47 #undef PORT_Assert
48 #define PORT_Assert assert
49 #endif
/*
 * From RFC 2044:
 *
 * UCS-4 range (hex.)     UTF-8 octet sequence (binary)
 * 0000 0000-0000 007F    0xxxxxxx
 * 0000 0080-0000 07FF    110xxxxx 10xxxxxx
 * 0000 0800-0000 FFFF    1110xxxx 10xxxxxx 10xxxxxx
 * 0001 0000-001F FFFF    11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 * 0020 0000-03FF FFFF    111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 * 0400 0000-7FFF FFFF    1111110x 10xxxxxx ... 10xxxxxx
 */
/*
 * From http://www.imc.org/draft-hoffman-utf16
 *
 * For U on [0x00010000,0x0010FFFF]:  Let U' = U - 0x00010000
 *
 * U' = yyyyyyyyyyxxxxxxxxxx
 * W1 = 110110yyyyyyyyyy
 * W2 = 110111xxxxxxxxxx
 */
/*
 * This code assumes NETWORK BYTE ORDER for the 16- and 32-bit
 * character values.  If you wish to use this code for working with
 * host byte order values, define the following:
 *
 * #if IS_BIG_ENDIAN
 * #define L_0 0
 * #define L_1 1
 * #define L_2 2
 * #define L_3 3
 * #define H_0 0
 * #define H_1 1
 * #else / * not everyone has elif * /
 * #if IS_LITTLE_ENDIAN
 * #define L_0 3
 * #define L_1 2
 * #define L_2 1
 * #define L_3 0
 * #define H_0 1
 * #define H_1 0
 * #else
 * #error "PDP and NUXI support deferred"
 * #endif / * IS_LITTLE_ENDIAN * /
 * #endif / * IS_BIG_ENDIAN * /
 */
/*
 * Byte offsets inside one encoded character unit.
 *
 * L_0..L_3 index the four bytes of a UCS-4 unit and H_0..H_1 the two bytes
 * of a UCS-2/UTF-16 unit, most significant byte first.  With the values
 * below, index 0 is the most significant byte, i.e. big-endian (network)
 * byte order.
 */
#define L_0 0
#define L_1 1
#define L_2 2
#define L_3 3

#define H_0 0
#define H_1 1

/* Sentinel returned by sec_port_read_utf8() for malformed input. */
#define BAD_UTF8 ((PRUint32)-1)
109 * Parse a single UTF-8 character per the spec. in section 3.9 (D36)
110 * of Unicode 4.0.0.
112 * Parameters:
113 * index - Points to the byte offset in inBuf of character to read. On success,
114 * updated to the offset of the following character.
115 * inBuf - Input buffer, UTF-8 encoded
116 * inbufLen - Length of input buffer, in bytes.
118 * Returns:
119 * Success - The UCS4 encoded character
120 * Failure - BAD_UTF8
122 static PRUint32
123 sec_port_read_utf8(unsigned int *index, unsigned char *inBuf, unsigned int inBufLen)
125 PRUint32 result;
126 unsigned int i = *index;
127 int bytes_left;
128 PRUint32 min_value;
130 PORT_Assert(i < inBufLen);
132 if ( (inBuf[i] & 0x80) == 0x00 ) {
133 result = inBuf[i++];
134 bytes_left = 0;
135 min_value = 0;
136 } else if ( (inBuf[i] & 0xE0) == 0xC0 ) {
137 result = inBuf[i++] & 0x1F;
138 bytes_left = 1;
139 min_value = 0x80;
140 } else if ( (inBuf[i] & 0xF0) == 0xE0) {
141 result = inBuf[i++] & 0x0F;
142 bytes_left = 2;
143 min_value = 0x800;
144 } else if ( (inBuf[i] & 0xF8) == 0xF0) {
145 result = inBuf[i++] & 0x07;
146 bytes_left = 3;
147 min_value = 0x10000;
148 } else {
149 return BAD_UTF8;
152 while (bytes_left--) {
153 if (i >= inBufLen || (inBuf[i] & 0xC0) != 0x80) return BAD_UTF8;
154 result = (result << 6) | (inBuf[i++] & 0x3F);
157 /* Check for overlong sequences, surrogates, and outside unicode range */
158 if (result < min_value || (result & 0xFFFFF800) == 0xD800 || result > 0x10FFFF) {
159 return BAD_UTF8;
162 *index = i;
163 return result;
166 PRBool
167 sec_port_ucs4_utf8_conversion_function
169 PRBool toUnicode,
170 unsigned char *inBuf,
171 unsigned int inBufLen,
172 unsigned char *outBuf,
173 unsigned int maxOutBufLen,
174 unsigned int *outBufLen
177 PORT_Assert((unsigned int *)NULL != outBufLen);
179 if( toUnicode ) {
180 unsigned int i, len = 0;
182 for( i = 0; i < inBufLen; ) {
183 if( (inBuf[i] & 0x80) == 0x00 ) i += 1;
184 else if( (inBuf[i] & 0xE0) == 0xC0 ) i += 2;
185 else if( (inBuf[i] & 0xF0) == 0xE0 ) i += 3;
186 else if( (inBuf[i] & 0xF8) == 0xF0 ) i += 4;
187 else return PR_FALSE;
189 len += 4;
192 if( len > maxOutBufLen ) {
193 *outBufLen = len;
194 return PR_FALSE;
197 len = 0;
199 for( i = 0; i < inBufLen; ) {
200 PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen);
202 if (ucs4 == BAD_UTF8) return PR_FALSE;
204 outBuf[len+L_0] = 0x00;
205 outBuf[len+L_1] = (unsigned char)(ucs4 >> 16);
206 outBuf[len+L_2] = (unsigned char)(ucs4 >> 8);
207 outBuf[len+L_3] = (unsigned char)ucs4;
209 len += 4;
212 *outBufLen = len;
213 return PR_TRUE;
214 } else {
215 unsigned int i, len = 0;
216 PORT_Assert((inBufLen % 4) == 0);
217 if ((inBufLen % 4) != 0) {
218 *outBufLen = 0;
219 return PR_FALSE;
222 for( i = 0; i < inBufLen; i += 4 ) {
223 if( (inBuf[i+L_0] > 0x00) || (inBuf[i+L_1] > 0x10) ) {
224 *outBufLen = 0;
225 return PR_FALSE;
226 } else if( inBuf[i+L_1] >= 0x01 ) len += 4;
227 else if( inBuf[i+L_2] >= 0x08 ) len += 3;
228 else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) len += 2;
229 else len += 1;
232 if( len > maxOutBufLen ) {
233 *outBufLen = len;
234 return PR_FALSE;
237 len = 0;
239 for( i = 0; i < inBufLen; i += 4 ) {
240 if( inBuf[i+L_1] >= 0x01 ) {
241 /* 0001 0000-001F FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
242 /* 00000000 000abcde fghijklm nopqrstu ->
243 11110abc 10defghi 10jklmno 10pqrstu */
245 outBuf[len+0] = 0xF0 | ((inBuf[i+L_1] & 0x1C) >> 2);
246 outBuf[len+1] = 0x80 | ((inBuf[i+L_1] & 0x03) << 4)
247 | ((inBuf[i+L_2] & 0xF0) >> 4);
248 outBuf[len+2] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2)
249 | ((inBuf[i+L_3] & 0xC0) >> 6);
250 outBuf[len+3] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);
252 len += 4;
253 } else if( inBuf[i+L_2] >= 0x08 ) {
254 /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */
255 /* 00000000 00000000 abcdefgh ijklmnop ->
256 1110abcd 10efghij 10klmnop */
258 outBuf[len+0] = 0xE0 | ((inBuf[i+L_2] & 0xF0) >> 4);
259 outBuf[len+1] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2)
260 | ((inBuf[i+L_3] & 0xC0) >> 6);
261 outBuf[len+2] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);
263 len += 3;
264 } else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) {
265 /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */
266 /* 00000000 00000000 00000abc defghijk ->
267 110abcde 10fghijk */
269 outBuf[len+0] = 0xC0 | ((inBuf[i+L_2] & 0x07) << 2)
270 | ((inBuf[i+L_3] & 0xC0) >> 6);
271 outBuf[len+1] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);
273 len += 2;
274 } else {
275 /* 0000 0000-0000 007F -> 0xxxxxx */
276 /* 00000000 00000000 00000000 0abcdefg ->
277 0abcdefg */
279 outBuf[len+0] = (inBuf[i+L_3] & 0x7F);
281 len += 1;
285 *outBufLen = len;
286 return PR_TRUE;
290 PRBool
291 sec_port_ucs2_utf8_conversion_function
293 PRBool toUnicode,
294 unsigned char *inBuf,
295 unsigned int inBufLen,
296 unsigned char *outBuf,
297 unsigned int maxOutBufLen,
298 unsigned int *outBufLen
301 PORT_Assert((unsigned int *)NULL != outBufLen);
303 if( toUnicode ) {
304 unsigned int i, len = 0;
306 for( i = 0; i < inBufLen; ) {
307 if( (inBuf[i] & 0x80) == 0x00 ) {
308 i += 1;
309 len += 2;
310 } else if( (inBuf[i] & 0xE0) == 0xC0 ) {
311 i += 2;
312 len += 2;
313 } else if( (inBuf[i] & 0xF0) == 0xE0 ) {
314 i += 3;
315 len += 2;
316 } else if( (inBuf[i] & 0xF8) == 0xF0 ) {
317 i += 4;
318 len += 4;
319 } else return PR_FALSE;
322 if( len > maxOutBufLen ) {
323 *outBufLen = len;
324 return PR_FALSE;
327 len = 0;
329 for( i = 0; i < inBufLen; ) {
330 PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen);
332 if (ucs4 == BAD_UTF8) return PR_FALSE;
334 if( ucs4 < 0x10000) {
335 outBuf[len+H_0] = (unsigned char)(ucs4 >> 8);
336 outBuf[len+H_1] = (unsigned char)ucs4;
337 len += 2;
338 } else {
339 ucs4 -= 0x10000;
340 outBuf[len+0+H_0] = (unsigned char)(0xD8 | ((ucs4 >> 18) & 0x3));
341 outBuf[len+0+H_1] = (unsigned char)(ucs4 >> 10);
342 outBuf[len+2+H_0] = (unsigned char)(0xDC | ((ucs4 >> 8) & 0x3));
343 outBuf[len+2+H_1] = (unsigned char)ucs4;
344 len += 4;
348 *outBufLen = len;
349 return PR_TRUE;
350 } else {
351 unsigned int i, len = 0;
352 PORT_Assert((inBufLen % 2) == 0);
353 if ((inBufLen % 2) != 0) {
354 *outBufLen = 0;
355 return PR_FALSE;
358 for( i = 0; i < inBufLen; i += 2 ) {
359 if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_0] & 0x80) == 0x00) ) len += 1;
360 else if( inBuf[i+H_0] < 0x08 ) len += 2;
361 else if( ((inBuf[i+0+H_0] & 0xDC) == 0xD8) ) {
362 if( ((inBuf[i+2+H_0] & 0xDC) == 0xDC) && ((inBufLen - i) > 2) ) {
363 i += 2;
364 len += 4;
365 } else {
366 return PR_FALSE;
369 else len += 3;
372 if( len > maxOutBufLen ) {
373 *outBufLen = len;
374 return PR_FALSE;
377 len = 0;
379 for( i = 0; i < inBufLen; i += 2 ) {
380 if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_1] & 0x80) == 0x00) ) {
381 /* 0000-007F -> 0xxxxxx */
382 /* 00000000 0abcdefg -> 0abcdefg */
384 outBuf[len] = inBuf[i+H_1] & 0x7F;
386 len += 1;
387 } else if( inBuf[i+H_0] < 0x08 ) {
388 /* 0080-07FF -> 110xxxxx 10xxxxxx */
389 /* 00000abc defghijk -> 110abcde 10fghijk */
391 outBuf[len+0] = 0xC0 | ((inBuf[i+H_0] & 0x07) << 2)
392 | ((inBuf[i+H_1] & 0xC0) >> 6);
393 outBuf[len+1] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0);
395 len += 2;
396 } else if( (inBuf[i+H_0] & 0xDC) == 0xD8 ) {
397 int abcde, BCDE;
399 PORT_Assert(((inBuf[i+2+H_0] & 0xDC) == 0xDC) && ((inBufLen - i) > 2));
401 /* D800-DBFF DC00-DFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
402 /* 110110BC DEfghijk 110111lm nopqrstu ->
403 { Let abcde = BCDE + 1 }
404 11110abc 10defghi 10jklmno 10pqrstu */
406 BCDE = ((inBuf[i+H_0] & 0x03) << 2) | ((inBuf[i+H_1] & 0xC0) >> 6);
407 abcde = BCDE + 1;
409 outBuf[len+0] = 0xF0 | ((abcde & 0x1C) >> 2);
410 outBuf[len+1] = 0x80 | ((abcde & 0x03) << 4)
411 | ((inBuf[i+0+H_1] & 0x3C) >> 2);
412 outBuf[len+2] = 0x80 | ((inBuf[i+0+H_1] & 0x03) << 4)
413 | ((inBuf[i+2+H_0] & 0x03) << 2)
414 | ((inBuf[i+2+H_1] & 0xC0) >> 6);
415 outBuf[len+3] = 0x80 | ((inBuf[i+2+H_1] & 0x3F) >> 0);
417 i += 2;
418 len += 4;
419 } else {
420 /* 0800-FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */
421 /* abcdefgh ijklmnop -> 1110abcd 10efghij 10klmnop */
423 outBuf[len+0] = 0xE0 | ((inBuf[i+H_0] & 0xF0) >> 4);
424 outBuf[len+1] = 0x80 | ((inBuf[i+H_0] & 0x0F) << 2)
425 | ((inBuf[i+H_1] & 0xC0) >> 6);
426 outBuf[len+2] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0);
428 len += 3;
432 *outBufLen = len;
433 return PR_TRUE;
437 PRBool
438 sec_port_iso88591_utf8_conversion_function
440 const unsigned char *inBuf,
441 unsigned int inBufLen,
442 unsigned char *outBuf,
443 unsigned int maxOutBufLen,
444 unsigned int *outBufLen
447 unsigned int i, len = 0;
449 PORT_Assert((unsigned int *)NULL != outBufLen);
451 for( i = 0; i < inBufLen; i++) {
452 if( (inBuf[i] & 0x80) == 0x00 ) len += 1;
453 else len += 2;
456 if( len > maxOutBufLen ) {
457 *outBufLen = len;
458 return PR_FALSE;
461 len = 0;
463 for( i = 0; i < inBufLen; i++) {
464 if( (inBuf[i] & 0x80) == 0x00 ) {
465 /* 00-7F -> 0xxxxxxx */
466 /* 0abcdefg -> 0abcdefg */
468 outBuf[len] = inBuf[i];
469 len += 1;
470 } else {
471 /* 80-FF <- 110xxxxx 10xxxxxx */
472 /* 00000000 abcdefgh -> 110000ab 10cdefgh */
474 outBuf[len+0] = 0xC0 | ((inBuf[i] & 0xC0) >> 6);
475 outBuf[len+1] = 0x80 | ((inBuf[i] & 0x3F) >> 0);
477 len += 2;
481 *outBufLen = len;
482 return PR_TRUE;
485 #ifdef TEST_UTF8
487 #include <stdio.h>
488 #include <string.h>
489 #include <stdlib.h>
490 #include <netinet/in.h> /* for htonl and htons */
493 * UCS-4 vectors
496 struct ucs4 {
497 PRUint32 c;
498 char *utf8;
502 * UCS-2 vectors
505 struct ucs2 {
506 PRUint16 c;
507 char *utf8;
511 * UTF-16 vectors
514 struct utf16 {
515 PRUint32 c;
516 PRUint16 w[2];
521 * UCS-4 vectors
524 struct ucs4 ucs4[] = {
525 { 0x00000001, "\x01" },
526 { 0x00000002, "\x02" },
527 { 0x00000003, "\x03" },
528 { 0x00000004, "\x04" },
529 { 0x00000007, "\x07" },
530 { 0x00000008, "\x08" },
531 { 0x0000000F, "\x0F" },
532 { 0x00000010, "\x10" },
533 { 0x0000001F, "\x1F" },
534 { 0x00000020, "\x20" },
535 { 0x0000003F, "\x3F" },
536 { 0x00000040, "\x40" },
537 { 0x0000007F, "\x7F" },
539 { 0x00000080, "\xC2\x80" },
540 { 0x00000081, "\xC2\x81" },
541 { 0x00000082, "\xC2\x82" },
542 { 0x00000084, "\xC2\x84" },
543 { 0x00000088, "\xC2\x88" },
544 { 0x00000090, "\xC2\x90" },
545 { 0x000000A0, "\xC2\xA0" },
546 { 0x000000C0, "\xC3\x80" },
547 { 0x000000FF, "\xC3\xBF" },
548 { 0x00000100, "\xC4\x80" },
549 { 0x00000101, "\xC4\x81" },
550 { 0x00000102, "\xC4\x82" },
551 { 0x00000104, "\xC4\x84" },
552 { 0x00000108, "\xC4\x88" },
553 { 0x00000110, "\xC4\x90" },
554 { 0x00000120, "\xC4\xA0" },
555 { 0x00000140, "\xC5\x80" },
556 { 0x00000180, "\xC6\x80" },
557 { 0x000001FF, "\xC7\xBF" },
558 { 0x00000200, "\xC8\x80" },
559 { 0x00000201, "\xC8\x81" },
560 { 0x00000202, "\xC8\x82" },
561 { 0x00000204, "\xC8\x84" },
562 { 0x00000208, "\xC8\x88" },
563 { 0x00000210, "\xC8\x90" },
564 { 0x00000220, "\xC8\xA0" },
565 { 0x00000240, "\xC9\x80" },
566 { 0x00000280, "\xCA\x80" },
567 { 0x00000300, "\xCC\x80" },
568 { 0x000003FF, "\xCF\xBF" },
569 { 0x00000400, "\xD0\x80" },
570 { 0x00000401, "\xD0\x81" },
571 { 0x00000402, "\xD0\x82" },
572 { 0x00000404, "\xD0\x84" },
573 { 0x00000408, "\xD0\x88" },
574 { 0x00000410, "\xD0\x90" },
575 { 0x00000420, "\xD0\xA0" },
576 { 0x00000440, "\xD1\x80" },
577 { 0x00000480, "\xD2\x80" },
578 { 0x00000500, "\xD4\x80" },
579 { 0x00000600, "\xD8\x80" },
580 { 0x000007FF, "\xDF\xBF" },
582 { 0x00000800, "\xE0\xA0\x80" },
583 { 0x00000801, "\xE0\xA0\x81" },
584 { 0x00000802, "\xE0\xA0\x82" },
585 { 0x00000804, "\xE0\xA0\x84" },
586 { 0x00000808, "\xE0\xA0\x88" },
587 { 0x00000810, "\xE0\xA0\x90" },
588 { 0x00000820, "\xE0\xA0\xA0" },
589 { 0x00000840, "\xE0\xA1\x80" },
590 { 0x00000880, "\xE0\xA2\x80" },
591 { 0x00000900, "\xE0\xA4\x80" },
592 { 0x00000A00, "\xE0\xA8\x80" },
593 { 0x00000C00, "\xE0\xB0\x80" },
594 { 0x00000FFF, "\xE0\xBF\xBF" },
595 { 0x00001000, "\xE1\x80\x80" },
596 { 0x00001001, "\xE1\x80\x81" },
597 { 0x00001002, "\xE1\x80\x82" },
598 { 0x00001004, "\xE1\x80\x84" },
599 { 0x00001008, "\xE1\x80\x88" },
600 { 0x00001010, "\xE1\x80\x90" },
601 { 0x00001020, "\xE1\x80\xA0" },
602 { 0x00001040, "\xE1\x81\x80" },
603 { 0x00001080, "\xE1\x82\x80" },
604 { 0x00001100, "\xE1\x84\x80" },
605 { 0x00001200, "\xE1\x88\x80" },
606 { 0x00001400, "\xE1\x90\x80" },
607 { 0x00001800, "\xE1\xA0\x80" },
608 { 0x00001FFF, "\xE1\xBF\xBF" },
609 { 0x00002000, "\xE2\x80\x80" },
610 { 0x00002001, "\xE2\x80\x81" },
611 { 0x00002002, "\xE2\x80\x82" },
612 { 0x00002004, "\xE2\x80\x84" },
613 { 0x00002008, "\xE2\x80\x88" },
614 { 0x00002010, "\xE2\x80\x90" },
615 { 0x00002020, "\xE2\x80\xA0" },
616 { 0x00002040, "\xE2\x81\x80" },
617 { 0x00002080, "\xE2\x82\x80" },
618 { 0x00002100, "\xE2\x84\x80" },
619 { 0x00002200, "\xE2\x88\x80" },
620 { 0x00002400, "\xE2\x90\x80" },
621 { 0x00002800, "\xE2\xA0\x80" },
622 { 0x00003000, "\xE3\x80\x80" },
623 { 0x00003FFF, "\xE3\xBF\xBF" },
624 { 0x00004000, "\xE4\x80\x80" },
625 { 0x00004001, "\xE4\x80\x81" },
626 { 0x00004002, "\xE4\x80\x82" },
627 { 0x00004004, "\xE4\x80\x84" },
628 { 0x00004008, "\xE4\x80\x88" },
629 { 0x00004010, "\xE4\x80\x90" },
630 { 0x00004020, "\xE4\x80\xA0" },
631 { 0x00004040, "\xE4\x81\x80" },
632 { 0x00004080, "\xE4\x82\x80" },
633 { 0x00004100, "\xE4\x84\x80" },
634 { 0x00004200, "\xE4\x88\x80" },
635 { 0x00004400, "\xE4\x90\x80" },
636 { 0x00004800, "\xE4\xA0\x80" },
637 { 0x00005000, "\xE5\x80\x80" },
638 { 0x00006000, "\xE6\x80\x80" },
639 { 0x00007FFF, "\xE7\xBF\xBF" },
640 { 0x00008000, "\xE8\x80\x80" },
641 { 0x00008001, "\xE8\x80\x81" },
642 { 0x00008002, "\xE8\x80\x82" },
643 { 0x00008004, "\xE8\x80\x84" },
644 { 0x00008008, "\xE8\x80\x88" },
645 { 0x00008010, "\xE8\x80\x90" },
646 { 0x00008020, "\xE8\x80\xA0" },
647 { 0x00008040, "\xE8\x81\x80" },
648 { 0x00008080, "\xE8\x82\x80" },
649 { 0x00008100, "\xE8\x84\x80" },
650 { 0x00008200, "\xE8\x88\x80" },
651 { 0x00008400, "\xE8\x90\x80" },
652 { 0x00008800, "\xE8\xA0\x80" },
653 { 0x00009000, "\xE9\x80\x80" },
654 { 0x0000A000, "\xEA\x80\x80" },
655 { 0x0000C000, "\xEC\x80\x80" },
656 { 0x0000FFFF, "\xEF\xBF\xBF" },
658 { 0x00010000, "\xF0\x90\x80\x80" },
659 { 0x00010001, "\xF0\x90\x80\x81" },
660 { 0x00010002, "\xF0\x90\x80\x82" },
661 { 0x00010004, "\xF0\x90\x80\x84" },
662 { 0x00010008, "\xF0\x90\x80\x88" },
663 { 0x00010010, "\xF0\x90\x80\x90" },
664 { 0x00010020, "\xF0\x90\x80\xA0" },
665 { 0x00010040, "\xF0\x90\x81\x80" },
666 { 0x00010080, "\xF0\x90\x82\x80" },
667 { 0x00010100, "\xF0\x90\x84\x80" },
668 { 0x00010200, "\xF0\x90\x88\x80" },
669 { 0x00010400, "\xF0\x90\x90\x80" },
670 { 0x00010800, "\xF0\x90\xA0\x80" },
671 { 0x00011000, "\xF0\x91\x80\x80" },
672 { 0x00012000, "\xF0\x92\x80\x80" },
673 { 0x00014000, "\xF0\x94\x80\x80" },
674 { 0x00018000, "\xF0\x98\x80\x80" },
675 { 0x0001FFFF, "\xF0\x9F\xBF\xBF" },
676 { 0x00020000, "\xF0\xA0\x80\x80" },
677 { 0x00020001, "\xF0\xA0\x80\x81" },
678 { 0x00020002, "\xF0\xA0\x80\x82" },
679 { 0x00020004, "\xF0\xA0\x80\x84" },
680 { 0x00020008, "\xF0\xA0\x80\x88" },
681 { 0x00020010, "\xF0\xA0\x80\x90" },
682 { 0x00020020, "\xF0\xA0\x80\xA0" },
683 { 0x00020040, "\xF0\xA0\x81\x80" },
684 { 0x00020080, "\xF0\xA0\x82\x80" },
685 { 0x00020100, "\xF0\xA0\x84\x80" },
686 { 0x00020200, "\xF0\xA0\x88\x80" },
687 { 0x00020400, "\xF0\xA0\x90\x80" },
688 { 0x00020800, "\xF0\xA0\xA0\x80" },
689 { 0x00021000, "\xF0\xA1\x80\x80" },
690 { 0x00022000, "\xF0\xA2\x80\x80" },
691 { 0x00024000, "\xF0\xA4\x80\x80" },
692 { 0x00028000, "\xF0\xA8\x80\x80" },
693 { 0x00030000, "\xF0\xB0\x80\x80" },
694 { 0x0003FFFF, "\xF0\xBF\xBF\xBF" },
695 { 0x00040000, "\xF1\x80\x80\x80" },
696 { 0x00040001, "\xF1\x80\x80\x81" },
697 { 0x00040002, "\xF1\x80\x80\x82" },
698 { 0x00040004, "\xF1\x80\x80\x84" },
699 { 0x00040008, "\xF1\x80\x80\x88" },
700 { 0x00040010, "\xF1\x80\x80\x90" },
701 { 0x00040020, "\xF1\x80\x80\xA0" },
702 { 0x00040040, "\xF1\x80\x81\x80" },
703 { 0x00040080, "\xF1\x80\x82\x80" },
704 { 0x00040100, "\xF1\x80\x84\x80" },
705 { 0x00040200, "\xF1\x80\x88\x80" },
706 { 0x00040400, "\xF1\x80\x90\x80" },
707 { 0x00040800, "\xF1\x80\xA0\x80" },
708 { 0x00041000, "\xF1\x81\x80\x80" },
709 { 0x00042000, "\xF1\x82\x80\x80" },
710 { 0x00044000, "\xF1\x84\x80\x80" },
711 { 0x00048000, "\xF1\x88\x80\x80" },
712 { 0x00050000, "\xF1\x90\x80\x80" },
713 { 0x00060000, "\xF1\xA0\x80\x80" },
714 { 0x0007FFFF, "\xF1\xBF\xBF\xBF" },
715 { 0x00080000, "\xF2\x80\x80\x80" },
716 { 0x00080001, "\xF2\x80\x80\x81" },
717 { 0x00080002, "\xF2\x80\x80\x82" },
718 { 0x00080004, "\xF2\x80\x80\x84" },
719 { 0x00080008, "\xF2\x80\x80\x88" },
720 { 0x00080010, "\xF2\x80\x80\x90" },
721 { 0x00080020, "\xF2\x80\x80\xA0" },
722 { 0x00080040, "\xF2\x80\x81\x80" },
723 { 0x00080080, "\xF2\x80\x82\x80" },
724 { 0x00080100, "\xF2\x80\x84\x80" },
725 { 0x00080200, "\xF2\x80\x88\x80" },
726 { 0x00080400, "\xF2\x80\x90\x80" },
727 { 0x00080800, "\xF2\x80\xA0\x80" },
728 { 0x00081000, "\xF2\x81\x80\x80" },
729 { 0x00082000, "\xF2\x82\x80\x80" },
730 { 0x00084000, "\xF2\x84\x80\x80" },
731 { 0x00088000, "\xF2\x88\x80\x80" },
732 { 0x00090000, "\xF2\x90\x80\x80" },
733 { 0x000A0000, "\xF2\xA0\x80\x80" },
734 { 0x000C0000, "\xF3\x80\x80\x80" },
735 { 0x000FFFFF, "\xF3\xBF\xBF\xBF" },
736 { 0x00100000, "\xF4\x80\x80\x80" },
737 { 0x00100001, "\xF4\x80\x80\x81" },
738 { 0x00100002, "\xF4\x80\x80\x82" },
739 { 0x00100004, "\xF4\x80\x80\x84" },
740 { 0x00100008, "\xF4\x80\x80\x88" },
741 { 0x00100010, "\xF4\x80\x80\x90" },
742 { 0x00100020, "\xF4\x80\x80\xA0" },
743 { 0x00100040, "\xF4\x80\x81\x80" },
744 { 0x00100080, "\xF4\x80\x82\x80" },
745 { 0x00100100, "\xF4\x80\x84\x80" },
746 { 0x00100200, "\xF4\x80\x88\x80" },
747 { 0x00100400, "\xF4\x80\x90\x80" },
748 { 0x00100800, "\xF4\x80\xA0\x80" },
749 { 0x00101000, "\xF4\x81\x80\x80" },
750 { 0x00102000, "\xF4\x82\x80\x80" },
751 { 0x00104000, "\xF4\x84\x80\x80" },
752 { 0x00108000, "\xF4\x88\x80\x80" },
753 { 0x0010FFFF, "\xF4\x8F\xBF\xBF" },
757 * UCS-2 vectors
760 struct ucs2 ucs2[] = {
761 { 0x0001, "\x01" },
762 { 0x0002, "\x02" },
763 { 0x0003, "\x03" },
764 { 0x0004, "\x04" },
765 { 0x0007, "\x07" },
766 { 0x0008, "\x08" },
767 { 0x000F, "\x0F" },
768 { 0x0010, "\x10" },
769 { 0x001F, "\x1F" },
770 { 0x0020, "\x20" },
771 { 0x003F, "\x3F" },
772 { 0x0040, "\x40" },
773 { 0x007F, "\x7F" },
775 { 0x0080, "\xC2\x80" },
776 { 0x0081, "\xC2\x81" },
777 { 0x0082, "\xC2\x82" },
778 { 0x0084, "\xC2\x84" },
779 { 0x0088, "\xC2\x88" },
780 { 0x0090, "\xC2\x90" },
781 { 0x00A0, "\xC2\xA0" },
782 { 0x00C0, "\xC3\x80" },
783 { 0x00FF, "\xC3\xBF" },
784 { 0x0100, "\xC4\x80" },
785 { 0x0101, "\xC4\x81" },
786 { 0x0102, "\xC4\x82" },
787 { 0x0104, "\xC4\x84" },
788 { 0x0108, "\xC4\x88" },
789 { 0x0110, "\xC4\x90" },
790 { 0x0120, "\xC4\xA0" },
791 { 0x0140, "\xC5\x80" },
792 { 0x0180, "\xC6\x80" },
793 { 0x01FF, "\xC7\xBF" },
794 { 0x0200, "\xC8\x80" },
795 { 0x0201, "\xC8\x81" },
796 { 0x0202, "\xC8\x82" },
797 { 0x0204, "\xC8\x84" },
798 { 0x0208, "\xC8\x88" },
799 { 0x0210, "\xC8\x90" },
800 { 0x0220, "\xC8\xA0" },
801 { 0x0240, "\xC9\x80" },
802 { 0x0280, "\xCA\x80" },
803 { 0x0300, "\xCC\x80" },
804 { 0x03FF, "\xCF\xBF" },
805 { 0x0400, "\xD0\x80" },
806 { 0x0401, "\xD0\x81" },
807 { 0x0402, "\xD0\x82" },
808 { 0x0404, "\xD0\x84" },
809 { 0x0408, "\xD0\x88" },
810 { 0x0410, "\xD0\x90" },
811 { 0x0420, "\xD0\xA0" },
812 { 0x0440, "\xD1\x80" },
813 { 0x0480, "\xD2\x80" },
814 { 0x0500, "\xD4\x80" },
815 { 0x0600, "\xD8\x80" },
816 { 0x07FF, "\xDF\xBF" },
818 { 0x0800, "\xE0\xA0\x80" },
819 { 0x0801, "\xE0\xA0\x81" },
820 { 0x0802, "\xE0\xA0\x82" },
821 { 0x0804, "\xE0\xA0\x84" },
822 { 0x0808, "\xE0\xA0\x88" },
823 { 0x0810, "\xE0\xA0\x90" },
824 { 0x0820, "\xE0\xA0\xA0" },
825 { 0x0840, "\xE0\xA1\x80" },
826 { 0x0880, "\xE0\xA2\x80" },
827 { 0x0900, "\xE0\xA4\x80" },
828 { 0x0A00, "\xE0\xA8\x80" },
829 { 0x0C00, "\xE0\xB0\x80" },
830 { 0x0FFF, "\xE0\xBF\xBF" },
831 { 0x1000, "\xE1\x80\x80" },
832 { 0x1001, "\xE1\x80\x81" },
833 { 0x1002, "\xE1\x80\x82" },
834 { 0x1004, "\xE1\x80\x84" },
835 { 0x1008, "\xE1\x80\x88" },
836 { 0x1010, "\xE1\x80\x90" },
837 { 0x1020, "\xE1\x80\xA0" },
838 { 0x1040, "\xE1\x81\x80" },
839 { 0x1080, "\xE1\x82\x80" },
840 { 0x1100, "\xE1\x84\x80" },
841 { 0x1200, "\xE1\x88\x80" },
842 { 0x1400, "\xE1\x90\x80" },
843 { 0x1800, "\xE1\xA0\x80" },
844 { 0x1FFF, "\xE1\xBF\xBF" },
845 { 0x2000, "\xE2\x80\x80" },
846 { 0x2001, "\xE2\x80\x81" },
847 { 0x2002, "\xE2\x80\x82" },
848 { 0x2004, "\xE2\x80\x84" },
849 { 0x2008, "\xE2\x80\x88" },
850 { 0x2010, "\xE2\x80\x90" },
851 { 0x2020, "\xE2\x80\xA0" },
852 { 0x2040, "\xE2\x81\x80" },
853 { 0x2080, "\xE2\x82\x80" },
854 { 0x2100, "\xE2\x84\x80" },
855 { 0x2200, "\xE2\x88\x80" },
856 { 0x2400, "\xE2\x90\x80" },
857 { 0x2800, "\xE2\xA0\x80" },
858 { 0x3000, "\xE3\x80\x80" },
859 { 0x3FFF, "\xE3\xBF\xBF" },
860 { 0x4000, "\xE4\x80\x80" },
861 { 0x4001, "\xE4\x80\x81" },
862 { 0x4002, "\xE4\x80\x82" },
863 { 0x4004, "\xE4\x80\x84" },
864 { 0x4008, "\xE4\x80\x88" },
865 { 0x4010, "\xE4\x80\x90" },
866 { 0x4020, "\xE4\x80\xA0" },
867 { 0x4040, "\xE4\x81\x80" },
868 { 0x4080, "\xE4\x82\x80" },
869 { 0x4100, "\xE4\x84\x80" },
870 { 0x4200, "\xE4\x88\x80" },
871 { 0x4400, "\xE4\x90\x80" },
872 { 0x4800, "\xE4\xA0\x80" },
873 { 0x5000, "\xE5\x80\x80" },
874 { 0x6000, "\xE6\x80\x80" },
875 { 0x7FFF, "\xE7\xBF\xBF" },
876 { 0x8000, "\xE8\x80\x80" },
877 { 0x8001, "\xE8\x80\x81" },
878 { 0x8002, "\xE8\x80\x82" },
879 { 0x8004, "\xE8\x80\x84" },
880 { 0x8008, "\xE8\x80\x88" },
881 { 0x8010, "\xE8\x80\x90" },
882 { 0x8020, "\xE8\x80\xA0" },
883 { 0x8040, "\xE8\x81\x80" },
884 { 0x8080, "\xE8\x82\x80" },
885 { 0x8100, "\xE8\x84\x80" },
886 { 0x8200, "\xE8\x88\x80" },
887 { 0x8400, "\xE8\x90\x80" },
888 { 0x8800, "\xE8\xA0\x80" },
889 { 0x9000, "\xE9\x80\x80" },
890 { 0xA000, "\xEA\x80\x80" },
891 { 0xC000, "\xEC\x80\x80" },
892 { 0xFFFF, "\xEF\xBF\xBF" }
897 * UTF-16 vectors
900 struct utf16 utf16[] = {
901 { 0x00010000, { 0xD800, 0xDC00 } },
902 { 0x00010001, { 0xD800, 0xDC01 } },
903 { 0x00010002, { 0xD800, 0xDC02 } },
904 { 0x00010003, { 0xD800, 0xDC03 } },
905 { 0x00010004, { 0xD800, 0xDC04 } },
906 { 0x00010007, { 0xD800, 0xDC07 } },
907 { 0x00010008, { 0xD800, 0xDC08 } },
908 { 0x0001000F, { 0xD800, 0xDC0F } },
909 { 0x00010010, { 0xD800, 0xDC10 } },
910 { 0x0001001F, { 0xD800, 0xDC1F } },
911 { 0x00010020, { 0xD800, 0xDC20 } },
912 { 0x0001003F, { 0xD800, 0xDC3F } },
913 { 0x00010040, { 0xD800, 0xDC40 } },
914 { 0x0001007F, { 0xD800, 0xDC7F } },
915 { 0x00010080, { 0xD800, 0xDC80 } },
916 { 0x00010081, { 0xD800, 0xDC81 } },
917 { 0x00010082, { 0xD800, 0xDC82 } },
918 { 0x00010084, { 0xD800, 0xDC84 } },
919 { 0x00010088, { 0xD800, 0xDC88 } },
920 { 0x00010090, { 0xD800, 0xDC90 } },
921 { 0x000100A0, { 0xD800, 0xDCA0 } },
922 { 0x000100C0, { 0xD800, 0xDCC0 } },
923 { 0x000100FF, { 0xD800, 0xDCFF } },
924 { 0x00010100, { 0xD800, 0xDD00 } },
925 { 0x00010101, { 0xD800, 0xDD01 } },
926 { 0x00010102, { 0xD800, 0xDD02 } },
927 { 0x00010104, { 0xD800, 0xDD04 } },
928 { 0x00010108, { 0xD800, 0xDD08 } },
929 { 0x00010110, { 0xD800, 0xDD10 } },
930 { 0x00010120, { 0xD800, 0xDD20 } },
931 { 0x00010140, { 0xD800, 0xDD40 } },
932 { 0x00010180, { 0xD800, 0xDD80 } },
933 { 0x000101FF, { 0xD800, 0xDDFF } },
934 { 0x00010200, { 0xD800, 0xDE00 } },
935 { 0x00010201, { 0xD800, 0xDE01 } },
936 { 0x00010202, { 0xD800, 0xDE02 } },
937 { 0x00010204, { 0xD800, 0xDE04 } },
938 { 0x00010208, { 0xD800, 0xDE08 } },
939 { 0x00010210, { 0xD800, 0xDE10 } },
940 { 0x00010220, { 0xD800, 0xDE20 } },
941 { 0x00010240, { 0xD800, 0xDE40 } },
942 { 0x00010280, { 0xD800, 0xDE80 } },
943 { 0x00010300, { 0xD800, 0xDF00 } },
944 { 0x000103FF, { 0xD800, 0xDFFF } },
945 { 0x00010400, { 0xD801, 0xDC00 } },
946 { 0x00010401, { 0xD801, 0xDC01 } },
947 { 0x00010402, { 0xD801, 0xDC02 } },
948 { 0x00010404, { 0xD801, 0xDC04 } },
949 { 0x00010408, { 0xD801, 0xDC08 } },
950 { 0x00010410, { 0xD801, 0xDC10 } },
951 { 0x00010420, { 0xD801, 0xDC20 } },
952 { 0x00010440, { 0xD801, 0xDC40 } },
953 { 0x00010480, { 0xD801, 0xDC80 } },
954 { 0x00010500, { 0xD801, 0xDD00 } },
955 { 0x00010600, { 0xD801, 0xDE00 } },
956 { 0x000107FF, { 0xD801, 0xDFFF } },
957 { 0x00010800, { 0xD802, 0xDC00 } },
958 { 0x00010801, { 0xD802, 0xDC01 } },
959 { 0x00010802, { 0xD802, 0xDC02 } },
960 { 0x00010804, { 0xD802, 0xDC04 } },
961 { 0x00010808, { 0xD802, 0xDC08 } },
962 { 0x00010810, { 0xD802, 0xDC10 } },
963 { 0x00010820, { 0xD802, 0xDC20 } },
964 { 0x00010840, { 0xD802, 0xDC40 } },
965 { 0x00010880, { 0xD802, 0xDC80 } },
966 { 0x00010900, { 0xD802, 0xDD00 } },
967 { 0x00010A00, { 0xD802, 0xDE00 } },
968 { 0x00010C00, { 0xD803, 0xDC00 } },
969 { 0x00010FFF, { 0xD803, 0xDFFF } },
970 { 0x00011000, { 0xD804, 0xDC00 } },
971 { 0x00011001, { 0xD804, 0xDC01 } },
972 { 0x00011002, { 0xD804, 0xDC02 } },
973 { 0x00011004, { 0xD804, 0xDC04 } },
974 { 0x00011008, { 0xD804, 0xDC08 } },
975 { 0x00011010, { 0xD804, 0xDC10 } },
976 { 0x00011020, { 0xD804, 0xDC20 } },
977 { 0x00011040, { 0xD804, 0xDC40 } },
978 { 0x00011080, { 0xD804, 0xDC80 } },
979 { 0x00011100, { 0xD804, 0xDD00 } },
980 { 0x00011200, { 0xD804, 0xDE00 } },
981 { 0x00011400, { 0xD805, 0xDC00 } },
982 { 0x00011800, { 0xD806, 0xDC00 } },
983 { 0x00011FFF, { 0xD807, 0xDFFF } },
984 { 0x00012000, { 0xD808, 0xDC00 } },
985 { 0x00012001, { 0xD808, 0xDC01 } },
986 { 0x00012002, { 0xD808, 0xDC02 } },
987 { 0x00012004, { 0xD808, 0xDC04 } },
988 { 0x00012008, { 0xD808, 0xDC08 } },
989 { 0x00012010, { 0xD808, 0xDC10 } },
990 { 0x00012020, { 0xD808, 0xDC20 } },
991 { 0x00012040, { 0xD808, 0xDC40 } },
992 { 0x00012080, { 0xD808, 0xDC80 } },
993 { 0x00012100, { 0xD808, 0xDD00 } },
994 { 0x00012200, { 0xD808, 0xDE00 } },
995 { 0x00012400, { 0xD809, 0xDC00 } },
996 { 0x00012800, { 0xD80A, 0xDC00 } },
997 { 0x00013000, { 0xD80C, 0xDC00 } },
998 { 0x00013FFF, { 0xD80F, 0xDFFF } },
999 { 0x00014000, { 0xD810, 0xDC00 } },
1000 { 0x00014001, { 0xD810, 0xDC01 } },
1001 { 0x00014002, { 0xD810, 0xDC02 } },
1002 { 0x00014004, { 0xD810, 0xDC04 } },
1003 { 0x00014008, { 0xD810, 0xDC08 } },
1004 { 0x00014010, { 0xD810, 0xDC10 } },
1005 { 0x00014020, { 0xD810, 0xDC20 } },
1006 { 0x00014040, { 0xD810, 0xDC40 } },
1007 { 0x00014080, { 0xD810, 0xDC80 } },
1008 { 0x00014100, { 0xD810, 0xDD00 } },
1009 { 0x00014200, { 0xD810, 0xDE00 } },
1010 { 0x00014400, { 0xD811, 0xDC00 } },
1011 { 0x00014800, { 0xD812, 0xDC00 } },
1012 { 0x00015000, { 0xD814, 0xDC00 } },
1013 { 0x00016000, { 0xD818, 0xDC00 } },
1014 { 0x00017FFF, { 0xD81F, 0xDFFF } },
1015 { 0x00018000, { 0xD820, 0xDC00 } },
1016 { 0x00018001, { 0xD820, 0xDC01 } },
1017 { 0x00018002, { 0xD820, 0xDC02 } },
1018 { 0x00018004, { 0xD820, 0xDC04 } },
1019 { 0x00018008, { 0xD820, 0xDC08 } },
1020 { 0x00018010, { 0xD820, 0xDC10 } },
1021 { 0x00018020, { 0xD820, 0xDC20 } },
1022 { 0x00018040, { 0xD820, 0xDC40 } },
1023 { 0x00018080, { 0xD820, 0xDC80 } },
1024 { 0x00018100, { 0xD820, 0xDD00 } },
1025 { 0x00018200, { 0xD820, 0xDE00 } },
1026 { 0x00018400, { 0xD821, 0xDC00 } },
1027 { 0x00018800, { 0xD822, 0xDC00 } },
1028 { 0x00019000, { 0xD824, 0xDC00 } },
1029 { 0x0001A000, { 0xD828, 0xDC00 } },
1030 { 0x0001C000, { 0xD830, 0xDC00 } },
1031 { 0x0001FFFF, { 0xD83F, 0xDFFF } },
1032 { 0x00020000, { 0xD840, 0xDC00 } },
1033 { 0x00020001, { 0xD840, 0xDC01 } },
1034 { 0x00020002, { 0xD840, 0xDC02 } },
1035 { 0x00020004, { 0xD840, 0xDC04 } },
1036 { 0x00020008, { 0xD840, 0xDC08 } },
1037 { 0x00020010, { 0xD840, 0xDC10 } },
1038 { 0x00020020, { 0xD840, 0xDC20 } },
1039 { 0x00020040, { 0xD840, 0xDC40 } },
1040 { 0x00020080, { 0xD840, 0xDC80 } },
1041 { 0x00020100, { 0xD840, 0xDD00 } },
1042 { 0x00020200, { 0xD840, 0xDE00 } },
1043 { 0x00020400, { 0xD841, 0xDC00 } },
1044 { 0x00020800, { 0xD842, 0xDC00 } },
1045 { 0x00021000, { 0xD844, 0xDC00 } },
1046 { 0x00022000, { 0xD848, 0xDC00 } },
1047 { 0x00024000, { 0xD850, 0xDC00 } },
1048 { 0x00028000, { 0xD860, 0xDC00 } },
1049 { 0x0002FFFF, { 0xD87F, 0xDFFF } },
1050 { 0x00030000, { 0xD880, 0xDC00 } },
1051 { 0x00030001, { 0xD880, 0xDC01 } },
1052 { 0x00030002, { 0xD880, 0xDC02 } },
1053 { 0x00030004, { 0xD880, 0xDC04 } },
1054 { 0x00030008, { 0xD880, 0xDC08 } },
1055 { 0x00030010, { 0xD880, 0xDC10 } },
1056 { 0x00030020, { 0xD880, 0xDC20 } },
1057 { 0x00030040, { 0xD880, 0xDC40 } },
1058 { 0x00030080, { 0xD880, 0xDC80 } },
1059 { 0x00030100, { 0xD880, 0xDD00 } },
1060 { 0x00030200, { 0xD880, 0xDE00 } },
1061 { 0x00030400, { 0xD881, 0xDC00 } },
1062 { 0x00030800, { 0xD882, 0xDC00 } },
1063 { 0x00031000, { 0xD884, 0xDC00 } },
1064 { 0x00032000, { 0xD888, 0xDC00 } },
1065 { 0x00034000, { 0xD890, 0xDC00 } },
1066 { 0x00038000, { 0xD8A0, 0xDC00 } },
1067 { 0x0003FFFF, { 0xD8BF, 0xDFFF } },
1068 { 0x00040000, { 0xD8C0, 0xDC00 } },
1069 { 0x00040001, { 0xD8C0, 0xDC01 } },
1070 { 0x00040002, { 0xD8C0, 0xDC02 } },
1071 { 0x00040004, { 0xD8C0, 0xDC04 } },
1072 { 0x00040008, { 0xD8C0, 0xDC08 } },
1073 { 0x00040010, { 0xD8C0, 0xDC10 } },
1074 { 0x00040020, { 0xD8C0, 0xDC20 } },
1075 { 0x00040040, { 0xD8C0, 0xDC40 } },
1076 { 0x00040080, { 0xD8C0, 0xDC80 } },
1077 { 0x00040100, { 0xD8C0, 0xDD00 } },
1078 { 0x00040200, { 0xD8C0, 0xDE00 } },
1079 { 0x00040400, { 0xD8C1, 0xDC00 } },
1080 { 0x00040800, { 0xD8C2, 0xDC00 } },
1081 { 0x00041000, { 0xD8C4, 0xDC00 } },
1082 { 0x00042000, { 0xD8C8, 0xDC00 } },
1083 { 0x00044000, { 0xD8D0, 0xDC00 } },
1084 { 0x00048000, { 0xD8E0, 0xDC00 } },
1085 { 0x0004FFFF, { 0xD8FF, 0xDFFF } },
1086 { 0x00050000, { 0xD900, 0xDC00 } },
1087 { 0x00050001, { 0xD900, 0xDC01 } },
1088 { 0x00050002, { 0xD900, 0xDC02 } },
1089 { 0x00050004, { 0xD900, 0xDC04 } },
1090 { 0x00050008, { 0xD900, 0xDC08 } },
1091 { 0x00050010, { 0xD900, 0xDC10 } },
1092 { 0x00050020, { 0xD900, 0xDC20 } },
1093 { 0x00050040, { 0xD900, 0xDC40 } },
1094 { 0x00050080, { 0xD900, 0xDC80 } },
1095 { 0x00050100, { 0xD900, 0xDD00 } },
1096 { 0x00050200, { 0xD900, 0xDE00 } },
1097 { 0x00050400, { 0xD901, 0xDC00 } },
1098 { 0x00050800, { 0xD902, 0xDC00 } },
1099 { 0x00051000, { 0xD904, 0xDC00 } },
1100 { 0x00052000, { 0xD908, 0xDC00 } },
1101 { 0x00054000, { 0xD910, 0xDC00 } },
1102 { 0x00058000, { 0xD920, 0xDC00 } },
1103 { 0x00060000, { 0xD940, 0xDC00 } },
1104 { 0x00070000, { 0xD980, 0xDC00 } },
1105 { 0x0007FFFF, { 0xD9BF, 0xDFFF } },
1106 { 0x00080000, { 0xD9C0, 0xDC00 } },
1107 { 0x00080001, { 0xD9C0, 0xDC01 } },
1108 { 0x00080002, { 0xD9C0, 0xDC02 } },
1109 { 0x00080004, { 0xD9C0, 0xDC04 } },
1110 { 0x00080008, { 0xD9C0, 0xDC08 } },
1111 { 0x00080010, { 0xD9C0, 0xDC10 } },
1112 { 0x00080020, { 0xD9C0, 0xDC20 } },
1113 { 0x00080040, { 0xD9C0, 0xDC40 } },
1114 { 0x00080080, { 0xD9C0, 0xDC80 } },
1115 { 0x00080100, { 0xD9C0, 0xDD00 } },
1116 { 0x00080200, { 0xD9C0, 0xDE00 } },
1117 { 0x00080400, { 0xD9C1, 0xDC00 } },
1118 { 0x00080800, { 0xD9C2, 0xDC00 } },
1119 { 0x00081000, { 0xD9C4, 0xDC00 } },
1120 { 0x00082000, { 0xD9C8, 0xDC00 } },
1121 { 0x00084000, { 0xD9D0, 0xDC00 } },
1122 { 0x00088000, { 0xD9E0, 0xDC00 } },
1123 { 0x0008FFFF, { 0xD9FF, 0xDFFF } },
1124 { 0x00090000, { 0xDA00, 0xDC00 } },
1125 { 0x00090001, { 0xDA00, 0xDC01 } },
1126 { 0x00090002, { 0xDA00, 0xDC02 } },
1127 { 0x00090004, { 0xDA00, 0xDC04 } },
1128 { 0x00090008, { 0xDA00, 0xDC08 } },
1129 { 0x00090010, { 0xDA00, 0xDC10 } },
1130 { 0x00090020, { 0xDA00, 0xDC20 } },
1131 { 0x00090040, { 0xDA00, 0xDC40 } },
1132 { 0x00090080, { 0xDA00, 0xDC80 } },
1133 { 0x00090100, { 0xDA00, 0xDD00 } },
1134 { 0x00090200, { 0xDA00, 0xDE00 } },
1135 { 0x00090400, { 0xDA01, 0xDC00 } },
1136 { 0x00090800, { 0xDA02, 0xDC00 } },
1137 { 0x00091000, { 0xDA04, 0xDC00 } },
1138 { 0x00092000, { 0xDA08, 0xDC00 } },
1139 { 0x00094000, { 0xDA10, 0xDC00 } },
1140 { 0x00098000, { 0xDA20, 0xDC00 } },
1141 { 0x000A0000, { 0xDA40, 0xDC00 } },
1142 { 0x000B0000, { 0xDA80, 0xDC00 } },
1143 { 0x000C0000, { 0xDAC0, 0xDC00 } },
1144 { 0x000D0000, { 0xDB00, 0xDC00 } },
1145 { 0x000FFFFF, { 0xDBBF, 0xDFFF } },
1146 { 0x0010FFFF, { 0xDBFF, 0xDFFF } }
/*
 * Byte strings that are not well-formed UTF-8.  test_utf8_bad_chars()
 * feeds each one to the UTF-8 -> UCS-2 and UTF-8 -> UCS-4 conversions
 * and expects both to fail.  The order of the entries is kept stable
 * so diagnostics stay comparable between runs.
 */
char *utf8_bad[] = {
    /* overlong two-, three- and four-byte forms */
    "\xC0\x80",
    "\xC1\xBF",
    "\xE0\x80\x80",
    "\xE0\x9F\xBF",
    "\xF0\x80\x80\x80",
    "\xF0\x8F\xBF\xBF",

    /* four-byte forms that decode above U+10FFFF */
    "\xF4\x90\x80\x80",
    "\xF7\xBF\xBF\xBF",

    /* five-byte forms (lead bytes F8..FB) */
    "\xF8\x80\x80\x80\x80",
    "\xF8\x88\x80\x80\x80",
    "\xF8\x92\x80\x80\x80",
    "\xF8\x9F\xBF\xBF\xBF",
    "\xF8\xA0\x80\x80\x80",
    "\xF8\xA8\x80\x80\x80",
    "\xF8\xB0\x80\x80\x80",
    "\xF8\xBF\xBF\xBF\xBF",
    "\xF9\x80\x80\x80\x88",
    "\xF9\x84\x80\x80\x80",
    "\xF9\xBF\xBF\xBF\xBF",
    "\xFA\x80\x80\x80\x80",
    "\xFA\x90\x80\x80\x80",
    "\xFB\xBF\xBF\xBF\xBF",

    /* six-byte forms (lead bytes FC and FD) */
    "\xFC\x84\x80\x80\x80\x81",
    "\xFC\x85\x80\x80\x80\x80",
    "\xFC\x86\x80\x80\x80\x80",
    "\xFC\x87\xBF\xBF\xBF\xBF",
    "\xFC\x88\xA0\x80\x80\x80",
    "\xFC\x89\x80\x80\x80\x80",
    "\xFC\x8A\x80\x80\x80\x80",
    "\xFC\x90\x80\x80\x80\x82",
    "\xFD\x80\x80\x80\x80\x80",
    "\xFD\xBF\xBF\xBF\xBF\xBF",

    /* lone continuation byte, truncated sequence, lead byte after lead byte */
    "\x80",
    "\xC3",
    "\xC3\xC3\x80",

    /* three-byte encodings of UTF-16 surrogate code points */
    "\xED\xA0\x80",
    "\xED\xBF\x80",
    "\xED\xBF\xBF",
    "\xED\xA0\x80\xE0\xBF\xBF"
};
/*
 * Write a labelled hex dump of a byte string to stdout.
 *
 *   word  label printed first, followed by one space
 *   utf8  NUL-terminated byte string; every byte before the terminator
 *         is printed as two lowercase hex digits followed by a space
 *   end   text printed verbatim after the last byte
 */
static void
dump_utf8(char *word, unsigned char *utf8, char *end)
{
    const unsigned char *cursor = utf8;

    fprintf(stdout, "%s ", word);
    while (*cursor != 0) {
        fprintf(stdout, "%02.2x ", (unsigned int)*cursor);
        cursor++;
    }
    fprintf(stdout, "%s", end);
}
1208 static PRBool
1209 test_ucs4_chars
1211 void
1214 PRBool rv = PR_TRUE;
1215 int i;
1217 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
1218 struct ucs4 *e = &ucs4[i];
1219 PRBool result;
1220 unsigned char utf8[8];
1221 unsigned int len = 0;
1222 PRUint32 back = 0;
1224 (void)memset(utf8, 0, sizeof(utf8));
1226 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
1227 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len);
1229 if( !result ) {
1230 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8\n", e->c);
1231 rv = PR_FALSE;
1232 continue;
1235 if( (len >= sizeof(utf8)) ||
1236 (strlen(e->utf8) != len) ||
1237 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) {
1238 fprintf(stdout, "Wrong conversion of UCS-4 0x%08.8x to UTF-8: ", e->c);
1239 dump_utf8("expected", e->utf8, ", ");
1240 dump_utf8("received", utf8, "\n");
1241 rv = PR_FALSE;
1242 continue;
1245 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
1246 utf8, len, (unsigned char *)&back, sizeof(back), &len);
1248 if( !result ) {
1249 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-4\n");
1250 rv = PR_FALSE;
1251 continue;
1254 if( (sizeof(back) != len) || (e->c != back) ) {
1255 dump_utf8("Wrong conversion of UTF-8", utf8, " to UCS-4:");
1256 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back);
1257 rv = PR_FALSE;
1258 continue;
1262 return rv;
1265 static PRBool
1266 test_ucs2_chars
1268 void
1271 PRBool rv = PR_TRUE;
1272 int i;
1274 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
1275 struct ucs2 *e = &ucs2[i];
1276 PRBool result;
1277 unsigned char utf8[8];
1278 unsigned int len = 0;
1279 PRUint16 back = 0;
1281 (void)memset(utf8, 0, sizeof(utf8));
1283 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
1284 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len);
1286 if( !result ) {
1287 fprintf(stdout, "Failed to convert UCS-2 0x%04.4x to UTF-8\n", e->c);
1288 rv = PR_FALSE;
1289 continue;
1292 if( (len >= sizeof(utf8)) ||
1293 (strlen(e->utf8) != len) ||
1294 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) {
1295 fprintf(stdout, "Wrong conversion of UCS-2 0x%04.4x to UTF-8: ", e->c);
1296 dump_utf8("expected", e->utf8, ", ");
1297 dump_utf8("received", utf8, "\n");
1298 rv = PR_FALSE;
1299 continue;
1302 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
1303 utf8, len, (unsigned char *)&back, sizeof(back), &len);
1305 if( !result ) {
1306 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-2\n");
1307 rv = PR_FALSE;
1308 continue;
1311 if( (sizeof(back) != len) || (e->c != back) ) {
1312 dump_utf8("Wrong conversion of UTF-8", utf8, "to UCS-2:");
1313 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back);
1314 rv = PR_FALSE;
1315 continue;
1319 return rv;
1322 static PRBool
1323 test_utf16_chars
1325 void
1328 PRBool rv = PR_TRUE;
1329 int i;
1331 for( i = 0; i < sizeof(utf16)/sizeof(utf16[0]); i++ ) {
1332 struct utf16 *e = &utf16[i];
1333 PRBool result;
1334 unsigned char utf8[8];
1335 unsigned int len = 0;
1336 PRUint32 back32 = 0;
1337 PRUint16 back[2];
1339 (void)memset(utf8, 0, sizeof(utf8));
1341 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
1342 (unsigned char *)&e->w[0], sizeof(e->w), utf8, sizeof(utf8), &len);
1344 if( !result ) {
1345 fprintf(stdout, "Failed to convert UTF-16 0x%04.4x 0x%04.4x to UTF-8\n",
1346 e->w[0], e->w[1]);
1347 rv = PR_FALSE;
1348 continue;
1351 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
1352 utf8, len, (unsigned char *)&back32, sizeof(back32), &len);
1354 if( 4 != len ) {
1355 fprintf(stdout, "Failed to convert UTF-16 0x%04.4x 0x%04.4x to UTF-8: "
1356 "unexpected len %d\n", e->w[0], e->w[1], len);
1357 rv = PR_FALSE;
1358 continue;
1361 utf8[len] = '\0'; /* null-terminate for printing */
1363 if( !result ) {
1364 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-4 (utf-16 test)\n");
1365 rv = PR_FALSE;
1366 continue;
1369 if( (sizeof(back32) != len) || (e->c != back32) ) {
1370 fprintf(stdout, "Wrong conversion of UTF-16 0x%04.4x 0x%04.4x ",
1371 e->w[0], e->w[1]);
1372 dump_utf8("to UTF-8", utf8, "and then to UCS-4: ");
1373 if( sizeof(back32) != len ) {
1374 fprintf(stdout, "len is %d\n", len);
1375 } else {
1376 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back32);
1378 rv = PR_FALSE;
1379 continue;
1382 (void)memset(utf8, 0, sizeof(utf8));
1383 back[0] = back[1] = 0;
1385 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
1386 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len);
1388 if( !result ) {
1389 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8 (utf-16 test)\n",
1390 e->c);
1391 rv = PR_FALSE;
1392 continue;
1395 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
1396 utf8, len, (unsigned char *)&back[0], sizeof(back), &len);
1398 if( 4 != len ) {
1399 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8: "
1400 "unexpected len %d\n", e->c, len);
1401 rv = PR_FALSE;
1402 continue;
1405 utf8[len] = '\0'; /* null-terminate for printing */
1407 if( !result ) {
1408 dump_utf8("Failed to convert UTF-8", utf8, "to UTF-16\n");
1409 rv = PR_FALSE;
1410 continue;
1413 if( (sizeof(back) != len) || (e->w[0] != back[0]) || (e->w[1] != back[1]) ) {
1414 fprintf(stdout, "Wrong conversion of UCS-4 0x%08.8x to UTF-8", e->c);
1415 dump_utf8("", utf8, "and then to UTF-16:");
1416 if( sizeof(back) != len ) {
1417 fprintf(stdout, "len is %d\n", len);
1418 } else {
1419 fprintf(stdout, "expected 0x%04.4x 0x%04.4x, received 0x%04.4x 0x%04.4xx\n",
1420 e->w[0], e->w[1], back[0], back[1]);
1422 rv = PR_FALSE;
1423 continue;
1427 return rv;
1430 static PRBool
1431 test_utf8_bad_chars
1433 void
1436 PRBool rv = PR_TRUE;
1437 int i;
1439 for( i = 0; i < sizeof(utf8_bad)/sizeof(utf8_bad[0]); i++ ) {
1440 PRBool result;
1441 unsigned char destbuf[30];
1442 unsigned int len = 0;
1444 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
1445 (unsigned char *)utf8_bad[i], strlen(utf8_bad[i]), destbuf, sizeof(destbuf), &len);
1447 if( result ) {
1448 dump_utf8("Failed to detect bad UTF-8 string converting to UCS2: ", utf8_bad[i], "\n");
1449 rv = PR_FALSE;
1450 continue;
1452 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
1453 (unsigned char *)utf8_bad[i], strlen(utf8_bad[i]), destbuf, sizeof(destbuf), &len);
1455 if( result ) {
1456 dump_utf8("Failed to detect bad UTF-8 string converting to UCS4: ", utf8_bad[i], "\n");
1457 rv = PR_FALSE;
1458 continue;
1463 return rv;
1466 static PRBool
1467 test_iso88591_chars
1469 void
1472 PRBool rv = PR_TRUE;
1473 int i;
1475 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
1476 struct ucs2 *e = &ucs2[i];
1477 PRBool result;
1478 unsigned char iso88591;
1479 unsigned char utf8[3];
1480 unsigned int len = 0;
1482 if (ntohs(e->c) > 0xFF) continue;
1484 (void)memset(utf8, 0, sizeof(utf8));
1485 iso88591 = ntohs(e->c);
1487 result = sec_port_iso88591_utf8_conversion_function(&iso88591,
1488 1, utf8, sizeof(utf8), &len);
1490 if( !result ) {
1491 fprintf(stdout, "Failed to convert ISO-8859-1 0x%02.2x to UTF-8\n", iso88591);
1492 rv = PR_FALSE;
1493 continue;
1496 if( (len >= sizeof(utf8)) ||
1497 (strlen(e->utf8) != len) ||
1498 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) {
1499 fprintf(stdout, "Wrong conversion of ISO-8859-1 0x%02.2x to UTF-8: ", iso88591);
1500 dump_utf8("expected", e->utf8, ", ");
1501 dump_utf8("received", utf8, "\n");
1502 rv = PR_FALSE;
1503 continue;
1508 return rv;
1511 static PRBool
1512 test_zeroes
1514 void
1517 PRBool rv = PR_TRUE;
1518 PRBool result;
1519 PRUint32 lzero = 0;
1520 PRUint16 szero = 0;
1521 unsigned char utf8[8];
1522 unsigned int len = 0;
1523 PRUint32 lback = 1;
1524 PRUint16 sback = 1;
1526 (void)memset(utf8, 1, sizeof(utf8));
1528 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
1529 (unsigned char *)&lzero, sizeof(lzero), utf8, sizeof(utf8), &len);
1531 if( !result ) {
1532 fprintf(stdout, "Failed to convert UCS-4 0x00000000 to UTF-8\n");
1533 rv = PR_FALSE;
1534 } else if( 1 != len ) {
1535 fprintf(stdout, "Wrong conversion of UCS-4 0x00000000: len = %d\n", len);
1536 rv = PR_FALSE;
1537 } else if( '\0' != *utf8 ) {
1538 fprintf(stdout, "Wrong conversion of UCS-4 0x00000000: expected 00 ,"
1539 "received %02.2x\n", (unsigned int)*utf8);
1540 rv = PR_FALSE;
1543 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
1544 "", 1, (unsigned char *)&lback, sizeof(lback), &len);
1546 if( !result ) {
1547 fprintf(stdout, "Failed to convert UTF-8 00 to UCS-4\n");
1548 rv = PR_FALSE;
1549 } else if( 4 != len ) {
1550 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-4: len = %d\n", len);
1551 rv = PR_FALSE;
1552 } else if( 0 != lback ) {
1553 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-4: "
1554 "expected 0x00000000, received 0x%08.8x\n", lback);
1555 rv = PR_FALSE;
1558 (void)memset(utf8, 1, sizeof(utf8));
1560 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
1561 (unsigned char *)&szero, sizeof(szero), utf8, sizeof(utf8), &len);
1563 if( !result ) {
1564 fprintf(stdout, "Failed to convert UCS-2 0x0000 to UTF-8\n");
1565 rv = PR_FALSE;
1566 } else if( 1 != len ) {
1567 fprintf(stdout, "Wrong conversion of UCS-2 0x0000: len = %d\n", len);
1568 rv = PR_FALSE;
1569 } else if( '\0' != *utf8 ) {
1570 fprintf(stdout, "Wrong conversion of UCS-2 0x0000: expected 00 ,"
1571 "received %02.2x\n", (unsigned int)*utf8);
1572 rv = PR_FALSE;
1575 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
1576 "", 1, (unsigned char *)&sback, sizeof(sback), &len);
1578 if( !result ) {
1579 fprintf(stdout, "Failed to convert UTF-8 00 to UCS-2\n");
1580 rv = PR_FALSE;
1581 } else if( 2 != len ) {
1582 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-2: len = %d\n", len);
1583 rv = PR_FALSE;
1584 } else if( 0 != sback ) {
1585 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-2: "
1586 "expected 0x0000, received 0x%04.4x\n", sback);
1587 rv = PR_FALSE;
1590 return rv;
1593 static PRBool
1594 test_multichars
1596 void
1599 int i;
1600 unsigned int len, lenout;
1601 PRUint32 *ucs4s;
1602 char *ucs4_utf8;
1603 PRUint16 *ucs2s;
1604 char *ucs2_utf8;
1605 void *tmp;
1606 PRBool result;
1608 ucs4s = (PRUint32 *)calloc(sizeof(ucs4)/sizeof(ucs4[0]), sizeof(PRUint32));
1609 ucs2s = (PRUint16 *)calloc(sizeof(ucs2)/sizeof(ucs2[0]), sizeof(PRUint16));
1611 if( ((PRUint32 *)NULL == ucs4s) || ((PRUint16 *)NULL == ucs2s) ) {
1612 fprintf(stderr, "out of memory\n");
1613 exit(1);
1616 len = 0;
1617 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
1618 ucs4s[i] = ucs4[i].c;
1619 len += strlen(ucs4[i].utf8);
1622 ucs4_utf8 = (char *)malloc(len);
1624 len = 0;
1625 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
1626 ucs2s[i] = ucs2[i].c;
1627 len += strlen(ucs2[i].utf8);
1630 ucs2_utf8 = (char *)malloc(len);
1632 if( ((char *)NULL == ucs4_utf8) || ((char *)NULL == ucs2_utf8) ) {
1633 fprintf(stderr, "out of memory\n");
1634 exit(1);
1637 *ucs4_utf8 = '\0';
1638 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
1639 strcat(ucs4_utf8, ucs4[i].utf8);
1642 *ucs2_utf8 = '\0';
1643 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
1644 strcat(ucs2_utf8, ucs2[i].utf8);
1647 /* UTF-8 -> UCS-4 */
1648 len = sizeof(ucs4)/sizeof(ucs4[0]) * sizeof(PRUint32);
1649 tmp = calloc(len, 1);
1650 if( (void *)NULL == tmp ) {
1651 fprintf(stderr, "out of memory\n");
1652 exit(1);
1655 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
1656 ucs4_utf8, strlen(ucs4_utf8), tmp, len, &lenout);
1657 if( !result ) {
1658 fprintf(stdout, "Failed to convert much UTF-8 to UCS-4\n");
1659 goto done;
1662 if( lenout != len ) {
1663 fprintf(stdout, "Unexpected length converting much UTF-8 to UCS-4\n");
1664 goto loser;
1667 if( 0 != memcmp(ucs4s, tmp, len) ) {
1668 fprintf(stdout, "Wrong conversion of much UTF-8 to UCS-4\n");
1669 goto loser;
1672 free(tmp); tmp = (void *)NULL;
1674 /* UCS-4 -> UTF-8 */
1675 len = strlen(ucs4_utf8);
1676 tmp = calloc(len, 1);
1677 if( (void *)NULL == tmp ) {
1678 fprintf(stderr, "out of memory\n");
1679 exit(1);
1682 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
1683 (unsigned char *)ucs4s, sizeof(ucs4)/sizeof(ucs4[0]) * sizeof(PRUint32),
1684 tmp, len, &lenout);
1685 if( !result ) {
1686 fprintf(stdout, "Failed to convert much UCS-4 to UTF-8\n");
1687 goto done;
1690 if( lenout != len ) {
1691 fprintf(stdout, "Unexpected length converting much UCS-4 to UTF-8\n");
1692 goto loser;
1695 if( 0 != strncmp(ucs4_utf8, tmp, len) ) {
1696 fprintf(stdout, "Wrong conversion of much UCS-4 to UTF-8\n");
1697 goto loser;
1700 free(tmp); tmp = (void *)NULL;
1702 /* UTF-8 -> UCS-2 */
1703 len = sizeof(ucs2)/sizeof(ucs2[0]) * sizeof(PRUint16);
1704 tmp = calloc(len, 1);
1705 if( (void *)NULL == tmp ) {
1706 fprintf(stderr, "out of memory\n");
1707 exit(1);
1710 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
1711 ucs2_utf8, strlen(ucs2_utf8), tmp, len, &lenout);
1712 if( !result ) {
1713 fprintf(stdout, "Failed to convert much UTF-8 to UCS-2\n");
1714 goto done;
1717 if( lenout != len ) {
1718 fprintf(stdout, "Unexpected length converting much UTF-8 to UCS-2\n");
1719 goto loser;
1722 if( 0 != memcmp(ucs2s, tmp, len) ) {
1723 fprintf(stdout, "Wrong conversion of much UTF-8 to UCS-2\n");
1724 goto loser;
1727 free(tmp); tmp = (void *)NULL;
1729 /* UCS-2 -> UTF-8 */
1730 len = strlen(ucs2_utf8);
1731 tmp = calloc(len, 1);
1732 if( (void *)NULL == tmp ) {
1733 fprintf(stderr, "out of memory\n");
1734 exit(1);
1737 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
1738 (unsigned char *)ucs2s, sizeof(ucs2)/sizeof(ucs2[0]) * sizeof(PRUint16),
1739 tmp, len, &lenout);
1740 if( !result ) {
1741 fprintf(stdout, "Failed to convert much UCS-2 to UTF-8\n");
1742 goto done;
1745 if( lenout != len ) {
1746 fprintf(stdout, "Unexpected length converting much UCS-2 to UTF-8\n");
1747 goto loser;
1750 if( 0 != strncmp(ucs2_utf8, tmp, len) ) {
1751 fprintf(stdout, "Wrong conversion of much UCS-2 to UTF-8\n");
1752 goto loser;
1755 /* implement UTF16 */
1757 result = PR_TRUE;
1758 goto done;
1760 loser:
1761 result = PR_FALSE;
1762 done:
1763 free(ucs4s);
1764 free(ucs4_utf8);
1765 free(ucs2s);
1766 free(ucs2_utf8);
1767 if( (void *)NULL != tmp ) free(tmp);
1768 return result;
1771 void
1772 byte_order
1774 void
1778 * The implementation (now) expects the 16- and 32-bit characters
1779 * to be in network byte order, not host byte order. Therefore I
1780 * have to byteswap all those test vectors above. hton[ls] may be
1781 * functions, so I have to do this dynamically. If you want to
1782 * use this code to do host byte order conversions, just remove
1783 * the call in main() to this function.
1786 int i;
1788 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
1789 struct ucs4 *e = &ucs4[i];
1790 e->c = htonl(e->c);
1793 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
1794 struct ucs2 *e = &ucs2[i];
1795 e->c = htons(e->c);
1798 for( i = 0; i < sizeof(utf16)/sizeof(utf16[0]); i++ ) {
1799 struct utf16 *e = &utf16[i];
1800 e->c = htonl(e->c);
1801 e->w[0] = htons(e->w[0]);
1802 e->w[1] = htons(e->w[1]);
1805 return;
/*
 * Test driver.  Byte-swaps the test vectors with byte_order(), then
 * runs the test groups in order, stopping at the first group that
 * fails, and prints "PASS" or "FAIL" on stderr.
 *
 * Exit status: 0 when every group passed, 1 otherwise.  The original
 * returned 1 on PASS and 0 on FAIL, which is the reverse of the usual
 * process exit-status convention and made a failing run look
 * successful to shells and build harnesses.
 *
 * argc and argv are unused.
 */
int
main(int argc, char *argv[])
{
    int all_passed;

    (void)argc;
    (void)argv;

    byte_order();

    /* && short-circuits: later groups are skipped after a failure. */
    all_passed = test_ucs4_chars() &&
                 test_ucs2_chars() &&
                 test_utf16_chars() &&
                 test_utf8_bad_chars() &&
                 test_iso88591_chars() &&
                 test_zeroes() &&
                 test_multichars();

    if (all_passed) {
        fprintf(stderr, "PASS\n");
        return 0;
    }

    fprintf(stderr, "FAIL\n");
    return 1;
}
1833 #endif /* TEST_UTF8 */