1 /* GLIB - Library of useful routines for C programming
2 * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 02111-1307, USA.
21 * Modified by the GLib Team and others 1997-2000. See the AUTHORS
22 * file for a list of people on the GLib Team. See the ChangeLog
23 * files for a list of changes. These files are distributed with
24 * GLib at ftp://ftp.gtk.org/pub/gtk/.
27 #undef G_DISABLE_ASSERT
36 test_iconv_state (void)
38 gchar
*in
= "\xf4\xe5\xf8\xe5\xed";
39 gchar
*expected
= "\xd7\xa4\xd7\x95\xd7\xa8\xd7\x95\xd7\x9d";
42 gsize bytes_written
= 0;
45 out
= g_convert (in
, -1, "UTF-8", "CP1255",
46 &bytes_read
, &bytes_written
, &error
);
48 g_assert (error
== NULL
);
49 g_assert (bytes_read
== 5);
50 g_assert (bytes_written
== 10);
51 g_assert (strcmp (out
, expected
) == 0);
55 /* some tests involving "vulgar fraction one half" */
59 gchar
*in
= "\xc2\xbd";
62 gsize bytes_written
= 0;
65 out
= g_convert (in
, -1,
66 "ISO-8859-1", "UTF-8",
67 &bytes_read
, &bytes_written
,
70 g_assert (error
== NULL
);
71 g_assert (bytes_read
== 2);
72 g_assert (bytes_written
== 1);
73 g_assert (strcmp (out
, "\xbd") == 0);
76 out
= g_convert (in
, -1,
77 "ISO-8859-15", "UTF-8",
78 &bytes_read
, &bytes_written
,
81 g_assert (error
&& error
->code
== G_CONVERT_ERROR_ILLEGAL_SEQUENCE
);
82 g_assert (bytes_read
== 0);
83 g_assert (bytes_written
== 0);
84 g_assert (out
== NULL
);
85 g_clear_error (&error
);
88 out
= g_convert_with_fallback (in
, -1,
89 "ISO8859-15", "UTF-8",
91 &bytes_read
, &bytes_written
,
94 g_assert (error
== NULL
);
95 g_assert (bytes_read
== 2);
96 g_assert (bytes_written
== 1);
97 g_assert (strcmp (out
, "a") == 0);
102 test_byte_order (void)
104 gchar in_be
[4] = { 0xfe, 0xff, 0x03, 0x93}; /* capital gamma */
105 gchar in_le
[4] = { 0xff, 0xfe, 0x93, 0x03};
106 gchar
*expected
= "\xce\x93";
108 gsize bytes_read
= 0;
109 gsize bytes_written
= 0;
110 GError
*error
= NULL
;
112 out
= g_convert (in_be
, sizeof (in_be
),
114 &bytes_read
, &bytes_written
,
117 g_assert (error
== NULL
);
118 g_assert (bytes_read
== 4);
119 g_assert (bytes_written
== 2);
120 g_assert (strcmp (out
, expected
) == 0);
123 out
= g_convert (in_le
, sizeof (in_le
),
125 &bytes_read
, &bytes_written
,
128 g_assert (error
== NULL
);
129 g_assert (bytes_read
== 4);
130 g_assert (bytes_written
== 2);
131 g_assert (strcmp (out
, expected
) == 0);
136 check_utf8_to_ucs4 (const char *utf8
,
138 const gunichar
*ucs4
,
142 gunichar
*result
, *result2
, *result3
;
143 glong items_read
, items_read2
;
144 glong items_written
, items_written2
;
145 GError
*error
, *error2
, *error3
;
150 /* check the fast conversion */
151 result
= g_utf8_to_ucs4_fast (utf8
, utf8_len
, &items_written
);
153 g_assert (items_written
== ucs4_len
);
155 for (i
= 0; i
<= items_written
; i
++)
156 g_assert (result
[i
] == ucs4
[i
]);
162 result
= g_utf8_to_ucs4 (utf8
, utf8_len
, &items_read
, &items_written
, &error
);
164 if (utf8_len
== strlen (utf8
))
166 /* check that len == -1 yields identical results */
168 result2
= g_utf8_to_ucs4 (utf8
, -1, &items_read2
, &items_written2
, &error2
);
169 g_assert (error
|| items_read2
== items_read
);
170 g_assert (error
|| items_written2
== items_written2
);
171 g_assert (!!result
== !!result2
);
172 g_assert (!!error
== !!error2
);
174 for (i
= 0; i
<= items_written
; i
++)
175 g_assert (result
[i
] == result2
[i
]);
179 g_error_free (error2
);
183 result3
= g_utf8_to_ucs4 (utf8
, utf8_len
, NULL
, NULL
, &error3
);
185 if (error3
&& error3
->code
== G_CONVERT_ERROR_PARTIAL_INPUT
)
187 g_assert (error
== NULL
);
188 g_assert (items_read
== error_pos
);
189 g_assert (items_written
== ucs4_len
);
191 for (i
= 0; i
<= items_written
; i
++)
192 g_assert (result
[i
] == ucs4
[i
]);
196 g_assert (error
!= NULL
);
197 g_assert (result
== NULL
);
198 g_assert (items_read
== error_pos
);
199 g_error_free (error
);
201 g_assert (error3
!= NULL
);
202 g_assert (result3
== NULL
);
203 g_error_free (error3
);
207 g_assert (error
== NULL
);
208 g_assert (items_read
== utf8_len
);
209 g_assert (items_written
== ucs4_len
);
211 for (i
= 0; i
<= items_written
; i
++)
212 g_assert (result
[i
] == ucs4
[i
]);
214 g_assert (error3
== NULL
);
216 for (i
= 0; i
<= ucs4_len
; i
++)
217 g_assert (result3
[i
] == ucs4
[i
]);
225 check_ucs4_to_utf8 (const gunichar
*ucs4
,
231 gchar
*result
, *result2
, *result3
;
232 glong items_read
, items_read2
;
233 glong items_written
, items_written2
;
234 GError
*error
, *error2
, *error3
;
237 result
= g_ucs4_to_utf8 (ucs4
, ucs4_len
, &items_read
, &items_written
, &error
);
239 if (ucs4
[ucs4_len
] == 0)
241 /* check that len == -1 yields identical results */
243 result2
= g_ucs4_to_utf8 (ucs4
, -1, &items_read2
, &items_written2
, &error2
);
245 g_assert (error
|| items_read2
== items_read
);
246 g_assert (error
|| items_written2
== items_written
);
247 g_assert (!!result
== !!result2
);
248 g_assert (!!error
== !!error2
);
250 g_assert (strcmp (result
, result2
) == 0);
254 g_error_free (error2
);
258 result3
= g_ucs4_to_utf8 (ucs4
, ucs4_len
, NULL
, NULL
, &error3
);
262 g_assert (error
!= NULL
);
263 g_assert (result
== NULL
);
264 g_assert (items_read
== error_pos
);
265 g_error_free (error
);
267 g_assert (error3
!= NULL
);
268 g_assert (result3
== NULL
);
269 g_error_free (error3
);
273 g_assert (error
== NULL
);
274 g_assert (items_read
== ucs4_len
);
275 g_assert (items_written
== utf8_len
);
277 g_assert (strcmp (result
, utf8
) == 0);
279 g_assert (error3
== NULL
);
281 g_assert (strcmp (result3
, utf8
) == 0);
289 check_utf8_to_utf16 (const char *utf8
,
291 const gunichar2
*utf16
,
295 gunichar2
*result
, *result2
, *result3
;
296 glong items_read
, items_read2
;
297 glong items_written
, items_written2
;
298 GError
*error
, *error2
, *error3
;
302 result
= g_utf8_to_utf16 (utf8
, utf8_len
, &items_read
, &items_written
, &error
);
304 if (utf8_len
== strlen (utf8
))
306 /* check that len == -1 yields identical results */
308 result2
= g_utf8_to_utf16 (utf8
, -1, &items_read2
, &items_written2
, &error2
);
309 g_assert (error
|| items_read2
== items_read
);
310 g_assert (error
|| items_written2
== items_written2
);
311 g_assert (!!result
== !!result2
);
312 g_assert (!!error
== !!error2
);
314 for (i
= 0; i
<= items_written
; i
++)
315 g_assert (result
[i
] == result2
[i
]);
319 g_error_free (error2
);
323 result3
= g_utf8_to_utf16 (utf8
, utf8_len
, NULL
, NULL
, &error3
);
325 if (error3
&& error3
->code
== G_CONVERT_ERROR_PARTIAL_INPUT
)
327 g_assert (error
== NULL
);
328 g_assert (items_read
== error_pos
);
329 g_assert (items_written
== utf16_len
);
331 for (i
= 0; i
<= items_written
; i
++)
332 g_assert (result
[i
] == utf16
[i
]);
336 g_assert (error
!= NULL
);
337 g_assert (result
== NULL
);
338 g_assert (items_read
== error_pos
);
339 g_error_free (error
);
341 g_assert (error3
!= NULL
);
342 g_assert (result3
== NULL
);
343 g_error_free (error3
);
347 g_assert (error
== NULL
);
348 g_assert (items_read
== utf8_len
);
349 g_assert (items_written
== utf16_len
);
351 for (i
= 0; i
<= items_written
; i
++)
352 g_assert (result
[i
] == utf16
[i
]);
354 g_assert (error3
== NULL
);
356 for (i
= 0; i
<= utf16_len
; i
++)
357 g_assert (result3
[i
] == utf16
[i
]);
365 check_utf16_to_utf8 (const gunichar2
*utf16
,
371 gchar
*result
, *result2
, *result3
;
372 glong items_read
, items_read2
;
373 glong items_written
, items_written2
;
374 GError
*error
, *error2
, *error3
;
377 result
= g_utf16_to_utf8 (utf16
, utf16_len
, &items_read
, &items_written
, &error
);
378 if (utf16
[utf16_len
] == 0)
380 /* check that len == -1 yields identical results */
382 result2
= g_utf16_to_utf8 (utf16
, -1, &items_read2
, &items_written2
, &error2
);
384 g_assert (error
|| items_read2
== items_read
);
385 g_assert (error
|| items_written2
== items_written
);
386 g_assert (!!result
== !!result2
);
387 g_assert (!!error
== !!error2
);
389 g_assert (strcmp (result
, result2
) == 0);
393 g_error_free (error2
);
397 result3
= g_utf16_to_utf8 (utf16
, utf16_len
, NULL
, NULL
, &error3
);
399 if (error3
&& error3
->code
== G_CONVERT_ERROR_PARTIAL_INPUT
)
401 g_assert (error
== NULL
);
402 g_assert (items_read
== error_pos
);
403 g_assert (items_read
+ 1 == utf16_len
);
404 g_assert (items_written
== utf8_len
);
406 g_assert (strcmp (result
, utf8
) == 0);
410 g_assert (error
!= NULL
);
411 g_assert (result
== NULL
);
412 g_assert (items_read
== error_pos
);
413 g_error_free (error
);
415 g_assert (error3
!= NULL
);
416 g_assert (result3
== NULL
);
417 g_error_free (error3
);
421 g_assert (error
== NULL
);
422 g_assert (items_read
== utf16_len
);
423 g_assert (items_written
== utf8_len
);
425 g_assert (strcmp (result
, utf8
) == 0);
427 g_assert (error3
== NULL
);
429 g_assert (strcmp (result3
, utf8
) == 0);
437 check_ucs4_to_utf16 (const gunichar
*ucs4
,
439 const gunichar2
*utf16
,
443 gunichar2
*result
, *result2
, *result3
;
444 glong items_read
, items_read2
;
445 glong items_written
, items_written2
;
446 GError
*error
, *error2
, *error3
;
450 result
= g_ucs4_to_utf16 (ucs4
, ucs4_len
, &items_read
, &items_written
, &error
);
452 if (ucs4
[ucs4_len
] == 0)
454 /* check that len == -1 yields identical results */
456 result2
= g_ucs4_to_utf16 (ucs4
, -1, &items_read2
, &items_written2
, &error2
);
458 g_assert (error
|| items_read2
== items_read
);
459 g_assert (error
|| items_written2
== items_written
);
460 g_assert (!!result
== !!result2
);
461 g_assert (!!error
== !!error2
);
463 for (i
= 0; i
<= utf16_len
; i
++)
464 g_assert (result
[i
] == result2
[i
]);
468 g_error_free (error2
);
472 result3
= g_ucs4_to_utf16 (ucs4
, -1, NULL
, NULL
, &error3
);
476 g_assert (error
!= NULL
);
477 g_assert (result
== NULL
);
478 g_assert (items_read
== error_pos
);
479 g_error_free (error
);
481 g_assert (error3
!= NULL
);
482 g_assert (result3
== NULL
);
483 g_error_free (error3
);
487 g_assert (error
== NULL
);
488 g_assert (items_read
== ucs4_len
);
489 g_assert (items_written
== utf16_len
);
491 for (i
= 0; i
<= utf16_len
; i
++)
492 g_assert (result
[i
] == utf16
[i
]);
494 g_assert (error3
== NULL
);
496 for (i
= 0; i
<= utf16_len
; i
++)
497 g_assert (result3
[i
] == utf16
[i
]);
505 check_utf16_to_ucs4 (const gunichar2
*utf16
,
507 const gunichar
*ucs4
,
511 gunichar
*result
, *result2
, *result3
;
512 glong items_read
, items_read2
;
513 glong items_written
, items_written2
;
514 GError
*error
, *error2
, *error3
;
518 result
= g_utf16_to_ucs4 (utf16
, utf16_len
, &items_read
, &items_written
, &error
);
519 if (utf16
[utf16_len
] == 0)
521 /* check that len == -1 yields identical results */
523 result2
= g_utf16_to_ucs4 (utf16
, -1, &items_read2
, &items_written2
, &error2
);
524 g_assert (error
|| items_read2
== items_read
);
525 g_assert (error
|| items_written2
== items_written2
);
526 g_assert (!!result
== !!result2
);
527 g_assert (!!error
== !!error2
);
529 for (i
= 0; i
<= items_written
; i
++)
530 g_assert (result
[i
] == result2
[i
]);
534 g_error_free (error2
);
538 result3
= g_utf16_to_ucs4 (utf16
, utf16_len
, NULL
, NULL
, &error3
);
540 if (error3
&& error3
->code
== G_CONVERT_ERROR_PARTIAL_INPUT
)
542 g_assert (error
== NULL
);
543 g_assert (items_read
== error_pos
);
544 g_assert (items_read
+ 1 == utf16_len
);
545 g_assert (items_written
== ucs4_len
);
547 for (i
= 0; i
<= items_written
; i
++)
548 g_assert (result
[i
] == ucs4
[i
]);
552 g_assert (error
!= NULL
);
553 g_assert (result
== NULL
);
554 g_assert (items_read
== error_pos
);
555 g_error_free (error
);
557 g_assert (error3
!= NULL
);
558 g_assert (result3
== NULL
);
559 g_error_free (error3
);
563 g_assert (error
== NULL
);
564 g_assert (items_read
== utf16_len
);
565 g_assert (items_written
== ucs4_len
);
567 for (i
= 0; i
<= ucs4_len
; i
++)
568 g_assert (result
[i
] == ucs4
[i
]);
570 g_assert (error3
== NULL
);
572 for (i
= 0; i
<= ucs4_len
; i
++)
573 g_assert (result3
[i
] == ucs4
[i
]);
581 test_unicode_conversions (void)
585 gunichar2 utf16
[100];
588 ucs4
[0] = 0x61; ucs4
[1] = 0x62; ucs4
[2] = 0x63; ucs4
[3] = 0;
589 utf16
[0] = 0x61; utf16
[1] = 0x62; utf16
[2] = 0x63; utf16
[3] = 0;
591 check_utf8_to_ucs4 (utf8
, 3, ucs4
, 3, 0);
592 check_ucs4_to_utf8 (ucs4
, 3, utf8
, 3, 0);
593 check_utf8_to_utf16 (utf8
, 3, utf16
, 3, 0);
594 check_utf16_to_utf8 (utf16
, 3, utf8
, 3, 0);
595 check_ucs4_to_utf16 (ucs4
, 3, utf16
, 3, 0);
596 check_utf16_to_ucs4 (utf16
, 3, ucs4
, 3, 0);
598 utf8
= "\316\261\316\262\316\263";
599 ucs4
[0] = 0x03b1; ucs4
[1] = 0x03b2; ucs4
[2] = 0x03b3; ucs4
[3] = 0;
600 utf16
[0] = 0x03b1; utf16
[1] = 0x03b2; utf16
[2] = 0x03b3; utf16
[3] = 0;
602 check_utf8_to_ucs4 (utf8
, 6, ucs4
, 3, 0);
603 check_ucs4_to_utf8 (ucs4
, 3, utf8
, 6, 0);
604 check_utf8_to_utf16 (utf8
, 6, utf16
, 3, 0);
605 check_utf16_to_utf8 (utf16
, 3, utf8
, 6, 0);
606 check_ucs4_to_utf16 (ucs4
, 3, utf16
, 3, 0);
607 check_utf16_to_ucs4 (utf16
, 3, ucs4
, 3, 0);
609 /* partial utf8 character */
611 ucs4
[0] = 0x61; ucs4
[1] = 0x62; ucs4
[2] = 0x63; ucs4
[3] = 0;
612 utf16
[0] = 0x61; utf16
[1] = 0x62; utf16
[2] = 0x63; utf16
[3] = 0;
614 check_utf8_to_ucs4 (utf8
, 4, ucs4
, 3, 3);
615 check_utf8_to_utf16 (utf8
, 4, utf16
, 3, 3);
618 utf8
= "abc\316\316";
622 check_utf8_to_ucs4 (utf8
, 5, ucs4
, 0, 3);
623 check_utf8_to_utf16 (utf8
, 5, utf16
, 0, 3);
625 /* partial utf16 character */
627 ucs4
[0] = 0x61; ucs4
[1] = 0x62; ucs4
[2] = 0;
628 utf16
[0] = 0x61; utf16
[1] = 0x62; utf16
[2] = 0xd801; utf16
[3] = 0;
630 check_utf16_to_utf8 (utf16
, 3, utf8
, 2, 2);
631 check_utf16_to_ucs4 (utf16
, 3, ucs4
, 2, 2);
636 utf16
[0] = 0x61; utf16
[1] = 0x62; utf16
[2] = 0xdc01; utf16
[3] = 0;
638 check_utf16_to_utf8 (utf16
, 3, utf8
, 0, 2);
639 check_utf16_to_ucs4 (utf16
, 3, ucs4
, 0, 2);
643 ucs4
[0] = 0x61; ucs4
[1] = 0x62; ucs4
[2] = 0x80000000; ucs4
[3] = 0;
646 check_ucs4_to_utf8 (ucs4
, 3, utf8
, 0, 2);
647 check_ucs4_to_utf16 (ucs4
, 3, utf16
, 0, 2);
651 main (int argc
, char *argv
[])
656 test_unicode_conversions ();