/* GLIB - Library of useful routines for C programming
 * Copyright (C) 1995-1997  Peter Mattis, Spencer Kimball and Josh MacDonald
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

/*
 * Modified by the GLib Team and others 1997-2000.  See the AUTHORS
 * file for a list of people on the GLib Team.  See the ChangeLog
 * files for a list of changes.  These files are distributed with
 * GLib at ftp://ftp.gtk.org/pub/gtk/.
 */
25 #undef G_DISABLE_ASSERT
34 test_iconv_state (void)
36 gchar
*in
= "\xf4\xe5\xf8\xe5\xed";
37 gchar
*expected
= "\xd7\xa4\xd7\x95\xd7\xa8\xd7\x95\xd7\x9d";
40 gsize bytes_written
= 0;
43 out
= g_convert (in
, -1, "UTF-8", "CP1255",
44 &bytes_read
, &bytes_written
, &error
);
46 if (error
&& error
->code
== G_CONVERT_ERROR_NO_CONVERSION
)
47 return; /* silently skip if CP1255 is not supported, see bug 467707 */
49 g_assert_no_error (error
);
50 g_assert_cmpint (bytes_read
, ==, 5);
51 g_assert_cmpint (bytes_written
, ==, 10);
52 g_assert_cmpstr (out
, ==, expected
);
56 /* some tests involving "vulgar fraction one half" */
60 gchar
*in
= "\xc2\xbd";
63 gsize bytes_written
= 0;
66 out
= g_convert (in
, -1,
67 "ISO-8859-1", "UTF-8",
68 &bytes_read
, &bytes_written
,
71 g_assert_no_error (error
);
72 g_assert_cmpint (bytes_read
, ==, 2);
73 g_assert_cmpint (bytes_written
, ==, 1);
74 g_assert_cmpstr (out
, ==, "\xbd");
77 out
= g_convert (in
, -1,
78 "ISO-8859-15", "UTF-8",
79 &bytes_read
, &bytes_written
,
82 g_assert_error (error
, G_CONVERT_ERROR
, G_CONVERT_ERROR_ILLEGAL_SEQUENCE
);
83 g_assert_cmpint (bytes_read
, ==, 0);
84 g_assert_cmpint (bytes_written
, ==, 0);
85 g_assert_cmpstr (out
, ==, NULL
);
86 g_clear_error (&error
);
89 out
= g_convert_with_fallback (in
, -1,
90 "ISO8859-15", "UTF-8",
92 &bytes_read
, &bytes_written
,
95 g_assert_no_error (error
);
96 g_assert_cmpint (bytes_read
, ==, 2);
97 g_assert_cmpint (bytes_written
, ==, 1);
98 g_assert_cmpstr (out
, ==, "a");
103 test_byte_order (void)
105 gchar in_be
[4] = { 0xfe, 0xff, 0x03, 0x93}; /* capital gamma */
106 gchar in_le
[4] = { 0xff, 0xfe, 0x93, 0x03};
107 gchar
*expected
= "\xce\x93";
109 gsize bytes_read
= 0;
110 gsize bytes_written
= 0;
111 GError
*error
= NULL
;
113 out
= g_convert (in_be
, sizeof (in_be
),
115 &bytes_read
, &bytes_written
,
118 g_assert_no_error (error
);
119 g_assert_cmpint (bytes_read
, ==, 4);
120 g_assert_cmpint (bytes_written
, ==, 2);
121 g_assert_cmpstr (out
, ==, expected
);
124 out
= g_convert (in_le
, sizeof (in_le
),
126 &bytes_read
, &bytes_written
,
129 g_assert_no_error (error
);
130 g_assert_cmpint (bytes_read
, ==, 4);
131 g_assert_cmpint (bytes_written
, ==, 2);
132 g_assert_cmpstr (out
, ==, expected
);
137 check_utf8_to_ucs4 (const char *utf8
,
139 const gunichar
*ucs4
,
143 gunichar
*result
, *result2
, *result3
;
144 glong items_read
, items_read2
;
145 glong items_written
, items_written2
;
146 GError
*error
, *error2
, *error3
;
151 /* check the fast conversion */
152 result
= g_utf8_to_ucs4_fast (utf8
, utf8_len
, &items_written
);
154 g_assert_cmpint (items_written
, ==, ucs4_len
);
156 for (i
= 0; i
<= items_written
; i
++)
157 g_assert (result
[i
] == ucs4
[i
]);
163 result
= g_utf8_to_ucs4 (utf8
, utf8_len
, &items_read
, &items_written
, &error
);
165 if (utf8_len
== strlen (utf8
))
167 /* check that len == -1 yields identical results */
169 result2
= g_utf8_to_ucs4 (utf8
, -1, &items_read2
, &items_written2
, &error2
);
170 g_assert (error
|| items_read2
== items_read
);
171 g_assert (error
|| items_written2
== items_written2
);
172 g_assert_cmpint (!!result
, ==, !!result2
);
173 g_assert_cmpint (!!error
, ==, !!error2
);
175 for (i
= 0; i
<= items_written
; i
++)
176 g_assert (result
[i
] == result2
[i
]);
180 g_error_free (error2
);
184 result3
= g_utf8_to_ucs4 (utf8
, utf8_len
, NULL
, NULL
, &error3
);
186 if (error3
&& error3
->code
== G_CONVERT_ERROR_PARTIAL_INPUT
)
188 g_assert_no_error (error
);
189 g_assert_cmpint (items_read
, ==, error_pos
);
190 g_assert_cmpint (items_written
, ==, ucs4_len
);
192 for (i
= 0; i
<= items_written
; i
++)
193 g_assert (result
[i
] == ucs4
[i
]);
194 g_error_free (error3
);
198 g_assert (error
!= NULL
);
199 g_assert (result
== NULL
);
200 g_assert_cmpint (items_read
, ==, error_pos
);
201 g_error_free (error
);
203 g_assert (error3
!= NULL
);
204 g_assert (result3
== NULL
);
205 g_error_free (error3
);
209 g_assert_no_error (error
);
210 g_assert_cmpint (items_read
, ==, utf8_len
);
211 g_assert_cmpint (items_written
, ==, ucs4_len
);
213 for (i
= 0; i
<= items_written
; i
++)
214 g_assert (result
[i
] == ucs4
[i
]);
216 g_assert_no_error (error3
);
218 for (i
= 0; i
<= ucs4_len
; i
++)
219 g_assert (result3
[i
] == ucs4
[i
]);
227 check_ucs4_to_utf8 (const gunichar
*ucs4
,
233 gchar
*result
, *result2
, *result3
;
234 glong items_read
, items_read2
;
235 glong items_written
, items_written2
;
236 GError
*error
, *error2
, *error3
;
239 result
= g_ucs4_to_utf8 (ucs4
, ucs4_len
, &items_read
, &items_written
, &error
);
241 if (ucs4
[ucs4_len
] == 0)
243 /* check that len == -1 yields identical results */
245 result2
= g_ucs4_to_utf8 (ucs4
, -1, &items_read2
, &items_written2
, &error2
);
247 g_assert (error
|| items_read2
== items_read
);
248 g_assert (error
|| items_written2
== items_written
);
249 g_assert_cmpint (!!result
, ==, !!result2
);
250 g_assert_cmpint (!!error
, ==, !!error2
);
252 g_assert_cmpstr (result
, ==, result2
);
256 g_error_free (error2
);
260 result3
= g_ucs4_to_utf8 (ucs4
, ucs4_len
, NULL
, NULL
, &error3
);
264 g_assert (error
!= NULL
);
265 g_assert (result
== NULL
);
266 g_assert_cmpint (items_read
, ==, error_pos
);
267 g_error_free (error
);
269 g_assert (error3
!= NULL
);
270 g_assert (result3
== NULL
);
271 g_error_free (error3
);
275 g_assert_no_error (error
);
276 g_assert_cmpint (items_read
, ==, ucs4_len
);
277 g_assert_cmpint (items_written
, ==, utf8_len
);
279 g_assert_cmpstr (result
, ==, utf8
);
281 g_assert_no_error (error3
);
283 g_assert_cmpstr (result3
, ==, utf8
);
291 check_utf8_to_utf16 (const char *utf8
,
293 const gunichar2
*utf16
,
297 gunichar2
*result
, *result2
, *result3
;
298 glong items_read
, items_read2
;
299 glong items_written
, items_written2
;
300 GError
*error
, *error2
, *error3
;
304 result
= g_utf8_to_utf16 (utf8
, utf8_len
, &items_read
, &items_written
, &error
);
306 if (utf8_len
== strlen (utf8
))
308 /* check that len == -1 yields identical results */
310 result2
= g_utf8_to_utf16 (utf8
, -1, &items_read2
, &items_written2
, &error2
);
311 g_assert (error
|| items_read2
== items_read
);
312 g_assert (error
|| items_written2
== items_written2
);
313 g_assert_cmpint (!!result
, ==, !!result2
);
314 g_assert_cmpint (!!error
, ==, !!error2
);
316 for (i
= 0; i
<= items_written
; i
++)
317 g_assert (result
[i
] == result2
[i
]);
321 g_error_free (error2
);
325 result3
= g_utf8_to_utf16 (utf8
, utf8_len
, NULL
, NULL
, &error3
);
327 if (error3
&& error3
->code
== G_CONVERT_ERROR_PARTIAL_INPUT
)
329 g_assert_no_error (error
);
330 g_assert_cmpint (items_read
, ==, error_pos
);
331 g_assert_cmpint (items_written
, ==, utf16_len
);
333 for (i
= 0; i
<= items_written
; i
++)
334 g_assert (result
[i
] == utf16
[i
]);
335 g_error_free (error3
);
339 g_assert (error
!= NULL
);
340 g_assert (result
== NULL
);
341 g_assert_cmpint (items_read
, ==, error_pos
);
342 g_error_free (error
);
344 g_assert (error3
!= NULL
);
345 g_assert (result3
== NULL
);
346 g_error_free (error3
);
350 g_assert_no_error (error
);
351 g_assert_cmpint (items_read
, ==, utf8_len
);
352 g_assert_cmpint (items_written
, ==, utf16_len
);
354 for (i
= 0; i
<= items_written
; i
++)
355 g_assert (result
[i
] == utf16
[i
]);
357 g_assert_no_error (error3
);
359 for (i
= 0; i
<= utf16_len
; i
++)
360 g_assert (result3
[i
] == utf16
[i
]);
368 check_utf16_to_utf8 (const gunichar2
*utf16
,
374 gchar
*result
, *result2
, *result3
;
375 glong items_read
, items_read2
;
376 glong items_written
, items_written2
;
377 GError
*error
, *error2
, *error3
;
380 result
= g_utf16_to_utf8 (utf16
, utf16_len
, &items_read
, &items_written
, &error
);
381 if (utf16
[utf16_len
] == 0)
383 /* check that len == -1 yields identical results */
385 result2
= g_utf16_to_utf8 (utf16
, -1, &items_read2
, &items_written2
, &error2
);
387 g_assert (error
|| items_read2
== items_read
);
388 g_assert (error
|| items_written2
== items_written
);
389 g_assert_cmpint (!!result
, ==, !!result2
);
390 g_assert_cmpint (!!error
, ==, !!error2
);
392 g_assert_cmpstr (result
, ==, result2
);
396 g_error_free (error2
);
400 result3
= g_utf16_to_utf8 (utf16
, utf16_len
, NULL
, NULL
, &error3
);
402 if (error3
&& error3
->code
== G_CONVERT_ERROR_PARTIAL_INPUT
)
404 g_assert_no_error (error
);
405 g_assert_cmpint (items_read
, ==, error_pos
);
406 g_assert_cmpint (items_read
+ 1, ==, utf16_len
);
407 g_assert_cmpint (items_written
, ==, utf8_len
);
409 g_assert_cmpstr (result
, ==, utf8
);
410 g_error_free (error3
);
414 g_assert (error
!= NULL
);
415 g_assert (result
== NULL
);
416 g_assert_cmpint (items_read
, ==, error_pos
);
417 g_error_free (error
);
419 g_assert (error3
!= NULL
);
420 g_assert (result3
== NULL
);
421 g_error_free (error3
);
425 g_assert_no_error (error
);
426 g_assert_cmpint (items_read
, ==, utf16_len
);
427 g_assert_cmpint (items_written
, ==, utf8_len
);
429 g_assert_cmpstr (result
, ==, utf8
);
431 g_assert_no_error (error3
);
433 g_assert_cmpstr (result3
, ==, utf8
);
441 check_ucs4_to_utf16 (const gunichar
*ucs4
,
443 const gunichar2
*utf16
,
447 gunichar2
*result
, *result2
, *result3
;
448 glong items_read
, items_read2
;
449 glong items_written
, items_written2
;
450 GError
*error
, *error2
, *error3
;
454 result
= g_ucs4_to_utf16 (ucs4
, ucs4_len
, &items_read
, &items_written
, &error
);
456 if (ucs4
[ucs4_len
] == 0)
458 /* check that len == -1 yields identical results */
460 result2
= g_ucs4_to_utf16 (ucs4
, -1, &items_read2
, &items_written2
, &error2
);
462 g_assert (error
|| items_read2
== items_read
);
463 g_assert (error
|| items_written2
== items_written
);
464 g_assert_cmpint (!!result
, ==, !!result2
);
465 g_assert_cmpint (!!error
, ==, !!error2
);
467 for (i
= 0; i
<= utf16_len
; i
++)
468 g_assert (result
[i
] == result2
[i
]);
472 g_error_free (error2
);
476 result3
= g_ucs4_to_utf16 (ucs4
, -1, NULL
, NULL
, &error3
);
480 g_assert (error
!= NULL
);
481 g_assert (result
== NULL
);
482 g_assert_cmpint (items_read
, ==, error_pos
);
483 g_error_free (error
);
485 g_assert (error3
!= NULL
);
486 g_assert (result3
== NULL
);
487 g_error_free (error3
);
491 g_assert_no_error (error
);
492 g_assert_cmpint (items_read
, ==, ucs4_len
);
493 g_assert_cmpint (items_written
, ==, utf16_len
);
495 for (i
= 0; i
<= utf16_len
; i
++)
496 g_assert (result
[i
] == utf16
[i
]);
498 g_assert_no_error (error3
);
500 for (i
= 0; i
<= utf16_len
; i
++)
501 g_assert (result3
[i
] == utf16
[i
]);
509 check_utf16_to_ucs4 (const gunichar2
*utf16
,
511 const gunichar
*ucs4
,
515 gunichar
*result
, *result2
, *result3
;
516 glong items_read
, items_read2
;
517 glong items_written
, items_written2
;
518 GError
*error
, *error2
, *error3
;
522 result
= g_utf16_to_ucs4 (utf16
, utf16_len
, &items_read
, &items_written
, &error
);
523 if (utf16
[utf16_len
] == 0)
525 /* check that len == -1 yields identical results */
527 result2
= g_utf16_to_ucs4 (utf16
, -1, &items_read2
, &items_written2
, &error2
);
528 g_assert (error
|| items_read2
== items_read
);
529 g_assert (error
|| items_written2
== items_written2
);
530 g_assert_cmpint (!!result
, ==, !!result2
);
531 g_assert_cmpint (!!error
, ==, !!error2
);
533 for (i
= 0; i
<= items_written
; i
++)
534 g_assert (result
[i
] == result2
[i
]);
538 g_error_free (error2
);
542 result3
= g_utf16_to_ucs4 (utf16
, utf16_len
, NULL
, NULL
, &error3
);
544 if (error3
&& error3
->code
== G_CONVERT_ERROR_PARTIAL_INPUT
)
546 g_assert_no_error (error
);
547 g_assert_cmpint (items_read
, ==, error_pos
);
548 g_assert_cmpint (items_read
+ 1, ==, utf16_len
);
549 g_assert_cmpint (items_written
, ==, ucs4_len
);
551 for (i
= 0; i
<= items_written
; i
++)
552 g_assert (result
[i
] == ucs4
[i
]);
553 g_error_free (error3
);
557 g_assert (error
!= NULL
);
558 g_assert (result
== NULL
);
559 g_assert_cmpint (items_read
, ==, error_pos
);
560 g_error_free (error
);
562 g_assert (error3
!= NULL
);
563 g_assert (result3
== NULL
);
564 g_error_free (error3
);
568 g_assert_no_error (error
);
569 g_assert_cmpint (items_read
, ==, utf16_len
);
570 g_assert_cmpint (items_written
, ==, ucs4_len
);
572 for (i
= 0; i
<= ucs4_len
; i
++)
573 g_assert (result
[i
] == ucs4
[i
]);
575 g_assert_no_error (error3
);
577 for (i
= 0; i
<= ucs4_len
; i
++)
578 g_assert (result3
[i
] == ucs4
[i
]);
586 test_unicode_conversions (void)
590 gunichar2 utf16
[100];
593 ucs4
[0] = 0x61; ucs4
[1] = 0x62; ucs4
[2] = 0x63; ucs4
[3] = 0;
594 utf16
[0] = 0x61; utf16
[1] = 0x62; utf16
[2] = 0x63; utf16
[3] = 0;
596 check_utf8_to_ucs4 (utf8
, 3, ucs4
, 3, 0);
597 check_ucs4_to_utf8 (ucs4
, 3, utf8
, 3, 0);
598 check_utf8_to_utf16 (utf8
, 3, utf16
, 3, 0);
599 check_utf16_to_utf8 (utf16
, 3, utf8
, 3, 0);
600 check_ucs4_to_utf16 (ucs4
, 3, utf16
, 3, 0);
601 check_utf16_to_ucs4 (utf16
, 3, ucs4
, 3, 0);
603 utf8
= "\316\261\316\262\316\263";
604 ucs4
[0] = 0x03b1; ucs4
[1] = 0x03b2; ucs4
[2] = 0x03b3; ucs4
[3] = 0;
605 utf16
[0] = 0x03b1; utf16
[1] = 0x03b2; utf16
[2] = 0x03b3; utf16
[3] = 0;
607 check_utf8_to_ucs4 (utf8
, 6, ucs4
, 3, 0);
608 check_ucs4_to_utf8 (ucs4
, 3, utf8
, 6, 0);
609 check_utf8_to_utf16 (utf8
, 6, utf16
, 3, 0);
610 check_utf16_to_utf8 (utf16
, 3, utf8
, 6, 0);
611 check_ucs4_to_utf16 (ucs4
, 3, utf16
, 3, 0);
612 check_utf16_to_ucs4 (utf16
, 3, ucs4
, 3, 0);
614 /* partial utf8 character */
616 ucs4
[0] = 0x61; ucs4
[1] = 0x62; ucs4
[2] = 0x63; ucs4
[3] = 0;
617 utf16
[0] = 0x61; utf16
[1] = 0x62; utf16
[2] = 0x63; utf16
[3] = 0;
619 check_utf8_to_ucs4 (utf8
, 4, ucs4
, 3, 3);
620 check_utf8_to_utf16 (utf8
, 4, utf16
, 3, 3);
623 utf8
= "abc\316\316";
627 check_utf8_to_ucs4 (utf8
, 5, ucs4
, 0, 3);
628 check_utf8_to_utf16 (utf8
, 5, utf16
, 0, 3);
630 /* partial utf16 character */
632 ucs4
[0] = 0x61; ucs4
[1] = 0x62; ucs4
[2] = 0;
633 utf16
[0] = 0x61; utf16
[1] = 0x62; utf16
[2] = 0xd801; utf16
[3] = 0;
635 check_utf16_to_utf8 (utf16
, 3, utf8
, 2, 2);
636 check_utf16_to_ucs4 (utf16
, 3, ucs4
, 2, 2);
641 utf16
[0] = 0x61; utf16
[1] = 0x62; utf16
[2] = 0xdc01; utf16
[3] = 0;
643 check_utf16_to_utf8 (utf16
, 3, utf8
, 0, 2);
644 check_utf16_to_ucs4 (utf16
, 3, ucs4
, 0, 2);
648 ucs4
[0] = 0x61; ucs4
[1] = 0x62; ucs4
[2] = 0x80000000; ucs4
[3] = 0;
651 check_ucs4_to_utf8 (ucs4
, 3, utf8
, 0, 2);
652 check_ucs4_to_utf16 (ucs4
, 3, utf16
, 0, 2);
656 test_filename_utf8 (void)
658 const gchar
*filename
= "/my/path/to/foo";
664 utf8
= g_filename_to_utf8 (filename
, -1, NULL
, NULL
, &error
);
665 g_assert_no_error (error
);
666 back
= g_filename_from_utf8 (utf8
, -1, NULL
, NULL
, &error
);
667 g_assert_no_error (error
);
668 g_assert_cmpstr (back
, ==, filename
);
675 test_filename_display (void)
677 const gchar
*filename
= "/my/path/to/foo";
680 display
= g_filename_display_basename (filename
);
681 g_assert_cmpstr (display
, ==, "foo");
690 gchar
*out G_GNUC_UNUSED
;
691 gsize bytes_read
= 0;
692 gsize bytes_written
= 0;
693 GError
*error
= NULL
;
695 out
= g_convert (in
, -1, "XXX", "UVZ",
696 &bytes_read
, &bytes_written
, &error
);
698 /* error code is unreliable, since we mishandle errno there */
699 g_assert (error
&& error
->domain
== G_CONVERT_ERROR
);
700 g_error_free (error
);
704 main (int argc
, char *argv
[])
706 g_test_init (&argc
, &argv
, NULL
);
708 g_test_add_func ("/conversion/no-conv", test_no_conv
);
709 g_test_add_func ("/conversion/iconv-state", test_iconv_state
);
710 g_test_add_func ("/conversion/illegal-sequence", test_one_half
);
711 g_test_add_func ("/conversion/byte-order", test_byte_order
);
712 g_test_add_func ("/conversion/unicode", test_unicode_conversions
);
713 g_test_add_func ("/conversion/filename-utf8", test_filename_utf8
);
714 g_test_add_func ("/conversion/filename-display", test_filename_display
);
716 return g_test_run ();