/* GLIB - Library of useful routines for C programming
 * Copyright (C) 1995-1997  Peter Mattis, Spencer Kimball and Josh MacDonald
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

/*
 * Modified by the GLib Team and others 1997-2000.  See the AUTHORS
 * file for a list of people on the GLib Team.  See the ChangeLog
 * files for a list of changes.  These files are distributed with
 * GLib at ftp://ftp.gtk.org/pub/gtk/.
 */
27 #undef G_DISABLE_ASSERT
36 test_iconv_state (void)
38 gchar
*in
= "\xf4\xe5\xf8\xe5\xed";
39 gchar
*expected
= "\xd7\xa4\xd7\x95\xd7\xa8\xd7\x95\xd7\x9d";
42 gsize bytes_written
= 0;
45 out
= g_convert (in
, -1, "UTF-8", "CP1255",
46 &bytes_read
, &bytes_written
, &error
);
48 if (error
&& error
->code
== G_CONVERT_ERROR_NO_CONVERSION
)
49 return; /* silently skip if CP1255 is not supported, see bug 467707 */
51 g_assert_no_error (error
);
52 g_assert_cmpint (bytes_read
, ==, 5);
53 g_assert_cmpint (bytes_written
, ==, 10);
54 g_assert_cmpstr (out
, ==, expected
);
58 /* some tests involving "vulgar fraction one half" */
62 gchar
*in
= "\xc2\xbd";
65 gsize bytes_written
= 0;
68 out
= g_convert (in
, -1,
69 "ISO-8859-1", "UTF-8",
70 &bytes_read
, &bytes_written
,
73 g_assert_no_error (error
);
74 g_assert_cmpint (bytes_read
, ==, 2);
75 g_assert_cmpint (bytes_written
, ==, 1);
76 g_assert_cmpstr (out
, ==, "\xbd");
79 out
= g_convert (in
, -1,
80 "ISO-8859-15", "UTF-8",
81 &bytes_read
, &bytes_written
,
84 g_assert_error (error
, G_CONVERT_ERROR
, G_CONVERT_ERROR_ILLEGAL_SEQUENCE
);
85 g_assert_cmpint (bytes_read
, ==, 0);
86 g_assert_cmpint (bytes_written
, ==, 0);
87 g_assert_cmpstr (out
, ==, NULL
);
88 g_clear_error (&error
);
91 out
= g_convert_with_fallback (in
, -1,
92 "ISO8859-15", "UTF-8",
94 &bytes_read
, &bytes_written
,
97 g_assert_no_error (error
);
98 g_assert_cmpint (bytes_read
, ==, 2);
99 g_assert_cmpint (bytes_written
, ==, 1);
100 g_assert_cmpstr (out
, ==, "a");
105 test_byte_order (void)
107 gchar in_be
[4] = { 0xfe, 0xff, 0x03, 0x93}; /* capital gamma */
108 gchar in_le
[4] = { 0xff, 0xfe, 0x93, 0x03};
109 gchar
*expected
= "\xce\x93";
111 gsize bytes_read
= 0;
112 gsize bytes_written
= 0;
113 GError
*error
= NULL
;
115 out
= g_convert (in_be
, sizeof (in_be
),
117 &bytes_read
, &bytes_written
,
120 g_assert_no_error (error
);
121 g_assert_cmpint (bytes_read
, ==, 4);
122 g_assert_cmpint (bytes_written
, ==, 2);
123 g_assert_cmpstr (out
, ==, expected
);
126 out
= g_convert (in_le
, sizeof (in_le
),
128 &bytes_read
, &bytes_written
,
131 g_assert_no_error (error
);
132 g_assert_cmpint (bytes_read
, ==, 4);
133 g_assert_cmpint (bytes_written
, ==, 2);
134 g_assert_cmpstr (out
, ==, expected
);
139 check_utf8_to_ucs4 (const char *utf8
,
141 const gunichar
*ucs4
,
145 gunichar
*result
, *result2
, *result3
;
146 glong items_read
, items_read2
;
147 glong items_written
, items_written2
;
148 GError
*error
, *error2
, *error3
;
153 /* check the fast conversion */
154 result
= g_utf8_to_ucs4_fast (utf8
, utf8_len
, &items_written
);
156 g_assert_cmpint (items_written
, ==, ucs4_len
);
158 for (i
= 0; i
<= items_written
; i
++)
159 g_assert (result
[i
] == ucs4
[i
]);
165 result
= g_utf8_to_ucs4 (utf8
, utf8_len
, &items_read
, &items_written
, &error
);
167 if (utf8_len
== strlen (utf8
))
169 /* check that len == -1 yields identical results */
171 result2
= g_utf8_to_ucs4 (utf8
, -1, &items_read2
, &items_written2
, &error2
);
172 g_assert (error
|| items_read2
== items_read
);
173 g_assert (error
|| items_written2
== items_written2
);
174 g_assert_cmpint (!!result
, ==, !!result2
);
175 g_assert_cmpint (!!error
, ==, !!error2
);
177 for (i
= 0; i
<= items_written
; i
++)
178 g_assert (result
[i
] == result2
[i
]);
182 g_error_free (error2
);
186 result3
= g_utf8_to_ucs4 (utf8
, utf8_len
, NULL
, NULL
, &error3
);
188 if (error3
&& error3
->code
== G_CONVERT_ERROR_PARTIAL_INPUT
)
190 g_assert_no_error (error
);
191 g_assert_cmpint (items_read
, ==, error_pos
);
192 g_assert_cmpint (items_written
, ==, ucs4_len
);
194 for (i
= 0; i
<= items_written
; i
++)
195 g_assert (result
[i
] == ucs4
[i
]);
196 g_error_free (error3
);
200 g_assert (error
!= NULL
);
201 g_assert (result
== NULL
);
202 g_assert_cmpint (items_read
, ==, error_pos
);
203 g_error_free (error
);
205 g_assert (error3
!= NULL
);
206 g_assert (result3
== NULL
);
207 g_error_free (error3
);
211 g_assert_no_error (error
);
212 g_assert_cmpint (items_read
, ==, utf8_len
);
213 g_assert_cmpint (items_written
, ==, ucs4_len
);
215 for (i
= 0; i
<= items_written
; i
++)
216 g_assert (result
[i
] == ucs4
[i
]);
218 g_assert_no_error (error3
);
220 for (i
= 0; i
<= ucs4_len
; i
++)
221 g_assert (result3
[i
] == ucs4
[i
]);
229 check_ucs4_to_utf8 (const gunichar
*ucs4
,
235 gchar
*result
, *result2
, *result3
;
236 glong items_read
, items_read2
;
237 glong items_written
, items_written2
;
238 GError
*error
, *error2
, *error3
;
241 result
= g_ucs4_to_utf8 (ucs4
, ucs4_len
, &items_read
, &items_written
, &error
);
243 if (ucs4
[ucs4_len
] == 0)
245 /* check that len == -1 yields identical results */
247 result2
= g_ucs4_to_utf8 (ucs4
, -1, &items_read2
, &items_written2
, &error2
);
249 g_assert (error
|| items_read2
== items_read
);
250 g_assert (error
|| items_written2
== items_written
);
251 g_assert_cmpint (!!result
, ==, !!result2
);
252 g_assert_cmpint (!!error
, ==, !!error2
);
254 g_assert_cmpstr (result
, ==, result2
);
258 g_error_free (error2
);
262 result3
= g_ucs4_to_utf8 (ucs4
, ucs4_len
, NULL
, NULL
, &error3
);
266 g_assert (error
!= NULL
);
267 g_assert (result
== NULL
);
268 g_assert_cmpint (items_read
, ==, error_pos
);
269 g_error_free (error
);
271 g_assert (error3
!= NULL
);
272 g_assert (result3
== NULL
);
273 g_error_free (error3
);
277 g_assert_no_error (error
);
278 g_assert_cmpint (items_read
, ==, ucs4_len
);
279 g_assert_cmpint (items_written
, ==, utf8_len
);
281 g_assert_cmpstr (result
, ==, utf8
);
283 g_assert_no_error (error3
);
285 g_assert_cmpstr (result3
, ==, utf8
);
293 check_utf8_to_utf16 (const char *utf8
,
295 const gunichar2
*utf16
,
299 gunichar2
*result
, *result2
, *result3
;
300 glong items_read
, items_read2
;
301 glong items_written
, items_written2
;
302 GError
*error
, *error2
, *error3
;
306 result
= g_utf8_to_utf16 (utf8
, utf8_len
, &items_read
, &items_written
, &error
);
308 if (utf8_len
== strlen (utf8
))
310 /* check that len == -1 yields identical results */
312 result2
= g_utf8_to_utf16 (utf8
, -1, &items_read2
, &items_written2
, &error2
);
313 g_assert (error
|| items_read2
== items_read
);
314 g_assert (error
|| items_written2
== items_written2
);
315 g_assert_cmpint (!!result
, ==, !!result2
);
316 g_assert_cmpint (!!error
, ==, !!error2
);
318 for (i
= 0; i
<= items_written
; i
++)
319 g_assert (result
[i
] == result2
[i
]);
323 g_error_free (error2
);
327 result3
= g_utf8_to_utf16 (utf8
, utf8_len
, NULL
, NULL
, &error3
);
329 if (error3
&& error3
->code
== G_CONVERT_ERROR_PARTIAL_INPUT
)
331 g_assert_no_error (error
);
332 g_assert_cmpint (items_read
, ==, error_pos
);
333 g_assert_cmpint (items_written
, ==, utf16_len
);
335 for (i
= 0; i
<= items_written
; i
++)
336 g_assert (result
[i
] == utf16
[i
]);
337 g_error_free (error3
);
341 g_assert (error
!= NULL
);
342 g_assert (result
== NULL
);
343 g_assert_cmpint (items_read
, ==, error_pos
);
344 g_error_free (error
);
346 g_assert (error3
!= NULL
);
347 g_assert (result3
== NULL
);
348 g_error_free (error3
);
352 g_assert_no_error (error
);
353 g_assert_cmpint (items_read
, ==, utf8_len
);
354 g_assert_cmpint (items_written
, ==, utf16_len
);
356 for (i
= 0; i
<= items_written
; i
++)
357 g_assert (result
[i
] == utf16
[i
]);
359 g_assert_no_error (error3
);
361 for (i
= 0; i
<= utf16_len
; i
++)
362 g_assert (result3
[i
] == utf16
[i
]);
370 check_utf16_to_utf8 (const gunichar2
*utf16
,
376 gchar
*result
, *result2
, *result3
;
377 glong items_read
, items_read2
;
378 glong items_written
, items_written2
;
379 GError
*error
, *error2
, *error3
;
382 result
= g_utf16_to_utf8 (utf16
, utf16_len
, &items_read
, &items_written
, &error
);
383 if (utf16
[utf16_len
] == 0)
385 /* check that len == -1 yields identical results */
387 result2
= g_utf16_to_utf8 (utf16
, -1, &items_read2
, &items_written2
, &error2
);
389 g_assert (error
|| items_read2
== items_read
);
390 g_assert (error
|| items_written2
== items_written
);
391 g_assert_cmpint (!!result
, ==, !!result2
);
392 g_assert_cmpint (!!error
, ==, !!error2
);
394 g_assert_cmpstr (result
, ==, result2
);
398 g_error_free (error2
);
402 result3
= g_utf16_to_utf8 (utf16
, utf16_len
, NULL
, NULL
, &error3
);
404 if (error3
&& error3
->code
== G_CONVERT_ERROR_PARTIAL_INPUT
)
406 g_assert_no_error (error
);
407 g_assert_cmpint (items_read
, ==, error_pos
);
408 g_assert_cmpint (items_read
+ 1, ==, utf16_len
);
409 g_assert_cmpint (items_written
, ==, utf8_len
);
411 g_assert_cmpstr (result
, ==, utf8
);
412 g_error_free (error3
);
416 g_assert (error
!= NULL
);
417 g_assert (result
== NULL
);
418 g_assert_cmpint (items_read
, ==, error_pos
);
419 g_error_free (error
);
421 g_assert (error3
!= NULL
);
422 g_assert (result3
== NULL
);
423 g_error_free (error3
);
427 g_assert_no_error (error
);
428 g_assert_cmpint (items_read
, ==, utf16_len
);
429 g_assert_cmpint (items_written
, ==, utf8_len
);
431 g_assert_cmpstr (result
, ==, utf8
);
433 g_assert_no_error (error3
);
435 g_assert_cmpstr (result3
, ==, utf8
);
443 check_ucs4_to_utf16 (const gunichar
*ucs4
,
445 const gunichar2
*utf16
,
449 gunichar2
*result
, *result2
, *result3
;
450 glong items_read
, items_read2
;
451 glong items_written
, items_written2
;
452 GError
*error
, *error2
, *error3
;
456 result
= g_ucs4_to_utf16 (ucs4
, ucs4_len
, &items_read
, &items_written
, &error
);
458 if (ucs4
[ucs4_len
] == 0)
460 /* check that len == -1 yields identical results */
462 result2
= g_ucs4_to_utf16 (ucs4
, -1, &items_read2
, &items_written2
, &error2
);
464 g_assert (error
|| items_read2
== items_read
);
465 g_assert (error
|| items_written2
== items_written
);
466 g_assert_cmpint (!!result
, ==, !!result2
);
467 g_assert_cmpint (!!error
, ==, !!error2
);
469 for (i
= 0; i
<= utf16_len
; i
++)
470 g_assert (result
[i
] == result2
[i
]);
474 g_error_free (error2
);
478 result3
= g_ucs4_to_utf16 (ucs4
, -1, NULL
, NULL
, &error3
);
482 g_assert (error
!= NULL
);
483 g_assert (result
== NULL
);
484 g_assert_cmpint (items_read
, ==, error_pos
);
485 g_error_free (error
);
487 g_assert (error3
!= NULL
);
488 g_assert (result3
== NULL
);
489 g_error_free (error3
);
493 g_assert_no_error (error
);
494 g_assert_cmpint (items_read
, ==, ucs4_len
);
495 g_assert_cmpint (items_written
, ==, utf16_len
);
497 for (i
= 0; i
<= utf16_len
; i
++)
498 g_assert (result
[i
] == utf16
[i
]);
500 g_assert_no_error (error3
);
502 for (i
= 0; i
<= utf16_len
; i
++)
503 g_assert (result3
[i
] == utf16
[i
]);
511 check_utf16_to_ucs4 (const gunichar2
*utf16
,
513 const gunichar
*ucs4
,
517 gunichar
*result
, *result2
, *result3
;
518 glong items_read
, items_read2
;
519 glong items_written
, items_written2
;
520 GError
*error
, *error2
, *error3
;
524 result
= g_utf16_to_ucs4 (utf16
, utf16_len
, &items_read
, &items_written
, &error
);
525 if (utf16
[utf16_len
] == 0)
527 /* check that len == -1 yields identical results */
529 result2
= g_utf16_to_ucs4 (utf16
, -1, &items_read2
, &items_written2
, &error2
);
530 g_assert (error
|| items_read2
== items_read
);
531 g_assert (error
|| items_written2
== items_written2
);
532 g_assert_cmpint (!!result
, ==, !!result2
);
533 g_assert_cmpint (!!error
, ==, !!error2
);
535 for (i
= 0; i
<= items_written
; i
++)
536 g_assert (result
[i
] == result2
[i
]);
540 g_error_free (error2
);
544 result3
= g_utf16_to_ucs4 (utf16
, utf16_len
, NULL
, NULL
, &error3
);
546 if (error3
&& error3
->code
== G_CONVERT_ERROR_PARTIAL_INPUT
)
548 g_assert_no_error (error
);
549 g_assert_cmpint (items_read
, ==, error_pos
);
550 g_assert_cmpint (items_read
+ 1, ==, utf16_len
);
551 g_assert_cmpint (items_written
, ==, ucs4_len
);
553 for (i
= 0; i
<= items_written
; i
++)
554 g_assert (result
[i
] == ucs4
[i
]);
555 g_error_free (error3
);
559 g_assert (error
!= NULL
);
560 g_assert (result
== NULL
);
561 g_assert_cmpint (items_read
, ==, error_pos
);
562 g_error_free (error
);
564 g_assert (error3
!= NULL
);
565 g_assert (result3
== NULL
);
566 g_error_free (error3
);
570 g_assert_no_error (error
);
571 g_assert_cmpint (items_read
, ==, utf16_len
);
572 g_assert_cmpint (items_written
, ==, ucs4_len
);
574 for (i
= 0; i
<= ucs4_len
; i
++)
575 g_assert (result
[i
] == ucs4
[i
]);
577 g_assert_no_error (error3
);
579 for (i
= 0; i
<= ucs4_len
; i
++)
580 g_assert (result3
[i
] == ucs4
[i
]);
588 test_unicode_conversions (void)
592 gunichar2 utf16
[100];
595 ucs4
[0] = 0x61; ucs4
[1] = 0x62; ucs4
[2] = 0x63; ucs4
[3] = 0;
596 utf16
[0] = 0x61; utf16
[1] = 0x62; utf16
[2] = 0x63; utf16
[3] = 0;
598 check_utf8_to_ucs4 (utf8
, 3, ucs4
, 3, 0);
599 check_ucs4_to_utf8 (ucs4
, 3, utf8
, 3, 0);
600 check_utf8_to_utf16 (utf8
, 3, utf16
, 3, 0);
601 check_utf16_to_utf8 (utf16
, 3, utf8
, 3, 0);
602 check_ucs4_to_utf16 (ucs4
, 3, utf16
, 3, 0);
603 check_utf16_to_ucs4 (utf16
, 3, ucs4
, 3, 0);
605 utf8
= "\316\261\316\262\316\263";
606 ucs4
[0] = 0x03b1; ucs4
[1] = 0x03b2; ucs4
[2] = 0x03b3; ucs4
[3] = 0;
607 utf16
[0] = 0x03b1; utf16
[1] = 0x03b2; utf16
[2] = 0x03b3; utf16
[3] = 0;
609 check_utf8_to_ucs4 (utf8
, 6, ucs4
, 3, 0);
610 check_ucs4_to_utf8 (ucs4
, 3, utf8
, 6, 0);
611 check_utf8_to_utf16 (utf8
, 6, utf16
, 3, 0);
612 check_utf16_to_utf8 (utf16
, 3, utf8
, 6, 0);
613 check_ucs4_to_utf16 (ucs4
, 3, utf16
, 3, 0);
614 check_utf16_to_ucs4 (utf16
, 3, ucs4
, 3, 0);
616 /* partial utf8 character */
618 ucs4
[0] = 0x61; ucs4
[1] = 0x62; ucs4
[2] = 0x63; ucs4
[3] = 0;
619 utf16
[0] = 0x61; utf16
[1] = 0x62; utf16
[2] = 0x63; utf16
[3] = 0;
621 check_utf8_to_ucs4 (utf8
, 4, ucs4
, 3, 3);
622 check_utf8_to_utf16 (utf8
, 4, utf16
, 3, 3);
625 utf8
= "abc\316\316";
629 check_utf8_to_ucs4 (utf8
, 5, ucs4
, 0, 3);
630 check_utf8_to_utf16 (utf8
, 5, utf16
, 0, 3);
632 /* partial utf16 character */
634 ucs4
[0] = 0x61; ucs4
[1] = 0x62; ucs4
[2] = 0;
635 utf16
[0] = 0x61; utf16
[1] = 0x62; utf16
[2] = 0xd801; utf16
[3] = 0;
637 check_utf16_to_utf8 (utf16
, 3, utf8
, 2, 2);
638 check_utf16_to_ucs4 (utf16
, 3, ucs4
, 2, 2);
643 utf16
[0] = 0x61; utf16
[1] = 0x62; utf16
[2] = 0xdc01; utf16
[3] = 0;
645 check_utf16_to_utf8 (utf16
, 3, utf8
, 0, 2);
646 check_utf16_to_ucs4 (utf16
, 3, ucs4
, 0, 2);
650 ucs4
[0] = 0x61; ucs4
[1] = 0x62; ucs4
[2] = 0x80000000; ucs4
[3] = 0;
653 check_ucs4_to_utf8 (ucs4
, 3, utf8
, 0, 2);
654 check_ucs4_to_utf16 (ucs4
, 3, utf16
, 0, 2);
658 test_filename_utf8 (void)
660 const gchar
*filename
= "/my/path/to/foo";
666 utf8
= g_filename_to_utf8 (filename
, -1, NULL
, NULL
, &error
);
667 g_assert_no_error (error
);
668 back
= g_filename_from_utf8 (utf8
, -1, NULL
, NULL
, &error
);
669 g_assert_no_error (error
);
670 g_assert_cmpstr (back
, ==, filename
);
677 test_filename_display (void)
679 const gchar
*filename
= "/my/path/to/foo";
682 display
= g_filename_display_basename (filename
);
683 g_assert_cmpstr (display
, ==, "foo");
692 gchar
*out G_GNUC_UNUSED
;
693 gsize bytes_read
= 0;
694 gsize bytes_written
= 0;
695 GError
*error
= NULL
;
697 out
= g_convert (in
, -1, "XXX", "UVZ",
698 &bytes_read
, &bytes_written
, &error
);
700 /* error code is unreliable, since we mishandle errno there */
701 g_assert (error
&& error
->domain
== G_CONVERT_ERROR
);
702 g_error_free (error
);
706 main (int argc
, char *argv
[])
708 g_test_init (&argc
, &argv
, NULL
);
710 g_test_add_func ("/conversion/no-conv", test_no_conv
);
711 g_test_add_func ("/conversion/iconv-state", test_iconv_state
);
712 g_test_add_func ("/conversion/illegal-sequence", test_one_half
);
713 g_test_add_func ("/conversion/byte-order", test_byte_order
);
714 g_test_add_func ("/conversion/unicode", test_unicode_conversions
);
715 g_test_add_func ("/conversion/filename-utf8", test_filename_utf8
);
716 g_test_add_func ("/conversion/filename-display", test_filename_display
);
718 return g_test_run ();