utf8: add unit test for g_utf8_make_valid
[glib.git] / glib / tests / convert.c
blob a712ff34c432579fbe72420c0c62b2e4bebd7b15
1 /* GLIB - Library of useful routines for C programming
2 * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 * Modified by the GLib Team and others 1997-2000. See the AUTHORS
20 * file for a list of people on the GLib Team. See the ChangeLog
21 * files for a list of changes. These files are distributed with
22 * GLib at ftp://ftp.gtk.org/pub/gtk/.
25 #undef G_DISABLE_ASSERT
26 #undef G_LOG_DOMAIN
28 #include <string.h>
30 #include <glib.h>
32 /* Bug 311337 */
33 static void
34 test_iconv_state (void)
36 gchar *in = "\xf4\xe5\xf8\xe5\xed";
37 gchar *expected = "\xd7\xa4\xd7\x95\xd7\xa8\xd7\x95\xd7\x9d";
38 gchar *out;
39 gsize bytes_read = 0;
40 gsize bytes_written = 0;
41 GError *error = NULL;
43 out = g_convert (in, -1, "UTF-8", "CP1255",
44 &bytes_read, &bytes_written, &error);
46 if (error && error->code == G_CONVERT_ERROR_NO_CONVERSION)
47 return; /* silently skip if CP1255 is not supported, see bug 467707 */
49 g_assert_no_error (error);
50 g_assert_cmpint (bytes_read, ==, 5);
51 g_assert_cmpint (bytes_written, ==, 10);
52 g_assert_cmpstr (out, ==, expected);
53 g_free (out);
56 /* some tests involving "vulgar fraction one half" */
57 static void
58 test_one_half (void)
60 gchar *in = "\xc2\xbd";
61 gchar *out;
62 gsize bytes_read = 0;
63 gsize bytes_written = 0;
64 GError *error = NULL;
66 out = g_convert (in, -1,
67 "ISO-8859-1", "UTF-8",
68 &bytes_read, &bytes_written,
69 &error);
71 g_assert_no_error (error);
72 g_assert_cmpint (bytes_read, ==, 2);
73 g_assert_cmpint (bytes_written, ==, 1);
74 g_assert_cmpstr (out, ==, "\xbd");
75 g_free (out);
77 out = g_convert (in, -1,
78 "ISO-8859-15", "UTF-8",
79 &bytes_read, &bytes_written,
80 &error);
82 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
83 g_assert_cmpint (bytes_read, ==, 0);
84 g_assert_cmpint (bytes_written, ==, 0);
85 g_assert_cmpstr (out, ==, NULL);
86 g_clear_error (&error);
87 g_free (out);
89 out = g_convert_with_fallback (in, -1,
90 "ISO8859-15", "UTF-8",
91 "a",
92 &bytes_read, &bytes_written,
93 &error);
95 g_assert_no_error (error);
96 g_assert_cmpint (bytes_read, ==, 2);
97 g_assert_cmpint (bytes_written, ==, 1);
98 g_assert_cmpstr (out, ==, "a");
99 g_free (out);
102 static void
103 test_byte_order (void)
105 gchar in_be[4] = { 0xfe, 0xff, 0x03, 0x93}; /* capital gamma */
106 gchar in_le[4] = { 0xff, 0xfe, 0x93, 0x03};
107 gchar *expected = "\xce\x93";
108 gchar *out;
109 gsize bytes_read = 0;
110 gsize bytes_written = 0;
111 GError *error = NULL;
113 out = g_convert (in_be, sizeof (in_be),
114 "UTF-8", "UTF-16",
115 &bytes_read, &bytes_written,
116 &error);
118 g_assert_no_error (error);
119 g_assert_cmpint (bytes_read, ==, 4);
120 g_assert_cmpint (bytes_written, ==, 2);
121 g_assert_cmpstr (out, ==, expected);
122 g_free (out);
124 out = g_convert (in_le, sizeof (in_le),
125 "UTF-8", "UTF-16",
126 &bytes_read, &bytes_written,
127 &error);
129 g_assert_no_error (error);
130 g_assert_cmpint (bytes_read, ==, 4);
131 g_assert_cmpint (bytes_written, ==, 2);
132 g_assert_cmpstr (out, ==, expected);
133 g_free (out);
136 static void
137 check_utf8_to_ucs4 (const char *utf8,
138 glong utf8_len,
139 const gunichar *ucs4,
140 glong ucs4_len,
141 glong error_pos)
143 gunichar *result, *result2, *result3;
144 glong items_read, items_read2;
145 glong items_written, items_written2;
146 GError *error, *error2, *error3;
147 gint i;
149 if (!error_pos)
151 /* check the fast conversion */
152 result = g_utf8_to_ucs4_fast (utf8, utf8_len, &items_written);
154 g_assert_cmpint (items_written, ==, ucs4_len);
155 g_assert (result);
156 for (i = 0; i <= items_written; i++)
157 g_assert (result[i] == ucs4[i]);
159 g_free (result);
162 error = NULL;
163 result = g_utf8_to_ucs4 (utf8, utf8_len, &items_read, &items_written, &error);
165 if (utf8_len == strlen (utf8))
167 /* check that len == -1 yields identical results */
168 error2 = NULL;
169 result2 = g_utf8_to_ucs4 (utf8, -1, &items_read2, &items_written2, &error2);
170 g_assert (error || items_read2 == items_read);
171 g_assert (error || items_written2 == items_written2);
172 g_assert_cmpint (!!result, ==, !!result2);
173 g_assert_cmpint (!!error, ==, !!error2);
174 if (result)
175 for (i = 0; i <= items_written; i++)
176 g_assert (result[i] == result2[i]);
178 g_free (result2);
179 if (error2)
180 g_error_free (error2);
183 error3 = NULL;
184 result3 = g_utf8_to_ucs4 (utf8, utf8_len, NULL, NULL, &error3);
186 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
188 g_assert_no_error (error);
189 g_assert_cmpint (items_read, ==, error_pos);
190 g_assert_cmpint (items_written, ==, ucs4_len);
191 g_assert (result);
192 for (i = 0; i <= items_written; i++)
193 g_assert (result[i] == ucs4[i]);
194 g_error_free (error3);
196 else if (error_pos)
198 g_assert (error != NULL);
199 g_assert (result == NULL);
200 g_assert_cmpint (items_read, ==, error_pos);
201 g_error_free (error);
203 g_assert (error3 != NULL);
204 g_assert (result3 == NULL);
205 g_error_free (error3);
207 else
209 g_assert_no_error (error);
210 g_assert_cmpint (items_read, ==, utf8_len);
211 g_assert_cmpint (items_written, ==, ucs4_len);
212 g_assert (result);
213 for (i = 0; i <= items_written; i++)
214 g_assert (result[i] == ucs4[i]);
216 g_assert_no_error (error3);
217 g_assert (result3);
218 for (i = 0; i <= ucs4_len; i++)
219 g_assert (result3[i] == ucs4[i]);
222 g_free (result);
223 g_free (result3);
226 static void
227 check_ucs4_to_utf8 (const gunichar *ucs4,
228 glong ucs4_len,
229 const char *utf8,
230 glong utf8_len,
231 glong error_pos)
233 gchar *result, *result2, *result3;
234 glong items_read, items_read2;
235 glong items_written, items_written2;
236 GError *error, *error2, *error3;
238 error = NULL;
239 result = g_ucs4_to_utf8 (ucs4, ucs4_len, &items_read, &items_written, &error);
241 if (ucs4[ucs4_len] == 0)
243 /* check that len == -1 yields identical results */
244 error2 = NULL;
245 result2 = g_ucs4_to_utf8 (ucs4, -1, &items_read2, &items_written2, &error2);
247 g_assert (error || items_read2 == items_read);
248 g_assert (error || items_written2 == items_written);
249 g_assert_cmpint (!!result, ==, !!result2);
250 g_assert_cmpint (!!error, ==, !!error2);
251 if (result)
252 g_assert_cmpstr (result, ==, result2);
254 g_free (result2);
255 if (error2)
256 g_error_free (error2);
259 error3 = NULL;
260 result3 = g_ucs4_to_utf8 (ucs4, ucs4_len, NULL, NULL, &error3);
262 if (error_pos)
264 g_assert (error != NULL);
265 g_assert (result == NULL);
266 g_assert_cmpint (items_read, ==, error_pos);
267 g_error_free (error);
269 g_assert (error3 != NULL);
270 g_assert (result3 == NULL);
271 g_error_free (error3);
273 else
275 g_assert_no_error (error);
276 g_assert_cmpint (items_read, ==, ucs4_len);
277 g_assert_cmpint (items_written, ==, utf8_len);
278 g_assert (result);
279 g_assert_cmpstr (result, ==, utf8);
281 g_assert_no_error (error3);
282 g_assert (result3);
283 g_assert_cmpstr (result3, ==, utf8);
286 g_free (result);
287 g_free (result3);
290 static void
291 check_utf8_to_utf16 (const char *utf8,
292 glong utf8_len,
293 const gunichar2 *utf16,
294 glong utf16_len,
295 glong error_pos)
297 gunichar2 *result, *result2, *result3;
298 glong items_read, items_read2;
299 glong items_written, items_written2;
300 GError *error, *error2, *error3;
301 gint i;
303 error = NULL;
304 result = g_utf8_to_utf16 (utf8, utf8_len, &items_read, &items_written, &error);
306 if (utf8_len == strlen (utf8))
308 /* check that len == -1 yields identical results */
309 error2 = NULL;
310 result2 = g_utf8_to_utf16 (utf8, -1, &items_read2, &items_written2, &error2);
311 g_assert (error || items_read2 == items_read);
312 g_assert (error || items_written2 == items_written2);
313 g_assert_cmpint (!!result, ==, !!result2);
314 g_assert_cmpint (!!error, ==, !!error2);
315 if (result)
316 for (i = 0; i <= items_written; i++)
317 g_assert (result[i] == result2[i]);
319 g_free (result2);
320 if (error2)
321 g_error_free (error2);
324 error3 = NULL;
325 result3 = g_utf8_to_utf16 (utf8, utf8_len, NULL, NULL, &error3);
327 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
329 g_assert_no_error (error);
330 g_assert_cmpint (items_read, ==, error_pos);
331 g_assert_cmpint (items_written, ==, utf16_len);
332 g_assert (result);
333 for (i = 0; i <= items_written; i++)
334 g_assert (result[i] == utf16[i]);
335 g_error_free (error3);
337 else if (error_pos)
339 g_assert (error != NULL);
340 g_assert (result == NULL);
341 g_assert_cmpint (items_read, ==, error_pos);
342 g_error_free (error);
344 g_assert (error3 != NULL);
345 g_assert (result3 == NULL);
346 g_error_free (error3);
348 else
350 g_assert_no_error (error);
351 g_assert_cmpint (items_read, ==, utf8_len);
352 g_assert_cmpint (items_written, ==, utf16_len);
353 g_assert (result);
354 for (i = 0; i <= items_written; i++)
355 g_assert (result[i] == utf16[i]);
357 g_assert_no_error (error3);
358 g_assert (result3);
359 for (i = 0; i <= utf16_len; i++)
360 g_assert (result3[i] == utf16[i]);
363 g_free (result);
364 g_free (result3);
367 static void
368 check_utf16_to_utf8 (const gunichar2 *utf16,
369 glong utf16_len,
370 const char *utf8,
371 glong utf8_len,
372 glong error_pos)
374 gchar *result, *result2, *result3;
375 glong items_read, items_read2;
376 glong items_written, items_written2;
377 GError *error, *error2, *error3;
379 error = NULL;
380 result = g_utf16_to_utf8 (utf16, utf16_len, &items_read, &items_written, &error);
381 if (utf16[utf16_len] == 0)
383 /* check that len == -1 yields identical results */
384 error2 = NULL;
385 result2 = g_utf16_to_utf8 (utf16, -1, &items_read2, &items_written2, &error2);
387 g_assert (error || items_read2 == items_read);
388 g_assert (error || items_written2 == items_written);
389 g_assert_cmpint (!!result, ==, !!result2);
390 g_assert_cmpint (!!error, ==, !!error2);
391 if (result)
392 g_assert_cmpstr (result, ==, result2);
394 g_free (result2);
395 if (error2)
396 g_error_free (error2);
399 error3 = NULL;
400 result3 = g_utf16_to_utf8 (utf16, utf16_len, NULL, NULL, &error3);
402 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
404 g_assert_no_error (error);
405 g_assert_cmpint (items_read, ==, error_pos);
406 g_assert_cmpint (items_read + 1, ==, utf16_len);
407 g_assert_cmpint (items_written, ==, utf8_len);
408 g_assert (result);
409 g_assert_cmpstr (result, ==, utf8);
410 g_error_free (error3);
412 else if (error_pos)
414 g_assert (error != NULL);
415 g_assert (result == NULL);
416 g_assert_cmpint (items_read, ==, error_pos);
417 g_error_free (error);
419 g_assert (error3 != NULL);
420 g_assert (result3 == NULL);
421 g_error_free (error3);
423 else
425 g_assert_no_error (error);
426 g_assert_cmpint (items_read, ==, utf16_len);
427 g_assert_cmpint (items_written, ==, utf8_len);
428 g_assert (result);
429 g_assert_cmpstr (result, ==, utf8);
431 g_assert_no_error (error3);
432 g_assert (result3);
433 g_assert_cmpstr (result3, ==, utf8);
436 g_free (result);
437 g_free (result3);
440 static void
441 check_ucs4_to_utf16 (const gunichar *ucs4,
442 glong ucs4_len,
443 const gunichar2 *utf16,
444 glong utf16_len,
445 glong error_pos)
447 gunichar2 *result, *result2, *result3;
448 glong items_read, items_read2;
449 glong items_written, items_written2;
450 GError *error, *error2, *error3;
451 gint i;
453 error = NULL;
454 result = g_ucs4_to_utf16 (ucs4, ucs4_len, &items_read, &items_written, &error);
456 if (ucs4[ucs4_len] == 0)
458 /* check that len == -1 yields identical results */
459 error2 = NULL;
460 result2 = g_ucs4_to_utf16 (ucs4, -1, &items_read2, &items_written2, &error2);
462 g_assert (error || items_read2 == items_read);
463 g_assert (error || items_written2 == items_written);
464 g_assert_cmpint (!!result, ==, !!result2);
465 g_assert_cmpint (!!error, ==, !!error2);
466 if (result)
467 for (i = 0; i <= utf16_len; i++)
468 g_assert (result[i] == result2[i]);
470 g_free (result2);
471 if (error2)
472 g_error_free (error2);
475 error3 = NULL;
476 result3 = g_ucs4_to_utf16 (ucs4, -1, NULL, NULL, &error3);
478 if (error_pos)
480 g_assert (error != NULL);
481 g_assert (result == NULL);
482 g_assert_cmpint (items_read, ==, error_pos);
483 g_error_free (error);
485 g_assert (error3 != NULL);
486 g_assert (result3 == NULL);
487 g_error_free (error3);
489 else
491 g_assert_no_error (error);
492 g_assert_cmpint (items_read, ==, ucs4_len);
493 g_assert_cmpint (items_written, ==, utf16_len);
494 g_assert (result);
495 for (i = 0; i <= utf16_len; i++)
496 g_assert (result[i] == utf16[i]);
498 g_assert_no_error (error3);
499 g_assert (result3);
500 for (i = 0; i <= utf16_len; i++)
501 g_assert (result3[i] == utf16[i]);
504 g_free (result);
505 g_free (result3);
508 static void
509 check_utf16_to_ucs4 (const gunichar2 *utf16,
510 glong utf16_len,
511 const gunichar *ucs4,
512 glong ucs4_len,
513 glong error_pos)
515 gunichar *result, *result2, *result3;
516 glong items_read, items_read2;
517 glong items_written, items_written2;
518 GError *error, *error2, *error3;
519 gint i;
521 error = NULL;
522 result = g_utf16_to_ucs4 (utf16, utf16_len, &items_read, &items_written, &error);
523 if (utf16[utf16_len] == 0)
525 /* check that len == -1 yields identical results */
526 error2 = NULL;
527 result2 = g_utf16_to_ucs4 (utf16, -1, &items_read2, &items_written2, &error2);
528 g_assert (error || items_read2 == items_read);
529 g_assert (error || items_written2 == items_written2);
530 g_assert_cmpint (!!result, ==, !!result2);
531 g_assert_cmpint (!!error, ==, !!error2);
532 if (result)
533 for (i = 0; i <= items_written; i++)
534 g_assert (result[i] == result2[i]);
536 g_free (result2);
537 if (error2)
538 g_error_free (error2);
541 error3 = NULL;
542 result3 = g_utf16_to_ucs4 (utf16, utf16_len, NULL, NULL, &error3);
544 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
546 g_assert_no_error (error);
547 g_assert_cmpint (items_read, ==, error_pos);
548 g_assert_cmpint (items_read + 1, ==, utf16_len);
549 g_assert_cmpint (items_written, ==, ucs4_len);
550 g_assert (result);
551 for (i = 0; i <= items_written; i++)
552 g_assert (result[i] == ucs4[i]);
553 g_error_free (error3);
555 else if (error_pos)
557 g_assert (error != NULL);
558 g_assert (result == NULL);
559 g_assert_cmpint (items_read, ==, error_pos);
560 g_error_free (error);
562 g_assert (error3 != NULL);
563 g_assert (result3 == NULL);
564 g_error_free (error3);
566 else
568 g_assert_no_error (error);
569 g_assert_cmpint (items_read, ==, utf16_len);
570 g_assert_cmpint (items_written, ==, ucs4_len);
571 g_assert (result);
572 for (i = 0; i <= ucs4_len; i++)
573 g_assert (result[i] == ucs4[i]);
575 g_assert_no_error (error3);
576 g_assert (result3);
577 for (i = 0; i <= ucs4_len; i++)
578 g_assert (result3[i] == ucs4[i]);
581 g_free (result);
582 g_free (result3);
585 static void
586 test_unicode_conversions (void)
588 char *utf8;
589 gunichar ucs4[100];
590 gunichar2 utf16[100];
592 utf8 = "abc";
593 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x63; ucs4[3] = 0;
594 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0x63; utf16[3] = 0;
596 check_utf8_to_ucs4 (utf8, 3, ucs4, 3, 0);
597 check_ucs4_to_utf8 (ucs4, 3, utf8, 3, 0);
598 check_utf8_to_utf16 (utf8, 3, utf16, 3, 0);
599 check_utf16_to_utf8 (utf16, 3, utf8, 3, 0);
600 check_ucs4_to_utf16 (ucs4, 3, utf16, 3, 0);
601 check_utf16_to_ucs4 (utf16, 3, ucs4, 3, 0);
603 utf8 = "\316\261\316\262\316\263";
604 ucs4[0] = 0x03b1; ucs4[1] = 0x03b2; ucs4[2] = 0x03b3; ucs4[3] = 0;
605 utf16[0] = 0x03b1; utf16[1] = 0x03b2; utf16[2] = 0x03b3; utf16[3] = 0;
607 check_utf8_to_ucs4 (utf8, 6, ucs4, 3, 0);
608 check_ucs4_to_utf8 (ucs4, 3, utf8, 6, 0);
609 check_utf8_to_utf16 (utf8, 6, utf16, 3, 0);
610 check_utf16_to_utf8 (utf16, 3, utf8, 6, 0);
611 check_ucs4_to_utf16 (ucs4, 3, utf16, 3, 0);
612 check_utf16_to_ucs4 (utf16, 3, ucs4, 3, 0);
614 /* partial utf8 character */
615 utf8 = "abc\316";
616 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x63; ucs4[3] = 0;
617 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0x63; utf16[3] = 0;
619 check_utf8_to_ucs4 (utf8, 4, ucs4, 3, 3);
620 check_utf8_to_utf16 (utf8, 4, utf16, 3, 3);
622 /* invalid utf8 */
623 utf8 = "abc\316\316";
624 ucs4[0] = 0;
625 utf16[0] = 0;
627 check_utf8_to_ucs4 (utf8, 5, ucs4, 0, 3);
628 check_utf8_to_utf16 (utf8, 5, utf16, 0, 3);
630 /* partial utf16 character */
631 utf8 = "ab";
632 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0;
633 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0xd801; utf16[3] = 0;
635 check_utf16_to_utf8 (utf16, 3, utf8, 2, 2);
636 check_utf16_to_ucs4 (utf16, 3, ucs4, 2, 2);
638 /* invalid utf16 */
639 utf8 = NULL;
640 ucs4[0] = 0;
641 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0xdc01; utf16[3] = 0;
643 check_utf16_to_utf8 (utf16, 3, utf8, 0, 2);
644 check_utf16_to_ucs4 (utf16, 3, ucs4, 0, 2);
646 /* invalid ucs4 */
647 utf8 = NULL;
648 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x80000000; ucs4[3] = 0;
649 utf16[0] = 0;
651 check_ucs4_to_utf8 (ucs4, 3, utf8, 0, 2);
652 check_ucs4_to_utf16 (ucs4, 3, utf16, 0, 2);
655 static void
656 test_filename_utf8 (void)
658 const gchar *filename = "/my/path/to/foo";
659 gchar *utf8;
660 gchar *back;
661 GError *error;
663 error = NULL;
664 utf8 = g_filename_to_utf8 (filename, -1, NULL, NULL, &error);
665 g_assert_no_error (error);
666 back = g_filename_from_utf8 (utf8, -1, NULL, NULL, &error);
667 g_assert_no_error (error);
668 g_assert_cmpstr (back, ==, filename);
670 g_free (utf8);
671 g_free (back);
674 static void
675 test_filename_display (void)
677 const gchar *filename = "/my/path/to/foo";
678 char *display;
680 display = g_filename_display_basename (filename);
681 g_assert_cmpstr (display, ==, "foo");
683 g_free (display);
686 static void
687 test_no_conv (void)
689 gchar *in = "";
690 gchar *out G_GNUC_UNUSED;
691 gsize bytes_read = 0;
692 gsize bytes_written = 0;
693 GError *error = NULL;
695 out = g_convert (in, -1, "XXX", "UVZ",
696 &bytes_read, &bytes_written, &error);
698 /* error code is unreliable, since we mishandle errno there */
699 g_assert (error && error->domain == G_CONVERT_ERROR);
700 g_error_free (error);
704 main (int argc, char *argv[])
706 g_test_init (&argc, &argv, NULL);
708 g_test_add_func ("/conversion/no-conv", test_no_conv);
709 g_test_add_func ("/conversion/iconv-state", test_iconv_state);
710 g_test_add_func ("/conversion/illegal-sequence", test_one_half);
711 g_test_add_func ("/conversion/byte-order", test_byte_order);
712 g_test_add_func ("/conversion/unicode", test_unicode_conversions);
713 g_test_add_func ("/conversion/filename-utf8", test_filename_utf8);
714 g_test_add_func ("/conversion/filename-display", test_filename_display);
716 return g_test_run ();