remove unused label
[glib.git] / tests / convert-test.c
blob10b8c00032b157cb05638bc65b26725cb7df0db1
/* GLIB - Library of useful routines for C programming
 * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

/*
 * Modified by the GLib Team and others 1997-2000. See the AUTHORS
 * file for a list of people on the GLib Team. See the ChangeLog
 * files for a list of changes. These files are distributed with
 * GLib at ftp://ftp.gtk.org/pub/gtk/.
 */
27 #undef G_DISABLE_ASSERT
28 #undef G_LOG_DOMAIN
30 #include <string.h>
32 #include <glib.h>
34 /* Bug 311337 */
35 static void
36 test_iconv_state (void)
38 gchar *in = "\xf4\xe5\xf8\xe5\xed";
39 gchar *expected = "\xd7\xa4\xd7\x95\xd7\xa8\xd7\x95\xd7\x9d";
40 gchar *out;
41 gsize bytes_read = 0;
42 gsize bytes_written = 0;
43 GError *error = NULL;
45 out = g_convert (in, -1, "UTF-8", "CP1255",
46 &bytes_read, &bytes_written, &error);
48 if (error && error->code == G_CONVERT_ERROR_NO_CONVERSION)
49 return; /* silently skip if CP1255 is not supported, see bug 467707 */
51 g_assert_no_error (error);
52 g_assert (bytes_read == 5);
53 g_assert (bytes_written == 10);
54 g_assert (strcmp (out, expected) == 0);
55 g_free (out);
58 /* some tests involving "vulgar fraction one half" */
59 static void
60 test_one_half (void)
62 gchar *in = "\xc2\xbd";
63 gchar *out;
64 gsize bytes_read = 0;
65 gsize bytes_written = 0;
66 GError *error = NULL;
68 out = g_convert (in, -1,
69 "ISO-8859-1", "UTF-8",
70 &bytes_read, &bytes_written,
71 &error);
73 g_assert_no_error (error);
74 g_assert (bytes_read == 2);
75 g_assert (bytes_written == 1);
76 g_assert (strcmp (out, "\xbd") == 0);
77 g_free (out);
79 out = g_convert (in, -1,
80 "ISO-8859-15", "UTF-8",
81 &bytes_read, &bytes_written,
82 &error);
84 g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
85 g_assert (bytes_read == 0);
86 g_assert (bytes_written == 0);
87 g_assert (out == NULL);
88 g_clear_error (&error);
89 g_free (out);
91 out = g_convert_with_fallback (in, -1,
92 "ISO8859-15", "UTF-8",
93 "a",
94 &bytes_read, &bytes_written,
95 &error);
97 g_assert_no_error (error);
98 g_assert (bytes_read == 2);
99 g_assert (bytes_written == 1);
100 g_assert (strcmp (out, "a") == 0);
101 g_free (out);
104 static void
105 test_byte_order (void)
107 gchar in_be[4] = { 0xfe, 0xff, 0x03, 0x93}; /* capital gamma */
108 gchar in_le[4] = { 0xff, 0xfe, 0x93, 0x03};
109 gchar *expected = "\xce\x93";
110 gchar *out;
111 gsize bytes_read = 0;
112 gsize bytes_written = 0;
113 GError *error = NULL;
115 out = g_convert (in_be, sizeof (in_be),
116 "UTF-8", "UTF-16",
117 &bytes_read, &bytes_written,
118 &error);
120 g_assert_no_error (error);
121 g_assert (bytes_read == 4);
122 g_assert (bytes_written == 2);
123 g_assert (strcmp (out, expected) == 0);
124 g_free (out);
126 out = g_convert (in_le, sizeof (in_le),
127 "UTF-8", "UTF-16",
128 &bytes_read, &bytes_written,
129 &error);
131 g_assert_no_error (error);
132 g_assert (bytes_read == 4);
133 g_assert (bytes_written == 2);
134 g_assert (strcmp (out, expected) == 0);
135 g_free (out);
138 static void
139 check_utf8_to_ucs4 (const char *utf8,
140 glong utf8_len,
141 const gunichar *ucs4,
142 glong ucs4_len,
143 glong error_pos)
145 gunichar *result, *result2, *result3;
146 glong items_read, items_read2;
147 glong items_written, items_written2;
148 GError *error, *error2, *error3;
149 gint i;
151 if (!error_pos)
153 /* check the fast conversion */
154 result = g_utf8_to_ucs4_fast (utf8, utf8_len, &items_written);
156 g_assert (items_written == ucs4_len);
157 g_assert (result);
158 for (i = 0; i <= items_written; i++)
159 g_assert (result[i] == ucs4[i]);
161 g_free (result);
164 error = NULL;
165 result = g_utf8_to_ucs4 (utf8, utf8_len, &items_read, &items_written, &error);
167 if (utf8_len == strlen (utf8))
169 /* check that len == -1 yields identical results */
170 error2 = NULL;
171 result2 = g_utf8_to_ucs4 (utf8, -1, &items_read2, &items_written2, &error2);
172 g_assert (error || items_read2 == items_read);
173 g_assert (error || items_written2 == items_written2);
174 g_assert (!!result == !!result2);
175 g_assert (!!error == !!error2);
176 if (result)
177 for (i = 0; i <= items_written; i++)
178 g_assert (result[i] == result2[i]);
180 g_free (result2);
181 if (error2)
182 g_error_free (error2);
185 error3 = NULL;
186 result3 = g_utf8_to_ucs4 (utf8, utf8_len, NULL, NULL, &error3);
188 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
190 g_assert_no_error (error);
191 g_assert (items_read == error_pos);
192 g_assert (items_written == ucs4_len);
193 g_assert (result);
194 for (i = 0; i <= items_written; i++)
195 g_assert (result[i] == ucs4[i]);
197 else if (error_pos)
199 g_assert (error != NULL);
200 g_assert (result == NULL);
201 g_assert (items_read == error_pos);
202 g_error_free (error);
204 g_assert (error3 != NULL);
205 g_assert (result3 == NULL);
206 g_error_free (error3);
208 else
210 g_assert_no_error (error);
211 g_assert (items_read == utf8_len);
212 g_assert (items_written == ucs4_len);
213 g_assert (result);
214 for (i = 0; i <= items_written; i++)
215 g_assert (result[i] == ucs4[i]);
217 g_assert_no_error (error3);
218 g_assert (result3);
219 for (i = 0; i <= ucs4_len; i++)
220 g_assert (result3[i] == ucs4[i]);
223 g_free (result);
224 g_free (result3);
227 static void
228 check_ucs4_to_utf8 (const gunichar *ucs4,
229 glong ucs4_len,
230 const char *utf8,
231 glong utf8_len,
232 glong error_pos)
234 gchar *result, *result2, *result3;
235 glong items_read, items_read2;
236 glong items_written, items_written2;
237 GError *error, *error2, *error3;
239 error = NULL;
240 result = g_ucs4_to_utf8 (ucs4, ucs4_len, &items_read, &items_written, &error);
242 if (ucs4[ucs4_len] == 0)
244 /* check that len == -1 yields identical results */
245 error2 = NULL;
246 result2 = g_ucs4_to_utf8 (ucs4, -1, &items_read2, &items_written2, &error2);
248 g_assert (error || items_read2 == items_read);
249 g_assert (error || items_written2 == items_written);
250 g_assert (!!result == !!result2);
251 g_assert (!!error == !!error2);
252 if (result)
253 g_assert (strcmp (result, result2) == 0);
255 g_free (result2);
256 if (error2)
257 g_error_free (error2);
260 error3 = NULL;
261 result3 = g_ucs4_to_utf8 (ucs4, ucs4_len, NULL, NULL, &error3);
263 if (error_pos)
265 g_assert (error != NULL);
266 g_assert (result == NULL);
267 g_assert (items_read == error_pos);
268 g_error_free (error);
270 g_assert (error3 != NULL);
271 g_assert (result3 == NULL);
272 g_error_free (error3);
274 else
276 g_assert_no_error (error);
277 g_assert (items_read == ucs4_len);
278 g_assert (items_written == utf8_len);
279 g_assert (result);
280 g_assert (strcmp (result, utf8) == 0);
282 g_assert_no_error (error3);
283 g_assert (result3);
284 g_assert (strcmp (result3, utf8) == 0);
287 g_free (result);
288 g_free (result3);
291 static void
292 check_utf8_to_utf16 (const char *utf8,
293 glong utf8_len,
294 const gunichar2 *utf16,
295 glong utf16_len,
296 glong error_pos)
298 gunichar2 *result, *result2, *result3;
299 glong items_read, items_read2;
300 glong items_written, items_written2;
301 GError *error, *error2, *error3;
302 gint i;
304 error = NULL;
305 result = g_utf8_to_utf16 (utf8, utf8_len, &items_read, &items_written, &error);
307 if (utf8_len == strlen (utf8))
309 /* check that len == -1 yields identical results */
310 error2 = NULL;
311 result2 = g_utf8_to_utf16 (utf8, -1, &items_read2, &items_written2, &error2);
312 g_assert (error || items_read2 == items_read);
313 g_assert (error || items_written2 == items_written2);
314 g_assert (!!result == !!result2);
315 g_assert (!!error == !!error2);
316 if (result)
317 for (i = 0; i <= items_written; i++)
318 g_assert (result[i] == result2[i]);
320 g_free (result2);
321 if (error2)
322 g_error_free (error2);
325 error3 = NULL;
326 result3 = g_utf8_to_utf16 (utf8, utf8_len, NULL, NULL, &error3);
328 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
330 g_assert_no_error (error);
331 g_assert (items_read == error_pos);
332 g_assert (items_written == utf16_len);
333 g_assert (result);
334 for (i = 0; i <= items_written; i++)
335 g_assert (result[i] == utf16[i]);
337 else if (error_pos)
339 g_assert (error != NULL);
340 g_assert (result == NULL);
341 g_assert (items_read == error_pos);
342 g_error_free (error);
344 g_assert (error3 != NULL);
345 g_assert (result3 == NULL);
346 g_error_free (error3);
348 else
350 g_assert_no_error (error);
351 g_assert (items_read == utf8_len);
352 g_assert (items_written == utf16_len);
353 g_assert (result);
354 for (i = 0; i <= items_written; i++)
355 g_assert (result[i] == utf16[i]);
357 g_assert_no_error (error3);
358 g_assert (result3);
359 for (i = 0; i <= utf16_len; i++)
360 g_assert (result3[i] == utf16[i]);
363 g_free (result);
364 g_free (result3);
367 static void
368 check_utf16_to_utf8 (const gunichar2 *utf16,
369 glong utf16_len,
370 const char *utf8,
371 glong utf8_len,
372 glong error_pos)
374 gchar *result, *result2, *result3;
375 glong items_read, items_read2;
376 glong items_written, items_written2;
377 GError *error, *error2, *error3;
379 error = NULL;
380 result = g_utf16_to_utf8 (utf16, utf16_len, &items_read, &items_written, &error);
381 if (utf16[utf16_len] == 0)
383 /* check that len == -1 yields identical results */
384 error2 = NULL;
385 result2 = g_utf16_to_utf8 (utf16, -1, &items_read2, &items_written2, &error2);
387 g_assert (error || items_read2 == items_read);
388 g_assert (error || items_written2 == items_written);
389 g_assert (!!result == !!result2);
390 g_assert (!!error == !!error2);
391 if (result)
392 g_assert (strcmp (result, result2) == 0);
394 g_free (result2);
395 if (error2)
396 g_error_free (error2);
399 error3 = NULL;
400 result3 = g_utf16_to_utf8 (utf16, utf16_len, NULL, NULL, &error3);
402 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
404 g_assert_no_error (error);
405 g_assert (items_read == error_pos);
406 g_assert (items_read + 1 == utf16_len);
407 g_assert (items_written == utf8_len);
408 g_assert (result);
409 g_assert (strcmp (result, utf8) == 0);
411 else if (error_pos)
413 g_assert (error != NULL);
414 g_assert (result == NULL);
415 g_assert (items_read == error_pos);
416 g_error_free (error);
418 g_assert (error3 != NULL);
419 g_assert (result3 == NULL);
420 g_error_free (error3);
422 else
424 g_assert_no_error (error);
425 g_assert (items_read == utf16_len);
426 g_assert (items_written == utf8_len);
427 g_assert (result);
428 g_assert (strcmp (result, utf8) == 0);
430 g_assert_no_error (error3);
431 g_assert (result3);
432 g_assert (strcmp (result3, utf8) == 0);
435 g_free (result);
436 g_free (result3);
439 static void
440 check_ucs4_to_utf16 (const gunichar *ucs4,
441 glong ucs4_len,
442 const gunichar2 *utf16,
443 glong utf16_len,
444 glong error_pos)
446 gunichar2 *result, *result2, *result3;
447 glong items_read, items_read2;
448 glong items_written, items_written2;
449 GError *error, *error2, *error3;
450 gint i;
452 error = NULL;
453 result = g_ucs4_to_utf16 (ucs4, ucs4_len, &items_read, &items_written, &error);
455 if (ucs4[ucs4_len] == 0)
457 /* check that len == -1 yields identical results */
458 error2 = NULL;
459 result2 = g_ucs4_to_utf16 (ucs4, -1, &items_read2, &items_written2, &error2);
461 g_assert (error || items_read2 == items_read);
462 g_assert (error || items_written2 == items_written);
463 g_assert (!!result == !!result2);
464 g_assert (!!error == !!error2);
465 if (result)
466 for (i = 0; i <= utf16_len; i++)
467 g_assert (result[i] == result2[i]);
469 g_free (result2);
470 if (error2)
471 g_error_free (error2);
474 error3 = NULL;
475 result3 = g_ucs4_to_utf16 (ucs4, -1, NULL, NULL, &error3);
477 if (error_pos)
479 g_assert (error != NULL);
480 g_assert (result == NULL);
481 g_assert (items_read == error_pos);
482 g_error_free (error);
484 g_assert (error3 != NULL);
485 g_assert (result3 == NULL);
486 g_error_free (error3);
488 else
490 g_assert_no_error (error);
491 g_assert (items_read == ucs4_len);
492 g_assert (items_written == utf16_len);
493 g_assert (result);
494 for (i = 0; i <= utf16_len; i++)
495 g_assert (result[i] == utf16[i]);
497 g_assert_no_error (error3);
498 g_assert (result3);
499 for (i = 0; i <= utf16_len; i++)
500 g_assert (result3[i] == utf16[i]);
503 g_free (result);
504 g_free (result3);
507 static void
508 check_utf16_to_ucs4 (const gunichar2 *utf16,
509 glong utf16_len,
510 const gunichar *ucs4,
511 glong ucs4_len,
512 glong error_pos)
514 gunichar *result, *result2, *result3;
515 glong items_read, items_read2;
516 glong items_written, items_written2;
517 GError *error, *error2, *error3;
518 gint i;
520 error = NULL;
521 result = g_utf16_to_ucs4 (utf16, utf16_len, &items_read, &items_written, &error);
522 if (utf16[utf16_len] == 0)
524 /* check that len == -1 yields identical results */
525 error2 = NULL;
526 result2 = g_utf16_to_ucs4 (utf16, -1, &items_read2, &items_written2, &error2);
527 g_assert (error || items_read2 == items_read);
528 g_assert (error || items_written2 == items_written2);
529 g_assert (!!result == !!result2);
530 g_assert (!!error == !!error2);
531 if (result)
532 for (i = 0; i <= items_written; i++)
533 g_assert (result[i] == result2[i]);
535 g_free (result2);
536 if (error2)
537 g_error_free (error2);
540 error3 = NULL;
541 result3 = g_utf16_to_ucs4 (utf16, utf16_len, NULL, NULL, &error3);
543 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
545 g_assert_no_error (error);
546 g_assert (items_read == error_pos);
547 g_assert (items_read + 1 == utf16_len);
548 g_assert (items_written == ucs4_len);
549 g_assert (result);
550 for (i = 0; i <= items_written; i++)
551 g_assert (result[i] == ucs4[i]);
553 else if (error_pos)
555 g_assert (error != NULL);
556 g_assert (result == NULL);
557 g_assert (items_read == error_pos);
558 g_error_free (error);
560 g_assert (error3 != NULL);
561 g_assert (result3 == NULL);
562 g_error_free (error3);
564 else
566 g_assert_no_error (error);
567 g_assert (items_read == utf16_len);
568 g_assert (items_written == ucs4_len);
569 g_assert (result);
570 for (i = 0; i <= ucs4_len; i++)
571 g_assert (result[i] == ucs4[i]);
573 g_assert_no_error (error3);
574 g_assert (result3);
575 for (i = 0; i <= ucs4_len; i++)
576 g_assert (result3[i] == ucs4[i]);
579 g_free (result);
580 g_free (result3);
583 static void
584 test_unicode_conversions (void)
586 char *utf8;
587 gunichar ucs4[100];
588 gunichar2 utf16[100];
590 utf8 = "abc";
591 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x63; ucs4[3] = 0;
592 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0x63; utf16[3] = 0;
594 check_utf8_to_ucs4 (utf8, 3, ucs4, 3, 0);
595 check_ucs4_to_utf8 (ucs4, 3, utf8, 3, 0);
596 check_utf8_to_utf16 (utf8, 3, utf16, 3, 0);
597 check_utf16_to_utf8 (utf16, 3, utf8, 3, 0);
598 check_ucs4_to_utf16 (ucs4, 3, utf16, 3, 0);
599 check_utf16_to_ucs4 (utf16, 3, ucs4, 3, 0);
601 utf8 = "\316\261\316\262\316\263";
602 ucs4[0] = 0x03b1; ucs4[1] = 0x03b2; ucs4[2] = 0x03b3; ucs4[3] = 0;
603 utf16[0] = 0x03b1; utf16[1] = 0x03b2; utf16[2] = 0x03b3; utf16[3] = 0;
605 check_utf8_to_ucs4 (utf8, 6, ucs4, 3, 0);
606 check_ucs4_to_utf8 (ucs4, 3, utf8, 6, 0);
607 check_utf8_to_utf16 (utf8, 6, utf16, 3, 0);
608 check_utf16_to_utf8 (utf16, 3, utf8, 6, 0);
609 check_ucs4_to_utf16 (ucs4, 3, utf16, 3, 0);
610 check_utf16_to_ucs4 (utf16, 3, ucs4, 3, 0);
612 /* partial utf8 character */
613 utf8 = "abc\316";
614 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x63; ucs4[3] = 0;
615 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0x63; utf16[3] = 0;
617 check_utf8_to_ucs4 (utf8, 4, ucs4, 3, 3);
618 check_utf8_to_utf16 (utf8, 4, utf16, 3, 3);
620 /* invalid utf8 */
621 utf8 = "abc\316\316";
622 ucs4[0] = 0;
623 utf16[0] = 0;
625 check_utf8_to_ucs4 (utf8, 5, ucs4, 0, 3);
626 check_utf8_to_utf16 (utf8, 5, utf16, 0, 3);
628 /* partial utf16 character */
629 utf8 = "ab";
630 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0;
631 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0xd801; utf16[3] = 0;
633 check_utf16_to_utf8 (utf16, 3, utf8, 2, 2);
634 check_utf16_to_ucs4 (utf16, 3, ucs4, 2, 2);
636 /* invalid utf16 */
637 utf8 = NULL;
638 ucs4[0] = 0;
639 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0xdc01; utf16[3] = 0;
641 check_utf16_to_utf8 (utf16, 3, utf8, 0, 2);
642 check_utf16_to_ucs4 (utf16, 3, ucs4, 0, 2);
644 /* invalid ucs4 */
645 utf8 = NULL;
646 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x80000000; ucs4[3] = 0;
647 utf16[0] = 0;
649 check_ucs4_to_utf8 (ucs4, 3, utf8, 0, 2);
650 check_ucs4_to_utf16 (ucs4, 3, utf16, 0, 2);
/* Test driver: runs each conversion test in turn and returns 0 once all of
 * them have completed.  The command-line arguments are not used.
 */
int
main (int argc, char *argv[])
{
  test_iconv_state ();
  test_one_half ();
  test_byte_order ();
  test_unicode_conversions ();

  return 0;
}