Add a missing since tag. (#464259, Mark Doliner)
[glib.git] / tests / convert-test.c
blob3ea3a6602ad83a4f0a7f53f7223364f0d23996d1
1 /* GLIB - Library of useful routines for C programming
2 * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 02111-1307, USA.
21 * Modified by the GLib Team and others 1997-2000. See the AUTHORS
22 * file for a list of people on the GLib Team. See the ChangeLog
23 * files for a list of changes. These files are distributed with
24 * GLib at ftp://ftp.gtk.org/pub/gtk/.
27 #undef G_DISABLE_ASSERT
28 #undef G_LOG_DOMAIN
30 #include <string.h>
32 #include <glib.h>
34 /* Bug 311337 */
35 static void
36 test_iconv_state (void)
38 gchar *in = "\xf4\xe5\xf8\xe5\xed";
39 gchar *expected = "\xd7\xa4\xd7\x95\xd7\xa8\xd7\x95\xd7\x9d";
40 gchar *out;
41 gsize bytes_read = 0;
42 gsize bytes_written = 0;
43 GError *error = NULL;
45 out = g_convert (in, -1, "UTF-8", "CP1255",
46 &bytes_read, &bytes_written, &error);
48 g_assert (error == NULL);
49 g_assert (bytes_read == 5);
50 g_assert (bytes_written == 10);
51 g_assert (strcmp (out, expected) == 0);
52 g_free (out);
55 /* some tests involving "vulgar fraction one half" */
56 static void
57 test_one_half (void)
59 gchar *in = "\xc2\xbd";
60 gchar *out;
61 gsize bytes_read = 0;
62 gsize bytes_written = 0;
63 GError *error = NULL;
65 out = g_convert (in, -1,
66 "ISO-8859-1", "UTF-8",
67 &bytes_read, &bytes_written,
68 &error);
70 g_assert (error == NULL);
71 g_assert (bytes_read == 2);
72 g_assert (bytes_written == 1);
73 g_assert (strcmp (out, "\xbd") == 0);
74 g_free (out);
76 out = g_convert (in, -1,
77 "ISO-8859-15", "UTF-8",
78 &bytes_read, &bytes_written,
79 &error);
81 g_assert (error && error->code == G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
82 g_assert (bytes_read == 0);
83 g_assert (bytes_written == 0);
84 g_assert (out == NULL);
85 g_clear_error (&error);
86 g_free (out);
88 out = g_convert_with_fallback (in, -1,
89 "ISO8859-15", "UTF-8",
90 "a",
91 &bytes_read, &bytes_written,
92 &error);
94 g_assert (error == NULL);
95 g_assert (bytes_read == 2);
96 g_assert (bytes_written == 1);
97 g_assert (strcmp (out, "a") == 0);
98 g_free (out);
101 static void
102 test_byte_order (void)
104 gchar in_be[4] = { 0xfe, 0xff, 0x03, 0x93}; /* capital gamma */
105 gchar in_le[4] = { 0xff, 0xfe, 0x93, 0x03};
106 gchar *expected = "\xce\x93";
107 gchar *out;
108 gsize bytes_read = 0;
109 gsize bytes_written = 0;
110 GError *error = NULL;
112 out = g_convert (in_be, sizeof (in_be),
113 "UTF-8", "UTF-16",
114 &bytes_read, &bytes_written,
115 &error);
117 g_assert (error == NULL);
118 g_assert (bytes_read == 4);
119 g_assert (bytes_written == 2);
120 g_assert (strcmp (out, expected) == 0);
121 g_free (out);
123 out = g_convert (in_le, sizeof (in_le),
124 "UTF-8", "UTF-16",
125 &bytes_read, &bytes_written,
126 &error);
128 g_assert (error == NULL);
129 g_assert (bytes_read == 4);
130 g_assert (bytes_written == 2);
131 g_assert (strcmp (out, expected) == 0);
132 g_free (out);
135 static void
136 check_utf8_to_ucs4 (const char *utf8,
137 glong utf8_len,
138 const gunichar *ucs4,
139 glong ucs4_len,
140 glong error_pos)
142 gunichar *result, *result2, *result3;
143 glong items_read, items_read2;
144 glong items_written, items_written2;
145 GError *error, *error2, *error3;
146 gint i;
148 if (!error_pos)
150 /* check the fast conversion */
151 result = g_utf8_to_ucs4_fast (utf8, utf8_len, &items_written);
153 g_assert (items_written == ucs4_len);
154 g_assert (result);
155 for (i = 0; i <= items_written; i++)
156 g_assert (result[i] == ucs4[i]);
158 g_free (result);
161 error = NULL;
162 result = g_utf8_to_ucs4 (utf8, utf8_len, &items_read, &items_written, &error);
164 if (utf8_len == strlen (utf8))
166 /* check that len == -1 yields identical results */
167 error2 = NULL;
168 result2 = g_utf8_to_ucs4 (utf8, -1, &items_read2, &items_written2, &error2);
169 g_assert (error || items_read2 == items_read);
170 g_assert (error || items_written2 == items_written2);
171 g_assert (!!result == !!result2);
172 g_assert (!!error == !!error2);
173 if (result)
174 for (i = 0; i <= items_written; i++)
175 g_assert (result[i] == result2[i]);
177 g_free (result2);
178 if (error2)
179 g_error_free (error2);
182 error3 = NULL;
183 result3 = g_utf8_to_ucs4 (utf8, utf8_len, NULL, NULL, &error3);
185 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
187 g_assert (error == NULL);
188 g_assert (items_read == error_pos);
189 g_assert (items_written == ucs4_len);
190 g_assert (result);
191 for (i = 0; i <= items_written; i++)
192 g_assert (result[i] == ucs4[i]);
194 else if (error_pos)
196 g_assert (error != NULL);
197 g_assert (result == NULL);
198 g_assert (items_read == error_pos);
199 g_error_free (error);
201 g_assert (error3 != NULL);
202 g_assert (result3 == NULL);
203 g_error_free (error3);
205 else
207 g_assert (error == NULL);
208 g_assert (items_read == utf8_len);
209 g_assert (items_written == ucs4_len);
210 g_assert (result);
211 for (i = 0; i <= items_written; i++)
212 g_assert (result[i] == ucs4[i]);
214 g_assert (error3 == NULL);
215 g_assert (result3);
216 for (i = 0; i <= ucs4_len; i++)
217 g_assert (result3[i] == ucs4[i]);
220 g_free (result);
221 g_free (result3);
224 static void
225 check_ucs4_to_utf8 (const gunichar *ucs4,
226 glong ucs4_len,
227 const char *utf8,
228 glong utf8_len,
229 glong error_pos)
231 gchar *result, *result2, *result3;
232 glong items_read, items_read2;
233 glong items_written, items_written2;
234 GError *error, *error2, *error3;
236 error = NULL;
237 result = g_ucs4_to_utf8 (ucs4, ucs4_len, &items_read, &items_written, &error);
239 if (ucs4[ucs4_len] == 0)
241 /* check that len == -1 yields identical results */
242 error2 = NULL;
243 result2 = g_ucs4_to_utf8 (ucs4, -1, &items_read2, &items_written2, &error2);
245 g_assert (error || items_read2 == items_read);
246 g_assert (error || items_written2 == items_written);
247 g_assert (!!result == !!result2);
248 g_assert (!!error == !!error2);
249 if (result)
250 g_assert (strcmp (result, result2) == 0);
252 g_free (result2);
253 if (error2)
254 g_error_free (error2);
257 error3 = NULL;
258 result3 = g_ucs4_to_utf8 (ucs4, ucs4_len, NULL, NULL, &error3);
260 if (error_pos)
262 g_assert (error != NULL);
263 g_assert (result == NULL);
264 g_assert (items_read == error_pos);
265 g_error_free (error);
267 g_assert (error3 != NULL);
268 g_assert (result3 == NULL);
269 g_error_free (error3);
271 else
273 g_assert (error == NULL);
274 g_assert (items_read == ucs4_len);
275 g_assert (items_written == utf8_len);
276 g_assert (result);
277 g_assert (strcmp (result, utf8) == 0);
279 g_assert (error3 == NULL);
280 g_assert (result3);
281 g_assert (strcmp (result3, utf8) == 0);
284 g_free (result);
285 g_free (result3);
288 static void
289 check_utf8_to_utf16 (const char *utf8,
290 glong utf8_len,
291 const gunichar2 *utf16,
292 glong utf16_len,
293 glong error_pos)
295 gunichar2 *result, *result2, *result3;
296 glong items_read, items_read2;
297 glong items_written, items_written2;
298 GError *error, *error2, *error3;
299 gint i;
301 error = NULL;
302 result = g_utf8_to_utf16 (utf8, utf8_len, &items_read, &items_written, &error);
304 if (utf8_len == strlen (utf8))
306 /* check that len == -1 yields identical results */
307 error2 = NULL;
308 result2 = g_utf8_to_utf16 (utf8, -1, &items_read2, &items_written2, &error2);
309 g_assert (error || items_read2 == items_read);
310 g_assert (error || items_written2 == items_written2);
311 g_assert (!!result == !!result2);
312 g_assert (!!error == !!error2);
313 if (result)
314 for (i = 0; i <= items_written; i++)
315 g_assert (result[i] == result2[i]);
317 g_free (result2);
318 if (error2)
319 g_error_free (error2);
322 error3 = NULL;
323 result3 = g_utf8_to_utf16 (utf8, utf8_len, NULL, NULL, &error3);
325 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
327 g_assert (error == NULL);
328 g_assert (items_read == error_pos);
329 g_assert (items_written == utf16_len);
330 g_assert (result);
331 for (i = 0; i <= items_written; i++)
332 g_assert (result[i] == utf16[i]);
334 else if (error_pos)
336 g_assert (error != NULL);
337 g_assert (result == NULL);
338 g_assert (items_read == error_pos);
339 g_error_free (error);
341 g_assert (error3 != NULL);
342 g_assert (result3 == NULL);
343 g_error_free (error3);
345 else
347 g_assert (error == NULL);
348 g_assert (items_read == utf8_len);
349 g_assert (items_written == utf16_len);
350 g_assert (result);
351 for (i = 0; i <= items_written; i++)
352 g_assert (result[i] == utf16[i]);
354 g_assert (error3 == NULL);
355 g_assert (result3);
356 for (i = 0; i <= utf16_len; i++)
357 g_assert (result3[i] == utf16[i]);
360 g_free (result);
361 g_free (result3);
364 static void
365 check_utf16_to_utf8 (const gunichar2 *utf16,
366 glong utf16_len,
367 const char *utf8,
368 glong utf8_len,
369 glong error_pos)
371 gchar *result, *result2, *result3;
372 glong items_read, items_read2;
373 glong items_written, items_written2;
374 GError *error, *error2, *error3;
376 error = NULL;
377 result = g_utf16_to_utf8 (utf16, utf16_len, &items_read, &items_written, &error);
378 if (utf16[utf16_len] == 0)
380 /* check that len == -1 yields identical results */
381 error2 = NULL;
382 result2 = g_utf16_to_utf8 (utf16, -1, &items_read2, &items_written2, &error2);
384 g_assert (error || items_read2 == items_read);
385 g_assert (error || items_written2 == items_written);
386 g_assert (!!result == !!result2);
387 g_assert (!!error == !!error2);
388 if (result)
389 g_assert (strcmp (result, result2) == 0);
391 g_free (result2);
392 if (error2)
393 g_error_free (error2);
396 error3 = NULL;
397 result3 = g_utf16_to_utf8 (utf16, utf16_len, NULL, NULL, &error3);
399 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
401 g_assert (error == NULL);
402 g_assert (items_read == error_pos);
403 g_assert (items_read + 1 == utf16_len);
404 g_assert (items_written == utf8_len);
405 g_assert (result);
406 g_assert (strcmp (result, utf8) == 0);
408 else if (error_pos)
410 g_assert (error != NULL);
411 g_assert (result == NULL);
412 g_assert (items_read == error_pos);
413 g_error_free (error);
415 g_assert (error3 != NULL);
416 g_assert (result3 == NULL);
417 g_error_free (error3);
419 else
421 g_assert (error == NULL);
422 g_assert (items_read == utf16_len);
423 g_assert (items_written == utf8_len);
424 g_assert (result);
425 g_assert (strcmp (result, utf8) == 0);
427 g_assert (error3 == NULL);
428 g_assert (result3);
429 g_assert (strcmp (result3, utf8) == 0);
432 g_free (result);
433 g_free (result3);
436 static void
437 check_ucs4_to_utf16 (const gunichar *ucs4,
438 glong ucs4_len,
439 const gunichar2 *utf16,
440 glong utf16_len,
441 glong error_pos)
443 gunichar2 *result, *result2, *result3;
444 glong items_read, items_read2;
445 glong items_written, items_written2;
446 GError *error, *error2, *error3;
447 gint i;
449 error = NULL;
450 result = g_ucs4_to_utf16 (ucs4, ucs4_len, &items_read, &items_written, &error);
452 if (ucs4[ucs4_len] == 0)
454 /* check that len == -1 yields identical results */
455 error2 = NULL;
456 result2 = g_ucs4_to_utf16 (ucs4, -1, &items_read2, &items_written2, &error2);
458 g_assert (error || items_read2 == items_read);
459 g_assert (error || items_written2 == items_written);
460 g_assert (!!result == !!result2);
461 g_assert (!!error == !!error2);
462 if (result)
463 for (i = 0; i <= utf16_len; i++)
464 g_assert (result[i] == result2[i]);
466 g_free (result2);
467 if (error2)
468 g_error_free (error2);
471 error3 = NULL;
472 result3 = g_ucs4_to_utf16 (ucs4, -1, NULL, NULL, &error3);
474 if (error_pos)
476 g_assert (error != NULL);
477 g_assert (result == NULL);
478 g_assert (items_read == error_pos);
479 g_error_free (error);
481 g_assert (error3 != NULL);
482 g_assert (result3 == NULL);
483 g_error_free (error3);
485 else
487 g_assert (error == NULL);
488 g_assert (items_read == ucs4_len);
489 g_assert (items_written == utf16_len);
490 g_assert (result);
491 for (i = 0; i <= utf16_len; i++)
492 g_assert (result[i] == utf16[i]);
494 g_assert (error3 == NULL);
495 g_assert (result3);
496 for (i = 0; i <= utf16_len; i++)
497 g_assert (result3[i] == utf16[i]);
500 g_free (result);
501 g_free (result3);
504 static void
505 check_utf16_to_ucs4 (const gunichar2 *utf16,
506 glong utf16_len,
507 const gunichar *ucs4,
508 glong ucs4_len,
509 glong error_pos)
511 gunichar *result, *result2, *result3;
512 glong items_read, items_read2;
513 glong items_written, items_written2;
514 GError *error, *error2, *error3;
515 gint i;
517 error = NULL;
518 result = g_utf16_to_ucs4 (utf16, utf16_len, &items_read, &items_written, &error);
519 if (utf16[utf16_len] == 0)
521 /* check that len == -1 yields identical results */
522 error2 = NULL;
523 result2 = g_utf16_to_ucs4 (utf16, -1, &items_read2, &items_written2, &error2);
524 g_assert (error || items_read2 == items_read);
525 g_assert (error || items_written2 == items_written2);
526 g_assert (!!result == !!result2);
527 g_assert (!!error == !!error2);
528 if (result)
529 for (i = 0; i <= items_written; i++)
530 g_assert (result[i] == result2[i]);
532 g_free (result2);
533 if (error2)
534 g_error_free (error2);
537 error3 = NULL;
538 result3 = g_utf16_to_ucs4 (utf16, utf16_len, NULL, NULL, &error3);
540 if (error3 && error3->code == G_CONVERT_ERROR_PARTIAL_INPUT)
542 g_assert (error == NULL);
543 g_assert (items_read == error_pos);
544 g_assert (items_read + 1 == utf16_len);
545 g_assert (items_written == ucs4_len);
546 g_assert (result);
547 for (i = 0; i <= items_written; i++)
548 g_assert (result[i] == ucs4[i]);
550 else if (error_pos)
552 g_assert (error != NULL);
553 g_assert (result == NULL);
554 g_assert (items_read == error_pos);
555 g_error_free (error);
557 g_assert (error3 != NULL);
558 g_assert (result3 == NULL);
559 g_error_free (error3);
561 else
563 g_assert (error == NULL);
564 g_assert (items_read == utf16_len);
565 g_assert (items_written == ucs4_len);
566 g_assert (result);
567 for (i = 0; i <= ucs4_len; i++)
568 g_assert (result[i] == ucs4[i]);
570 g_assert (error3 == NULL);
571 g_assert (result3);
572 for (i = 0; i <= ucs4_len; i++)
573 g_assert (result3[i] == ucs4[i]);
576 g_free (result);
577 g_free (result3);
580 static void
581 test_unicode_conversions (void)
583 char *utf8;
584 gunichar ucs4[100];
585 gunichar2 utf16[100];
587 utf8 = "abc";
588 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x63; ucs4[3] = 0;
589 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0x63; utf16[3] = 0;
591 check_utf8_to_ucs4 (utf8, 3, ucs4, 3, 0);
592 check_ucs4_to_utf8 (ucs4, 3, utf8, 3, 0);
593 check_utf8_to_utf16 (utf8, 3, utf16, 3, 0);
594 check_utf16_to_utf8 (utf16, 3, utf8, 3, 0);
595 check_ucs4_to_utf16 (ucs4, 3, utf16, 3, 0);
596 check_utf16_to_ucs4 (utf16, 3, ucs4, 3, 0);
598 utf8 = "\316\261\316\262\316\263";
599 ucs4[0] = 0x03b1; ucs4[1] = 0x03b2; ucs4[2] = 0x03b3; ucs4[3] = 0;
600 utf16[0] = 0x03b1; utf16[1] = 0x03b2; utf16[2] = 0x03b3; utf16[3] = 0;
602 check_utf8_to_ucs4 (utf8, 6, ucs4, 3, 0);
603 check_ucs4_to_utf8 (ucs4, 3, utf8, 6, 0);
604 check_utf8_to_utf16 (utf8, 6, utf16, 3, 0);
605 check_utf16_to_utf8 (utf16, 3, utf8, 6, 0);
606 check_ucs4_to_utf16 (ucs4, 3, utf16, 3, 0);
607 check_utf16_to_ucs4 (utf16, 3, ucs4, 3, 0);
609 /* partial utf8 character */
610 utf8 = "abc\316";
611 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x63; ucs4[3] = 0;
612 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0x63; utf16[3] = 0;
614 check_utf8_to_ucs4 (utf8, 4, ucs4, 3, 3);
615 check_utf8_to_utf16 (utf8, 4, utf16, 3, 3);
617 /* invalid utf8 */
618 utf8 = "abc\316\316";
619 ucs4[0] = 0;
620 utf16[0] = 0;
622 check_utf8_to_ucs4 (utf8, 5, ucs4, 0, 3);
623 check_utf8_to_utf16 (utf8, 5, utf16, 0, 3);
625 /* partial utf16 character */
626 utf8 = "ab";
627 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0;
628 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0xd801; utf16[3] = 0;
630 check_utf16_to_utf8 (utf16, 3, utf8, 2, 2);
631 check_utf16_to_ucs4 (utf16, 3, ucs4, 2, 2);
633 /* invalid utf16 */
634 utf8 = NULL;
635 ucs4[0] = 0;
636 utf16[0] = 0x61; utf16[1] = 0x62; utf16[2] = 0xdc01; utf16[3] = 0;
638 check_utf16_to_utf8 (utf16, 3, utf8, 0, 2);
639 check_utf16_to_ucs4 (utf16, 3, ucs4, 0, 2);
641 /* invalid ucs4 */
642 utf8 = NULL;
643 ucs4[0] = 0x61; ucs4[1] = 0x62; ucs4[2] = 0x80000000; ucs4[3] = 0;
644 utf16[0] = 0;
646 check_ucs4_to_utf8 (ucs4, 3, utf8, 0, 2);
647 check_ucs4_to_utf16 (ucs4, 3, utf16, 0, 2);
/* Test driver: runs the conversion tests one after another and returns
 * 0 once all of them have returned.  The command-line arguments are
 * accepted but not used.  The return type is spelled out rather than
 * left to implicit int.
 */
int
main (int argc, char *argv[])
{
  test_iconv_state ();
  test_one_half ();
  test_byte_order ();
  test_unicode_conversions ();

  return 0;
}