1 #undef G_DISABLE_ASSERT
10 static gint exit_status
= 0;
13 croak (char *format
, ...)
17 va_start (va
, format
);
18 vfprintf (stderr
, format
, va
);
25 fail (char *format
, ...)
29 va_start (va
, format
);
30 vfprintf (stderr
, format
, va
);
46 ucs4_equal (gunichar
*a
, gunichar
*b
)
48 while (*a
&& *b
&& (*a
== *b
))
58 utf16_equal (gunichar2
*a
, gunichar2
*b
)
60 while (*a
&& *b
&& (*a
== *b
))
70 utf16_count (gunichar2
*a
)
81 print_ucs4 (const gchar
*prefix
, gunichar
*ucs4
, gint ucs4_len
)
84 g_print ("%s ", prefix
);
85 for (i
= 0; i
< ucs4_len
; i
++)
86 g_print ("%x ", ucs4
[i
]);
98 gboolean is_valid
= g_utf8_validate (utf8
, -1, &end
);
100 glong items_read
, items_written
;
107 fail ("line %d: valid but g_utf8_validate returned FALSE\n", line
);
117 fail ("line %d: invalid but g_utf8_validate returned TRUE\n", line
);
123 if (status
== INCOMPLETE
)
125 gunichar
*ucs4_result
;
127 ucs4_result
= g_utf8_to_ucs4 (utf8
, -1, NULL
, NULL
, &error
);
129 if (!error
|| !g_error_matches (error
, G_CONVERT_ERROR
, G_CONVERT_ERROR_PARTIAL_INPUT
))
131 fail ("line %d: incomplete input not properly detected\n", line
);
134 g_clear_error (&error
);
136 ucs4_result
= g_utf8_to_ucs4 (utf8
, -1, &items_read
, NULL
, &error
);
138 if (!ucs4_result
|| items_read
== strlen (utf8
))
140 fail ("line %d: incomplete input not properly detected\n", line
);
144 g_free (ucs4_result
);
147 if (status
== VALID
|| status
== NOTUNICODE
)
149 gunichar
*ucs4_result
;
151 ucs4_result
= g_utf8_to_ucs4 (utf8
, -1, &items_read
, &items_written
, &error
);
154 fail ("line %d: conversion with status %d to ucs4 failed: %s\n", line
, status
, error
->message
);
158 if (!ucs4_equal (ucs4_result
, ucs4
) ||
159 items_read
!= strlen (utf8
) ||
160 items_written
!= ucs4_len
)
162 fail ("line %d: results of conversion with status %d to ucs4 do not match expected.\n", line
, status
);
163 print_ucs4 ("expected: ", ucs4
, ucs4_len
);
164 print_ucs4 ("received: ", ucs4_result
, items_written
);
168 g_free (ucs4_result
);
173 gunichar
*ucs4_result
;
176 ucs4_result
= g_utf8_to_ucs4_fast (utf8
, -1, &items_written
);
178 if (!ucs4_equal (ucs4_result
, ucs4
) ||
179 items_written
!= ucs4_len
)
181 fail ("line %d: results of fast conversion with status %d to ucs4 do not match expected.\n", line
, status
);
182 print_ucs4 ("expected: ", ucs4
, ucs4_len
);
183 print_ucs4 ("received: ", ucs4_result
, items_written
);
187 utf8_result
= g_ucs4_to_utf8 (ucs4_result
, -1, &items_read
, &items_written
, &error
);
190 fail ("line %d: conversion back to utf8 failed: %s", line
, error
->message
);
194 if (strcmp (utf8_result
, utf8
) != 0 ||
195 items_read
!= ucs4_len
||
196 items_written
!= strlen (utf8
))
198 fail ("line %d: conversion back to utf8 did not match original\n", line
);
202 g_free (utf8_result
);
203 g_free (ucs4_result
);
208 gunichar2
*utf16_expected_tmp
;
209 gunichar2
*utf16_expected
;
210 gunichar2
*utf16_from_utf8
;
211 gunichar2
*utf16_from_ucs4
;
212 gunichar
*ucs4_result
;
217 #if G_BYTE_ORDER == G_LITTLE_ENDIAN
218 #define TARGET "UTF-16LE"
220 #define TARGET "UTF-16"
223 if (!(utf16_expected_tmp
= (gunichar2
*)g_convert (utf8
, -1, TARGET
, "UTF-8",
224 NULL
, &bytes_written
, NULL
)))
226 fail ("line %d: could not convert to UTF-16 via g_convert\n", line
);
230 /* zero-terminate and remove BOM
232 n_chars
= bytes_written
/ 2;
233 if (utf16_expected_tmp
[0] == 0xfeff) /* BOM */
236 utf16_expected
= g_new (gunichar2
, n_chars
+ 1);
237 memcpy (utf16_expected
, utf16_expected_tmp
+ 1, sizeof(gunichar2
) * n_chars
);
239 else if (utf16_expected_tmp
[0] == 0xfffe) /* ANTI-BOM */
241 fail ("line %d: conversion via iconv to \"UTF-16\" is not native-endian\n", line
);
246 utf16_expected
= g_new (gunichar2
, n_chars
+ 1);
247 memcpy (utf16_expected
, utf16_expected_tmp
, sizeof(gunichar2
) * n_chars
);
250 utf16_expected
[n_chars
] = '\0';
252 if (!(utf16_from_utf8
= g_utf8_to_utf16 (utf8
, -1, &items_read
, &items_written
, &error
)))
254 fail ("line %d: conversion to ucs16 failed: %s\n", line
, error
->message
);
258 if (items_read
!= strlen (utf8
) ||
259 utf16_count (utf16_from_utf8
) != items_written
)
261 fail ("line %d: length error in conversion to ucs16\n", line
);
265 if (!(utf16_from_ucs4
= g_ucs4_to_utf16 (ucs4
, -1, &items_read
, &items_written
, &error
)))
267 fail ("line %d: conversion to ucs16 failed: %s\n", line
, error
->message
);
271 if (items_read
!= ucs4_len
||
272 utf16_count (utf16_from_ucs4
) != items_written
)
274 fail ("line %d: length error in conversion to ucs16\n", line
);
278 if (!utf16_equal (utf16_from_utf8
, utf16_expected
) ||
279 !utf16_equal (utf16_from_ucs4
, utf16_expected
))
281 fail ("line %d: results of conversion to ucs16 do not match\n", line
);
285 if (!(utf8_result
= g_utf16_to_utf8 (utf16_from_utf8
, -1, &items_read
, &items_written
, &error
)))
287 fail ("line %d: conversion back to utf8 failed: %s\n", line
, error
->message
);
291 if (items_read
!= utf16_count (utf16_from_utf8
) ||
292 items_written
!= strlen (utf8
))
294 fail ("line %d: length error in conversion from ucs16 to utf8\n", line
);
298 if (!(ucs4_result
= g_utf16_to_ucs4 (utf16_from_ucs4
, -1, &items_read
, &items_written
, &error
)))
300 fail ("line %d: conversion back to utf8/ucs4 failed\n", line
);
304 if (items_read
!= utf16_count (utf16_from_utf8
) ||
305 items_written
!= ucs4_len
)
307 fail ("line %d: length error in conversion from ucs16 to ucs4\n", line
);
311 if (strcmp (utf8
, utf8_result
) != 0 ||
312 !ucs4_equal (ucs4
, ucs4_result
))
314 fail ("line %d: conversion back to utf8/ucs4 did not match original\n", line
);
318 g_free (utf16_expected_tmp
);
319 g_free (utf16_expected
);
320 g_free (utf16_from_utf8
);
321 g_free (utf16_from_ucs4
);
322 g_free (utf8_result
);
323 g_free (ucs4_result
);
328 main (int argc
, char **argv
)
332 GError
*error
= NULL
;
337 gint start_line
= 0; /* Quiet GCC */
338 gchar
*utf8
= NULL
; /* Quiet GCC */
340 Status status
= VALID
; /* Quiet GCC */
342 g_test_init (&argc
, &argv
, NULL
);
344 testfile
= g_test_build_filename (G_TEST_DIST
, "utf8.txt", NULL
);
346 g_file_get_contents (testfile
, &contents
, NULL
, &error
);
348 croak ("Cannot open utf8.txt: %s", error
->message
);
350 ucs4
= g_array_new (TRUE
, FALSE
, sizeof(gunichar
));
354 /* Loop over lines */
357 while (*p
&& (*p
== ' ' || *p
== '\t'))
361 while (*end
&& (*end
!= '\r' && *end
!= '\n'))
364 if (!*p
|| *p
== '#' || *p
== '\r' || *p
== '\n')
367 tmp
= g_strstrip (g_strndup (p
, end
- p
));
380 if (!strcmp (tmp
, "VALID"))
382 else if (!strcmp (tmp
, "INCOMPLETE"))
384 else if (!strcmp (tmp
, "NOTUNICODE"))
386 else if (!strcmp (tmp
, "OVERLONG"))
388 else if (!strcmp (tmp
, "MALFORMED"))
391 croak ("Invalid status on line %d\n", line
);
393 if (status
!= VALID
&& status
!= NOTUNICODE
)
394 state
++; /* No UCS-4 data */
401 p
= strtok (tmp
, " \t");
406 gunichar ch
= strtoul (p
, &endptr
, 16);
408 croak ("Invalid UCS-4 character on line %d\n", line
);
410 g_array_append_val (ucs4
, ch
);
412 p
= strtok (NULL
, " \t");
419 state
= (state
+ 1) % 3;
423 process (start_line
, utf8
, status
, (gunichar
*)ucs4
->data
, ucs4
->len
);
424 g_array_set_size (ucs4
, 0);
430 if (*p
&& *p
== '\r')
432 if (*p
&& *p
== '\n')
439 g_array_free (ucs4
, TRUE
);