1 #undef G_DISABLE_ASSERT
10 static gint exit_status
= 0;
13 croak (char *format
, ...)
17 va_start (va
, format
);
18 vfprintf (stderr
, format
, va
);
25 fail (char *format
, ...)
29 va_start (va
, format
);
30 vfprintf (stderr
, format
, va
);
46 ucs4_equal (gunichar
*a
, gunichar
*b
)
48 while (*a
&& *b
&& (*a
== *b
))
58 utf16_equal (gunichar2
*a
, gunichar2
*b
)
60 while (*a
&& *b
&& (*a
== *b
))
70 utf16_count (gunichar2
*a
)
88 gboolean is_valid
= g_utf8_validate (utf8
, -1, &end
);
90 glong items_read
, items_written
;
97 fail ("line %d: valid but g_utf8_validate returned FALSE\n", line
);
107 fail ("line %d: invalid but g_utf8_validate returned TRUE\n", line
);
113 if (status
== INCOMPLETE
)
115 gunichar
*ucs4_result
;
117 ucs4_result
= g_utf8_to_ucs4 (utf8
, -1, NULL
, NULL
, &error
);
119 if (!error
|| !g_error_matches (error
, G_CONVERT_ERROR
, G_CONVERT_ERROR_PARTIAL_INPUT
))
121 fail ("line %d: incomplete input not properly detected\n", line
);
124 g_clear_error (&error
);
126 ucs4_result
= g_utf8_to_ucs4 (utf8
, -1, &items_read
, NULL
, &error
);
128 if (!ucs4_result
|| items_read
== strlen (utf8
))
130 fail ("line %d: incomplete input not properly detected\n", line
);
134 g_free (ucs4_result
);
137 if (status
== VALID
|| status
== NOTUNICODE
)
139 gunichar
*ucs4_result
;
142 ucs4_result
= g_utf8_to_ucs4 (utf8
, -1, &items_read
, &items_written
, &error
);
145 fail ("line %d: conversion to ucs4 failed: %s\n", line
, error
->message
);
149 if (!ucs4_equal (ucs4_result
, ucs4
) ||
150 items_read
!= strlen (utf8
) ||
151 items_written
!= ucs4_len
)
153 fail ("line %d: results of conversion to ucs4 do not match expected.\n", line
);
157 g_free (ucs4_result
);
159 ucs4_result
= g_utf8_to_ucs4_fast (utf8
, -1, &items_written
);
161 if (!ucs4_equal (ucs4_result
, ucs4
) ||
162 items_written
!= ucs4_len
)
164 fail ("line %d: results of conversion to ucs4 do not match expected.\n", line
);
168 utf8_result
= g_ucs4_to_utf8 (ucs4_result
, -1, &items_read
, &items_written
, &error
);
171 fail ("line %d: conversion back to utf8 failed: %s", line
, error
->message
);
175 if (strcmp (utf8_result
, utf8
) != 0 ||
176 items_read
!= ucs4_len
||
177 items_written
!= strlen (utf8
))
179 fail ("line %d: conversion back to utf8 did not match original\n", line
);
183 g_free (utf8_result
);
184 g_free (ucs4_result
);
189 gunichar2
*utf16_expected_tmp
;
190 gunichar2
*utf16_expected
;
191 gunichar2
*utf16_from_utf8
;
192 gunichar2
*utf16_from_ucs4
;
193 gunichar
*ucs4_result
;
198 #if G_BYTE_ORDER == G_LITTLE_ENDIAN
199 #define TARGET "UTF-16LE"
201 #define TARGET "UTF-16"
204 if (!(utf16_expected_tmp
= (gunichar2
*)g_convert (utf8
, -1, TARGET
, "UTF-8",
205 NULL
, &bytes_written
, NULL
)))
207 fail ("line %d: could not convert to UTF-16 via g_convert\n", line
);
211 /* zero-terminate and remove BOM
213 n_chars
= bytes_written
/ 2;
214 if (utf16_expected_tmp
[0] == 0xfeff) /* BOM */
217 utf16_expected
= g_new (gunichar2
, n_chars
+ 1);
218 memcpy (utf16_expected
, utf16_expected_tmp
+ 1, sizeof(gunichar2
) * n_chars
);
220 else if (utf16_expected_tmp
[0] == 0xfffe) /* ANTI-BOM */
222 fail ("line %d: conversion via iconv to \"UTF-16\" is not native-endian\n", line
);
227 utf16_expected
= g_new (gunichar2
, n_chars
+ 1);
228 memcpy (utf16_expected
, utf16_expected_tmp
, sizeof(gunichar2
) * n_chars
);
231 utf16_expected
[n_chars
] = '\0';
233 if (!(utf16_from_utf8
= g_utf8_to_utf16 (utf8
, -1, &items_read
, &items_written
, &error
)))
235 fail ("line %d: conversion to ucs16 failed: %s\n", line
, error
->message
);
239 if (items_read
!= strlen (utf8
) ||
240 utf16_count (utf16_from_utf8
) != items_written
)
242 fail ("line %d: length error in conversion to ucs16\n", line
);
246 if (!(utf16_from_ucs4
= g_ucs4_to_utf16 (ucs4
, -1, &items_read
, &items_written
, &error
)))
248 fail ("line %d: conversion to ucs16 failed: %s\n", line
, error
->message
);
252 if (items_read
!= ucs4_len
||
253 utf16_count (utf16_from_ucs4
) != items_written
)
255 fail ("line %d: length error in conversion to ucs16\n", line
);
259 if (!utf16_equal (utf16_from_utf8
, utf16_expected
) ||
260 !utf16_equal (utf16_from_ucs4
, utf16_expected
))
262 fail ("line %d: results of conversion to ucs16 do not match\n", line
);
266 if (!(utf8_result
= g_utf16_to_utf8 (utf16_from_utf8
, -1, &items_read
, &items_written
, &error
)))
268 fail ("line %d: conversion back to utf8 failed: %s\n", line
, error
->message
);
272 if (items_read
!= utf16_count (utf16_from_utf8
) ||
273 items_written
!= strlen (utf8
))
275 fail ("line %d: length error in conversion from ucs16 to utf8\n", line
);
279 if (!(ucs4_result
= g_utf16_to_ucs4 (utf16_from_ucs4
, -1, &items_read
, &items_written
, &error
)))
281 fail ("line %d: conversion back to utf8/ucs4 failed\n", line
);
285 if (items_read
!= utf16_count (utf16_from_utf8
) ||
286 items_written
!= ucs4_len
)
288 fail ("line %d: length error in conversion from ucs16 to ucs4\n", line
);
292 if (strcmp (utf8
, utf8_result
) != 0 ||
293 !ucs4_equal (ucs4
, ucs4_result
))
295 fail ("line %d: conversion back to utf8/ucs4 did not match original\n", line
);
299 g_free (utf16_expected_tmp
);
300 g_free (utf16_expected
);
301 g_free (utf16_from_utf8
);
302 g_free (utf16_from_ucs4
);
303 g_free (utf8_result
);
304 g_free (ucs4_result
);
309 main (int argc
, char **argv
)
313 GError
*error
= NULL
;
318 gint start_line
= 0; /* Quiet GCC */
319 gchar
*utf8
= NULL
; /* Quiet GCC */
321 Status status
= VALID
; /* Quiet GCC */
323 g_test_init (&argc
, &argv
, NULL
);
325 testfile
= g_test_build_filename (G_TEST_DIST
, "utf8.txt", NULL
);
327 g_file_get_contents (testfile
, &contents
, NULL
, &error
);
329 croak ("Cannot open utf8.txt: %s", error
->message
);
331 ucs4
= g_array_new (TRUE
, FALSE
, sizeof(gunichar
));
335 /* Loop over lines */
338 while (*p
&& (*p
== ' ' || *p
== '\t'))
342 while (*end
&& (*end
!= '\r' && *end
!= '\n'))
345 if (!*p
|| *p
== '#' || *p
== '\r' || *p
== '\n')
348 tmp
= g_strstrip (g_strndup (p
, end
- p
));
361 if (!strcmp (tmp
, "VALID"))
363 else if (!strcmp (tmp
, "INCOMPLETE"))
365 else if (!strcmp (tmp
, "NOTUNICODE"))
367 else if (!strcmp (tmp
, "OVERLONG"))
369 else if (!strcmp (tmp
, "MALFORMED"))
372 croak ("Invalid status on line %d\n", line
);
374 if (status
!= VALID
&& status
!= NOTUNICODE
)
375 state
++; /* No UCS-4 data */
382 p
= strtok (tmp
, " \t");
387 gunichar ch
= strtoul (p
, &endptr
, 16);
389 croak ("Invalid UCS-4 character on line %d\n", line
);
391 g_array_append_val (ucs4
, ch
);
393 p
= strtok (NULL
, " \t");
400 state
= (state
+ 1) % 3;
404 process (start_line
, utf8
, status
, (gunichar
*)ucs4
->data
, ucs4
->len
);
405 g_array_set_size (ucs4
, 0);
411 if (*p
&& *p
== '\r')
413 if (*p
&& *p
== '\n')