openat: don’t close (-1)
[gnulib.git] / tests / test-striconveha.c
blob5ac1c0d85001d3f863df2c656b97006060f694a1
1 /* Test of character set conversion with error handling and autodetection.
2 Copyright (C) 2007-2024 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2007. */
19 #include <config.h>
21 #include "striconveha.h"
23 #if HAVE_ICONV
24 # include <iconv.h>
25 #endif
27 #include <errno.h>
28 #include <stdlib.h>
29 #include <string.h>
31 #include "macros.h"
32 extern int iconv_supports_encoding (const char *encoding);
/* Magic number for detecting bounds violations.  */
#define MAGIC 0x1983EFF1

/* Allocate an array of N + 1 offsets and store MAGIC in the extra trailing
   slot, so that the caller can detect a write past the first N elements by
   checking that slot afterwards.  The first N elements are left
   uninitialized.  The caller releases the result with free().
   Aborts if the size computation would overflow or if memory is exhausted,
   instead of writing through a null pointer.  */
static size_t *
new_offsets (size_t n)
{
  size_t *offsets;

  /* (n + 1) * sizeof *offsets must not wrap around.  */
  if (n >= (size_t) -1 / sizeof *offsets)
    abort ();

  offsets = malloc ((n + 1) * sizeof *offsets);
  if (offsets == NULL)
    abort ();

  offsets[n] = MAGIC;
  return offsets;
}
45 int
46 main ()
48 #if HAVE_ICONV
49 static enum iconv_ilseq_handler handlers[] =
50 { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
51 size_t h;
52 size_t o;
53 size_t i;
55 /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
56 ISO-8859-2, and UTF-8. */
58 /* ------------------------- Test mem_iconveha() ------------------------- */
60 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
61 for (h = 0; h < SIZEOF (handlers); h++)
63 enum iconv_ilseq_handler handler = handlers[h];
64 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
65 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
66 for (o = 0; o < 2; o++)
68 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
69 char *result = NULL;
70 size_t length = 0;
71 int retval = mem_iconveha (input, strlen (input),
72 "ISO-8859-2", "ISO-8859-1",
73 false, handler,
74 offsets,
75 &result, &length);
76 ASSERT (retval == 0);
77 ASSERT (length == strlen (expected));
78 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
79 if (o)
81 for (i = 0; i < 37; i++)
82 ASSERT (offsets[i] == i);
83 ASSERT (offsets[37] == MAGIC);
84 free (offsets);
86 free (result);
90 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
91 for (h = 0; h < SIZEOF (handlers); h++)
93 enum iconv_ilseq_handler handler = handlers[h];
94 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
95 for (o = 0; o < 2; o++)
97 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
98 char *result = NULL;
99 size_t length = 0;
100 int retval = mem_iconveha (input, strlen (input),
101 "ISO-8859-2", "ISO-8859-1",
102 false, handler,
103 offsets,
104 &result, &length);
105 switch (handler)
107 case iconveh_error:
108 ASSERT (retval == -1 && errno == EILSEQ);
109 ASSERT (result == NULL);
110 if (o)
111 free (offsets);
112 break;
113 case iconveh_question_mark:
115 static const char expected[] = "Rafa? Maszkowski";
116 ASSERT (retval == 0);
117 ASSERT (length == strlen (expected));
118 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
119 if (o)
121 for (i = 0; i < 16; i++)
122 ASSERT (offsets[i] == i);
123 ASSERT (offsets[16] == MAGIC);
124 free (offsets);
126 free (result);
128 break;
129 case iconveh_escape_sequence:
131 static const char expected[] = "Rafa\\u0142 Maszkowski";
132 ASSERT (retval == 0);
133 ASSERT (length == strlen (expected));
134 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
135 if (o)
137 for (i = 0; i < 16; i++)
138 ASSERT (offsets[i] == (i < 5 ? i :
139 i + 5));
140 ASSERT (offsets[16] == MAGIC);
141 free (offsets);
143 free (result);
145 break;
150 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
151 for (h = 0; h < SIZEOF (handlers); h++)
153 enum iconv_ilseq_handler handler = handlers[h];
154 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
155 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
156 for (o = 0; o < 2; o++)
158 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
159 char *result = NULL;
160 size_t length = 0;
161 int retval = mem_iconveha (input, strlen (input),
162 "ISO-8859-1", "UTF-8",
163 false, handler,
164 offsets,
165 &result, &length);
166 ASSERT (retval == 0);
167 ASSERT (length == strlen (expected));
168 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
169 if (o)
171 for (i = 0; i < 37; i++)
172 ASSERT (offsets[i] == (i < 1 ? i :
173 i < 12 ? i + 1 :
174 i < 18 ? i + 2 :
175 i + 3));
176 ASSERT (offsets[37] == MAGIC);
177 free (offsets);
179 free (result);
183 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
184 for (h = 0; h < SIZEOF (handlers); h++)
186 enum iconv_ilseq_handler handler = handlers[h];
187 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
188 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
189 for (o = 0; o < 2; o++)
191 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
192 char *result = NULL;
193 size_t length = 0;
194 int retval = mem_iconveha (input, strlen (input),
195 "UTF-8", "ISO-8859-1",
196 false, handler,
197 offsets,
198 &result, &length);
199 ASSERT (retval == 0);
200 ASSERT (length == strlen (expected));
201 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
202 if (o)
204 for (i = 0; i < 41; i++)
205 ASSERT (offsets[i] == (i < 1 ? i :
206 i == 1 ? (size_t)(-1) :
207 i < 13 ? i - 1 :
208 i == 13 ? (size_t)(-1) :
209 i < 20 ? i - 2 :
210 i == 20 ? (size_t)(-1) :
211 i < 40 ? i - 3 :
212 (size_t)(-1)));
213 ASSERT (offsets[41] == MAGIC);
214 free (offsets);
216 free (result);
220 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
221 for (h = 0; h < SIZEOF (handlers); h++)
223 enum iconv_ilseq_handler handler = handlers[h];
224 static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
225 for (o = 0; o < 2; o++)
227 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
228 char *result = NULL;
229 size_t length = 0;
230 int retval = mem_iconveha (input, strlen (input),
231 "UTF-8", "ISO-8859-1",
232 false, handler,
233 offsets,
234 &result, &length);
235 switch (handler)
237 case iconveh_error:
238 ASSERT (retval == -1 && errno == EILSEQ);
239 ASSERT (result == NULL);
240 if (o)
241 free (offsets);
242 break;
243 case iconveh_question_mark:
245 static const char expected[] = "Rafa? Maszkowski";
246 ASSERT (retval == 0);
247 ASSERT (length == strlen (expected));
248 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
249 if (o)
251 for (i = 0; i < 17; i++)
252 ASSERT (offsets[i] == (i < 5 ? i :
253 i == 5 ? (size_t)(-1) :
254 i - 1));
255 ASSERT (offsets[17] == MAGIC);
256 free (offsets);
258 free (result);
260 break;
261 case iconveh_escape_sequence:
263 static const char expected[] = "Rafa\\u0142 Maszkowski";
264 ASSERT (retval == 0);
265 ASSERT (length == strlen (expected));
266 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
267 if (o)
269 for (i = 0; i < 17; i++)
270 ASSERT (offsets[i] == (i < 5 ? i :
271 i == 5 ? (size_t)(-1) :
272 i + 4));
273 ASSERT (offsets[17] == MAGIC);
274 free (offsets);
276 free (result);
278 break;
283 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
284 for (h = 0; h < SIZEOF (handlers); h++)
286 enum iconv_ilseq_handler handler = handlers[h];
287 static const char input[] = "\342";
288 for (o = 0; o < 2; o++)
290 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
291 char *result = NULL;
292 size_t length = 0;
293 int retval = mem_iconveha (input, strlen (input),
294 "UTF-8", "ISO-8859-1",
295 false, handler,
296 offsets,
297 &result, &length);
298 ASSERT (retval == 0);
299 ASSERT (length == 0);
300 if (o)
302 ASSERT (offsets[0] == 0);
303 ASSERT (offsets[1] == MAGIC);
304 free (offsets);
306 free (result);
310 /* autodetect_jp is only supported when iconv() supports ISO-2022-JP-2. */
311 # if (defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) \
312 || !(defined _AIX || defined __sgi || defined __hpux || defined __osf__ || defined __sun)
313 if (iconv_supports_encoding ("ISO-2022-JP-2"))
315 /* Test conversions from autodetect_jp to UTF-8. */
316 for (h = 0; h < SIZEOF (handlers); h++)
318 enum iconv_ilseq_handler handler = handlers[h];
319 static const char input[] = "\244\263\244\363\244\313\244\301\244\317"; /* こんにちは in EUC-JP */
320 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
321 for (o = 0; o < 2; o++)
323 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
324 char *result = NULL;
325 size_t length = 0;
326 int retval = mem_iconveha (input, strlen (input),
327 "autodetect_jp", "UTF-8",
328 false, handler,
329 offsets,
330 &result, &length);
331 ASSERT (retval == 0);
332 ASSERT (length == strlen (expected));
333 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
334 if (o)
336 for (i = 0; i < 10; i++)
337 ASSERT (offsets[i] == ((i % 2) == 0 ? (i / 2) * 3 : (size_t)(-1)));
338 ASSERT (offsets[10] == MAGIC);
339 free (offsets);
341 free (result);
344 for (h = 0; h < SIZEOF (handlers); h++)
346 enum iconv_ilseq_handler handler = handlers[h];
347 static const char input[] = "\202\261\202\361\202\311\202\277\202\315"; /* こんにちは in Shift_JIS */
348 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
349 for (o = 0; o < 2; o++)
351 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
352 char *result = NULL;
353 size_t length = 0;
354 int retval = mem_iconveha (input, strlen (input),
355 "autodetect_jp", "UTF-8",
356 false, handler,
357 offsets,
358 &result, &length);
359 ASSERT (retval == 0);
360 ASSERT (length == strlen (expected));
361 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
362 if (o)
364 for (i = 0; i < 10; i++)
365 ASSERT (offsets[i] == ((i % 2) == 0 ? (i / 2) * 3 : (size_t)(-1)));
366 ASSERT (offsets[10] == MAGIC);
367 free (offsets);
369 free (result);
372 for (h = 0; h < SIZEOF (handlers); h++)
374 enum iconv_ilseq_handler handler = handlers[h];
375 static const char input[] = "\033$B$3$s$K$A$O\033(B"; /* こんにちは in ISO-2022-JP-2 */
376 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
377 for (o = 0; o < 2; o++)
379 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
380 char *result = NULL;
381 size_t length = 0;
382 int retval = mem_iconveha (input, strlen (input),
383 "autodetect_jp", "UTF-8",
384 false, handler,
385 offsets,
386 &result, &length);
387 ASSERT (retval == 0);
388 ASSERT (length == strlen (expected));
389 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
390 if (o)
392 for (i = 0; i < 16; i++)
393 ASSERT (offsets[i] == (i == 0 ? 0 :
394 i == 5 ? 3 :
395 i == 7 ? 6 :
396 i == 9 ? 9 :
397 i == 11 ? 12 :
398 i == 13 ? 15 :
399 (size_t)(-1)));
400 ASSERT (offsets[16] == MAGIC);
401 free (offsets);
403 free (result);
407 # endif
409 # if (((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2) && !defined __UCLIBC__) || (_LIBICONV_VERSION >= 0x0105 && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__))
410 /* Test conversion from UTF-8 to ISO-8859-1 with transliteration. */
411 for (h = 0; h < SIZEOF (handlers); h++)
413 enum iconv_ilseq_handler handler = handlers[h];
414 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
415 static const char expected[] = "Costs: 27 EUR";
416 for (o = 0; o < 2; o++)
418 size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
419 char *result = NULL;
420 size_t length = 0;
421 int retval = mem_iconveha (input, strlen (input),
422 "UTF-8", "ISO-8859-1",
423 true, handler,
424 offsets,
425 &result, &length);
426 ASSERT (retval == 0);
427 ASSERT (length == strlen (expected));
428 ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
429 if (o)
431 for (i = 0; i < 13; i++)
432 ASSERT (offsets[i] == (i < 11 ? i : (size_t)(-1)));
433 ASSERT (offsets[13] == MAGIC);
434 free (offsets);
436 free (result);
439 # endif
441 /* ------------------------- Test str_iconveha() ------------------------- */
443 /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */
444 for (h = 0; h < SIZEOF (handlers); h++)
446 enum iconv_ilseq_handler handler = handlers[h];
447 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
448 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
449 char *result = str_iconveha (input, "ISO-8859-2", "ISO-8859-1", false, handler);
450 ASSERT (result != NULL);
451 ASSERT (strcmp (result, expected) == 0);
452 free (result);
455 /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ. */
456 for (h = 0; h < SIZEOF (handlers); h++)
458 enum iconv_ilseq_handler handler = handlers[h];
459 static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
460 char *result = str_iconveha (input, "ISO-8859-2", "ISO-8859-1", false, handler);
461 switch (handler)
463 case iconveh_error:
464 ASSERT (result == NULL && errno == EILSEQ);
465 break;
466 case iconveh_question_mark:
468 static const char expected[] = "Rafa? Maszkowski";
469 ASSERT (result != NULL);
470 ASSERT (strcmp (result, expected) == 0);
471 free (result);
473 break;
474 case iconveh_escape_sequence:
476 static const char expected[] = "Rafa\\u0142 Maszkowski";
477 ASSERT (result != NULL);
478 ASSERT (strcmp (result, expected) == 0);
479 free (result);
481 break;
485 /* Test conversion from ISO-8859-1 to UTF-8 with no errors. */
486 for (h = 0; h < SIZEOF (handlers); h++)
488 enum iconv_ilseq_handler handler = handlers[h];
489 static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
490 static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
491 char *result = str_iconveha (input, "ISO-8859-1", "UTF-8", false, handler);
492 ASSERT (result != NULL);
493 ASSERT (strcmp (result, expected) == 0);
494 free (result);
497 /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */
498 for (h = 0; h < SIZEOF (handlers); h++)
500 enum iconv_ilseq_handler handler = handlers[h];
501 static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
502 static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
503 char *result = str_iconveha (input, "UTF-8", "ISO-8859-1", false, handler);
504 ASSERT (result != NULL);
505 ASSERT (strcmp (result, expected) == 0);
506 free (result);
509 /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */
510 for (h = 0; h < SIZEOF (handlers); h++)
512 enum iconv_ilseq_handler handler = handlers[h];
513 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
514 char *result = str_iconveha (input, "UTF-8", "ISO-8859-1", false, handler);
515 switch (handler)
517 case iconveh_error:
518 ASSERT (result == NULL && errno == EILSEQ);
519 break;
520 case iconveh_question_mark:
522 static const char expected[] = "Costs: 27 ?";
523 ASSERT (result != NULL);
524 ASSERT (strcmp (result, expected) == 0);
525 free (result);
527 break;
528 case iconveh_escape_sequence:
530 static const char expected[] = "Costs: 27 \\u20AC";
531 ASSERT (result != NULL);
532 ASSERT (strcmp (result, expected) == 0);
533 free (result);
535 break;
539 /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL. */
540 for (h = 0; h < SIZEOF (handlers); h++)
542 enum iconv_ilseq_handler handler = handlers[h];
543 static const char input[] = "\342";
544 char *result = str_iconveha (input, "UTF-8", "ISO-8859-1", false, handler);
545 ASSERT (result != NULL);
546 ASSERT (strcmp (result, "") == 0);
547 free (result);
550 /* autodetect_jp is only supported when iconv() supports ISO-2022-JP-2. */
551 # if (defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) \
552 || !(defined _AIX || defined __sgi || defined __hpux || defined __osf__ || defined __sun)
553 if (iconv_supports_encoding ("ISO-2022-JP-2"))
555 /* Test conversions from autodetect_jp to UTF-8. */
556 for (h = 0; h < SIZEOF (handlers); h++)
558 enum iconv_ilseq_handler handler = handlers[h];
559 static const char input[] = "\244\263\244\363\244\313\244\301\244\317"; /* こんにちは in EUC-JP */
560 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
561 char *result = str_iconveha (input, "autodetect_jp", "UTF-8", false, handler);
562 ASSERT (result != NULL);
563 ASSERT (strcmp (result, expected) == 0);
564 free (result);
566 for (h = 0; h < SIZEOF (handlers); h++)
568 enum iconv_ilseq_handler handler = handlers[h];
569 static const char input[] = "\202\261\202\361\202\311\202\277\202\315"; /* こんにちは in Shift_JIS */
570 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
571 char *result = str_iconveha (input, "autodetect_jp", "UTF-8", false, handler);
572 ASSERT (result != NULL);
573 ASSERT (strcmp (result, expected) == 0);
574 free (result);
576 for (h = 0; h < SIZEOF (handlers); h++)
578 enum iconv_ilseq_handler handler = handlers[h];
579 static const char input[] = "\033$B$3$s$K$A$O\033(B"; /* こんにちは in ISO-2022-JP-2 */
580 static const char expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
581 char *result = str_iconveha (input, "autodetect_jp", "UTF-8", false, handler);
582 ASSERT (result != NULL);
583 ASSERT (strcmp (result, expected) == 0);
584 free (result);
587 # endif
589 # if (((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2) && !defined __UCLIBC__) || (_LIBICONV_VERSION >= 0x0105 && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__))
590 /* Test conversion from UTF-8 to ISO-8859-1 with transliteration. */
591 for (h = 0; h < SIZEOF (handlers); h++)
593 enum iconv_ilseq_handler handler = handlers[h];
594 static const char input[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
595 static const char expected[] = "Costs: 27 EUR";
596 char *result = str_iconveha (input, "UTF-8", "ISO-8859-1", true, handler);
597 ASSERT (result != NULL);
598 ASSERT (strcmp (result, expected) == 0);
599 free (result);
601 # endif
603 #endif
605 return test_exit_status;