tests/test-c32tolower.c

   1 /* Test of c32tolower() function.
   2    Copyright (C) 2020-2024 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16
  17 #include <config.h>
  18
  19 #include <uchar.h>
  20
  21 #include "signature.h"
  22 SIGNATURE_CHECK (c32tolower, wint_t, (wint_t));
  23
  24 #include <locale.h>
  25 #include <stdlib.h>
  26 #include <string.h>
  27 #include <wchar.h>
  28
  29 #include "macros.h"
  30
  31 /* Representation of a multibyte character.  */
  32 #define MBCHAR_BUF_SIZE 6
  33 struct multibyte
  34 {
  35   size_t nbytes;             /* number of bytes of current character, > 0 */
  36   char buf[MBCHAR_BUF_SIZE]; /* room for the bytes */
  37 };
  38
  39 /* Returns the value of c32tolower for the multibyte character s[0..n-1],
  40    as a multibyte character.  */
  41 static struct multibyte
  42 for_character (const char *s, size_t n)
  43 {
  44   mbstate_t state;
  45   char32_t wc;
  46   size_t ret;
  47   struct multibyte result;
  48
  49   memset (&state, '\0', sizeof (mbstate_t));
  50   wc = (char32_t) 0xBADFACE;
  51   ret = mbrtoc32 (&wc, s, n, &state);
  52   ASSERT (ret == n);
  53
  54   wc = c32tolower (wc);
  55   ASSERT (wc != WEOF);
  56
  57   memset (&state, '\0', sizeof (mbstate_t));
  58   ret = c32rtomb (result.buf, wc, &state);
  59   ASSERT (ret != 0);
  60   if (ret == (size_t)(-1))
  61     /* wc cannot be converted back to multibyte.  */
  62     result.nbytes = 0;
  63   else
  64     {
  65       ASSERT (ret <= MBCHAR_BUF_SIZE);
  66       result.nbytes = ret;
  67     }
  68   return result;
  69 }
  70
  71 int
  72 main (int argc, char *argv[])
  73 {
  74   wint_t wc;
  75   struct multibyte mb;
  76   char buf[4];
  77
  78   /* configure should already have checked that the locale is supported.  */
  79   if (setlocale (LC_ALL, "") == NULL)
  80     return 1;
  81
  82   /* Test WEOF.  */
  83   wc = c32tolower (WEOF);
  84   ASSERT (wc == WEOF);
  85
  86   /* Test single-byte characters.
  87      POSIX specifies in
  88        <https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html>
  89      that
  90        - in all locales, the uppercase characters include the A ... Z
  91          characters, and the corresponding characters a ... z (if not in a
  92          Turkish locale) are lowercase,
  93        - in the "POSIX" locale (which is usually the same as the "C" locale),
  94          the uppercase characters include only the ASCII A ... Z characters,
  95          and the corresponding characters a ... z are lowercase.
  96    */
  97 #if defined __NetBSD__
  98   /* towlower is broken in the zh_CN.GB18030 locale on NetBSD 9.0.
  99      See <https://gnats.netbsd.org/cgi-bin/query-pr-single.pl?number=57339>.  */
 100   if (!(argc > 1 && argv[1][0] == '4'))
 101 #endif
 102   {
 103     int c;
 104
 105     for (c = 0; c < 0x100; c++)
 106       switch (c)
 107         {
 108         case '\t': case '\v': case '\f':
 109         case ' ': case '!': case '"': case '#': case '%':
 110         case '&': case '\'': case '(': case ')': case '*':
 111         case '+': case ',': case '-': case '.': case '/':
 112         case '0': case '1': case '2': case '3': case '4':
 113         case '5': case '6': case '7': case '8': case '9':
 114         case ':': case ';': case '<': case '=': case '>':
 115         case '?':
 116         case 'A': case 'B': case 'C': case 'D': case 'E':
 117         case 'F': case 'G': case 'H': case 'I': case 'J':
 118         case 'K': case 'L': case 'M': case 'N': case 'O':
 119         case 'P': case 'Q': case 'R': case 'S': case 'T':
 120         case 'U': case 'V': case 'W': case 'X': case 'Y':
 121         case 'Z':
 122         case '[': case '\\': case ']': case '^': case '_':
 123         case 'a': case 'b': case 'c': case 'd': case 'e':
 124         case 'f': case 'g': case 'h': case 'i': case 'j':
 125         case 'k': case 'l': case 'm': case 'n': case 'o':
 126         case 'p': case 'q': case 'r': case 's': case 't':
 127         case 'u': case 'v': case 'w': case 'x': case 'y':
 128         case 'z': case '{': case '|': case '}': case '~':
 129           /* c is in the ISO C "basic character set".  */
 130           buf[0] = (unsigned char) c;
 131           mb = for_character (buf, 1);
 132           switch (c)
 133             {
 134             case 'A': case 'B': case 'C': case 'D': case 'E':
 135             case 'F': case 'G': case 'H': case 'I': case 'J':
 136             case 'K': case 'L': case 'M': case 'N': case 'O':
 137             case 'P': case 'Q': case 'R': case 'S': case 'T':
 138             case 'U': case 'V': case 'W': case 'X': case 'Y':
 139             case 'Z':
 140               ASSERT (mb.nbytes == 1);
 141               ASSERT ((unsigned char) mb.buf[0] == (unsigned char) c - 'A' + 'a');
 142               break;
 143             default:
 144               ASSERT (mb.nbytes == 1);
 145               ASSERT ((unsigned char) mb.buf[0] == c);
 146               break;
 147             }
 148           break;
 149         }
 150   }
 151
 152   if (argc > 1)
 153     switch (argv[1][0])
 154       {
 155       case '0':
 156         /* C locale; tested above.  */
 157         return test_exit_status;
 158
 159       case '1':
 160         /* Locale encoding is ISO-8859-1 or ISO-8859-15.  */
 161         {
 162           /* U+00B2 SUPERSCRIPT TWO */
 163           mb = for_character ("\262", 1);
 164           ASSERT (mb.nbytes == 1);
 165           ASSERT (memcmp (mb.buf, "\262", 1) == 0);
 166           /* U+00B5 MICRO SIGN */
 167           mb = for_character ("\265", 1);
 168           ASSERT (mb.nbytes == 1);
 169           ASSERT (memcmp (mb.buf, "\265", 1) == 0);
 170           /* U+00C9 LATIN CAPITAL LETTER E WITH ACUTE */
 171           mb = for_character ("\311", 1);
 172           ASSERT (mb.nbytes == 1);
 173           ASSERT (memcmp (mb.buf, "\351", 1) == 0);
 174           /* U+00DF LATIN SMALL LETTER SHARP S */
 175           mb = for_character ("\337", 1);
 176           ASSERT (mb.nbytes == 1);
 177           ASSERT (memcmp (mb.buf, "\337", 1) == 0);
 178           /* U+00E9 LATIN SMALL LETTER E WITH ACUTE */
 179           mb = for_character ("\351", 1);
 180           ASSERT (mb.nbytes == 1);
 181           ASSERT (memcmp (mb.buf, "\351", 1) == 0);
 182           /* U+00FF LATIN SMALL LETTER Y WITH DIAERESIS */
 183           mb = for_character ("\377", 1);
 184           ASSERT (mb.nbytes == 1);
 185           ASSERT (memcmp (mb.buf, "\377", 1) == 0);
 186         }
 187         return test_exit_status;
 188
 189       case '2':
 190         /* Locale encoding is EUC-JP.  */
 191         {
 192         #if !((defined __APPLE__ && defined __MACH__) || defined __DragonFly__)
 193           /* U+00C9 LATIN CAPITAL LETTER E WITH ACUTE */
 194           mb = for_character ("\217\252\261", 3);
 195           ASSERT (mb.nbytes == 3);
 196           ASSERT (memcmp (mb.buf, "\217\253\261", 3) == 0);
 197         #endif
 198           /* U+00DF LATIN SMALL LETTER SHARP S */
 199           mb = for_character ("\217\251\316", 3);
 200           ASSERT (mb.nbytes == 3);
 201           ASSERT (memcmp (mb.buf, "\217\251\316", 3) == 0);
 202           /* U+00E9 LATIN SMALL LETTER E WITH ACUTE */
 203           mb = for_character ("\217\253\261", 3);
 204           ASSERT (mb.nbytes == 3);
 205           ASSERT (memcmp (mb.buf, "\217\253\261", 3) == 0);
 206           /* U+00FF LATIN SMALL LETTER Y WITH DIAERESIS */
 207           mb = for_character ("\217\253\363", 3);
 208           ASSERT (mb.nbytes == 3);
 209           ASSERT (memcmp (mb.buf, "\217\253\363", 3) == 0);
 210         #if !((defined __APPLE__ && defined __MACH__) || defined __DragonFly__)
 211           /* U+0141 LATIN CAPITAL LETTER L WITH STROKE */
 212           mb = for_character ("\217\251\250", 3);
 213           ASSERT (mb.nbytes == 3);
 214           ASSERT (memcmp (mb.buf, "\217\251\310", 3) == 0);
 215         #endif
 216           /* U+0142 LATIN SMALL LETTER L WITH STROKE */
 217           mb = for_character ("\217\251\310", 3);
 218           ASSERT (mb.nbytes == 3);
 219           ASSERT (memcmp (mb.buf, "\217\251\310", 3) == 0);
 220         #if !defined __DragonFly__
 221           /* U+0429 CYRILLIC CAPITAL LETTER SHCHA */
 222           mb = for_character ("\247\273", 2);
 223           ASSERT (mb.nbytes == 2);
 224           ASSERT (memcmp (mb.buf, "\247\353", 2) == 0);
 225         #endif
 226           /* U+0449 CYRILLIC SMALL LETTER SHCHA */
 227           mb = for_character ("\247\353", 2);
 228           ASSERT (mb.nbytes == 2);
 229           ASSERT (memcmp (mb.buf, "\247\353", 2) == 0);
 230           /* U+3073 HIRAGANA LETTER BI */
 231           mb = for_character ("\244\323", 2);
 232           ASSERT (mb.nbytes == 2);
 233           ASSERT (memcmp (mb.buf, "\244\323", 2) == 0);
 234         #if !defined __DragonFly__
 235           /* U+FF27 FULLWIDTH LATIN CAPITAL LETTER G */
 236           mb = for_character ("\243\307", 2);
 237           ASSERT (mb.nbytes == 2);
 238           ASSERT (memcmp (mb.buf, "\243\347", 2) == 0);
 239         #endif
 240           /* U+FF47 FULLWIDTH LATIN SMALL LETTER G */
 241           mb = for_character ("\243\347", 2);
 242           ASSERT (mb.nbytes == 2);
 243           ASSERT (memcmp (mb.buf, "\243\347", 2) == 0);
 244         }
 245         return test_exit_status;
 246
 247       case '3':
 248         /* Locale encoding is UTF-8.  */
 249         {
 250           /* U+00B2 SUPERSCRIPT TWO */
 251           mb = for_character ("\302\262", 2);
 252           ASSERT (mb.nbytes == 2);
 253           ASSERT (memcmp (mb.buf, "\302\262", 2) == 0);
 254           /* U+00B5 MICRO SIGN */
 255           mb = for_character ("\302\265", 2);
 256           ASSERT (mb.nbytes == 2);
 257           ASSERT (memcmp (mb.buf, "\302\265", 2) == 0);
 258           /* U+00C9 LATIN CAPITAL LETTER E WITH ACUTE */
 259           mb = for_character ("\303\211", 2);
 260           ASSERT (mb.nbytes == 2);
 261           ASSERT (memcmp (mb.buf, "\303\251", 2) == 0);
 262           /* U+00DF LATIN SMALL LETTER SHARP S */
 263           mb = for_character ("\303\237", 2);
 264           ASSERT (mb.nbytes == 2);
 265           ASSERT (memcmp (mb.buf, "\303\237", 2) == 0);
 266           /* U+00E9 LATIN SMALL LETTER E WITH ACUTE */
 267           mb = for_character ("\303\251", 2);
 268           ASSERT (mb.nbytes == 2);
 269           ASSERT (memcmp (mb.buf, "\303\251", 2) == 0);
 270           /* U+00FF LATIN SMALL LETTER Y WITH DIAERESIS */
 271           mb = for_character ("\303\277", 2);
 272           ASSERT (mb.nbytes == 2);
 273           ASSERT (memcmp (mb.buf, "\303\277", 2) == 0);
 274           /* U+0141 LATIN CAPITAL LETTER L WITH STROKE */
 275           mb = for_character ("\305\201", 2);
 276           ASSERT (mb.nbytes == 2);
 277           ASSERT (memcmp (mb.buf, "\305\202", 2) == 0);
 278           /* U+0142 LATIN SMALL LETTER L WITH STROKE */
 279           mb = for_character ("\305\202", 2);
 280           ASSERT (mb.nbytes == 2);
 281           ASSERT (memcmp (mb.buf, "\305\202", 2) == 0);
 282           /* U+0429 CYRILLIC CAPITAL LETTER SHCHA */
 283           mb = for_character ("\320\251", 2);
 284           ASSERT (mb.nbytes == 2);
 285           ASSERT (memcmp (mb.buf, "\321\211", 2) == 0);
 286           /* U+0449 CYRILLIC SMALL LETTER SHCHA */
 287           mb = for_character ("\321\211", 2);
 288           ASSERT (mb.nbytes == 2);
 289           ASSERT (memcmp (mb.buf, "\321\211", 2) == 0);
 290           /* U+05D5 HEBREW LETTER VAV */
 291           mb = for_character ("\327\225", 2);
 292           ASSERT (mb.nbytes == 2);
 293           ASSERT (memcmp (mb.buf, "\327\225", 2) == 0);
 294           /* U+3073 HIRAGANA LETTER BI */
 295           mb = for_character ("\343\201\263", 3);
 296           ASSERT (mb.nbytes == 3);
 297           ASSERT (memcmp (mb.buf, "\343\201\263", 3) == 0);
 298           /* U+3162 HANGUL LETTER YI */
 299           mb = for_character ("\343\205\242", 3);
 300           ASSERT (mb.nbytes == 3);
 301           ASSERT (memcmp (mb.buf, "\343\205\242", 3) == 0);
 302           /* U+FF27 FULLWIDTH LATIN CAPITAL LETTER G */
 303           mb = for_character ("\357\274\247", 3);
 304           ASSERT (mb.nbytes == 3);
 305           ASSERT (memcmp (mb.buf, "\357\275\207", 3) == 0);
 306           /* U+FF47 FULLWIDTH LATIN SMALL LETTER G */
 307           mb = for_character ("\357\275\207", 3);
 308           ASSERT (mb.nbytes == 3);
 309           ASSERT (memcmp (mb.buf, "\357\275\207", 3) == 0);
 310           /* U+FFDB HALFWIDTH HANGUL LETTER YI */
 311           mb = for_character ("\357\277\233", 3);
 312           ASSERT (mb.nbytes == 3);
 313           ASSERT (memcmp (mb.buf, "\357\277\233", 3) == 0);
 314         #if !(defined __DragonFly__ || defined __sun)
 315           /* U+10419 DESERET CAPITAL LETTER EF */
 316           mb = for_character ("\360\220\220\231", 4);
 317           ASSERT (mb.nbytes == 4);
 318           ASSERT (memcmp (mb.buf, "\360\220\221\201", 4) == 0);
 319         #endif
 320           /* U+10441 DESERET SMALL LETTER EF */
 321           mb = for_character ("\360\220\221\201", 4);
 322           ASSERT (mb.nbytes == 4);
 323           ASSERT (memcmp (mb.buf, "\360\220\221\201", 4) == 0);
 324           /* U+E0041 TAG LATIN CAPITAL LETTER A */
 325           mb = for_character ("\363\240\201\201", 4);
 326           ASSERT (mb.nbytes == 4);
 327           ASSERT (memcmp (mb.buf, "\363\240\201\201", 4) == 0);
 328           /* U+E0061 TAG LATIN SMALL LETTER A */
 329           mb = for_character ("\363\240\201\241", 4);
 330           ASSERT (mb.nbytes == 4);
 331           ASSERT (memcmp (mb.buf, "\363\240\201\241", 4) == 0);
 332         }
 333         return test_exit_status;
 334
 335       case '4':
 336         /* Locale encoding is GB18030.  */
 337         #if (defined __GLIBC__ && __GLIBC__ == 2 && __GLIBC_MINOR__ >= 13 && __GLIBC_MINOR__ <= 15) || (GL_CHAR32_T_IS_UNICODE && (defined __FreeBSD__ || defined __NetBSD__ || defined __sun))
 338         if (test_exit_status != EXIT_SUCCESS)
 339           return test_exit_status;
 340         fputs ("Skipping test: The GB18030 converter in this system's iconv is broken.\n", stderr);
 341         return 77;
 342         #endif
 343         {
 344           /* U+00B2 SUPERSCRIPT TWO */
 345           mb = for_character ("\201\060\205\065", 4);
 346           ASSERT (mb.nbytes == 4);
 347           ASSERT (memcmp (mb.buf, "\201\060\205\065", 4) == 0);
 348           /* U+00B5 MICRO SIGN */
 349           mb = for_character ("\201\060\205\070", 4);
 350           ASSERT (mb.nbytes == 4);
 351           ASSERT (memcmp (mb.buf, "\201\060\205\070", 4) == 0);
 352         #if !(defined __FreeBSD__ || defined __DragonFly__ || defined __sun)
 353           /* U+00C9 LATIN CAPITAL LETTER E WITH ACUTE */
 354           mb = for_character ("\201\060\207\067", 4);
 355           ASSERT (mb.nbytes == 2);
 356           ASSERT (memcmp (mb.buf, "\250\246", 2) == 0);
 357         #endif
 358           /* U+00DF LATIN SMALL LETTER SHARP S */
 359           mb = for_character ("\201\060\211\070", 4);
 360           ASSERT (mb.nbytes == 4);
 361           ASSERT (memcmp (mb.buf, "\201\060\211\070", 4) == 0);
 362           /* U+00E9 LATIN SMALL LETTER E WITH ACUTE */
 363           mb = for_character ("\250\246", 2);
 364           ASSERT (mb.nbytes == 2);
 365           ASSERT (memcmp (mb.buf, "\250\246", 2) == 0);
 366           /* U+00FF LATIN SMALL LETTER Y WITH DIAERESIS */
 367           mb = for_character ("\201\060\213\067", 4);
 368           ASSERT (mb.nbytes == 4);
 369           ASSERT (memcmp (mb.buf, "\201\060\213\067", 4) == 0);
 370         #if !(defined __FreeBSD__ || defined __DragonFly__ || defined __sun)
 371           /* U+0141 LATIN CAPITAL LETTER L WITH STROKE */
 372           mb = for_character ("\201\060\221\071", 4);
 373           ASSERT (mb.nbytes == 4);
 374           ASSERT (memcmp (mb.buf, "\201\060\222\060", 4) == 0);
 375         #endif
 376           /* U+0142 LATIN SMALL LETTER L WITH STROKE */
 377           mb = for_character ("\201\060\222\060", 4);
 378           ASSERT (mb.nbytes == 4);
 379           ASSERT (memcmp (mb.buf, "\201\060\222\060", 4) == 0);
 380         #if !(defined __FreeBSD__ || defined __DragonFly__)
 381           /* U+0429 CYRILLIC CAPITAL LETTER SHCHA */
 382           mb = for_character ("\247\273", 2);
 383           ASSERT (mb.nbytes == 2);
 384           ASSERT (memcmp (mb.buf, "\247\353", 2) == 0);
 385         #endif
 386           /* U+0449 CYRILLIC SMALL LETTER SHCHA */
 387           mb = for_character ("\247\353", 2);
 388           ASSERT (mb.nbytes == 2);
 389           ASSERT (memcmp (mb.buf, "\247\353", 2) == 0);
 390           /* U+05D5 HEBREW LETTER VAV */
 391           mb = for_character ("\201\060\371\067", 4);
 392           ASSERT (mb.nbytes == 4);
 393           ASSERT (memcmp (mb.buf, "\201\060\371\067", 4) == 0);
 394           /* U+3073 HIRAGANA LETTER BI */
 395           mb = for_character ("\244\323", 2);
 396           ASSERT (mb.nbytes == 2);
 397           ASSERT (memcmp (mb.buf, "\244\323", 2) == 0);
 398           /* U+3162 HANGUL LETTER YI */
 399           mb = for_character ("\201\071\256\062", 4);
 400           ASSERT (mb.nbytes == 4);
 401           ASSERT (memcmp (mb.buf, "\201\071\256\062", 4) == 0);
 402         #if !defined __DragonFly__
 403           /* U+FF27 FULLWIDTH LATIN CAPITAL LETTER G */
 404           mb = for_character ("\243\307", 2);
 405           ASSERT (mb.nbytes == 2);
 406           ASSERT (memcmp (mb.buf, "\243\347", 2) == 0);
 407         #endif
 408           /* U+FF47 FULLWIDTH LATIN SMALL LETTER G */
 409           mb = for_character ("\243\347", 2);
 410           ASSERT (mb.nbytes == 2);
 411           ASSERT (memcmp (mb.buf, "\243\347", 2) == 0);
 412           /* U+FFDB HALFWIDTH HANGUL LETTER YI */
 413           mb = for_character ("\204\061\241\071", 4);
 414           ASSERT (mb.nbytes == 4);
 415           ASSERT (memcmp (mb.buf, "\204\061\241\071", 4) == 0);
 416         #if !((defined __APPLE__ && defined __MACH__) || defined __FreeBSD__ || defined __DragonFly__ || defined __NetBSD__ || defined __sun)
 417           /* U+10419 DESERET CAPITAL LETTER EF */
 418           mb = for_character ("\220\060\351\071", 4);
 419           ASSERT (mb.nbytes == 4);
 420           ASSERT (memcmp (mb.buf, "\220\060\355\071", 4) == 0);
 421         #endif
 422           /* U+10441 DESERET SMALL LETTER EF */
 423           mb = for_character ("\220\060\355\071", 4);
 424           ASSERT (mb.nbytes == 4);
 425           ASSERT (memcmp (mb.buf, "\220\060\355\071", 4) == 0);
 426           /* U+E0041 TAG LATIN CAPITAL LETTER A */
 427           mb = for_character ("\323\066\234\063", 4);
 428           ASSERT (mb.nbytes == 4);
 429           ASSERT (memcmp (mb.buf, "\323\066\234\063", 4) == 0);
 430           /* U+E0061 TAG LATIN SMALL LETTER A */
 431           mb = for_character ("\323\066\237\065", 4);
 432           ASSERT (mb.nbytes == 4);
 433           ASSERT (memcmp (mb.buf, "\323\066\237\065", 4) == 0);
 434         }
 435         return test_exit_status;
 436
 437       }
 438
 439   return 1;
 440 }