gnu/dist/gettext/gettext-tools/lib/fnmatch_loop.c

   1 /* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2002,2003,2004
   2         Free Software Foundation, Inc.
   3
   4    This program is free software; you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 2, or (at your option)
   7    any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program; if not, write to the Free Software Foundation,
  16    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  17
  18 /* Match STRING against the filename pattern PATTERN, returning zero if
  19    it matches, nonzero if not.  */
  20 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
  21                 const CHAR *string_end, bool no_leading_period, int flags)
  22      internal_function;
  23 static const CHAR *END (const CHAR *patternp) internal_function;
  24
  25 static int
  26 internal_function
  27 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
  28      bool no_leading_period, int flags)
  29 {
  30   register const CHAR *p = pattern, *n = string;
  31   register UCHAR c;
  32 #ifdef _LIBC
  33 # if WIDE_CHAR_VERSION
  34   const char *collseq = (const char *)
  35     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
  36 # else
  37   const UCHAR *collseq = (const UCHAR *)
  38     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
  39 # endif
  40 #endif
  41
  42   while ((c = *p++) != L('\0'))
  43     {
  44       bool new_no_leading_period = false;
  45       c = FOLD (c);
  46
  47       switch (c)
  48         {
  49         case L('?'):
  50           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  51             {
  52               int res;
  53
  54               res = EXT (c, p, n, string_end, no_leading_period,
  55                          flags);
  56               if (res != -1)
  57                 return res;
  58             }
  59
  60           if (n == string_end)
  61             return FNM_NOMATCH;
  62           else if (*n == L('/') && (flags & FNM_FILE_NAME))
  63             return FNM_NOMATCH;
  64           else if (*n == L('.') && no_leading_period)
  65             return FNM_NOMATCH;
  66           break;
  67
  68         case L('\\'):
  69           if (!(flags & FNM_NOESCAPE))
  70             {
  71               c = *p++;
  72               if (c == L('\0'))
  73                 /* Trailing \ loses.  */
  74                 return FNM_NOMATCH;
  75               c = FOLD (c);
  76             }
  77           if (n == string_end || FOLD ((UCHAR) *n) != c)
  78             return FNM_NOMATCH;
  79           break;
  80
  81         case L('*'):
  82           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  83             {
  84               int res;
  85
  86               res = EXT (c, p, n, string_end, no_leading_period,
  87                          flags);
  88               if (res != -1)
  89                 return res;
  90             }
  91
  92           if (n != string_end && *n == L('.') && no_leading_period)
  93             return FNM_NOMATCH;
  94
  95           for (c = *p++; c == L('?') || c == L('*'); c = *p++)
  96             {
  97               if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
  98                 {
  99                   const CHAR *endp = END (p);
 100                   if (endp != p)
 101                     {
 102                       /* This is a pattern.  Skip over it.  */
 103                       p = endp;
 104                       continue;
 105                     }
 106                 }
 107
 108               if (c == L('?'))
 109                 {
 110                   /* A ? needs to match one character.  */
 111                   if (n == string_end)
 112                     /* There isn't another character; no match.  */
 113                     return FNM_NOMATCH;
 114                   else if (*n == L('/')
 115                            && __builtin_expect (flags & FNM_FILE_NAME, 0))
 116                     /* A slash does not match a wildcard under
 117                        FNM_FILE_NAME.  */
 118                     return FNM_NOMATCH;
 119                   else
 120                     /* One character of the string is consumed in matching
 121                        this ? wildcard, so *??? won't match if there are
 122                        less than three characters.  */
 123                     ++n;
 124                 }
 125             }
 126
 127           if (c == L('\0'))
 128             /* The wildcard(s) is/are the last element of the pattern.
 129                If the name is a file name and contains another slash
 130                this means it cannot match, unless the FNM_LEADING_DIR
 131                flag is set.  */
 132             {
 133               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
 134
 135               if (flags & FNM_FILE_NAME)
 136                 {
 137                   if (flags & FNM_LEADING_DIR)
 138                     result = 0;
 139                   else
 140                     {
 141                       if (MEMCHR (n, L('/'), string_end - n) == NULL)
 142                         result = 0;
 143                     }
 144                 }
 145
 146               return result;
 147             }
 148           else
 149             {
 150               const CHAR *endp;
 151
 152               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
 153                              string_end - n);
 154               if (endp == NULL)
 155                 endp = string_end;
 156
 157               if (c == L('[')
 158                   || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
 159                       && (c == L('@') || c == L('+') || c == L('!'))
 160                       && *p == L('(')))
 161                 {
 162                   int flags2 = ((flags & FNM_FILE_NAME)
 163                                 ? flags : (flags & ~FNM_PERIOD));
 164                   bool no_leading_period2 = no_leading_period;
 165
 166                   for (--p; n < endp; ++n, no_leading_period2 = false)
 167                     if (FCT (p, n, string_end, no_leading_period2, flags2)
 168                         == 0)
 169                       return 0;
 170                 }
 171               else if (c == L('/') && (flags & FNM_FILE_NAME))
 172                 {
 173                   while (n < string_end && *n != L('/'))
 174                     ++n;
 175                   if (n < string_end && *n == L('/')
 176                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
 177                           == 0))
 178                     return 0;
 179                 }
 180               else
 181                 {
 182                   int flags2 = ((flags & FNM_FILE_NAME)
 183                                 ? flags : (flags & ~FNM_PERIOD));
 184                   int no_leading_period2 = no_leading_period;
 185
 186                   if (c == L('\\') && !(flags & FNM_NOESCAPE))
 187                     c = *p;
 188                   c = FOLD (c);
 189                   for (--p; n < endp; ++n, no_leading_period2 = false)
 190                     if (FOLD ((UCHAR) *n) == c
 191                         && (FCT (p, n, string_end, no_leading_period2, flags2)
 192                             == 0))
 193                       return 0;
 194                 }
 195             }
 196
 197           /* If we come here no match is possible with the wildcard.  */
 198           return FNM_NOMATCH;
 199
 200         case L('['):
 201           {
 202             /* Nonzero if the sense of the character class is inverted.  */
 203             register bool not;
 204             CHAR cold;
 205             UCHAR fn;
 206
 207             if (posixly_correct == 0)
 208               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 209
 210             if (n == string_end)
 211               return FNM_NOMATCH;
 212
 213             if (*n == L('.') && no_leading_period)
 214               return FNM_NOMATCH;
 215
 216             if (*n == L('/') && (flags & FNM_FILE_NAME))
 217               /* `/' cannot be matched.  */
 218               return FNM_NOMATCH;
 219
 220             not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
 221             if (not)
 222               ++p;
 223
 224             fn = FOLD ((UCHAR) *n);
 225
 226             c = *p++;
 227             for (;;)
 228               {
 229                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
 230                   {
 231                     if (*p == L('\0'))
 232                       return FNM_NOMATCH;
 233                     c = FOLD ((UCHAR) *p);
 234                     ++p;
 235
 236                     if (c == fn)
 237                       goto matched;
 238                   }
 239                 else if (c == L('[') && *p == L(':'))
 240                   {
 241                     /* Leave room for the null.  */
 242                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
 243                     size_t c1 = 0;
 244 #if defined _LIBC || WIDE_CHAR_SUPPORT
 245                     wctype_t wt;
 246 #endif
 247                     const CHAR *startp = p;
 248
 249                     for (;;)
 250                       {
 251                         if (c1 == CHAR_CLASS_MAX_LENGTH)
 252                           /* The name is too long and therefore the pattern
 253                              is ill-formed.  */
 254                           return FNM_NOMATCH;
 255
 256                         c = *++p;
 257                         if (c == L(':') && p[1] == L(']'))
 258                           {
 259                             p += 2;
 260                             break;
 261                           }
 262                         if (c < L('a') || c >= L('z'))
 263                           {
 264                             /* This cannot possibly be a character class name.
 265                                Match it as a normal range.  */
 266                             p = startp;
 267                             c = L('[');
 268                             goto normal_bracket;
 269                           }
 270                         str[c1++] = c;
 271                       }
 272                     str[c1] = L('\0');
 273
 274 #if defined _LIBC || WIDE_CHAR_SUPPORT
 275                     wt = IS_CHAR_CLASS (str);
 276                     if (wt == 0)
 277                       /* Invalid character class name.  */
 278                       return FNM_NOMATCH;
 279
 280 # if defined _LIBC && ! WIDE_CHAR_VERSION
 281                     /* The following code is glibc specific but does
 282                        there a good job in speeding up the code since
 283                        we can avoid the btowc() call.  */
 284                     if (_ISCTYPE ((UCHAR) *n, wt))
 285                       goto matched;
 286 # else
 287                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
 288                       goto matched;
 289 # endif
 290 #else
 291                     if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
 292                         || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
 293                         || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
 294                         || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
 295                         || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
 296                         || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
 297                         || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
 298                         || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
 299                         || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
 300                         || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
 301                         || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
 302                         || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
 303                       goto matched;
 304 #endif
 305                     c = *p++;
 306                   }
 307 #ifdef _LIBC
 308                 else if (c == L('[') && *p == L('='))
 309                   {
 310                     UCHAR str[1];
 311                     uint32_t nrules =
 312                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 313                     const CHAR *startp = p;
 314
 315                     c = *++p;
 316                     if (c == L('\0'))
 317                       {
 318                         p = startp;
 319                         c = L('[');
 320                         goto normal_bracket;
 321                       }
 322                     str[0] = c;
 323
 324                     c = *++p;
 325                     if (c != L('=') || p[1] != L(']'))
 326                       {
 327                         p = startp;
 328                         c = L('[');
 329                         goto normal_bracket;
 330                       }
 331                     p += 2;
 332
 333                     if (nrules == 0)
 334                       {
 335                         if ((UCHAR) *n == str[0])
 336                           goto matched;
 337                       }
 338                     else
 339                       {
 340                         const int32_t *table;
 341 # if WIDE_CHAR_VERSION
 342                         const int32_t *weights;
 343                         const int32_t *extra;
 344 # else
 345                         const unsigned char *weights;
 346                         const unsigned char *extra;
 347 # endif
 348                         const int32_t *indirect;
 349                         int32_t idx;
 350                         const UCHAR *cp = (const UCHAR *) str;
 351
 352                         /* This #include defines a local function!  */
 353 # if WIDE_CHAR_VERSION
 354 #  include <locale/weightwc.h>
 355 # else
 356 #  include <locale/weight.h>
 357 # endif
 358
 359 # if WIDE_CHAR_VERSION
 360                         table = (const int32_t *)
 361                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
 362                         weights = (const int32_t *)
 363                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
 364                         extra = (const int32_t *)
 365                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
 366                         indirect = (const int32_t *)
 367                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
 368 # else
 369                         table = (const int32_t *)
 370                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
 371                         weights = (const unsigned char *)
 372                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
 373                         extra = (const unsigned char *)
 374                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
 375                         indirect = (const int32_t *)
 376                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
 377 # endif
 378
 379                         idx = findidx (&cp);
 380                         if (idx != 0)
 381                           {
 382                             /* We found a table entry.  Now see whether the
 383                                character we are currently at has the same
 384                                equivalance class value.  */
 385                             int len = weights[idx];
 386                             int32_t idx2;
 387                             const UCHAR *np = (const UCHAR *) n;
 388
 389                             idx2 = findidx (&np);
 390                             if (idx2 != 0 && len == weights[idx2])
 391                               {
 392                                 int cnt = 0;
 393
 394                                 while (cnt < len
 395                                        && (weights[idx + 1 + cnt]
 396                                            == weights[idx2 + 1 + cnt]))
 397                                   ++cnt;
 398
 399                                 if (cnt == len)
 400                                   goto matched;
 401                               }
 402                           }
 403                       }
 404
 405                     c = *p++;
 406                   }
 407 #endif
 408                 else if (c == L('\0'))
 409                   /* [ (unterminated) loses.  */
 410                   return FNM_NOMATCH;
 411                 else
 412                   {
 413                     bool is_range = false;
 414
 415 #ifdef _LIBC
 416                     bool is_seqval = false;
 417
 418                     if (c == L('[') && *p == L('.'))
 419                       {
 420                         uint32_t nrules =
 421                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 422                         const CHAR *startp = p;
 423                         size_t c1 = 0;
 424
 425                         while (1)
 426                           {
 427                             c = *++p;
 428                             if (c == L('.') && p[1] == L(']'))
 429                               {
 430                                 p += 2;
 431                                 break;
 432                               }
 433                             if (c == '\0')
 434                               return FNM_NOMATCH;
 435                             ++c1;
 436                           }
 437
 438                         /* We have to handling the symbols differently in
 439                            ranges since then the collation sequence is
 440                            important.  */
 441                         is_range = *p == L('-') && p[1] != L('\0');
 442
 443                         if (nrules == 0)
 444                           {
 445                             /* There are no names defined in the collation
 446                                data.  Therefore we only accept the trivial
 447                                names consisting of the character itself.  */
 448                             if (c1 != 1)
 449                               return FNM_NOMATCH;
 450
 451                             if (!is_range && *n == startp[1])
 452                               goto matched;
 453
 454                             cold = startp[1];
 455                             c = *p++;
 456                           }
 457                         else
 458                           {
 459                             int32_t table_size;
 460                             const int32_t *symb_table;
 461 # ifdef WIDE_CHAR_VERSION
 462                             char str[c1];
 463                             size_t strcnt;
 464 # else
 465 #  define str (startp + 1)
 466 # endif
 467                             const unsigned char *extra;
 468                             int32_t idx;
 469                             int32_t elem;
 470                             int32_t second;
 471                             int32_t hash;
 472
 473 # ifdef WIDE_CHAR_VERSION
 474                             /* We have to convert the name to a single-byte
 475                                string.  This is possible since the names
 476                                consist of ASCII characters and the internal
 477                                representation is UCS4.  */
 478                             for (strcnt = 0; strcnt < c1; ++strcnt)
 479                               str[strcnt] = startp[1 + strcnt];
 480 # endif
 481
 482                             table_size =
 483                               _NL_CURRENT_WORD (LC_COLLATE,
 484                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
 485                             symb_table = (const int32_t *)
 486                               _NL_CURRENT (LC_COLLATE,
 487                                            _NL_COLLATE_SYMB_TABLEMB);
 488                             extra = (const unsigned char *)
 489                               _NL_CURRENT (LC_COLLATE,
 490                                            _NL_COLLATE_SYMB_EXTRAMB);
 491
 492                             /* Locate the character in the hashing table.  */
 493                             hash = elem_hash (str, c1);
 494
 495                             idx = 0;
 496                             elem = hash % table_size;
 497                             second = hash % (table_size - 2);
 498                             while (symb_table[2 * elem] != 0)
 499                               {
 500                                 /* First compare the hashing value.  */
 501                                 if (symb_table[2 * elem] == hash
 502                                     && c1 == extra[symb_table[2 * elem + 1]]
 503                                     && memcmp (str,
 504                                                &extra[symb_table[2 * elem + 1]
 505                                                      + 1], c1) == 0)
 506                                   {
 507                                     /* Yep, this is the entry.  */
 508                                     idx = symb_table[2 * elem + 1];
 509                                     idx += 1 + extra[idx];
 510                                     break;
 511                                   }
 512
 513                                 /* Next entry.  */
 514                                 elem += second;
 515                               }
 516
 517                             if (symb_table[2 * elem] != 0)
 518                               {
 519                                 /* Compare the byte sequence but only if
 520                                    this is not part of a range.  */
 521 # ifdef WIDE_CHAR_VERSION
 522                                 int32_t *wextra;
 523
 524                                 idx += 1 + extra[idx];
 525                                 /* Adjust for the alignment.  */
 526                                 idx = (idx + 3) & ~3;
 527
 528                                 wextra = (int32_t *) &extra[idx + 4];
 529 # endif
 530
 531                                 if (! is_range)
 532                                   {
 533 # ifdef WIDE_CHAR_VERSION
 534                                     for (c1 = 0;
 535                                          (int32_t) c1 < wextra[idx];
 536                                          ++c1)
 537                                       if (n[c1] != wextra[1 + c1])
 538                                         break;
 539
 540                                     if ((int32_t) c1 == wextra[idx])
 541                                       goto matched;
 542 # else
 543                                     for (c1 = 0; c1 < extra[idx]; ++c1)
 544                                       if (n[c1] != extra[1 + c1])
 545                                         break;
 546
 547                                     if (c1 == extra[idx])
 548                                       goto matched;
 549 # endif
 550                                   }
 551
 552                                 /* Get the collation sequence value.  */
 553                                 is_seqval = true;
 554 # ifdef WIDE_CHAR_VERSION
 555                                 cold = wextra[1 + wextra[idx]];
 556 # else
 557                                 /* Adjust for the alignment.  */
 558                                 idx += 1 + extra[idx];
 559                                 idx = (idx + 3) & ~4;
 560                                 cold = *((int32_t *) &extra[idx]);
 561 # endif
 562
 563                                 c = *p++;
 564                               }
 565                             else if (c1 == 1)
 566                               {
 567                                 /* No valid character.  Match it as a
 568                                    single byte.  */
 569                                 if (!is_range && *n == str[0])
 570                                   goto matched;
 571
 572                                 cold = str[0];
 573                                 c = *p++;
 574                               }
 575                             else
 576                               return FNM_NOMATCH;
 577                           }
 578                       }
 579                     else
 580 # undef str
 581 #endif
 582                       {
 583                         c = FOLD (c);
 584                       normal_bracket:
 585
 586                         /* We have to handling the symbols differently in
 587                            ranges since then the collation sequence is
 588                            important.  */
 589                         is_range = (*p == L('-') && p[1] != L('\0')
 590                                     && p[1] != L(']'));
 591
 592                         if (!is_range && c == fn)
 593                           goto matched;
 594
 595                         cold = c;
 596                         c = *p++;
 597                       }
 598
 599                     if (c == L('-') && *p != L(']'))
 600                       {
 601 #if _LIBC
 602                         /* We have to find the collation sequence
 603                            value for C.  Collation sequence is nothing
 604                            we can regularly access.  The sequence
 605                            value is defined by the order in which the
 606                            definitions of the collation values for the
 607                            various characters appear in the source
 608                            file.  A strange concept, nowhere
 609                            documented.  */
 610                         uint32_t fcollseq;
 611                         uint32_t lcollseq;
 612                         UCHAR cend = *p++;
 613
 614 # ifdef WIDE_CHAR_VERSION
 615                         /* Search in the `names' array for the characters.  */
 616                         fcollseq = __collseq_table_lookup (collseq, fn);
 617                         if (fcollseq == ~((uint32_t) 0))
 618                           /* XXX We don't know anything about the character
 619                              we are supposed to match.  This means we are
 620                              failing.  */
 621                           goto range_not_matched;
 622
 623                         if (is_seqval)
 624                           lcollseq = cold;
 625                         else
 626                           lcollseq = __collseq_table_lookup (collseq, cold);
 627 # else
 628                         fcollseq = collseq[fn];
 629                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
 630 # endif
 631
 632                         is_seqval = false;
 633                         if (cend == L('[') && *p == L('.'))
 634                           {
 635                             uint32_t nrules =
 636                               _NL_CURRENT_WORD (LC_COLLATE,
 637                                                 _NL_COLLATE_NRULES);
 638                             const CHAR *startp = p;
 639                             size_t c1 = 0;
 640
 641                             while (1)
 642                               {
 643                                 c = *++p;
 644                                 if (c == L('.') && p[1] == L(']'))
 645                                   {
 646                                     p += 2;
 647                                     break;
 648                                   }
 649                                 if (c == '\0')
 650                                   return FNM_NOMATCH;
 651                                 ++c1;
 652                               }
 653
 654                             if (nrules == 0)
 655                               {
 656                                 /* There are no names defined in the
 657                                    collation data.  Therefore we only
 658                                    accept the trivial names consisting
 659                                    of the character itself.  */
 660                                 if (c1 != 1)
 661                                   return FNM_NOMATCH;
 662
 663                                 cend = startp[1];
 664                               }
 665                             else
 666                               {
 667                                 int32_t table_size;
 668                                 const int32_t *symb_table;
 669 # ifdef WIDE_CHAR_VERSION
 670                                 char str[c1];
 671                                 size_t strcnt;
 672 # else
 673 #  define str (startp + 1)
 674 # endif
 675                                 const unsigned char *extra;
 676                                 int32_t idx;
 677                                 int32_t elem;
 678                                 int32_t second;
 679                                 int32_t hash;
 680
 681 # ifdef WIDE_CHAR_VERSION
 682                                 /* We have to convert the name to a single-byte
 683                                    string.  This is possible since the names
 684                                    consist of ASCII characters and the internal
 685                                    representation is UCS4.  */
 686                                 for (strcnt = 0; strcnt < c1; ++strcnt)
 687                                   str[strcnt] = startp[1 + strcnt];
 688 # endif
 689
 690                                 table_size =
 691                                   _NL_CURRENT_WORD (LC_COLLATE,
 692                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
 693                                 symb_table = (const int32_t *)
 694                                   _NL_CURRENT (LC_COLLATE,
 695                                                _NL_COLLATE_SYMB_TABLEMB);
 696                                 extra = (const unsigned char *)
 697                                   _NL_CURRENT (LC_COLLATE,
 698                                                _NL_COLLATE_SYMB_EXTRAMB);
 699
 700                                 /* Locate the character in the hashing
 701                                    table.  */
 702                                 hash = elem_hash (str, c1);
 703
 704                                 idx = 0;
 705                                 elem = hash % table_size;
 706                                 second = hash % (table_size - 2);
 707                                 while (symb_table[2 * elem] != 0)
 708                                   {
 709                                 /* First compare the hashing value.  */
 710                                     if (symb_table[2 * elem] == hash
 711                                         && (c1
 712                                             == extra[symb_table[2 * elem + 1]])
 713                                         && memcmp (str,
 714                                                    &extra[symb_table[2 * elem + 1]
 715                                                          + 1], c1) == 0)
 716                                       {
 717                                         /* Yep, this is the entry.  */
 718                                         idx = symb_table[2 * elem + 1];
 719                                         idx += 1 + extra[idx];
 720                                         break;
 721                                       }
 722
 723                                     /* Next entry.  */
 724                                     elem += second;
 725                                   }
 726
 727                                 if (symb_table[2 * elem] != 0)
 728                                   {
 729                                     /* Compare the byte sequence but only if
 730                                        this is not part of a range.  */
 731 # ifdef WIDE_CHAR_VERSION
 732                                     int32_t *wextra;
 733
 734                                     idx += 1 + extra[idx];
 735                                     /* Adjust for the alignment.  */
 736                                     idx = (idx + 3) & ~4;
 737
 738                                     wextra = (int32_t *) &extra[idx + 4];
 739 # endif
 740                                     /* Get the collation sequence value.  */
 741                                     is_seqval = true;
 742 # ifdef WIDE_CHAR_VERSION
 743                                     cend = wextra[1 + wextra[idx]];
 744 # else
 745                                     /* Adjust for the alignment.  */
 746                                     idx += 1 + extra[idx];
 747                                     idx = (idx + 3) & ~4;
 748                                     cend = *((int32_t *) &extra[idx]);
 749 # endif
 750                                   }
 751                                 else if (symb_table[2 * elem] != 0 && c1 == 1)
 752                                   {
 753                                     cend = str[0];
 754                                     c = *p++;
 755                                   }
 756                                 else
 757                                   return FNM_NOMATCH;
 758                               }
 759 # undef str
 760                           }
 761                         else
 762                           {
 763                             if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
 764                               cend = *p++;
 765                             if (cend == L('\0'))
 766                               return FNM_NOMATCH;
 767                             cend = FOLD (cend);
 768                           }
 769
 770                         /* XXX It is not entirely clear to me how to handle
 771                            characters which are not mentioned in the
 772                            collation specification.  */
 773                         if (
 774 # ifdef WIDE_CHAR_VERSION
 775                             lcollseq == 0xffffffff ||
 776 # endif
 777                             lcollseq <= fcollseq)
 778                           {
 779                             /* We have to look at the upper bound.  */
 780                             uint32_t hcollseq;
 781
 782                             if (is_seqval)
 783                               hcollseq = cend;
 784                             else
 785                               {
 786 # ifdef WIDE_CHAR_VERSION
 787                                 hcollseq =
 788                                   __collseq_table_lookup (collseq, cend);
 789                                 if (hcollseq == ~((uint32_t) 0))
 790                                   {
 791                                     /* Hum, no information about the upper
 792                                        bound.  The matching succeeds if the
 793                                        lower bound is matched exactly.  */
 794                                     if (lcollseq != fcollseq)
 795                                       goto range_not_matched;
 796
 797                                     goto matched;
 798                                   }
 799 # else
 800                                 hcollseq = collseq[cend];
 801 # endif
 802                               }
 803
 804                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
 805                               goto matched;
 806                           }
 807 # ifdef WIDE_CHAR_VERSION
 808                       range_not_matched:
 809 # endif
 810 #else
 811                         /* We use a boring value comparison of the character
 812                            values.  This is better than comparing using
 813                            `strcoll' since the latter would have surprising
 814                            and sometimes fatal consequences.  */
 815                         UCHAR cend = *p++;
 816
 817                         if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
 818                           cend = *p++;
 819                         if (cend == L('\0'))
 820                           return FNM_NOMATCH;
 821
 822                         /* It is a range.  */
 823                         if (cold <= fn && fn <= cend)
 824                           goto matched;
 825 #endif
 826
 827                         c = *p++;
 828                       }
 829                   }
 830
 831                 if (c == L(']'))
 832                   break;
 833               }
 834
 835             if (!not)
 836               return FNM_NOMATCH;
 837             break;
 838
 839           matched:
 840             /* Skip the rest of the [...] that already matched.  */
 841             do
 842               {
 843               ignore_next:
 844                 c = *p++;
 845
 846                 if (c == L('\0'))
 847                   /* [... (unterminated) loses.  */
 848                   return FNM_NOMATCH;
 849
 850                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
 851                   {
 852                     if (*p == L('\0'))
 853                       return FNM_NOMATCH;
 854                     /* XXX 1003.2d11 is unclear if this is right.  */
 855                     ++p;
 856                   }
 857                 else if (c == L('[') && *p == L(':'))
 858                   {
 859                     int c1 = 0;
 860                     const CHAR *startp = p;
 861
 862                     while (1)
 863                       {
 864                         c = *++p;
 865                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
 866                           return FNM_NOMATCH;
 867
 868                         if (*p == L(':') && p[1] == L(']'))
 869                           break;
 870
 871                         if (c < L('a') || c >= L('z'))
 872                           {
 873                             p = startp;
 874                             goto ignore_next;
 875                           }
 876                       }
 877                     p += 2;
 878                     c = *p++;
 879                   }
 880                 else if (c == L('[') && *p == L('='))
 881                   {
 882                     c = *++p;
 883                     if (c == L('\0'))
 884                       return FNM_NOMATCH;
 885                     c = *++p;
 886                     if (c != L('=') || p[1] != L(']'))
 887                       return FNM_NOMATCH;
 888                     p += 2;
 889                     c = *p++;
 890                   }
 891                 else if (c == L('[') && *p == L('.'))
 892                   {
 893                     ++p;
 894                     while (1)
 895                       {
 896                         c = *++p;
 897                         if (c == '\0')
 898                           return FNM_NOMATCH;
 899
 900                         if (*p == L('.') && p[1] == L(']'))
 901                           break;
 902                       }
 903                     p += 2;
 904                     c = *p++;
 905                   }
 906               }
 907             while (c != L(']'));
 908             if (not)
 909               return FNM_NOMATCH;
 910           }
 911           break;
 912
 913         case L('+'):
 914         case L('@'):
 915         case L('!'):
 916           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
 917             {
 918               int res;
 919
 920               res = EXT (c, p, n, string_end, no_leading_period, flags);
 921               if (res != -1)
 922                 return res;
 923             }
 924           goto normal_match;
 925
 926         case L('/'):
 927           if (NO_LEADING_PERIOD (flags))
 928             {
 929               if (n == string_end || c != (UCHAR) *n)
 930                 return FNM_NOMATCH;
 931
 932               new_no_leading_period = true;
 933               break;
 934             }
 935           /* FALLTHROUGH */
 936         default:
 937         normal_match:
 938           if (n == string_end || c != FOLD ((UCHAR) *n))
 939             return FNM_NOMATCH;
 940         }
 941
 942       no_leading_period = new_no_leading_period;
 943       ++n;
 944     }
 945
 946   if (n == string_end)
 947     return 0;
 948
 949   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
 950     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
 951     return 0;
 952
 953   return FNM_NOMATCH;
 954 }
 955
 956
 957 static const CHAR *
 958 internal_function
 959 END (const CHAR *pattern)
 960 {
 961   const CHAR *p = pattern;
 962
 963   while (1)
 964     if (*++p == L('\0'))
 965       /* This is an invalid pattern.  */
 966       return pattern;
 967     else if (*p == L('['))
 968       {
 969         /* Handle brackets special.  */
 970         if (posixly_correct == 0)
 971           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 972
 973         /* Skip the not sign.  We have to recognize it because of a possibly
 974            following ']'.  */
 975         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
 976           ++p;
 977         /* A leading ']' is recognized as such.  */
 978         if (*p == L(']'))
 979           ++p;
 980         /* Skip over all characters of the list.  */
 981         while (*p != L(']'))
 982           if (*p++ == L('\0'))
 983             /* This is no valid pattern.  */
 984             return pattern;
 985       }
 986     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
 987               || *p == L('!')) && p[1] == L('('))
 988       p = END (p + 1);
 989     else if (*p == L(')'))
 990       break;
 991
 992   return p + 1;
 993 }
 994
 995
 996 static int
 997 internal_function
 998 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
 999      bool no_leading_period, int flags)
1000 {
1001   const CHAR *startp;
1002   size_t level;
1003   struct patternlist
1004   {
1005     struct patternlist *next;
1006     CHAR str[1];
1007   } *list = NULL;
1008   struct patternlist **lastp = &list;
1009   size_t pattern_len = STRLEN (pattern);
1010   const CHAR *p;
1011   const CHAR *rs;
1012   enum { ALLOCA_LIMIT = 8000 };
1013
1014   /* Parse the pattern.  Store the individual parts in the list.  */
1015   level = 0;
1016   for (startp = p = pattern + 1; ; ++p)
1017     if (*p == L('\0'))
1018       /* This is an invalid pattern.  */
1019       return -1;
1020     else if (*p == L('['))
1021       {
1022         /* Handle brackets special.  */
1023         if (posixly_correct == 0)
1024           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1025
1026         /* Skip the not sign.  We have to recognize it because of a possibly
1027            following ']'.  */
1028         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1029           ++p;
1030         /* A leading ']' is recognized as such.  */
1031         if (*p == L(']'))
1032           ++p;
1033         /* Skip over all characters of the list.  */
1034         while (*p != L(']'))
1035           if (*p++ == L('\0'))
1036             /* This is no valid pattern.  */
1037             return -1;
1038       }
1039     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1040               || *p == L('!')) && p[1] == L('('))
1041       /* Remember the nesting level.  */
1042       ++level;
1043     else if (*p == L(')'))
1044       {
1045         if (level-- == 0)
1046           {
1047             /* This means we found the end of the pattern.  */
1048 #define NEW_PATTERN \
1049             struct patternlist *newp;                                         \
1050             size_t plen;                                                      \
1051             size_t plensize;                                                  \
1052             size_t newpsize;                                                  \
1053                                                                               \
1054             plen = (opt == L('?') || opt == L('@')                            \
1055                     ? pattern_len                                             \
1056                     : p - startp + 1);                                        \
1057             plensize = plen * sizeof (CHAR);                                  \
1058             newpsize = offsetof (struct patternlist, str) + plensize;         \
1059             if ((size_t) -1 / sizeof (CHAR) < plen                            \
1060                 || newpsize < offsetof (struct patternlist, str)              \
1061                 || ALLOCA_LIMIT <= newpsize)                                  \
1062               return -1;                                                      \
1063             newp = (struct patternlist *) alloca (newpsize);                  \
1064             *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0');    \
1065             newp->next = NULL;                                                \
1066             *lastp = newp;                                                    \
1067             lastp = &newp->next
1068             NEW_PATTERN;
1069             break;
1070           }
1071       }
1072     else if (*p == L('|'))
1073       {
1074         if (level == 0)
1075           {
1076             NEW_PATTERN;
1077             startp = p + 1;
1078           }
1079       }
1080   assert (list != NULL);
1081   assert (p[-1] == L(')'));
1082 #undef NEW_PATTERN
1083
1084   switch (opt)
1085     {
1086     case L('*'):
1087       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1088         return 0;
1089       /* FALLTHROUGH */
1090
1091     case L('+'):
1092       do
1093         {
1094           for (rs = string; rs <= string_end; ++rs)
1095             /* First match the prefix with the current pattern with the
1096                current pattern.  */
1097             if (FCT (list->str, string, rs, no_leading_period,
1098                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1099                 /* This was successful.  Now match the rest with the rest
1100                    of the pattern.  */
1101                 && (FCT (p, rs, string_end,
1102                          rs == string
1103                          ? no_leading_period
1104                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1105                          flags & FNM_FILE_NAME
1106                          ? flags : flags & ~FNM_PERIOD) == 0
1107                     /* This didn't work.  Try the whole pattern.  */
1108                     || (rs != string
1109                         && FCT (pattern - 1, rs, string_end,
1110                                 rs == string
1111                                 ? no_leading_period
1112                                 : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1113                                 flags & FNM_FILE_NAME
1114                                 ? flags : flags & ~FNM_PERIOD) == 0)))
1115               /* It worked.  Signal success.  */
1116               return 0;
1117         }
1118       while ((list = list->next) != NULL);
1119
1120       /* None of the patterns lead to a match.  */
1121       return FNM_NOMATCH;
1122
1123     case L('?'):
1124       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1125         return 0;
1126       /* FALLTHROUGH */
1127
1128     case L('@'):
1129       do
1130         /* I cannot believe it but `strcat' is actually acceptable
1131            here.  Match the entire string with the prefix from the
1132            pattern list and the rest of the pattern following the
1133            pattern list.  */
1134         if (FCT (STRCAT (list->str, p), string, string_end,
1135                  no_leading_period,
1136                  flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1137           /* It worked.  Signal success.  */
1138           return 0;
1139       while ((list = list->next) != NULL);
1140
1141       /* None of the patterns lead to a match.  */
1142       return FNM_NOMATCH;
1143
1144     case L('!'):
1145       for (rs = string; rs <= string_end; ++rs)
1146         {
1147           struct patternlist *runp;
1148
1149           for (runp = list; runp != NULL; runp = runp->next)
1150             if (FCT (runp->str, string, rs,  no_leading_period,
1151                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1152               break;
1153
1154           /* If none of the patterns matched see whether the rest does.  */
1155           if (runp == NULL
1156               && (FCT (p, rs, string_end,
1157                        rs == string
1158                        ? no_leading_period
1159                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1160                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1161                   == 0))
1162             /* This is successful.  */
1163             return 0;
1164         }
1165
1166       /* None of the patterns together with the rest of the pattern
1167          lead to a match.  */
1168       return FNM_NOMATCH;
1169
1170     default:
1171       assert (! "Invalid extended matching operator");
1172       break;
1173     }
1174
1175   return -1;
1176 }
1177
1178
1179 #undef FOLD
1180 #undef CHAR
1181 #undef UCHAR
1182 #undef INT
1183 #undef FCT
1184 #undef EXT
1185 #undef END
1186 #undef MEMPCPY
1187 #undef MEMCHR
1188 #undef STRCOLL
1189 #undef STRLEN
1190 #undef STRCAT
1191 #undef L
1192 #undef BTOWC