posix/fnmatch_loop.c

   1 /* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2003
   2         Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, write to the Free
  17    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  18    02111-1307 USA.  */
  19
   20 /* Match STRING against the filename pattern PATTERN, returning zero if
   21    it matches, nonzero if not.
         STRING_END marks where STRING stops; the matcher compares its
         current position against STRING_END to detect the end of the input.
         A nonzero NO_LEADING_PERIOD makes `?', `*' and `[' refuse to match
         a `.' at the current position of STRING.  FLAGS carries the FNM_*
         bits tested in the body (e.g. FNM_FILE_NAME, FNM_NOESCAPE,
         FNM_LEADING_DIR, FNM_PERIOD, FNM_EXTMATCH).  */
   22 static int FCT (const CHAR *pattern, const CHAR *string,
   23                 const CHAR *string_end, int no_leading_period, int flags)
   24      internal_function;
      /* Helper called by FCT when FNM_EXTMATCH is set and a pattern character
         (passed as OPT, e.g. `?' or `*') is immediately followed by `('.
         FCT returns any result other than -1 directly; on -1 it carries on
         with its ordinary handling of that character.  */
   25 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
   26                 const CHAR *string_end, int no_leading_period, int flags)
   27      internal_function;
      /* Pattern-scanning helper.  FCT calls it on a `(' that follows `?' or
         `*' under FNM_EXTMATCH; when the result differs from PATTERNP, FCT
         treats it as the position just past an extended pattern and resumes
         scanning there.  NOTE(review): presumably returns PATTERNP itself
         when no well-formed group starts there -- confirm against the
         definition.  */
   28 static const CHAR *END (const CHAR *patternp) internal_function;
  29
  30 static int
  31 internal_function
  32 FCT (pattern, string, string_end, no_leading_period, flags)
  33      const CHAR *pattern;
  34      const CHAR *string;
  35      const CHAR *string_end;
  36      int no_leading_period;
  37      int flags;
  38 {
  39   register const CHAR *p = pattern, *n = string;
  40   register UCHAR c;
  41 #ifdef _LIBC
  42 # if WIDE_CHAR_VERSION
  43   const char *collseq = (const char *)
  44     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
  45 # else
  46   const UCHAR *collseq = (const UCHAR *)
  47     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
  48 # endif
  49 #endif
  50
  51   while ((c = *p++) != L('\0'))
  52     {
  53       int new_no_leading_period = 0;
  54       c = FOLD (c);
  55
  56       switch (c)
  57         {
  58         case L('?'):
  59           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  60             {
  61               int res;
  62
  63               res = EXT (c, p, n, string_end, no_leading_period,
  64                          flags);
  65               if (res != -1)
  66                 return res;
  67             }
  68
  69           if (n == string_end)
  70             return FNM_NOMATCH;
  71           else if (*n == L('/') && (flags & FNM_FILE_NAME))
  72             return FNM_NOMATCH;
  73           else if (*n == L('.') && no_leading_period)
  74             return FNM_NOMATCH;
  75           break;
  76
  77         case L('\\'):
  78           if (!(flags & FNM_NOESCAPE))
  79             {
  80               c = *p++;
  81               if (c == L('\0'))
  82                 /* Trailing \ loses.  */
  83                 return FNM_NOMATCH;
  84               c = FOLD (c);
  85             }
  86           if (n == string_end || FOLD ((UCHAR) *n) != c)
  87             return FNM_NOMATCH;
  88           break;
  89
  90         case L('*'):
  91           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  92             {
  93               int res;
  94
  95               res = EXT (c, p, n, string_end, no_leading_period,
  96                          flags);
  97               if (res != -1)
  98                 return res;
  99             }
 100
 101           if (n != string_end && *n == L('.') && no_leading_period)
 102             return FNM_NOMATCH;
 103
 104           for (c = *p++; c == L('?') || c == L('*'); c = *p++)
 105             {
 106               if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
 107                 {
 108                   const CHAR *endp = END (p);
 109                   if (endp != p)
 110                     {
 111                       /* This is a pattern.  Skip over it.  */
 112                       p = endp;
 113                       continue;
 114                     }
 115                 }
 116
 117               if (c == L('?'))
 118                 {
 119                   /* A ? needs to match one character.  */
 120                   if (n == string_end)
 121                     /* There isn't another character; no match.  */
 122                     return FNM_NOMATCH;
 123                   else if (*n == L('/')
 124                            && __builtin_expect (flags & FNM_FILE_NAME, 0))
 125                     /* A slash does not match a wildcard under
 126                        FNM_FILE_NAME.  */
 127                     return FNM_NOMATCH;
 128                   else
 129                     /* One character of the string is consumed in matching
 130                        this ? wildcard, so *??? won't match if there are
 131                        less than three characters.  */
 132                     ++n;
 133                 }
 134             }
 135
 136           if (c == L('\0'))
 137             /* The wildcard(s) is/are the last element of the pattern.
 138                If the name is a file name and contains another slash
 139                this means it cannot match, unless the FNM_LEADING_DIR
 140                flag is set.  */
 141             {
 142               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
 143
 144               if (flags & FNM_FILE_NAME)
 145                 {
 146                   if (flags & FNM_LEADING_DIR)
 147                     result = 0;
 148                   else
 149                     {
 150                       if (MEMCHR (n, L('/'), string_end - n) == NULL)
 151                         result = 0;
 152                     }
 153                 }
 154
 155               return result;
 156             }
 157           else
 158             {
 159               const CHAR *endp;
 160
 161               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
 162                              string_end - n);
 163               if (endp == NULL)
 164                 endp = string_end;
 165
 166               if (c == L('[')
 167                   || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
 168                       && (c == L('@') || c == L('+') || c == L('!'))
 169                       && *p == L('(')))
 170                 {
 171                   int flags2 = ((flags & FNM_FILE_NAME)
 172                                 ? flags : (flags & ~FNM_PERIOD));
 173                   int no_leading_period2 = no_leading_period;
 174
 175                   for (--p; n < endp; ++n, no_leading_period2 = 0)
 176                     if (FCT (p, n, string_end, no_leading_period2, flags2)
 177                         == 0)
 178                       return 0;
 179                 }
 180               else if (c == L('/') && (flags & FNM_FILE_NAME))
 181                 {
 182                   while (n < string_end && *n != L('/'))
 183                     ++n;
 184                   if (n < string_end && *n == L('/')
 185                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
 186                           == 0))
 187                     return 0;
 188                 }
 189               else
 190                 {
 191                   int flags2 = ((flags & FNM_FILE_NAME)
 192                                 ? flags : (flags & ~FNM_PERIOD));
 193                   int no_leading_period2 = no_leading_period;
 194
 195                   if (c == L('\\') && !(flags & FNM_NOESCAPE))
 196                     c = *p;
 197                   c = FOLD (c);
 198                   for (--p; n < endp; ++n, no_leading_period2 = 0)
 199                     if (FOLD ((UCHAR) *n) == c
 200                         && (FCT (p, n, string_end, no_leading_period2, flags2)
 201                             == 0))
 202                       return 0;
 203                 }
 204             }
 205
 206           /* If we come here no match is possible with the wildcard.  */
 207           return FNM_NOMATCH;
 208
 209         case L('['):
 210           {
 211             /* Nonzero if the sense of the character class is inverted.  */
 212             register int not;
 213             CHAR cold;
 214             UCHAR fn;
 215
 216             if (posixly_correct == 0)
 217               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 218
 219             if (n == string_end)
 220               return FNM_NOMATCH;
 221
 222             if (*n == L('.') && no_leading_period)
 223               return FNM_NOMATCH;
 224
 225             if (*n == L('/') && (flags & FNM_FILE_NAME))
 226               /* `/' cannot be matched.  */
 227               return FNM_NOMATCH;
 228
 229             not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
 230             if (not)
 231               ++p;
 232
 233             fn = FOLD ((UCHAR) *n);
 234
 235             c = *p++;
 236             for (;;)
 237               {
 238                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
 239                   {
 240                     if (*p == L('\0'))
 241                       return FNM_NOMATCH;
 242                     c = FOLD ((UCHAR) *p);
 243                     ++p;
 244
 245                     if (c == fn)
 246                       goto matched;
 247                   }
 248                 else if (c == L('[') && *p == L(':'))
 249                   {
 250                     /* Leave room for the null.  */
 251                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
 252                     size_t c1 = 0;
 253 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
 254                     wctype_t wt;
 255 #endif
 256                     const CHAR *startp = p;
 257
 258                     for (;;)
 259                       {
 260                         if (c1 == CHAR_CLASS_MAX_LENGTH)
 261                           /* The name is too long and therefore the pattern
 262                              is ill-formed.  */
 263                           return FNM_NOMATCH;
 264
 265                         c = *++p;
 266                         if (c == L(':') && p[1] == L(']'))
 267                           {
 268                             p += 2;
 269                             break;
 270                           }
 271                         if (c < L('a') || c >= L('z'))
 272                           {
 273                             /* This cannot possibly be a character class name.
 274                                Match it as a normal range.  */
 275                             p = startp;
 276                             c = L('[');
 277                             goto normal_bracket;
 278                           }
 279                         str[c1++] = c;
 280                       }
 281                     str[c1] = L('\0');
 282
 283 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
 284                     wt = IS_CHAR_CLASS (str);
 285                     if (wt == 0)
 286                       /* Invalid character class name.  */
 287                       return FNM_NOMATCH;
 288
 289 # if defined _LIBC && ! WIDE_CHAR_VERSION
  290                     /* The following code is glibc-specific, but it does
  291                        a good job of speeding up the code since
  292                        we can avoid the btowc() call.  */
 293                     if (_ISCTYPE ((UCHAR) *n, wt))
 294                       goto matched;
 295 # else
 296                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
 297                       goto matched;
 298 # endif
 299 #else
 300                     if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
 301                         || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
 302                         || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
 303                         || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
 304                         || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
 305                         || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
 306                         || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
 307                         || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
 308                         || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
 309                         || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
 310                         || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
 311                         || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
 312                       goto matched;
 313 #endif
 314                     c = *p++;
 315                   }
 316 #ifdef _LIBC
 317                 else if (c == L('[') && *p == L('='))
 318                   {
 319                     UCHAR str[1];
 320                     uint32_t nrules =
 321                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 322                     const CHAR *startp = p;
 323
 324                     c = *++p;
 325                     if (c == L('\0'))
 326                       {
 327                         p = startp;
 328                         c = L('[');
 329                         goto normal_bracket;
 330                       }
 331                     str[0] = c;
 332
 333                     c = *++p;
 334                     if (c != L('=') || p[1] != L(']'))
 335                       {
 336                         p = startp;
 337                         c = L('[');
 338                         goto normal_bracket;
 339                       }
 340                     p += 2;
 341
 342                     if (nrules == 0)
 343                       {
 344                         if ((UCHAR) *n == str[0])
 345                           goto matched;
 346                       }
 347                     else
 348                       {
 349                         const int32_t *table;
 350 # if WIDE_CHAR_VERSION
 351                         const int32_t *weights;
 352                         const int32_t *extra;
 353 # else
 354                         const unsigned char *weights;
 355                         const unsigned char *extra;
 356 # endif
 357                         const int32_t *indirect;
 358                         int32_t idx;
 359                         const UCHAR *cp = (const UCHAR *) str;
 360
 361                         /* This #include defines a local function!  */
 362 # if WIDE_CHAR_VERSION
 363 #  include <locale/weightwc.h>
 364 # else
 365 #  include <locale/weight.h>
 366 # endif
 367
 368 # if WIDE_CHAR_VERSION
 369                         table = (const int32_t *)
 370                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
 371                         weights = (const int32_t *)
 372                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
 373                         extra = (const int32_t *)
 374                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
 375                         indirect = (const int32_t *)
 376                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
 377 # else
 378                         table = (const int32_t *)
 379                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
 380                         weights = (const unsigned char *)
 381                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
 382                         extra = (const unsigned char *)
 383                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
 384                         indirect = (const int32_t *)
 385                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
 386 # endif
 387
 388                         idx = findidx (&cp);
 389                         if (idx != 0)
 390                           {
 391                             /* We found a table entry.  Now see whether the
 392                                character we are currently at has the same
  393                                equivalence class value.  */
 394                             int len = weights[idx];
 395                             int32_t idx2;
 396                             const UCHAR *np = (const UCHAR *) n;
 397
 398                             idx2 = findidx (&np);
 399                             if (idx2 != 0 && len == weights[idx2])
 400                               {
 401                                 int cnt = 0;
 402
 403                                 while (cnt < len
 404                                        && (weights[idx + 1 + cnt]
 405                                            == weights[idx2 + 1 + cnt]))
 406                                   ++cnt;
 407
 408                                 if (cnt == len)
 409                                   goto matched;
 410                               }
 411                           }
 412                       }
 413
 414                     c = *p++;
 415                   }
 416 #endif
 417                 else if (c == L('\0'))
 418                   /* [ (unterminated) loses.  */
 419                   return FNM_NOMATCH;
 420                 else
 421                   {
 422                     int is_range = 0;
 423
 424 #ifdef _LIBC
 425                     int is_seqval = 0;
 426
 427                     if (c == L('[') && *p == L('.'))
 428                       {
 429                         uint32_t nrules =
 430                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 431                         const CHAR *startp = p;
 432                         size_t c1 = 0;
 433
 434                         while (1)
 435                           {
 436                             c = *++p;
 437                             if (c == L('.') && p[1] == L(']'))
 438                               {
 439                                 p += 2;
 440                                 break;
 441                               }
 442                             if (c == '\0')
 443                               return FNM_NOMATCH;
 444                             ++c1;
 445                           }
 446
  447                         /* We have to handle the symbols differently in
 448                            ranges since then the collation sequence is
 449                            important.  */
 450                         is_range = *p == L('-') && p[1] != L('\0');
 451
 452                         if (nrules == 0)
 453                           {
 454                             /* There are no names defined in the collation
 455                                data.  Therefore we only accept the trivial
 456                                names consisting of the character itself.  */
 457                             if (c1 != 1)
 458                               return FNM_NOMATCH;
 459
 460                             if (!is_range && *n == startp[1])
 461                               goto matched;
 462
 463                             cold = startp[1];
 464                             c = *p++;
 465                           }
 466                         else
 467                           {
 468                             int32_t table_size;
 469                             const int32_t *symb_table;
 470 # ifdef WIDE_CHAR_VERSION
 471                             char str[c1];
 472                             unsigned int strcnt;
 473 # else
 474 #  define str (startp + 1)
 475 # endif
 476                             const unsigned char *extra;
 477                             int32_t idx;
 478                             int32_t elem;
 479                             int32_t second;
 480                             int32_t hash;
 481
 482 # ifdef WIDE_CHAR_VERSION
 483                             /* We have to convert the name to a single-byte
 484                                string.  This is possible since the names
 485                                consist of ASCII characters and the internal
 486                                representation is UCS4.  */
 487                             for (strcnt = 0; strcnt < c1; ++strcnt)
 488                               str[strcnt] = startp[1 + strcnt];
 489 #endif
 490
 491                             table_size =
 492                               _NL_CURRENT_WORD (LC_COLLATE,
 493                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
 494                             symb_table = (const int32_t *)
 495                               _NL_CURRENT (LC_COLLATE,
 496                                            _NL_COLLATE_SYMB_TABLEMB);
 497                             extra = (const unsigned char *)
 498                               _NL_CURRENT (LC_COLLATE,
 499                                            _NL_COLLATE_SYMB_EXTRAMB);
 500
 501                             /* Locate the character in the hashing table.  */
 502                             hash = elem_hash (str, c1);
 503
 504                             idx = 0;
 505                             elem = hash % table_size;
 506                             second = hash % (table_size - 2);
 507                             while (symb_table[2 * elem] != 0)
 508                               {
 509                                 /* First compare the hashing value.  */
 510                                 if (symb_table[2 * elem] == hash
 511                                     && c1 == extra[symb_table[2 * elem + 1]]
 512                                     && memcmp (str,
 513                                                &extra[symb_table[2 * elem + 1]
 514                                                      + 1], c1) == 0)
 515                                   {
 516                                     /* Yep, this is the entry.  */
 517                                     idx = symb_table[2 * elem + 1];
 518                                     idx += 1 + extra[idx];
 519                                     break;
 520                                   }
 521
 522                                 /* Next entry.  */
 523                                 elem += second;
 524                               }
 525
 526                             if (symb_table[2 * elem] != 0)
 527                               {
 528                                 /* Compare the byte sequence but only if
 529                                    this is not part of a range.  */
 530 # ifdef WIDE_CHAR_VERSION
 531                                 int32_t *wextra;
 532
 533                                 idx += 1 + extra[idx];
 534                                 /* Adjust for the alignment.  */
 535                                 idx = (idx + 3) & ~3;
 536
 537                                 wextra = (int32_t *) &extra[idx + 4];
 538 # endif
 539
 540                                 if (! is_range)
 541                                   {
 542 # ifdef WIDE_CHAR_VERSION
 543                                     for (c1 = 0;
 544                                          (int32_t) c1 < wextra[idx];
 545                                          ++c1)
 546                                       if (n[c1] != wextra[1 + c1])
 547                                         break;
 548
 549                                     if ((int32_t) c1 == wextra[idx])
 550                                       goto matched;
 551 # else
 552                                     for (c1 = 0; c1 < extra[idx]; ++c1)
 553                                       if (n[c1] != extra[1 + c1])
 554                                         break;
 555
 556                                     if (c1 == extra[idx])
 557                                       goto matched;
 558 # endif
 559                                   }
 560
 561                                 /* Get the collation sequence value.  */
 562                                 is_seqval = 1;
 563 # ifdef WIDE_CHAR_VERSION
 564                                 cold = wextra[1 + wextra[idx]];
 565 # else
 566                                 /* Adjust for the alignment.  */
 567                                 idx += 1 + extra[idx];
 568                                 idx = (idx + 3) & ~4;
 569                                 cold = *((int32_t *) &extra[idx]);
 570 # endif
 571
 572                                 c = *p++;
 573                               }
 574                             else if (c1 == 1)
 575                               {
 576                                 /* No valid character.  Match it as a
 577                                    single byte.  */
 578                                 if (!is_range && *n == str[0])
 579                                   goto matched;
 580
 581                                 cold = str[0];
 582                                 c = *p++;
 583                               }
 584                             else
 585                               return FNM_NOMATCH;
 586                           }
 587                       }
 588                     else
 589 # undef str
 590 #endif
 591                       {
 592                         c = FOLD (c);
 593                       normal_bracket:
 594
  595                         /* We have to handle the symbols differently in
 596                            ranges since then the collation sequence is
 597                            important.  */
 598                         is_range = (*p == L('-') && p[1] != L('\0')
 599                                     && p[1] != L(']'));
 600
 601                         if (!is_range && c == fn)
 602                           goto matched;
 603
 604                         cold = c;
 605                         c = *p++;
 606                       }
 607
 608                     if (c == L('-') && *p != L(']'))
 609                       {
 610 #if _LIBC
 611                         /* We have to find the collation sequence
 612                            value for C.  Collation sequence is nothing
 613                            we can regularly access.  The sequence
 614                            value is defined by the order in which the
 615                            definitions of the collation values for the
 616                            various characters appear in the source
 617                            file.  A strange concept, nowhere
 618                            documented.  */
 619                         uint32_t fcollseq;
 620                         uint32_t lcollseq;
 621                         UCHAR cend = *p++;
 622
 623 # ifdef WIDE_CHAR_VERSION
 624                         /* Search in the `names' array for the characters.  */
 625                         fcollseq = __collseq_table_lookup (collseq, fn);
 626                         if (fcollseq == ~((uint32_t) 0))
 627                           /* XXX We don't know anything about the character
 628                              we are supposed to match.  This means we are
 629                              failing.  */
 630                           goto range_not_matched;
 631
 632                         if (is_seqval)
 633                           lcollseq = cold;
 634                         else
 635                           lcollseq = __collseq_table_lookup (collseq, cold);
 636 # else
 637                         fcollseq = collseq[fn];
 638                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
 639 # endif
 640
 641                         is_seqval = 0;
 642                         if (cend == L('[') && *p == L('.'))
 643                           {
 644                             uint32_t nrules =
 645                               _NL_CURRENT_WORD (LC_COLLATE,
 646                                                 _NL_COLLATE_NRULES);
 647                             const CHAR *startp = p;
 648                             size_t c1 = 0;
 649
 650                             while (1)
 651                               {
 652                                 c = *++p;
 653                                 if (c == L('.') && p[1] == L(']'))
 654                                   {
 655                                     p += 2;
 656                                     break;
 657                                   }
 658                                 if (c == '\0')
 659                                   return FNM_NOMATCH;
 660                                 ++c1;
 661                               }
 662
 663                             if (nrules == 0)
 664                               {
 665                                 /* There are no names defined in the
 666                                    collation data.  Therefore we only
 667                                    accept the trivial names consisting
 668                                    of the character itself.  */
 669                                 if (c1 != 1)
 670                                   return FNM_NOMATCH;
 671
 672                                 cend = startp[1];
 673                               }
 674                             else
 675                               {
 676                                 int32_t table_size;
 677                                 const int32_t *symb_table;
 678 # ifdef WIDE_CHAR_VERSION
 679                                 char str[c1];
 680                                 unsigned int strcnt;
 681 # else
 682 #  define str (startp + 1)
 683 # endif
 684                                 const unsigned char *extra;
 685                                 int32_t idx;
 686                                 int32_t elem;
 687                                 int32_t second;
 688                                 int32_t hash;
 689
 690 # ifdef WIDE_CHAR_VERSION
 691                                 /* We have to convert the name to a single-byte
 692                                    string.  This is possible since the names
 693                                    consist of ASCII characters and the internal
 694                                    representation is UCS4.  */
 695                                 for (strcnt = 0; strcnt < c1; ++strcnt)
 696                                   str[strcnt] = startp[1 + strcnt];
 697 # endif
 698
 699                                 table_size =
 700                                   _NL_CURRENT_WORD (LC_COLLATE,
 701                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
 702                                 symb_table = (const int32_t *)
 703                                   _NL_CURRENT (LC_COLLATE,
 704                                                _NL_COLLATE_SYMB_TABLEMB);
 705                                 extra = (const unsigned char *)
 706                                   _NL_CURRENT (LC_COLLATE,
 707                                                _NL_COLLATE_SYMB_EXTRAMB);
 708
 709                                 /* Locate the character in the hashing
 710                                    table.  */
 711                                 hash = elem_hash (str, c1);
 712
 713                                 idx = 0;
 714                                 elem = hash % table_size;
 715                                 second = hash % (table_size - 2);
 716                                 while (symb_table[2 * elem] != 0)
 717                                   {
 718                                 /* First compare the hashing value.  */
 719                                     if (symb_table[2 * elem] == hash
 720                                         && (c1
 721                                             == extra[symb_table[2 * elem + 1]])
 722                                         && memcmp (str,
 723                                                    &extra[symb_table[2 * elem + 1]
 724                                                          + 1], c1) == 0)
 725                                       {
 726                                         /* Yep, this is the entry.  */
 727                                         idx = symb_table[2 * elem + 1];
 728                                         idx += 1 + extra[idx];
 729                                         break;
 730                                       }
 731
 732                                     /* Next entry.  */
 733                                     elem += second;
 734                                   }
 735
 736                                 if (symb_table[2 * elem] != 0)
 737                                   {
 738                                     /* Compare the byte sequence but only if
 739                                        this is not part of a range.  */
 740 # ifdef WIDE_CHAR_VERSION
 741                                     int32_t *wextra;
 742
 743                                     idx += 1 + extra[idx];
 744                                     /* Adjust for the alignment.  */
 745                                     idx = (idx + 3) & ~4;
 746
 747                                     wextra = (int32_t *) &extra[idx + 4];
 748 # endif
 749                                     /* Get the collation sequence value.  */
 750                                     is_seqval = 1;
 751 # ifdef WIDE_CHAR_VERSION
 752                                     cend = wextra[1 + wextra[idx]];
 753 # else
 754                                     /* Adjust for the alignment.  */
 755                                     idx += 1 + extra[idx];
 756                                     idx = (idx + 3) & ~4;
 757                                     cend = *((int32_t *) &extra[idx]);
 758 # endif
 759                                   }
 760                                 else if (symb_table[2 * elem] != 0 && c1 == 1)
 761                                   {
 762                                     cend = str[0];
 763                                     c = *p++;
 764                                   }
 765                                 else
 766                                   return FNM_NOMATCH;
 767                               }
 768 # undef str
 769                           }
 770                         else
 771                           {
 772                             if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
 773                               cend = *p++;
 774                             if (cend == L('\0'))
 775                               return FNM_NOMATCH;
 776                             cend = FOLD (cend);
 777                           }
 778
 779                         /* XXX It is not entirely clear to me how to handle
 780                            characters which are not mentioned in the
 781                            collation specification.  */
 782                         if (
 783 # ifdef WIDE_CHAR_VERSION
 784                             lcollseq == 0xffffffff ||
 785 # endif
 786                             lcollseq <= fcollseq)
 787                           {
 788                             /* We have to look at the upper bound.  */
 789                             uint32_t hcollseq;
 790
 791                             if (is_seqval)
 792                               hcollseq = cend;
 793                             else
 794                               {
 795 # ifdef WIDE_CHAR_VERSION
 796                                 hcollseq =
 797                                   __collseq_table_lookup (collseq, cend);
 798                                 if (hcollseq == ~((uint32_t) 0))
 799                                   {
 800                                     /* Hum, no information about the upper
 801                                        bound.  The matching succeeds if the
 802                                        lower bound is matched exactly.  */
 803                                     if (lcollseq != fcollseq)
 804                                       goto range_not_matched;
 805
 806                                     goto matched;
 807                                   }
 808 # else
 809                                 hcollseq = collseq[cend];
 810 # endif
 811                               }
 812
 813                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
 814                               goto matched;
 815                           }
 816 # ifdef WIDE_CHAR_VERSION
 817                       range_not_matched:
 818 # endif
 819 #else
 820                         /* We use a boring value comparison of the character
 821                            values.  This is better than comparing using
 822                            `strcoll' since the latter would have surprising
 823                            and sometimes fatal consequences.  */
 824                         UCHAR cend = *p++;
 825
 826                         if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
 827                           cend = *p++;
 828                         if (cend == L('\0'))
 829                           return FNM_NOMATCH;
 830
 831                         /* It is a range.  */
 832                         if (cold <= fn && fn <= cend)
 833                           goto matched;
 834 #endif
 835
 836                         c = *p++;
 837                       }
 838                   }
 839
 840                 if (c == L(']'))
 841                   break;
 842               }
 843
 844             if (!not)
 845               return FNM_NOMATCH;
 846             break;
 847
 848           matched:
 849             /* Skip the rest of the [...] that already matched.  */
 850             do
 851               {
 852               ignore_next:
 853                 c = *p++;
 854
 855                 if (c == L('\0'))
 856                   /* [... (unterminated) loses.  */
 857                   return FNM_NOMATCH;
 858
 859                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
 860                   {
 861                     if (*p == L('\0'))
 862                       return FNM_NOMATCH;
 863                     /* XXX 1003.2d11 is unclear if this is right.  */
 864                     ++p;
 865                   }
 866                 else if (c == L('[') && *p == L(':'))
 867                   {
 868                     int c1 = 0;
 869                     const CHAR *startp = p;
 870
 871                     while (1)
 872                       {
 873                         c = *++p;
 874                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
 875                           return FNM_NOMATCH;
 876
 877                         if (*p == L(':') && p[1] == L(']'))
 878                           break;
 879
 880                         if (c < L('a') || c >= L('z'))
 881                           {
 882                             p = startp;
 883                             goto ignore_next;
 884                           }
 885                       }
 886                     p += 2;
 887                     c = *p++;
 888                   }
 889                 else if (c == L('[') && *p == L('='))
 890                   {
 891                     c = *++p;
 892                     if (c == L('\0'))
 893                       return FNM_NOMATCH;
 894                     c = *++p;
 895                     if (c != L('=') || p[1] != L(']'))
 896                       return FNM_NOMATCH;
 897                     p += 2;
 898                     c = *p++;
 899                   }
 900                 else if (c == L('[') && *p == L('.'))
 901                   {
 902                     ++p;
 903                     while (1)
 904                       {
 905                         c = *++p;
 906                         if (c == '\0')
 907                           return FNM_NOMATCH;
 908
 909                         if (*p == L('.') && p[1] == L(']'))
 910                           break;
 911                       }
 912                     p += 2;
 913                     c = *p++;
 914                   }
 915               }
 916             while (c != L(']'));
 917             if (not)
 918               return FNM_NOMATCH;
 919           }
 920           break;
 921
 922         case L('+'):
 923         case L('@'):
 924         case L('!'):
 925           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
 926             {
 927               int res;
 928
 929               res = EXT (c, p, n, string_end, no_leading_period, flags);
 930               if (res != -1)
 931                 return res;
 932             }
 933           goto normal_match;
 934
 935         case L('/'):
 936           if (NO_LEADING_PERIOD (flags))
 937             {
 938               if (n == string_end || c != (UCHAR) *n)
 939                 return FNM_NOMATCH;
 940
 941               new_no_leading_period = 1;
 942               break;
 943             }
 944           /* FALLTHROUGH */
 945         default:
 946         normal_match:
 947           if (n == string_end || c != FOLD ((UCHAR) *n))
 948             return FNM_NOMATCH;
 949         }
 950
 951       no_leading_period = new_no_leading_period;
 952       ++n;
 953     }
 954
 955   if (n == string_end)
 956     return 0;
 957
 958   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
 959     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
 960     return 0;
 961
 962   return FNM_NOMATCH;
 963 }
 964
 965
 966 static const CHAR *
 967 internal_function
 968 END (const CHAR *pattern)
 969 {
 970   const CHAR *p = pattern;
 971
 972   while (1)
 973     if (*++p == L('\0'))
 974       /* This is an invalid pattern.  */
 975       return pattern;
 976     else if (*p == L('['))
 977       {
 978         /* Handle brackets special.  */
 979         if (posixly_correct == 0)
 980           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 981
 982         /* Skip the not sign.  We have to recognize it because of a possibly
 983            following ']'.  */
 984         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
 985           ++p;
 986         /* A leading ']' is recognized as such.  */
 987         if (*p == L(']'))
 988           ++p;
 989         /* Skip over all characters of the list.  */
 990         while (*p != L(']'))
 991           if (*p++ == L('\0'))
 992             /* This is no valid pattern.  */
 993             return pattern;
 994       }
 995     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
 996               || *p == L('!')) && p[1] == L('('))
 997       p = END (p + 1);
 998     else if (*p == L(')'))
 999       break;
1000
1001   return p + 1;
1002 }
1003
1004
1005 static int
1006 internal_function
1007 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1008      int no_leading_period, int flags)
1009 {
1010   const CHAR *startp;
1011   int level;
1012   struct patternlist
1013   {
1014     struct patternlist *next;
1015     CHAR str[0];
1016   } *list = NULL;
1017   struct patternlist **lastp = &list;
1018   size_t pattern_len = STRLEN (pattern);
1019   const CHAR *p;
1020   const CHAR *rs;
1021
1022   /* Parse the pattern.  Store the individual parts in the list.  */
1023   level = 0;
1024   for (startp = p = pattern + 1; level >= 0; ++p)
1025     if (*p == L('\0'))
1026       /* This is an invalid pattern.  */
1027       return -1;
1028     else if (*p == L('['))
1029       {
1030         /* Handle brackets special.  */
1031         if (posixly_correct == 0)
1032           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1033
1034         /* Skip the not sign.  We have to recognize it because of a possibly
1035            following ']'.  */
1036         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1037           ++p;
1038         /* A leading ']' is recognized as such.  */
1039         if (*p == L(']'))
1040           ++p;
1041         /* Skip over all characters of the list.  */
1042         while (*p != L(']'))
1043           if (*p++ == L('\0'))
1044             /* This is no valid pattern.  */
1045             return -1;
1046       }
1047     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1048               || *p == L('!')) && p[1] == L('('))
1049       /* Remember the nesting level.  */
1050       ++level;
1051     else if (*p == L(')'))
1052       {
1053         if (level-- == 0)
1054           {
1055             /* This means we found the end of the pattern.  */
1056 #define NEW_PATTERN \
1057             struct patternlist *newp;                                         \
1058                                                                               \
1059             if (opt == L('?') || opt == L('@'))                               \
1060               newp = alloca (sizeof (struct patternlist)                      \
1061                              + (pattern_len * sizeof (CHAR)));                \
1062             else                                                              \
1063               newp = alloca (sizeof (struct patternlist)                      \
1064                              + ((p - startp + 1) * sizeof (CHAR)));           \
1065             *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0');    \
1066             newp->next = NULL;                                                \
1067             *lastp = newp;                                                    \
1068             lastp = &newp->next
1069             NEW_PATTERN;
1070           }
1071       }
1072     else if (*p == L('|'))
1073       {
1074         if (level == 0)
1075           {
1076             NEW_PATTERN;
1077             startp = p + 1;
1078           }
1079       }
1080   assert (list != NULL);
1081   assert (p[-1] == L(')'));
1082 #undef NEW_PATTERN
1083
1084   switch (opt)
1085     {
1086     case L('*'):
1087       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1088         return 0;
1089       /* FALLTHROUGH */
1090
1091     case L('+'):
1092       do
1093         {
1094           for (rs = string; rs <= string_end; ++rs)
1095             /* First match the prefix with the current pattern with the
1096                current pattern.  */
1097             if (FCT (list->str, string, rs, no_leading_period,
1098                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1099                 /* This was successful.  Now match the rest with the rest
1100                    of the pattern.  */
1101                 && (FCT (p, rs, string_end,
1102                          rs == string
1103                          ? no_leading_period
1104                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1105                          flags & FNM_FILE_NAME
1106                          ? flags : flags & ~FNM_PERIOD) == 0
1107                     /* This didn't work.  Try the whole pattern.  */
1108                     || (rs != string
1109                         && FCT (pattern - 1, rs, string_end,
1110                                 rs == string
1111                                 ? no_leading_period
1112                                 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1113                                    ? 1 : 0),
1114                                 flags & FNM_FILE_NAME
1115                                 ? flags : flags & ~FNM_PERIOD) == 0)))
1116               /* It worked.  Signal success.  */
1117               return 0;
1118         }
1119       while ((list = list->next) != NULL);
1120
1121       /* None of the patterns lead to a match.  */
1122       return FNM_NOMATCH;
1123
1124     case L('?'):
1125       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1126         return 0;
1127       /* FALLTHROUGH */
1128
1129     case L('@'):
1130       do
1131         /* I cannot believe it but `strcat' is actually acceptable
1132            here.  Match the entire string with the prefix from the
1133            pattern list and the rest of the pattern following the
1134            pattern list.  */
1135         if (FCT (STRCAT (list->str, p), string, string_end,
1136                  no_leading_period,
1137                  flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1138           /* It worked.  Signal success.  */
1139           return 0;
1140       while ((list = list->next) != NULL);
1141
1142       /* None of the patterns lead to a match.  */
1143       return FNM_NOMATCH;
1144
1145     case L('!'):
1146       for (rs = string; rs <= string_end; ++rs)
1147         {
1148           struct patternlist *runp;
1149
1150           for (runp = list; runp != NULL; runp = runp->next)
1151             if (FCT (runp->str, string, rs,  no_leading_period,
1152                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1153               break;
1154
1155           /* If none of the patterns matched see whether the rest does.  */
1156           if (runp == NULL
1157               && (FCT (p, rs, string_end,
1158                        rs == string
1159                        ? no_leading_period
1160                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1161                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1162                   == 0))
1163             /* This is successful.  */
1164             return 0;
1165         }
1166
1167       /* None of the patterns together with the rest of the pattern
1168          lead to a match.  */
1169       return FNM_NOMATCH;
1170
1171     default:
1172       assert (! "Invalid extended matching operator");
1173       break;
1174     }
1175
1176   return -1;
1177 }
1178
1179
/* Remove the macros this file's code is parameterized with --
   presumably so that the file can be included again with a different
   set of definitions (TODO confirm against the including file).  */

/* Character types and the literal wrapper.  */
#undef CHAR
#undef UCHAR
#undef INT
#undef L

/* Names of the functions defined above.  */
#undef FCT
#undef EXT
#undef END

/* Character and string helper macros.  */
#undef FOLD
#undef BTOWC
#undef MEMPCPY
#undef MEMCHR
#undef STRCOLL
#undef STRLEN
#undef STRCAT