lib/fnmatch_loop.c

   1 /* Copyright (C) 1991, 1992, 1993, 1996, 1997, 1998, 1999, 2000, 2001,
   2    2002 Free Software Foundation, Inc.
   3
   4    This program is free software; you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 2, or (at your option)
   7    any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program; if not, write to the Free Software Foundation,
  16    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  17
  18 /* Match STRING against the filename pattern PATTERN, returning zero if
  19    it matches, nonzero if not.  */
  20 static int FCT (const CHAR *pattern, const CHAR *string,
  21                 const CHAR *string_end, int no_leading_period, int flags)
  22      internal_function;
  23 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
  24                 const CHAR *string_end, int no_leading_period, int flags)
  25      internal_function;
  26 static const CHAR *END (const CHAR *patternp) internal_function;
  27
  28 static int
  29 internal_function
  30 FCT (pattern, string, string_end, no_leading_period, flags)
  31      const CHAR *pattern;
  32      const CHAR *string;
  33      const CHAR *string_end;
  34      int no_leading_period;
  35      int flags;
  36 {
  37   register const CHAR *p = pattern, *n = string;
  38   register UCHAR c;
  39 #ifdef _LIBC
  40 # if WIDE_CHAR_VERSION
  41   const char *collseq = (const char *)
  42     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
  43 # else
  44   const UCHAR *collseq = (const UCHAR *)
  45     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
  46 # endif
  47 #endif
  48
  49   while ((c = *p++) != L('\0'))
  50     {
  51       int new_no_leading_period = 0;
  52       c = FOLD (c);
  53
  54       switch (c)
  55         {
  56         case L('?'):
  57           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  58             {
  59               int res;
  60
  61               res = EXT (c, p, n, string_end, no_leading_period,
  62                          flags);
  63               if (res != -1)
  64                 return res;
  65             }
  66
  67           if (n == string_end)
  68             return FNM_NOMATCH;
  69           else if (*n == L('/') && (flags & FNM_FILE_NAME))
  70             return FNM_NOMATCH;
  71           else if (*n == L('.') && no_leading_period)
  72             return FNM_NOMATCH;
  73           break;
  74
  75         case L('\\'):
  76           if (!(flags & FNM_NOESCAPE))
  77             {
  78               c = *p++;
  79               if (c == L('\0'))
  80                 /* Trailing \ loses.  */
  81                 return FNM_NOMATCH;
  82               c = FOLD (c);
  83             }
  84           if (n == string_end || FOLD ((UCHAR) *n) != c)
  85             return FNM_NOMATCH;
  86           break;
  87
  88         case L('*'):
  89           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  90             {
  91               int res;
  92
  93               res = EXT (c, p, n, string_end, no_leading_period,
  94                          flags);
  95               if (res != -1)
  96                 return res;
  97             }
  98
  99           if (n != string_end && *n == L('.') && no_leading_period)
 100             return FNM_NOMATCH;
 101
 102           for (c = *p++; c == L('?') || c == L('*'); c = *p++)
 103             {
 104               if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
 105                 {
 106                   const CHAR *endp = END (p);
 107                   if (endp != p)
 108                     {
 109                       /* This is a pattern.  Skip over it.  */
 110                       p = endp;
 111                       continue;
 112                     }
 113                 }
 114
 115               if (c == L('?'))
 116                 {
 117                   /* A ? needs to match one character.  */
 118                   if (n == string_end)
 119                     /* There isn't another character; no match.  */
 120                     return FNM_NOMATCH;
 121                   else if (*n == L('/')
 122                            && __builtin_expect (flags & FNM_FILE_NAME, 0))
 123                     /* A slash does not match a wildcard under
 124                        FNM_FILE_NAME.  */
 125                     return FNM_NOMATCH;
 126                   else
 127                     /* One character of the string is consumed in matching
 128                        this ? wildcard, so *??? won't match if there are
 129                        less than three characters.  */
 130                     ++n;
 131                 }
 132             }
 133
 134           if (c == L('\0'))
 135             /* The wildcard(s) is/are the last element of the pattern.
 136                If the name is a file name and contains another slash
 137                this means it cannot match, unless the FNM_LEADING_DIR
 138                flag is set.  */
 139             {
 140               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
 141
 142               if (flags & FNM_FILE_NAME)
 143                 {
 144                   if (flags & FNM_LEADING_DIR)
 145                     result = 0;
 146                   else
 147                     {
 148                       if (MEMCHR (n, L('/'), string_end - n) == NULL)
 149                         result = 0;
 150                     }
 151                 }
 152
 153               return result;
 154             }
 155           else
 156             {
 157               const CHAR *endp;
 158
 159               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
 160                              string_end - n);
 161               if (endp == NULL)
 162                 endp = string_end;
 163
 164               if (c == L('[')
 165                   || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
 166                       && (c == L('@') || c == L('+') || c == L('!'))
 167                       && *p == L('(')))
 168                 {
 169                   int flags2 = ((flags & FNM_FILE_NAME)
 170                                 ? flags : (flags & ~FNM_PERIOD));
 171                   int no_leading_period2 = no_leading_period;
 172
 173                   for (--p; n < endp; ++n, no_leading_period2 = 0)
 174                     if (FCT (p, n, string_end, no_leading_period2, flags2)
 175                         == 0)
 176                       return 0;
 177                 }
 178               else if (c == L('/') && (flags & FNM_FILE_NAME))
 179                 {
 180                   while (n < string_end && *n != L('/'))
 181                     ++n;
 182                   if (n < string_end && *n == L('/')
 183                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
 184                           == 0))
 185                     return 0;
 186                 }
 187               else
 188                 {
 189                   int flags2 = ((flags & FNM_FILE_NAME)
 190                                 ? flags : (flags & ~FNM_PERIOD));
 191                   int no_leading_period2 = no_leading_period;
 192
 193                   if (c == L('\\') && !(flags & FNM_NOESCAPE))
 194                     c = *p;
 195                   c = FOLD (c);
 196                   for (--p; n < endp; ++n, no_leading_period2 = 0)
 197                     if (FOLD ((UCHAR) *n) == c
 198                         && (FCT (p, n, string_end, no_leading_period2, flags2)
 199                             == 0))
 200                       return 0;
 201                 }
 202             }
 203
 204           /* If we come here no match is possible with the wildcard.  */
 205           return FNM_NOMATCH;
 206
 207         case L('['):
 208           {
 209             /* Nonzero if the sense of the character class is inverted.  */
 210             register int not;
 211             CHAR cold;
 212             UCHAR fn;
 213
 214             if (posixly_correct == 0)
 215               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 216
 217             if (n == string_end)
 218               return FNM_NOMATCH;
 219
 220             if (*n == L('.') && no_leading_period)
 221               return FNM_NOMATCH;
 222
 223             if (*n == L('/') && (flags & FNM_FILE_NAME))
 224               /* `/' cannot be matched.  */
 225               return FNM_NOMATCH;
 226
 227             not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
 228             if (not)
 229               ++p;
 230
 231             fn = FOLD ((UCHAR) *n);
 232
 233             c = *p++;
 234             for (;;)
 235               {
 236                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
 237                   {
 238                     if (*p == L('\0'))
 239                       return FNM_NOMATCH;
 240                     c = FOLD ((UCHAR) *p);
 241                     ++p;
 242
 243                     if (c == fn)
 244                       goto matched;
 245                   }
 246                 else if (c == L('[') && *p == L(':'))
 247                   {
 248                     /* Leave room for the null.  */
 249                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
 250                     size_t c1 = 0;
 251 #if defined _LIBC || WIDE_CHAR_SUPPORT
 252                     wctype_t wt;
 253 #endif
 254                     const CHAR *startp = p;
 255
 256                     for (;;)
 257                       {
 258                         if (c1 == CHAR_CLASS_MAX_LENGTH)
 259                           /* The name is too long and therefore the pattern
 260                              is ill-formed.  */
 261                           return FNM_NOMATCH;
 262
 263                         c = *++p;
 264                         if (c == L(':') && p[1] == L(']'))
 265                           {
 266                             p += 2;
 267                             break;
 268                           }
 269                         if (c < L('a') || c >= L('z'))
 270                           {
 271                             /* This cannot possibly be a character class name.
 272                                Match it as a normal range.  */
 273                             p = startp;
 274                             c = L('[');
 275                             goto normal_bracket;
 276                           }
 277                         str[c1++] = c;
 278                       }
 279                     str[c1] = L('\0');
 280
 281 #if defined _LIBC || WIDE_CHAR_SUPPORT
 282                     wt = IS_CHAR_CLASS (str);
 283                     if (wt == 0)
 284                       /* Invalid character class name.  */
 285                       return FNM_NOMATCH;
 286
 287 # if defined _LIBC && ! WIDE_CHAR_VERSION
 288                     /* The following code is glibc specific but does
 289                        there a good job in speeding up the code since
 290                        we can avoid the btowc() call.  */
 291                     if (_ISCTYPE ((UCHAR) *n, wt))
 292                       goto matched;
 293 # else
 294                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
 295                       goto matched;
 296 # endif
 297 #else
 298                     if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
 299                         || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
 300                         || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
 301                         || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
 302                         || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
 303                         || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
 304                         || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
 305                         || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
 306                         || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
 307                         || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
 308                         || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
 309                         || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
 310                       goto matched;
 311 #endif
 312                     c = *p++;
 313                   }
 314 #ifdef _LIBC
 315                 else if (c == L('[') && *p == L('='))
 316                   {
 317                     UCHAR str[1];
 318                     uint32_t nrules =
 319                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 320                     const CHAR *startp = p;
 321
 322                     c = *++p;
 323                     if (c == L('\0'))
 324                       {
 325                         p = startp;
 326                         c = L('[');
 327                         goto normal_bracket;
 328                       }
 329                     str[0] = c;
 330
 331                     c = *++p;
 332                     if (c != L('=') || p[1] != L(']'))
 333                       {
 334                         p = startp;
 335                         c = L('[');
 336                         goto normal_bracket;
 337                       }
 338                     p += 2;
 339
 340                     if (nrules == 0)
 341                       {
 342                         if ((UCHAR) *n == str[0])
 343                           goto matched;
 344                       }
 345                     else
 346                       {
 347                         const int32_t *table;
 348 # if WIDE_CHAR_VERSION
 349                         const int32_t *weights;
 350                         const int32_t *extra;
 351 # else
 352                         const unsigned char *weights;
 353                         const unsigned char *extra;
 354 # endif
 355                         const int32_t *indirect;
 356                         int32_t idx;
 357                         const UCHAR *cp = (const UCHAR *) str;
 358
 359                         /* This #include defines a local function!  */
 360 # if WIDE_CHAR_VERSION
 361 #  include <locale/weightwc.h>
 362 # else
 363 #  include <locale/weight.h>
 364 # endif
 365
 366 # if WIDE_CHAR_VERSION
 367                         table = (const int32_t *)
 368                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
 369                         weights = (const int32_t *)
 370                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
 371                         extra = (const int32_t *)
 372                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
 373                         indirect = (const int32_t *)
 374                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
 375 # else
 376                         table = (const int32_t *)
 377                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
 378                         weights = (const unsigned char *)
 379                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
 380                         extra = (const unsigned char *)
 381                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
 382                         indirect = (const int32_t *)
 383                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
 384 # endif
 385
 386                         idx = findidx (&cp);
 387                         if (idx != 0)
 388                           {
 389                             /* We found a table entry.  Now see whether the
 390                                character we are currently at has the same
 391                                equivalance class value.  */
 392                             int len = weights[idx];
 393                             int32_t idx2;
 394                             const UCHAR *np = (const UCHAR *) n;
 395
 396                             idx2 = findidx (&np);
 397                             if (idx2 != 0 && len == weights[idx2])
 398                               {
 399                                 int cnt = 0;
 400
 401                                 while (cnt < len
 402                                        && (weights[idx + 1 + cnt]
 403                                            == weights[idx2 + 1 + cnt]))
 404                                   ++cnt;
 405
 406                                 if (cnt == len)
 407                                   goto matched;
 408                               }
 409                           }
 410                       }
 411
 412                     c = *p++;
 413                   }
 414 #endif
 415                 else if (c == L('\0'))
 416                   /* [ (unterminated) loses.  */
 417                   return FNM_NOMATCH;
 418                 else
 419                   {
 420                     int is_range = 0;
 421
 422 #ifdef _LIBC
 423                     int is_seqval = 0;
 424
 425                     if (c == L('[') && *p == L('.'))
 426                       {
 427                         uint32_t nrules =
 428                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 429                         const CHAR *startp = p;
 430                         size_t c1 = 0;
 431
 432                         while (1)
 433                           {
 434                             c = *++p;
 435                             if (c == L('.') && p[1] == L(']'))
 436                               {
 437                                 p += 2;
 438                                 break;
 439                               }
 440                             if (c == '\0')
 441                               return FNM_NOMATCH;
 442                             ++c1;
 443                           }
 444
 445                         /* We have to handling the symbols differently in
 446                            ranges since then the collation sequence is
 447                            important.  */
 448                         is_range = *p == L('-') && p[1] != L('\0');
 449
 450                         if (nrules == 0)
 451                           {
 452                             /* There are no names defined in the collation
 453                                data.  Therefore we only accept the trivial
 454                                names consisting of the character itself.  */
 455                             if (c1 != 1)
 456                               return FNM_NOMATCH;
 457
 458                             if (!is_range && *n == startp[1])
 459                               goto matched;
 460
 461                             cold = startp[1];
 462                             c = *p++;
 463                           }
 464                         else
 465                           {
 466                             int32_t table_size;
 467                             const int32_t *symb_table;
 468 # ifdef WIDE_CHAR_VERSION
 469                             char str[c1];
 470                             unsigned int strcnt;
 471 # else
 472 #  define str (startp + 1)
 473 # endif
 474                             const unsigned char *extra;
 475                             int32_t idx;
 476                             int32_t elem;
 477                             int32_t second;
 478                             int32_t hash;
 479
 480 # ifdef WIDE_CHAR_VERSION
 481                             /* We have to convert the name to a single-byte
 482                                string.  This is possible since the names
 483                                consist of ASCII characters and the internal
 484                                representation is UCS4.  */
 485                             for (strcnt = 0; strcnt < c1; ++strcnt)
 486                               str[strcnt] = startp[1 + strcnt];
 487 # endif
 488
 489                             table_size =
 490                               _NL_CURRENT_WORD (LC_COLLATE,
 491                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
 492                             symb_table = (const int32_t *)
 493                               _NL_CURRENT (LC_COLLATE,
 494                                            _NL_COLLATE_SYMB_TABLEMB);
 495                             extra = (const unsigned char *)
 496                               _NL_CURRENT (LC_COLLATE,
 497                                            _NL_COLLATE_SYMB_EXTRAMB);
 498
 499                             /* Locate the character in the hashing table.  */
 500                             hash = elem_hash (str, c1);
 501
 502                             idx = 0;
 503                             elem = hash % table_size;
 504                             second = hash % (table_size - 2);
 505                             while (symb_table[2 * elem] != 0)
 506                               {
 507                                 /* First compare the hashing value.  */
 508                                 if (symb_table[2 * elem] == hash
 509                                     && c1 == extra[symb_table[2 * elem + 1]]
 510                                     && memcmp (str,
 511                                                &extra[symb_table[2 * elem + 1]
 512                                                      + 1], c1) == 0)
 513                                   {
 514                                     /* Yep, this is the entry.  */
 515                                     idx = symb_table[2 * elem + 1];
 516                                     idx += 1 + extra[idx];
 517                                     break;
 518                                   }
 519
 520                                 /* Next entry.  */
 521                                 elem += second;
 522                               }
 523
 524                             if (symb_table[2 * elem] != 0)
 525                               {
 526                                 /* Compare the byte sequence but only if
 527                                    this is not part of a range.  */
 528 # ifdef WIDE_CHAR_VERSION
 529                                 int32_t *wextra;
 530
 531                                 idx += 1 + extra[idx];
 532                                 /* Adjust for the alignment.  */
 533                                 idx = (idx + 3) & ~3;
 534
 535                                 wextra = (int32_t *) &extra[idx + 4];
 536 # endif
 537
 538                                 if (! is_range)
 539                                   {
 540 # ifdef WIDE_CHAR_VERSION
 541                                     for (c1 = 0; c1 < wextra[idx]; ++c1)
 542                                       if (n[c1] != wextra[1 + c1])
 543                                         break;
 544
 545                                     if (c1 == wextra[idx])
 546                                       goto matched;
 547 # else
 548                                     for (c1 = 0; c1 < extra[idx]; ++c1)
 549                                       if (n[c1] != extra[1 + c1])
 550                                         break;
 551
 552                                     if (c1 == extra[idx])
 553                                       goto matched;
 554 # endif
 555                                   }
 556
 557                                 /* Get the collation sequence value.  */
 558                                 is_seqval = 1;
 559 # ifdef WIDE_CHAR_VERSION
 560                                 cold = wextra[1 + wextra[idx]];
 561 # else
 562                                 /* Adjust for the alignment.  */
 563                                 idx += 1 + extra[idx];
 564                                 idx = (idx + 3) & ~4;
 565                                 cold = *((int32_t *) &extra[idx]);
 566 # endif
 567
 568                                 c = *p++;
 569                               }
 570                             else if (c1 == 1)
 571                               {
 572                                 /* No valid character.  Match it as a
 573                                    single byte.  */
 574                                 if (!is_range && *n == str[0])
 575                                   goto matched;
 576
 577                                 cold = str[0];
 578                                 c = *p++;
 579                               }
 580                             else
 581                               return FNM_NOMATCH;
 582                           }
 583                       }
 584                     else
 585 # undef str
 586 #endif
 587                       {
 588                         c = FOLD (c);
 589                       normal_bracket:
 590
 591                         /* We have to handling the symbols differently in
 592                            ranges since then the collation sequence is
 593                            important.  */
 594                         is_range = (*p == L('-') && p[1] != L('\0')
 595                                     && p[1] != L(']'));
 596
 597                         if (!is_range && c == fn)
 598                           goto matched;
 599
 600                         cold = c;
 601                         c = *p++;
 602                       }
 603
 604                     if (c == L('-') && *p != L(']'))
 605                       {
 606 #if _LIBC
 607                         /* We have to find the collation sequence
 608                            value for C.  Collation sequence is nothing
 609                            we can regularly access.  The sequence
 610                            value is defined by the order in which the
 611                            definitions of the collation values for the
 612                            various characters appear in the source
 613                            file.  A strange concept, nowhere
 614                            documented.  */
 615                         uint32_t fcollseq;
 616                         uint32_t lcollseq;
 617                         UCHAR cend = *p++;
 618
 619 # ifdef WIDE_CHAR_VERSION
 620                         /* Search in the `names' array for the characters.  */
 621                         fcollseq = collseq_table_lookup (collseq, fn);
 622                         if (fcollseq == ~((uint32_t) 0))
 623                           /* XXX We don't know anything about the character
 624                              we are supposed to match.  This means we are
 625                              failing.  */
 626                           goto range_not_matched;
 627
 628                         if (is_seqval)
 629                           lcollseq = cold;
 630                         else
 631                           lcollseq = collseq_table_lookup (collseq, cold);
 632 # else
 633                         fcollseq = collseq[fn];
 634                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
 635 # endif
 636
 637                         is_seqval = 0;
 638                         if (cend == L('[') && *p == L('.'))
 639                           {
 640                             uint32_t nrules =
 641                               _NL_CURRENT_WORD (LC_COLLATE,
 642                                                 _NL_COLLATE_NRULES);
 643                             const CHAR *startp = p;
 644                             size_t c1 = 0;
 645
 646                             while (1)
 647                               {
 648                                 c = *++p;
 649                                 if (c == L('.') && p[1] == L(']'))
 650                                   {
 651                                     p += 2;
 652                                     break;
 653                                   }
 654                                 if (c == '\0')
 655                                   return FNM_NOMATCH;
 656                                 ++c1;
 657                               }
 658
 659                             if (nrules == 0)
 660                               {
 661                                 /* There are no names defined in the
 662                                    collation data.  Therefore we only
 663                                    accept the trivial names consisting
 664                                    of the character itself.  */
 665                                 if (c1 != 1)
 666                                   return FNM_NOMATCH;
 667
 668                                 cend = startp[1];
 669                               }
 670                             else
 671                               {
 672                                 int32_t table_size;
 673                                 const int32_t *symb_table;
 674 # ifdef WIDE_CHAR_VERSION
 675                                 char str[c1];
 676                                 unsigned int strcnt;
 677 # else
 678 #  define str (startp + 1)
 679 # endif
 680                                 const unsigned char *extra;
 681                                 int32_t idx;
 682                                 int32_t elem;
 683                                 int32_t second;
 684                                 int32_t hash;
 685
 686 # ifdef WIDE_CHAR_VERSION
 687                                 /* We have to convert the name to a single-byte
 688                                    string.  This is possible since the names
 689                                    consist of ASCII characters and the internal
 690                                    representation is UCS4.  */
 691                                 for (strcnt = 0; strcnt < c1; ++strcnt)
 692                                   str[strcnt] = startp[1 + strcnt];
 693 # endif
 694
 695                                 table_size =
 696                                   _NL_CURRENT_WORD (LC_COLLATE,
 697                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
 698                                 symb_table = (const int32_t *)
 699                                   _NL_CURRENT (LC_COLLATE,
 700                                                _NL_COLLATE_SYMB_TABLEMB);
 701                                 extra = (const unsigned char *)
 702                                   _NL_CURRENT (LC_COLLATE,
 703                                                _NL_COLLATE_SYMB_EXTRAMB);
 704
 705                                 /* Locate the character in the hashing
 706                                    table.  */
 707                                 hash = elem_hash (str, c1);
 708
 709                                 idx = 0;
 710                                 elem = hash % table_size;
 711                                 second = hash % (table_size - 2);
 712                                 while (symb_table[2 * elem] != 0)
 713                                   {
 714                                 /* First compare the hashing value.  */
 715                                     if (symb_table[2 * elem] == hash
 716                                         && (c1
 717                                             == extra[symb_table[2 * elem + 1]])
 718                                         && memcmp (str,
 719                                                    &extra[symb_table[2 * elem + 1]
 720                                                          + 1], c1) == 0)
 721                                       {
 722                                         /* Yep, this is the entry.  */
 723                                         idx = symb_table[2 * elem + 1];
 724                                         idx += 1 + extra[idx];
 725                                         break;
 726                                       }
 727
 728                                     /* Next entry.  */
 729                                     elem += second;
 730                                   }
 731
 732                                 if (symb_table[2 * elem] != 0)
 733                                   {
 734                                     /* Compare the byte sequence but only if
 735                                        this is not part of a range.  */
 736 # ifdef WIDE_CHAR_VERSION
 737                                     int32_t *wextra;
 738
 739                                     idx += 1 + extra[idx];
 740                                     /* Adjust for the alignment.  */
 741                                     idx = (idx + 3) & ~4;
 742
 743                                     wextra = (int32_t *) &extra[idx + 4];
 744 # endif
 745                                     /* Get the collation sequence value.  */
 746                                     is_seqval = 1;
 747 # ifdef WIDE_CHAR_VERSION
 748                                     cend = wextra[1 + wextra[idx]];
 749 # else
 750                                     /* Adjust for the alignment.  */
 751                                     idx += 1 + extra[idx];
 752                                     idx = (idx + 3) & ~4;
 753                                     cend = *((int32_t *) &extra[idx]);
 754 # endif
 755                                   }
 756                                 else if (symb_table[2 * elem] != 0 && c1 == 1)
 757                                   {
 758                                     cend = str[0];
 759                                     c = *p++;
 760                                   }
 761                                 else
 762                                   return FNM_NOMATCH;
 763                               }
 764 # undef str
 765                           }
 766                         else
 767                           {
 768                             if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
 769                               cend = *p++;
 770                             if (cend == L('\0'))
 771                               return FNM_NOMATCH;
 772                             cend = FOLD (cend);
 773                           }
 774
 775                         /* XXX It is not entirely clear to me how to handle
 776                            characters which are not mentioned in the
 777                            collation specification.  */
 778                         if (
 779 # ifdef WIDE_CHAR_VERSION
 780                             lcollseq == 0xffffffff ||
 781 # endif
 782                             lcollseq <= fcollseq)
 783                           {
 784                             /* We have to look at the upper bound.  */
 785                             uint32_t hcollseq;
 786
 787                             if (is_seqval)
 788                               hcollseq = cend;
 789                             else
 790                               {
 791 # ifdef WIDE_CHAR_VERSION
 792                                 hcollseq =
 793                                   collseq_table_lookup (collseq, cend);
 794                                 if (hcollseq == ~((uint32_t) 0))
 795                                   {
 796                                     /* Hum, no information about the upper
 797                                        bound.  The matching succeeds if the
 798                                        lower bound is matched exactly.  */
 799                                     if (lcollseq != fcollseq)
 800                                       goto range_not_matched;
 801
 802                                     goto matched;
 803                                   }
 804 # else
 805                                 hcollseq = collseq[cend];
 806 # endif
 807                               }
 808
 809                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
 810                               goto matched;
 811                           }
 812 # ifdef WIDE_CHAR_VERSION
 813                       range_not_matched:
 814 # endif
 815 #else
 816                         /* We use a boring value comparison of the character
 817                            values.  This is better than comparing using
 818                            `strcoll' since the latter would have surprising
 819                            and sometimes fatal consequences.  */
 820                         UCHAR cend = *p++;
 821
 822                         if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
 823                           cend = *p++;
 824                         if (cend == L('\0'))
 825                           return FNM_NOMATCH;
 826
 827                         /* It is a range.  */
 828                         if (cold <= fn && fn <= cend)
 829                           goto matched;
 830 #endif
 831
 832                         c = *p++;
 833                       }
 834                   }
 835
 836                 if (c == L(']'))
 837                   break;
 838               }
 839
 840             if (!not)
 841               return FNM_NOMATCH;
 842             break;
 843
 844           matched:
 845             /* Skip the rest of the [...] that already matched.  */
 846             do
 847               {
 848               ignore_next:
 849                 c = *p++;
 850
 851                 if (c == L('\0'))
 852                   /* [... (unterminated) loses.  */
 853                   return FNM_NOMATCH;
 854
 855                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
 856                   {
 857                     if (*p == L('\0'))
 858                       return FNM_NOMATCH;
 859                     /* XXX 1003.2d11 is unclear if this is right.  */
 860                     ++p;
 861                   }
 862                 else if (c == L('[') && *p == L(':'))
 863                   {
 864                     int c1 = 0;
 865                     const CHAR *startp = p;
 866
 867                     while (1)
 868                       {
 869                         c = *++p;
 870                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
 871                           return FNM_NOMATCH;
 872
 873                         if (*p == L(':') && p[1] == L(']'))
 874                           break;
 875
 876                         if (c < L('a') || c >= L('z'))
 877                           {
 878                             p = startp;
 879                             goto ignore_next;
 880                           }
 881                       }
 882                     p += 2;
 883                     c = *p++;
 884                   }
 885                 else if (c == L('[') && *p == L('='))
 886                   {
 887                     c = *++p;
 888                     if (c == L('\0'))
 889                       return FNM_NOMATCH;
 890                     c = *++p;
 891                     if (c != L('=') || p[1] != L(']'))
 892                       return FNM_NOMATCH;
 893                     p += 2;
 894                     c = *p++;
 895                   }
 896                 else if (c == L('[') && *p == L('.'))
 897                   {
 898                     ++p;
 899                     while (1)
 900                       {
 901                         c = *++p;
 902                         if (c == '\0')
 903                           return FNM_NOMATCH;
 904
 905                         if (*p == L('.') && p[1] == L(']'))
 906                           break;
 907                       }
 908                     p += 2;
 909                     c = *p++;
 910                   }
 911               }
 912             while (c != L(']'));
 913             if (not)
 914               return FNM_NOMATCH;
 915           }
 916           break;
 917
 918         case L('+'):
 919         case L('@'):
 920         case L('!'):
 921           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
 922             {
 923               int res;
 924
 925               res = EXT (c, p, n, string_end, no_leading_period, flags);
 926               if (res != -1)
 927                 return res;
 928             }
 929           goto normal_match;
 930
 931         case L('/'):
 932           if (NO_LEADING_PERIOD (flags))
 933             {
 934               if (n == string_end || c != *n)
 935                 return FNM_NOMATCH;
 936
 937               new_no_leading_period = 1;
 938               break;
 939             }
 940           /* FALLTHROUGH */
 941         default:
 942         normal_match:
 943           if (n == string_end || c != FOLD ((UCHAR) *n))
 944             return FNM_NOMATCH;
 945         }
 946
 947       no_leading_period = new_no_leading_period;
 948       ++n;
 949     }
 950
 951   if (n == string_end)
 952     return 0;
 953
 954   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
 955     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
 956     return 0;
 957
 958   return FNM_NOMATCH;
 959 }
 960
 961
 962 static const CHAR *
 963 internal_function
 964 END (const CHAR *pattern)
 965 {
 966   const CHAR *p = pattern;
 967
 968   while (1)
 969     if (*++p == L('\0'))
 970       /* This is an invalid pattern.  */
 971       return pattern;
 972     else if (*p == L('['))
 973       {
 974         /* Handle brackets special.  */
 975         if (posixly_correct == 0)
 976           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 977
 978         /* Skip the not sign.  We have to recognize it because of a possibly
 979            following ']'.  */
 980         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
 981           ++p;
 982         /* A leading ']' is recognized as such.  */
 983         if (*p == L(']'))
 984           ++p;
 985         /* Skip over all characters of the list.  */
 986         while (*p != L(']'))
 987           if (*p++ == L('\0'))
 988             /* This is no valid pattern.  */
 989             return pattern;
 990       }
 991     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
 992               || *p == L('!')) && p[1] == L('('))
 993       p = END (p + 1);
 994     else if (*p == L(')'))
 995       break;
 996
 997   return p + 1;
 998 }
 999
1000
1001 static int
1002 internal_function
1003 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1004      int no_leading_period, int flags)
1005 {
1006   const CHAR *startp;
1007   int level;
1008   struct patternlist
1009   {
1010     struct patternlist *next;
1011     CHAR str[1];
1012   } *list = NULL;
1013   struct patternlist **lastp = &list;
1014   size_t pattern_len = STRLEN (pattern);
1015   const CHAR *p;
1016   const CHAR *rs;
1017
1018   /* Parse the pattern.  Store the individual parts in the list.  */
1019   level = 0;
1020   for (startp = p = pattern + 1; level >= 0; ++p)
1021     if (*p == L('\0'))
1022       /* This is an invalid pattern.  */
1023       return -1;
1024     else if (*p == L('['))
1025       {
1026         /* Handle brackets special.  */
1027         if (posixly_correct == 0)
1028           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1029
1030         /* Skip the not sign.  We have to recognize it because of a possibly
1031            following ']'.  */
1032         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1033           ++p;
1034         /* A leading ']' is recognized as such.  */
1035         if (*p == L(']'))
1036           ++p;
1037         /* Skip over all characters of the list.  */
1038         while (*p != L(']'))
1039           if (*p++ == L('\0'))
1040             /* This is no valid pattern.  */
1041             return -1;
1042       }
1043     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1044               || *p == L('!')) && p[1] == L('('))
1045       /* Remember the nesting level.  */
1046       ++level;
1047     else if (*p == L(')'))
1048       {
1049         if (level-- == 0)
1050           {
1051             /* This means we found the end of the pattern.  */
1052 #define NEW_PATTERN \
1053             struct patternlist *newp;                                         \
1054                                                                               \
1055             if (opt == L('?') || opt == L('@'))                               \
1056               newp = alloca (offsetof (struct patternlist, str)               \
1057                              + (pattern_len * sizeof (CHAR)));                \
1058             else                                                              \
1059               newp = alloca (offsetof (struct patternlist, str)               \
1060                              + ((p - startp + 1) * sizeof (CHAR)));           \
1061             *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0');    \
1062             newp->next = NULL;                                                \
1063             *lastp = newp;                                                    \
1064             lastp = &newp->next
1065             NEW_PATTERN;
1066           }
1067       }
1068     else if (*p == L('|'))
1069       {
1070         if (level == 0)
1071           {
1072             NEW_PATTERN;
1073             startp = p + 1;
1074           }
1075       }
1076   assert (list != NULL);
1077   assert (p[-1] == L(')'));
1078 #undef NEW_PATTERN
1079
1080   switch (opt)
1081     {
1082     case L('*'):
1083       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1084         return 0;
1085       /* FALLTHROUGH */
1086
1087     case L('+'):
1088       do
1089         {
1090           for (rs = string; rs <= string_end; ++rs)
1091             /* First match the prefix with the current pattern with the
1092                current pattern.  */
1093             if (FCT (list->str, string, rs, no_leading_period,
1094                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1095                 /* This was successful.  Now match the rest with the rest
1096                    of the pattern.  */
1097                 && (FCT (p, rs, string_end,
1098                          rs == string
1099                          ? no_leading_period
1100                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1101                          flags & FNM_FILE_NAME
1102                          ? flags : flags & ~FNM_PERIOD) == 0
1103                     /* This didn't work.  Try the whole pattern.  */
1104                     || (rs != string
1105                         && FCT (pattern - 1, rs, string_end,
1106                                 rs == string
1107                                 ? no_leading_period
1108                                 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1109                                    ? 1 : 0),
1110                                 flags & FNM_FILE_NAME
1111                                 ? flags : flags & ~FNM_PERIOD) == 0)))
1112               /* It worked.  Signal success.  */
1113               return 0;
1114         }
1115       while ((list = list->next) != NULL);
1116
1117       /* None of the patterns lead to a match.  */
1118       return FNM_NOMATCH;
1119
1120     case L('?'):
1121       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1122         return 0;
1123       /* FALLTHROUGH */
1124
1125     case L('@'):
1126       do
1127         /* I cannot believe it but `strcat' is actually acceptable
1128            here.  Match the entire string with the prefix from the
1129            pattern list and the rest of the pattern following the
1130            pattern list.  */
1131         if (FCT (STRCAT (list->str, p), string, string_end,
1132                  no_leading_period,
1133                  flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1134           /* It worked.  Signal success.  */
1135           return 0;
1136       while ((list = list->next) != NULL);
1137
1138       /* None of the patterns lead to a match.  */
1139       return FNM_NOMATCH;
1140
1141     case L('!'):
1142       for (rs = string; rs <= string_end; ++rs)
1143         {
1144           struct patternlist *runp;
1145
1146           for (runp = list; runp != NULL; runp = runp->next)
1147             if (FCT (runp->str, string, rs,  no_leading_period,
1148                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1149               break;
1150
1151           /* If none of the patterns matched see whether the rest does.  */
1152           if (runp == NULL
1153               && (FCT (p, rs, string_end,
1154                        rs == string
1155                        ? no_leading_period
1156                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1157                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1158                   == 0))
1159             /* This is successful.  */
1160             return 0;
1161         }
1162
1163       /* None of the patterns together with the rest of the pattern
1164          lead to a match.  */
1165       return FNM_NOMATCH;
1166
1167     default:
1168       assert (! "Invalid extended matching operator");
1169       break;
1170     }
1171
1172   return -1;
1173 }
1174
1175
1176 #undef FOLD
1177 #undef CHAR
1178 #undef UCHAR
1179 #undef INT
1180 #undef FCT
1181 #undef EXT
1182 #undef END
1183 #undef MEMPCPY
1184 #undef MEMCHR
1185 #undef STRCOLL
1186 #undef STRLEN
1187 #undef STRCAT
1188 #undef L
1189 #undef BTOWC