2004-07-12 Paul Eggert <eggert@cs.ucla.edu>
[glibc/history.git] / posix / fnmatch_loop.c
blob0c14d45db747f32492adb83b5df6092729e68f89
/* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2003
   Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */
20 /* Match STRING against the filename pattern PATTERN, returning zero if
21 it matches, nonzero if not. */
22 static int FCT (const CHAR *pattern, const CHAR *string,
23 const CHAR *string_end, int no_leading_period, int flags)
24 internal_function;
25 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
26 const CHAR *string_end, int no_leading_period, int flags)
27 internal_function;
28 static const CHAR *END (const CHAR *patternp) internal_function;
30 static int
31 internal_function
32 FCT (pattern, string, string_end, no_leading_period, flags)
33 const CHAR *pattern;
34 const CHAR *string;
35 const CHAR *string_end;
36 int no_leading_period;
37 int flags;
39 register const CHAR *p = pattern, *n = string;
40 register UCHAR c;
41 #ifdef _LIBC
42 # if WIDE_CHAR_VERSION
43 const char *collseq = (const char *)
44 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
45 # else
46 const UCHAR *collseq = (const UCHAR *)
47 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
48 # endif
49 #endif
51 while ((c = *p++) != L('\0'))
53 int new_no_leading_period = 0;
54 c = FOLD (c);
56 switch (c)
58 case L('?'):
59 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
61 int res;
63 res = EXT (c, p, n, string_end, no_leading_period,
64 flags);
65 if (res != -1)
66 return res;
69 if (n == string_end)
70 return FNM_NOMATCH;
71 else if (*n == L('/') && (flags & FNM_FILE_NAME))
72 return FNM_NOMATCH;
73 else if (*n == L('.') && no_leading_period)
74 return FNM_NOMATCH;
75 break;
77 case L('\\'):
78 if (!(flags & FNM_NOESCAPE))
80 c = *p++;
81 if (c == L('\0'))
82 /* Trailing \ loses. */
83 return FNM_NOMATCH;
84 c = FOLD (c);
86 if (n == string_end || FOLD ((UCHAR) *n) != c)
87 return FNM_NOMATCH;
88 break;
90 case L('*'):
91 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
93 int res;
95 res = EXT (c, p, n, string_end, no_leading_period,
96 flags);
97 if (res != -1)
98 return res;
101 if (n != string_end && *n == L('.') && no_leading_period)
102 return FNM_NOMATCH;
104 for (c = *p++; c == L('?') || c == L('*'); c = *p++)
106 if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
108 const CHAR *endp = END (p);
109 if (endp != p)
111 /* This is a pattern. Skip over it. */
112 p = endp;
113 continue;
117 if (c == L('?'))
119 /* A ? needs to match one character. */
120 if (n == string_end)
121 /* There isn't another character; no match. */
122 return FNM_NOMATCH;
123 else if (*n == L('/')
124 && __builtin_expect (flags & FNM_FILE_NAME, 0))
125 /* A slash does not match a wildcard under
126 FNM_FILE_NAME. */
127 return FNM_NOMATCH;
128 else
129 /* One character of the string is consumed in matching
130 this ? wildcard, so *??? won't match if there are
131 less than three characters. */
132 ++n;
136 if (c == L('\0'))
137 /* The wildcard(s) is/are the last element of the pattern.
138 If the name is a file name and contains another slash
139 this means it cannot match, unless the FNM_LEADING_DIR
140 flag is set. */
142 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
144 if (flags & FNM_FILE_NAME)
146 if (flags & FNM_LEADING_DIR)
147 result = 0;
148 else
150 if (MEMCHR (n, L('/'), string_end - n) == NULL)
151 result = 0;
155 return result;
157 else
159 const CHAR *endp;
161 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
162 string_end - n);
163 if (endp == NULL)
164 endp = string_end;
166 if (c == L('[')
167 || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
168 && (c == L('@') || c == L('+') || c == L('!'))
169 && *p == L('(')))
171 int flags2 = ((flags & FNM_FILE_NAME)
172 ? flags : (flags & ~FNM_PERIOD));
173 int no_leading_period2 = no_leading_period;
175 for (--p; n < endp; ++n, no_leading_period2 = 0)
176 if (FCT (p, n, string_end, no_leading_period2, flags2)
177 == 0)
178 return 0;
180 else if (c == L('/') && (flags & FNM_FILE_NAME))
182 while (n < string_end && *n != L('/'))
183 ++n;
184 if (n < string_end && *n == L('/')
185 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
186 == 0))
187 return 0;
189 else
191 int flags2 = ((flags & FNM_FILE_NAME)
192 ? flags : (flags & ~FNM_PERIOD));
193 int no_leading_period2 = no_leading_period;
195 if (c == L('\\') && !(flags & FNM_NOESCAPE))
196 c = *p;
197 c = FOLD (c);
198 for (--p; n < endp; ++n, no_leading_period2 = 0)
199 if (FOLD ((UCHAR) *n) == c
200 && (FCT (p, n, string_end, no_leading_period2, flags2)
201 == 0))
202 return 0;
206 /* If we come here no match is possible with the wildcard. */
207 return FNM_NOMATCH;
209 case L('['):
211 /* Nonzero if the sense of the character class is inverted. */
212 register int not;
213 CHAR cold;
214 UCHAR fn;
216 if (posixly_correct == 0)
217 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
219 if (n == string_end)
220 return FNM_NOMATCH;
222 if (*n == L('.') && no_leading_period)
223 return FNM_NOMATCH;
225 if (*n == L('/') && (flags & FNM_FILE_NAME))
226 /* `/' cannot be matched. */
227 return FNM_NOMATCH;
229 not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
230 if (not)
231 ++p;
233 fn = FOLD ((UCHAR) *n);
235 c = *p++;
236 for (;;)
238 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
240 if (*p == L('\0'))
241 return FNM_NOMATCH;
242 c = FOLD ((UCHAR) *p);
243 ++p;
245 if (c == fn)
246 goto matched;
248 else if (c == L('[') && *p == L(':'))
250 /* Leave room for the null. */
251 CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
252 size_t c1 = 0;
253 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
254 wctype_t wt;
255 #endif
256 const CHAR *startp = p;
258 for (;;)
260 if (c1 == CHAR_CLASS_MAX_LENGTH)
261 /* The name is too long and therefore the pattern
262 is ill-formed. */
263 return FNM_NOMATCH;
265 c = *++p;
266 if (c == L(':') && p[1] == L(']'))
268 p += 2;
269 break;
271 if (c < L('a') || c >= L('z'))
273 /* This cannot possibly be a character class name.
274 Match it as a normal range. */
275 p = startp;
276 c = L('[');
277 goto normal_bracket;
279 str[c1++] = c;
281 str[c1] = L('\0');
283 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
284 wt = IS_CHAR_CLASS (str);
285 if (wt == 0)
286 /* Invalid character class name. */
287 return FNM_NOMATCH;
289 # if defined _LIBC && ! WIDE_CHAR_VERSION
290 /* The following code is glibc specific but does
291 there a good job in speeding up the code since
292 we can avoid the btowc() call. */
293 if (_ISCTYPE ((UCHAR) *n, wt))
294 goto matched;
295 # else
296 if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
297 goto matched;
298 # endif
299 #else
300 if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
301 || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
302 || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
303 || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
304 || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
305 || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
306 || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
307 || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
308 || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
309 || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
310 || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
311 || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
312 goto matched;
313 #endif
314 c = *p++;
316 #ifdef _LIBC
317 else if (c == L('[') && *p == L('='))
319 UCHAR str[1];
320 uint32_t nrules =
321 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
322 const CHAR *startp = p;
324 c = *++p;
325 if (c == L('\0'))
327 p = startp;
328 c = L('[');
329 goto normal_bracket;
331 str[0] = c;
333 c = *++p;
334 if (c != L('=') || p[1] != L(']'))
336 p = startp;
337 c = L('[');
338 goto normal_bracket;
340 p += 2;
342 if (nrules == 0)
344 if ((UCHAR) *n == str[0])
345 goto matched;
347 else
349 const int32_t *table;
350 # if WIDE_CHAR_VERSION
351 const int32_t *weights;
352 const int32_t *extra;
353 # else
354 const unsigned char *weights;
355 const unsigned char *extra;
356 # endif
357 const int32_t *indirect;
358 int32_t idx;
359 const UCHAR *cp = (const UCHAR *) str;
361 /* This #include defines a local function! */
362 # if WIDE_CHAR_VERSION
363 # include <locale/weightwc.h>
364 # else
365 # include <locale/weight.h>
366 # endif
368 # if WIDE_CHAR_VERSION
369 table = (const int32_t *)
370 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
371 weights = (const int32_t *)
372 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
373 extra = (const int32_t *)
374 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
375 indirect = (const int32_t *)
376 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
377 # else
378 table = (const int32_t *)
379 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
380 weights = (const unsigned char *)
381 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
382 extra = (const unsigned char *)
383 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
384 indirect = (const int32_t *)
385 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
386 # endif
388 idx = findidx (&cp);
389 if (idx != 0)
391 /* We found a table entry. Now see whether the
392 character we are currently at has the same
393 equivalance class value. */
394 int len = weights[idx];
395 int32_t idx2;
396 const UCHAR *np = (const UCHAR *) n;
398 idx2 = findidx (&np);
399 if (idx2 != 0 && len == weights[idx2])
401 int cnt = 0;
403 while (cnt < len
404 && (weights[idx + 1 + cnt]
405 == weights[idx2 + 1 + cnt]))
406 ++cnt;
408 if (cnt == len)
409 goto matched;
414 c = *p++;
416 #endif
417 else if (c == L('\0'))
418 /* [ (unterminated) loses. */
419 return FNM_NOMATCH;
420 else
422 int is_range = 0;
424 #ifdef _LIBC
425 int is_seqval = 0;
427 if (c == L('[') && *p == L('.'))
429 uint32_t nrules =
430 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
431 const CHAR *startp = p;
432 size_t c1 = 0;
434 while (1)
436 c = *++p;
437 if (c == L('.') && p[1] == L(']'))
439 p += 2;
440 break;
442 if (c == '\0')
443 return FNM_NOMATCH;
444 ++c1;
447 /* We have to handling the symbols differently in
448 ranges since then the collation sequence is
449 important. */
450 is_range = *p == L('-') && p[1] != L('\0');
452 if (nrules == 0)
454 /* There are no names defined in the collation
455 data. Therefore we only accept the trivial
456 names consisting of the character itself. */
457 if (c1 != 1)
458 return FNM_NOMATCH;
460 if (!is_range && *n == startp[1])
461 goto matched;
463 cold = startp[1];
464 c = *p++;
466 else
468 int32_t table_size;
469 const int32_t *symb_table;
470 # ifdef WIDE_CHAR_VERSION
471 char str[c1];
472 unsigned int strcnt;
473 # else
474 # define str (startp + 1)
475 # endif
476 const unsigned char *extra;
477 int32_t idx;
478 int32_t elem;
479 int32_t second;
480 int32_t hash;
482 # ifdef WIDE_CHAR_VERSION
483 /* We have to convert the name to a single-byte
484 string. This is possible since the names
485 consist of ASCII characters and the internal
486 representation is UCS4. */
487 for (strcnt = 0; strcnt < c1; ++strcnt)
488 str[strcnt] = startp[1 + strcnt];
489 #endif
491 table_size =
492 _NL_CURRENT_WORD (LC_COLLATE,
493 _NL_COLLATE_SYMB_HASH_SIZEMB);
494 symb_table = (const int32_t *)
495 _NL_CURRENT (LC_COLLATE,
496 _NL_COLLATE_SYMB_TABLEMB);
497 extra = (const unsigned char *)
498 _NL_CURRENT (LC_COLLATE,
499 _NL_COLLATE_SYMB_EXTRAMB);
501 /* Locate the character in the hashing table. */
502 hash = elem_hash (str, c1);
504 idx = 0;
505 elem = hash % table_size;
506 second = hash % (table_size - 2);
507 while (symb_table[2 * elem] != 0)
509 /* First compare the hashing value. */
510 if (symb_table[2 * elem] == hash
511 && c1 == extra[symb_table[2 * elem + 1]]
512 && memcmp (str,
513 &extra[symb_table[2 * elem + 1]
514 + 1], c1) == 0)
516 /* Yep, this is the entry. */
517 idx = symb_table[2 * elem + 1];
518 idx += 1 + extra[idx];
519 break;
522 /* Next entry. */
523 elem += second;
526 if (symb_table[2 * elem] != 0)
528 /* Compare the byte sequence but only if
529 this is not part of a range. */
530 # ifdef WIDE_CHAR_VERSION
531 int32_t *wextra;
533 idx += 1 + extra[idx];
534 /* Adjust for the alignment. */
535 idx = (idx + 3) & ~3;
537 wextra = (int32_t *) &extra[idx + 4];
538 # endif
540 if (! is_range)
542 # ifdef WIDE_CHAR_VERSION
543 for (c1 = 0;
544 (int32_t) c1 < wextra[idx];
545 ++c1)
546 if (n[c1] != wextra[1 + c1])
547 break;
549 if ((int32_t) c1 == wextra[idx])
550 goto matched;
551 # else
552 for (c1 = 0; c1 < extra[idx]; ++c1)
553 if (n[c1] != extra[1 + c1])
554 break;
556 if (c1 == extra[idx])
557 goto matched;
558 # endif
561 /* Get the collation sequence value. */
562 is_seqval = 1;
563 # ifdef WIDE_CHAR_VERSION
564 cold = wextra[1 + wextra[idx]];
565 # else
566 /* Adjust for the alignment. */
567 idx += 1 + extra[idx];
568 idx = (idx + 3) & ~4;
569 cold = *((int32_t *) &extra[idx]);
570 # endif
572 c = *p++;
574 else if (c1 == 1)
576 /* No valid character. Match it as a
577 single byte. */
578 if (!is_range && *n == str[0])
579 goto matched;
581 cold = str[0];
582 c = *p++;
584 else
585 return FNM_NOMATCH;
588 else
589 # undef str
590 #endif
592 c = FOLD (c);
593 normal_bracket:
595 /* We have to handling the symbols differently in
596 ranges since then the collation sequence is
597 important. */
598 is_range = (*p == L('-') && p[1] != L('\0')
599 && p[1] != L(']'));
601 if (!is_range && c == fn)
602 goto matched;
604 cold = c;
605 c = *p++;
608 if (c == L('-') && *p != L(']'))
610 #if _LIBC
611 /* We have to find the collation sequence
612 value for C. Collation sequence is nothing
613 we can regularly access. The sequence
614 value is defined by the order in which the
615 definitions of the collation values for the
616 various characters appear in the source
617 file. A strange concept, nowhere
618 documented. */
619 uint32_t fcollseq;
620 uint32_t lcollseq;
621 UCHAR cend = *p++;
623 # ifdef WIDE_CHAR_VERSION
624 /* Search in the `names' array for the characters. */
625 fcollseq = __collseq_table_lookup (collseq, fn);
626 if (fcollseq == ~((uint32_t) 0))
627 /* XXX We don't know anything about the character
628 we are supposed to match. This means we are
629 failing. */
630 goto range_not_matched;
632 if (is_seqval)
633 lcollseq = cold;
634 else
635 lcollseq = __collseq_table_lookup (collseq, cold);
636 # else
637 fcollseq = collseq[fn];
638 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
639 # endif
641 is_seqval = 0;
642 if (cend == L('[') && *p == L('.'))
644 uint32_t nrules =
645 _NL_CURRENT_WORD (LC_COLLATE,
646 _NL_COLLATE_NRULES);
647 const CHAR *startp = p;
648 size_t c1 = 0;
650 while (1)
652 c = *++p;
653 if (c == L('.') && p[1] == L(']'))
655 p += 2;
656 break;
658 if (c == '\0')
659 return FNM_NOMATCH;
660 ++c1;
663 if (nrules == 0)
665 /* There are no names defined in the
666 collation data. Therefore we only
667 accept the trivial names consisting
668 of the character itself. */
669 if (c1 != 1)
670 return FNM_NOMATCH;
672 cend = startp[1];
674 else
676 int32_t table_size;
677 const int32_t *symb_table;
678 # ifdef WIDE_CHAR_VERSION
679 char str[c1];
680 unsigned int strcnt;
681 # else
682 # define str (startp + 1)
683 # endif
684 const unsigned char *extra;
685 int32_t idx;
686 int32_t elem;
687 int32_t second;
688 int32_t hash;
690 # ifdef WIDE_CHAR_VERSION
691 /* We have to convert the name to a single-byte
692 string. This is possible since the names
693 consist of ASCII characters and the internal
694 representation is UCS4. */
695 for (strcnt = 0; strcnt < c1; ++strcnt)
696 str[strcnt] = startp[1 + strcnt];
697 # endif
699 table_size =
700 _NL_CURRENT_WORD (LC_COLLATE,
701 _NL_COLLATE_SYMB_HASH_SIZEMB);
702 symb_table = (const int32_t *)
703 _NL_CURRENT (LC_COLLATE,
704 _NL_COLLATE_SYMB_TABLEMB);
705 extra = (const unsigned char *)
706 _NL_CURRENT (LC_COLLATE,
707 _NL_COLLATE_SYMB_EXTRAMB);
709 /* Locate the character in the hashing
710 table. */
711 hash = elem_hash (str, c1);
713 idx = 0;
714 elem = hash % table_size;
715 second = hash % (table_size - 2);
716 while (symb_table[2 * elem] != 0)
718 /* First compare the hashing value. */
719 if (symb_table[2 * elem] == hash
720 && (c1
721 == extra[symb_table[2 * elem + 1]])
722 && memcmp (str,
723 &extra[symb_table[2 * elem + 1]
724 + 1], c1) == 0)
726 /* Yep, this is the entry. */
727 idx = symb_table[2 * elem + 1];
728 idx += 1 + extra[idx];
729 break;
732 /* Next entry. */
733 elem += second;
736 if (symb_table[2 * elem] != 0)
738 /* Compare the byte sequence but only if
739 this is not part of a range. */
740 # ifdef WIDE_CHAR_VERSION
741 int32_t *wextra;
743 idx += 1 + extra[idx];
744 /* Adjust for the alignment. */
745 idx = (idx + 3) & ~4;
747 wextra = (int32_t *) &extra[idx + 4];
748 # endif
749 /* Get the collation sequence value. */
750 is_seqval = 1;
751 # ifdef WIDE_CHAR_VERSION
752 cend = wextra[1 + wextra[idx]];
753 # else
754 /* Adjust for the alignment. */
755 idx += 1 + extra[idx];
756 idx = (idx + 3) & ~4;
757 cend = *((int32_t *) &extra[idx]);
758 # endif
760 else if (symb_table[2 * elem] != 0 && c1 == 1)
762 cend = str[0];
763 c = *p++;
765 else
766 return FNM_NOMATCH;
768 # undef str
770 else
772 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
773 cend = *p++;
774 if (cend == L('\0'))
775 return FNM_NOMATCH;
776 cend = FOLD (cend);
779 /* XXX It is not entirely clear to me how to handle
780 characters which are not mentioned in the
781 collation specification. */
782 if (
783 # ifdef WIDE_CHAR_VERSION
784 lcollseq == 0xffffffff ||
785 # endif
786 lcollseq <= fcollseq)
788 /* We have to look at the upper bound. */
789 uint32_t hcollseq;
791 if (is_seqval)
792 hcollseq = cend;
793 else
795 # ifdef WIDE_CHAR_VERSION
796 hcollseq =
797 __collseq_table_lookup (collseq, cend);
798 if (hcollseq == ~((uint32_t) 0))
800 /* Hum, no information about the upper
801 bound. The matching succeeds if the
802 lower bound is matched exactly. */
803 if (lcollseq != fcollseq)
804 goto range_not_matched;
806 goto matched;
808 # else
809 hcollseq = collseq[cend];
810 # endif
813 if (lcollseq <= hcollseq && fcollseq <= hcollseq)
814 goto matched;
816 # ifdef WIDE_CHAR_VERSION
817 range_not_matched:
818 # endif
819 #else
820 /* We use a boring value comparison of the character
821 values. This is better than comparing using
822 `strcoll' since the latter would have surprising
823 and sometimes fatal consequences. */
824 UCHAR cend = *p++;
826 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
827 cend = *p++;
828 if (cend == L('\0'))
829 return FNM_NOMATCH;
831 /* It is a range. */
832 if (cold <= fn && fn <= cend)
833 goto matched;
834 #endif
836 c = *p++;
840 if (c == L(']'))
841 break;
844 if (!not)
845 return FNM_NOMATCH;
846 break;
848 matched:
849 /* Skip the rest of the [...] that already matched. */
852 ignore_next:
853 c = *p++;
855 if (c == L('\0'))
856 /* [... (unterminated) loses. */
857 return FNM_NOMATCH;
859 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
861 if (*p == L('\0'))
862 return FNM_NOMATCH;
863 /* XXX 1003.2d11 is unclear if this is right. */
864 ++p;
866 else if (c == L('[') && *p == L(':'))
868 int c1 = 0;
869 const CHAR *startp = p;
871 while (1)
873 c = *++p;
874 if (++c1 == CHAR_CLASS_MAX_LENGTH)
875 return FNM_NOMATCH;
877 if (*p == L(':') && p[1] == L(']'))
878 break;
880 if (c < L('a') || c >= L('z'))
882 p = startp;
883 goto ignore_next;
886 p += 2;
887 c = *p++;
889 else if (c == L('[') && *p == L('='))
891 c = *++p;
892 if (c == L('\0'))
893 return FNM_NOMATCH;
894 c = *++p;
895 if (c != L('=') || p[1] != L(']'))
896 return FNM_NOMATCH;
897 p += 2;
898 c = *p++;
900 else if (c == L('[') && *p == L('.'))
902 ++p;
903 while (1)
905 c = *++p;
906 if (c == '\0')
907 return FNM_NOMATCH;
909 if (*p == L('.') && p[1] == L(']'))
910 break;
912 p += 2;
913 c = *p++;
916 while (c != L(']'));
917 if (not)
918 return FNM_NOMATCH;
920 break;
922 case L('+'):
923 case L('@'):
924 case L('!'):
925 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
927 int res;
929 res = EXT (c, p, n, string_end, no_leading_period, flags);
930 if (res != -1)
931 return res;
933 goto normal_match;
935 case L('/'):
936 if (NO_LEADING_PERIOD (flags))
938 if (n == string_end || c != (UCHAR) *n)
939 return FNM_NOMATCH;
941 new_no_leading_period = 1;
942 break;
944 /* FALLTHROUGH */
945 default:
946 normal_match:
947 if (n == string_end || c != FOLD ((UCHAR) *n))
948 return FNM_NOMATCH;
951 no_leading_period = new_no_leading_period;
952 ++n;
955 if (n == string_end)
956 return 0;
958 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
959 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
960 return 0;
962 return FNM_NOMATCH;
966 static const CHAR *
967 internal_function
968 END (const CHAR *pattern)
970 const CHAR *p = pattern;
972 while (1)
973 if (*++p == L('\0'))
974 /* This is an invalid pattern. */
975 return pattern;
976 else if (*p == L('['))
978 /* Handle brackets special. */
979 if (posixly_correct == 0)
980 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
982 /* Skip the not sign. We have to recognize it because of a possibly
983 following ']'. */
984 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
985 ++p;
986 /* A leading ']' is recognized as such. */
987 if (*p == L(']'))
988 ++p;
989 /* Skip over all characters of the list. */
990 while (*p != L(']'))
991 if (*p++ == L('\0'))
992 /* This is no valid pattern. */
993 return pattern;
995 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
996 || *p == L('!')) && p[1] == L('('))
997 p = END (p + 1);
998 else if (*p == L(')'))
999 break;
1001 return p + 1;
1005 static int
1006 internal_function
1007 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1008 int no_leading_period, int flags)
1010 const CHAR *startp;
1011 int level;
1012 struct patternlist
1014 struct patternlist *next;
1015 CHAR str[0];
1016 } *list = NULL;
1017 struct patternlist **lastp = &list;
1018 size_t pattern_len = STRLEN (pattern);
1019 const CHAR *p;
1020 const CHAR *rs;
1022 /* Parse the pattern. Store the individual parts in the list. */
1023 level = 0;
1024 for (startp = p = pattern + 1; level >= 0; ++p)
1025 if (*p == L('\0'))
1026 /* This is an invalid pattern. */
1027 return -1;
1028 else if (*p == L('['))
1030 /* Handle brackets special. */
1031 if (posixly_correct == 0)
1032 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1034 /* Skip the not sign. We have to recognize it because of a possibly
1035 following ']'. */
1036 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1037 ++p;
1038 /* A leading ']' is recognized as such. */
1039 if (*p == L(']'))
1040 ++p;
1041 /* Skip over all characters of the list. */
1042 while (*p != L(']'))
1043 if (*p++ == L('\0'))
1044 /* This is no valid pattern. */
1045 return -1;
1047 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1048 || *p == L('!')) && p[1] == L('('))
1049 /* Remember the nesting level. */
1050 ++level;
1051 else if (*p == L(')'))
1053 if (level-- == 0)
1055 /* This means we found the end of the pattern. */
1056 #define NEW_PATTERN \
1057 struct patternlist *newp; \
1059 if (opt == L('?') || opt == L('@')) \
1060 newp = alloca (sizeof (struct patternlist) \
1061 + (pattern_len * sizeof (CHAR))); \
1062 else \
1063 newp = alloca (sizeof (struct patternlist) \
1064 + ((p - startp + 1) * sizeof (CHAR))); \
1065 *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0'); \
1066 newp->next = NULL; \
1067 *lastp = newp; \
1068 lastp = &newp->next
1069 NEW_PATTERN;
1072 else if (*p == L('|'))
1074 if (level == 0)
1076 NEW_PATTERN;
1077 startp = p + 1;
1080 assert (list != NULL);
1081 assert (p[-1] == L(')'));
1082 #undef NEW_PATTERN
1084 switch (opt)
1086 case L('*'):
1087 if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1088 return 0;
1089 /* FALLTHROUGH */
1091 case L('+'):
1094 for (rs = string; rs <= string_end; ++rs)
1095 /* First match the prefix with the current pattern with the
1096 current pattern. */
1097 if (FCT (list->str, string, rs, no_leading_period,
1098 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1099 /* This was successful. Now match the rest with the rest
1100 of the pattern. */
1101 && (FCT (p, rs, string_end,
1102 rs == string
1103 ? no_leading_period
1104 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1105 flags & FNM_FILE_NAME
1106 ? flags : flags & ~FNM_PERIOD) == 0
1107 /* This didn't work. Try the whole pattern. */
1108 || (rs != string
1109 && FCT (pattern - 1, rs, string_end,
1110 rs == string
1111 ? no_leading_period
1112 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1113 ? 1 : 0),
1114 flags & FNM_FILE_NAME
1115 ? flags : flags & ~FNM_PERIOD) == 0)))
1116 /* It worked. Signal success. */
1117 return 0;
1119 while ((list = list->next) != NULL);
1121 /* None of the patterns lead to a match. */
1122 return FNM_NOMATCH;
1124 case L('?'):
1125 if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1126 return 0;
1127 /* FALLTHROUGH */
1129 case L('@'):
1131 /* I cannot believe it but `strcat' is actually acceptable
1132 here. Match the entire string with the prefix from the
1133 pattern list and the rest of the pattern following the
1134 pattern list. */
1135 if (FCT (STRCAT (list->str, p), string, string_end,
1136 no_leading_period,
1137 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1138 /* It worked. Signal success. */
1139 return 0;
1140 while ((list = list->next) != NULL);
1142 /* None of the patterns lead to a match. */
1143 return FNM_NOMATCH;
1145 case L('!'):
1146 for (rs = string; rs <= string_end; ++rs)
1148 struct patternlist *runp;
1150 for (runp = list; runp != NULL; runp = runp->next)
1151 if (FCT (runp->str, string, rs, no_leading_period,
1152 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1153 break;
1155 /* If none of the patterns matched see whether the rest does. */
1156 if (runp == NULL
1157 && (FCT (p, rs, string_end,
1158 rs == string
1159 ? no_leading_period
1160 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1161 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1162 == 0))
1163 /* This is successful. */
1164 return 0;
1167 /* None of the patterns together with the rest of the pattern
1168 lead to a match. */
1169 return FNM_NOMATCH;
1171 default:
1172 assert (! "Invalid extended matching operator");
1173 break;
1176 return -1;
/* Drop the template parameters so that this file can be included again
   with a different set of definitions.  */

/* Character types and literal/case-folding helpers.  */
#undef CHAR
#undef UCHAR
#undef INT
#undef L
#undef FOLD
#undef BTOWC

/* Names of the functions defined above.  */
#undef FCT
#undef EXT
#undef END

/* String and memory primitives.  */
#undef MEMCHR
#undef MEMPCPY
#undef STRCAT
#undef STRCOLL
#undef STRLEN