Patch-ID: bash40-030
[bash.git] / lib / glob / sm_loop.c
blobdfff06cf5c84472aa9076d3cb20c7898020eab64
1 /* Copyright (C) 1991-2006 Free Software Foundation, Inc.
3 This file is part of GNU Bash, the Bourne Again SHell.
5 Bash is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 Bash is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with Bash. If not, see <http://www.gnu.org/licenses/>.
19 int FCT __P((CHAR *, CHAR *, int));
21 static int GMATCH __P((CHAR *, CHAR *, CHAR *, CHAR *, int));
22 static CHAR *PARSE_COLLSYM __P((CHAR *, INT *));
23 static CHAR *BRACKMATCH __P((CHAR *, U_CHAR, int));
24 static int EXTMATCH __P((INT, CHAR *, CHAR *, CHAR *, CHAR *, int));
25 static CHAR *PATSCAN __P((CHAR *, CHAR *, INT));
27 int
28 FCT (pattern, string, flags)
29 CHAR *pattern;
30 CHAR *string;
31 int flags;
33 CHAR *se, *pe;
35 if (string == 0 || pattern == 0)
36 return FNM_NOMATCH;
38 se = string + STRLEN ((XCHAR *)string);
39 pe = pattern + STRLEN ((XCHAR *)pattern);
41 return (GMATCH (string, se, pattern, pe, flags));
44 /* Match STRING against the filename pattern PATTERN, returning zero if
45 it matches, FNM_NOMATCH if not. */
46 static int
47 GMATCH (string, se, pattern, pe, flags)
48 CHAR *string, *se;
49 CHAR *pattern, *pe;
50 int flags;
52 CHAR *p, *n; /* pattern, string */
53 INT c; /* current pattern character - XXX U_CHAR? */
54 INT sc; /* current string character - XXX U_CHAR? */
56 p = pattern;
57 n = string;
59 if (string == 0 || pattern == 0)
60 return FNM_NOMATCH;
62 #if DEBUG_MATCHING
63 fprintf(stderr, "gmatch: string = %s; se = %s\n", string, se);
64 fprintf(stderr, "gmatch: pattern = %s; pe = %s\n", pattern, pe);
65 #endif
67 while (p < pe)
69 c = *p++;
70 c = FOLD (c);
72 sc = n < se ? *n : '\0';
74 #ifdef EXTENDED_GLOB
75 /* EXTMATCH () will handle recursively calling GMATCH, so we can
76 just return what EXTMATCH() returns. */
77 if ((flags & FNM_EXTMATCH) && *p == L('(') &&
78 (c == L('+') || c == L('*') || c == L('?') || c == L('@') || c == L('!'))) /* ) */
80 int lflags;
81 /* If we're not matching the start of the string, we're not
82 concerned about the special cases for matching `.' */
83 lflags = (n == string) ? flags : (flags & ~FNM_PERIOD);
84 return (EXTMATCH (c, n, se, p, pe, lflags));
86 #endif /* EXTENDED_GLOB */
88 switch (c)
90 case L('?'): /* Match single character */
91 if (sc == '\0')
92 return FNM_NOMATCH;
93 else if ((flags & FNM_PATHNAME) && sc == L('/'))
94 /* If we are matching a pathname, `?' can never match a `/'. */
95 return FNM_NOMATCH;
96 else if ((flags & FNM_PERIOD) && sc == L('.') &&
97 (n == string || ((flags & FNM_PATHNAME) && n[-1] == L('/'))))
98 /* `?' cannot match a `.' if it is the first character of the
99 string or if it is the first character following a slash and
100 we are matching a pathname. */
101 return FNM_NOMATCH;
102 break;
104 case L('\\'): /* backslash escape removes special meaning */
105 if (p == pe)
106 return FNM_NOMATCH;
108 if ((flags & FNM_NOESCAPE) == 0)
110 c = *p++;
111 /* A trailing `\' cannot match. */
112 if (p > pe)
113 return FNM_NOMATCH;
114 c = FOLD (c);
116 if (FOLD (sc) != (U_CHAR)c)
117 return FNM_NOMATCH;
118 break;
120 case '*': /* Match zero or more characters */
121 if (p == pe)
122 return 0;
124 if ((flags & FNM_PERIOD) && sc == L('.') &&
125 (n == string || ((flags & FNM_PATHNAME) && n[-1] == L('/'))))
126 /* `*' cannot match a `.' if it is the first character of the
127 string or if it is the first character following a slash and
128 we are matching a pathname. */
129 return FNM_NOMATCH;
131 /* Collapse multiple consecutive `*' and `?', but make sure that
132 one character of the string is consumed for each `?'. */
133 for (c = *p++; (c == L('?') || c == L('*')); c = *p++)
135 if ((flags & FNM_PATHNAME) && sc == L('/'))
136 /* A slash does not match a wildcard under FNM_PATHNAME. */
137 return FNM_NOMATCH;
138 #ifdef EXTENDED_GLOB
139 else if ((flags & FNM_EXTMATCH) && c == L('?') && *p == L('(')) /* ) */
141 CHAR *newn;
142 for (newn = n; newn < se; ++newn)
144 if (EXTMATCH (c, newn, se, p, pe, flags) == 0)
145 return (0);
147 /* We didn't match. If we have a `?(...)', that's failure. */
148 return FNM_NOMATCH;
150 #endif
151 else if (c == L('?'))
153 if (sc == L('\0'))
154 return FNM_NOMATCH;
155 /* One character of the string is consumed in matching
156 this ? wildcard, so *??? won't match if there are
157 fewer than three characters. */
158 n++;
159 sc = n < se ? *n : '\0';
162 #ifdef EXTENDED_GLOB
163 /* Handle ******(patlist) */
164 if ((flags & FNM_EXTMATCH) && c == L('*') && *p == L('(')) /*)*/
166 CHAR *newn;
167 /* We need to check whether or not the extended glob
168 pattern matches the remainder of the string.
169 If it does, we match the entire pattern. */
170 for (newn = n; newn < se; ++newn)
172 if (EXTMATCH (c, newn, se, p, pe, flags) == 0)
173 return (0);
175 /* We didn't match the extended glob pattern, but
176 that's OK, since we can match 0 or more occurrences.
177 We need to skip the glob pattern and see if we
178 match the rest of the string. */
179 newn = PATSCAN (p + 1, pe, 0);
180 /* If NEWN is 0, we have an ill-formed pattern. */
181 p = newn ? newn : pe;
183 #endif
184 if (p == pe)
185 break;
188 /* If we've hit the end of the pattern and the last character of
189 the pattern was handled by the loop above, we've succeeded.
190 Otherwise, we need to match that last character. */
191 if (p == pe && (c == L('?') || c == L('*')))
192 return (0);
194 /* General case, use recursion. */
196 U_CHAR c1;
198 c1 = ((flags & FNM_NOESCAPE) == 0 && c == L('\\')) ? *p : c;
199 c1 = FOLD (c1);
200 for (--p; n < se; ++n)
202 /* Only call strmatch if the first character indicates a
203 possible match. We can check the first character if
204 we're not doing an extended glob match. */
205 if ((flags & FNM_EXTMATCH) == 0 && c != L('[') && FOLD (*n) != c1) /*]*/
206 continue;
208 /* If we're doing an extended glob match and the pattern is not
209 one of the extended glob patterns, we can check the first
210 character. */
211 if ((flags & FNM_EXTMATCH) && p[1] != L('(') && /*)*/
212 STRCHR (L("?*+@!"), *p) == 0 && c != L('[') && FOLD (*n) != c1) /*]*/
213 continue;
215 /* Otherwise, we just recurse. */
216 if (GMATCH (n, se, p, pe, flags & ~FNM_PERIOD) == 0)
217 return (0);
219 return FNM_NOMATCH;
222 case L('['):
224 if (sc == L('\0') || n == se)
225 return FNM_NOMATCH;
227 /* A character class cannot match a `.' if it is the first
228 character of the string or if it is the first character
229 following a slash and we are matching a pathname. */
230 if ((flags & FNM_PERIOD) && sc == L('.') &&
231 (n == string || ((flags & FNM_PATHNAME) && n[-1] == L('/'))))
232 return (FNM_NOMATCH);
234 p = BRACKMATCH (p, sc, flags);
235 if (p == 0)
236 return FNM_NOMATCH;
238 break;
240 default:
241 if ((U_CHAR)c != FOLD (sc))
242 return (FNM_NOMATCH);
245 ++n;
248 if (n == se)
249 return (0);
251 if ((flags & FNM_LEADING_DIR) && *n == L('/'))
252 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
253 return 0;
255 return (FNM_NOMATCH);
258 /* Parse a bracket expression collating symbol ([.sym.]) starting at P, find
259 the value of the symbol, and move P past the collating symbol expression.
260 The value is returned in *VP, if VP is not null. */
261 static CHAR *
262 PARSE_COLLSYM (p, vp)
263 CHAR *p;
264 INT *vp;
266 register int pc;
267 INT val;
269 p++; /* move past the `.' */
271 for (pc = 0; p[pc]; pc++)
272 if (p[pc] == L('.') && p[pc+1] == L(']'))
273 break;
274 val = COLLSYM (p, pc);
275 if (vp)
276 *vp = val;
277 return (p + pc + 2);
280 /* Use prototype definition here because of type promotion. */
281 static CHAR *
282 #if defined (PROTOTYPES)
283 BRACKMATCH (CHAR *p, U_CHAR test, int flags)
284 #else
285 BRACKMATCH (p, test, flags)
286 CHAR *p;
287 U_CHAR test;
288 int flags;
289 #endif
291 register CHAR cstart, cend, c;
292 register int not; /* Nonzero if the sense of the character class is inverted. */
293 int brcnt;
294 INT pc;
295 CHAR *savep;
297 test = FOLD (test);
299 savep = p;
301 /* POSIX.2 3.13.1 says that an exclamation mark (`!') shall replace the
302 circumflex (`^') in its role in a `nonmatching list'. A bracket
303 expression starting with an unquoted circumflex character produces
304 unspecified results. This implementation treats the two identically. */
305 if (not = (*p == L('!') || *p == L('^')))
306 ++p;
308 c = *p++;
309 for (;;)
311 /* Initialize cstart and cend in case `-' is the last
312 character of the pattern. */
313 cstart = cend = c;
315 /* POSIX.2 equivalence class: [=c=]. See POSIX.2 2.8.3.2. Find
316 the end of the equivalence class, move the pattern pointer past
317 it, and check for equivalence. XXX - this handles only
318 single-character equivalence classes, which is wrong, or at
319 least incomplete. */
320 if (c == L('[') && *p == L('=') && p[2] == L('=') && p[3] == L(']'))
322 pc = FOLD (p[1]);
323 p += 4;
324 if (COLLEQUIV (test, pc))
326 /*[*/ /* Move past the closing `]', since the first thing we do at
327 the `matched:' label is back p up one. */
328 p++;
329 goto matched;
331 else
333 c = *p++;
334 if (c == L('\0'))
335 return ((test == L('[')) ? savep : (CHAR *)0); /*]*/
336 c = FOLD (c);
337 continue;
341 /* POSIX.2 character class expression. See POSIX.2 2.8.3.2. */
342 if (c == L('[') && *p == L(':'))
344 CHAR *close, *ccname;
346 pc = 0; /* make sure invalid char classes don't match. */
347 /* Find end of character class name */
348 for (close = p + 1; *close != '\0'; close++)
349 if (*close == L(':') && *(close+1) == L(']'))
350 break;
352 if (*close != L('\0'))
354 ccname = (CHAR *)malloc ((close - p) * sizeof (CHAR));
355 if (ccname == 0)
356 pc = 0;
357 else
359 bcopy (p + 1, ccname, (close - p - 1) * sizeof (CHAR));
360 *(ccname + (close - p - 1)) = L('\0');
361 pc = IS_CCLASS (test, (XCHAR *)ccname);
363 if (pc == -1)
364 pc = 0;
365 else
366 p = close + 2;
368 free (ccname);
371 if (pc)
373 /*[*/ /* Move past the closing `]', since the first thing we do at
374 the `matched:' label is back p up one. */
375 p++;
376 goto matched;
378 else
380 /* continue the loop here, since this expression can't be
381 the first part of a range expression. */
382 c = *p++;
383 if (c == L('\0'))
384 return ((test == L('[')) ? savep : (CHAR *)0);
385 else if (c == L(']'))
386 break;
387 c = FOLD (c);
388 continue;
392 /* POSIX.2 collating symbols. See POSIX.2 2.8.3.2. Find the end of
393 the symbol name, make sure it is terminated by `.]', translate
394 the name to a character using the external table, and do the
395 comparison. */
396 if (c == L('[') && *p == L('.'))
398 p = PARSE_COLLSYM (p, &pc);
399 /* An invalid collating symbol cannot be the first point of a
400 range. If it is, we set cstart to one greater than `test',
401 so any comparisons later will fail. */
402 cstart = (pc == INVALID) ? test + 1 : pc;
405 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
407 if (*p == '\0')
408 return (CHAR *)0;
409 cstart = cend = *p++;
412 cstart = cend = FOLD (cstart);
414 /* POSIX.2 2.8.3.1.2 says: `An expression containing a `[' that
415 is not preceded by a backslash and is not part of a bracket
416 expression produces undefined results.' This implementation
417 treats the `[' as just a character to be matched if there is
418 not a closing `]'. */
419 if (c == L('\0'))
420 return ((test == L('[')) ? savep : (CHAR *)0);
422 c = *p++;
423 c = FOLD (c);
425 if ((flags & FNM_PATHNAME) && c == L('/'))
426 /* [/] can never match when matching a pathname. */
427 return (CHAR *)0;
429 /* This introduces a range, unless the `-' is the last
430 character of the class. Find the end of the range
431 and move past it. */
432 if (c == L('-') && *p != L(']'))
434 cend = *p++;
435 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
436 cend = *p++;
437 if (cend == L('\0'))
438 return (CHAR *)0;
439 if (cend == L('[') && *p == L('.'))
441 p = PARSE_COLLSYM (p, &pc);
442 /* An invalid collating symbol cannot be the second part of a
443 range expression. If we get one, we set cend to one fewer
444 than the test character to make sure the range test fails. */
445 cend = (pc == INVALID) ? test - 1 : pc;
447 cend = FOLD (cend);
449 c = *p++;
451 /* POSIX.2 2.8.3.2: ``The ending range point shall collate
452 equal to or higher than the starting range point; otherwise
453 the expression shall be treated as invalid.'' Note that this
454 applies to only the range expression; the rest of the bracket
455 expression is still checked for matches. */
456 if (RANGECMP (cstart, cend) > 0)
458 if (c == L(']'))
459 break;
460 c = FOLD (c);
461 continue;
465 if (RANGECMP (test, cstart) >= 0 && RANGECMP (test, cend) <= 0)
466 goto matched;
468 if (c == L(']'))
469 break;
471 /* No match. */
472 return (!not ? (CHAR *)0 : p);
474 matched:
475 /* Skip the rest of the [...] that already matched. */
476 c = *--p;
477 brcnt = 1;
478 while (brcnt > 0)
480 /* A `[' without a matching `]' is just another character to match. */
481 if (c == L('\0'))
482 return ((test == L('[')) ? savep : (CHAR *)0);
484 c = *p++;
485 if (c == L('[') && (*p == L('=') || *p == L(':') || *p == L('.')))
486 brcnt++;
487 else if (c == L(']'))
488 brcnt--;
489 else if (!(flags & FNM_NOESCAPE) && c == L('\\'))
491 if (*p == '\0')
492 return (CHAR *)0;
493 /* XXX 1003.2d11 is unclear if this is right. */
494 ++p;
497 return (not ? (CHAR *)0 : p);
500 #if defined (EXTENDED_GLOB)
501 /* ksh-like extended pattern matching:
503 [?*+@!](pat-list)
505 where pat-list is a list of one or patterns separated by `|'. Operation
506 is as follows:
508 ?(patlist) match zero or one of the given patterns
509 *(patlist) match zero or more of the given patterns
510 +(patlist) match one or more of the given patterns
511 @(patlist) match exactly one of the given patterns
512 !(patlist) match anything except one of the given patterns
515 /* Scan a pattern starting at STRING and ending at END, keeping track of
516 embedded () and []. If DELIM is 0, we scan until a matching `)'
517 because we're scanning a `patlist'. Otherwise, we scan until we see
518 DELIM. In all cases, we never scan past END. The return value is the
519 first character after the matching DELIM. */
520 static CHAR *
521 PATSCAN (string, end, delim)
522 CHAR *string, *end;
523 INT delim;
525 int pnest, bnest, skip;
526 INT cchar;
527 CHAR *s, c, *bfirst;
529 pnest = bnest = skip = 0;
530 cchar = 0;
531 bfirst = NULL;
533 for (s = string; c = *s; s++)
535 if (s >= end)
536 return (s);
537 if (skip)
539 skip = 0;
540 continue;
542 switch (c)
544 case L('\\'):
545 skip = 1;
546 break;
548 case L('\0'):
549 return ((CHAR *)NULL);
551 /* `[' is not special inside a bracket expression, but it may
552 introduce one of the special POSIX bracket expressions
553 ([.SYM.], [=c=], [: ... :]) that needs special handling. */
554 case L('['):
555 if (bnest == 0)
557 bfirst = s + 1;
558 if (*bfirst == L('!') || *bfirst == L('^'))
559 bfirst++;
560 bnest++;
562 else if (s[1] == L(':') || s[1] == L('.') || s[1] == L('='))
563 cchar = s[1];
564 break;
566 /* `]' is not special if it's the first char (after a leading `!'
567 or `^') in a bracket expression or if it's part of one of the
568 special POSIX bracket expressions ([.SYM.], [=c=], [: ... :]) */
569 case L(']'):
570 if (bnest)
572 if (cchar && s[-1] == cchar)
573 cchar = 0;
574 else if (s != bfirst)
576 bnest--;
577 bfirst = 0;
580 break;
582 case L('('):
583 if (bnest == 0)
584 pnest++;
585 break;
587 case L(')'):
588 if (bnest == 0 && pnest-- <= 0)
589 return ++s;
590 break;
592 case L('|'):
593 if (bnest == 0 && pnest == 0 && delim == L('|'))
594 return ++s;
595 break;
599 return (NULL);
602 /* Return 0 if dequoted pattern matches S in the current locale. */
603 static int
604 STRCOMPARE (p, pe, s, se)
605 CHAR *p, *pe, *s, *se;
607 int ret;
608 CHAR c1, c2;
610 c1 = *pe;
611 c2 = *se;
613 *pe = *se = '\0';
614 #if HAVE_MULTIBYTE || defined (HAVE_STRCOLL)
615 ret = STRCOLL ((XCHAR *)p, (XCHAR *)s);
616 #else
617 ret = STRCMP ((XCHAR *)p, (XCHAR *)s);
618 #endif
620 *pe = c1;
621 *se = c2;
623 return (ret == 0 ? ret : FNM_NOMATCH);
626 /* Match a ksh extended pattern specifier. Return FNM_NOMATCH on failure or
627 0 on success. This is handed the entire rest of the pattern and string
628 the first time an extended pattern specifier is encountered, so it calls
629 gmatch recursively. */
630 static int
631 EXTMATCH (xc, s, se, p, pe, flags)
632 INT xc; /* select which operation */
633 CHAR *s, *se;
634 CHAR *p, *pe;
635 int flags;
637 CHAR *prest; /* pointer to rest of pattern */
638 CHAR *psub; /* pointer to sub-pattern */
639 CHAR *pnext; /* pointer to next sub-pattern */
640 CHAR *srest; /* pointer to rest of string */
641 int m1, m2, xflags; /* xflags = flags passed to recursive matches */
643 #if DEBUG_MATCHING
644 fprintf(stderr, "extmatch: xc = %c\n", xc);
645 fprintf(stderr, "extmatch: s = %s; se = %s\n", s, se);
646 fprintf(stderr, "extmatch: p = %s; pe = %s\n", p, pe);
647 fprintf(stderr, "extmatch: flags = %d\n", flags);
648 #endif
650 prest = PATSCAN (p + (*p == L('(')), pe, 0); /* ) */
651 if (prest == 0)
652 /* If PREST is 0, we failed to scan a valid pattern. In this
653 case, we just want to compare the two as strings. */
654 return (STRCOMPARE (p - 1, pe, s, se));
656 switch (xc)
658 case L('+'): /* match one or more occurrences */
659 case L('*'): /* match zero or more occurrences */
660 /* If we can get away with no matches, don't even bother. Just
661 call GMATCH on the rest of the pattern and return success if
662 it succeeds. */
663 if (xc == L('*') && (GMATCH (s, se, prest, pe, flags) == 0))
664 return 0;
666 /* OK, we have to do this the hard way. First, we make sure one of
667 the subpatterns matches, then we try to match the rest of the
668 string. */
669 for (psub = p + 1; ; psub = pnext)
671 pnext = PATSCAN (psub, pe, L('|'));
672 for (srest = s; srest <= se; srest++)
674 /* Match this substring (S -> SREST) against this
675 subpattern (psub -> pnext - 1) */
676 m1 = GMATCH (s, srest, psub, pnext - 1, flags) == 0;
677 /* OK, we matched a subpattern, so make sure the rest of the
678 string matches the rest of the pattern. Also handle
679 multiple matches of the pattern. */
680 if (m1)
682 /* if srest > s, we are not at start of string */
683 xflags = (srest > s) ? (flags & ~FNM_PERIOD) : flags;
684 m2 = (GMATCH (srest, se, prest, pe, xflags) == 0) ||
685 (s != srest && GMATCH (srest, se, p - 1, pe, xflags) == 0);
687 if (m1 && m2)
688 return (0);
690 if (pnext == prest)
691 break;
693 return (FNM_NOMATCH);
695 case L('?'): /* match zero or one of the patterns */
696 case L('@'): /* match one (or more) of the patterns */
697 /* If we can get away with no matches, don't even bother. Just
698 call gmatch on the rest of the pattern and return success if
699 it succeeds. */
700 if (xc == L('?') && (GMATCH (s, se, prest, pe, flags) == 0))
701 return 0;
703 /* OK, we have to do this the hard way. First, we see if one of
704 the subpatterns matches, then, if it does, we try to match the
705 rest of the string. */
706 for (psub = p + 1; ; psub = pnext)
708 pnext = PATSCAN (psub, pe, L('|'));
709 srest = (prest == pe) ? se : s;
710 for ( ; srest <= se; srest++)
712 /* if srest > s, we are not at start of string */
713 xflags = (srest > s) ? (flags & ~FNM_PERIOD) : flags;
714 if (GMATCH (s, srest, psub, pnext - 1, flags) == 0 &&
715 GMATCH (srest, se, prest, pe, xflags) == 0)
716 return (0);
718 if (pnext == prest)
719 break;
721 return (FNM_NOMATCH);
723 case '!': /* match anything *except* one of the patterns */
724 for (srest = s; srest <= se; srest++)
726 m1 = 0;
727 for (psub = p + 1; ; psub = pnext)
729 pnext = PATSCAN (psub, pe, L('|'));
730 /* If one of the patterns matches, just bail immediately. */
731 if (m1 = (GMATCH (s, srest, psub, pnext - 1, flags) == 0))
732 break;
733 if (pnext == prest)
734 break;
736 /* if srest > s, we are not at start of string */
737 xflags = (srest > s) ? (flags & ~FNM_PERIOD) : flags;
738 if (m1 == 0 && GMATCH (srest, se, prest, pe, xflags) == 0)
739 return (0);
741 return (FNM_NOMATCH);
744 return (FNM_NOMATCH);
746 #endif /* EXTENDED_GLOB */
748 #undef IS_CCLASS
749 #undef FOLD
750 #undef CHAR
751 #undef U_CHAR
752 #undef XCHAR
753 #undef INT
754 #undef INVALID
755 #undef FCT
756 #undef GMATCH
757 #undef COLLSYM
758 #undef PARSE_COLLSYM
759 #undef PATSCAN
760 #undef STRCOMPARE
761 #undef EXTMATCH
762 #undef BRACKMATCH
763 #undef STRCHR
764 #undef STRCOLL
765 #undef STRLEN
766 #undef STRCMP
767 #undef COLLEQUIV
768 #undef RANGECMP
769 #undef L