1 /* Copyright (C) 1991-2006 Free Software Foundation, Inc.
3 This file is part of GNU Bash, the Bourne Again SHell.
5 Bash is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 Bash is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with Bash. If not, see <http://www.gnu.org/licenses/>.
19 int FCT
__P((CHAR
*, CHAR
*, int));
21 static int GMATCH
__P((CHAR
*, CHAR
*, CHAR
*, CHAR
*, int));
22 static CHAR
*PARSE_COLLSYM
__P((CHAR
*, INT
*));
23 static CHAR
*BRACKMATCH
__P((CHAR
*, U_CHAR
, int));
24 static int EXTMATCH
__P((INT
, CHAR
*, CHAR
*, CHAR
*, CHAR
*, int));
25 static CHAR
*PATSCAN
__P((CHAR
*, CHAR
*, INT
));
28 FCT (pattern
, string
, flags
)
35 if (string
== 0 || pattern
== 0)
38 se
= string
+ STRLEN ((XCHAR
*)string
);
39 pe
= pattern
+ STRLEN ((XCHAR
*)pattern
);
41 return (GMATCH (string
, se
, pattern
, pe
, flags
));
44 /* Match STRING against the filename pattern PATTERN, returning zero if
45 it matches, FNM_NOMATCH if not. */
47 GMATCH (string
, se
, pattern
, pe
, flags
)
52 CHAR
*p
, *n
; /* pattern, string */
53 INT c
; /* current pattern character - XXX U_CHAR? */
54 INT sc
; /* current string character - XXX U_CHAR? */
59 if (string
== 0 || pattern
== 0)
63 fprintf(stderr
, "gmatch: string = %s; se = %s\n", string
, se
);
64 fprintf(stderr
, "gmatch: pattern = %s; pe = %s\n", pattern
, pe
);
72 sc
= n
< se
? *n
: '\0';
75 /* EXTMATCH () will handle recursively calling GMATCH, so we can
76 just return what EXTMATCH() returns. */
77 if ((flags
& FNM_EXTMATCH
) && *p
== L('(') &&
78 (c
== L('+') || c
== L('*') || c
== L('?') || c
== L('@') || c
== L('!'))) /* ) */
81 /* If we're not matching the start of the string, we're not
82 concerned about the special cases for matching `.' */
83 lflags
= (n
== string
) ? flags
: (flags
& ~FNM_PERIOD
);
84 return (EXTMATCH (c
, n
, se
, p
, pe
, lflags
));
86 #endif /* EXTENDED_GLOB */
90 case L('?'): /* Match single character */
93 else if ((flags
& FNM_PATHNAME
) && sc
== L('/'))
94 /* If we are matching a pathname, `?' can never match a `/'. */
96 else if ((flags
& FNM_PERIOD
) && sc
== L('.') &&
97 (n
== string
|| ((flags
& FNM_PATHNAME
) && n
[-1] == L('/'))))
98 /* `?' cannot match a `.' if it is the first character of the
99 string or if it is the first character following a slash and
100 we are matching a pathname. */
104 case L('\\'): /* backslash escape removes special meaning */
108 if ((flags
& FNM_NOESCAPE
) == 0)
111 /* A trailing `\' cannot match. */
116 if (FOLD (sc
) != (U_CHAR
)c
)
120 case '*': /* Match zero or more characters */
124 if ((flags
& FNM_PERIOD
) && sc
== L('.') &&
125 (n
== string
|| ((flags
& FNM_PATHNAME
) && n
[-1] == L('/'))))
126 /* `*' cannot match a `.' if it is the first character of the
127 string or if it is the first character following a slash and
128 we are matching a pathname. */
131 /* Collapse multiple consecutive `*' and `?', but make sure that
132 one character of the string is consumed for each `?'. */
133 for (c
= *p
++; (c
== L('?') || c
== L('*')); c
= *p
++)
135 if ((flags
& FNM_PATHNAME
) && sc
== L('/'))
136 /* A slash does not match a wildcard under FNM_PATHNAME. */
139 else if ((flags
& FNM_EXTMATCH
) && c
== L('?') && *p
== L('(')) /* ) */
142 for (newn
= n
; newn
< se
; ++newn
)
144 if (EXTMATCH (c
, newn
, se
, p
, pe
, flags
) == 0)
147 /* We didn't match. If we have a `?(...)', that's failure. */
151 else if (c
== L('?'))
155 /* One character of the string is consumed in matching
156 this ? wildcard, so *??? won't match if there are
157 fewer than three characters. */
159 sc
= n
< se
? *n
: '\0';
163 /* Handle ******(patlist) */
164 if ((flags
& FNM_EXTMATCH
) && c
== L('*') && *p
== L('(')) /*)*/
167 /* We need to check whether or not the extended glob
168 pattern matches the remainder of the string.
169 If it does, we match the entire pattern. */
170 for (newn
= n
; newn
< se
; ++newn
)
172 if (EXTMATCH (c
, newn
, se
, p
, pe
, flags
) == 0)
175 /* We didn't match the extended glob pattern, but
176 that's OK, since we can match 0 or more occurrences.
177 We need to skip the glob pattern and see if we
178 match the rest of the string. */
179 newn
= PATSCAN (p
+ 1, pe
, 0);
180 /* If NEWN is 0, we have an ill-formed pattern. */
181 p
= newn
? newn
: pe
;
188 /* If we've hit the end of the pattern and the last character of
189 the pattern was handled by the loop above, we've succeeded.
190 Otherwise, we need to match that last character. */
191 if (p
== pe
&& (c
== L('?') || c
== L('*')))
194 /* General case, use recursion. */
198 c1
= ((flags
& FNM_NOESCAPE
) == 0 && c
== L('\\')) ? *p
: c
;
200 for (--p
; n
< se
; ++n
)
202 /* Only call strmatch if the first character indicates a
203 possible match. We can check the first character if
204 we're not doing an extended glob match. */
205 if ((flags
& FNM_EXTMATCH
) == 0 && c
!= L('[') && FOLD (*n
) != c1
) /*]*/
208 /* If we're doing an extended glob match and the pattern is not
209 one of the extended glob patterns, we can check the first
211 if ((flags
& FNM_EXTMATCH
) && p
[1] != L('(') && /*)*/
212 STRCHR (L("?*+@!"), *p
) == 0 && c
!= L('[') && FOLD (*n
) != c1
) /*]*/
215 /* Otherwise, we just recurse. */
216 if (GMATCH (n
, se
, p
, pe
, flags
& ~FNM_PERIOD
) == 0)
224 if (sc
== L('\0') || n
== se
)
227 /* A character class cannot match a `.' if it is the first
228 character of the string or if it is the first character
229 following a slash and we are matching a pathname. */
230 if ((flags
& FNM_PERIOD
) && sc
== L('.') &&
231 (n
== string
|| ((flags
& FNM_PATHNAME
) && n
[-1] == L('/'))))
232 return (FNM_NOMATCH
);
234 p
= BRACKMATCH (p
, sc
, flags
);
241 if ((U_CHAR
)c
!= FOLD (sc
))
242 return (FNM_NOMATCH
);
251 if ((flags
& FNM_LEADING_DIR
) && *n
== L('/'))
252 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
255 return (FNM_NOMATCH
);
258 /* Parse a bracket expression collating symbol ([.sym.]) starting at P, find
259 the value of the symbol, and move P past the collating symbol expression.
260 The value is returned in *VP, if VP is not null. */
262 PARSE_COLLSYM (p
, vp
)
269 p
++; /* move past the `.' */
271 for (pc
= 0; p
[pc
]; pc
++)
272 if (p
[pc
] == L('.') && p
[pc
+1] == L(']'))
274 val
= COLLSYM (p
, pc
);
280 /* Use prototype definition here because of type promotion. */
282 #if defined (PROTOTYPES)
283 BRACKMATCH (CHAR
*p
, U_CHAR test
, int flags
)
285 BRACKMATCH (p
, test
, flags
)
291 register CHAR cstart
, cend
, c
;
292 register int not; /* Nonzero if the sense of the character class is inverted. */
301 /* POSIX.2 3.13.1 says that an exclamation mark (`!') shall replace the
302 circumflex (`^') in its role in a `nonmatching list'. A bracket
303 expression starting with an unquoted circumflex character produces
304 unspecified results. This implementation treats the two identically. */
305 if (not = (*p
== L('!') || *p
== L('^')))
311 /* Initialize cstart and cend in case `-' is the last
312 character of the pattern. */
315 /* POSIX.2 equivalence class: [=c=]. See POSIX.2 2.8.3.2. Find
316 the end of the equivalence class, move the pattern pointer past
317 it, and check for equivalence. XXX - this handles only
318 single-character equivalence classes, which is wrong, or at
320 if (c
== L('[') && *p
== L('=') && p
[2] == L('=') && p
[3] == L(']'))
324 if (COLLEQUIV (test
, pc
))
326 /*[*/ /* Move past the closing `]', since the first thing we do at
327 the `matched:' label is back p up one. */
335 return ((test
== L('[')) ? savep
: (CHAR
*)0); /*]*/
341 /* POSIX.2 character class expression. See POSIX.2 2.8.3.2. */
342 if (c
== L('[') && *p
== L(':'))
344 CHAR
*close
, *ccname
;
346 pc
= 0; /* make sure invalid char classes don't match. */
347 /* Find end of character class name */
348 for (close
= p
+ 1; *close
!= '\0'; close
++)
349 if (*close
== L(':') && *(close
+1) == L(']'))
352 if (*close
!= L('\0'))
354 ccname
= (CHAR
*)malloc ((close
- p
) * sizeof (CHAR
));
359 bcopy (p
+ 1, ccname
, (close
- p
- 1) * sizeof (CHAR
));
360 *(ccname
+ (close
- p
- 1)) = L('\0');
361 pc
= IS_CCLASS (test
, (XCHAR
*)ccname
);
373 /*[*/ /* Move past the closing `]', since the first thing we do at
374 the `matched:' label is back p up one. */
380 /* continue the loop here, since this expression can't be
381 the first part of a range expression. */
384 return ((test
== L('[')) ? savep
: (CHAR
*)0);
385 else if (c
== L(']'))
392 /* POSIX.2 collating symbols. See POSIX.2 2.8.3.2. Find the end of
393 the symbol name, make sure it is terminated by `.]', translate
394 the name to a character using the external table, and do the
396 if (c
== L('[') && *p
== L('.'))
398 p
= PARSE_COLLSYM (p
, &pc
);
399 /* An invalid collating symbol cannot be the first point of a
400 range. If it is, we set cstart to one greater than `test',
401 so any comparisons later will fail. */
402 cstart
= (pc
== INVALID
) ? test
+ 1 : pc
;
405 if (!(flags
& FNM_NOESCAPE
) && c
== L('\\'))
409 cstart
= cend
= *p
++;
412 cstart
= cend
= FOLD (cstart
);
414 /* POSIX.2 2.8.3.1.2 says: `An expression containing a `[' that
415 is not preceded by a backslash and is not part of a bracket
416 expression produces undefined results.' This implementation
417 treats the `[' as just a character to be matched if there is
418 not a closing `]'. */
420 return ((test
== L('[')) ? savep
: (CHAR
*)0);
425 if ((flags
& FNM_PATHNAME
) && c
== L('/'))
426 /* [/] can never match when matching a pathname. */
429 /* This introduces a range, unless the `-' is the last
430 character of the class. Find the end of the range
432 if (c
== L('-') && *p
!= L(']'))
435 if (!(flags
& FNM_NOESCAPE
) && cend
== L('\\'))
439 if (cend
== L('[') && *p
== L('.'))
441 p
= PARSE_COLLSYM (p
, &pc
);
442 /* An invalid collating symbol cannot be the second part of a
443 range expression. If we get one, we set cend to one fewer
444 than the test character to make sure the range test fails. */
445 cend
= (pc
== INVALID
) ? test
- 1 : pc
;
451 /* POSIX.2 2.8.3.2: ``The ending range point shall collate
452 equal to or higher than the starting range point; otherwise
453 the expression shall be treated as invalid.'' Note that this
454 applies to only the range expression; the rest of the bracket
455 expression is still checked for matches. */
456 if (RANGECMP (cstart
, cend
) > 0)
465 if (RANGECMP (test
, cstart
) >= 0 && RANGECMP (test
, cend
) <= 0)
472 return (!not ? (CHAR
*)0 : p
);
475 /* Skip the rest of the [...] that already matched. */
480 /* A `[' without a matching `]' is just another character to match. */
482 return ((test
== L('[')) ? savep
: (CHAR
*)0);
485 if (c
== L('[') && (*p
== L('=') || *p
== L(':') || *p
== L('.')))
487 else if (c
== L(']'))
489 else if (!(flags
& FNM_NOESCAPE
) && c
== L('\\'))
493 /* XXX 1003.2d11 is unclear if this is right. */
497 return (not ? (CHAR
*)0 : p
);
500 #if defined (EXTENDED_GLOB)
501 /* ksh-like extended pattern matching:
505 where pat-list is a list of one or patterns separated by `|'. Operation
508 ?(patlist) match zero or one of the given patterns
509 *(patlist) match zero or more of the given patterns
510 +(patlist) match one or more of the given patterns
511 @(patlist) match exactly one of the given patterns
512 !(patlist) match anything except one of the given patterns
515 /* Scan a pattern starting at STRING and ending at END, keeping track of
516 embedded () and []. If DELIM is 0, we scan until a matching `)'
517 because we're scanning a `patlist'. Otherwise, we scan until we see
518 DELIM. In all cases, we never scan past END. The return value is the
519 first character after the matching DELIM. */
521 PATSCAN (string
, end
, delim
)
525 int pnest
, bnest
, skip
;
529 pnest
= bnest
= skip
= 0;
533 for (s
= string
; c
= *s
; s
++)
549 return ((CHAR
*)NULL
);
551 /* `[' is not special inside a bracket expression, but it may
552 introduce one of the special POSIX bracket expressions
553 ([.SYM.], [=c=], [: ... :]) that needs special handling. */
558 if (*bfirst
== L('!') || *bfirst
== L('^'))
562 else if (s
[1] == L(':') || s
[1] == L('.') || s
[1] == L('='))
566 /* `]' is not special if it's the first char (after a leading `!'
567 or `^') in a bracket expression or if it's part of one of the
568 special POSIX bracket expressions ([.SYM.], [=c=], [: ... :]) */
572 if (cchar
&& s
[-1] == cchar
)
574 else if (s
!= bfirst
)
588 if (bnest
== 0 && pnest
-- <= 0)
593 if (bnest
== 0 && pnest
== 0 && delim
== L('|'))
602 /* Return 0 if dequoted pattern matches S in the current locale. */
604 STRCOMPARE (p
, pe
, s
, se
)
605 CHAR
*p
, *pe
, *s
, *se
;
614 #if HAVE_MULTIBYTE || defined (HAVE_STRCOLL)
615 ret
= STRCOLL ((XCHAR
*)p
, (XCHAR
*)s
);
617 ret
= STRCMP ((XCHAR
*)p
, (XCHAR
*)s
);
623 return (ret
== 0 ? ret
: FNM_NOMATCH
);
626 /* Match a ksh extended pattern specifier. Return FNM_NOMATCH on failure or
627 0 on success. This is handed the entire rest of the pattern and string
628 the first time an extended pattern specifier is encountered, so it calls
629 gmatch recursively. */
631 EXTMATCH (xc
, s
, se
, p
, pe
, flags
)
632 INT xc
; /* select which operation */
637 CHAR
*prest
; /* pointer to rest of pattern */
638 CHAR
*psub
; /* pointer to sub-pattern */
639 CHAR
*pnext
; /* pointer to next sub-pattern */
640 CHAR
*srest
; /* pointer to rest of string */
641 int m1
, m2
, xflags
; /* xflags = flags passed to recursive matches */
644 fprintf(stderr
, "extmatch: xc = %c\n", xc
);
645 fprintf(stderr
, "extmatch: s = %s; se = %s\n", s
, se
);
646 fprintf(stderr
, "extmatch: p = %s; pe = %s\n", p
, pe
);
647 fprintf(stderr
, "extmatch: flags = %d\n", flags
);
650 prest
= PATSCAN (p
+ (*p
== L('(')), pe
, 0); /* ) */
652 /* If PREST is 0, we failed to scan a valid pattern. In this
653 case, we just want to compare the two as strings. */
654 return (STRCOMPARE (p
- 1, pe
, s
, se
));
658 case L('+'): /* match one or more occurrences */
659 case L('*'): /* match zero or more occurrences */
660 /* If we can get away with no matches, don't even bother. Just
661 call GMATCH on the rest of the pattern and return success if
663 if (xc
== L('*') && (GMATCH (s
, se
, prest
, pe
, flags
) == 0))
666 /* OK, we have to do this the hard way. First, we make sure one of
667 the subpatterns matches, then we try to match the rest of the
669 for (psub
= p
+ 1; ; psub
= pnext
)
671 pnext
= PATSCAN (psub
, pe
, L('|'));
672 for (srest
= s
; srest
<= se
; srest
++)
674 /* Match this substring (S -> SREST) against this
675 subpattern (psub -> pnext - 1) */
676 m1
= GMATCH (s
, srest
, psub
, pnext
- 1, flags
) == 0;
677 /* OK, we matched a subpattern, so make sure the rest of the
678 string matches the rest of the pattern. Also handle
679 multiple matches of the pattern. */
682 /* if srest > s, we are not at start of string */
683 xflags
= (srest
> s
) ? (flags
& ~FNM_PERIOD
) : flags
;
684 m2
= (GMATCH (srest
, se
, prest
, pe
, xflags
) == 0) ||
685 (s
!= srest
&& GMATCH (srest
, se
, p
- 1, pe
, xflags
) == 0);
693 return (FNM_NOMATCH
);
695 case L('?'): /* match zero or one of the patterns */
696 case L('@'): /* match one (or more) of the patterns */
697 /* If we can get away with no matches, don't even bother. Just
698 call gmatch on the rest of the pattern and return success if
700 if (xc
== L('?') && (GMATCH (s
, se
, prest
, pe
, flags
) == 0))
703 /* OK, we have to do this the hard way. First, we see if one of
704 the subpatterns matches, then, if it does, we try to match the
705 rest of the string. */
706 for (psub
= p
+ 1; ; psub
= pnext
)
708 pnext
= PATSCAN (psub
, pe
, L('|'));
709 srest
= (prest
== pe
) ? se
: s
;
710 for ( ; srest
<= se
; srest
++)
712 /* if srest > s, we are not at start of string */
713 xflags
= (srest
> s
) ? (flags
& ~FNM_PERIOD
) : flags
;
714 if (GMATCH (s
, srest
, psub
, pnext
- 1, flags
) == 0 &&
715 GMATCH (srest
, se
, prest
, pe
, xflags
) == 0)
721 return (FNM_NOMATCH
);
723 case '!': /* match anything *except* one of the patterns */
724 for (srest
= s
; srest
<= se
; srest
++)
727 for (psub
= p
+ 1; ; psub
= pnext
)
729 pnext
= PATSCAN (psub
, pe
, L('|'));
730 /* If one of the patterns matches, just bail immediately. */
731 if (m1
= (GMATCH (s
, srest
, psub
, pnext
- 1, flags
) == 0))
736 /* if srest > s, we are not at start of string */
737 xflags
= (srest
> s
) ? (flags
& ~FNM_PERIOD
) : flags
;
738 if (m1
== 0 && GMATCH (srest
, se
, prest
, pe
, xflags
) == 0)
741 return (FNM_NOMATCH
);
744 return (FNM_NOMATCH
);
746 #endif /* EXTENDED_GLOB */