/* sm_loop.c -- taken from [bush.git] / lib / glob / sm_loop.c
   (blob 25e13ef2a6ad93b4d065a67c9074a56fb3520d4e, commit "init version.") */

/* Copyright (C) 1991-2020 Free Software Foundation, Inc.

   This file is part of GNU Bush, the Bourne Again SHell.

   Bush is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   Bush is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with Bush.  If not, see <http://www.gnu.org/licenses/>.
*/

/* Resume point handed back by a nested GMATCH call.  When GMATCH is
   called with a non-null pointer to one of these and runs into a `*'
   in the pattern, it stores the address of that `*' and the current
   string position here and returns 0, so the caller can continue from
   that point instead of backtracking to an earlier `*'. */
struct STRUCT
{
  CHAR *pattern;        /* address of the `*' that stopped the nested match */
  CHAR *string;         /* string position reached when it stopped */
};

25 int FCT PARAMS((CHAR *, CHAR *, int));
27 static int GMATCH PARAMS((CHAR *, CHAR *, CHAR *, CHAR *, struct STRUCT *, int));
28 static CHAR *PARSE_COLLSYM PARAMS((CHAR *, INT *));
29 static CHAR *BRACKMATCH PARAMS((CHAR *, U_CHAR, int));
30 static int EXTMATCH PARAMS((INT, CHAR *, CHAR *, CHAR *, CHAR *, int));
32 extern void DEQUOTE_PATHNAME PARAMS((CHAR *));
34 /*static*/ CHAR *PATSCAN PARAMS((CHAR *, CHAR *, INT));
36 int
37 FCT (pattern, string, flags)
38 CHAR *pattern;
39 CHAR *string;
40 int flags;
42 CHAR *se, *pe;
44 if (string == 0 || pattern == 0)
45 return FNM_NOMATCH;
47 se = string + STRLEN ((XCHAR *)string);
48 pe = pattern + STRLEN ((XCHAR *)pattern);
50 return (GMATCH (string, se, pattern, pe, (struct STRUCT *)NULL, flags));
53 /* Match STRING against the filename pattern PATTERN, returning zero if
54 it matches, FNM_NOMATCH if not. */
55 static int
56 GMATCH (string, se, pattern, pe, ends, flags)
57 CHAR *string, *se;
58 CHAR *pattern, *pe;
59 struct STRUCT *ends;
60 int flags;
62 CHAR *p, *n; /* pattern, string */
63 INT c; /* current pattern character - XXX U_CHAR? */
64 INT sc; /* current string character - XXX U_CHAR? */
66 p = pattern;
67 n = string;
69 if (string == 0 || pattern == 0)
70 return FNM_NOMATCH;
72 #if DEBUG_MATCHING
73 fprintf(stderr, "gmatch: string = %s; se = %s\n", string, se);
74 fprintf(stderr, "gmatch: pattern = %s; pe = %s\n", pattern, pe);
75 #endif
77 while (p < pe)
79 c = *p++;
80 c = FOLD (c);
82 sc = n < se ? *n : '\0';
84 #ifdef EXTENDED_GLOB
85 /* EXTMATCH () will handle recursively calling GMATCH, so we can
86 just return what EXTMATCH() returns. */
87 if ((flags & FNM_EXTMATCH) && *p == L('(') &&
88 (c == L('+') || c == L('*') || c == L('?') || c == L('@') || c == L('!'))) /* ) */
90 int lflags;
91 /* If we're not matching the start of the string, we're not
92 concerned about the special cases for matching `.' */
93 lflags = (n == string) ? flags : (flags & ~FNM_PERIOD);
94 return (EXTMATCH (c, n, se, p, pe, lflags));
96 #endif /* EXTENDED_GLOB */
98 switch (c)
100 case L('?'): /* Match single character */
101 if (sc == '\0')
102 return FNM_NOMATCH;
103 else if ((flags & FNM_PATHNAME) && sc == L('/'))
104 /* If we are matching a pathname, `?' can never match a `/'. */
105 return FNM_NOMATCH;
106 else if ((flags & FNM_PERIOD) && sc == L('.') &&
107 (n == string || ((flags & FNM_PATHNAME) && n[-1] == L('/'))))
108 /* `?' cannot match a `.' if it is the first character of the
109 string or if it is the first character following a slash and
110 we are matching a pathname. */
111 return FNM_NOMATCH;
112 break;
114 case L('\\'): /* backslash escape removes special meaning */
115 if (p == pe && sc == '\\' && (n+1 == se))
116 break;
118 if (p == pe)
119 return FNM_NOMATCH;
121 if ((flags & FNM_NOESCAPE) == 0)
123 c = *p++;
124 /* A trailing `\' cannot match. */
125 if (p > pe)
126 return FNM_NOMATCH;
127 c = FOLD (c);
129 if (FOLD (sc) != (U_CHAR)c)
130 return FNM_NOMATCH;
131 break;
133 case L('*'): /* Match zero or more characters */
134 /* See below for the reason for using this. It avoids backtracking
135 back to a previous `*'. Picked up from glibc. */
136 if (ends != NULL)
138 ends->pattern = p - 1;
139 ends->string = n;
140 return (0);
143 if ((flags & FNM_PERIOD) && sc == L('.') &&
144 (n == string || ((flags & FNM_PATHNAME) && n[-1] == L('/'))))
145 /* `*' cannot match a `.' if it is the first character of the
146 string or if it is the first character following a slash and
147 we are matching a pathname. */
148 return FNM_NOMATCH;
150 if (p == pe)
151 return 0;
153 /* Collapse multiple consecutive `*' and `?', but make sure that
154 one character of the string is consumed for each `?'. */
155 for (c = *p++; (c == L('?') || c == L('*')); c = *p++)
157 if ((flags & FNM_PATHNAME) && sc == L('/'))
158 /* A slash does not match a wildcard under FNM_PATHNAME. */
159 return FNM_NOMATCH;
160 #ifdef EXTENDED_GLOB
161 else if ((flags & FNM_EXTMATCH) && c == L('?') && *p == L('(')) /* ) */
163 CHAR *newn;
165 /* We can match 0 or 1 times. If we match, return success */
166 if (EXTMATCH (c, n, se, p, pe, flags) == 0)
167 return (0);
169 /* We didn't match the extended glob pattern, but
170 that's OK, since we can match 0 or 1 occurrences.
171 We need to skip the glob pattern and see if we
172 match the rest of the string. */
173 newn = PATSCAN (p + 1, pe, 0);
174 /* If NEWN is 0, we have an ill-formed pattern. */
175 p = newn ? newn : pe;
177 #endif
178 else if (c == L('?'))
180 if (sc == L('\0'))
181 return FNM_NOMATCH;
182 /* One character of the string is consumed in matching
183 this ? wildcard, so *??? won't match if there are
184 fewer than three characters. */
185 n++;
186 sc = n < se ? *n : '\0';
189 #ifdef EXTENDED_GLOB
190 /* Handle ******(patlist) */
191 if ((flags & FNM_EXTMATCH) && c == L('*') && *p == L('(')) /*)*/
193 CHAR *newn;
194 /* We need to check whether or not the extended glob
195 pattern matches the remainder of the string.
196 If it does, we match the entire pattern. */
197 for (newn = n; newn < se; ++newn)
199 if (EXTMATCH (c, newn, se, p, pe, flags) == 0)
200 return (0);
202 /* We didn't match the extended glob pattern, but
203 that's OK, since we can match 0 or more occurrences.
204 We need to skip the glob pattern and see if we
205 match the rest of the string. */
206 newn = PATSCAN (p + 1, pe, 0);
207 /* If NEWN is 0, we have an ill-formed pattern. */
208 p = newn ? newn : pe;
210 #endif
211 if (p == pe)
212 break;
215 /* The wildcards are the last element of the pattern. The name
216 cannot match completely if we are looking for a pathname and
217 it contains another slash, unless FNM_LEADING_DIR is set. */
218 if (c == L('\0'))
220 int r = (flags & FNM_PATHNAME) == 0 ? 0 : FNM_NOMATCH;
221 if (flags & FNM_PATHNAME)
223 if (flags & FNM_LEADING_DIR)
224 r = 0;
225 else if (MEMCHR (n, L('/'), se - n) == NULL)
226 r = 0;
228 return r;
231 /* If we've hit the end of the pattern and the last character of
232 the pattern was handled by the loop above, we've succeeded.
233 Otherwise, we need to match that last character. */
234 if (p == pe && (c == L('?') || c == L('*')))
235 return (0);
237 /* If we've hit the end of the string and the rest of the pattern
238 is something that matches the empty string, we can succeed. */
239 #if defined (EXTENDED_GLOB)
240 if (n == se && ((flags & FNM_EXTMATCH) && (c == L('!') || c == L('?')) && *p == L('(')))
242 --p;
243 if (EXTMATCH (c, n, se, p, pe, flags) == 0)
244 return (c == L('!') ? FNM_NOMATCH : 0);
245 return (c == L('!') ? 0 : FNM_NOMATCH);
247 #endif
249 /* If we stop at a slash in the pattern and we are looking for a
250 pathname ([star]/foo), then consume enough of the string to stop
251 at any slash and then try to match the rest of the pattern. If
252 the string doesn't contain a slash, fail */
253 if (c == L('/') && (flags & FNM_PATHNAME))
255 while (n < se && *n != L('/'))
256 ++n;
257 if (n < se && *n == L('/') && (GMATCH (n+1, se, p, pe, NULL, flags) == 0))
258 return 0;
259 return FNM_NOMATCH; /* XXX */
262 /* General case, use recursion. */
264 U_CHAR c1;
265 const CHAR *endp;
266 struct STRUCT end;
268 end.pattern = NULL;
269 endp = MEMCHR (n, (flags & FNM_PATHNAME) ? L('/') : L('\0'), se - n);
270 if (endp == 0)
271 endp = se;
273 c1 = ((flags & FNM_NOESCAPE) == 0 && c == L('\\')) ? *p : c;
274 c1 = FOLD (c1);
275 for (--p; n < endp; ++n)
277 /* Only call strmatch if the first character indicates a
278 possible match. We can check the first character if
279 we're not doing an extended glob match. */
280 if ((flags & FNM_EXTMATCH) == 0 && c != L('[') && FOLD (*n) != c1) /*]*/
281 continue;
283 /* If we're doing an extended glob match and the pattern is not
284 one of the extended glob patterns, we can check the first
285 character. */
286 if ((flags & FNM_EXTMATCH) && p[1] != L('(') && /*)*/
287 STRCHR (L("?*+@!"), *p) == 0 && c != L('[') && FOLD (*n) != c1) /*]*/
288 continue;
290 /* Otherwise, we just recurse. */
291 if (GMATCH (n, se, p, pe, &end, flags & ~FNM_PERIOD) == 0)
293 if (end.pattern == NULL)
294 return (0);
295 break;
298 /* This is a clever idea from glibc, used to avoid backtracking
299 to a `*' that appears earlier in the pattern. We get away
300 without saving se and pe because they are always the same,
301 even in the recursive calls to gmatch */
302 if (end.pattern != NULL)
304 p = end.pattern;
305 n = end.string;
306 continue;
309 return FNM_NOMATCH;
312 case L('['):
314 if (sc == L('\0') || n == se)
315 return FNM_NOMATCH;
317 /* A character class cannot match a `.' if it is the first
318 character of the string or if it is the first character
319 following a slash and we are matching a pathname. */
320 if ((flags & FNM_PERIOD) && sc == L('.') &&
321 (n == string || ((flags & FNM_PATHNAME) && n[-1] == L('/'))))
322 return (FNM_NOMATCH);
324 p = BRACKMATCH (p, sc, flags);
325 if (p == 0)
326 return FNM_NOMATCH;
328 break;
330 default:
331 if ((U_CHAR)c != FOLD (sc))
332 return (FNM_NOMATCH);
335 ++n;
338 if (n == se)
339 return (0);
341 if ((flags & FNM_LEADING_DIR) && *n == L('/'))
342 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
343 return 0;
345 return (FNM_NOMATCH);
348 /* Parse a bracket expression collating symbol ([.sym.]) starting at P, find
349 the value of the symbol, and move P past the collating symbol expression.
350 The value is returned in *VP, if VP is not null. */
351 static CHAR *
352 PARSE_COLLSYM (p, vp)
353 CHAR *p;
354 INT *vp;
356 register int pc;
357 INT val;
359 p++; /* move past the `.' */
361 for (pc = 0; p[pc]; pc++)
362 if (p[pc] == L('.') && p[pc+1] == L(']'))
363 break;
364 if (p[pc] == 0)
366 if (vp)
367 *vp = INVALID;
368 return (p + pc);
370 val = COLLSYM (p, pc);
371 if (vp)
372 *vp = val;
373 return (p + pc + 2);
376 /* Use prototype definition here because of type promotion. */
377 static CHAR *
378 #if defined (PROTOTYPES)
379 BRACKMATCH (CHAR *p, U_CHAR test, int flags)
380 #else
381 BRACKMATCH (p, test, flags)
382 CHAR *p;
383 U_CHAR test;
384 int flags;
385 #endif
387 register CHAR cstart, cend, c;
388 register int not; /* Nonzero if the sense of the character class is inverted. */
389 int brcnt, forcecoll, isrange;
390 INT pc;
391 CHAR *savep;
392 CHAR *brchrp;
393 U_CHAR orig_test;
395 orig_test = test;
396 test = FOLD (orig_test);
398 savep = p;
400 /* POSIX.2 3.13.1 says that an exclamation mark (`!') shall replace the
401 circumflex (`^') in its role in a `nonmatching list'. A bracket
402 expression starting with an unquoted circumflex character produces
403 unspecified results. This implementation treats the two identically. */
404 if (not = (*p == L('!') || *p == L('^')))
405 ++p;
407 c = *p++;
408 for (;;)
410 /* Initialize cstart and cend in case `-' is the last
411 character of the pattern. */
412 cstart = cend = c;
413 forcecoll = 0;
415 /* POSIX.2 equivalence class: [=c=]. See POSIX.2 2.8.3.2. Find
416 the end of the equivalence class, move the pattern pointer past
417 it, and check for equivalence. XXX - this handles only
418 single-character equivalence classes, which is wrong, or at
419 least incomplete. */
420 if (c == L('[') && *p == L('=') && p[2] == L('=') && p[3] == L(']'))
422 pc = FOLD (p[1]);
423 p += 4;
424 if (COLLEQUIV (test, pc))
426 /*[*/ /* Move past the closing `]', since the first thing we do at
427 the `matched:' label is back p up one. */
428 p++;
429 goto matched;
431 else
433 c = *p++;
434 if (c == L('\0'))
435 return ((test == L('[')) ? savep : (CHAR *)0); /*]*/
436 c = FOLD (c);
437 continue;
441 /* POSIX.2 character class expression. See POSIX.2 2.8.3.2. */
442 if (c == L('[') && *p == L(':'))
444 CHAR *close, *ccname;
446 pc = 0; /* make sure invalid char classes don't match. */
447 /* Find end of character class name */
448 for (close = p + 1; *close != '\0'; close++)
449 if (*close == L(':') && *(close+1) == L(']'))
450 break;
452 if (*close != L('\0'))
454 ccname = (CHAR *)malloc ((close - p) * sizeof (CHAR));
455 if (ccname == 0)
456 pc = 0;
457 else
459 bcopy (p + 1, ccname, (close - p - 1) * sizeof (CHAR));
460 *(ccname + (close - p - 1)) = L('\0');
461 /* As a result of a POSIX discussion, char class names are
462 allowed to be quoted (?) */
463 DEQUOTE_PATHNAME (ccname);
464 pc = IS_CCLASS (orig_test, (XCHAR *)ccname);
466 if (pc == -1)
468 /* CCNAME is not a valid character class in the current
469 locale. In addition to noting no match (pc = 0), we have
470 a choice about what to do with the invalid charclass.
471 Posix leaves the behavior unspecified, but we're going
472 to skip over the charclass and keep going instead of
473 testing ORIG_TEST against each character in the class
474 string. If we don't want to do that, take out the update
475 of P. */
476 pc = 0;
477 p = close + 2;
479 else
480 p = close + 2; /* move past the closing `]' */
482 free (ccname);
485 if (pc)
487 /*[*/ /* Move past the closing `]', since the first thing we do at
488 the `matched:' label is back p up one. */
489 p++;
490 goto matched;
492 else
494 /* continue the loop here, since this expression can't be
495 the first part of a range expression. */
496 c = *p++;
497 if (c == L('\0'))
498 return ((test == L('[')) ? savep : (CHAR *)0);
499 else if (c == L(']'))
500 break;
501 c = FOLD (c);
502 continue;
506 /* POSIX.2 collating symbols. See POSIX.2 2.8.3.2. Find the end of
507 the symbol name, make sure it is terminated by `.]', translate
508 the name to a character using the external table, and do the
509 comparison. */
510 if (c == L('[') && *p == L('.'))
512 p = PARSE_COLLSYM (p, &pc);
513 /* An invalid collating symbol cannot be the first point of a
514 range. If it is, we set cstart to one greater than `test',
515 so any comparisons later will fail. */
516 cstart = (pc == INVALID) ? test + 1 : pc;
517 forcecoll = 1;
520 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
522 if (*p == '\0')
523 return (CHAR *)0;
524 cstart = cend = *p++;
527 cstart = cend = FOLD (cstart);
528 isrange = 0;
530 /* POSIX.2 2.8.3.1.2 says: `An expression containing a `[' that
531 is not preceded by a backslash and is not part of a bracket
532 expression produces undefined results.' This implementation
533 treats the `[' as just a character to be matched if there is
534 not a closing `]'. */
535 if (c == L('\0'))
536 return ((test == L('[')) ? savep : (CHAR *)0);
538 c = *p++;
539 c = FOLD (c);
541 if (c == L('\0'))
542 return ((test == L('[')) ? savep : (CHAR *)0);
544 if ((flags & FNM_PATHNAME) && c == L('/'))
545 /* [/] can never match when matching a pathname. */
546 return (CHAR *)0;
548 /* This introduces a range, unless the `-' is the last
549 character of the class. Find the end of the range
550 and move past it. */
551 if (c == L('-') && *p != L(']'))
553 cend = *p++;
554 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
555 cend = *p++;
556 if (cend == L('\0'))
557 return (CHAR *)0;
558 if (cend == L('[') && *p == L('.'))
560 p = PARSE_COLLSYM (p, &pc);
561 /* An invalid collating symbol cannot be the second part of a
562 range expression. If we get one, we set cend to one fewer
563 than the test character to make sure the range test fails. */
564 cend = (pc == INVALID) ? test - 1 : pc;
565 forcecoll = 1;
567 cend = FOLD (cend);
569 c = *p++;
571 /* POSIX.2 2.8.3.2: ``The ending range point shall collate
572 equal to or higher than the starting range point; otherwise
573 the expression shall be treated as invalid.'' Note that this
574 applies to only the range expression; the rest of the bracket
575 expression is still checked for matches. */
576 if (RANGECMP (cstart, cend, forcecoll) > 0)
578 if (c == L(']'))
579 break;
580 c = FOLD (c);
581 continue;
583 isrange = 1;
586 if (isrange == 0 && test == cstart)
587 goto matched;
588 if (isrange && RANGECMP (test, cstart, forcecoll) >= 0 && RANGECMP (test, cend, forcecoll) <= 0)
589 goto matched;
591 if (c == L(']'))
592 break;
594 /* No match. */
595 return (!not ? (CHAR *)0 : p);
597 matched:
598 /* Skip the rest of the [...] that already matched. */
599 c = *--p;
600 brcnt = 1;
601 brchrp = 0;
602 while (brcnt > 0)
604 int oc;
606 /* A `[' without a matching `]' is just another character to match. */
607 if (c == L('\0'))
608 return ((test == L('[')) ? savep : (CHAR *)0);
610 oc = c;
611 c = *p++;
612 if (c == L('[') && (*p == L('=') || *p == L(':') || *p == L('.')))
614 brcnt++;
615 brchrp = p++; /* skip over the char after the left bracket */
616 if ((c = *p) == L('\0'))
617 return ((test == L('[')) ? savep : (CHAR *)0);
618 /* If *brchrp == ':' we should check that the rest of the characters
619 form a valid character class name. We don't do that yet, but we
620 keep BRCHRP in case we want to. */
622 /* We only want to check brchrp if we set it above. */
623 else if (c == L(']') && brcnt > 1 && brchrp != 0 && oc == *brchrp)
625 brcnt--;
626 brchrp = 0; /* just in case */
628 /* Left bracket loses its special meaning inside a bracket expression.
629 It is only valid when followed by a `.', `=', or `:', which we check
630 for above. Technically the right bracket can appear in a collating
631 symbol, so we check for that here. Otherwise, it terminates the
632 bracket expression. */
633 else if (c == L(']') && (brchrp == 0 || *brchrp != L('.')) && brcnt >= 1)
634 brcnt = 0;
635 else if (!(flags & FNM_NOESCAPE) && c == L('\\'))
637 if (*p == '\0')
638 return (CHAR *)0;
639 /* XXX 1003.2d11 is unclear if this is right. */
640 ++p;
643 return (not ? (CHAR *)0 : p);
#if defined (EXTENDED_GLOB)
/* ksh-like extended pattern matching:

        [?*+@!](pat-list)

   where pat-list is a list of one or more patterns separated by `|'.
   Operation is as follows:

        ?(patlist)      match zero or one of the given patterns
        *(patlist)      match zero or more of the given patterns
        +(patlist)      match one or more of the given patterns
        @(patlist)      match exactly one of the given patterns
        !(patlist)      match anything except one of the given patterns
*/

/* Scan a pattern starting at STRING and ending at END, keeping track of
   embedded () and [].  If DELIM is 0, we scan until a matching `)'
   because we're scanning a `patlist'.  Otherwise, we scan until we see
   DELIM.  In all cases, we never scan past END.  The return value is the
   first character after the matching DELIM or NULL if the pattern is
   empty or invalid.

   Two details of the scan: a character preceded by a backslash is never
   treated as special, and if the scan reaches END while END does not
   address a NUL, the current position (END) is returned instead of NULL. */
/*static*/ CHAR *
PATSCAN (string, end, delim)
     CHAR *string, *end;
     INT delim;
{
  int pnest, bnest, skip;       /* paren depth, inside-[] flag, skip-next flag */
  INT cchar;                    /* `:', `.' or `=' of an open [: [. [= inside [] */
  CHAR *s, c, *bfirst;          /* BFIRST: where a literal leading `]' may sit */

  pnest = bnest = skip = 0;
  cchar = 0;
  bfirst = NULL;

  if (string == end)
    return (NULL);

  for (s = string; (c = *s) != L('\0'); s++)
    {
      if (s >= end)
        return (s);
      if (skip)
        {
          skip = 0;
          continue;
        }
      switch (c)
        {
        case L('\\'):
          skip = 1;
          break;

        case L('\0'):
          /* Not reachable: the loop condition stops on NUL first. */
          return ((CHAR *)NULL);

        /* `[' is not special inside a bracket expression, but it may
           introduce one of the special POSIX bracket expressions
           ([.SYM.], [=c=], [: ... :]) that needs special handling. */
        case L('['):
          if (bnest == 0)
            {
              bfirst = s + 1;
              if (*bfirst == L('!') || *bfirst == L('^'))
                bfirst++;
              bnest++;
            }
          else if (s[1] == L(':') || s[1] == L('.') || s[1] == L('='))
            cchar = s[1];
          break;

        /* `]' is not special if it's the first char (after a leading `!'
           or `^') in a bracket expression or if it's part of one of the
           special POSIX bracket expressions ([.SYM.], [=c=], [: ... :]) */
        case L(']'):
          if (bnest)
            {
              if (cchar && s[-1] == cchar)
                cchar = 0;
              else if (s != bfirst)
                {
                  bnest--;
                  bfirst = 0;
                }
            }
          break;

        case L('('):
          if (bnest == 0)
            pnest++;
          break;

        case L(')'):
          /* An unmatched `)' outside brackets ends the pattern list. */
          if (bnest == 0 && pnest-- <= 0)
            return ++s;
          break;

        case L('|'):
          if (bnest == 0 && pnest == 0 && delim == L('|'))
            return ++s;
          break;
        }
    }

  return (NULL);
}

/* Return 0 if dequoted pattern matches S in the current locale.

   Compares the text P .. PE with the text S .. SE as plain strings and
   returns 0 if they are equal, FNM_NOMATCH otherwise.  Ranges of unequal
   length never match.  To use the NUL-terminated comparison routines the
   characters at PE and SE are temporarily overwritten with NUL and put
   back before returning, so both buffers must be writable. */
static int
STRCOMPARE (p, pe, s, se)
     CHAR *p, *pe, *s, *se;
{
  int ret;
  CHAR c1, c2;          /* characters saved from *PE and *SE */
  int l1, l2;

  l1 = pe - p;
  l2 = se - s;

  if (l1 != l2)
    return (FNM_NOMATCH);       /* unequal lengths, can't be identical */

  c1 = *pe;
  c2 = *se;

  /* Only write when there is something to replace. */
  if (c1 != 0)
    *pe = '\0';
  if (c2 != 0)
    *se = '\0';

#if HAVE_MULTIBYTE || defined (HAVE_STRCOLL)
  ret = STRCOLL ((XCHAR *)p, (XCHAR *)s);
#else
  ret = STRCMP ((XCHAR *)p, (XCHAR *)s);
#endif

  if (c1 != 0)
    *pe = c1;
  if (c2 != 0)
    *se = c2;

  return (ret == 0 ? ret : FNM_NOMATCH);
}

789 /* Match a ksh extended pattern specifier. Return FNM_NOMATCH on failure or
790 0 on success. This is handed the entire rest of the pattern and string
791 the first time an extended pattern specifier is encountered, so it calls
792 gmatch recursively. */
793 static int
794 EXTMATCH (xc, s, se, p, pe, flags)
795 INT xc; /* select which operation */
796 CHAR *s, *se;
797 CHAR *p, *pe;
798 int flags;
800 CHAR *prest; /* pointer to rest of pattern */
801 CHAR *psub; /* pointer to sub-pattern */
802 CHAR *pnext; /* pointer to next sub-pattern */
803 CHAR *srest; /* pointer to rest of string */
804 int m1, m2, xflags; /* xflags = flags passed to recursive matches */
806 #if DEBUG_MATCHING
807 fprintf(stderr, "extmatch: xc = %c\n", xc);
808 fprintf(stderr, "extmatch: s = %s; se = %s\n", s, se);
809 fprintf(stderr, "extmatch: p = %s; pe = %s\n", p, pe);
810 fprintf(stderr, "extmatch: flags = %d\n", flags);
811 #endif
813 prest = PATSCAN (p + (*p == L('(')), pe, 0); /* ) */
814 if (prest == 0)
815 /* If PREST is 0, we failed to scan a valid pattern. In this
816 case, we just want to compare the two as strings. */
817 return (STRCOMPARE (p - 1, pe, s, se));
819 switch (xc)
821 case L('+'): /* match one or more occurrences */
822 case L('*'): /* match zero or more occurrences */
823 /* If we can get away with no matches, don't even bother. Just
824 call GMATCH on the rest of the pattern and return success if
825 it succeeds. */
826 if (xc == L('*') && (GMATCH (s, se, prest, pe, NULL, flags) == 0))
827 return 0;
829 /* OK, we have to do this the hard way. First, we make sure one of
830 the subpatterns matches, then we try to match the rest of the
831 string. */
832 for (psub = p + 1; ; psub = pnext)
834 pnext = PATSCAN (psub, pe, L('|'));
835 for (srest = s; srest <= se; srest++)
837 /* Match this substring (S -> SREST) against this
838 subpattern (psub -> pnext - 1) */
839 m1 = GMATCH (s, srest, psub, pnext - 1, NULL, flags) == 0;
840 /* OK, we matched a subpattern, so make sure the rest of the
841 string matches the rest of the pattern. Also handle
842 multiple matches of the pattern. */
843 if (m1)
845 /* if srest > s, we are not at start of string */
846 xflags = (srest > s) ? (flags & ~FNM_PERIOD) : flags;
847 m2 = (GMATCH (srest, se, prest, pe, NULL, xflags) == 0) ||
848 (s != srest && GMATCH (srest, se, p - 1, pe, NULL, xflags) == 0);
850 if (m1 && m2)
851 return (0);
853 if (pnext == prest)
854 break;
856 return (FNM_NOMATCH);
858 case L('?'): /* match zero or one of the patterns */
859 case L('@'): /* match one (or more) of the patterns */
860 /* If we can get away with no matches, don't even bother. Just
861 call gmatch on the rest of the pattern and return success if
862 it succeeds. */
863 if (xc == L('?') && (GMATCH (s, se, prest, pe, NULL, flags) == 0))
864 return 0;
866 /* OK, we have to do this the hard way. First, we see if one of
867 the subpatterns matches, then, if it does, we try to match the
868 rest of the string. */
869 for (psub = p + 1; ; psub = pnext)
871 pnext = PATSCAN (psub, pe, L('|'));
872 srest = (prest == pe) ? se : s;
873 for ( ; srest <= se; srest++)
875 /* if srest > s, we are not at start of string */
876 xflags = (srest > s) ? (flags & ~FNM_PERIOD) : flags;
877 if (GMATCH (s, srest, psub, pnext - 1, NULL, flags) == 0 &&
878 GMATCH (srest, se, prest, pe, NULL, xflags) == 0)
879 return (0);
881 if (pnext == prest)
882 break;
884 return (FNM_NOMATCH);
886 case '!': /* match anything *except* one of the patterns */
887 for (srest = s; srest <= se; srest++)
889 m1 = 0;
890 for (psub = p + 1; ; psub = pnext)
892 pnext = PATSCAN (psub, pe, L('|'));
893 /* If one of the patterns matches, just bail immediately. */
894 if (m1 = (GMATCH (s, srest, psub, pnext - 1, NULL, flags) == 0))
895 break;
896 if (pnext == prest)
897 break;
900 /* If nothing matched, but the string starts with a period and we
901 need to match periods explicitly, don't return this as a match,
902 even for negation. Might need to do this only if srest == s. */
903 if (m1 == 0 && *s == '.' && (flags & FNM_PERIOD))
904 return (FNM_NOMATCH);
906 /* if srest > s, we are not at start of string */
907 xflags = (srest > s) ? (flags & ~FNM_PERIOD) : flags;
908 if (m1 == 0 && GMATCH (srest, se, prest, pe, NULL, xflags) == 0)
909 return (0);
911 return (FNM_NOMATCH);
914 return (FNM_NOMATCH);
#endif /* EXTENDED_GLOB */

/* Drop every template parameter macro so that this file can be included
   again with a different set of definitions. */
#undef IS_CCLASS
#undef FOLD
#undef CHAR
#undef U_CHAR
#undef XCHAR
#undef INT
#undef INVALID
#undef FCT
#undef GMATCH
#undef COLLSYM
#undef PARSE_COLLSYM
#undef PATSCAN
#undef STRCOMPARE
#undef EXTMATCH
#undef DEQUOTE_PATHNAME
#undef STRUCT
#undef BRACKMATCH
#undef STRCHR
#undef STRCOLL
#undef STRLEN
#undef STRCMP
#undef MEMCHR
#undef COLLEQUIV
#undef RANGECMP
#undef L