import less(1)
[unleashed/tickless.git] / usr / src / lib / libc / port / regex / glob.c
blobc5b82015b560ddac3edcbcd4d94806261b115236
1 /*
2 * Copyright (c) 2013 Gary Mills
3 */
4 /* $OpenBSD: glob.c,v 1.39 2012/01/20 07:09:42 tedu Exp $ */
5 /*
6 * Copyright (c) 1989, 1993
7 * The Regents of the University of California. All rights reserved.
9 * This code is derived from software contributed to Berkeley by
10 * Guido van Rossum.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
38 * glob(3) -- a superset of the one defined in POSIX 1003.2.
40 * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
42 * Optional extra services, controlled by flags not defined by POSIX:
44 * GLOB_QUOTE:
45 * Escaping convention: \ inhibits any special meaning the following
46 * character might have (except \ at end of string is retained).
47 * GLOB_MAGCHAR:
48 * Set in gl_flags if pattern contained a globbing character.
49 * GLOB_NOMAGIC:
50 * Same as GLOB_NOCHECK, but it will only append pattern if it did
51 * not contain any magic characters. [Used in csh style globbing]
52 * GLOB_ALTDIRFUNC:
53 * Use alternately specified directory access functions.
54 * GLOB_TILDE:
55 * expand ~user/foo to the /home/dir/of/user/foo
56 * GLOB_BRACE:
57 * expand {1,2}{a,b} to 1a 1b 2a 2b
58 * gl_matchc:
59 * Number of matches in the current invocation of glob.
62 #include "lint.h"
64 #include <sys/param.h>
65 #include <sys/stat.h>
67 #include <ctype.h>
68 #include <dirent.h>
69 #include <errno.h>
70 #include <glob.h>
71 #include <limits.h>
72 #include <pwd.h>
73 #include <stdio.h>
74 #include <stdlib.h>
75 #include <string.h>
76 #include <unistd.h>
77 #include <wchar.h>
78 #include <wctype.h>
81 * This is the legacy glob_t prior to illumos enhancement 1097,
82 * used when old programs call the old libc glob functions.
83 * (New programs call the _glob_ext, _globfree_ext functions.)
84 * This struct should be considered "carved in stone".
86 typedef struct old_glob {
87 size_t gl_pathc; /* Count of paths matched by pattern */
88 char **gl_pathv; /* List of matched pathnames */
89 size_t gl_offs; /* # of slots reserved in gl_pathv */
90 /* following are internal to the implementation */
91 char **gl_pathp; /* gl_pathv + gl_offs */
92 int gl_pathn; /* # of elements allocated */
93 } old_glob_t;
96 * For old programs, the external names need to be the old names:
97 * glob() and globfree() . We've redefined those already to
98 * _glob_ext() and _globfree_ext() . Now redefine old_glob()
99 * and old_globfree() to glob() and globfree() .
101 #ifdef __PRAGMA_REDEFINE_EXTNAME
102 #pragma redefine_extname old_glob glob
103 #pragma redefine_extname old_globfree globfree
104 #endif /* __PRAGMA_REDEFINE_EXTNAME */
105 extern int old_glob(const char *, int, int (*)(const char *, int),
106 old_glob_t *);
107 extern void old_globfree(old_glob_t *);
110 * The various extensions to glob(3C) allow for stat and dirent structures to
111 * show up whose size may change in a largefile environment. If libc defines
112 * _FILE_OFFSET_BITS to be 64 that is the key to indicate that we're building
113 * the LFS version of this file. As such, we rename the public functions here,
114 * _glob_ext() and _globfree_ext() to have a 64 suffix. When building the LFS
115 * version, we do not include the old versions.
117 #if !defined(_LP64) && _FILE_OFFSET_BITS == 64
118 #define _glob_ext _glob_ext64
119 #define _globfree_ext _globfree_ext64
120 #endif /* !_LP64 && _FILE_OFFSET_BITS == 64 */
122 #define DOLLAR '$'
123 #define DOT '.'
124 #define EOS '\0'
125 #define LBRACKET '['
126 #define NOT '!'
127 #define QUESTION '?'
128 #define QUOTE '\\'
129 #define RANGE '-'
130 #define RBRACKET ']'
131 #define SEP '/'
132 #define STAR '*'
133 #define TILDE '~'
134 #define UNDERSCORE '_'
135 #define LBRACE '{'
136 #define RBRACE '}'
137 #define SLASH '/'
138 #define COMMA ','
139 #define COLON ':'
141 #define M_QUOTE 0x800000
142 #define M_PROTECT 0x400000
144 typedef struct wcat {
145 wchar_t w_wc;
146 uint_t w_at;
147 } wcat_t;
149 #define M_ALL '*' /* Plus M_QUOTE */
150 #define M_END ']' /* Plus M_QUOTE */
151 #define M_NOT '!' /* Plus M_QUOTE */
152 #define M_ONE '?' /* Plus M_QUOTE */
153 #define M_RNG '-' /* Plus M_QUOTE */
154 #define M_SET '[' /* Plus M_QUOTE */
155 #define M_CLASS ':' /* Plus M_QUOTE */
156 #define ismeta(c) (((c).w_at&M_QUOTE) != 0)
158 #define INITIAL 8 /* initial pathv allocation */
160 #define GLOB_LIMIT_MALLOC 65536
161 #define GLOB_LIMIT_STAT 2048
162 #define GLOB_LIMIT_READDIR 16384
164 /* Limit of recursion during matching attempts. */
165 #define GLOB_LIMIT_RECUR 64
167 struct glob_lim {
168 size_t glim_malloc;
169 size_t glim_stat;
170 size_t glim_readdir;
173 struct glob_path_stat {
174 char *gps_path;
175 struct stat *gps_stat;
178 static int compare(const void *, const void *);
179 static int compare_gps(const void *, const void *);
180 static int g_Ctoc(const wcat_t *, char *, uint_t);
181 static int g_lstat(wcat_t *, struct stat *, glob_t *);
182 static DIR *g_opendir(wcat_t *, glob_t *);
183 static wcat_t *g_strchr(const wcat_t *, wchar_t);
184 static int g_stat(wcat_t *, struct stat *, glob_t *);
185 static int glob0(const wcat_t *, glob_t *, struct glob_lim *,
186 int (*)(const char *, int));
187 static int glob1(wcat_t *, wcat_t *, glob_t *, struct glob_lim *,
188 int (*)(const char *, int));
189 static int glob2(wcat_t *, wcat_t *, wcat_t *, wcat_t *, wcat_t *,
190 wcat_t *, glob_t *, struct glob_lim *,
191 int (*)(const char *, int));
192 static int glob3(wcat_t *, wcat_t *, wcat_t *, wcat_t *, wcat_t *,
193 wcat_t *, wcat_t *, glob_t *, struct glob_lim *,
194 int (*)(const char *, int));
195 static int globextend(const wcat_t *, glob_t *, struct glob_lim *,
196 struct stat *);
197 static
198 const wcat_t *globtilde(const wcat_t *, wcat_t *, size_t, glob_t *);
199 static int globexp1(const wcat_t *, glob_t *, struct glob_lim *,
200 int (*)(const char *, int));
201 static int globexp2(const wcat_t *, const wcat_t *, glob_t *,
202 struct glob_lim *, int (*)(const char *, int));
203 static int match(wcat_t *, wcat_t *, wcat_t *, int);
206 * Extended glob() function, selected by #pragma redefine_extname
207 * in glob.h with the external name _glob_ext() .
210 _glob_ext(const char *pattern, int flags, int (*errfunc)(const char *, int),
211 glob_t *pglob)
213 const char *patnext;
214 int n;
215 size_t patlen;
216 wchar_t c;
217 wcat_t *bufnext, *bufend, patbuf[MAXPATHLEN];
218 struct glob_lim limit = { 0, 0, 0 };
220 if ((patlen = strnlen(pattern, PATH_MAX)) == PATH_MAX)
221 return (GLOB_NOMATCH);
223 patnext = pattern;
224 if (!(flags & GLOB_APPEND)) {
225 pglob->gl_pathc = 0;
226 pglob->gl_pathn = 0;
227 pglob->gl_pathv = NULL;
228 if ((flags & GLOB_KEEPSTAT) != 0)
229 pglob->gl_statv = NULL;
230 if (!(flags & GLOB_DOOFFS))
231 pglob->gl_offs = 0;
233 pglob->gl_flags = flags & ~GLOB_MAGCHAR;
234 pglob->gl_matchc = 0;
236 if (pglob->gl_offs >= INT_MAX || pglob->gl_pathc >= INT_MAX ||
237 pglob->gl_pathc >= INT_MAX - pglob->gl_offs - 1)
238 return (GLOB_NOSPACE);
240 bufnext = patbuf;
241 bufend = bufnext + MAXPATHLEN - 1;
242 patlen += 1;
243 if (flags & GLOB_NOESCAPE) {
244 while (bufnext < bufend) {
245 if ((n = mbtowc(&c, patnext, patlen)) > 0) {
246 patnext += n;
247 patlen -= n;
248 bufnext->w_at = 0;
249 (bufnext++)->w_wc = c;
250 } else if (n == 0) {
251 break;
252 } else {
253 return (GLOB_NOMATCH);
256 } else {
257 /* Protect the quoted characters. */
258 while (bufnext < bufend) {
259 if ((n = mbtowc(&c, patnext, patlen)) > 0) {
260 patnext += n;
261 patlen -= n;
262 if (c == QUOTE) {
263 n = mbtowc(&c, patnext, patlen);
264 if (n < 0)
265 return (GLOB_NOMATCH);
266 if (n > 0) {
267 patnext += n;
268 patlen -= n;
270 if (n == 0)
271 c = QUOTE;
272 bufnext->w_at = M_PROTECT;
273 (bufnext++)->w_wc = c;
274 } else {
275 bufnext->w_at = 0;
276 (bufnext++)->w_wc = c;
278 } else if (n == 0) {
279 break;
280 } else {
281 return (GLOB_NOMATCH);
285 bufnext->w_at = 0;
286 bufnext->w_wc = EOS;
288 if (flags & GLOB_BRACE)
289 return (globexp1(patbuf, pglob, &limit, errfunc));
290 else
291 return (glob0(patbuf, pglob, &limit, errfunc));
295 * Expand recursively a glob {} pattern. When there is no more expansion
296 * invoke the standard globbing routine to glob the rest of the magic
297 * characters
299 static int
300 globexp1(const wcat_t *pattern, glob_t *pglob, struct glob_lim *limitp,
301 int (*errfunc)(const char *, int))
303 const wcat_t *ptr = pattern;
305 /* Protect a single {}, for find(1), like csh */
306 if (pattern[0].w_wc == LBRACE && pattern[1].w_wc == RBRACE &&
307 pattern[2].w_wc == EOS)
308 return (glob0(pattern, pglob, limitp, errfunc));
310 if ((ptr = (const wcat_t *) g_strchr(ptr, LBRACE)) != NULL)
311 return (globexp2(ptr, pattern, pglob, limitp, errfunc));
313 return (glob0(pattern, pglob, limitp, errfunc));
318 * Recursive brace globbing helper. Tries to expand a single brace.
319 * If it succeeds then it invokes globexp1 with the new pattern.
320 * If it fails then it tries to glob the rest of the pattern and returns.
322 static int
323 globexp2(const wcat_t *ptr, const wcat_t *pattern, glob_t *pglob,
324 struct glob_lim *limitp, int (*errfunc)(const char *, int))
326 int i, rv;
327 wcat_t *lm, *ls;
328 const wcat_t *pe, *pm, *pl;
329 wcat_t patbuf[MAXPATHLEN];
331 /* copy part up to the brace */
332 for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++)
334 lm->w_at = 0;
335 lm->w_wc = EOS;
336 ls = lm;
338 /* Find the balanced brace */
339 for (i = 0, pe = ++ptr; pe->w_wc != EOS; pe++)
340 if (pe->w_wc == LBRACKET) {
341 /* Ignore everything between [] */
342 for (pm = pe++; pe->w_wc != RBRACKET &&
343 pe->w_wc != EOS; pe++)
345 if (pe->w_wc == EOS) {
347 * We could not find a matching RBRACKET.
348 * Ignore and just look for RBRACE
350 pe = pm;
352 } else if (pe->w_wc == LBRACE) {
353 i++;
354 } else if (pe->w_wc == RBRACE) {
355 if (i == 0)
356 break;
357 i--;
360 /* Non matching braces; just glob the pattern */
361 if (i != 0 || pe->w_wc == EOS)
362 return (glob0(patbuf, pglob, limitp, errfunc));
364 for (i = 0, pl = pm = ptr; pm <= pe; pm++) {
365 switch (pm->w_wc) {
366 case LBRACKET:
367 /* Ignore everything between [] */
368 for (pl = pm++; pm->w_wc != RBRACKET && pm->w_wc != EOS;
369 pm++)
371 if (pm->w_wc == EOS) {
373 * We could not find a matching RBRACKET.
374 * Ignore and just look for RBRACE
376 pm = pl;
378 break;
380 case LBRACE:
381 i++;
382 break;
384 case RBRACE:
385 if (i) {
386 i--;
387 break;
389 /* FALLTHROUGH */
390 case COMMA:
391 if (i && pm->w_wc == COMMA)
392 break;
393 else {
394 /* Append the current string */
395 for (lm = ls; (pl < pm); *lm++ = *pl++)
399 * Append the rest of the pattern after the
400 * closing brace
402 for (pl = pe + 1;
403 (*lm++ = *pl++).w_wc != EOS; /* */)
406 /* Expand the current pattern */
407 rv = globexp1(patbuf, pglob, limitp, errfunc);
408 if (rv && rv != GLOB_NOMATCH)
409 return (rv);
411 /* move after the comma, to the next string */
412 pl = pm + 1;
414 break;
416 default:
417 break;
420 return (0);
426 * expand tilde from the passwd file.
428 static const wcat_t *
429 globtilde(const wcat_t *pattern, wcat_t *patbuf, size_t patbuf_len,
430 glob_t *pglob)
432 struct passwd *pwd;
433 char *h;
434 const wcat_t *p;
435 wcat_t *b, *eb, *q;
436 int n;
437 size_t lenh;
438 wchar_t c;
440 if (pattern->w_wc != TILDE || !(pglob->gl_flags & GLOB_TILDE))
441 return (pattern);
443 /* Copy up to the end of the string or / */
444 eb = &patbuf[patbuf_len - 1];
445 for (p = pattern + 1, q = patbuf;
446 q < eb && p->w_wc != EOS && p->w_wc != SLASH; *q++ = *p++)
449 q->w_at = 0;
450 q->w_wc = EOS;
452 /* What to do if patbuf is full? */
454 if (patbuf[0].w_wc == EOS) {
456 * handle a plain ~ or ~/ by expanding $HOME
457 * first and then trying the password file
459 if (issetugid() != 0)
460 return (pattern);
461 if ((h = getenv("HOME")) == NULL) {
462 if ((pwd = getpwuid(getuid())) == NULL)
463 return (pattern);
464 else
465 h = pwd->pw_dir;
467 } else {
469 * Expand a ~user
471 if ((pwd = getpwnam((char *)patbuf)) == NULL)
472 return (pattern);
473 else
474 h = pwd->pw_dir;
477 /* Copy the home directory */
478 lenh = strlen(h) + 1;
479 for (b = patbuf; b < eb && *h != EOS; b++) {
480 if ((n = mbtowc(&c, h, lenh)) > 0) {
481 h += n;
482 lenh -= n;
483 b->w_at = 0;
484 b->w_wc = c;
485 } else if (n < 0) {
486 return (pattern);
487 } else {
488 break;
492 /* Append the rest of the pattern */
493 while (b < eb && (*b++ = *p++).w_wc != EOS)
495 b->w_at = 0;
496 b->w_wc = EOS;
498 return (patbuf);
501 static int
502 g_charclass(const wcat_t **patternp, wcat_t **bufnextp)
504 const wcat_t *pattern = *patternp + 1;
505 wcat_t *bufnext = *bufnextp;
506 const wcat_t *colon;
507 char cbuf[MB_LEN_MAX + 32];
508 wctype_t cc;
509 size_t len;
511 if ((colon = g_strchr(pattern, COLON)) == NULL ||
512 colon[1].w_wc != RBRACKET)
513 return (1); /* not a character class */
515 len = (size_t)(colon - pattern);
516 if (len + MB_LEN_MAX + 1 > sizeof (cbuf))
517 return (-1); /* invalid character class */
519 wchar_t w;
520 const wcat_t *s1 = pattern;
521 char *s2 = cbuf;
522 size_t n = len;
524 /* Copy the string. */
525 while (n > 0) {
526 w = (s1++)->w_wc;
527 /* Character class names must be ASCII. */
528 if (iswascii(w)) {
529 n--;
530 *s2++ = w;
531 } else {
532 return (-1); /* invalid character class */
535 *s2 = EOS;
537 if ((cc = wctype(cbuf)) == 0)
538 return (-1); /* invalid character class */
539 bufnext->w_at = M_QUOTE;
540 (bufnext++)->w_wc = M_CLASS;
541 bufnext->w_at = 0;
542 (bufnext++)->w_wc = cc;
543 *bufnextp = bufnext;
544 *patternp += len + 3;
546 return (0);
550 * The main glob() routine: compiles the pattern (optionally processing
551 * quotes), calls glob1() to do the real pattern matching, and finally
552 * sorts the list (unless unsorted operation is requested). Returns 0
553 * if things went well, nonzero if errors occurred. It is not an error
554 * to find no matches.
556 static int
557 glob0(const wcat_t *pattern, glob_t *pglob, struct glob_lim *limitp,
558 int (*errfunc)(const char *, int))
560 const wcat_t *qpatnext;
561 int err, oldpathc;
562 wchar_t c;
563 int a;
564 wcat_t *bufnext, patbuf[MAXPATHLEN];
566 qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob);
567 oldpathc = pglob->gl_pathc;
568 bufnext = patbuf;
571 * We don't need to check for buffer overflow any more.
572 * The pattern has already been copied to an internal buffer.
574 while ((a = qpatnext->w_at), (c = (qpatnext++)->w_wc) != EOS) {
575 switch (c) {
576 case LBRACKET:
577 if (a != 0) {
578 bufnext->w_at = a;
579 (bufnext++)->w_wc = c;
580 break;
582 a = qpatnext->w_at;
583 c = qpatnext->w_wc;
584 if (a == 0 && c == NOT)
585 ++qpatnext;
586 if (qpatnext->w_wc == EOS ||
587 g_strchr(qpatnext+1, RBRACKET) == NULL) {
588 bufnext->w_at = 0;
589 (bufnext++)->w_wc = LBRACKET;
590 if (a == 0 && c == NOT)
591 --qpatnext;
592 break;
594 bufnext->w_at = M_QUOTE;
595 (bufnext++)->w_wc = M_SET;
596 if (a == 0 && c == NOT) {
597 bufnext->w_at = M_QUOTE;
598 (bufnext++)->w_wc = M_NOT;
600 a = qpatnext->w_at;
601 c = (qpatnext++)->w_wc;
602 do {
603 if (a == 0 && c == LBRACKET &&
604 qpatnext->w_wc == COLON) {
605 do {
606 err = g_charclass(&qpatnext,
607 &bufnext);
608 if (err)
609 break;
610 a = qpatnext->w_at;
611 c = (qpatnext++)->w_wc;
612 } while (a == 0 && c == LBRACKET &&
613 qpatnext->w_wc == COLON);
614 if (err == -1 &&
615 !(pglob->gl_flags & GLOB_NOCHECK))
616 return (GLOB_NOMATCH);
617 if (a == 0 && c == RBRACKET)
618 break;
620 bufnext->w_at = a;
621 (bufnext++)->w_wc = c;
622 if (qpatnext->w_at == 0 &&
623 qpatnext->w_wc == RANGE) {
624 a = qpatnext[1].w_at;
625 c = qpatnext[1].w_wc;
626 if (qpatnext[1].w_at != 0 ||
627 qpatnext[1].w_wc != RBRACKET) {
628 bufnext->w_at = M_QUOTE;
629 (bufnext++)->w_wc = M_RNG;
630 bufnext->w_at = a;
631 (bufnext++)->w_wc = c;
632 qpatnext += 2;
635 a = qpatnext->w_at;
636 c = (qpatnext++)->w_wc;
637 } while (a != 0 || c != RBRACKET);
638 pglob->gl_flags |= GLOB_MAGCHAR;
639 bufnext->w_at = M_QUOTE;
640 (bufnext++)->w_wc = M_END;
641 break;
642 case QUESTION:
643 if (a != 0) {
644 bufnext->w_at = a;
645 (bufnext++)->w_wc = c;
646 break;
648 pglob->gl_flags |= GLOB_MAGCHAR;
649 bufnext->w_at = M_QUOTE;
650 (bufnext++)->w_wc = M_ONE;
651 break;
652 case STAR:
653 if (a != 0) {
654 bufnext->w_at = a;
655 (bufnext++)->w_wc = c;
656 break;
658 pglob->gl_flags |= GLOB_MAGCHAR;
660 * collapse adjacent stars to one,
661 * to avoid exponential behavior
663 if (bufnext == patbuf ||
664 bufnext[-1].w_at != M_QUOTE ||
665 bufnext[-1].w_wc != M_ALL) {
666 bufnext->w_at = M_QUOTE;
667 (bufnext++)->w_wc = M_ALL;
669 break;
670 default:
671 bufnext->w_at = a;
672 (bufnext++)->w_wc = c;
673 break;
676 bufnext->w_at = 0;
677 bufnext->w_wc = EOS;
679 if ((err = glob1(patbuf, patbuf+MAXPATHLEN-1, pglob, limitp, errfunc))
680 != 0)
681 return (err);
684 * If there was no match we are going to append the pattern
685 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
686 * and the pattern did not contain any magic characters
687 * GLOB_NOMAGIC is there just for compatibility with csh.
689 if (pglob->gl_pathc == oldpathc) {
690 if ((pglob->gl_flags & GLOB_NOCHECK) ||
691 ((pglob->gl_flags & GLOB_NOMAGIC) &&
692 !(pglob->gl_flags & GLOB_MAGCHAR)))
693 return (globextend(pattern, pglob, limitp, NULL));
694 else
695 return (GLOB_NOMATCH);
697 if (!(pglob->gl_flags & GLOB_NOSORT)) {
698 if ((pglob->gl_flags & GLOB_KEEPSTAT)) {
699 /* Keep the paths and stat info synced during sort */
700 struct glob_path_stat *path_stat;
701 int i;
702 int n = pglob->gl_pathc - oldpathc;
703 int o = pglob->gl_offs + oldpathc;
705 if ((path_stat = calloc(n, sizeof (*path_stat))) ==
706 NULL)
707 return (GLOB_NOSPACE);
708 for (i = 0; i < n; i++) {
709 path_stat[i].gps_path = pglob->gl_pathv[o + i];
710 path_stat[i].gps_stat = pglob->gl_statv[o + i];
712 qsort(path_stat, n, sizeof (*path_stat), compare_gps);
713 for (i = 0; i < n; i++) {
714 pglob->gl_pathv[o + i] = path_stat[i].gps_path;
715 pglob->gl_statv[o + i] = path_stat[i].gps_stat;
717 free(path_stat);
718 } else {
719 qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
720 pglob->gl_pathc - oldpathc, sizeof (char *),
721 compare);
724 return (0);
727 static int
728 compare(const void *p, const void *q)
730 return (strcmp(*(char **)p, *(char **)q));
733 static int
734 compare_gps(const void *_p, const void *_q)
736 const struct glob_path_stat *p = (const struct glob_path_stat *)_p;
737 const struct glob_path_stat *q = (const struct glob_path_stat *)_q;
739 return (strcmp(p->gps_path, q->gps_path));
742 static int
743 glob1(wcat_t *pattern, wcat_t *pattern_last, glob_t *pglob,
744 struct glob_lim *limitp, int (*errfunc)(const char *, int))
746 wcat_t pathbuf[MAXPATHLEN];
748 /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
749 if (pattern->w_wc == EOS)
750 return (0);
751 return (glob2(pathbuf, pathbuf+MAXPATHLEN-1,
752 pathbuf, pathbuf+MAXPATHLEN-1,
753 pattern, pattern_last, pglob, limitp, errfunc));
757 * The functions glob2 and glob3 are mutually recursive; there is one level
758 * of recursion for each segment in the pattern that contains one or more
759 * meta characters.
761 static int
762 glob2(wcat_t *pathbuf, wcat_t *pathbuf_last, wcat_t *pathend,
763 wcat_t *pathend_last, wcat_t *pattern, wcat_t *pattern_last,
764 glob_t *pglob, struct glob_lim *limitp, int (*errfunc)(const char *, int))
766 struct stat sb;
767 wcat_t *p, *q;
768 int anymeta;
771 * Loop over pattern segments until end of pattern or until
772 * segment with meta character found.
774 for (anymeta = 0; ; ) {
775 if (pattern->w_wc == EOS) { /* End of pattern? */
776 pathend->w_at = 0;
777 pathend->w_wc = EOS;
779 if ((pglob->gl_flags & GLOB_LIMIT) &&
780 limitp->glim_stat++ >= GLOB_LIMIT_STAT) {
781 errno = 0;
782 pathend->w_at = 0;
783 (pathend++)->w_wc = SEP;
784 pathend->w_at = 0;
785 pathend->w_wc = EOS;
786 return (GLOB_NOSPACE);
788 if (g_lstat(pathbuf, &sb, pglob))
789 return (0);
791 if (((pglob->gl_flags & GLOB_MARK) &&
792 (pathend[-1].w_at != 0 ||
793 pathend[-1].w_wc != SEP)) &&
794 (S_ISDIR(sb.st_mode) ||
795 (S_ISLNK(sb.st_mode) &&
796 (g_stat(pathbuf, &sb, pglob) == 0) &&
797 S_ISDIR(sb.st_mode)))) {
798 if (pathend+1 > pathend_last)
799 return (GLOB_NOSPACE);
800 pathend->w_at = 0;
801 (pathend++)->w_wc = SEP;
802 pathend->w_at = 0;
803 pathend->w_wc = EOS;
805 ++pglob->gl_matchc;
806 return (globextend(pathbuf, pglob, limitp, &sb));
809 /* Find end of next segment, copy tentatively to pathend. */
810 q = pathend;
811 p = pattern;
812 while (p->w_wc != EOS && p->w_wc != SEP) {
813 if (ismeta(*p))
814 anymeta = 1;
815 if (q+1 > pathend_last)
816 return (GLOB_NOSPACE);
817 *q++ = *p++;
820 if (!anymeta) { /* No expansion, do next segment. */
821 pathend = q;
822 pattern = p;
823 while (pattern->w_wc == SEP) {
824 if (pathend+1 > pathend_last)
825 return (GLOB_NOSPACE);
826 *pathend++ = *pattern++;
828 } else {
829 /* Need expansion, recurse. */
830 return (glob3(pathbuf, pathbuf_last, pathend,
831 pathend_last, pattern, p, pattern_last,
832 pglob, limitp, errfunc));
835 /* NOTREACHED */
838 static int
839 glob3(wcat_t *pathbuf, wcat_t *pathbuf_last, wcat_t *pathend,
840 wcat_t *pathend_last, wcat_t *pattern, wcat_t *restpattern,
841 wcat_t *restpattern_last, glob_t *pglob, struct glob_lim *limitp,
842 int (*errfunc)(const char *, int))
844 struct dirent *dp;
845 DIR *dirp;
846 int err;
847 char buf[MAXPATHLEN];
850 * The readdirfunc declaration can't be prototyped, because it is
851 * assigned, below, to two functions which are prototyped in glob.h
852 * and dirent.h as taking pointers to differently typed opaque
853 * structures.
855 struct dirent *(*readdirfunc)(void *);
857 if (pathend > pathend_last)
858 return (GLOB_NOSPACE);
859 pathend->w_at = 0;
860 pathend->w_wc = EOS;
861 errno = 0;
863 if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
864 /* TODO: don't call for ENOENT or ENOTDIR? */
865 if (errfunc) {
866 if (g_Ctoc(pathbuf, buf, sizeof (buf)))
867 return (GLOB_ABORTED);
868 if (errfunc(buf, errno) ||
869 pglob->gl_flags & GLOB_ERR)
870 return (GLOB_ABORTED);
872 return (0);
875 err = 0;
877 /* Search directory for matching names. */
878 if (pglob->gl_flags & GLOB_ALTDIRFUNC)
879 readdirfunc = pglob->gl_readdir;
880 else
881 readdirfunc = (struct dirent *(*)(void *))readdir;
882 while ((dp = (*readdirfunc)(dirp))) {
883 char *sc;
884 wcat_t *dc;
885 int n;
886 int lensc;
887 wchar_t w;
889 if ((pglob->gl_flags & GLOB_LIMIT) &&
890 limitp->glim_readdir++ >= GLOB_LIMIT_READDIR) {
891 errno = 0;
892 pathend->w_at = 0;
893 (pathend++)->w_wc = SEP;
894 pathend->w_at = 0;
895 pathend->w_wc = EOS;
896 err = GLOB_NOSPACE;
897 break;
900 /* Initial DOT must be matched literally. */
901 if (dp->d_name[0] == DOT && pattern->w_wc != DOT)
902 continue;
903 dc = pathend;
904 sc = dp->d_name;
905 lensc = strlen(sc) + 1;
906 while (dc < pathend_last) {
907 if ((n = mbtowc(&w, sc, lensc)) <= 0) {
908 sc += 1;
909 lensc -= 1;
910 dc->w_at = 0;
911 dc->w_wc = EOS;
912 } else {
913 sc += n;
914 lensc -= n;
915 dc->w_at = 0;
916 dc->w_wc = w;
918 dc++;
919 if (n <= 0)
920 break;
922 if (dc >= pathend_last) {
923 dc->w_at = 0;
924 dc->w_wc = EOS;
925 err = GLOB_NOSPACE;
926 break;
928 if (n < 0) {
929 err = GLOB_NOMATCH;
930 break;
933 if (!match(pathend, pattern, restpattern, GLOB_LIMIT_RECUR)) {
934 pathend->w_at = 0;
935 pathend->w_wc = EOS;
936 continue;
938 err = glob2(pathbuf, pathbuf_last, --dc, pathend_last,
939 restpattern, restpattern_last, pglob, limitp,
940 errfunc);
941 if (err)
942 break;
945 if (pglob->gl_flags & GLOB_ALTDIRFUNC)
946 (*pglob->gl_closedir)(dirp);
947 else
948 (void) closedir(dirp);
949 return (err);
954 * Extend the gl_pathv member of a glob_t structure to accommodate a new item,
955 * add the new item, and update gl_pathc. Avoids excessive reallocation
956 * by doubling the number of elements each time. Uses gl_pathn to contain
957 * the number.
959 * Return 0 if new item added, error code if memory couldn't be allocated.
961 * Invariant of the glob_t structure:
962 * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
963 * gl_pathv points to (gl_offs + gl_pathc + 1) items.
965 static int
966 globextend(const wcat_t *path, glob_t *pglob, struct glob_lim *limitp,
967 struct stat *sb)
969 char **pathv;
970 ssize_t i;
971 size_t allocn, newn, len;
972 char *copy = NULL;
973 const wcat_t *p;
974 struct stat **statv;
975 char junk[MB_LEN_MAX];
976 int n;
978 allocn = pglob->gl_pathn;
979 newn = 2 + pglob->gl_pathc + pglob->gl_offs;
981 if (newn <= allocn) {
982 pathv = pglob->gl_pathv;
983 if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0)
984 statv = pglob->gl_statv;
985 } else {
986 if (allocn == 0)
987 allocn = pglob->gl_offs + INITIAL;
988 allocn *= 2;
989 if (pglob->gl_offs >= INT_MAX ||
990 pglob->gl_pathc >= INT_MAX ||
991 allocn >= INT_MAX ||
992 SIZE_MAX / sizeof (*pathv) <= allocn ||
993 SIZE_MAX / sizeof (*statv) <= allocn) {
994 nospace:
995 for (i = pglob->gl_offs; i < (ssize_t)(newn - 2);
996 i++) {
997 if (pglob->gl_pathv)
998 free(pglob->gl_pathv[i]);
999 if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0 && pglob->gl_statv)
1000 free(pglob->gl_statv[i]);
1002 if (pglob->gl_pathv) {
1003 free(pglob->gl_pathv);
1004 pglob->gl_pathv = NULL;
1006 if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0 &&
1007 pglob->gl_statv) {
1008 free(pglob->gl_statv);
1009 pglob->gl_statv = NULL;
1011 return (GLOB_NOSPACE);
1013 limitp->glim_malloc += allocn * sizeof (*pathv);
1014 pathv = reallocarray(pglob->gl_pathv, allocn, sizeof (*pathv));
1015 if (pathv == NULL)
1016 goto nospace;
1017 if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0) {
1018 limitp->glim_malloc += allocn * sizeof (*statv);
1019 statv = reallocarray(pglob->gl_statv, allocn,
1020 sizeof (*statv));
1021 if (statv == NULL)
1022 goto nospace;
1025 pglob->gl_pathn = allocn;
1027 if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
1028 /* first time around -- clear initial gl_offs items */
1029 pathv += pglob->gl_offs;
1030 for (i = pglob->gl_offs; --i >= 0; )
1031 *--pathv = NULL;
1033 pglob->gl_pathv = pathv;
1035 if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0) {
1036 if (pglob->gl_statv == NULL && pglob->gl_offs > 0) {
1037 /* first time around -- clear initial gl_offs items */
1038 statv += pglob->gl_offs;
1039 for (i = pglob->gl_offs; --i >= 0; )
1040 *--statv = NULL;
1042 pglob->gl_statv = statv;
1043 if (sb == NULL)
1044 statv[pglob->gl_offs + pglob->gl_pathc] = NULL;
1045 else {
1046 limitp->glim_malloc += sizeof (**statv);
1047 if ((statv[pglob->gl_offs + pglob->gl_pathc] =
1048 malloc(sizeof (**statv))) == NULL)
1049 goto copy_error;
1050 (void) memcpy(statv[pglob->gl_offs + pglob->gl_pathc],
1051 sb, sizeof (*sb));
1053 statv[pglob->gl_offs + pglob->gl_pathc + 1] = NULL;
1056 len = MB_LEN_MAX;
1057 p = path;
1058 while ((n = wctomb(junk, p->w_wc)) > 0) {
1059 len += n;
1060 if ((p++)->w_wc == EOS)
1061 break;
1063 if (n < 0)
1064 return (GLOB_NOMATCH);
1066 limitp->glim_malloc += len;
1067 if ((copy = malloc(len)) != NULL) {
1068 if (g_Ctoc(path, copy, len)) {
1069 free(copy);
1070 return (GLOB_NOSPACE);
1072 pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
1074 pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
1076 if ((pglob->gl_flags & GLOB_LIMIT) &&
1077 limitp->glim_malloc >= GLOB_LIMIT_MALLOC) {
1078 errno = 0;
1079 return (GLOB_NOSPACE);
1081 copy_error:
1082 return (copy == NULL ? GLOB_NOSPACE : 0);
1087 * pattern matching function for filenames. Each occurrence of the *
1088 * pattern causes a recursion level.
1090 static int
1091 match(wcat_t *name, wcat_t *pat, wcat_t *patend, int recur)
1093 int ok, negate_range;
1094 wcat_t c, k;
1096 if (recur-- == 0)
1097 return (1);
1099 while (pat < patend) {
1100 c = *pat++;
1101 switch (c.w_wc) {
1102 case M_ALL:
1103 if (c.w_at != M_QUOTE) {
1104 k = *name++;
1105 if (k.w_at != c.w_at || k.w_wc != c.w_wc)
1106 return (0);
1107 break;
1109 while (pat < patend && pat->w_at == M_QUOTE &&
1110 pat->w_wc == M_ALL)
1111 pat++; /* eat consecutive '*' */
1112 if (pat == patend)
1113 return (1);
1114 do {
1115 if (match(name, pat, patend, recur))
1116 return (1);
1117 } while ((name++)->w_wc != EOS);
1118 return (0);
1119 case M_ONE:
1120 if (c.w_at != M_QUOTE) {
1121 k = *name++;
1122 if (k.w_at != c.w_at || k.w_wc != c.w_wc)
1123 return (0);
1124 break;
1126 if ((name++)->w_wc == EOS)
1127 return (0);
1128 break;
1129 case M_SET:
1130 if (c.w_at != M_QUOTE) {
1131 k = *name++;
1132 if (k.w_at != c.w_at || k.w_wc != c.w_wc)
1133 return (0);
1134 break;
1136 ok = 0;
1137 if ((k = *name++).w_wc == EOS)
1138 return (0);
1139 if ((negate_range = (pat->w_at == M_QUOTE &&
1140 pat->w_wc == M_NOT)) != 0)
1141 ++pat;
1142 while (((c = *pat++).w_at != M_QUOTE) ||
1143 c.w_wc != M_END) {
1144 if (c.w_at == M_QUOTE && c.w_wc == M_CLASS) {
1145 wcat_t cc;
1147 cc.w_at = pat->w_at;
1148 cc.w_wc = pat->w_wc;
1149 if (iswctype(k.w_wc, cc.w_wc))
1150 ok = 1;
1151 ++pat;
1153 if (pat->w_at == M_QUOTE &&
1154 pat->w_wc == M_RNG) {
1155 if (c.w_wc <= k.w_wc &&
1156 k.w_wc <= pat[1].w_wc)
1157 ok = 1;
1158 pat += 2;
1159 } else if (c.w_wc == k.w_wc)
1160 ok = 1;
1162 if (ok == negate_range)
1163 return (0);
1164 break;
1165 default:
1166 k = *name++;
1167 if (k.w_at != c.w_at || k.w_wc != c.w_wc)
1168 return (0);
1169 break;
1172 return (name->w_wc == EOS);
1176 * Extended globfree() function, selected by #pragma redefine_extname
1177 * in glob.h with the external name _globfree_ext() .
1179 void
1180 _globfree_ext(glob_t *pglob)
1182 int i;
1183 char **pp;
1185 if (pglob->gl_pathv != NULL) {
1186 pp = pglob->gl_pathv + pglob->gl_offs;
1187 for (i = pglob->gl_pathc; i--; ++pp)
1188 free(*pp);
1189 free(pglob->gl_pathv);
1190 pglob->gl_pathv = NULL;
1192 if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0 &&
1193 pglob->gl_statv != NULL) {
1194 for (i = 0; i < pglob->gl_pathc; i++) {
1195 free(pglob->gl_statv[i]);
1197 free(pglob->gl_statv);
1198 pglob->gl_statv = NULL;
1202 static DIR *
1203 g_opendir(wcat_t *str, glob_t *pglob)
1205 char buf[MAXPATHLEN];
1207 if (str->w_wc == EOS)
1208 (void) strlcpy(buf, ".", sizeof (buf));
1209 else {
1210 if (g_Ctoc(str, buf, sizeof (buf)))
1211 return (NULL);
1214 if (pglob->gl_flags & GLOB_ALTDIRFUNC)
1215 return ((*pglob->gl_opendir)(buf));
1217 return (opendir(buf));
1220 static int
1221 g_lstat(wcat_t *fn, struct stat *sb, glob_t *pglob)
1223 char buf[MAXPATHLEN];
1225 if (g_Ctoc(fn, buf, sizeof (buf)))
1226 return (-1);
1227 if (pglob->gl_flags & GLOB_ALTDIRFUNC)
1228 return ((*pglob->gl_lstat)(buf, sb));
1229 return (lstat(buf, sb));
1232 static int
1233 g_stat(wcat_t *fn, struct stat *sb, glob_t *pglob)
1235 char buf[MAXPATHLEN];
1237 if (g_Ctoc(fn, buf, sizeof (buf)))
1238 return (-1);
1239 if (pglob->gl_flags & GLOB_ALTDIRFUNC)
1240 return ((*pglob->gl_stat)(buf, sb));
1241 return (stat(buf, sb));
1244 static wcat_t *
1245 g_strchr(const wcat_t *str, wchar_t ch)
1247 do {
1248 if (str->w_at == 0 && str->w_wc == ch)
1249 return ((wcat_t *)str);
1250 } while ((str++)->w_wc != EOS);
1251 return (NULL);
1254 static int
1255 g_Ctoc(const wcat_t *str, char *buf, uint_t len)
1257 int n;
1258 wchar_t w;
1260 while (len >= MB_LEN_MAX) {
1261 w = (str++)->w_wc;
1262 if ((n = wctomb(buf, w)) > 0) {
1263 len -= n;
1264 buf += n;
1266 if (n < 0)
1267 break;
1268 if (w == EOS)
1269 return (0);
1271 return (1);
1274 #if defined(_LP64) || _FILE_OFFSET_BITS != 64
1276 /* glob() function with legacy glob structure */
1278 old_glob(const char *pattern, int flags, int (*errfunc)(const char *, int),
1279 old_glob_t *pglob)
1282 glob_t gl;
1283 int rv;
1285 flags &= GLOB_POSIX;
1287 (void) memset(&gl, 0, sizeof (gl));
1290 * Copy all the members, old to new. There's
1291 * really no point in micro-optimizing the copying.
1292 * Other members are set to zero.
1294 gl.gl_pathc = pglob->gl_pathc;
1295 gl.gl_pathv = pglob->gl_pathv;
1296 gl.gl_offs = pglob->gl_offs;
1297 gl.gl_pathp = pglob->gl_pathp;
1298 gl.gl_pathn = pglob->gl_pathn;
1300 rv = _glob_ext(pattern, flags, errfunc, &gl);
1303 * Copy all the members, new to old. There's
1304 * really no point in micro-optimizing the copying.
1306 pglob->gl_pathc = gl.gl_pathc;
1307 pglob->gl_pathv = gl.gl_pathv;
1308 pglob->gl_offs = gl.gl_offs;
1309 pglob->gl_pathp = gl.gl_pathp;
1310 pglob->gl_pathn = gl.gl_pathn;
1312 return (rv);
1315 /* globfree() function with legacy glob structure */
1316 void
1317 old_globfree(old_glob_t *pglob)
1319 glob_t gl;
1321 (void) memset(&gl, 0, sizeof (gl));
1324 * Copy all the members, old to new. There's
1325 * really no point in micro-optimizing the copying.
1326 * Other members are set to zero.
1328 gl.gl_pathc = pglob->gl_pathc;
1329 gl.gl_pathv = pglob->gl_pathv;
1330 gl.gl_offs = pglob->gl_offs;
1331 gl.gl_pathp = pglob->gl_pathp;
1332 gl.gl_pathn = pglob->gl_pathn;
1334 _globfree_ext(&gl);
1337 * Copy all the members, new to old. There's
1338 * really no point in micro-optimizing the copying.
1340 pglob->gl_pathc = gl.gl_pathc;
1341 pglob->gl_pathv = gl.gl_pathv;
1342 pglob->gl_offs = gl.gl_offs;
1343 pglob->gl_pathp = gl.gl_pathp;
1344 pglob->gl_pathn = gl.gl_pathn;
1348 #endif /* _LP64 || _FILE_OFFSET_BITS != 64 */