26763: fix problem on failed cd -s to relative path
[zsh.git] / Src / glob.c
blob5000ff4574ce0a360f49a22fc3328381bc08996b
1 /*
2 * glob.c - filename generation
4 * This file is part of zsh, the Z shell.
6 * Copyright (c) 1992-1997 Paul Falstad
7 * All rights reserved.
9 * Permission is hereby granted, without written agreement and without
10 * license or royalty fees, to use, copy, modify, and distribute this
11 * software and to distribute modified versions of this software for any
12 * purpose, provided that the above copyright notice and the following
13 * two paragraphs appear in all copies of this software.
15 * In no event shall Paul Falstad or the Zsh Development Group be liable
16 * to any party for direct, indirect, special, incidental, or consequential
17 * damages arising out of the use of this software and its documentation,
18 * even if Paul Falstad and the Zsh Development Group have been advised of
19 * the possibility of such damage.
21 * Paul Falstad and the Zsh Development Group specifically disclaim any
22 * warranties, including, but not limited to, the implied warranties of
23 * merchantability and fitness for a particular purpose. The software
24 * provided hereunder is on an "as is" basis, and Paul Falstad and the
25 * Zsh Development Group have no obligation to provide maintenance,
26 * support, updates, enhancements, or modifications.
30 #include "zsh.mdh"
31 #include "glob.pro"
33 #if defined(OFF_T_IS_64_BIT) && defined(__GNUC__)
34 # define ALIGN64 __attribute__((aligned(8)))
35 #else
36 # define ALIGN64
37 #endif
39 /* flag for CSHNULLGLOB */
41 typedef struct gmatch *Gmatch;
43 struct gmatch {
44 char *name;
46 * Array of sort strings: one for each GS_EXEC sort type in
47 * the glob qualifiers.
49 char **sortstrs;
50 off_t size ALIGN64;
51 long atime;
52 long mtime;
53 long ctime;
54 long links;
55 off_t _size ALIGN64;
56 long _atime;
57 long _mtime;
58 long _ctime;
59 long _links;
60 #ifdef GET_ST_ATIME_NSEC
61 long ansec;
62 long _ansec;
63 #endif
64 #ifdef GET_ST_MTIME_NSEC
65 long mnsec;
66 long _mnsec;
67 #endif
68 #ifdef GET_ST_CTIME_NSEC
69 long cnsec;
70 long _cnsec;
71 #endif
74 #define GS_NAME 1
75 #define GS_DEPTH 2
76 #define GS_EXEC 4
78 #define GS_SHIFT_BASE 8
80 #define GS_SIZE (GS_SHIFT_BASE)
81 #define GS_ATIME (GS_SHIFT_BASE << 1)
82 #define GS_MTIME (GS_SHIFT_BASE << 2)
83 #define GS_CTIME (GS_SHIFT_BASE << 3)
84 #define GS_LINKS (GS_SHIFT_BASE << 4)
86 #define GS_SHIFT 5
87 #define GS__SIZE (GS_SIZE << GS_SHIFT)
88 #define GS__ATIME (GS_ATIME << GS_SHIFT)
89 #define GS__MTIME (GS_MTIME << GS_SHIFT)
90 #define GS__CTIME (GS_CTIME << GS_SHIFT)
91 #define GS__LINKS (GS_LINKS << GS_SHIFT)
93 #define GS_DESC (GS_SHIFT_BASE << (2*GS_SHIFT))
94 #define GS_NONE (GS_SHIFT_BASE << (2*GS_SHIFT+1))
96 #define GS_NORMAL (GS_SIZE | GS_ATIME | GS_MTIME | GS_CTIME | GS_LINKS)
97 #define GS_LINKED (GS_NORMAL << GS_SHIFT)
99 /**/
100 int badcshglob;
102 /**/
103 int pathpos; /* position in pathbuf (needed by pattern code) */
105 /**/
106 char *pathbuf; /* pathname buffer (needed by pattern code) */
108 typedef struct stat *Statptr; /* This makes the Ultrix compiler happy. Go figure. */
110 /* modifier for unit conversions */
112 #define TT_DAYS 0
113 #define TT_HOURS 1
114 #define TT_MINS 2
115 #define TT_WEEKS 3
116 #define TT_MONTHS 4
117 #define TT_SECONDS 5
119 #define TT_BYTES 0
120 #define TT_POSIX_BLOCKS 1
121 #define TT_KILOBYTES 2
122 #define TT_MEGABYTES 3
125 typedef int (*TestMatchFunc) _((char *, struct stat *, off_t, char *));
127 struct qual {
128 struct qual *next; /* Next qualifier, must match */
129 struct qual *or; /* Alternative set of qualifiers to match */
130 TestMatchFunc func; /* Function to call to test match */
131 off_t data ALIGN64; /* Argument passed to function */
132 int sense; /* Whether asserting or negating */
133 int amc; /* Flag for which time to test (a, m, c) */
134 int range; /* Whether to test <, > or = (as per signum) */
135 int units; /* Multiplier for time or size, respectively */
136 char *sdata; /* currently only: expression to eval */
139 /* Prefix, suffix for doing zle trickery */
141 /**/
142 mod_export char *glob_pre, *glob_suf;
144 /* Element of a glob sort */
145 struct globsort {
146 /* Sort type */
147 int tp;
148 /* Sort code to eval, if type is GS_EXEC */
149 char *exec;
152 /* Maximum entries in sort array */
153 #define MAX_SORTS (12)
155 /* struct to easily save/restore current state */
157 struct globdata {
158 int gd_pathpos;
159 char *gd_pathbuf;
161 int gd_matchsz; /* size of matchbuf */
162 int gd_matchct; /* number of matches found */
163 int gd_pathbufsz; /* size of pathbuf */
164 int gd_pathbufcwd; /* where did we chdir()'ed */
165 Gmatch gd_matchbuf; /* array of matches */
166 Gmatch gd_matchptr; /* &matchbuf[matchct] */
167 char *gd_colonmod; /* colon modifiers in qualifier list */
169 /* Qualifiers pertaining to current pattern */
170 struct qual *gd_quals;
172 /* Other state values for current pattern */
173 int gd_qualct, gd_qualorct;
174 int gd_range, gd_amc, gd_units;
175 int gd_gf_nullglob, gd_gf_markdirs, gd_gf_noglobdots, gd_gf_listtypes;
176 int gd_gf_numsort;
177 int gd_gf_follow, gd_gf_sorts, gd_gf_nsorts;
178 struct globsort gd_gf_sortlist[MAX_SORTS];
180 char *gd_glob_pre, *gd_glob_suf;
183 /* The variable with the current globbing state and convenience macros */
185 static struct globdata curglobdata;
187 #define matchsz (curglobdata.gd_matchsz)
188 #define matchct (curglobdata.gd_matchct)
189 #define pathbufsz (curglobdata.gd_pathbufsz)
190 #define pathbufcwd (curglobdata.gd_pathbufcwd)
191 #define matchbuf (curglobdata.gd_matchbuf)
192 #define matchptr (curglobdata.gd_matchptr)
193 #define colonmod (curglobdata.gd_colonmod)
194 #define quals (curglobdata.gd_quals)
195 #define qualct (curglobdata.gd_qualct)
196 #define qualorct (curglobdata.gd_qualorct)
197 #define g_range (curglobdata.gd_range)
198 #define g_amc (curglobdata.gd_amc)
199 #define g_units (curglobdata.gd_units)
200 #define gf_nullglob (curglobdata.gd_gf_nullglob)
201 #define gf_markdirs (curglobdata.gd_gf_markdirs)
202 #define gf_noglobdots (curglobdata.gd_gf_noglobdots)
203 #define gf_listtypes (curglobdata.gd_gf_listtypes)
204 #define gf_numsort (curglobdata.gd_gf_numsort)
205 #define gf_follow (curglobdata.gd_gf_follow)
206 #define gf_sorts (curglobdata.gd_gf_sorts)
207 #define gf_nsorts (curglobdata.gd_gf_nsorts)
208 #define gf_sortlist (curglobdata.gd_gf_sortlist)
210 /* and macros for save/restore */
212 #define save_globstate(N) \
213 do { \
214 memcpy(&(N), &curglobdata, sizeof(struct globdata)); \
215 (N).gd_pathpos = pathpos; \
216 (N).gd_pathbuf = pathbuf; \
217 (N).gd_glob_pre = glob_pre; \
218 (N).gd_glob_suf = glob_suf; \
219 pathbuf = NULL; \
220 } while (0)
222 #define restore_globstate(N) \
223 do { \
224 zfree(pathbuf, pathbufsz); \
225 memcpy(&curglobdata, &(N), sizeof(struct globdata)); \
226 pathpos = (N).gd_pathpos; \
227 pathbuf = (N).gd_pathbuf; \
228 glob_pre = (N).gd_glob_pre; \
229 glob_suf = (N).gd_glob_suf; \
230 } while (0)
232 /* pathname component in filename patterns */
234 struct complist {
235 Complist next;
236 Patprog pat;
237 int closure; /* 1 if this is a (foo/)# */
238 int follow; /* 1 to go thru symlinks */
241 /* Add a component to pathbuf: This keeps track of how *
242 * far we are into a file name, since each path component *
243 * must be matched separately. */
245 /**/
246 static void
247 addpath(char *s, int l)
249 DPUTS(!pathbuf, "BUG: pathbuf not initialised");
250 while (pathpos + l + 1 >= pathbufsz)
251 pathbuf = realloc(pathbuf, pathbufsz *= 2);
252 while (l--)
253 pathbuf[pathpos++] = *s++;
254 pathbuf[pathpos++] = '/';
255 pathbuf[pathpos] = '\0';
258 /* stat the filename s appended to pathbuf. l should be true for lstat, *
259 * false for stat. If st is NULL, the file is only checked for existance. *
260 * s == "" is treated as s == ".". This is necessary since on most systems *
261 * foo/ can be used to reference a non-directory foo. Returns nonzero if *
262 * the file does not exists. */
264 /**/
265 static int
266 statfullpath(const char *s, struct stat *st, int l)
268 char buf[PATH_MAX];
270 DPUTS(strlen(s) + !*s + pathpos - pathbufcwd >= PATH_MAX,
271 "BUG: statfullpath(): pathname too long");
272 strcpy(buf, pathbuf + pathbufcwd);
273 strcpy(buf + pathpos - pathbufcwd, s);
274 if (!*s && *buf) {
276 * Don't add the '.' if the path so far is empty, since
277 * then we get bogus empty strings inserted as files.
279 buf[pathpos - pathbufcwd] = '.';
280 buf[pathpos - pathbufcwd + 1] = '\0';
281 l = 0;
283 unmetafy(buf, NULL);
284 if (!st) {
285 char lbuf[1];
286 return access(buf, F_OK) && (!l || readlink(buf, lbuf, 1) < 0);
288 return l ? lstat(buf, st) : stat(buf, st);
291 /* This may be set by qualifier functions to an array of strings to insert
292 * into the list instead of the original string. */
294 char **inserts;
296 /* add a match to the list */
298 /**/
299 static void
300 insert(char *s, int checked)
302 struct stat buf, buf2, *bp;
303 char *news = s;
304 int statted = 0;
306 queue_signals();
307 inserts = NULL;
309 if (gf_listtypes || gf_markdirs) {
310 /* Add the type marker to the end of the filename */
311 mode_t mode;
312 checked = statted = 1;
313 if (statfullpath(s, &buf, 1)) {
314 unqueue_signals();
315 return;
317 mode = buf.st_mode;
318 if (gf_follow) {
319 if (!S_ISLNK(mode) || statfullpath(s, &buf2, 0))
320 memcpy(&buf2, &buf, sizeof(buf));
321 statted |= 2;
322 mode = buf2.st_mode;
324 if (gf_listtypes || S_ISDIR(mode)) {
325 int ll = strlen(s);
327 news = (char *) hcalloc(ll + 2);
328 strcpy(news, s);
329 news[ll] = file_type(mode);
330 news[ll + 1] = '\0';
333 if (qualct || qualorct) {
334 /* Go through the qualifiers, rejecting the file if appropriate */
335 struct qual *qo, *qn;
337 if (!statted && statfullpath(s, &buf, 1)) {
338 unqueue_signals();
339 return;
341 news = dyncat(pathbuf, news);
343 statted = 1;
344 qo = quals;
345 for (qn = qo; qn && qn->func;) {
346 g_range = qn->range;
347 g_amc = qn->amc;
348 g_units = qn->units;
349 if ((qn->sense & 2) && !(statted & 2)) {
350 /* If (sense & 2), we're following links */
351 if (!S_ISLNK(buf.st_mode) || statfullpath(s, &buf2, 0))
352 memcpy(&buf2, &buf, sizeof(buf));
353 statted |= 2;
355 bp = (qn->sense & 2) ? &buf2 : &buf;
356 /* Reject the file if the function returned zero *
357 * and the sense was positive (sense&1 == 0), or *
358 * vice versa. */
359 if ((!((qn->func) (news, bp, qn->data, qn->sdata))
360 ^ qn->sense) & 1) {
361 /* Try next alternative, or return if there are no more */
362 if (!(qo = qo->or)) {
363 unqueue_signals();
364 return;
366 qn = qo;
367 continue;
369 qn = qn->next;
371 } else if (!checked) {
372 if (statfullpath(s, NULL, 1)) {
373 unqueue_signals();
374 return;
376 statted = 1;
377 news = dyncat(pathbuf, news);
378 } else
379 news = dyncat(pathbuf, news);
381 while (!inserts || (news = dupstring(*inserts++))) {
382 if (colonmod) {
383 /* Handle the remainder of the qualifier: e.g. (:r:s/foo/bar/). */
384 s = colonmod;
385 modify(&news, &s);
387 if (!statted && (gf_sorts & GS_NORMAL)) {
388 statfullpath(s, &buf, 1);
389 statted = 1;
391 if (!(statted & 2) && (gf_sorts & GS_LINKED)) {
392 if (statted) {
393 if (!S_ISLNK(buf.st_mode) || statfullpath(s, &buf2, 0))
394 memcpy(&buf2, &buf, sizeof(buf));
395 } else if (statfullpath(s, &buf2, 0))
396 statfullpath(s, &buf2, 1);
397 statted |= 2;
399 matchptr->name = news;
400 if (statted & 1) {
401 matchptr->size = buf.st_size;
402 matchptr->atime = buf.st_atime;
403 matchptr->mtime = buf.st_mtime;
404 matchptr->ctime = buf.st_ctime;
405 matchptr->links = buf.st_nlink;
406 #ifdef GET_ST_ATIME_NSEC
407 matchptr->ansec = GET_ST_ATIME_NSEC(buf);
408 #endif
409 #ifdef GET_ST_MTIME_NSEC
410 matchptr->mnsec = GET_ST_MTIME_NSEC(buf);
411 #endif
412 #ifdef GET_ST_CTIME_NSEC
413 matchptr->cnsec = GET_ST_CTIME_NSEC(buf);
414 #endif
416 if (statted & 2) {
417 matchptr->_size = buf2.st_size;
418 matchptr->_atime = buf2.st_atime;
419 matchptr->_mtime = buf2.st_mtime;
420 matchptr->_ctime = buf2.st_ctime;
421 matchptr->_links = buf2.st_nlink;
422 #ifdef GET_ST_ATIME_NSEC
423 matchptr->_ansec = GET_ST_ATIME_NSEC(buf);
424 #endif
425 #ifdef GET_ST_MTIME_NSEC
426 matchptr->_mnsec = GET_ST_MTIME_NSEC(buf);
427 #endif
428 #ifdef GET_ST_CTIME_NSEC
429 matchptr->_cnsec = GET_ST_CTIME_NSEC(buf);
430 #endif
432 matchptr++;
434 if (++matchct == matchsz) {
435 matchbuf = (Gmatch )realloc((char *)matchbuf,
436 sizeof(struct gmatch) * (matchsz *= 2));
438 matchptr = matchbuf + matchct;
440 if (!inserts)
441 break;
443 unqueue_signals();
446 /* Check to see if str is eligible for filename generation. */
448 /**/
449 mod_export int
450 haswilds(char *str)
452 /* `[' and `]' are legal even if bad patterns are usually not. */
453 if ((*str == Inbrack || *str == Outbrack) && !str[1])
454 return 0;
456 /* If % is immediately followed by ?, then that ? is *
457 * not treated as a wildcard. This is so you don't have *
458 * to escape job references such as %?foo. */
459 if (str[0] == '%' && str[1] == Quest)
460 str[1] = '?';
462 for (; *str; str++) {
463 switch (*str) {
464 case Inpar:
465 case Bar:
466 case Star:
467 case Inbrack:
468 case Inang:
469 case Quest:
470 return 1;
471 case Pound:
472 case Hat:
473 if (isset(EXTENDEDGLOB))
474 return 1;
475 break;
478 return 0;
481 /* Do the globbing: scanner is called recursively *
482 * with successive bits of the path until we've *
483 * tried all of it. */
485 /**/
486 static void
487 scanner(Complist q)
489 Patprog p;
490 int closure;
491 int pbcwdsav = pathbufcwd;
492 int errssofar = errsfound;
493 struct dirsav ds;
495 ds.ino = ds.dev = 0;
496 ds.dirname = NULL;
497 ds.dirfd = ds.level = -1;
498 if (!q)
499 return;
501 if ((closure = q->closure)) {
502 /* (foo/)# - match zero or more dirs */
503 if (q->closure == 2) /* (foo/)## - match one or more dirs */
504 q->closure = 1;
505 else
506 scanner(q->next);
508 p = q->pat;
509 /* Now the actual matching for the current path section. */
510 if (p->flags & PAT_PURES) {
512 * It's a straight string to the end of the path section.
514 char *str = (char *)p + p->startoff;
515 int l = p->patmlen;
517 if (l + !l + pathpos - pathbufcwd >= PATH_MAX) {
518 int err;
520 if (l >= PATH_MAX)
521 return;
522 err = lchdir(pathbuf + pathbufcwd, &ds, 0);
523 if (err == -1)
524 return;
525 if (err) {
526 zerr("current directory lost during glob");
527 return;
529 pathbufcwd = pathpos;
531 if (q->next) {
532 /* Not the last path section. Just add it to the path. */
533 int oppos = pathpos;
535 if (!errflag) {
536 int add = 1;
538 if (q->closure && *pathbuf) {
539 if (!strcmp(str, "."))
540 add = 0;
541 else if (!strcmp(str, "..")) {
542 struct stat sc, sr;
544 add = (stat("/", &sr) || stat(pathbuf, &sc) ||
545 sr.st_ino != sc.st_ino ||
546 sr.st_dev != sc.st_dev);
549 if (add) {
550 addpath(str, l);
551 if (!closure || !statfullpath("", NULL, 1))
552 scanner((q->closure) ? q : q->next);
553 pathbuf[pathpos = oppos] = '\0';
556 } else {
557 if (str[l])
558 str = dupstrpfx(str, l);
559 insert(str, 0);
561 } else {
562 /* Do pattern matching on current path section. */
563 char *fn = pathbuf[pathbufcwd] ? unmeta(pathbuf + pathbufcwd) : ".";
564 int dirs = !!q->next;
565 DIR *lock = opendir(fn);
566 char *subdirs = NULL;
567 int subdirlen = 0;
569 if (lock == NULL)
570 return;
571 while ((fn = zreaddir(lock, 1)) && !errflag) {
572 /* prefix and suffix are zle trickery */
573 if (!dirs && !colonmod &&
574 ((glob_pre && !strpfx(glob_pre, fn))
575 || (glob_suf && !strsfx(glob_suf, fn))))
576 continue;
577 errsfound = errssofar;
578 if (pattry(p, fn)) {
579 /* if this name matchs the pattern... */
580 if (pbcwdsav == pathbufcwd &&
581 strlen(fn) + pathpos - pathbufcwd >= PATH_MAX) {
582 int err;
584 DPUTS(pathpos == pathbufcwd,
585 "BUG: filename longer than PATH_MAX");
586 err = lchdir(pathbuf + pathbufcwd, &ds, 0);
587 if (err == -1)
588 break;
589 if (err) {
590 zerr("current directory lost during glob");
591 break;
593 pathbufcwd = pathpos;
595 if (dirs) {
596 int l;
599 * If not the last component in the path:
601 * If we made an approximation in the new path segment,
602 * then it is possible we made too many errors. For
603 * example, (ab)#(cb)# will match the directory abcb
604 * with one error if allowed to, even though it can
605 * match with none. This will stop later parts of the
606 * path matching, so we need to check by reducing the
607 * maximum number of errors and seeing if the directory
608 * still matches. Luckily, this is not a terribly
609 * common case, since complex patterns typically occur
610 * in the last part of the path which is not affected
611 * by this problem.
613 if (errsfound > errssofar) {
614 forceerrs = errsfound - 1;
615 while (forceerrs >= errssofar) {
616 errsfound = errssofar;
617 if (!pattry(p, fn))
618 break;
619 forceerrs = errsfound - 1;
621 errsfound = forceerrs + 1;
622 forceerrs = -1;
624 if (closure) {
625 /* if matching multiple directories */
626 struct stat buf;
628 if (statfullpath(fn, &buf, !q->follow)) {
629 if (errno != ENOENT && errno != EINTR &&
630 errno != ENOTDIR && !errflag) {
631 zwarn("%e: %s", errno, fn);
633 continue;
635 if (!S_ISDIR(buf.st_mode))
636 continue;
638 l = strlen(fn) + 1;
639 subdirs = hrealloc(subdirs, subdirlen, subdirlen + l
640 + sizeof(int));
641 strcpy(subdirs + subdirlen, fn);
642 subdirlen += l;
643 /* store the count of errors made so far, too */
644 memcpy(subdirs + subdirlen, (char *)&errsfound,
645 sizeof(int));
646 subdirlen += sizeof(int);
647 } else
648 /* if the last filename component, just add it */
649 insert(fn, 1);
652 closedir(lock);
653 if (subdirs) {
654 int oppos = pathpos;
656 for (fn = subdirs; fn < subdirs+subdirlen; ) {
657 int l = strlen(fn);
658 addpath(fn, l);
659 fn += l + 1;
660 memcpy((char *)&errsfound, fn, sizeof(int));
661 fn += sizeof(int);
662 scanner((q->closure) ? q : q->next); /* scan next level */
663 pathbuf[pathpos = oppos] = '\0';
665 hrealloc(subdirs, subdirlen, 0);
668 if (pbcwdsav < pathbufcwd) {
669 if (restoredir(&ds))
670 zerr("current directory lost during glob");
671 zsfree(ds.dirname);
672 if (ds.dirfd >= 0)
673 close(ds.dirfd);
674 pathbufcwd = pbcwdsav;
678 /* This function tokenizes a zsh glob pattern */
680 /**/
681 static Complist
682 parsecomplist(char *instr)
684 Patprog p1;
685 Complist l1;
686 char *str;
687 int compflags = gf_noglobdots ? (PAT_FILE|PAT_NOGLD) : PAT_FILE;
689 if (instr[0] == Star && instr[1] == Star &&
690 (instr[2] == '/' || (instr[2] == Star && instr[3] == '/'))) {
691 /* Match any number of directories. */
692 int follow;
694 /* with three stars, follow symbolic links */
695 follow = (instr[2] == Star);
696 instr += (3 + follow);
698 /* Now get the next path component if there is one. */
699 l1 = (Complist) zhalloc(sizeof *l1);
700 if ((l1->next = parsecomplist(instr)) == NULL) {
701 errflag = 1;
702 return NULL;
704 l1->pat = patcompile(NULL, compflags | PAT_ANY, NULL);
705 l1->closure = 1; /* ...zero or more times. */
706 l1->follow = follow;
707 return l1;
710 /* Parse repeated directories such as (dir/)# and (dir/)## */
711 if (*(str = instr) == Inpar && !skipparens(Inpar, Outpar, (char **)&str) &&
712 *str == Pound && isset(EXTENDEDGLOB) && str[-2] == '/') {
713 instr++;
714 if (!(p1 = patcompile(instr, compflags, &instr)))
715 return NULL;
716 if (instr[0] == '/' && instr[1] == Outpar && instr[2] == Pound) {
717 int pdflag = 0;
719 instr += 3;
720 if (*instr == Pound) {
721 pdflag = 1;
722 instr++;
724 l1 = (Complist) zhalloc(sizeof *l1);
725 l1->pat = p1;
726 l1->closure = 1 + pdflag;
727 l1->follow = 0;
728 l1->next = parsecomplist(instr);
729 return (l1->pat) ? l1 : NULL;
731 } else {
732 /* parse single path component */
733 if (!(p1 = patcompile(instr, compflags|PAT_FILET, &instr)))
734 return NULL;
735 /* then do the remaining path components */
736 if (*instr == '/' || !*instr) {
737 int ef = *instr == '/';
739 l1 = (Complist) zhalloc(sizeof *l1);
740 l1->pat = p1;
741 l1->closure = 0;
742 l1->next = ef ? parsecomplist(instr+1) : NULL;
743 return (ef && !l1->next) ? NULL : l1;
746 errflag = 1;
747 return NULL;
750 /* turn a string into a Complist struct: this has path components */
752 /**/
753 static Complist
754 parsepat(char *str)
756 long assert;
757 int ignore;
759 patcompstart();
761 * Check for initial globbing flags, so that they don't form
762 * a bogus path component.
764 if ((*str == Inpar && str[1] == Pound && isset(EXTENDEDGLOB)) ||
765 (isset(KSHGLOB) && *str == '@' && str[1] == Inpar &&
766 str[2] == Pound)) {
767 str += (*str == Inpar) ? 2 : 3;
768 if (!patgetglobflags(&str, &assert, &ignore))
769 return NULL;
772 /* Now there is no (#X) in front, we can check the path. */
773 if (!pathbuf)
774 pathbuf = zalloc(pathbufsz = PATH_MAX);
775 DPUTS(pathbufcwd, "BUG: glob changed directory");
776 if (*str == '/') { /* pattern has absolute path */
777 str++;
778 pathbuf[0] = '/';
779 pathbuf[pathpos = 1] = '\0';
780 } else /* pattern is relative to pwd */
781 pathbuf[pathpos = 0] = '\0';
783 return parsecomplist(str);
786 /* get number after qualifier */
788 /**/
789 static off_t
790 qgetnum(char **s)
792 off_t v = 0;
794 if (!idigit(**s)) {
795 zerr("number expected");
796 return 0;
798 while (idigit(**s))
799 v = v * 10 + *(*s)++ - '0';
800 return v;
803 /* get mode spec after qualifier */
805 /**/
806 static zlong
807 qgetmodespec(char **s)
809 zlong yes = 0, no = 0, val, mask, t;
810 char *p = *s, c, how, end;
812 if ((c = *p) == '=' || c == Equals || c == '+' || c == '-' ||
813 c == '?' || c == Quest || (c >= '0' && c <= '7')) {
814 end = 0;
815 c = 0;
816 } else {
817 end = (c == '<' ? '>' :
818 (c == '[' ? ']' :
819 (c == '{' ? '}' :
820 (c == Inang ? Outang :
821 (c == Inbrack ? Outbrack :
822 (c == Inbrace ? Outbrace : c))))));
823 p++;
825 do {
826 mask = 0;
827 while (((c = *p) == 'u' || c == 'g' || c == 'o' || c == 'a') && end) {
828 switch (c) {
829 case 'o': mask |= 01007; break;
830 case 'g': mask |= 02070; break;
831 case 'u': mask |= 04700; break;
832 case 'a': mask |= 07777; break;
834 p++;
836 how = ((c == '+' || c == '-') ? c : '=');
837 if (c == '+' || c == '-' || c == '=' || c == Equals)
838 p++;
839 val = 0;
840 if (mask) {
841 while ((c = *p++) != ',' && c != end) {
842 switch (c) {
843 case 'x': val |= 00111; break;
844 case 'w': val |= 00222; break;
845 case 'r': val |= 00444; break;
846 case 's': val |= 06000; break;
847 case 't': val |= 01000; break;
848 case '0': case '1': case '2': case '3':
849 case '4': case '5': case '6': case '7':
850 t = ((zlong) c - '0');
851 val |= t | (t << 3) | (t << 6);
852 break;
853 default:
854 zerr("invalid mode specification");
855 return 0;
858 if (how == '=' || how == '+') {
859 yes |= val & mask;
860 val = ~val;
862 if (how == '=' || how == '-')
863 no |= val & mask;
864 } else if (!(end && c == end) && c != ',' && c) {
865 t = 07777;
866 while ((c = *p) == '?' || c == Quest ||
867 (c >= '0' && c <= '7')) {
868 if (c == '?' || c == Quest) {
869 t = (t << 3) | 7;
870 val <<= 3;
871 } else {
872 t <<= 3;
873 val = (val << 3) | ((zlong) c - '0');
875 p++;
877 if (end && c != end && c != ',') {
878 zerr("invalid mode specification");
879 return 0;
881 if (how == '=') {
882 yes = (yes & ~t) | val;
883 no = (no & ~t) | (~val & ~t);
884 } else if (how == '+')
885 yes |= val;
886 else
887 no |= val;
888 } else {
889 zerr("invalid mode specification");
890 return 0;
892 } while (end && c != end);
894 *s = p;
895 return ((yes & 07777) | ((no & 07777) << 12));
898 static int
899 gmatchcmp(Gmatch a, Gmatch b)
901 int i;
902 off_t r = 0L;
903 struct globsort *s;
904 char **asortstrp = NULL, **bsortstrp = NULL;
906 for (i = gf_nsorts, s = gf_sortlist; i; i--, s++) {
907 switch (s->tp & ~GS_DESC) {
908 case GS_NAME:
909 r = zstrcmp(b->name, a->name, gf_numsort ? SORTIT_NUMERICALLY : 0);
910 break;
911 case GS_DEPTH:
913 char *aptr = a->name, *bptr = b->name;
914 int slasha = 0, slashb = 0;
915 /* Count slashes. Trailing slashes don't count. */
916 while (*aptr && *aptr == *bptr)
917 aptr++, bptr++;
918 if (*aptr)
919 for (; aptr[1]; aptr++)
920 if (*aptr == '/') {
921 slasha = 1;
922 break;
924 if (*bptr)
925 for (; bptr[1]; bptr++)
926 if (*bptr == '/') {
927 slashb = 1;
928 break;
930 r = slasha - slashb;
932 break;
933 case GS_EXEC:
934 if (!asortstrp) {
935 asortstrp = a->sortstrs;
936 bsortstrp = b->sortstrs;
937 } else {
938 asortstrp++;
939 bsortstrp++;
941 r = zstrcmp(*bsortstrp, *asortstrp,
942 gf_numsort ? SORTIT_NUMERICALLY : 0);
943 break;
944 case GS_SIZE:
945 r = b->size - a->size;
946 break;
947 case GS_ATIME:
948 r = a->atime - b->atime;
949 #ifdef GET_ST_ATIME_NSEC
950 if (!r)
951 r = a->ansec - b->ansec;
952 #endif
953 break;
954 case GS_MTIME:
955 r = a->mtime - b->mtime;
956 #ifdef GET_ST_MTIME_NSEC
957 if (!r)
958 r = a->mnsec - b->mnsec;
959 #endif
960 break;
961 case GS_CTIME:
962 r = a->ctime - b->ctime;
963 #ifdef GET_ST_CTIME_NSEC
964 if (!r)
965 r = a->cnsec - b->cnsec;
966 #endif
967 break;
968 case GS_LINKS:
969 r = b->links - a->links;
970 break;
971 case GS__SIZE:
972 r = b->_size - a->_size;
973 break;
974 case GS__ATIME:
975 r = a->_atime - b->_atime;
976 #ifdef GET_ST_ATIME_NSEC
977 if (!r)
978 r = a->_ansec - b->_ansec;
979 #endif
980 break;
981 case GS__MTIME:
982 r = a->_mtime - b->_mtime;
983 #ifdef GET_ST_MTIME_NSEC
984 if (!r)
985 r = a->_mnsec - b->_mnsec;
986 #endif
987 break;
988 case GS__CTIME:
989 r = a->_ctime - b->_ctime;
990 #ifdef GET_ST_CTIME_NSEC
991 if (!r)
992 r = a->_cnsec - b->_cnsec;
993 #endif
994 break;
995 case GS__LINKS:
996 r = b->_links - a->_links;
997 break;
999 if (r)
1000 return (int) ((s->tp & GS_DESC) ? -r : r);
1002 return 0;
1006 * Duplicate a list of qualifiers using the `next' linkage (not the
1007 * `or' linkage). Return the head element and set *last (if last non-NULL)
1008 * to point to the last element of the new list. All allocation is on the
1009 * heap (or off the heap?)
1011 static struct qual *dup_qual_list(struct qual *orig, struct qual **lastp)
1013 struct qual *qfirst = NULL, *qlast = NULL;
1015 while (orig) {
1016 struct qual *qnew = (struct qual *)zhalloc(sizeof(struct qual));
1017 *qnew = *orig;
1018 qnew->next = qnew->or = NULL;
1020 if (!qfirst)
1021 qfirst = qnew;
1022 if (qlast)
1023 qlast->next = qnew;
1024 qlast = qnew;
1026 orig = orig->next;
1029 if (lastp)
1030 *lastp = qlast;
1031 return qfirst;
1036 * Get a glob string for execution, following e or + qualifiers.
1037 * Pointer is character after the e or +.
1040 /**/
1041 static char *
1042 glob_exec_string(char **sp)
1044 char sav, *tt, *sdata, *s = *sp;
1045 int plus;
1047 if (s[-1] == '+') {
1048 plus = 0;
1049 tt = itype_end(s, IIDENT, 0);
1050 if (tt == s)
1052 zerr("missing identifier after `+'");
1053 return NULL;
1055 } else {
1056 tt = get_strarg(s, &plus);
1057 if (!*tt)
1059 zerr("missing end of string");
1060 return NULL;
1064 sav = *tt;
1065 *tt = '\0';
1066 sdata = dupstring(s + plus);
1067 untokenize(sdata);
1068 *tt = sav;
1069 if (sav)
1070 *sp = tt + plus;
1071 else
1072 *sp = tt;
1074 return sdata;
1077 /* Main entry point to the globbing code for filename globbing. *
1078 * np points to a node in the list list which will be expanded *
1079 * into a series of nodes. */
1081 /**/
1082 void
1083 zglob(LinkList list, LinkNode np, int nountok)
1085 struct qual *qo, *qn, *ql;
1086 LinkNode node = prevnode(np);
1087 char *str; /* the pattern */
1088 int sl; /* length of the pattern */
1089 Complist q; /* pattern after parsing */
1090 char *ostr = (char *)getdata(np); /* the pattern before the parser */
1091 /* chops it up */
1092 int first = 0, end = -1; /* index of first match to return */
1093 /* and index+1 of the last match */
1094 struct globdata saved; /* saved glob state */
1095 int nobareglob = !isset(BAREGLOBQUAL);
1097 if (unset(GLOBOPT) || !haswilds(ostr)) {
1098 if (!nountok)
1099 untokenize(ostr);
1100 return;
1102 save_globstate(saved);
1104 str = dupstring(ostr);
1105 uremnode(list, np);
1107 /* quals will hold the complete list of qualifiers (file static). */
1108 quals = NULL;
1110 * qualct and qualorct indicate we have qualifiers in the last
1111 * alternative, or a set of alternatives, respectively. They
1112 * are not necessarily an accurate count, however.
1114 qualct = qualorct = 0;
1116 * colonmod is a concatenated list of all colon modifiers found in
1117 * all sets of qualifiers.
1119 colonmod = NULL;
1120 /* The gf_* flags are qualifiers which are applied globally. */
1121 gf_nullglob = isset(NULLGLOB);
1122 gf_markdirs = isset(MARKDIRS);
1123 gf_listtypes = gf_follow = 0;
1124 gf_noglobdots = unset(GLOBDOTS);
1125 gf_numsort = isset(NUMERICGLOBSORT);
1126 gf_sorts = gf_nsorts = 0;
1128 /* Check for qualifiers */
1129 while (!nobareglob || isset(EXTENDEDGLOB)) {
1130 struct qual *newquals;
1131 char *s;
1132 int sense, paren;
1133 off_t data;
1134 char *sdata, *newcolonmod;
1135 int (*func) _((char *, Statptr, off_t, char *));
1138 * Initialise state variables for current file pattern.
1139 * newquals is the root for the linked list of all qualifiers.
1140 * qo is the root of the current list of alternatives.
1141 * ql is the end of the current alternative where the `next' will go.
1142 * qn is the current qualifier node to be added.
1144 * Here is an attempt at a diagram. An `or' is added horizontally
1145 * to the top line, a `next' at the bottom of the right hand line.
1146 * `qn' is usually NULL unless a new `or' has just been added.
1148 * quals -> x -> x -> qo
1149 * | | |
1150 * x x x
1151 * | |
1152 * x ql
1154 * In fact, after each loop the complete set is in the file static
1155 * `quals'. Then, if we have a second set of qualifiers, we merge
1156 * the lists together. This is only tricky if one or both have an
1157 * `or' in them; then we need to distribute over all alternatives.
1159 newquals = qo = qn = ql = NULL;
1161 sl = strlen(str);
1162 if (str[sl - 1] != Outpar)
1163 break;
1165 /* Check these are really qualifiers, not a set of *
1166 * alternatives or exclusions. We can be more *
1167 * lenient with an explicit (#q) than with a bare *
1168 * set of qualifiers. */
1169 paren = 0;
1170 for (s = str + sl - 2; *s && (*s != Inpar || paren); s--) {
1171 switch (*s) {
1172 case Outpar:
1173 paren++; /*FALLTHROUGH*/
1174 case Bar:
1175 nobareglob = 1;
1176 break;
1177 case Tilde:
1178 if (isset(EXTENDEDGLOB))
1179 nobareglob = 1;
1180 break;
1181 case Inpar:
1182 paren--;
1183 break;
1186 if (*s != Inpar)
1187 break;
1188 if (isset(EXTENDEDGLOB) && s[1] == Pound) {
1189 if (s[2] == 'q') {
1190 *s = 0;
1191 s += 2;
1192 } else
1193 break;
1194 } else if (nobareglob)
1195 break;
1197 /* Real qualifiers found. */
1198 nobareglob = 1;
1199 sense = 0; /* bit 0 for match (0)/don't match (1) */
1200 /* bit 1 for follow links (2), don't (0) */
1201 data = 0; /* Any numerical argument required */
1202 sdata = NULL; /* Any list argument required */
1203 newcolonmod = NULL; /* Contains trailing colon modifiers */
1205 str[sl-1] = 0;
1206 *s++ = 0;
1207 while (*s && !newcolonmod) {
1208 func = (int (*) _((char *, Statptr, off_t, char *)))0;
1209 if (idigit(*s)) {
1210 /* Store numeric argument for qualifier */
1211 func = qualflags;
1212 data = 0;
1213 sdata = NULL;
1214 while (idigit(*s))
1215 data = data * 010 + (*s++ - '0');
1216 } else if (*s == ',') {
1217 /* A comma separates alternative sets of qualifiers */
1218 s++;
1219 sense = 0;
1220 if (qualct) {
1221 qn = (struct qual *)hcalloc(sizeof *qn);
1222 qo->or = qn;
1223 qo = qn;
1224 qualorct++;
1225 qualct = 0;
1226 ql = NULL;
1228 } else {
1229 switch (*s++) {
1230 case ':':
1231 /* Remaining arguments are history-type *
1232 * colon substitutions, handled separately. */
1233 newcolonmod = s - 1;
1234 untokenize(newcolonmod);
1235 if (colonmod) {
1236 /* remember we're searching backwards */
1237 colonmod = dyncat(newcolonmod, colonmod);
1238 } else
1239 colonmod = newcolonmod;
1240 break;
1241 case Hat:
1242 case '^':
1243 /* Toggle sense: go from positive to *
1244 * negative match and vice versa. */
1245 sense ^= 1;
1246 break;
1247 case '-':
1248 /* Toggle matching of symbolic links */
1249 sense ^= 2;
1250 break;
1251 case '@':
1252 /* Match symbolic links */
1253 func = qualislnk;
1254 break;
1255 case Equals:
1256 case '=':
1257 /* Match sockets */
1258 func = qualissock;
1259 break;
1260 case 'p':
1261 /* Match named pipes */
1262 func = qualisfifo;
1263 break;
1264 case '/':
1265 /* Match directories */
1266 func = qualisdir;
1267 break;
1268 case '.':
1269 /* Match regular files */
1270 func = qualisreg;
1271 break;
1272 case '%':
1273 /* Match special files: block, *
1274 * character or any device */
1275 if (*s == 'b')
1276 s++, func = qualisblk;
1277 else if (*s == 'c')
1278 s++, func = qualischr;
1279 else
1280 func = qualisdev;
1281 break;
1282 case Star:
1283 /* Match executable plain files */
1284 func = qualiscom;
1285 break;
1286 case 'R':
1287 /* Match world-readable files */
1288 func = qualflags;
1289 data = 0004;
1290 break;
1291 case 'W':
1292 /* Match world-writeable files */
1293 func = qualflags;
1294 data = 0002;
1295 break;
1296 case 'X':
1297 /* Match world-executable files */
1298 func = qualflags;
1299 data = 0001;
1300 break;
1301 case 'A':
1302 func = qualflags;
1303 data = 0040;
1304 break;
1305 case 'I':
1306 func = qualflags;
1307 data = 0020;
1308 break;
1309 case 'E':
1310 func = qualflags;
1311 data = 0010;
1312 break;
1313 case 'r':
1314 /* Match files readable by current process */
1315 func = qualflags;
1316 data = 0400;
1317 break;
1318 case 'w':
1319 /* Match files writeable by current process */
1320 func = qualflags;
1321 data = 0200;
1322 break;
1323 case 'x':
1324 /* Match files executable by current process */
1325 func = qualflags;
1326 data = 0100;
1327 break;
1328 case 's':
1329 /* Match setuid files */
1330 func = qualflags;
1331 data = 04000;
1332 break;
1333 case 'S':
1334 /* Match setgid files */
1335 func = qualflags;
1336 data = 02000;
1337 break;
1338 case 't':
1339 func = qualflags;
1340 data = 01000;
1341 break;
1342 case 'd':
1343 /* Match device files by device number *
1344 * (as given by stat's st_dev element). */
1345 func = qualdev;
1346 data = qgetnum(&s);
1347 break;
1348 case 'l':
1349 /* Match files with the given no. of hard links */
1350 func = qualnlink;
1351 g_amc = -1;
1352 goto getrange;
1353 case 'U':
1354 /* Match files owned by effective user ID */
1355 func = qualuid;
1356 data = geteuid();
1357 break;
1358 case 'G':
1359 /* Match files owned by effective group ID */
1360 func = qualgid;
1361 data = getegid();
1362 break;
1363 case 'u':
1364 /* Match files owned by given user id */
1365 func = qualuid;
1366 /* either the actual uid... */
1367 if (idigit(*s))
1368 data = qgetnum(&s);
1369 else {
1370 /* ... or a user name */
1371 char sav, *tt;
1372 int arglen;
1374 /* Find matching delimiters */
1375 tt = get_strarg(s, &arglen);
1376 if (!*tt) {
1377 zerr("missing end of name");
1378 data = 0;
1379 } else {
1380 #ifdef USE_GETPWNAM
1381 struct passwd *pw;
1382 sav = *tt;
1383 *tt = '\0';
1385 if ((pw = getpwnam(s + arglen)))
1386 data = pw->pw_uid;
1387 else {
1388 zerr("unknown user");
1389 data = 0;
1391 *tt = sav;
1392 #else /* !USE_GETPWNAM */
1393 sav = *tt;
1394 zerr("unknown user");
1395 data = 0;
1396 #endif /* !USE_GETPWNAM */
1397 if (sav)
1398 s = tt + arglen;
1399 else
1400 s = tt;
1403 break;
1404 case 'g':
1405 /* Given gid or group id... works like `u' */
1406 func = qualgid;
1407 /* either the actual gid... */
1408 if (idigit(*s))
1409 data = qgetnum(&s);
1410 else {
1411 /* ...or a delimited group name. */
1412 char sav, *tt;
1413 int arglen;
1415 tt = get_strarg(s, &arglen);
1416 if (!*tt) {
1417 zerr("missing end of name");
1418 data = 0;
1419 } else {
1420 #ifdef USE_GETGRNAM
1421 struct group *gr;
1422 sav = *tt;
1423 *tt = '\0';
1425 if ((gr = getgrnam(s + arglen)))
1426 data = gr->gr_gid;
1427 else {
1428 zerr("unknown group");
1429 data = 0;
1431 *tt = sav;
1432 #else /* !USE_GETGRNAM */
1433 sav = *tt;
1434 zerr("unknown group");
1435 data = 0;
1436 #endif /* !USE_GETGRNAM */
1437 if (sav)
1438 s = tt + arglen;
1439 else
1440 s = tt;
1443 break;
1444 case 'f':
1445 /* Match modes with chmod-spec. */
1446 func = qualmodeflags;
1447 data = qgetmodespec(&s);
1448 break;
1449 case 'F':
1450 func = qualnonemptydir;
1451 break;
1452 case 'M':
1453 /* Mark directories with a / */
1454 if ((gf_markdirs = !(sense & 1)))
1455 gf_follow = sense & 2;
1456 break;
1457 case 'T':
1458 /* Mark types in a `ls -F' type fashion */
1459 if ((gf_listtypes = !(sense & 1)))
1460 gf_follow = sense & 2;
1461 break;
1462 case 'N':
1463 /* Nullglob: remove unmatched patterns. */
1464 gf_nullglob = !(sense & 1);
1465 break;
1466 case 'D':
1467 /* Glob dots: match leading dots implicitly */
1468 gf_noglobdots = sense & 1;
1469 break;
1470 case 'n':
1471 /* Numeric glob sort */
1472 gf_numsort = !(sense & 1);
1473 break;
1474 case 'a':
1475 /* Access time in given range */
1476 g_amc = 0;
1477 func = qualtime;
1478 goto getrange;
1479 case 'm':
1480 /* Modification time in given range */
1481 g_amc = 1;
1482 func = qualtime;
1483 goto getrange;
1484 case 'c':
1485 /* Inode creation time in given range */
1486 g_amc = 2;
1487 func = qualtime;
1488 goto getrange;
1489 case 'L':
1490 /* File size (Length) in given range */
1491 func = qualsize;
1492 g_amc = -1;
1493 /* Get size multiplier */
1494 g_units = TT_BYTES;
1495 if (*s == 'p' || *s == 'P')
1496 g_units = TT_POSIX_BLOCKS, ++s;
1497 else if (*s == 'k' || *s == 'K')
1498 g_units = TT_KILOBYTES, ++s;
1499 else if (*s == 'm' || *s == 'M')
1500 g_units = TT_MEGABYTES, ++s;
1501 getrange:
1502 /* Get time multiplier */
1503 if (g_amc >= 0) {
1504 g_units = TT_DAYS;
1505 if (*s == 'h')
1506 g_units = TT_HOURS, ++s;
1507 else if (*s == 'm')
1508 g_units = TT_MINS, ++s;
1509 else if (*s == 'w')
1510 g_units = TT_WEEKS, ++s;
1511 else if (*s == 'M')
1512 g_units = TT_MONTHS, ++s;
1513 else if (*s == 's')
1514 g_units = TT_SECONDS, ++s;
1516 /* See if it's greater than, equal to, or less than */
1517 if ((g_range = *s == '+' ? 1 : *s == '-' ? -1 : 0))
1518 ++s;
1519 data = qgetnum(&s);
1520 break;
1522 case 'o':
1523 case 'O':
1525 int t;
1526 char *send;
1528 if (gf_nsorts == MAX_SORTS) {
1529 zerr("too many glob sort specifiers");
1530 restore_globstate(saved);
1531 return;
1534 /* usually just one character */
1535 send = s+1;
1536 switch (*s) {
1537 case 'n': t = GS_NAME; break;
1538 case 'L': t = GS_SIZE; break;
1539 case 'l': t = GS_LINKS; break;
1540 case 'a': t = GS_ATIME; break;
1541 case 'm': t = GS_MTIME; break;
1542 case 'c': t = GS_CTIME; break;
1543 case 'd': t = GS_DEPTH; break;
1544 case 'N': t = GS_NONE; break;
1545 case 'e':
1546 case '+':
1548 t = GS_EXEC;
1549 if ((gf_sortlist[gf_nsorts].exec =
1550 glob_exec_string(&send)) == NULL)
1552 restore_globstate(saved);
1553 return;
1555 break;
1557 default:
1558 zerr("unknown sort specifier");
1559 restore_globstate(saved);
1560 return;
1562 if (t != GS_EXEC) {
1563 if ((sense & 2) && !(t & (GS_NAME|GS_DEPTH)))
1564 t <<= GS_SHIFT; /* HERE: GS_EXEC? */
1565 if (gf_sorts & t) {
1566 zerr("doubled sort specifier");
1567 restore_globstate(saved);
1568 return;
1571 gf_sorts |= t;
1572 gf_sortlist[gf_nsorts++].tp = t |
1573 (((sense & 1) ^ (s[-1] == 'O')) ? GS_DESC : 0);
1574 s = send;
1575 break;
1577 case '+':
1578 case 'e':
1580 char *tt;
1582 tt = glob_exec_string(&s);
1584 if (tt == NULL) {
1585 data = 0;
1586 } else {
1587 func = qualsheval;
1588 sdata = tt;
1590 break;
1592 case '[':
1593 case Inbrack:
1595 char *os = --s;
1596 struct value v;
1598 v.isarr = SCANPM_WANTVALS;
1599 v.pm = NULL;
1600 v.end = -1;
1601 v.flags = 0;
1602 if (getindex(&s, &v, 0) || s == os) {
1603 zerr("invalid subscript");
1604 restore_globstate(saved);
1605 return;
1607 first = v.start;
1608 end = v.end;
1609 break;
1611 default:
1612 zerr("unknown file attribute");
1613 restore_globstate(saved);
1614 return;
1617 if (func) {
1618 /* Requested test is performed by function func */
1619 if (!qn)
1620 qn = (struct qual *)hcalloc(sizeof *qn);
1621 if (ql)
1622 ql->next = qn;
1623 ql = qn;
1624 if (!newquals)
1625 newquals = qo = qn;
1626 qn->func = func;
1627 qn->sense = sense;
1628 qn->data = data;
1629 qn->sdata = sdata;
1630 qn->range = g_range;
1631 qn->units = g_units;
1632 qn->amc = g_amc;
1634 qn = NULL;
1635 qualct++;
1637 if (errflag) {
1638 restore_globstate(saved);
1639 return;
1643 if (quals && newquals) {
1644 /* Merge previous group of qualifiers with new set. */
1645 if (quals->or || newquals->or) {
1646 /* The hard case. */
1647 struct qual *qorhead = NULL, *qortail = NULL;
1649 * Distribute in the most trivial way, by creating
1650 * all possible combinations of the two sets and chaining
1651 * these into one long set of alternatives given
1652 * by qorhead and qortail.
1654 for (qn = newquals; qn; qn = qn->or) {
1655 for (qo = quals; qo; qo = qo->or) {
1656 struct qual *qfirst, *qlast;
1657 int islast = !qn->or && !qo->or;
1658 /* Generate first set of qualifiers... */
1659 if (islast) {
1660 /* Last time round: don't bother copying. */
1661 qfirst = qn;
1662 for (qlast = qfirst; qlast->next;
1663 qlast = qlast->next)
1665 } else
1666 qfirst = dup_qual_list(qn, &qlast);
1667 /* ... link into new `or' chain ... */
1668 if (!qorhead)
1669 qorhead = qfirst;
1670 if (qortail)
1671 qortail->or = qfirst;
1672 qortail = qfirst;
1673 /* ... and concatenate second set. */
1674 qlast->next = islast ? qo : dup_qual_list(qo, NULL);
1677 quals = qorhead;
1678 } else {
1680 * Easy: we can just chain the qualifiers together.
1681 * This is an optimisation; the code above will work, too.
1682 * We retain the original left to right ordering --- remember
1683 * we are searching for sets of qualifiers from the right.
1685 qn = newquals;
1686 for ( ; newquals->next; newquals = newquals->next)
1688 newquals->next = quals;
1689 quals = qn;
1691 } else if (newquals)
1692 quals = newquals;
1694 q = parsepat(str);
1695 if (!q || errflag) { /* if parsing failed */
1696 restore_globstate(saved);
1697 if (unset(BADPATTERN)) {
1698 if (!nountok)
1699 untokenize(ostr);
1700 insertlinknode(list, node, ostr);
1701 return;
1703 errflag = 0;
1704 zerr("bad pattern: %s", ostr);
1705 return;
1707 if (!gf_nsorts) {
1708 gf_sortlist[0].tp = gf_sorts = GS_NAME;
1709 gf_nsorts = 1;
1711 /* Initialise receptacle for matched files, *
1712 * expanded by insert() where necessary. */
1713 matchptr = matchbuf = (Gmatch)zalloc((matchsz = 16) *
1714 sizeof(struct gmatch));
1715 matchct = 0;
1716 pattrystart();
1718 /* The actual processing takes place here: matches go into *
1719 * matchbuf. This is the only top-level call to scanner(). */
1720 scanner(q);
1722 /* Deal with failures to match depending on options */
1723 if (matchct)
1724 badcshglob |= 2; /* at least one cmd. line expansion O.K. */
1725 else if (!gf_nullglob) {
1726 if (isset(CSHNULLGLOB)) {
1727 badcshglob |= 1; /* at least one cmd. line expansion failed */
1728 } else if (isset(NOMATCH)) {
1729 zerr("no matches found: %s", ostr);
1730 free(matchbuf);
1731 restore_globstate(saved);
1732 return;
1733 } else {
1734 /* treat as an ordinary string */
1735 untokenize(matchptr->name = dupstring(ostr));
1736 matchptr++;
1737 matchct = 1;
1741 if (!(gf_sortlist[0].tp & GS_NONE)) {
1743 * Get the strings to use for sorting by executing
1744 * the code chunk. We allow more than one of these.
1746 int nexecs = 0;
1747 struct globsort *sortp;
1748 struct globsort *lastsortp = gf_sortlist + gf_nsorts;
1750 /* First find out if there are any GS_EXECs, counting them. */
1751 for (sortp = gf_sortlist; sortp < lastsortp; sortp++)
1753 if (sortp->tp & GS_EXEC)
1754 nexecs++;
1757 if (nexecs) {
1758 Gmatch tmpptr;
1759 int iexec = 0;
1761 /* Yes; allocate enough space for strings for each */
1762 for (tmpptr = matchbuf; tmpptr < matchptr; tmpptr++)
1763 tmpptr->sortstrs = (char **)zhalloc(nexecs*sizeof(char*));
1765 /* Loop over each one, incrementing iexec */
1766 for (sortp = gf_sortlist; sortp < lastsortp; sortp++)
1768 /* Ignore unless this is a GS_EXEC */
1769 if (sortp->tp & GS_EXEC) {
1770 Eprog prog;
1772 if ((prog = parse_string(sortp->exec, 0))) {
1773 int ef = errflag, lv = lastval, ret;
1775 /* Parsed OK, execute for each name */
1776 for (tmpptr = matchbuf; tmpptr < matchptr; tmpptr++) {
1777 setsparam("REPLY", ztrdup(tmpptr->name));
1778 execode(prog, 1, 0);
1779 if (!errflag)
1780 tmpptr->sortstrs[iexec] =
1781 dupstring(getsparam("REPLY"));
1782 else
1783 tmpptr->sortstrs[iexec] = tmpptr->name;
1786 ret = lastval;
1787 errflag = ef;
1788 lastval = lv;
1789 } else {
1790 /* Failed, let's be safe */
1791 for (tmpptr = matchbuf; tmpptr < matchptr; tmpptr++)
1792 tmpptr->sortstrs[iexec] = tmpptr->name;
1795 iexec++;
1800 /* Sort arguments in to lexical (and possibly numeric) order. *
1801 * This is reversed to facilitate insertion into the list. */
1802 qsort((void *) & matchbuf[0], matchct, sizeof(struct gmatch),
1803 (int (*) _((const void *, const void *)))gmatchcmp);
1806 if (first < 0) {
1807 first += matchct;
1808 if (first < 0)
1809 first = 0;
1811 if (end < 0)
1812 end += matchct + 1;
1813 else if (end > matchct)
1814 end = matchct;
1815 if ((end -= first) > 0) {
1816 if (gf_sortlist[0].tp & GS_NONE) {
1817 /* Match list was never reversed, so insert back to front. */
1818 matchptr = matchbuf + matchct - first - 1;
1819 while (end-- > 0) {
1820 /* insert matches in the arg list */
1821 insertlinknode(list, node, matchptr->name);
1822 matchptr--;
1824 } else {
1825 matchptr = matchbuf + matchct - first - end;
1826 while (end-- > 0) {
1827 /* insert matches in the arg list */
1828 insertlinknode(list, node, matchptr->name);
1829 matchptr++;
1833 free(matchbuf);
1835 restore_globstate(saved);
1838 /* Return the trailing character for marking file types */
1840 /**/
1841 mod_export char
1842 file_type(mode_t filemode)
1844 if(S_ISBLK(filemode))
1845 return '#';
1846 else if(S_ISCHR(filemode))
1847 return '%';
1848 else if(S_ISDIR(filemode))
1849 return '/';
1850 else if(S_ISFIFO(filemode))
1851 return '|';
1852 else if(S_ISLNK(filemode))
1853 return '@';
1854 else if(S_ISREG(filemode))
1855 return (filemode & S_IXUGO) ? '*' : ' ';
1856 else if(S_ISSOCK(filemode))
1857 return '=';
1858 else
1859 return '?';
1862 /* check to see if str is eligible for brace expansion */
1864 /**/
1865 mod_export int
1866 hasbraces(char *str)
1868 char *lbr, *mbr, *comma;
1870 if (isset(BRACECCL)) {
1871 /* In this case, any properly formed brace expression *
1872 * will match and expand to the characters in between. */
1873 int bc, c;
1875 for (bc = 0; (c = *str); ++str)
1876 if (c == Inbrace) {
1877 if (!bc && str[1] == Outbrace)
1878 *str++ = '{', *str = '}';
1879 else
1880 bc++;
1881 } else if (c == Outbrace) {
1882 if (!bc)
1883 *str = '}';
1884 else if (!--bc)
1885 return 1;
1887 return 0;
1889 /* Otherwise we need to look for... */
1890 lbr = mbr = comma = NULL;
1891 for (;;) {
1892 switch (*str++) {
1893 case Inbrace:
1894 if (!lbr) {
1895 lbr = str - 1;
1896 while (idigit(*str))
1897 str++;
1898 if (*str == '.' && str[1] == '.') {
1899 str++;
1900 while (idigit(*++str));
1901 if (*str == Outbrace &&
1902 (idigit(lbr[1]) || idigit(str[-1])))
1903 return 1;
1905 } else {
1906 char *s = --str;
1908 if (skipparens(Inbrace, Outbrace, &str)) {
1909 *lbr = *s = '{';
1910 if (comma)
1911 str = comma;
1912 if (mbr && mbr < str)
1913 str = mbr;
1914 lbr = mbr = comma = NULL;
1915 } else if (!mbr)
1916 mbr = s;
1918 break;
1919 case Outbrace:
1920 if (!lbr)
1921 str[-1] = '}';
1922 else if (comma)
1923 return 1;
1924 else {
1925 *lbr = '{';
1926 str[-1] = '}';
1927 if (mbr)
1928 str = mbr;
1929 mbr = lbr = NULL;
1931 break;
1932 case Comma:
1933 if (!lbr)
1934 str[-1] = ',';
1935 else if (!comma)
1936 comma = str - 1;
1937 break;
1938 case '\0':
1939 if (lbr)
1940 *lbr = '{';
1941 if (!mbr && !comma)
1942 return 0;
1943 if (comma)
1944 str = comma;
1945 if (mbr && mbr < str)
1946 str = mbr;
1947 lbr = mbr = comma = NULL;
1948 break;
1953 /* expand stuff like >>*.c */
1955 /**/
1957 xpandredir(struct redir *fn, LinkList tab)
1959 char *nam;
1960 struct redir *ff;
1961 int ret = 0;
1962 local_list1(fake);
1964 /* Stick the name in a list... */
1965 init_list1(fake, fn->name);
1966 /* ...which undergoes all the usual shell expansions */
1967 prefork(&fake, isset(MULTIOS) ? 0 : PF_SINGLE);
1968 /* Globbing is only done for multios. */
1969 if (!errflag && isset(MULTIOS))
1970 globlist(&fake, 0);
1971 if (errflag)
1972 return 0;
1973 if (nonempty(&fake) && !nextnode(firstnode(&fake))) {
1974 /* Just one match, the usual case. */
1975 char *s = peekfirst(&fake);
1976 fn->name = s;
1977 untokenize(s);
1978 if (fn->type == REDIR_MERGEIN || fn->type == REDIR_MERGEOUT) {
1979 if (s[0] == '-' && !s[1])
1980 fn->type = REDIR_CLOSE;
1981 else if (s[0] == 'p' && !s[1])
1982 fn->fd2 = -2;
1983 else {
1984 while (idigit(*s))
1985 s++;
1986 if (!*s && s > fn->name)
1987 fn->fd2 = zstrtol(fn->name, NULL, 10);
1988 else if (fn->type == REDIR_MERGEIN)
1989 zerr("file number expected");
1990 else
1991 fn->type = REDIR_ERRWRITE;
1994 } else if (fn->type == REDIR_MERGEIN)
1995 zerr("file number expected");
1996 else {
1997 if (fn->type == REDIR_MERGEOUT)
1998 fn->type = REDIR_ERRWRITE;
1999 while ((nam = (char *)ugetnode(&fake))) {
2000 /* Loop over matches, duplicating the *
2001 * redirection for each file found. */
2002 ff = (struct redir *) zhalloc(sizeof *ff);
2003 *ff = *fn;
2004 ff->name = nam;
2005 addlinknode(tab, ff);
2006 ret = 1;
2009 return ret;
2012 /* brace expansion */
2014 /**/
2015 mod_export void
2016 xpandbraces(LinkList list, LinkNode *np)
2018 LinkNode node = (*np), last = prevnode(node);
2019 char *str = (char *)getdata(node), *str3 = str, *str2;
2020 int prev, bc, comma, dotdot;
2022 for (; *str != Inbrace; str++);
2023 /* First, match up braces and see what we have. */
2024 for (str2 = str, bc = comma = dotdot = 0; *str2; ++str2)
2025 if (*str2 == Inbrace)
2026 ++bc;
2027 else if (*str2 == Outbrace) {
2028 if (--bc == 0)
2029 break;
2030 } else if (bc == 1) {
2031 if (*str2 == Comma)
2032 ++comma; /* we have {foo,bar} */
2033 else if (*str2 == '.' && str2[1] == '.')
2034 dotdot++; /* we have {num1..num2} */
2036 DPUTS(bc, "BUG: unmatched brace in xpandbraces()");
2037 if (!comma && dotdot) {
2038 /* Expand range like 0..10 numerically: comma or recursive
2039 brace expansion take precedence. */
2040 char *dots, *p;
2041 LinkNode olast = last;
2042 /* Get the first number of the range */
2043 int rstart = zstrtol(str+1,&dots,10), rend = 0, err = 0, rev = 0;
2044 int wid1 = (dots - str) - 1, wid2 = (str2 - dots) - 2;
2045 int strp = str - str3;
2047 if (dots == str + 1 || *dots != '.' || dots[1] != '.')
2048 err++;
2049 else {
2050 /* Get the last number of the range */
2051 rend = zstrtol(dots+2,&p,10);
2052 if (p == dots+2 || p != str2)
2053 err++;
2055 if (!err) {
2056 /* If either no. begins with a zero, pad the output with *
2057 * zeroes. Otherwise, choose a min width to suppress them. */
2058 int minw = (str[1] == '0') ? wid1 : (dots[2] == '0' ) ? wid2 :
2059 (wid2 > wid1) ? wid1 : wid2;
2060 if (rstart > rend) {
2061 /* Handle decreasing ranges correctly. */
2062 int rt = rend;
2063 rend = rstart;
2064 rstart = rt;
2065 rev = 1;
2067 uremnode(list, node);
2068 for (; rend >= rstart; rend--) {
2069 /* Node added in at end, so do highest first */
2070 p = dupstring(str3);
2071 sprintf(p + strp, "%0*d", minw, rend);
2072 strcat(p + strp, str2 + 1);
2073 insertlinknode(list, last, p);
2074 if (rev) /* decreasing: add in reverse order. */
2075 last = nextnode(last);
2077 *np = nextnode(olast);
2078 return;
2081 if (!comma && isset(BRACECCL)) { /* {a-mnop} */
2082 /* Here we expand each character to a separate node, *
2083 * but also ranges of characters like a-m. ccl is a *
2084 * set of flags saying whether each character is present; *
2085 * the final list is in lexical order. */
2086 char ccl[256], *p;
2087 unsigned char c1, c2;
2088 unsigned int len, pl;
2089 int lastch = -1;
2091 uremnode(list, node);
2092 memset(ccl, 0, sizeof(ccl) / sizeof(ccl[0]));
2093 for (p = str + 1; p < str2;) {
2094 if (itok(c1 = *p++))
2095 c1 = ztokens[c1 - STOUC(Pound)];
2096 if ((char) c1 == Meta)
2097 c1 = 32 ^ *p++;
2098 if (itok(c2 = *p))
2099 c2 = ztokens[c2 - STOUC(Pound)];
2100 if ((char) c2 == Meta)
2101 c2 = 32 ^ p[1];
2102 if (c1 == '-' && lastch >= 0 && p < str2 && lastch <= (int)c2) {
2103 while (lastch < (int)c2)
2104 ccl[lastch++] = 1;
2105 lastch = -1;
2106 } else
2107 ccl[lastch = c1] = 1;
2109 pl = str - str3;
2110 len = pl + strlen(++str2) + 2;
2111 for (p = ccl + 256; p-- > ccl;)
2112 if (*p) {
2113 c1 = p - ccl;
2114 if (imeta(c1)) {
2115 str = hcalloc(len + 1);
2116 str[pl] = Meta;
2117 str[pl+1] = c1 ^ 32;
2118 strcpy(str + pl + 2, str2);
2119 } else {
2120 str = hcalloc(len);
2121 str[pl] = c1;
2122 strcpy(str + pl + 1, str2);
2124 memcpy(str, str3, pl);
2125 insertlinknode(list, last, str);
2127 *np = nextnode(last);
2128 return;
2130 prev = str++ - str3;
2131 str2++;
2132 uremnode(list, node);
2133 node = last;
2134 /* Finally, normal comma expansion *
2135 * str1{foo,bar}str2 -> str1foostr2 str1barstr2. *
2136 * Any number of intervening commas is allowed. */
2137 for (;;) {
2138 char *zz, *str4;
2139 int cnt;
2141 for (str4 = str, cnt = 0; cnt || (*str != Comma && *str !=
2142 Outbrace); str++) {
2143 if (*str == Inbrace)
2144 cnt++;
2145 else if (*str == Outbrace)
2146 cnt--;
2147 DPUTS(!*str, "BUG: illegal brace expansion");
2149 /* Concatenate the string before the braces (str3), the section *
2150 * just found (str4) and the text after the braces (str2) */
2151 zz = (char *) hcalloc(prev + (str - str4) + strlen(str2) + 1);
2152 ztrncpy(zz, str3, prev);
2153 strncat(zz, str4, str - str4);
2154 strcat(zz, str2);
2155 /* and add this text to the argument list. */
2156 insertlinknode(list, node, zz);
2157 incnode(node);
2158 if (*str != Outbrace)
2159 str++;
2160 else
2161 break;
2163 *np = nextnode(last);
2166 /* check to see if a matches b (b is not a filename pattern) */
2168 /**/
2170 matchpat(char *a, char *b)
2172 Patprog p = patcompile(b, PAT_STATIC, NULL);
2174 if (!p) {
2175 zerr("bad pattern: %s", b);
2176 return 0;
2178 return pattry(p, a);
2181 /* do the ${foo%%bar}, ${foo#bar} stuff */
2182 /* please do not laugh at this code. */
2184 /* Having found a match in getmatch, decide what part of string
2185 * to return. The matched part starts b characters into string s
2186 * and finishes e characters in: 0 <= b <= e <= strlen(s)
2187 * (yes, empty matches should work).
2188 * fl is a set of the SUB_* matches defined in zsh.h from SUB_MATCH onwards;
2189 * the lower parts are ignored.
2190 * replstr is the replacement string for a substitution
2193 /**/
2194 static char *
2195 get_match_ret(char *s, int b, int e, int fl, char *replstr,
2196 LinkList repllist)
2198 char buf[80], *r, *p, *rr;
2199 int ll = 0, l = strlen(s), bl = 0, t = 0, i;
2201 if (replstr || (fl & SUB_LIST)) {
2202 if (fl & SUB_DOSUBST) {
2203 replstr = dupstring(replstr);
2204 singsub(&replstr);
2205 untokenize(replstr);
2207 if ((fl & (SUB_GLOBAL|SUB_LIST)) && repllist) {
2208 /* We are replacing the chunk, just add this to the list */
2209 Repldata rd = (Repldata)
2210 ((fl & SUB_LIST) ? zalloc(sizeof(*rd)) : zhalloc(sizeof(*rd)));
2211 rd->b = b;
2212 rd->e = e;
2213 rd->replstr = replstr;
2214 if (fl & SUB_LIST)
2215 zaddlinknode(repllist, rd);
2216 else
2217 addlinknode(repllist, rd);
2218 return s;
2220 ll += strlen(replstr);
2222 if (fl & SUB_MATCH) /* matched portion */
2223 ll += 1 + (e - b);
2224 if (fl & SUB_REST) /* unmatched portion */
2225 ll += 1 + (l - (e - b));
2226 if (fl & SUB_BIND) {
2227 /* position of start of matched portion */
2228 sprintf(buf, "%d ", b + 1);
2229 ll += (bl = strlen(buf));
2231 if (fl & SUB_EIND) {
2232 /* position of end of matched portion */
2233 sprintf(buf + bl, "%d ", e + 1);
2234 ll += (bl = strlen(buf));
2236 if (fl & SUB_LEN) {
2237 /* length of matched portion */
2238 sprintf(buf + bl, "%d ", e - b);
2239 ll += (bl = strlen(buf));
2241 if (bl)
2242 buf[bl - 1] = '\0';
2244 rr = r = (char *) hcalloc(ll);
2246 if (fl & SUB_MATCH) {
2247 /* copy matched portion to new buffer */
2248 for (i = b, p = s + b; i < e; i++)
2249 *rr++ = *p++;
2250 t = 1;
2252 if (fl & SUB_REST) {
2253 /* Copy unmatched portion to buffer. If both portions *
2254 * requested, put a space in between (why?) */
2255 if (t)
2256 *rr++ = ' ';
2257 /* there may be unmatched bits at both beginning and end of string */
2258 for (i = 0, p = s; i < b; i++)
2259 *rr++ = *p++;
2260 if (replstr)
2261 for (p = replstr; *p; )
2262 *rr++ = *p++;
2263 for (i = e, p = s + e; i < l; i++)
2264 *rr++ = *p++;
2265 t = 1;
2267 *rr = '\0';
2268 if (bl) {
2269 /* if there was a buffer (with a numeric result), add it; *
2270 * if there was other stuff too, stick in a space first. */
2271 if (t)
2272 *rr++ = ' ';
2273 strcpy(rr, buf);
2275 return r;
2278 static Patprog
2279 compgetmatch(char *pat, int *flp, char **replstrp)
2281 Patprog p;
2283 * Flags to pattern compiler: use static buffer since we only
2284 * have one pattern at a time; we will try the must-match test ourselves,
2285 * so tell the pattern compiler we are scanning.
2288 /* int patflags = PAT_STATIC|PAT_SCAN|PAT_NOANCH;*/
2290 /* Unfortunately, PAT_STATIC doesn't work if we have a replstr with
2291 * something like ${x#...} in it which will be singsub()ed below because
2292 * that would overwrite the pattern buffer. */
2294 int patflags = PAT_SCAN|PAT_NOANCH | (*replstrp ? 0 : PAT_STATIC);
2297 * Search is anchored to the end of the string if we want to match
2298 * it all, or if we are matching at the end of the string and not
2299 * using substrings.
2301 if ((*flp & SUB_ALL) || ((*flp & SUB_END) && !(*flp & SUB_SUBSTR)))
2302 patflags &= ~PAT_NOANCH;
2303 p = patcompile(pat, patflags, NULL);
2304 if (!p) {
2305 zerr("bad pattern: %s", pat);
2306 return NULL;
2308 if (*replstrp) {
2309 if (p->patnpar || (p->globend & GF_MATCHREF)) {
2311 * Either backreferences or match references, so we
2312 * need to re-substitute replstr each time round.
2314 *flp |= SUB_DOSUBST;
2315 } else {
2316 singsub(replstrp);
2317 untokenize(*replstrp);
2321 return p;
2325 * This is called from paramsubst to get the match for ${foo#bar} etc.
2326 * fl is a set of the SUB_* flags defined in zsh.h
2327 * *sp points to the string we have to modify. The n'th match will be
2328 * returned in *sp. The heap is used to get memory for the result string.
2329 * replstr is the replacement string from a ${.../orig/repl}, in
2330 * which case pat is the original.
2332 * n is now ignored unless we are looking for a substring, in
2333 * which case the n'th match from the start is counted such that
2334 * there is no more than one match from each position.
2337 /**/
2339 getmatch(char **sp, char *pat, int fl, int n, char *replstr)
2341 Patprog p;
2343 if (!(p = compgetmatch(pat, &fl, &replstr)))
2344 return 1;
2346 return igetmatch(sp, p, fl, n, replstr, NULL);
2350 * This is the corresponding function for array variables.
2351 * Matching is done with the same pattern on each element.
2354 /**/
2355 void
2356 getmatcharr(char ***ap, char *pat, int fl, int n, char *replstr)
2358 char **arr = *ap, **pp;
2359 Patprog p;
2361 if (!(p = compgetmatch(pat, &fl, &replstr)))
2362 return;
2364 *ap = pp = hcalloc(sizeof(char *) * (arrlen(arr) + 1));
2365 while ((*pp = *arr++))
2366 if (igetmatch(pp, p, fl, n, replstr, NULL))
2367 pp++;
2371 * Match against str using pattern pp; return a list of
2372 * Repldata matches in the linked list *repllistp; this is
2373 * in permanent storage and to be freed by freematchlist()
2376 /**/
2377 mod_export int
2378 getmatchlist(char *str, Patprog p, LinkList *repllistp)
2380 char **sp = &str;
2383 * We don't care if we have longest or shortest match, but SUB_LONG
2384 * is cheaper since the pattern code does that by default.
2385 * We need SUB_GLOBAL to get all matches.
2386 * We need SUB_SUBSTR to scan through for substrings.
2387 * We need SUB_LIST to activate the special handling of the list
2388 * passed in.
2390 return igetmatch(sp, p, SUB_LONG|SUB_GLOBAL|SUB_SUBSTR|SUB_LIST,
2391 0, NULL, repllistp);
2394 static void
2395 freerepldata(void *ptr)
2397 zfree(ptr, sizeof(struct repldata));
2400 /**/
2401 mod_export void
2402 freematchlist(LinkList repllist)
2404 freelinklist(repllist, freerepldata);
2407 /**/
2408 static void
2409 set_pat_start(Patprog p, int offs)
2412 * If we are messing around with the test string by advancing up
2413 * it from the start, we need to tell the pattern matcher that
2414 * a start-of-string assertion, i.e. (#s), should fail. Hence
2415 * we test whether the offset of the real start of string from
2416 * the actual start, passed as offs, is zero.
2418 if (offs)
2419 p->flags |= PAT_NOTSTART;
2420 else
2421 p->flags &= ~PAT_NOTSTART;
2424 /**/
2425 static void
2426 set_pat_end(Patprog p, char null_me)
2429 * If we are messing around with the string by shortening it at the
2430 * tail, we need to tell the pattern matcher that an end-of-string
2431 * assertion, i.e. (#e), should fail. Hence we test whether
2432 * the character null_me about to be zapped is or is not already a null.
2434 if (null_me)
2435 p->flags |= PAT_NOTEND;
2436 else
2437 p->flags &= ~PAT_NOTEND;
2440 /**/
2441 #ifdef MULTIBYTE_SUPPORT
2444 * Increment *tp over character which may be multibyte.
2445 * Return number of bytes that remain in the character after unmetafication.
2448 /**/
2449 static int iincchar(char **tp)
2451 char *t = *tp;
2452 int mbclen = mb_metacharlenconv(t, NULL);
2453 int umlen = 0;
2455 while (mbclen--) {
2456 umlen++;
2457 if (*t++ == Meta) {
2458 t++;
2459 mbclen--;
2462 *tp = t;
2464 return umlen;
2467 /**/
2468 static int
2469 igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
2470 LinkList *repllistp)
2472 char *s = *sp, *t, *tmatch;
2474 * Note that ioff counts (possibly multibyte) characters in the
2475 * character set (Meta's are not included), while l counts characters in
2476 * the metafied string.
2478 * umlen is a counter for (unmetafied) byte lengths---neither characters
2479 * nor raw byte indices; this is simply an optimisation for allocation.
2480 * umltot is the full length of the string in this scheme.
2482 * l is the raw string length, used together with any pointers into
2483 * the string (typically t).
2485 int ioff, l = strlen(*sp), matched = 1, umltot = ztrlen(*sp);
2486 int umlen, nmatches;
2488 * List of bits of matches to concatenate with replacement string.
2489 * The data is a struct repldata. It is not used in cases like
2490 * ${...//#foo/bar} even though SUB_GLOBAL is set, since the match
2491 * is anchored. It goes on the heap.
2493 LinkList repllist = NULL;
2495 /* perform must-match test for complex closures */
2496 if (p->mustoff)
2499 * Yuk. Probably we should rewrite this whole function to
2500 * use an unmetafied test string.
2502 * Use META_HEAPDUP because we need a terminating NULL.
2504 char *muststr = metafy((char *)p + p->mustoff,
2505 p->patmlen, META_HEAPDUP);
2507 if (!strstr(s, muststr))
2508 matched = 0;
2511 /* in case we used the prog before... */
2512 p->flags &= ~(PAT_NOTSTART|PAT_NOTEND);
2514 if (fl & SUB_ALL) {
2515 int i = matched && pattry(p, s);
2516 *sp = get_match_ret(*sp, 0, i ? l : 0, fl, i ? replstr : 0, NULL);
2517 if (! **sp && (((fl & SUB_MATCH) && !i) || ((fl & SUB_REST) && i)))
2518 return 0;
2519 return 1;
2521 if (matched) {
2523 * The default behaviour is to match at the start; this
2524 * is modified by SUB_END and SUB_SUBSTR. SUB_END matches
2525 * at the end of the string instead of the start. SUB_SUBSTR
2526 * without SUB_END matches substrings searching from the start;
2527 * with SUB_END it matches substrings searching from the end.
2529 * The possibilities are further modified by whether we want the
2530 * longest (SUB_LONG) or shortest possible match.
2532 * SUB_START is only used in the case where we are also
2533 * forcing a match at the end (SUB_END with no SUB_SUBSTR,
2534 * with or without SUB_LONG), to indicate we should match
2535 * the entire string.
2537 switch (fl & (SUB_END|SUB_LONG|SUB_SUBSTR)) {
2538 case 0:
2539 case SUB_LONG:
2541 * Largest/smallest possible match at head of string.
2542 * First get the longest match...
2544 if (pattry(p, s)) {
2545 /* patmatchlen returns metafied length, as we need */
2546 int mlen = patmatchlen();
2547 if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
2549 * ... now we know whether it's worth looking for the
2550 * shortest, which we do by brute force.
2552 mb_metacharinit();
2553 for (t = s, umlen = 0; t < s + mlen; ) {
2554 set_pat_end(p, *t);
2555 if (pattrylen(p, s, t - s, umlen, 0)) {
2556 mlen = patmatchlen();
2557 break;
2559 umlen += iincchar(&t);
2562 *sp = get_match_ret(*sp, 0, mlen, fl, replstr, NULL);
2563 return 1;
2565 break;
2567 case SUB_END:
2569 * Smallest possible match at tail of string.
2570 * As we can only be sure we've got wide characters right
2571 * when going forwards, we need to match at every point
2572 * until we fail and record the last successful match.
2574 * It's important that we return the last successful match
2575 * so that match, mbegin, mend and MATCH, MBEGIN, MEND are
2576 * correct.
2578 mb_metacharinit();
2579 tmatch = NULL;
2580 for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
2581 set_pat_start(p, t-s);
2582 if (pattrylen(p, t, s + l - t, umlen, ioff))
2583 tmatch = t;
2584 if (fl & SUB_START)
2585 break;
2586 umlen -= iincchar(&t);
2588 if (tmatch) {
2589 *sp = get_match_ret(*sp, tmatch - s, l, fl, replstr, NULL);
2590 return 1;
2592 if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, ioff)) {
2593 *sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
2594 return 1;
2596 break;
2598 case (SUB_END|SUB_LONG):
2599 /* Largest possible match at tail of string: *
2600 * move forward along string until we get a match. *
2601 * Again there's no optimisation. */
2602 mb_metacharinit();
2603 for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
2604 set_pat_start(p, t-s);
2605 if (pattrylen(p, t, s + l - t, umlen, ioff)) {
2606 *sp = get_match_ret(*sp, t-s, l, fl, replstr, NULL);
2607 return 1;
2609 if (fl & SUB_START)
2610 break;
2611 umlen -= iincchar(&t);
2613 if (!(fl & SUB_START) && pattrylen(p, s + l, 0, 0, ioff)) {
2614 *sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
2615 return 1;
2617 break;
2619 case SUB_SUBSTR:
2620 /* Smallest at start, but matching substrings. */
2621 set_pat_start(p, l);
2622 if (!(fl & SUB_GLOBAL) && pattry(p, s + l) && !--n) {
2623 *sp = get_match_ret(*sp, 0, 0, fl, replstr, NULL);
2624 return 1;
2625 } /* fall through */
2626 case (SUB_SUBSTR|SUB_LONG):
2627 /* longest or smallest at start with substrings */
2628 t = s;
2629 if (fl & SUB_GLOBAL) {
2630 repllist = (fl & SUB_LIST) ? znewlinklist() : newlinklist();
2631 if (repllistp)
2632 *repllistp = repllist;
2634 ioff = 0; /* offset into string */
2635 umlen = umltot;
2636 mb_metacharinit();
2637 do {
2638 /* loop over all matches for global substitution */
2639 matched = 0;
2640 for (; t < s + l; ioff++) {
2641 /* Find the longest match from this position. */
2642 set_pat_start(p, t-s);
2643 if (pattrylen(p, t, s + l - t, umlen, ioff)) {
2644 char *mpos = t + patmatchlen();
2645 if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
2646 char *ptr;
2647 int umlen2;
2649 * If searching for the shortest match,
2650 * start with a zero length and increase
2651 * it until we reach the longest possible
2652 * match, accepting the first successful
2653 * match.
2655 for (ptr = t, umlen2 = 0; ptr < mpos;) {
2656 set_pat_end(p, *ptr);
2657 if (pattrylen(p, t, ptr - t, umlen2, ioff)) {
2658 mpos = t + patmatchlen();
2659 break;
2661 umlen2 += iincchar(&ptr);
2664 if (!--n || (n <= 0 && (fl & SUB_GLOBAL))) {
2665 *sp = get_match_ret(*sp, t-s, mpos-s, fl,
2666 replstr, repllist);
2667 if (mpos == t)
2668 mpos += mb_metacharlenconv(mpos, NULL);
2670 if (!(fl & SUB_GLOBAL)) {
2671 if (n) {
2673 * Looking for a later match: in this case,
2674 * we can continue looking for matches from
2675 * the next character, even if it overlaps
2676 * with what we just found.
2678 umlen -= iincchar(&t);
2679 continue;
2680 } else {
2681 return 1;
2685 * For a global match, we need to skip the stuff
2686 * which is already marked for replacement.
2688 matched = 1;
2689 while (t < mpos) {
2690 ioff++;
2691 umlen -= iincchar(&t);
2693 break;
2695 umlen -= iincchar(&t);
2697 } while (matched);
2699 * check if we can match a blank string, if so do it
2700 * at the start. Goodness knows if this is a good idea
2701 * with global substitution, so it doesn't happen.
2703 set_pat_start(p, l);
2704 if ((fl & (SUB_LONG|SUB_GLOBAL)) == SUB_LONG &&
2705 pattry(p, s + l) && !--n) {
2706 *sp = get_match_ret(*sp, 0, 0, fl, replstr, repllist);
2707 return 1;
2709 break;
2711 case (SUB_END|SUB_SUBSTR):
2712 case (SUB_END|SUB_LONG|SUB_SUBSTR):
2713 /* Longest/shortest at end, matching substrings. */
2714 if (!(fl & SUB_LONG)) {
2715 set_pat_start(p, l);
2716 if (pattrylen(p, s + l, 0, 0, umltot) && !--n) {
2717 *sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
2718 return 1;
2722 * If multibyte characters are present we need to start from the
2723 * beginning. This is a bit unpleasant because we can't tell in
2724 * advance how many times it will match and from where, so if n is
2725 * greater then 1 we will need to count the number of times it
2726 * matched and then go through again until we reach the right
2727 * point. (Either that or record every single match in a list,
2728 * which isn't stupid; it involves more memory management at this
2729 * level but less use of the pattern matcher.)
2731 nmatches = 0;
2732 tmatch = NULL;
2733 mb_metacharinit();
2734 for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
2735 set_pat_start(p, t-s);
2736 if (pattrylen(p, t, s + l - t, umlen, ioff)) {
2737 nmatches++;
2738 tmatch = t;
2740 umlen -= iincchar(&t);
2742 if (nmatches) {
2743 char *mpos;
2744 if (n > 1) {
2746 * We need to find the n'th last match.
2748 n = nmatches - n;
2749 mb_metacharinit();
2750 for (ioff = 0, t = s, umlen = umltot; t < s + l; ioff++) {
2751 set_pat_start(p, t-s);
2752 if (pattrylen(p, t, s + l - t, umlen, ioff) &&
2753 !n--) {
2754 tmatch = t;
2755 break;
2757 umlen -= iincchar(&t);
2760 mpos = tmatch + patmatchlen();
2761 /* Look for the shortest match if necessary */
2762 if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
2763 for (t = tmatch, umlen = 0; t < mpos; ) {
2764 set_pat_end(p, *t);
2765 if (pattrylen(p, tmatch, t - tmatch, umlen, ioff)) {
2766 mpos = tmatch + patmatchlen();
2767 break;
2769 umlen += iincchar(&t);
2772 *sp = get_match_ret(*sp, tmatch-s, mpos-s, fl,
2773 replstr, NULL);
2774 return 1;
2776 set_pat_start(p, l);
2777 if ((fl & SUB_LONG) && pattrylen(p, s + l, 0, 0, umltot) && !--n) {
2778 *sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
2779 return 1;
2781 break;
2785 if (repllist && nonempty(repllist)) {
2786 /* Put all the bits of a global search and replace together. */
2787 LinkNode nd;
2788 Repldata rd;
2789 int lleft;
2790 char *ptr, *start;
2791 int i;
2793 if (!(fl & SUB_LIST)) {
2794 lleft = 0; /* size of returned string */
2795 i = 0; /* start of last chunk we got from *sp */
2796 for (nd = firstnode(repllist); nd; incnode(nd)) {
2797 rd = (Repldata) getdata(nd);
2798 lleft += rd->b - i; /* previous chunk of *sp */
2799 lleft += strlen(rd->replstr); /* the replaced bit */
2800 i = rd->e; /* start of next chunk of *sp */
2802 lleft += l - i; /* final chunk from *sp */
2803 start = t = zhalloc(lleft+1);
2804 i = 0;
2805 for (nd = firstnode(repllist); nd; incnode(nd)) {
2806 rd = (Repldata) getdata(nd);
2807 memcpy(t, s + i, rd->b - i);
2808 t += rd->b - i;
2809 ptr = rd->replstr;
2810 while (*ptr)
2811 *t++ = *ptr++;
2812 i = rd->e;
2814 memcpy(t, s + i, l - i);
2815 start[lleft] = '\0';
2816 *sp = (char *)start;
2818 return 1;
2820 if (fl & SUB_LIST) /* safety: don't think this can happen */
2821 return 0;
2823 /* munge the whole string: no match, so no replstr */
2824 *sp = get_match_ret(*sp, 0, 0, fl, 0, 0);
2825 return (fl & SUB_RETFAIL) ? 0 : 1;
2828 /**/
2829 #else
2832 * Increment pointer which may be on a Meta (x is a pointer variable),
2833 * returning the incremented value (i.e. like pre-increment).
2835 #define METAINC(x) ((x) += (*(x) == Meta) ? 2 : 1)
2837 /**/
2838 static int
2839 igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
2840 LinkList *repllistp)
2842 char *s = *sp, *t;
2844 * Note that ioff and uml count characters in the character
2845 * set (Meta's are not included), while l counts characters in the
2846 * metafied string. umlen is a counter for (unmetafied) character
2847 * lengths.
2849 int ioff, l = strlen(*sp), uml = ztrlen(*sp), matched = 1, umlen;
2851 * List of bits of matches to concatenate with replacement string.
2852 * The data is a struct repldata. It is not used in cases like
2853 * ${...//#foo/bar} even though SUB_GLOBAL is set, since the match
2854 * is anchored. It goes on the heap.
2856 LinkList repllist = NULL;
2858 /* perform must-match test for complex closures */
2859 if (p->mustoff)
2862 * Yuk. Probably we should rewrite this whole function to
2863 * use an unmetafied test string.
2865 * Use META_HEAPDUP because we need a terminating NULL.
2867 char *muststr = metafy((char *)p + p->mustoff,
2868 p->patmlen, META_HEAPDUP);
2870 if (!strstr(s, muststr))
2871 matched = 0;
2874 /* in case we used the prog before... */
2875 p->flags &= ~(PAT_NOTSTART|PAT_NOTEND);
2877 if (fl & SUB_ALL) {
2878 int i = matched && pattry(p, s);
2879 *sp = get_match_ret(*sp, 0, i ? l : 0, fl, i ? replstr : 0, NULL);
2880 if (! **sp && (((fl & SUB_MATCH) && !i) || ((fl & SUB_REST) && i)))
2881 return 0;
2882 return 1;
2884 if (matched) {
2885 switch (fl & (SUB_END|SUB_LONG|SUB_SUBSTR)) {
2886 case 0:
2887 case SUB_LONG:
2889 * Largest/smallest possible match at head of string.
2890 * First get the longest match...
2892 if (pattry(p, s)) {
2893 /* patmatchlen returns metafied length, as we need */
2894 int mlen = patmatchlen();
2895 if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
2897 * ... now we know whether it's worth looking for the
2898 * shortest, which we do by brute force.
2900 for (t = s, umlen = 0; t < s + mlen; METAINC(t), umlen++) {
2901 set_pat_end(p, *t);
2902 if (pattrylen(p, s, t - s, umlen, 0)) {
2903 mlen = patmatchlen();
2904 break;
2908 *sp = get_match_ret(*sp, 0, mlen, fl, replstr, NULL);
2909 return 1;
2911 break;
2913 case SUB_END:
2914 /* Smallest possible match at tail of string: *
2915 * move back down string until we get a match. *
2916 * There's no optimization here. */
2917 for (ioff = uml, t = s + l, umlen = 0; t >= s;
2918 t--, ioff--, umlen++) {
2919 if (t > s && t[-1] == Meta)
2920 t--;
2921 set_pat_start(p, t-s);
2922 if (pattrylen(p, t, s + l - t, umlen, ioff)) {
2923 *sp = get_match_ret(*sp, t - s, l, fl, replstr, NULL);
2924 return 1;
2926 if (t > s+1 && t[-2] == Meta)
2927 t--;
2929 break;
2931 case (SUB_END|SUB_LONG):
2932 /* Largest possible match at tail of string: *
2933 * move forward along string until we get a match. *
2934 * Again there's no optimisation. */
2935 for (ioff = 0, t = s, umlen = uml; t < s + l;
2936 ioff++, METAINC(t), umlen--) {
2937 set_pat_start(p, t-s);
2938 if (pattrylen(p, t, s + l - t, umlen, ioff)) {
2939 *sp = get_match_ret(*sp, t-s, l, fl, replstr, NULL);
2940 return 1;
2942 if (*t == Meta)
2943 t++;
2945 break;
2947 case SUB_SUBSTR:
2948 /* Smallest at start, but matching substrings. */
2949 set_pat_start(p, l);
2950 if (!(fl & SUB_GLOBAL) && pattry(p, s + l) && !--n) {
2951 *sp = get_match_ret(*sp, 0, 0, fl, replstr, NULL);
2952 return 1;
2953 } /* fall through */
2954 case (SUB_SUBSTR|SUB_LONG):
2955 /* longest or smallest at start with substrings */
2956 t = s;
2957 if (fl & SUB_GLOBAL) {
2958 repllist = newlinklist();
2959 if (repllistp)
2960 *repllistp = repllist;
2962 ioff = 0; /* offset into string */
2963 umlen = uml;
2964 do {
2965 /* loop over all matches for global substitution */
2966 matched = 0;
2967 for (; t < s + l; METAINC(t), ioff++, umlen--) {
2968 /* Find the longest match from this position. */
2969 set_pat_start(p, t-s);
2970 if (pattrylen(p, t, s + l - t, umlen, ioff)) {
2971 char *mpos = t + patmatchlen();
2972 if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
2973 char *ptr;
2974 int umlen2;
2975 for (ptr = t, umlen2 = 0; ptr < mpos;
2976 METAINC(ptr), umlen2++) {
2977 set_pat_end(p, *ptr);
2978 if (pattrylen(p, t, ptr - t, umlen2, ioff)) {
2979 mpos = t + patmatchlen();
2980 break;
2984 if (!--n || (n <= 0 && (fl & SUB_GLOBAL))) {
2985 *sp = get_match_ret(*sp, t-s, mpos-s, fl,
2986 replstr, repllist);
2987 if (mpos == t)
2988 METAINC(mpos);
2990 if (!(fl & SUB_GLOBAL)) {
2991 if (n) {
2993 * Looking for a later match: in this case,
2994 * we can continue looking for matches from
2995 * the next character, even if it overlaps
2996 * with what we just found.
2998 continue;
2999 } else {
3000 return 1;
3004 * For a global match, we need to skip the stuff
3005 * which is already marked for replacement.
3007 matched = 1;
3008 for ( ; t < mpos; t++, ioff++, umlen--)
3009 if (*t == Meta)
3010 t++;
3011 break;
3013 if (*t == Meta)
3014 t++;
3016 } while (matched);
3018 * check if we can match a blank string, if so do it
3019 * at the start. Goodness knows if this is a good idea
3020 * with global substitution, so it doesn't happen.
3022 set_pat_start(p, l);
3023 if ((fl & (SUB_LONG|SUB_GLOBAL)) == SUB_LONG &&
3024 pattry(p, s + l) && !--n) {
3025 *sp = get_match_ret(*sp, 0, 0, fl, replstr, repllist);
3026 return 1;
3028 break;
3030 case (SUB_END|SUB_SUBSTR):
3031 case (SUB_END|SUB_LONG|SUB_SUBSTR):
3032 /* Longest/shortest at end, matching substrings. */
3033 if (!(fl & SUB_LONG)) {
3034 set_pat_start(p, l);
3035 if (pattrylen(p, s + l, 0, 0, uml) && !--n) {
3036 *sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
3037 return 1;
3040 for (ioff = uml - 1, t = s + l - 1, umlen = 1; t >= s;
3041 t--, ioff--, umlen++) {
3042 if (t > s && t[-1] == Meta)
3043 t--;
3044 set_pat_start(p, t-s);
3045 if (pattrylen(p, t, s + l - t, umlen, ioff) && !--n) {
3046 /* Found the longest match */
3047 char *mpos = t + patmatchlen();
3048 if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
3049 char *ptr;
3050 int umlen2;
3051 for (ptr = t, umlen2 = 0; ptr < mpos;
3052 METAINC(ptr), umlen2++) {
3053 set_pat_end(p, *ptr);
3054 if (pattrylen(p, t, ptr - t, umlen2, ioff)) {
3055 mpos = t + patmatchlen();
3056 break;
3060 *sp = get_match_ret(*sp, t-s, mpos-s, fl,
3061 replstr, NULL);
3062 return 1;
3065 set_pat_start(p, l);
3066 if ((fl & SUB_LONG) && pattrylen(p, s + l, 0, 0, uml) && !--n) {
3067 *sp = get_match_ret(*sp, l, l, fl, replstr, NULL);
3068 return 1;
3070 break;
3074 if (repllist && nonempty(repllist)) {
3075 /* Put all the bits of a global search and replace together. */
3076 LinkNode nd;
3077 Repldata rd;
3078 int lleft = 0; /* size of returned string */
3079 char *ptr, *start;
3080 int i;
3082 i = 0; /* start of last chunk we got from *sp */
3083 for (nd = firstnode(repllist); nd; incnode(nd)) {
3084 rd = (Repldata) getdata(nd);
3085 lleft += rd->b - i; /* previous chunk of *sp */
3086 lleft += strlen(rd->replstr); /* the replaced bit */
3087 i = rd->e; /* start of next chunk of *sp */
3089 lleft += l - i; /* final chunk from *sp */
3090 start = t = zhalloc(lleft+1);
3091 i = 0;
3092 for (nd = firstnode(repllist); nd; incnode(nd)) {
3093 rd = (Repldata) getdata(nd);
3094 memcpy(t, s + i, rd->b - i);
3095 t += rd->b - i;
3096 ptr = rd->replstr;
3097 while (*ptr)
3098 *t++ = *ptr++;
3099 i = rd->e;
3101 memcpy(t, s + i, l - i);
3102 start[lleft] = '\0';
3103 *sp = (char *)start;
3104 return 1;
3107 /* munge the whole string: no match, so no replstr */
3108 *sp = get_match_ret(*sp, 0, 0, fl, 0, 0);
3109 return 1;
3112 /**/
3113 #endif /* MULTIBYTE_SUPPORT */
3115 /* blindly turn a string into a tokenised expression without lexing */
3117 /**/
3118 mod_export void
3119 tokenize(char *s)
3121 zshtokenize(s, 0);
3125 * shtokenize is used when we tokenize a string with GLOB_SUBST set.
3126 * In that case we need to retain backslashes when we turn the
3127 * pattern back into a string, so that the string is not
3128 * modified if it failed to match a pattern.
3130 * It may be modified by the effect of SH_GLOB which turns off
3131 * various zsh-specific options.
3134 /**/
3135 mod_export void
3136 shtokenize(char *s)
3138 int flags = ZSHTOK_SUBST;
3139 if (isset(SHGLOB))
3140 flags |= ZSHTOK_SHGLOB;
3141 zshtokenize(s, flags);
3144 /**/
3145 static void
3146 zshtokenize(char *s, int flags)
3148 char *t;
3149 int bslash = 0;
3151 for (; *s; s++) {
3152 cont:
3153 switch (*s) {
3154 case Bnull:
3155 case Bnullkeep:
3156 case '\\':
3157 if (bslash) {
3158 s[-1] = (flags & ZSHTOK_SUBST) ? Bnullkeep : Bnull;
3159 break;
3161 bslash = 1;
3162 continue;
3163 case '<':
3164 if (flags & ZSHTOK_SHGLOB)
3165 break;
3166 if (bslash) {
3167 s[-1] = (flags & ZSHTOK_SUBST) ? Bnullkeep : Bnull;
3168 break;
3170 t = s;
3171 while (idigit(*++s));
3172 if (*s != '-')
3173 goto cont;
3174 while (idigit(*++s));
3175 if (*s != '>')
3176 goto cont;
3177 *t = Inang;
3178 *s = Outang;
3179 break;
3180 case '(':
3181 case '|':
3182 case ')':
3183 if (flags & ZSHTOK_SHGLOB)
3184 break;
3185 case '>':
3186 case '^':
3187 case '#':
3188 case '~':
3189 case '[':
3190 case ']':
3191 case '*':
3192 case '?':
3193 case '=':
3194 for (t = ztokens; *t; t++)
3195 if (*t == *s) {
3196 if (bslash)
3197 s[-1] = (flags & ZSHTOK_SUBST) ? Bnullkeep : Bnull;
3198 else
3199 *s = (t - ztokens) + Pound;
3200 break;
3203 bslash = 0;
3207 /* remove unnecessary Nulargs */
3209 /**/
3210 mod_export void
3211 remnulargs(char *s)
3213 if (*s) {
3214 char *o = s, c;
3216 while ((c = *s++))
3217 if (c == Bnullkeep) {
3219 * An active backslash that needs to be turned back into
3220 * a real backslash for output. However, we don't
3221 * do that yet since we need to ignore it during
3222 * pattern matching.
3224 continue;
3225 } else if (inull(c)) {
3226 char *t = s - 1;
3228 while ((c = *s++)) {
3229 if (c == Bnullkeep)
3230 *t++ = '\\';
3231 else if (!inull(c))
3232 *t++ = c;
3234 *t = '\0';
3235 if (!*o) {
3236 o[0] = Nularg;
3237 o[1] = '\0';
3239 break;
3244 /* qualifier functions: mostly self-explanatory, see glob(). */
3246 /* device number */
3248 /**/
3249 static int
3250 qualdev(UNUSED(char *name), struct stat *buf, off_t dv, UNUSED(char *dummy))
3252 return (off_t)buf->st_dev == dv;
3255 /* number of hard links to file */
3257 /**/
3258 static int
3259 qualnlink(UNUSED(char *name), struct stat *buf, off_t ct, UNUSED(char *dummy))
3261 return (g_range < 0 ? buf->st_nlink < ct :
3262 g_range > 0 ? buf->st_nlink > ct :
3263 buf->st_nlink == ct);
3266 /* user ID */
3268 /**/
3269 static int
3270 qualuid(UNUSED(char *name), struct stat *buf, off_t uid, UNUSED(char *dummy))
3272 return buf->st_uid == uid;
3275 /* group ID */
3277 /**/
3278 static int
3279 qualgid(UNUSED(char *name), struct stat *buf, off_t gid, UNUSED(char *dummy))
3281 return buf->st_gid == gid;
3284 /* device special file? */
3286 /**/
3287 static int
3288 qualisdev(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy))
3290 return S_ISBLK(buf->st_mode) || S_ISCHR(buf->st_mode);
3293 /* block special file? */
3295 /**/
3296 static int
3297 qualisblk(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy))
3299 return S_ISBLK(buf->st_mode);
3302 /* character special file? */
3304 /**/
3305 static int
3306 qualischr(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy))
3308 return S_ISCHR(buf->st_mode);
3311 /* directory? */
3313 /**/
3314 static int
3315 qualisdir(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy))
3317 return S_ISDIR(buf->st_mode);
3320 /* FIFO? */
3322 /**/
3323 static int
3324 qualisfifo(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy))
3326 return S_ISFIFO(buf->st_mode);
3329 /* symbolic link? */
3331 /**/
3332 static int
3333 qualislnk(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy))
3335 return S_ISLNK(buf->st_mode);
3338 /* regular file? */
3340 /**/
3341 static int
3342 qualisreg(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy))
3344 return S_ISREG(buf->st_mode);
3347 /* socket? */
3349 /**/
3350 static int
3351 qualissock(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy))
3353 return S_ISSOCK(buf->st_mode);
3356 /* given flag is set in mode */
3358 /**/
3359 static int
3360 qualflags(UNUSED(char *name), struct stat *buf, off_t mod, UNUSED(char *dummy))
3362 return mode_to_octal(buf->st_mode) & mod;
3365 /* mode matches specification */
3367 /**/
3368 static int
3369 qualmodeflags(UNUSED(char *name), struct stat *buf, off_t mod, UNUSED(char *dummy))
3371 long v = mode_to_octal(buf->st_mode), y = mod & 07777, n = mod >> 12;
3373 return ((v & y) == y && !(v & n));
3376 /* regular executable file? */
3378 /**/
3379 static int
3380 qualiscom(UNUSED(char *name), struct stat *buf, UNUSED(off_t mod), UNUSED(char *dummy))
3382 return S_ISREG(buf->st_mode) && (buf->st_mode & S_IXUGO);
3385 /* size in required range? */
3387 /**/
3388 static int
3389 qualsize(UNUSED(char *name), struct stat *buf, off_t size, UNUSED(char *dummy))
3391 #if defined(LONG_IS_64_BIT) || defined(OFF_T_IS_64_BIT)
3392 # define QS_CAST_SIZE()
3393 off_t scaled = buf->st_size;
3394 #else
3395 # define QS_CAST_SIZE() (unsigned long)
3396 unsigned long scaled = (unsigned long)buf->st_size;
3397 #endif
3399 switch (g_units) {
3400 case TT_POSIX_BLOCKS:
3401 scaled += 511l;
3402 scaled /= 512l;
3403 break;
3404 case TT_KILOBYTES:
3405 scaled += 1023l;
3406 scaled /= 1024l;
3407 break;
3408 case TT_MEGABYTES:
3409 scaled += 1048575l;
3410 scaled /= 1048576l;
3411 break;
3414 return (g_range < 0 ? scaled < QS_CAST_SIZE() size :
3415 g_range > 0 ? scaled > QS_CAST_SIZE() size :
3416 scaled == QS_CAST_SIZE() size);
3417 #undef QS_CAST_SIZE
3420 /* time in required range? */
3422 /**/
3423 static int
3424 qualtime(UNUSED(char *name), struct stat *buf, off_t days, UNUSED(char *dummy))
3426 time_t now, diff;
3428 time(&now);
3429 diff = now - (g_amc == 0 ? buf->st_atime : g_amc == 1 ? buf->st_mtime :
3430 buf->st_ctime);
3431 /* handle multipliers indicating units */
3432 switch (g_units) {
3433 case TT_DAYS:
3434 diff /= 86400l;
3435 break;
3436 case TT_HOURS:
3437 diff /= 3600l;
3438 break;
3439 case TT_MINS:
3440 diff /= 60l;
3441 break;
3442 case TT_WEEKS:
3443 diff /= 604800l;
3444 break;
3445 case TT_MONTHS:
3446 diff /= 2592000l;
3447 break;
3450 return (g_range < 0 ? diff < days :
3451 g_range > 0 ? diff > days :
3452 diff == days);
3455 /* evaluate a string */
3457 /**/
3458 static int
3459 qualsheval(char *name, UNUSED(struct stat *buf), UNUSED(off_t days), char *str)
3461 Eprog prog;
3463 if ((prog = parse_string(str, 0))) {
3464 int ef = errflag, lv = lastval, ret;
3466 unsetparam("reply");
3467 setsparam("REPLY", ztrdup(name));
3469 execode(prog, 1, 0);
3471 ret = lastval;
3472 errflag = ef;
3473 lastval = lv;
3475 if (!(inserts = getaparam("reply")) &&
3476 !(inserts = gethparam("reply"))) {
3477 char *tmp;
3479 if ((tmp = getsparam("reply")) || (tmp = getsparam("REPLY"))) {
3480 static char *tmparr[2];
3482 tmparr[0] = tmp;
3483 tmparr[1] = NULL;
3485 inserts = tmparr;
3488 return !ret;
3490 return 0;
3493 /**/
3494 static int
3495 qualnonemptydir(char *name, struct stat *buf, UNUSED(off_t days), UNUSED(char *str))
3497 DIR *dirh;
3498 struct dirent *de;
3499 int unamelen;
3500 char *uname = unmetafy(dupstring(name), &unamelen);
3502 if (!S_ISDIR(buf->st_mode))
3503 return 0;
3505 if (buf->st_nlink > 2)
3506 return 1;
3508 if (!(dirh = opendir(uname)))
3509 return 0;
3511 while ((de = readdir(dirh))) {
3512 if (strcmp(de->d_name, ".") && strcmp(de->d_name, "..")) {
3513 closedir(dirh);
3514 return 1;
3518 closedir(dirh);
3519 return 0;