1 /* exclude.c -- exclude file names
3 Copyright (C) 1992-1994, 1997, 1999-2007, 2009-2024 Free Software
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <https://www.gnu.org/licenses/>. */
19 /* Written by Paul Eggert <eggert@twinsun.com>
20 and Sergey Poznyakoff <gray@gnu.org>.
21 Thanks to Phil Proudman <phil@proudman51.freeserve.co.uk>
22 for improvement suggestions. */
39 #if GNULIB_MCEL_PREFER
46 #if GNULIB_EXCLUDE_SINGLE_THREAD
47 # include "unlocked-io.h"
50 /* Non-GNU systems lack these options, so we don't need to check them. */
52 # define FNM_CASEFOLD 0
55 # define FNM_EXTMATCH 0
57 #ifndef FNM_LEADING_DIR
58 # define FNM_LEADING_DIR 0
61 static_assert (((EXCLUDE_ANCHORED
| EXCLUDE_INCLUDE
| EXCLUDE_WILDCARDS
)
62 & (FNM_PATHNAME
| FNM_NOESCAPE
| FNM_PERIOD
| FNM_LEADING_DIR
63 | FNM_CASEFOLD
| FNM_EXTMATCH
))
67 /* Exclusion patterns are grouped into a singly-linked list of
68 "exclusion segments". Each segment represents a set of patterns
69 that can be matches using the same algorithm. Non-wildcard
70 patterns are kept in hash tables, to speed up searches. Wildcard
71 patterns are stored as arrays of patterns. */
74 /* An exclude pattern-options pair. The options are fnmatch options
75 ORed with EXCLUDE_* options. */
87 /* An array of pattern-options pairs. */
89 struct exclude_pattern
91 struct patopts
*exclude
;
98 exclude_hash
, /* a hash table of excluded names */
99 exclude_pattern
/* an array of exclude patterns */
102 struct exclude_segment
104 struct exclude_segment
*next
; /* next segment in list */
105 enum exclude_type type
; /* type of this segment */
106 int options
; /* common options for this segment */
109 Hash_table
*table
; /* for type == exclude_hash */
110 struct exclude_pattern pat
; /* for type == exclude_pattern */
114 struct pattern_buffer
116 struct pattern_buffer
*next
;
120 /* The exclude structure keeps a singly-linked list of exclude segments,
121 maintained in reverse order. */
124 struct exclude_segment
*head
;
125 struct pattern_buffer
*patbuf
;
128 /* Register BUF in the pattern buffer list of EX. ADD_FUNC (see
129 add_exclude_file and add_exclude_fp below) can use this function
130 if it modifies the pattern, to ensure the allocated memory will be
131 properly reclaimed upon calling free_exclude. */
133 exclude_add_pattern_buffer (struct exclude
*ex
, char *buf
)
135 struct pattern_buffer
*pbuf
= xmalloc (sizeof *pbuf
);
137 pbuf
->next
= ex
->patbuf
;
141 /* Return true if STR has or may have wildcards, when matched with OPTIONS.
142 Return false if STR definitely does not have wildcards. */
144 fnmatch_pattern_has_wildcards (const char *str
, int options
)
155 if (options
& EXCLUDE_REGEX
)
160 if (options
& EXCLUDE_REGEX
)
163 str
+= ! (options
& FNM_NOESCAPE
) && *str
;
166 case '+': case '@': case '!':
167 if (options
& FNM_EXTMATCH
&& *str
== '(')
171 case '?': case '*': case '[':
181 unescape_pattern (char *str
)
185 q
+= *q
== '\\' && q
[1];
186 while ((*str
++ = *q
++));
189 /* Return a newly allocated and empty exclude list. */
194 return xzalloc (sizeof *new_exclude ());
197 /* Calculate the hash of string. */
199 string_hasher (void const *data
, size_t n_buckets
)
201 return hash_string (data
, n_buckets
);
204 /* Ditto, for case-insensitive hashes */
206 string_hasher_ci (void const *data
, size_t n_buckets
)
208 char const *p
= data
;
211 #if GNULIB_MCEL_PREFER
214 mcel_t g
= mcel_scanz (p
);
215 value
= value
* 31 + (c32tolower (g
.ch
) - g
.err
);
219 mbui_iterator_t iter
;
221 for (mbui_init (iter
, p
); mbui_avail (iter
); mbui_advance (iter
))
223 mbchar_t m
= mbui_cur (iter
);
227 wc
= c32tolower (m
.wc
);
231 value
= value
* 31 + wc
;
235 return value
% n_buckets
;
238 /* compare two strings for equality */
240 string_compare (void const *data1
, void const *data2
)
242 return strcmp (data1
, data2
) == 0;
245 /* compare two strings for equality, case-insensitive */
247 string_compare_ci (void const *data1
, void const *data2
)
249 return mbscasecmp (data1
, data2
) == 0;
252 /* Create new exclude segment of given TYPE and OPTIONS, and attach it
253 to the head of EX. */
255 new_exclude_segment (struct exclude
*ex
, enum exclude_type type
, int options
)
257 struct exclude_segment
*sp
= xmalloc (sizeof (struct exclude_segment
));
259 sp
->options
= options
;
262 case exclude_pattern
:
263 sp
->v
.pat
= (struct exclude_pattern
) { NULL
, 0, 0 };
267 sp
->v
.table
= hash_initialize (0, nullptr,
268 (options
& FNM_CASEFOLD
271 (options
& FNM_CASEFOLD
281 /* Free a single exclude segment */
283 free_exclude_segment (struct exclude_segment
*seg
)
287 case exclude_pattern
:
288 for (idx_t i
= 0; i
< seg
->v
.pat
.exclude_count
; i
++)
289 if (seg
->v
.pat
.exclude
[i
].options
& EXCLUDE_REGEX
)
290 regfree (&seg
->v
.pat
.exclude
[i
].v
.re
);
291 free (seg
->v
.pat
.exclude
);
295 hash_free (seg
->v
.table
);
301 /* Free the storage associated with an exclude list. */
303 free_exclude (struct exclude
*ex
)
305 for (struct exclude_segment
*seg
= ex
->head
; seg
; )
307 struct exclude_segment
*next
= seg
->next
;
308 free_exclude_segment (seg
);
312 for (struct pattern_buffer
*pbuf
= ex
->patbuf
; pbuf
; )
314 struct pattern_buffer
*next
= pbuf
->next
;
323 /* Return zero if PATTERN matches F, obeying OPTIONS, except that
324 (unlike fnmatch) wildcards are disabled in PATTERN. */
327 fnmatch_no_wildcards (char const *pattern
, char const *f
, int options
)
329 if (! (options
& FNM_LEADING_DIR
))
330 return ((options
& FNM_CASEFOLD
)
331 ? mbscasecmp (pattern
, f
)
332 : strcmp (pattern
, f
));
333 else if (! (options
& FNM_CASEFOLD
))
335 idx_t patlen
= strlen (pattern
);
336 int r
= strncmp (pattern
, f
, patlen
);
347 /* Walk through a copy of F, seeing whether P matches any prefix
350 FIXME: This is an O(N**2) algorithm; it should be O(N).
351 Also, the copy should not be necessary. However, fixing this
352 will probably involve a change to the mbs* API. */
354 char *fcopy
= xstrdup (f
);
356 for (char *p
= fcopy
; ; *p
++ = '/')
361 r
= mbscasecmp (pattern
, fcopy
);
371 exclude_fnmatch (char const *pattern
, char const *f
, int options
)
373 int (*matcher
) (char const *, char const *, int) =
374 (options
& EXCLUDE_WILDCARDS
376 : fnmatch_no_wildcards
);
377 bool matched
= matcher (pattern
, f
, options
) == 0;
379 if (! (options
& EXCLUDE_ANCHORED
))
380 for (char const *p
= f
; *p
&& ! matched
; p
++)
381 if (*p
== '/' && p
[1] != '/')
382 matched
= matcher (pattern
, p
+ 1, options
) == 0;
388 exclude_patopts (struct patopts
const *opts
, char const *f
)
390 int options
= opts
->options
;
392 return (options
& EXCLUDE_REGEX
393 ? regexec (&opts
->v
.re
, f
, 0, nullptr, 0) == 0
394 : exclude_fnmatch (opts
->v
.pattern
, f
, options
));
397 /* Return true if the exclude_pattern segment SEG matches F. */
400 file_pattern_matches (struct exclude_segment
const *seg
, char const *f
)
402 idx_t exclude_count
= seg
->v
.pat
.exclude_count
;
403 struct patopts
const *exclude
= seg
->v
.pat
.exclude
;
405 for (idx_t i
= 0; i
< exclude_count
; i
++)
406 if (exclude_patopts (exclude
+ i
, f
))
411 /* Return true if the exclude_hash segment SEG matches F.
412 BUFFER is an auxiliary storage of the same length as F (with nul
413 terminator included) */
415 file_name_matches (struct exclude_segment
const *seg
, char const *f
,
418 int options
= seg
->options
;
419 Hash_table
*table
= seg
->v
.table
;
423 /* initialize the pattern */
428 if (hash_lookup (table
, buffer
))
430 if (options
& FNM_LEADING_DIR
)
432 char *p
= strrchr (buffer
, '/');
442 if (!(options
& EXCLUDE_ANCHORED
))
456 /* Return true if EX excludes F. */
459 excluded_file_name (struct exclude
const *ex
, char const *f
)
461 /* If no patterns are given, the default is to include. */
466 char *filename
= nullptr;
468 /* Scan through the segments, reporting the status of the first match.
469 The segments are in reverse order, so this reports the status of
470 the last match in the original option list. */
471 struct exclude_segment
*seg
;
472 for (seg
= ex
->head
; ; seg
= seg
->next
)
474 if (seg
->type
== exclude_hash
)
477 filename
= xmalloc (strlen (f
) + 1);
478 if (file_name_matches (seg
, f
, filename
))
483 if (file_pattern_matches (seg
, f
))
489 /* If patterns are given but none match, the default is the
490 opposite of the last segment (i.e., the first in the
491 original option list). For example, in the command
492 'grep -r --exclude="a*" --include="*b" pat dir', the
493 first option is --exclude so any file name matching
494 neither a* nor *b is included. */
501 return invert
^ ! (seg
->options
& EXCLUDE_INCLUDE
);
504 /* Append to EX the exclusion PATTERN with OPTIONS. */
507 add_exclude (struct exclude
*ex
, char const *pattern
, int options
)
509 if ((options
& (EXCLUDE_REGEX
|EXCLUDE_WILDCARDS
))
510 && fnmatch_pattern_has_wildcards (pattern
, options
))
512 if (! (ex
->head
&& ex
->head
->type
== exclude_pattern
513 && ((ex
->head
->options
& EXCLUDE_INCLUDE
)
514 == (options
& EXCLUDE_INCLUDE
))))
515 new_exclude_segment (ex
, exclude_pattern
, options
);
517 struct exclude_pattern
*pat
= &ex
->head
->v
.pat
;
518 if (pat
->exclude_count
== pat
->exclude_alloc
)
519 pat
->exclude
= xpalloc (pat
->exclude
, &pat
->exclude_alloc
, 1, -1,
520 sizeof *pat
->exclude
);
521 struct patopts
*patopts
= &pat
->exclude
[pat
->exclude_count
++];
523 patopts
->options
= options
;
524 if (options
& EXCLUDE_REGEX
)
527 int cflags
= (REG_NOSUB
| REG_EXTENDED
528 | (options
& FNM_CASEFOLD
? REG_ICASE
: 0));
530 if (! (options
& FNM_LEADING_DIR
))
531 rc
= regcomp (&patopts
->v
.re
, pattern
, cflags
);
533 for (idx_t len
= strlen (pattern
); ; len
--)
540 if (!ISSLASH (pattern
[len
- 1]))
542 static char const patsuffix
[] = "(/.*)?";
543 char *tmp
= ximalloc (len
+ sizeof patsuffix
);
544 memcpy (tmp
, pattern
, len
);
545 strcpy (tmp
+ len
, patsuffix
);
546 rc
= regcomp (&patopts
->v
.re
, tmp
, cflags
);
554 pat
->exclude_count
--;
560 if (options
& EXCLUDE_ALLOC
)
562 char *dup
= xstrdup (pattern
);
564 exclude_add_pattern_buffer (ex
, dup
);
566 patopts
->v
.pattern
= pattern
;
571 int exclude_hash_flags
= (EXCLUDE_INCLUDE
| EXCLUDE_ANCHORED
572 | FNM_LEADING_DIR
| FNM_CASEFOLD
);
573 if (! (ex
->head
&& ex
->head
->type
== exclude_hash
574 && ((ex
->head
->options
& exclude_hash_flags
)
575 == (options
& exclude_hash_flags
))))
576 new_exclude_segment (ex
, exclude_hash
, options
);
578 char *str
= xstrdup (pattern
);
579 if ((options
& (EXCLUDE_WILDCARDS
| FNM_NOESCAPE
)) == EXCLUDE_WILDCARDS
)
580 unescape_pattern (str
);
581 if (hash_insert (ex
->head
->v
.table
, str
) != str
)
586 /* Use ADD_FUNC to append to EX the patterns in FILE_NAME, each with
587 OPTIONS. LINE_END terminates each pattern in the file. If
588 LINE_END is a space character, ignore trailing spaces and empty
589 lines in FP. Return -1 (setting errno) on failure, 0 on success. */
592 add_exclude_fp (void (*add_func
) (struct exclude
*, char const *, int, void *),
593 struct exclude
*ex
, FILE *fp
, int options
,
594 char line_end
, void *data
)
600 for (int c
; (c
= getc (fp
)) != EOF
; )
602 if (buf_count
== buf_alloc
)
603 buf
= xpalloc (buf
, &buf_alloc
, 1, -1, 1);
604 buf
[buf_count
++] = c
;
607 int e
= ferror (fp
) ? errno
: 0;
609 buf
= xirealloc (buf
, buf_count
+ 1);
610 buf
[buf_count
] = line_end
;
611 char const *lim
= (buf
+ buf_count
612 + ! (buf_count
== 0 || buf
[buf_count
- 1] == line_end
));
614 exclude_add_pattern_buffer (ex
, buf
);
618 for (char *p
= buf
; p
< lim
; p
++)
621 char *pattern_end
= p
;
623 if (isspace ((unsigned char) line_end
))
625 /* Assume that no multi-byte character has a trailing byte
626 that satisfies isspace, and that nobody cares about
627 trailing white space containing non-single-byte characters.
628 If either assumption turns out to be false, presumably
629 the code should be changed to scan forward through the
630 entire pattern, one multi-byte character at a time. */
631 for (; ; pattern_end
--)
632 if (pattern_end
== pattern
)
634 else if (! isspace ((unsigned char) pattern_end
[-1]))
639 add_func (ex
, pattern
, options
, data
);
650 call_addfn (struct exclude
*ex
, char const *pattern
, int options
, void *data
)
652 void (**addfnptr
) (struct exclude
*, char const *, int) = data
;
653 (*addfnptr
) (ex
, pattern
, options
);
657 add_exclude_file (void (*add_func
) (struct exclude
*, char const *, int),
658 struct exclude
*ex
, char const *file_name
, int options
,
661 if (strcmp (file_name
, "-") == 0)
662 return add_exclude_fp (call_addfn
, ex
, stdin
, options
, line_end
, &add_func
);
664 FILE *in
= fopen (file_name
, "re");
667 int rc
= add_exclude_fp (call_addfn
, ex
, in
, options
, line_end
, &add_func
);