2 * Copyright (c) 2011 Jiri Zarevucky
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 /** @addtogroup libposix
32 /** @file Filename-matching.
36 * This file contains an implementation of the fnmatch() pattern matching
37 * function. There is more code than necessary to account for the possibility
38 * of adding POSIX-like locale support to the system in the future. Functions
39 * that are only necessary for locale support currently simply use single
40 * characters for "collation elements".
41 * When (or if) locales are properly implemented, extending this implementation
42 * will be fairly straightforward.
51 #include "internal/common.h"
54 /* Returned by _match... functions. */
55 #define INVALID_PATTERN -1
58 * Type for collating element, simple identity with characters now,
59 * but may be extended for better locale support.
61 typedef int coll_elm_t
;
63 /** Return value indicating that the element in question
64 * is not valid in the current locale. (That is, if locales are supported.)
66 #define COLL_ELM_INVALID -1
69 * Get collating element matching a string.
71 * @param str String representation of the element.
72 * @return Matching collating element or COLL_ELM_INVALID.
74 static coll_elm_t
_coll_elm_get(const char *str
)
76 if (str
[0] == '\0' || str
[1] != '\0') {
77 return COLL_ELM_INVALID
;
83 * Get collating element matching a single character.
85 * @param c Character representation of the element.
86 * @return Matching collating element.
88 static coll_elm_t
_coll_elm_char(int c
)
94 * Match collating element with a beginning of a string.
96 * @param elm Collating element to match.
97 * @param str String which beginning should match the element.
98 * @return 0 if the element doesn't match, or the number of characters matched.
100 static int _coll_elm_match(coll_elm_t elm
, const char *str
)
106 * Checks whether a string begins with a collating element in the given range.
107 * Ordering depends on the locale (if locales are supported).
109 * @param first First element of the range.
110 * @param second Last element of the range.
111 * @param str String to match.
112 * @return 0 if there is no match, or the number of characters matched.
114 static int _coll_elm_between(coll_elm_t first
, coll_elm_t second
,
117 return *str
>= first
&& *str
<= second
;
121 * Read a string delimited by [? and ?].
123 * @param pattern Pointer to the string to read from. Its position is moved
124 * to the first character after the closing ].
125 * @param seq The character on the inside of brackets.
126 * @param buf Read buffer.
127 * @param buf_sz Read buffer's size. If the buffer is not large enough for
128 * the entire string, the string is cut with no error indication.
129 * @param flags Flags modifying the behavior.
130 * @return True on success, false if the pattern is invalid.
132 static bool _get_delimited(
133 const char **pattern
, int seq
,
134 char *buf
, size_t buf_sz
, int flags
)
136 const bool noescape
= (flags
& FNM_NOESCAPE
) != 0;
137 const bool pathname
= (flags
& FNM_PATHNAME
) != 0;
139 const char *p
= *pattern
;
140 assert(p
[0] == '[' && p
[1] == seq
/* Caller should ensure this. */);
144 if (*p
== seq
&& *(p
+ 1) == ']') {
145 /* String properly ended, return. */
150 if (!noescape
&& *p
== '\\') {
154 /* String not ended properly, invalid pattern. */
157 if (pathname
&& *p
== '/') {
158 /* Slash in a pathname pattern is invalid. */
162 /* Only add to the buffer if there is space. */
175 #define MAX_CLASS_OR_COLL_LEN 6
182 /* List of supported character classes. */
183 static const struct _char_class _char_classes
[] = {
184 { "alnum", isalnum
},
185 { "alpha", isalpha
},
186 { "blank", isblank
},
187 { "cntrl", iscntrl
},
188 { "digit", isdigit
},
189 { "graph", isgraph
},
190 { "lower", islower
},
191 { "print", isprint
},
192 { "punct", ispunct
},
193 { "space", isspace
},
194 { "upper", isupper
},
195 { "xdigit", isxdigit
}
199 * Compare function for binary search in the _char_classes array.
201 * @param key Key of the searched element.
202 * @param elem Element of _char_classes array.
203 * @return Ordering indicator (-1 less than, 0 equal, 1 greater than).
205 static int _class_compare(const void *key
, const void *elem
)
207 const struct _char_class
*class = elem
;
208 return strcmp((const char *) key
, class->name
);
212 * Returns whether the given character belongs to the specified character class.
214 * @param cname Name of the character class.
215 * @param c Character.
216 * @return True if the character belongs to the class, false otherwise.
218 static bool _is_in_class (const char *cname
, int c
)
220 /* Search for class in the array of supported character classes. */
221 const struct _char_class
*class = bsearch(cname
, _char_classes
,
222 sizeof(_char_classes
) / sizeof(struct _char_class
),
223 sizeof(struct _char_class
), _class_compare
);
226 /* No such class supported - treat as an empty class. */
230 return class->func(c
);
235 * Tries to parse an initial part of the pattern as a character class pattern,
236 * and if successful, matches the beginning of the given string against the class.
238 * @param pattern Pointer to the pattern to match. Must begin with a class
239 * specifier and is repositioned to the first character after the specifier
241 * @param str String to match.
242 * @param flags Flags modifying the behavior (see fnmatch()).
243 * @return INVALID_PATTERN if the pattern doesn't start with a valid class
244 * specifier, 0 if the beginning of the matched string doesn't belong
245 * to the class, or positive number of characters matched.
247 static int _match_char_class(const char **pattern
, const char *str
, int flags
)
249 char class[MAX_CLASS_OR_COLL_LEN
+ 1];
251 if (!_get_delimited(pattern
, ':', class, sizeof(class), flags
)) {
252 return INVALID_PATTERN
;
255 return _is_in_class(class, *str
);
259 * END CHARACTER CLASSES
263 * Reads the next collating element in the pattern, taking into account
264 * locale (if supported) and flags (see fnmatch()).
266 * @param pattern Pattern.
267 * @param flags Flags given to fnmatch().
268 * @return Collating element on success,
269 * or COLL_ELM_INVALID if the pattern is invalid.
271 static coll_elm_t
_next_coll_elm(const char **pattern
, int flags
)
273 assert(pattern
!= NULL
);
274 assert(*pattern
!= NULL
);
275 assert(**pattern
!= '\0');
277 const char *p
= *pattern
;
278 const bool noescape
= (flags
& FNM_NOESCAPE
) != 0;
279 const bool pathname
= (flags
& FNM_PATHNAME
) != 0;
282 if (*(p
+ 1) == '.') {
283 char buf
[MAX_CLASS_OR_COLL_LEN
+ 1];
284 if (!_get_delimited(pattern
, '.', buf
, sizeof(buf
), flags
)) {
285 return COLL_ELM_INVALID
;
287 return _coll_elm_get(buf
);
290 if (*(p
+ 1) == '=') {
291 char buf
[MAX_CLASS_OR_COLL_LEN
+ 1];
292 if (!_get_delimited(pattern
, '=', buf
, sizeof(buf
), flags
)) {
293 return COLL_ELM_INVALID
;
295 return _coll_elm_get(buf
);
299 if (!noescape
&& *p
== '\\') {
303 return COLL_ELM_INVALID
;
306 if (pathname
&& *p
== '/') {
307 return COLL_ELM_INVALID
;
311 return _coll_elm_char(*p
);
314 #define _matched(match) { \
315 int _match = match; \
317 /* Invalid pattern */ \
319 } else if (matched == 0 && _match > 0) { \
326 * Matches the beginning of the given string against a bracket expression
327 * the pattern begins with.
329 * @param pattern Pointer to the beginning of a bracket expression in a pattern.
330 * On success, the pointer is moved to the first character after the
331 * bracket expression.
332 * @param str Unmatched part of the string.
333 * @param flags Flags given to fnmatch().
334 * @return INVALID_PATTERN if the pattern is invalid, 0 if there is no match
335 * or the number of matched characters on success.
337 static int _match_bracket_expr(const char **pattern
, const char *str
, int flags
)
339 const bool pathname
= (flags
& FNM_PATHNAME
) != 0;
340 const bool special_period
= (flags
& FNM_PERIOD
) != 0;
341 const char *p
= *pattern
;
342 bool negative
= false;
345 assert(*p
== '['); /* calling code should ensure this */
348 if (*str
== '\0' || (pathname
&& *str
== '/') ||
349 (pathname
&& special_period
&& *str
== '.' && *(str
- 1) == '/')) {
351 * No bracket expression matches end of string,
352 * slash in pathname match or initial period with FNM_PERIOD
358 if (*p
== '^' || *p
== '!') {
364 /* When ']' is first, treat it as a normal character. */
365 _matched(*str
== ']');
369 coll_elm_t current_elm
= COLL_ELM_INVALID
;
372 if (*p
== '-' && *(p
+ 1) != ']' &&
373 current_elm
!= COLL_ELM_INVALID
) {
374 /* Range expression. */
376 coll_elm_t end_elm
= _next_coll_elm(&p
, flags
);
377 if (end_elm
== COLL_ELM_INVALID
) {
378 return INVALID_PATTERN
;
380 _matched(_coll_elm_between(current_elm
, end_elm
, str
));
384 if (*p
== '[' && *(p
+ 1) == ':') {
385 current_elm
= COLL_ELM_INVALID
;
386 _matched(_match_char_class(&p
, str
, flags
));
390 current_elm
= _next_coll_elm(&p
, flags
);
391 if (current_elm
== COLL_ELM_INVALID
) {
392 return INVALID_PATTERN
;
394 _matched(_coll_elm_match(current_elm
, str
));
397 /* No error occured - update pattern pointer. */
404 /* Matched 'match' characters. */
405 return negative
? 0 : matched
;
410 * Matches a portion of the pattern containing no asterisks (*) against
413 * @param pattern Pointer to the unmatched portion of the pattern.
414 * On success, the pointer is moved to the first asterisk, or to the
415 * terminating nul character, whichever occurs first.
416 * @param string Pointer to the input string. On success, the pointer is moved
417 * to the first character that wasn't explicitly matched.
418 * @param flags Flags given to fnmatch().
419 * @return True if the entire subpattern matched. False otherwise.
421 static bool _partial_match(const char **pattern
, const char **string
, int flags
)
424 * Only a single *-delimited subpattern is matched here.
425 * So in this function, '*' is understood as the end of pattern.
428 const bool pathname
= (flags
& FNM_PATHNAME
) != 0;
429 const bool special_period
= (flags
& FNM_PERIOD
) != 0;
430 const bool noescape
= (flags
& FNM_NOESCAPE
) != 0;
431 const bool leading_dir
= (flags
& FNM_LEADING_DIR
) != 0;
433 const char *s
= *string
;
434 const char *p
= *pattern
;
437 /* Bracket expression. */
439 int matched
= _match_bracket_expr(&p
, s
, flags
);
444 if (matched
!= INVALID_PATTERN
) {
449 assert(matched
== INVALID_PATTERN
);
450 /* Fall through to match [ as an ordinary character. */
453 /* Wildcard match. */
456 /* No character to match. */
459 if (pathname
&& *s
== '/') {
460 /* Slash must be matched explicitly. */
463 if (special_period
&& pathname
&&
464 *s
== '.' && *(s
- 1) == '/') {
465 /* Initial period must be matched explicitly. */
469 /* None of the above, match anything else. */
475 if (!noescape
&& *p
== '\\') {
476 /* Escaped character. */
482 * End of pattern, must match end of string or
483 * an end of subdirectory name (optional).
486 if (*s
== '\0' || (leading_dir
&& *s
== '/')) {
500 /* Nothing matched. */
504 /* Entire sub-pattern matched. */
507 assert(*p
== '\0' || *p
== '*');
508 assert(*p
!= '\0' || *s
== '\0' || (leading_dir
&& *s
== '/'));
516 * Match string against a pattern.
518 * @param pattern Pattern.
519 * @param string String to match.
520 * @param flags Flags given to fnmatch().
521 * @return True if the string matched the pattern, false otherwise.
523 static bool _full_match(const char *pattern
, const char *string
, int flags
)
525 const bool pathname
= (flags
& FNM_PATHNAME
) != 0;
526 const bool special_period
= (flags
& FNM_PERIOD
) != 0;
527 const bool leading_dir
= (flags
& FNM_LEADING_DIR
) != 0;
529 if (special_period
&& *string
== '.') {
530 /* Initial dot must be matched by an explicit dot in pattern. */
531 if (*pattern
!= '.') {
538 if (*pattern
!= '*') {
539 if (!_partial_match(&pattern
, &string
, flags
)) {
540 /* The initial match must succeed. */
545 while (*pattern
!= '\0') {
546 assert(*pattern
== '*');
549 bool matched
= false;
552 if (pathname
&& special_period
&&
553 *string
== '.' && *(string
- 1) == '/') {
556 end
= gnu_strchrnul(string
, pathname
? '/' : '\0');
559 /* Try to match every possible offset. */
560 while (string
<= end
) {
561 if (_partial_match(&pattern
, &string
, flags
)) {
575 return *string
== '\0' || (leading_dir
&& *string
== '/');
579 * Transform the entire string to lowercase.
581 * @param s Input string.
582 * @return Newly allocated copy of the input string with all uppercase
583 * characters folded to their lowercase variants.
585 static char *_casefold(const char *s
)
588 char *result
= strdup(s
);
589 for (char *i
= result
; *i
!= '\0'; ++i
) {
596 * Filename pattern matching.
598 * @param pattern Pattern to match the string against.
599 * @param string Matched string.
600 * @param flags Flags altering the matching of special characters
601 * (mainly for dot and slash).
602 * @return Zero if the string matches the pattern, FNM_NOMATCH otherwise.
604 int fnmatch(const char *pattern
, const char *string
, int flags
)
606 assert(pattern
!= NULL
);
607 assert(string
!= NULL
);
609 // TODO: don't fold everything in advance, but only when needed
611 if ((flags
& FNM_CASEFOLD
) != 0) {
612 /* Just fold the entire pattern and string. */
613 pattern
= _casefold(pattern
);
614 string
= _casefold(string
);
617 bool result
= _full_match(pattern
, string
, flags
);
619 if ((flags
& FNM_CASEFOLD
) != 0) {
621 free((char *) pattern
);
624 free((char *) string
);
628 return result
? 0 : FNM_NOMATCH
;
631 // FIXME: put the testcases to the app/tester after fnmatch is included into libc
637 #define fnmatch_test(x) { if (x) printf("SUCCESS: "#x"\n"); else { printf("FAILED: "#x"\n"); fail++; } }
638 #define match(s1, s2, flags) fnmatch_test(fnmatch(s1, s2, flags) == 0)
639 #define nomatch(s1, s2, flags) fnmatch_test(fnmatch(s1, s2, flags) == FNM_NOMATCH)
641 void __posix_fnmatch_test()
645 static_assert(FNM_PATHNAME
== FNM_FILE_NAME
);
647 match("*", "hello", 0);
648 match("hello", "hello", 0);
649 match("hello*", "hello", 0);
650 nomatch("hello?", "hello", 0);
651 match("*hello", "prdel hello", 0);
652 match("he[sl]lo", "hello", 0);
653 match("he[sl]lo", "heslo", 0);
654 nomatch("he[sl]lo", "heblo", 0);
655 nomatch("he[^sl]lo", "hello", 0);
656 nomatch("he[^sl]lo", "heslo", 0);
657 match("he[^sl]lo", "heblo", 0);
658 nomatch("he[!sl]lo", "hello", 0);
659 nomatch("he[!sl]lo", "heslo", 0);
660 match("he[!sl]lo", "heblo", 0);
661 match("al*[c-t]a*vis*ta", "alheimer talir jehovista", 0);
662 match("al*[c-t]a*vis*ta", "alfons had jehovista", 0);
663 match("[a-ce-z]", "a", 0);
664 match("[a-ce-z]", "c", 0);
665 nomatch("[a-ce-z]", "d", 0);
666 match("[a-ce-z]", "e", 0);
667 match("[a-ce-z]", "z", 0);
668 nomatch("[^a-ce-z]", "a", 0);
669 nomatch("[^a-ce-z]", "c", 0);
670 match("[^a-ce-z]", "d", 0);
671 nomatch("[^a-ce-z]", "e", 0);
672 nomatch("[^a-ce-z]", "z", 0);
673 match("helen??", "helenos", 0);
674 match("****booo****", "booo", 0);
676 match("hello[[:space:]]world", "hello world", 0);
677 nomatch("hello[[:alpha:]]world", "hello world", 0);
679 match("/hoooo*", "/hooooooo/hooo", 0);
680 nomatch("/hoooo*", "/hooooooo/hooo", FNM_PATHNAME
);
681 nomatch("/hoooo*/", "/hooooooo/hooo", FNM_PATHNAME
);
682 match("/hoooo*/*", "/hooooooo/hooo", FNM_PATHNAME
);
683 match("/hoooo*/hooo", "/hooooooo/hooo", FNM_PATHNAME
);
684 match("/hoooo*", "/hooooooo/hooo", FNM_PATHNAME
| FNM_LEADING_DIR
);
685 nomatch("/hoooo*/", "/hooooooo/hooo", FNM_PATHNAME
| FNM_LEADING_DIR
);
686 nomatch("/hoooo", "/hooooooo/hooo", FNM_LEADING_DIR
);
687 match("/hooooooo", "/hooooooo/hooo", FNM_LEADING_DIR
);
689 match("*", "hell", 0);
690 match("*?", "hell", 0);
691 match("?*?", "hell", 0);
692 match("?*??", "hell", 0);
693 match("??*??", "hell", 0);
694 nomatch("???*??", "hell", 0);
696 nomatch("", "hell", 0);
697 nomatch("?", "hell", 0);
698 nomatch("??", "hell", 0);
699 nomatch("???", "hell", 0);
700 match("????", "hell", 0);
702 match("*", "h.ello", FNM_PERIOD
);
703 match("*", "h.ello", FNM_PATHNAME
| FNM_PERIOD
);
704 nomatch("*", ".hello", FNM_PERIOD
);
705 match("h?ello", "h.ello", FNM_PERIOD
);
706 nomatch("?hello", ".hello", FNM_PERIOD
);
707 match("/home/user/.*", "/home/user/.hello", FNM_PATHNAME
| FNM_PERIOD
);
708 match("/home/user/*", "/home/user/.hello", FNM_PERIOD
);
709 nomatch("/home/user/*", "/home/user/.hello", FNM_PATHNAME
| FNM_PERIOD
);
711 nomatch("HeLlO", "hello", 0);
712 match("HeLlO", "hello", FNM_CASEFOLD
);
714 printf("Failed: %d\n", fail
);