Handle Enter/Escape keys in message dialog.
[helenos.git] / uspace / lib / posix / src / fnmatch.c
bloba5738b1049c70a4512cdb533d2f22426999943ef
1 /*
2 * Copyright (c) 2011 Jiri Zarevucky
3 * All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 /** @addtogroup libposix
30 * @{
32 /** @file Filename-matching.
36 * This file contains an implementation of the fnmatch() pattern matching
37 * function. There is more code than necessary to account for the possibility
38 * of adding POSIX-like locale support to the system in the future. Functions
39 * that are only necessary for locale support currently simply use single
40 * characters for "collation elements".
41 * When (or if) locales are properly implemented, extending this implementation
42 * will be fairly straightforward.
45 #include <stdbool.h>
46 #include <ctype.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <assert.h>
51 #include "internal/common.h"
52 #include <fnmatch.h>
54 /* Returned by _match... functions. */
55 #define INVALID_PATTERN -1
58 * Type for collating element, simple identity with characters now,
59 * but may be extended for better locale support.
61 typedef int coll_elm_t;
63 /** Return value indicating that the element in question
64 * is not valid in the current locale. (That is, if locales are supported.)
66 #define COLL_ELM_INVALID -1
68 /**
69 * Get collating element matching a string.
71 * @param str String representation of the element.
72 * @return Matching collating element or COLL_ELM_INVALID.
74 static coll_elm_t _coll_elm_get(const char *str)
76 if (str[0] == '\0' || str[1] != '\0') {
77 return COLL_ELM_INVALID;
79 return str[0];
82 /**
83 * Get collating element matching a single character.
85 * @param c Character representation of the element.
86 * @return Matching collating element.
88 static coll_elm_t _coll_elm_char(int c)
90 return c;
93 /**
94 * Match collating element with a beginning of a string.
96 * @param elm Collating element to match.
97 * @param str String which beginning should match the element.
98 * @return 0 if the element doesn't match, or the number of characters matched.
100 static int _coll_elm_match(coll_elm_t elm, const char *str)
102 return elm == *str;
106 * Checks whether a string begins with a collating element in the given range.
107 * Ordering depends on the locale (if locales are supported).
109 * @param first First element of the range.
110 * @param second Last element of the range.
111 * @param str String to match.
112 * @return 0 if there is no match, or the number of characters matched.
114 static int _coll_elm_between(coll_elm_t first, coll_elm_t second,
115 const char *str)
117 return *str >= first && *str <= second;
121 * Read a string delimited by [? and ?].
123 * @param pattern Pointer to the string to read from. Its position is moved
124 * to the first character after the closing ].
125 * @param seq The character on the inside of brackets.
126 * @param buf Read buffer.
127 * @param buf_sz Read buffer's size. If the buffer is not large enough for
128 * the entire string, the string is cut with no error indication.
129 * @param flags Flags modifying the behavior.
130 * @return True on success, false if the pattern is invalid.
132 static bool _get_delimited(
133 const char **pattern, int seq,
134 char *buf, size_t buf_sz, int flags)
136 const bool noescape = (flags & FNM_NOESCAPE) != 0;
137 const bool pathname = (flags & FNM_PATHNAME) != 0;
139 const char *p = *pattern;
140 assert(p[0] == '[' && p[1] == seq /* Caller should ensure this. */);
141 p += 2;
143 while (true) {
144 if (*p == seq && *(p + 1) == ']') {
145 /* String properly ended, return. */
146 *pattern = p + 2;
147 *buf = '\0';
148 return true;
150 if (!noescape && *p == '\\') {
151 p++;
153 if (*p == '\0') {
154 /* String not ended properly, invalid pattern. */
155 return false;
157 if (pathname && *p == '/') {
158 /* Slash in a pathname pattern is invalid. */
159 return false;
161 if (buf_sz > 1) {
162 /* Only add to the buffer if there is space. */
163 *buf = *p;
164 buf++;
165 buf_sz--;
167 p++;
172 * CHARACTER CLASSES
175 #define MAX_CLASS_OR_COLL_LEN 6
177 struct _char_class {
178 const char *name;
179 int (*func) (int);
182 /* List of supported character classes. */
183 static const struct _char_class _char_classes[] = {
184 { "alnum", isalnum },
185 { "alpha", isalpha },
186 { "blank", isblank },
187 { "cntrl", iscntrl },
188 { "digit", isdigit },
189 { "graph", isgraph },
190 { "lower", islower },
191 { "print", isprint },
192 { "punct", ispunct },
193 { "space", isspace },
194 { "upper", isupper },
195 { "xdigit", isxdigit }
199 * Compare function for binary search in the _char_classes array.
201 * @param key Key of the searched element.
202 * @param elem Element of _char_classes array.
203 * @return Ordering indicator (-1 less than, 0 equal, 1 greater than).
205 static int _class_compare(const void *key, const void *elem)
207 const struct _char_class *class = elem;
208 return strcmp((const char *) key, class->name);
212 * Returns whether the given character belongs to the specified character class.
214 * @param cname Name of the character class.
215 * @param c Character.
216 * @return True if the character belongs to the class, false otherwise.
218 static bool _is_in_class (const char *cname, int c)
220 /* Search for class in the array of supported character classes. */
221 const struct _char_class *class = bsearch(cname, _char_classes,
222 sizeof(_char_classes) / sizeof(struct _char_class),
223 sizeof(struct _char_class), _class_compare);
225 if (class == NULL) {
226 /* No such class supported - treat as an empty class. */
227 return false;
228 } else {
229 /* Class matched. */
230 return class->func(c);
235 * Tries to parse an initial part of the pattern as a character class pattern,
236 * and if successful, matches the beginning of the given string against the class.
238 * @param pattern Pointer to the pattern to match. Must begin with a class
239 * specifier and is repositioned to the first character after the specifier
240 * if successful.
241 * @param str String to match.
242 * @param flags Flags modifying the behavior (see fnmatch()).
243 * @return INVALID_PATTERN if the pattern doesn't start with a valid class
244 * specifier, 0 if the beginning of the matched string doesn't belong
245 * to the class, or positive number of characters matched.
247 static int _match_char_class(const char **pattern, const char *str, int flags)
249 char class[MAX_CLASS_OR_COLL_LEN + 1];
251 if (!_get_delimited(pattern, ':', class, sizeof(class), flags)) {
252 return INVALID_PATTERN;
255 return _is_in_class(class, *str);
259 * END CHARACTER CLASSES
263 * Reads the next collating element in the pattern, taking into account
264 * locale (if supported) and flags (see fnmatch()).
266 * @param pattern Pattern.
267 * @param flags Flags given to fnmatch().
268 * @return Collating element on success,
269 * or COLL_ELM_INVALID if the pattern is invalid.
271 static coll_elm_t _next_coll_elm(const char **pattern, int flags)
273 assert(pattern != NULL);
274 assert(*pattern != NULL);
275 assert(**pattern != '\0');
277 const char *p = *pattern;
278 const bool noescape = (flags & FNM_NOESCAPE) != 0;
279 const bool pathname = (flags & FNM_PATHNAME) != 0;
281 if (*p == '[') {
282 if (*(p + 1) == '.') {
283 char buf[MAX_CLASS_OR_COLL_LEN + 1];
284 if (!_get_delimited(pattern, '.', buf, sizeof(buf), flags)) {
285 return COLL_ELM_INVALID;
287 return _coll_elm_get(buf);
290 if (*(p + 1) == '=') {
291 char buf[MAX_CLASS_OR_COLL_LEN + 1];
292 if (!_get_delimited(pattern, '=', buf, sizeof(buf), flags)) {
293 return COLL_ELM_INVALID;
295 return _coll_elm_get(buf);
299 if (!noescape && *p == '\\') {
300 p++;
301 if (*p == '\0') {
302 *pattern = p;
303 return COLL_ELM_INVALID;
306 if (pathname && *p == '/') {
307 return COLL_ELM_INVALID;
310 *pattern = p + 1;
311 return _coll_elm_char(*p);
314 #define _matched(match) { \
315 int _match = match; \
316 if (_match < 0) { \
317 /* Invalid pattern */ \
318 return _match; \
319 } else if (matched == 0 && _match > 0) { \
320 /* First match */ \
321 matched = _match; \
326 * Matches the beginning of the given string against a bracket expression
327 * the pattern begins with.
329 * @param pattern Pointer to the beginning of a bracket expression in a pattern.
330 * On success, the pointer is moved to the first character after the
331 * bracket expression.
332 * @param str Unmatched part of the string.
333 * @param flags Flags given to fnmatch().
334 * @return INVALID_PATTERN if the pattern is invalid, 0 if there is no match
335 * or the number of matched characters on success.
337 static int _match_bracket_expr(const char **pattern, const char *str, int flags)
339 const bool pathname = (flags & FNM_PATHNAME) != 0;
340 const bool special_period = (flags & FNM_PERIOD) != 0;
341 const char *p = *pattern;
342 bool negative = false;
343 int matched = 0;
345 assert(*p == '['); /* calling code should ensure this */
346 p++;
348 if (*str == '\0' || (pathname && *str == '/') ||
349 (pathname && special_period && *str == '.' && *(str - 1) == '/')) {
351 * No bracket expression matches end of string,
352 * slash in pathname match or initial period with FNM_PERIOD
353 * option.
355 return 0;
358 if (*p == '^' || *p == '!') {
359 negative = true;
360 p++;
363 if (*p == ']') {
364 /* When ']' is first, treat it as a normal character. */
365 _matched(*str == ']');
366 p++;
369 coll_elm_t current_elm = COLL_ELM_INVALID;
371 while (*p != ']') {
372 if (*p == '-' && *(p + 1) != ']' &&
373 current_elm != COLL_ELM_INVALID) {
374 /* Range expression. */
375 p++;
376 coll_elm_t end_elm = _next_coll_elm(&p, flags);
377 if (end_elm == COLL_ELM_INVALID) {
378 return INVALID_PATTERN;
380 _matched(_coll_elm_between(current_elm, end_elm, str));
381 continue;
384 if (*p == '[' && *(p + 1) == ':') {
385 current_elm = COLL_ELM_INVALID;
386 _matched(_match_char_class(&p, str, flags));
387 continue;
390 current_elm = _next_coll_elm(&p, flags);
391 if (current_elm == COLL_ELM_INVALID) {
392 return INVALID_PATTERN;
394 _matched(_coll_elm_match(current_elm, str));
397 /* No error occured - update pattern pointer. */
398 *pattern = p + 1;
400 if (matched == 0) {
401 /* No match found */
402 return negative;
403 } else {
404 /* Matched 'match' characters. */
405 return negative ? 0 : matched;
410 * Matches a portion of the pattern containing no asterisks (*) against
411 * the given string.
413 * @param pattern Pointer to the unmatched portion of the pattern.
414 * On success, the pointer is moved to the first asterisk, or to the
415 * terminating nul character, whichever occurs first.
416 * @param string Pointer to the input string. On success, the pointer is moved
417 * to the first character that wasn't explicitly matched.
418 * @param flags Flags given to fnmatch().
419 * @return True if the entire subpattern matched. False otherwise.
421 static bool _partial_match(const char **pattern, const char **string, int flags)
424 * Only a single *-delimited subpattern is matched here.
425 * So in this function, '*' is understood as the end of pattern.
428 const bool pathname = (flags & FNM_PATHNAME) != 0;
429 const bool special_period = (flags & FNM_PERIOD) != 0;
430 const bool noescape = (flags & FNM_NOESCAPE) != 0;
431 const bool leading_dir = (flags & FNM_LEADING_DIR) != 0;
433 const char *s = *string;
434 const char *p = *pattern;
436 while (*p != '*') {
437 /* Bracket expression. */
438 if (*p == '[') {
439 int matched = _match_bracket_expr(&p, s, flags);
440 if (matched == 0) {
441 /* Doesn't match. */
442 return false;
444 if (matched != INVALID_PATTERN) {
445 s += matched;
446 continue;
449 assert(matched == INVALID_PATTERN);
450 /* Fall through to match [ as an ordinary character. */
453 /* Wildcard match. */
454 if (*p == '?') {
455 if (*s == '\0') {
456 /* No character to match. */
457 return false;
459 if (pathname && *s == '/') {
460 /* Slash must be matched explicitly. */
461 return false;
463 if (special_period && pathname &&
464 *s == '.' && *(s - 1) == '/') {
465 /* Initial period must be matched explicitly. */
466 return false;
469 /* None of the above, match anything else. */
470 p++;
471 s++;
472 continue;
475 if (!noescape && *p == '\\') {
476 /* Escaped character. */
477 p++;
480 if (*p == '\0') {
482 * End of pattern, must match end of string or
483 * an end of subdirectory name (optional).
486 if (*s == '\0' || (leading_dir && *s == '/')) {
487 break;
490 return false;
493 if (*p == *s) {
494 /* Exact match. */
495 p++;
496 s++;
497 continue;
500 /* Nothing matched. */
501 return false;
504 /* Entire sub-pattern matched. */
506 /* postconditions */
507 assert(*p == '\0' || *p == '*');
508 assert(*p != '\0' || *s == '\0' || (leading_dir && *s == '/'));
510 *pattern = p;
511 *string = s;
512 return true;
516 * Match string against a pattern.
518 * @param pattern Pattern.
519 * @param string String to match.
520 * @param flags Flags given to fnmatch().
521 * @return True if the string matched the pattern, false otherwise.
523 static bool _full_match(const char *pattern, const char *string, int flags)
525 const bool pathname = (flags & FNM_PATHNAME) != 0;
526 const bool special_period = (flags & FNM_PERIOD) != 0;
527 const bool leading_dir = (flags & FNM_LEADING_DIR) != 0;
529 if (special_period && *string == '.') {
530 /* Initial dot must be matched by an explicit dot in pattern. */
531 if (*pattern != '.') {
532 return false;
534 pattern++;
535 string++;
538 if (*pattern != '*') {
539 if (!_partial_match(&pattern, &string, flags)) {
540 /* The initial match must succeed. */
541 return false;
545 while (*pattern != '\0') {
546 assert(*pattern == '*');
547 pattern++;
549 bool matched = false;
551 const char *end;
552 if (pathname && special_period &&
553 *string == '.' && *(string - 1) == '/') {
554 end = string;
555 } else {
556 end = gnu_strchrnul(string, pathname ? '/' : '\0');
559 /* Try to match every possible offset. */
560 while (string <= end) {
561 if (_partial_match(&pattern, &string, flags)) {
562 matched = true;
563 break;
565 string++;
568 if (matched) {
569 continue;
572 return false;
575 return *string == '\0' || (leading_dir && *string == '/');
579 * Transform the entire string to lowercase.
581 * @param s Input string.
582 * @return Newly allocated copy of the input string with all uppercase
583 * characters folded to their lowercase variants.
585 static char *_casefold(const char *s)
587 assert(s != NULL);
588 char *result = strdup(s);
589 for (char *i = result; *i != '\0'; ++i) {
590 *i = tolower(*i);
592 return result;
596 * Filename pattern matching.
598 * @param pattern Pattern to match the string against.
599 * @param string Matched string.
600 * @param flags Flags altering the matching of special characters
601 * (mainly for dot and slash).
602 * @return Zero if the string matches the pattern, FNM_NOMATCH otherwise.
604 int fnmatch(const char *pattern, const char *string, int flags)
606 assert(pattern != NULL);
607 assert(string != NULL);
609 // TODO: don't fold everything in advance, but only when needed
611 if ((flags & FNM_CASEFOLD) != 0) {
612 /* Just fold the entire pattern and string. */
613 pattern = _casefold(pattern);
614 string = _casefold(string);
617 bool result = _full_match(pattern, string, flags);
619 if ((flags & FNM_CASEFOLD) != 0) {
620 if (pattern) {
621 free((char *) pattern);
623 if (string) {
624 free((char *) string);
628 return result ? 0 : FNM_NOMATCH;
631 // FIXME: put the testcases to the app/tester after fnmatch is included into libc
633 #if 0
635 #include <stdio.h>
637 #define fnmatch_test(x) { if (x) printf("SUCCESS: "#x"\n"); else { printf("FAILED: "#x"\n"); fail++; } }
638 #define match(s1, s2, flags) fnmatch_test(fnmatch(s1, s2, flags) == 0)
639 #define nomatch(s1, s2, flags) fnmatch_test(fnmatch(s1, s2, flags) == FNM_NOMATCH)
641 void __posix_fnmatch_test()
643 int fail = 0;
645 static_assert(FNM_PATHNAME == FNM_FILE_NAME);
646 match("", "", 0);
647 match("*", "hello", 0);
648 match("hello", "hello", 0);
649 match("hello*", "hello", 0);
650 nomatch("hello?", "hello", 0);
651 match("*hello", "prdel hello", 0);
652 match("he[sl]lo", "hello", 0);
653 match("he[sl]lo", "heslo", 0);
654 nomatch("he[sl]lo", "heblo", 0);
655 nomatch("he[^sl]lo", "hello", 0);
656 nomatch("he[^sl]lo", "heslo", 0);
657 match("he[^sl]lo", "heblo", 0);
658 nomatch("he[!sl]lo", "hello", 0);
659 nomatch("he[!sl]lo", "heslo", 0);
660 match("he[!sl]lo", "heblo", 0);
661 match("al*[c-t]a*vis*ta", "alheimer talir jehovista", 0);
662 match("al*[c-t]a*vis*ta", "alfons had jehovista", 0);
663 match("[a-ce-z]", "a", 0);
664 match("[a-ce-z]", "c", 0);
665 nomatch("[a-ce-z]", "d", 0);
666 match("[a-ce-z]", "e", 0);
667 match("[a-ce-z]", "z", 0);
668 nomatch("[^a-ce-z]", "a", 0);
669 nomatch("[^a-ce-z]", "c", 0);
670 match("[^a-ce-z]", "d", 0);
671 nomatch("[^a-ce-z]", "e", 0);
672 nomatch("[^a-ce-z]", "z", 0);
673 match("helen??", "helenos", 0);
674 match("****booo****", "booo", 0);
676 match("hello[[:space:]]world", "hello world", 0);
677 nomatch("hello[[:alpha:]]world", "hello world", 0);
679 match("/hoooo*", "/hooooooo/hooo", 0);
680 nomatch("/hoooo*", "/hooooooo/hooo", FNM_PATHNAME);
681 nomatch("/hoooo*/", "/hooooooo/hooo", FNM_PATHNAME);
682 match("/hoooo*/*", "/hooooooo/hooo", FNM_PATHNAME);
683 match("/hoooo*/hooo", "/hooooooo/hooo", FNM_PATHNAME);
684 match("/hoooo*", "/hooooooo/hooo", FNM_PATHNAME | FNM_LEADING_DIR);
685 nomatch("/hoooo*/", "/hooooooo/hooo", FNM_PATHNAME | FNM_LEADING_DIR);
686 nomatch("/hoooo", "/hooooooo/hooo", FNM_LEADING_DIR);
687 match("/hooooooo", "/hooooooo/hooo", FNM_LEADING_DIR);
689 match("*", "hell", 0);
690 match("*?", "hell", 0);
691 match("?*?", "hell", 0);
692 match("?*??", "hell", 0);
693 match("??*??", "hell", 0);
694 nomatch("???*??", "hell", 0);
696 nomatch("", "hell", 0);
697 nomatch("?", "hell", 0);
698 nomatch("??", "hell", 0);
699 nomatch("???", "hell", 0);
700 match("????", "hell", 0);
702 match("*", "h.ello", FNM_PERIOD);
703 match("*", "h.ello", FNM_PATHNAME | FNM_PERIOD);
704 nomatch("*", ".hello", FNM_PERIOD);
705 match("h?ello", "h.ello", FNM_PERIOD);
706 nomatch("?hello", ".hello", FNM_PERIOD);
707 match("/home/user/.*", "/home/user/.hello", FNM_PATHNAME | FNM_PERIOD);
708 match("/home/user/*", "/home/user/.hello", FNM_PERIOD);
709 nomatch("/home/user/*", "/home/user/.hello", FNM_PATHNAME | FNM_PERIOD);
711 nomatch("HeLlO", "hello", 0);
712 match("HeLlO", "hello", FNM_CASEFOLD);
714 printf("Failed: %d\n", fail);
717 #endif
719 /** @}