Cygwin: strptime: add release note
[newlib-cygwin.git] / winsup / cygwin / libc / fnmatch.c
bloba1cb5d1e4616b8671178a430b560a0226e2653ed
1 /*
2 * Copyright (c) 1989, 1993, 1994
3 * The Regents of the University of California. All rights reserved.
5 * This code is derived from software contributed to Berkeley by
6 * Guido van Rossum.
8 * Copyright (c) 2011 The FreeBSD Foundation
9 * All rights reserved.
10 * Portions of this software were developed by David Chisnall
11 * under sponsorship from the FreeBSD Foundation.
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
38 #if defined(LIBC_SCCS) && !defined(lint)
39 static char sccsid[] = "@(#)fnmatch.c 8.2 (Berkeley) 4/16/94";
40 #endif /* LIBC_SCCS and not lint */
41 #include "winsup.h"
42 #include <sys/cdefs.h>
43 __FBSDID("$FreeBSD: head/lib/libc/gen/fnmatch.c 288309 2015-09-27 12:52:18Z jilles $");
46 * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6.
47 * Compares a filename or pathname to a pattern.
51 * Some notes on multibyte character support:
52 * 1. Patterns with illegal byte sequences match nothing.
53 * 2. Illegal byte sequences in the "string" argument are handled by treating
54 * them as single-byte characters with a value of the first byte of the
55 * sequence cast to wint_t.
56 * 3. Multibyte conversion state objects (mbstate_t) are passed around and
57 * used for most, but not all, conversions. Further work will be required
58 * to support state-dependent encodings.
61 #include <fnmatch.h>
62 #include <limits.h>
63 #include <string.h>
64 #include <wchar.h>
65 #include <wctype.h>
67 #include "collate.h"
69 #define EOS '\0'
71 #define RANGE_MATCH 1
72 #define RANGE_NOMATCH 0
73 #define RANGE_ERROR (-1)
75 static int rangematch(const wint_t *, wint_t *, int, wint_t **, mbstate_t *);
77 int
78 fnmatch(const char *in_pattern, const char *in_string, int flags)
80 size_t pclen = strlen (in_pattern);
81 size_t sclen = strlen (in_string);
82 wint_t *pattern = (wint_t *) alloca ((pclen + 1) * sizeof (wint_t));
83 wint_t *string = (wint_t *) alloca ((sclen + 1) * sizeof (wint_t));
85 const wint_t *stringstart = string;
86 const wint_t *bt_pattern, *bt_string;
87 mbstate_t patmbs = { 0 };
88 mbstate_t strmbs = { 0 };
89 mbstate_t bt_patmbs, bt_strmbs;
90 wint_t *newp;
91 wint_t *c;
92 wint_t *pc, *sc;
94 pclen = mbsnrtowci (pattern, &in_pattern, (size_t) -1, pclen, &patmbs);
95 if (pclen == (size_t) -1)
96 return (FNM_NOMATCH);
97 pattern[pclen] = '\0';
98 sclen = mbsnrtowci (string, &in_string, (size_t) -1, sclen, &strmbs);
99 if (sclen == (size_t) -1)
100 return (FNM_NOMATCH);
101 string[sclen] = '\0';
103 bt_pattern = bt_string = NULL;
104 for (;;) {
105 pc = pattern++;
106 sc = string;
107 switch (*pc) {
108 case EOS:
109 if ((flags & FNM_LEADING_DIR) && *sc == '/')
110 return (0);
111 if (*sc == EOS)
112 return (0);
113 goto backtrack;
114 case '?':
115 if (*sc == EOS)
116 return (FNM_NOMATCH);
117 if (*sc == '/' && (flags & FNM_PATHNAME))
118 goto backtrack;
119 if (*sc == '.' && (flags & FNM_PERIOD) &&
120 (string == stringstart ||
121 ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
122 goto backtrack;
123 ++string;
124 break;
125 case '*':
126 c = pattern;
127 /* Collapse multiple stars. */
128 while (*c == '*')
129 *c = *++pattern;
131 if (*sc == '.' && (flags & FNM_PERIOD) &&
132 (string == stringstart ||
133 ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
134 goto backtrack;
136 /* Optimize for pattern with * at end or before /. */
137 if (*c == EOS)
138 if (flags & FNM_PATHNAME)
139 return ((flags & FNM_LEADING_DIR) ||
140 wcichr(string, '/') == NULL ?
141 0 : FNM_NOMATCH);
142 else
143 return (0);
144 else if (*c == '/' && flags & FNM_PATHNAME) {
145 if ((string = wcichr(string, '/')) == NULL)
146 return (FNM_NOMATCH);
147 break;
151 * First try the shortest match for the '*' that
152 * could work. We can forget any earlier '*' since
153 * there is no way having it match more characters
154 * can help us, given that we are already here.
156 bt_pattern = pattern;
157 bt_patmbs = patmbs;
158 bt_string = string;
159 bt_strmbs = strmbs;
160 break;
161 case '[':
162 if (*sc == EOS)
163 return (FNM_NOMATCH);
164 if (*sc == '/' && (flags & FNM_PATHNAME))
165 goto backtrack;
166 if (*sc == '.' && (flags & FNM_PERIOD) &&
167 (string == stringstart ||
168 ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
169 goto backtrack;
171 int ret = rangematch(pattern, sc, flags, &newp,
172 &patmbs);
173 switch (ret) {
174 case RANGE_ERROR:
175 goto norm;
176 case RANGE_NOMATCH:
177 goto backtrack;
178 default: /* > 0 ... case RANGE_MATCH */
179 pattern = newp;
180 break;
182 string += ret;
183 break;
184 case '\\':
185 if (!(flags & FNM_NOESCAPE)) {
186 pc = pattern++;
188 fallthrough;
189 default:
190 norm:
191 ++string;
192 if (*pc == *sc)
194 else if ((flags & FNM_CASEFOLD) &&
195 (towlower(*pc) == towlower(*sc)))
197 else {
198 backtrack:
200 * If we have a mismatch (other than hitting
201 * the end of the string), go back to the last
202 * '*' seen and have it match one additional
203 * character.
205 if (bt_pattern == NULL)
206 return (FNM_NOMATCH);
207 sc = (wint_t *) bt_string;
208 if (*sc == EOS)
209 return (FNM_NOMATCH);
210 if (*sc == '/' && flags & FNM_PATHNAME)
211 return (FNM_NOMATCH);
212 ++bt_string;
213 pattern = (wint_t *) bt_pattern;
214 patmbs = bt_patmbs;
215 string = (wint_t *) bt_string;
216 strmbs = bt_strmbs;
218 break;
221 /* NOTREACHED */
224 /* Return value is either '\0', ':', '.', '=', or '[' if no class
225 expression found. cptr_p is set to the next character which needs
226 checking. */
227 static inline wint_t
228 check_classes_expr(const wint_t **cptr_p, wint_t *classbuf, size_t classbufsize)
230 const wint_t *ctype = NULL;
231 const wint_t *cptr = *cptr_p;
233 if (*cptr == '[' &&
234 (cptr[1] == ':' || cptr[1] == '.' || cptr[1] == '=')) {
235 ctype = ++cptr;
236 while (*++cptr && (*cptr != *ctype || cptr[1] != ']'))
238 if (!*cptr)
239 return '\0';
240 if (classbuf) {
241 const wint_t *class_p = ctype + 1;
242 size_t clen = cptr - class_p;
244 if (clen < classbufsize)
245 *wcipncpy (classbuf, class_p, clen) = '\0';
246 else
247 ctype = NULL;
249 cptr += 2; /* Advance cptr to next char after class expr. */
251 *cptr_p = cptr;
252 return ctype ? *ctype : '[';
255 static int
256 rangematch(const wint_t *pattern, wint_t *test, int flags, wint_t **newp,
257 mbstate_t *patmbs)
259 int negate, ok;
260 wint_t *c, *c2;
261 //size_t pclen;
262 const wint_t *origpat;
263 size_t tlen = next_unicode_char (test);
266 * A bracket expression starting with an unquoted circumflex
267 * character produces unspecified results (IEEE 1003.2-1992,
268 * 3.13.2). This implementation treats it like '!', for
269 * consistency with the regular expression syntax.
270 * J.T. Conklin (conklin@ngai.kaleida.com)
272 if ( (negate = (*pattern == '!' || *pattern == '^')) )
273 ++pattern;
275 if (flags & FNM_CASEFOLD) {
276 for (int idx = 0; idx < tlen; ++idx)
277 test[idx] = towlower(test[idx]);
281 * A right bracket shall lose its special meaning and represent
282 * itself in a bracket expression if it occurs first in the list.
283 * -- POSIX.2 2.8.3.2
285 ok = 0;
286 origpat = pattern;
287 for (;;) {
288 wint_t wclass[64], wclass2[64];
289 char cclass[64];
290 wint_t ctype;
291 size_t clen = 1, c2len = 1;
293 if (*pattern == ']' && pattern > origpat) {
294 pattern++;
295 break;
296 } else if (*pattern == '\0') {
297 return (RANGE_ERROR);
298 } else if (*pattern == '/' && (flags & FNM_PATHNAME)) {
299 return (RANGE_NOMATCH);
300 } else if (*pattern == '\\' && !(flags & FNM_NOESCAPE))
301 pattern++;
302 switch (ctype = check_classes_expr (&pattern, wclass, 64)) {
303 case ':':
304 /* No worries, char classes are ASCII-only */
305 wcitoascii (cclass, wclass);
306 if (iswctype (*test, wctype (cclass)))
307 ok = 1;
308 continue;
309 case '=':
310 if (wcilen (wclass) == 1 &&
311 is_unicode_equiv (*test, *wclass))
312 ok = 1;
313 continue;
314 case '.':
315 if (!is_unicode_coll_elem (wclass))
316 return (RANGE_NOMATCH);
317 c = wclass;
318 clen = wcilen (wclass);
319 break;
320 default:
321 c = (wint_t *) pattern++;
322 break;
324 if (flags & FNM_CASEFOLD) {
325 for (int idx = 0; idx < tlen; ++idx)
326 c[idx] = towlower(c[idx]);
329 if (*pattern == '-' && *(pattern + 1) != EOS &&
330 *(pattern + 1) != ']') {
331 if (*++pattern == '\\' && !(flags & FNM_NOESCAPE))
332 if (*pattern != EOS)
333 pattern++;
334 const wint_t *orig_pattern = pattern;
335 switch (ctype = check_classes_expr (&pattern, wclass2,
336 64)) {
337 case '.':
338 if (!is_unicode_coll_elem (wclass2))
339 return (RANGE_NOMATCH);
340 c2 = wclass2;
341 c2len = wcilen (wclass2);
342 break;
343 default:
344 pattern = orig_pattern;
345 c2 = (wint_t *) pattern++;
347 if (*c2 == EOS)
348 return (RANGE_ERROR);
350 if (flags & FNM_CASEFOLD) {
351 for (int idx = 0; idx < tlen; ++idx)
352 c2[idx] = towlower(c2[idx]);
355 if ((!__get_current_collate_locale ()->win_locale[0]) ?
356 *c <= *test && *test <= *c2 :
357 __wscollate_range_cmp(c, test, clen, tlen) <= 0
358 && __wscollate_range_cmp(test, c2, tlen, c2len) <= 0
360 ok = 1;
361 } else if (clen == tlen && wcincmp (c, test, clen) == 0)
362 ok = 1;
365 *newp = (wint_t *) pattern;
366 return (ok == negate ? RANGE_NOMATCH : tlen);