GDelayedSettingsBackend: another mandatory fixup
[glib.git] / glib / gpattern.c
blob33e4aac2caac456997b3e1faac78ffd90f27a48d
/* GLIB - Library of useful routines for C programming
 * Copyright (C) 1995-1997, 1999  Peter Mattis, Red Hat, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
20 #include "config.h"
22 #include <string.h>
24 #include "gpattern.h"
26 #include "gmacros.h"
27 #include "gmessages.h"
28 #include "gmem.h"
29 #include "gunicode.h"
30 #include "gutils.h"
31 #include "galias.h"
/**
 * SECTION: patterns
 * @title: Glob-style pattern matching
 * @short_description: matches strings against patterns containing '*'
 *                     (wildcard) and '?' (joker)
 *
 * The <function>g_pattern_match*</function> functions match a string
 * against a pattern containing '*' and '?' wildcards with semantics
 * similar to those of the standard glob() function: '*' matches an
 * arbitrary, possibly empty, string, '?' matches an arbitrary character.
 *
 * Note that in contrast to glob(), the '/' character
 * <emphasis>can</emphasis> be matched by the wildcards, there are no
 * '[...]' character ranges and '*' and '?' can
 * <emphasis>not</emphasis> be escaped to include them literally in a
 * pattern.
 *
 * When multiple strings must be matched against the same pattern, it
 * is better to compile the pattern to a #GPatternSpec using
 * g_pattern_spec_new() and use g_pattern_match_string() instead of
 * g_pattern_match_simple(). This avoids the overhead of repeated
 * pattern compilation.
 **/
/**
 * GPatternSpec:
 *
 * A <structname>GPatternSpec</structname> is the 'compiled' form of a
 * pattern. This structure is opaque and its fields cannot be accessed
 * directly.
 **/
/* keep enum and structure of gpattern.c and patterntest.c in sync */

/* Matching strategy selected for a compiled pattern by
 * g_pattern_spec_new() and dispatched on by g_pattern_match().
 * The string in each trailing comment is an example pattern shape.
 */
typedef enum
{
  G_MATCH_ALL,       /* "*A?A*" - general matcher, scanning the string forwards */
  G_MATCH_ALL_TAIL,  /* "*A?AA" - general matcher, run on reversed pattern and string */
  G_MATCH_HEAD,      /* "AAAA*" - plain prefix comparison */
  G_MATCH_TAIL,      /* "*AAAA" - plain suffix comparison */
  G_MATCH_EXACT,     /* "AAAAA" - plain string equality */
  G_MATCH_LAST       /* number of match types; used as a range check only */
} GMatchType;
76 struct _GPatternSpec
78 GMatchType match_type;
79 guint pattern_length;
80 guint min_length;
81 guint max_length;
82 gchar *pattern;
86 /* --- functions --- */
87 static inline gboolean
88 g_pattern_ph_match (const gchar *match_pattern,
89 const gchar *match_string,
90 gboolean *wildcard_reached_p)
92 register const gchar *pattern, *string;
93 register gchar ch;
95 pattern = match_pattern;
96 string = match_string;
98 ch = *pattern;
99 pattern++;
100 while (ch)
102 switch (ch)
104 case '?':
105 if (!*string)
106 return FALSE;
107 string = g_utf8_next_char (string);
108 break;
110 case '*':
111 *wildcard_reached_p = TRUE;
114 ch = *pattern;
115 pattern++;
116 if (ch == '?')
118 if (!*string)
119 return FALSE;
120 string = g_utf8_next_char (string);
123 while (ch == '*' || ch == '?');
124 if (!ch)
125 return TRUE;
128 gboolean next_wildcard_reached = FALSE;
129 while (ch != *string)
131 if (!*string)
132 return FALSE;
133 string = g_utf8_next_char (string);
135 string++;
136 if (g_pattern_ph_match (pattern, string, &next_wildcard_reached))
137 return TRUE;
138 if (next_wildcard_reached)
139 /* the forthcoming pattern substring up to the next wildcard has
140 * been matched, but a mismatch occoured for the rest of the
141 * pattern, following the next wildcard.
142 * there's no need to advance the current match position any
143 * further if the rest pattern will not match.
145 return FALSE;
147 while (*string);
148 break;
150 default:
151 if (ch == *string)
152 string++;
153 else
154 return FALSE;
155 break;
158 ch = *pattern;
159 pattern++;
162 return *string == 0;
166 * g_pattern_match:
167 * @pspec: a #GPatternSpec
168 * @string_length: the length of @string (in bytes, i.e. strlen(),
169 * <emphasis>not</emphasis> g_utf8_strlen())
170 * @string: the UTF-8 encoded string to match
171 * @string_reversed: the reverse of @string or %NULL
172 * @Returns: %TRUE if @string matches @pspec
174 * Matches a string against a compiled pattern. Passing the correct
175 * length of the string given is mandatory. The reversed string can be
176 * omitted by passing %NULL, this is more efficient if the reversed
177 * version of the string to be matched is not at hand, as
178 * g_pattern_match() will only construct it if the compiled pattern
179 * requires reverse matches.
181 * Note that, if the user code will (possibly) match a string against a
182 * multitude of patterns containing wildcards, chances are high that
183 * some patterns will require a reversed string. In this case, it's
184 * more efficient to provide the reversed string to avoid multiple
185 * constructions thereof in the various calls to g_pattern_match().
187 * Note also that the reverse of a UTF-8 encoded string can in general
188 * <emphasis>not</emphasis> be obtained by g_strreverse(). This works
189 * only if the string doesn't contain any multibyte characters. GLib
190 * offers the g_utf8_strreverse() function to reverse UTF-8 encoded
191 * strings.
193 gboolean
194 g_pattern_match (GPatternSpec *pspec,
195 guint string_length,
196 const gchar *string,
197 const gchar *string_reversed)
199 g_return_val_if_fail (pspec != NULL, FALSE);
200 g_return_val_if_fail (string != NULL, FALSE);
202 if (string_length < pspec->min_length ||
203 string_length > pspec->max_length)
204 return FALSE;
206 switch (pspec->match_type)
208 gboolean dummy;
209 case G_MATCH_ALL:
210 return g_pattern_ph_match (pspec->pattern, string, &dummy);
211 case G_MATCH_ALL_TAIL:
212 if (string_reversed)
213 return g_pattern_ph_match (pspec->pattern, string_reversed, &dummy);
214 else
216 gboolean result;
217 gchar *tmp;
218 tmp = g_utf8_strreverse (string, string_length);
219 result = g_pattern_ph_match (pspec->pattern, tmp, &dummy);
220 g_free (tmp);
221 return result;
223 case G_MATCH_HEAD:
224 if (pspec->pattern_length == string_length)
225 return strcmp (pspec->pattern, string) == 0;
226 else if (pspec->pattern_length)
227 return strncmp (pspec->pattern, string, pspec->pattern_length) == 0;
228 else
229 return TRUE;
230 case G_MATCH_TAIL:
231 if (pspec->pattern_length)
232 return strcmp (pspec->pattern, string + (string_length - pspec->pattern_length)) == 0;
233 else
234 return TRUE;
235 case G_MATCH_EXACT:
236 if (pspec->pattern_length != string_length)
237 return FALSE;
238 else
239 return strcmp (pspec->pattern, string) == 0;
240 default:
241 g_return_val_if_fail (pspec->match_type < G_MATCH_LAST, FALSE);
242 return FALSE;
247 * g_pattern_spec_new:
248 * @pattern: a zero-terminated UTF-8 encoded string
249 * @Returns: a newly-allocated #GPatternSpec
251 * Compiles a pattern to a #GPatternSpec.
253 GPatternSpec*
254 g_pattern_spec_new (const gchar *pattern)
256 GPatternSpec *pspec;
257 gboolean seen_joker = FALSE, seen_wildcard = FALSE, more_wildcards = FALSE;
258 gint hw_pos = -1, tw_pos = -1, hj_pos = -1, tj_pos = -1;
259 gboolean follows_wildcard = FALSE;
260 guint pending_jokers = 0;
261 const gchar *s;
262 gchar *d;
263 guint i;
265 g_return_val_if_fail (pattern != NULL, NULL);
267 /* canonicalize pattern and collect necessary stats */
268 pspec = g_new (GPatternSpec, 1);
269 pspec->pattern_length = strlen (pattern);
270 pspec->min_length = 0;
271 pspec->max_length = 0;
272 pspec->pattern = g_new (gchar, pspec->pattern_length + 1);
273 d = pspec->pattern;
274 for (i = 0, s = pattern; *s != 0; s++)
276 switch (*s)
278 case '*':
279 if (follows_wildcard) /* compress multiple wildcards */
281 pspec->pattern_length--;
282 continue;
284 follows_wildcard = TRUE;
285 if (hw_pos < 0)
286 hw_pos = i;
287 tw_pos = i;
288 break;
289 case '?':
290 pending_jokers++;
291 pspec->min_length++;
292 pspec->max_length += 4; /* maximum UTF-8 character length */
293 continue;
294 default:
295 for (; pending_jokers; pending_jokers--, i++) {
296 *d++ = '?';
297 if (hj_pos < 0)
298 hj_pos = i;
299 tj_pos = i;
301 follows_wildcard = FALSE;
302 pspec->min_length++;
303 pspec->max_length++;
304 break;
306 *d++ = *s;
307 i++;
309 for (; pending_jokers; pending_jokers--) {
310 *d++ = '?';
311 if (hj_pos < 0)
312 hj_pos = i;
313 tj_pos = i;
315 *d++ = 0;
316 seen_joker = hj_pos >= 0;
317 seen_wildcard = hw_pos >= 0;
318 more_wildcards = seen_wildcard && hw_pos != tw_pos;
319 if (seen_wildcard)
320 pspec->max_length = G_MAXUINT;
322 /* special case sole head/tail wildcard or exact matches */
323 if (!seen_joker && !more_wildcards)
325 if (pspec->pattern[0] == '*')
327 pspec->match_type = G_MATCH_TAIL;
328 memmove (pspec->pattern, pspec->pattern + 1, --pspec->pattern_length);
329 pspec->pattern[pspec->pattern_length] = 0;
330 return pspec;
332 if (pspec->pattern_length > 0 &&
333 pspec->pattern[pspec->pattern_length - 1] == '*')
335 pspec->match_type = G_MATCH_HEAD;
336 pspec->pattern[--pspec->pattern_length] = 0;
337 return pspec;
339 if (!seen_wildcard)
341 pspec->match_type = G_MATCH_EXACT;
342 return pspec;
346 /* now just need to distinguish between head or tail match start */
347 tw_pos = pspec->pattern_length - 1 - tw_pos; /* last pos to tail distance */
348 tj_pos = pspec->pattern_length - 1 - tj_pos; /* last pos to tail distance */
349 if (seen_wildcard)
350 pspec->match_type = tw_pos > hw_pos ? G_MATCH_ALL_TAIL : G_MATCH_ALL;
351 else /* seen_joker */
352 pspec->match_type = tj_pos > hj_pos ? G_MATCH_ALL_TAIL : G_MATCH_ALL;
353 if (pspec->match_type == G_MATCH_ALL_TAIL) {
354 gchar *tmp = pspec->pattern;
355 pspec->pattern = g_utf8_strreverse (pspec->pattern, pspec->pattern_length);
356 g_free (tmp);
358 return pspec;
362 * g_pattern_spec_free:
363 * @pspec: a #GPatternSpec
365 * Frees the memory allocated for the #GPatternSpec.
367 void
368 g_pattern_spec_free (GPatternSpec *pspec)
370 g_return_if_fail (pspec != NULL);
372 g_free (pspec->pattern);
373 g_free (pspec);
377 * g_pattern_spec_equal:
378 * @pspec1: a #GPatternSpec
379 * @pspec2: another #GPatternSpec
380 * @Returns: Whether the compiled patterns are equal
382 * Compares two compiled pattern specs and returns whether they will
383 * match the same set of strings.
385 gboolean
386 g_pattern_spec_equal (GPatternSpec *pspec1,
387 GPatternSpec *pspec2)
389 g_return_val_if_fail (pspec1 != NULL, FALSE);
390 g_return_val_if_fail (pspec2 != NULL, FALSE);
392 return (pspec1->pattern_length == pspec2->pattern_length &&
393 pspec1->match_type == pspec2->match_type &&
394 strcmp (pspec1->pattern, pspec2->pattern) == 0);
398 * g_pattern_match_string:
399 * @pspec: a #GPatternSpec
400 * @string: the UTF-8 encoded string to match
401 * @Returns: %TRUE if @string matches @pspec
403 * Matches a string against a compiled pattern. If the string is to be
404 * matched against more than one pattern, consider using
405 * g_pattern_match() instead while supplying the reversed string.
407 gboolean
408 g_pattern_match_string (GPatternSpec *pspec,
409 const gchar *string)
411 g_return_val_if_fail (pspec != NULL, FALSE);
412 g_return_val_if_fail (string != NULL, FALSE);
414 return g_pattern_match (pspec, strlen (string), string, NULL);
418 * g_pattern_match_simple:
419 * @pattern: the UTF-8 encoded pattern
420 * @string: the UTF-8 encoded string to match
421 * @Returns: %TRUE if @string matches @pspec
423 * Matches a string against a pattern given as a string. If this
424 * function is to be called in a loop, it's more efficient to compile
425 * the pattern once with g_pattern_spec_new() and call
426 * g_pattern_match_string() repeatedly.
428 gboolean
429 g_pattern_match_simple (const gchar *pattern,
430 const gchar *string)
432 GPatternSpec *pspec;
433 gboolean ergo;
435 g_return_val_if_fail (pattern != NULL, FALSE);
436 g_return_val_if_fail (string != NULL, FALSE);
438 pspec = g_pattern_spec_new (pattern);
439 ergo = g_pattern_match (pspec, strlen (string), string, NULL);
440 g_pattern_spec_free (pspec);
442 return ergo;
445 #define __G_PATTERN_C__
446 #include "galiasdef.c"