GApplication: Make ::startup run-first
[glib.git] / glib / gpattern.c
blob7770d9ca262a6418647fd7c0ce7394f07803fb53
1 /* GLIB - Library of useful routines for C programming
2 * Copyright (C) 1995-1997, 1999 Peter Mattis, Red Hat, Inc.
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 02111-1307, USA.
20 #include "config.h"
22 #include <string.h>
24 #include "gpattern.h"
26 #include "gmacros.h"
27 #include "gmessages.h"
28 #include "gmem.h"
29 #include "gunicode.h"
30 #include "gutils.h"
32 /**
33 * SECTION:patterns
34 * @title: Glob-style pattern matching
35 * @short_description: matches strings against patterns containing '*'
36 * (wildcard) and '?' (joker)
38 * The <function>g_pattern_match*</function> functions match a string
39 * against a pattern containing '*' and '?' wildcards with similar
40 * semantics as the standard glob() function: '*' matches an arbitrary,
41 * possibly empty, string, '?' matches an arbitrary character.
43 * Note that in contrast to glob(), the '/' character
44 * <emphasis>can</emphasis> be matched by the wildcards, there are no
45 * '[...]' character ranges and '*' and '?' can
46 * <emphasis>not</emphasis> be escaped to include them literally in a
47 * pattern.
49 * When multiple strings must be matched against the same pattern, it
50 * is better to compile the pattern to a #GPatternSpec using
51 * g_pattern_spec_new() and use g_pattern_match_string() instead of
52 * g_pattern_match_simple(). This avoids the overhead of repeated
53 * pattern compilation.
54 **/
56 /**
57 * GPatternSpec:
59 * A <structname>GPatternSpec</structname> is the 'compiled' form of a
60 * pattern. This structure is opaque and its fields cannot be accessed
61 * directly.
62 **/
/* keep enum and structure of gpattern.c and patterntest.c in sync */

/*
 * GMatchType:
 * Strategy selected by g_pattern_spec_new() for a compiled pattern and
 * dispatched on by g_pattern_match().  The quoted strings show the shape
 * of pattern each strategy stands for ('A' is any literal character).
 */
typedef enum
{
  G_MATCH_ALL,       /* "*A?A*" - generic matcher run on the string as given */
  G_MATCH_ALL_TAIL,  /* "*A?AA" - generic matcher run on the reversed string */
  G_MATCH_HEAD,      /* "AAAA*" - plain prefix comparison */
  G_MATCH_TAIL,      /* "*AAAA" - plain suffix comparison */
  G_MATCH_EXACT,     /* "AAAAA" - plain equality comparison */
  G_MATCH_LAST       /* sentinel: number of match types, not a strategy */
} GMatchType;
75 struct _GPatternSpec
77 GMatchType match_type;
78 guint pattern_length;
79 guint min_length;
80 guint max_length;
81 gchar *pattern;
85 /* --- functions --- */
86 static inline gboolean
87 g_pattern_ph_match (const gchar *match_pattern,
88 const gchar *match_string,
89 gboolean *wildcard_reached_p)
91 register const gchar *pattern, *string;
92 register gchar ch;
94 pattern = match_pattern;
95 string = match_string;
97 ch = *pattern;
98 pattern++;
99 while (ch)
101 switch (ch)
103 case '?':
104 if (!*string)
105 return FALSE;
106 string = g_utf8_next_char (string);
107 break;
109 case '*':
110 *wildcard_reached_p = TRUE;
113 ch = *pattern;
114 pattern++;
115 if (ch == '?')
117 if (!*string)
118 return FALSE;
119 string = g_utf8_next_char (string);
122 while (ch == '*' || ch == '?');
123 if (!ch)
124 return TRUE;
127 gboolean next_wildcard_reached = FALSE;
128 while (ch != *string)
130 if (!*string)
131 return FALSE;
132 string = g_utf8_next_char (string);
134 string++;
135 if (g_pattern_ph_match (pattern, string, &next_wildcard_reached))
136 return TRUE;
137 if (next_wildcard_reached)
138 /* the forthcoming pattern substring up to the next wildcard has
139 * been matched, but a mismatch occoured for the rest of the
140 * pattern, following the next wildcard.
141 * there's no need to advance the current match position any
142 * further if the rest pattern will not match.
144 return FALSE;
146 while (*string);
147 break;
149 default:
150 if (ch == *string)
151 string++;
152 else
153 return FALSE;
154 break;
157 ch = *pattern;
158 pattern++;
161 return *string == 0;
165 * g_pattern_match:
166 * @pspec: a #GPatternSpec
167 * @string_length: the length of @string (in bytes, i.e. strlen(),
168 * <emphasis>not</emphasis> g_utf8_strlen())
169 * @string: the UTF-8 encoded string to match
170 * @string_reversed: the reverse of @string or %NULL
171 * @Returns: %TRUE if @string matches @pspec
173 * Matches a string against a compiled pattern. Passing the correct
174 * length of the string given is mandatory. The reversed string can be
175 * omitted by passing %NULL, this is more efficient if the reversed
176 * version of the string to be matched is not at hand, as
177 * g_pattern_match() will only construct it if the compiled pattern
178 * requires reverse matches.
180 * Note that, if the user code will (possibly) match a string against a
181 * multitude of patterns containing wildcards, chances are high that
182 * some patterns will require a reversed string. In this case, it's
183 * more efficient to provide the reversed string to avoid multiple
184 * constructions thereof in the various calls to g_pattern_match().
186 * Note also that the reverse of a UTF-8 encoded string can in general
187 * <emphasis>not</emphasis> be obtained by g_strreverse(). This works
188 * only if the string doesn't contain any multibyte characters. GLib
189 * offers the g_utf8_strreverse() function to reverse UTF-8 encoded
190 * strings.
192 gboolean
193 g_pattern_match (GPatternSpec *pspec,
194 guint string_length,
195 const gchar *string,
196 const gchar *string_reversed)
198 g_return_val_if_fail (pspec != NULL, FALSE);
199 g_return_val_if_fail (string != NULL, FALSE);
201 if (string_length < pspec->min_length ||
202 string_length > pspec->max_length)
203 return FALSE;
205 switch (pspec->match_type)
207 gboolean dummy;
208 case G_MATCH_ALL:
209 return g_pattern_ph_match (pspec->pattern, string, &dummy);
210 case G_MATCH_ALL_TAIL:
211 if (string_reversed)
212 return g_pattern_ph_match (pspec->pattern, string_reversed, &dummy);
213 else
215 gboolean result;
216 gchar *tmp;
217 tmp = g_utf8_strreverse (string, string_length);
218 result = g_pattern_ph_match (pspec->pattern, tmp, &dummy);
219 g_free (tmp);
220 return result;
222 case G_MATCH_HEAD:
223 if (pspec->pattern_length == string_length)
224 return strcmp (pspec->pattern, string) == 0;
225 else if (pspec->pattern_length)
226 return strncmp (pspec->pattern, string, pspec->pattern_length) == 0;
227 else
228 return TRUE;
229 case G_MATCH_TAIL:
230 if (pspec->pattern_length)
231 return strcmp (pspec->pattern, string + (string_length - pspec->pattern_length)) == 0;
232 else
233 return TRUE;
234 case G_MATCH_EXACT:
235 if (pspec->pattern_length != string_length)
236 return FALSE;
237 else
238 return strcmp (pspec->pattern, string) == 0;
239 default:
240 g_return_val_if_fail (pspec->match_type < G_MATCH_LAST, FALSE);
241 return FALSE;
246 * g_pattern_spec_new:
247 * @pattern: a zero-terminated UTF-8 encoded string
248 * @Returns: a newly-allocated #GPatternSpec
250 * Compiles a pattern to a #GPatternSpec.
252 GPatternSpec*
253 g_pattern_spec_new (const gchar *pattern)
255 GPatternSpec *pspec;
256 gboolean seen_joker = FALSE, seen_wildcard = FALSE, more_wildcards = FALSE;
257 gint hw_pos = -1, tw_pos = -1, hj_pos = -1, tj_pos = -1;
258 gboolean follows_wildcard = FALSE;
259 guint pending_jokers = 0;
260 const gchar *s;
261 gchar *d;
262 guint i;
264 g_return_val_if_fail (pattern != NULL, NULL);
266 /* canonicalize pattern and collect necessary stats */
267 pspec = g_new (GPatternSpec, 1);
268 pspec->pattern_length = strlen (pattern);
269 pspec->min_length = 0;
270 pspec->max_length = 0;
271 pspec->pattern = g_new (gchar, pspec->pattern_length + 1);
272 d = pspec->pattern;
273 for (i = 0, s = pattern; *s != 0; s++)
275 switch (*s)
277 case '*':
278 if (follows_wildcard) /* compress multiple wildcards */
280 pspec->pattern_length--;
281 continue;
283 follows_wildcard = TRUE;
284 if (hw_pos < 0)
285 hw_pos = i;
286 tw_pos = i;
287 break;
288 case '?':
289 pending_jokers++;
290 pspec->min_length++;
291 pspec->max_length += 4; /* maximum UTF-8 character length */
292 continue;
293 default:
294 for (; pending_jokers; pending_jokers--, i++) {
295 *d++ = '?';
296 if (hj_pos < 0)
297 hj_pos = i;
298 tj_pos = i;
300 follows_wildcard = FALSE;
301 pspec->min_length++;
302 pspec->max_length++;
303 break;
305 *d++ = *s;
306 i++;
308 for (; pending_jokers; pending_jokers--) {
309 *d++ = '?';
310 if (hj_pos < 0)
311 hj_pos = i;
312 tj_pos = i;
314 *d++ = 0;
315 seen_joker = hj_pos >= 0;
316 seen_wildcard = hw_pos >= 0;
317 more_wildcards = seen_wildcard && hw_pos != tw_pos;
318 if (seen_wildcard)
319 pspec->max_length = G_MAXUINT;
321 /* special case sole head/tail wildcard or exact matches */
322 if (!seen_joker && !more_wildcards)
324 if (pspec->pattern[0] == '*')
326 pspec->match_type = G_MATCH_TAIL;
327 memmove (pspec->pattern, pspec->pattern + 1, --pspec->pattern_length);
328 pspec->pattern[pspec->pattern_length] = 0;
329 return pspec;
331 if (pspec->pattern_length > 0 &&
332 pspec->pattern[pspec->pattern_length - 1] == '*')
334 pspec->match_type = G_MATCH_HEAD;
335 pspec->pattern[--pspec->pattern_length] = 0;
336 return pspec;
338 if (!seen_wildcard)
340 pspec->match_type = G_MATCH_EXACT;
341 return pspec;
345 /* now just need to distinguish between head or tail match start */
346 tw_pos = pspec->pattern_length - 1 - tw_pos; /* last pos to tail distance */
347 tj_pos = pspec->pattern_length - 1 - tj_pos; /* last pos to tail distance */
348 if (seen_wildcard)
349 pspec->match_type = tw_pos > hw_pos ? G_MATCH_ALL_TAIL : G_MATCH_ALL;
350 else /* seen_joker */
351 pspec->match_type = tj_pos > hj_pos ? G_MATCH_ALL_TAIL : G_MATCH_ALL;
352 if (pspec->match_type == G_MATCH_ALL_TAIL) {
353 gchar *tmp = pspec->pattern;
354 pspec->pattern = g_utf8_strreverse (pspec->pattern, pspec->pattern_length);
355 g_free (tmp);
357 return pspec;
361 * g_pattern_spec_free:
362 * @pspec: a #GPatternSpec
364 * Frees the memory allocated for the #GPatternSpec.
366 void
367 g_pattern_spec_free (GPatternSpec *pspec)
369 g_return_if_fail (pspec != NULL);
371 g_free (pspec->pattern);
372 g_free (pspec);
376 * g_pattern_spec_equal:
377 * @pspec1: a #GPatternSpec
378 * @pspec2: another #GPatternSpec
379 * @Returns: Whether the compiled patterns are equal
381 * Compares two compiled pattern specs and returns whether they will
382 * match the same set of strings.
384 gboolean
385 g_pattern_spec_equal (GPatternSpec *pspec1,
386 GPatternSpec *pspec2)
388 g_return_val_if_fail (pspec1 != NULL, FALSE);
389 g_return_val_if_fail (pspec2 != NULL, FALSE);
391 return (pspec1->pattern_length == pspec2->pattern_length &&
392 pspec1->match_type == pspec2->match_type &&
393 strcmp (pspec1->pattern, pspec2->pattern) == 0);
397 * g_pattern_match_string:
398 * @pspec: a #GPatternSpec
399 * @string: the UTF-8 encoded string to match
400 * @Returns: %TRUE if @string matches @pspec
402 * Matches a string against a compiled pattern. If the string is to be
403 * matched against more than one pattern, consider using
404 * g_pattern_match() instead while supplying the reversed string.
406 gboolean
407 g_pattern_match_string (GPatternSpec *pspec,
408 const gchar *string)
410 g_return_val_if_fail (pspec != NULL, FALSE);
411 g_return_val_if_fail (string != NULL, FALSE);
413 return g_pattern_match (pspec, strlen (string), string, NULL);
417 * g_pattern_match_simple:
418 * @pattern: the UTF-8 encoded pattern
419 * @string: the UTF-8 encoded string to match
420 * @Returns: %TRUE if @string matches @pspec
422 * Matches a string against a pattern given as a string. If this
423 * function is to be called in a loop, it's more efficient to compile
424 * the pattern once with g_pattern_spec_new() and call
425 * g_pattern_match_string() repeatedly.
427 gboolean
428 g_pattern_match_simple (const gchar *pattern,
429 const gchar *string)
431 GPatternSpec *pspec;
432 gboolean ergo;
434 g_return_val_if_fail (pattern != NULL, FALSE);
435 g_return_val_if_fail (string != NULL, FALSE);
437 pspec = g_pattern_spec_new (pattern);
438 ergo = g_pattern_match (pspec, strlen (string), string, NULL);
439 g_pattern_spec_free (pspec);
441 return ergo;