1 /* GLIB - Library of useful routines for C programming
2 * Copyright (C) 1995-1997, 1999 Peter Mattis, Red Hat, Inc.
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 02111-1307, USA.
27 #include "gmessages.h"
35 * @title: Glob-style pattern matching
36 * @short_description: matches strings against patterns containing '*'
37 * (wildcard) and '?' (joker)
39 * The <function>g_pattern_match*</function> functions match a string
40 * against a pattern containing '*' and '?' wildcards with semantics
41 * similar to the standard glob() function: '*' matches an arbitrary,
42 * possibly empty, string, '?' matches an arbitrary character.
44 * Note that in contrast to glob(), the '/' character
45 * <emphasis>can</emphasis> be matched by the wildcards, there are no
46 * '[...]' character ranges and '*' and '?' can
47 * <emphasis>not</emphasis> be escaped to include them literally in a
50 * When multiple strings must be matched against the same pattern, it
51 * is better to compile the pattern to a #GPatternSpec using
52 * g_pattern_spec_new() and use g_pattern_match_string() instead of
53 * g_pattern_match_simple(). This avoids the overhead of repeated
54 * pattern compilation.
60 * A <structname>GPatternSpec</structname> is the 'compiled' form of a
61 * pattern. This structure is opaque and its fields cannot be accessed
65 /* keep enum and structure of gpattern.c and patterntest.c in sync */
68 G_MATCH_ALL
, /* "*A?A*" */
69 G_MATCH_ALL_TAIL
, /* "*A?AA" */
70 G_MATCH_HEAD
, /* "AAAA*" */
71 G_MATCH_TAIL
, /* "*AAAA" */
72 G_MATCH_EXACT
, /* "AAAAA" */
78 GMatchType match_type
;
86 /* --- functions --- */
87 static inline gboolean
88 g_pattern_ph_match (const gchar
*match_pattern
,
89 const gchar
*match_string
,
90 gboolean
*wildcard_reached_p
)
92 register const gchar
*pattern
, *string
;
95 pattern
= match_pattern
;
96 string
= match_string
;
107 string
= g_utf8_next_char (string
);
111 *wildcard_reached_p
= TRUE
;
120 string
= g_utf8_next_char (string
);
123 while (ch
== '*' || ch
== '?');
128 gboolean next_wildcard_reached
= FALSE
;
129 while (ch
!= *string
)
133 string
= g_utf8_next_char (string
);
136 if (g_pattern_ph_match (pattern
, string
, &next_wildcard_reached
))
138 if (next_wildcard_reached
)
139 /* the forthcoming pattern substring up to the next wildcard has
140 * been matched, but a mismatch occurred for the rest of the
141 * pattern, following the next wildcard.
142 * there's no need to advance the current match position any
143 * further if the rest of the pattern will not match.
167 * @pspec: a #GPatternSpec
168 * @string_length: the length of @string (in bytes, i.e. strlen(),
169 * <emphasis>not</emphasis> g_utf8_strlen())
170 * @string: the UTF-8 encoded string to match
171 * @string_reversed: the reverse of @string or %NULL
172 * @Returns: %TRUE if @string matches @pspec
174 * Matches a string against a compiled pattern. Passing the correct
175 * length of the string given is mandatory. The reversed string can be
176 * omitted by passing %NULL; this is more efficient if the reversed
177 * version of the string to be matched is not at hand, as
178 * g_pattern_match() will only construct it if the compiled pattern
179 * requires reverse matches.
181 * Note that, if the user code will (possibly) match a string against a
182 * multitude of patterns containing wildcards, chances are high that
183 * some patterns will require a reversed string. In this case, it's
184 * more efficient to provide the reversed string to avoid multiple
185 * constructions thereof in the various calls to g_pattern_match().
187 * Note also that the reverse of a UTF-8 encoded string can in general
188 * <emphasis>not</emphasis> be obtained by g_strreverse(). This works
189 * only if the string doesn't contain any multibyte characters. GLib
190 * offers the g_utf8_strreverse() function to reverse UTF-8 encoded
194 g_pattern_match (GPatternSpec
*pspec
,
197 const gchar
*string_reversed
)
199 g_return_val_if_fail (pspec
!= NULL
, FALSE
);
200 g_return_val_if_fail (string
!= NULL
, FALSE
);
202 if (string_length
< pspec
->min_length
||
203 string_length
> pspec
->max_length
)
206 switch (pspec
->match_type
)
210 return g_pattern_ph_match (pspec
->pattern
, string
, &dummy
);
211 case G_MATCH_ALL_TAIL
:
213 return g_pattern_ph_match (pspec
->pattern
, string_reversed
, &dummy
);
218 tmp
= g_utf8_strreverse (string
, string_length
);
219 result
= g_pattern_ph_match (pspec
->pattern
, tmp
, &dummy
);
224 if (pspec
->pattern_length
== string_length
)
225 return strcmp (pspec
->pattern
, string
) == 0;
226 else if (pspec
->pattern_length
)
227 return strncmp (pspec
->pattern
, string
, pspec
->pattern_length
) == 0;
231 if (pspec
->pattern_length
)
232 return strcmp (pspec
->pattern
, string
+ (string_length
- pspec
->pattern_length
)) == 0;
236 if (pspec
->pattern_length
!= string_length
)
239 return strcmp (pspec
->pattern
, string
) == 0;
241 g_return_val_if_fail (pspec
->match_type
< G_MATCH_LAST
, FALSE
);
247 * g_pattern_spec_new:
248 * @pattern: a zero-terminated UTF-8 encoded string
249 * @Returns: a newly-allocated #GPatternSpec
251 * Compiles a pattern to a #GPatternSpec.
254 g_pattern_spec_new (const gchar
*pattern
)
257 gboolean seen_joker
= FALSE
, seen_wildcard
= FALSE
, more_wildcards
= FALSE
;
258 gint hw_pos
= -1, tw_pos
= -1, hj_pos
= -1, tj_pos
= -1;
259 gboolean follows_wildcard
= FALSE
;
260 guint pending_jokers
= 0;
265 g_return_val_if_fail (pattern
!= NULL
, NULL
);
267 /* canonicalize pattern and collect necessary stats */
268 pspec
= g_new (GPatternSpec
, 1);
269 pspec
->pattern_length
= strlen (pattern
);
270 pspec
->min_length
= 0;
271 pspec
->max_length
= 0;
272 pspec
->pattern
= g_new (gchar
, pspec
->pattern_length
+ 1);
274 for (i
= 0, s
= pattern
; *s
!= 0; s
++)
279 if (follows_wildcard
) /* compress multiple wildcards */
281 pspec
->pattern_length
--;
284 follows_wildcard
= TRUE
;
292 pspec
->max_length
+= 4; /* maximum UTF-8 character length */
295 for (; pending_jokers
; pending_jokers
--, i
++) {
301 follows_wildcard
= FALSE
;
309 for (; pending_jokers
; pending_jokers
--) {
316 seen_joker
= hj_pos
>= 0;
317 seen_wildcard
= hw_pos
>= 0;
318 more_wildcards
= seen_wildcard
&& hw_pos
!= tw_pos
;
320 pspec
->max_length
= G_MAXUINT
;
322 /* special case sole head/tail wildcard or exact matches */
323 if (!seen_joker
&& !more_wildcards
)
325 if (pspec
->pattern
[0] == '*')
327 pspec
->match_type
= G_MATCH_TAIL
;
328 memmove (pspec
->pattern
, pspec
->pattern
+ 1, --pspec
->pattern_length
);
329 pspec
->pattern
[pspec
->pattern_length
] = 0;
332 if (pspec
->pattern_length
> 0 &&
333 pspec
->pattern
[pspec
->pattern_length
- 1] == '*')
335 pspec
->match_type
= G_MATCH_HEAD
;
336 pspec
->pattern
[--pspec
->pattern_length
] = 0;
341 pspec
->match_type
= G_MATCH_EXACT
;
346 /* now just need to distinguish between head or tail match start */
347 tw_pos
= pspec
->pattern_length
- 1 - tw_pos
; /* last pos to tail distance */
348 tj_pos
= pspec
->pattern_length
- 1 - tj_pos
; /* last pos to tail distance */
350 pspec
->match_type
= tw_pos
> hw_pos
? G_MATCH_ALL_TAIL
: G_MATCH_ALL
;
351 else /* seen_joker */
352 pspec
->match_type
= tj_pos
> hj_pos
? G_MATCH_ALL_TAIL
: G_MATCH_ALL
;
353 if (pspec
->match_type
== G_MATCH_ALL_TAIL
) {
354 gchar
*tmp
= pspec
->pattern
;
355 pspec
->pattern
= g_utf8_strreverse (pspec
->pattern
, pspec
->pattern_length
);
362 * g_pattern_spec_free:
363 * @pspec: a #GPatternSpec
365 * Frees the memory allocated for the #GPatternSpec.
368 g_pattern_spec_free (GPatternSpec
*pspec
)
370 g_return_if_fail (pspec
!= NULL
);
372 g_free (pspec
->pattern
);
377 * g_pattern_spec_equal:
378 * @pspec1: a #GPatternSpec
379 * @pspec2: another #GPatternSpec
380 * @Returns: Whether the compiled patterns are equal
382 * Compares two compiled pattern specs and returns whether they will
383 * match the same set of strings.
386 g_pattern_spec_equal (GPatternSpec
*pspec1
,
387 GPatternSpec
*pspec2
)
389 g_return_val_if_fail (pspec1
!= NULL
, FALSE
);
390 g_return_val_if_fail (pspec2
!= NULL
, FALSE
);
392 return (pspec1
->pattern_length
== pspec2
->pattern_length
&&
393 pspec1
->match_type
== pspec2
->match_type
&&
394 strcmp (pspec1
->pattern
, pspec2
->pattern
) == 0);
398 * g_pattern_match_string:
399 * @pspec: a #GPatternSpec
400 * @string: the UTF-8 encoded string to match
401 * @Returns: %TRUE if @string matches @pspec
403 * Matches a string against a compiled pattern. If the string is to be
404 * matched against more than one pattern, consider using
405 * g_pattern_match() instead while supplying the reversed string.
408 g_pattern_match_string (GPatternSpec
*pspec
,
411 g_return_val_if_fail (pspec
!= NULL
, FALSE
);
412 g_return_val_if_fail (string
!= NULL
, FALSE
);
414 return g_pattern_match (pspec
, strlen (string
), string
, NULL
);
418 * g_pattern_match_simple:
419 * @pattern: the UTF-8 encoded pattern
420 * @string: the UTF-8 encoded string to match
421 * @Returns: %TRUE if @string matches @pattern
423 * Matches a string against a pattern given as a string. If this
424 * function is to be called in a loop, it's more efficient to compile
425 * the pattern once with g_pattern_spec_new() and call
426 * g_pattern_match_string() repeatedly.
429 g_pattern_match_simple (const gchar
*pattern
,
435 g_return_val_if_fail (pattern
!= NULL
, FALSE
);
436 g_return_val_if_fail (string
!= NULL
, FALSE
);
438 pspec
= g_pattern_spec_new (pattern
);
439 ergo
= g_pattern_match (pspec
, strlen (string
), string
, NULL
);
440 g_pattern_spec_free (pspec
);
445 #define __G_PATTERN_C__
446 #include "galiasdef.c"