2 /*--------------------------------------------------------------------*/
3 /*--- A simple sequence matching facility. ---*/
4 /*--- m_seqmatch.c ---*/
5 /*--------------------------------------------------------------------*/
8 This file is part of Valgrind, a dynamic binary instrumentation
11 Copyright (C) 2008-2013 OpenWorks Ltd
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29 The GNU General Public License is contained in the file COPYING.
32 #include "pub_core_basics.h"
33 #include "pub_core_libcassert.h"
34 #include "pub_core_libcbase.h" // VG_(strlen)
35 #include "pub_core_seqmatch.h" // self
37 /* ---------------------------------------------------------------------
38 A simple sequence matching facility
39 ------------------------------------------------------------------ */
41 /* See detailed comment in include/pub_tool_seqmatch.h about this. */
42 Bool
VG_(generic_match
) (
44 const void* patt
, SizeT szbPatt
, UWord nPatt
, UWord ixPatt
,
45 const void* input
, SizeT szbInput
, UWord nInput
, UWord ixInput
,
46 Bool (*pIsStar
)(const void*),
47 Bool (*pIsQuery
)(const void*),
48 Bool (*pattEQinp
)(const void*,const void*,void*,UWord
),
49 void* inputCompleter
, Bool (*haveInputInpC
)(void*,UWord
)
52 /* This is the spec, written in my favourite formal specification
53 language. It specifies non-greedy matching of '*'s.
55 ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
56 ma ('*':ps) [] = ma ps []
58 ma ('?':ps) (i:is) = ma ps is
59 ma ('?':ps) [] = False
61 ma (p:ps) (i:is) = p == i && ma ps is
64 ma [] (i:is) = False -- m-all, True for m-prefix
67 Bool havePatt
, haveInput
;
68 const HChar
*currPatt
, *currInput
;
70 vg_assert(nPatt
>= 0 && nPatt
< 1000000); /* arbitrary */
71 vg_assert(inputCompleter
72 || (nInput
>= 0 && nInput
< 1000000)); /* arbitrary */
73 vg_assert(ixPatt
>= 0 && ixPatt
<= nPatt
);
74 vg_assert(ixInput
>= 0 && (inputCompleter
|| ixInput
<= nInput
));
76 havePatt
= ixPatt
< nPatt
;
77 haveInput
= inputCompleter
?
78 (*haveInputInpC
)(inputCompleter
, ixInput
)
81 /* No specific need to set NULL when !have{Patt,Input}, but guards
82 against inadvertantly dereferencing an out of range pointer to
83 the pattern or input arrays. */
84 currPatt
= havePatt
? ((const HChar
*)patt
) + szbPatt
* ixPatt
: NULL
;
85 currInput
= haveInput
&& !inputCompleter
?
86 ((const HChar
*)input
) + szbInput
* ixInput
: NULL
;
88 // Deal with the complex case first: wildcards. Do frugal
89 // matching. When encountering a '*', first skip no characters
90 // at all, and see if the rest of the match still works. Only if
91 // that fails do we then skip a character, and retry at the next
94 // ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
96 // If we're out of input, check the rest of the pattern matches
97 // the empty input. This really means can only be be empty or
98 // composed entirely of '*'s.
100 // ma ('*':ps) [] = ma ps []
102 if (havePatt
&& pIsStar(currPatt
)) {
104 // ma ('*':ps) (i:is) = ma ps (i:is) || ma ('*':ps) is
105 // we unavoidably have to make a real recursive call for the
106 // first half of the OR, since this isn't straight tail-recursion.
107 if (VG_(generic_match
)( matchAll
,
108 patt
, szbPatt
, nPatt
, ixPatt
+1,
109 input
,szbInput
,nInput
, ixInput
+0,
110 pIsStar
,pIsQuery
,pattEQinp
,
111 inputCompleter
,haveInputInpC
) ) {
114 // but we can tail-recurse for the second call
115 ixInput
++; goto tailcall
;
117 // ma ('*':ps) [] = ma ps []
118 ixPatt
++; goto tailcall
;
122 // simpler cases now. Deal with '?' wildcards.
124 // ma ('?':ps) (i:is) = ma ps is
125 // ma ('?':ps) [] = False
126 if (havePatt
&& pIsQuery(currPatt
)) {
128 ixPatt
++; ixInput
++; goto tailcall
;
134 // obvious case with literal chars in the pattern
136 // ma (p:ps) (i:is) = p == i && ma ps is
137 if (havePatt
&& haveInput
) {
138 if (!pattEQinp(currPatt
,currInput
,inputCompleter
,ixInput
)) return False
;
139 ixPatt
++; ixInput
++; goto tailcall
;
142 // if we run out of input before we run out of pattern, we must fail
143 // ma (p:ps) [] = False
144 if (havePatt
&& !haveInput
) return False
;
146 // if we run out of pattern before we run out of input, the
147 // verdict depends on the matching mode. If we are trying to
148 // match exactly (the pattern must consume the entire input)
149 // then the outcome is failure. However, if we're merely attempting
150 // to match some prefix of the input, then we have been successful.
152 // ma [] (i:is) = False -- m-all, True for m-prefix
153 if (!havePatt
&& haveInput
) {
154 return matchAll
? False
// match-all
155 : True
; // match-prefix
158 // finally, if both sequence and input are both completely
159 // consumed, then we were successful, regardless of matching mode.
160 if (!havePatt
&& !haveInput
) return True
;
167 /* And a parameterization of the above, to make it do
170 static Bool
charIsStar ( const void* pV
) { return *(const HChar
*)pV
== '*'; }
171 static Bool
charIsQuery ( const void* pV
) { return *(const HChar
*)pV
== '?'; }
172 static Bool
char_p_EQ_i ( const void* pV
, const void* cV
,
173 void* null_completer
, UWord ixcV
) {
174 HChar p
= *(const HChar
*)pV
;
175 HChar c
= *(const HChar
*)cV
;
176 vg_assert(p
!= '*' && p
!= '?');
179 Bool
VG_(string_match
) ( const HChar
* patt
, const HChar
* input
)
181 return VG_(generic_match
)(
183 patt
, sizeof(HChar
), VG_(strlen
)(patt
), 0,
184 input
, sizeof(HChar
), VG_(strlen
)(input
), 0,
185 charIsStar
, charIsQuery
, char_p_EQ_i
,
191 // test cases for the matcher (in match-all mode)
192 // typedef struct { char* patt; char* input; Bool xres; } Test;
194 //static Test tests[] =
196 // { "" ,"" , True },
197 // { "a" ,"" , False },
198 // { "a" ,"b" , False },
199 // { "a" ,"a" , True },
200 // { "a" ,"aa" , False },
201 // { "*" ,"" , True },
202 // { "**" ,"" , True },
203 // { "*" ,"abc", True },
204 // { "*a" ,"abc", False },
205 // { "*b" ,"abc", False },
206 // { "*bc" ,"abc", True },
207 // { "a*b" ,"abc", False },
208 // { "a*c" ,"abc", True },
209 // { "*c" ,"abc", True },
210 // { "c*c" ,"abc", False },
211 // { "abc*" ,"abc", True },
212 // { "abc**" ,"abc", True },
213 // { "**abc" ,"abc", True },
214 // { "**a*b*c**" ,"abc", True },
215 // { "**a*b*d**" ,"abc", False },
216 // { "a?b" ,"abc", False },
217 // { "a?c" ,"abc", True },
218 // { "?" ,"" , False },
219 // { "?" ,"a" , True },
220 // { "?" ,"ab" , False },
221 // { "abcd" ,"abc", False },
222 // { "ab" ,"abc", False },
223 // { NULL ,NULL , False }
229 // for (t = tests; t->patt; t++) {
230 // printf("%10s %6s %s\n",
231 // t->patt, t->input,
232 // match_string_all((UChar*)t->patt,(UChar*)t->input,True)
234 // ? "pass" : "FAIL" );
239 /*--------------------------------------------------------------------*/
240 /*--- end m_seqmatch.c ---*/
241 /*--------------------------------------------------------------------*/