1 # Copyright (C) 2006-2009, Parrot Foundation.
6 Regex - base class for grammars and built-in rules
10 This implements the base classes for forming grammars, and provides
11 a number of built-in rules.
15 .namespace [ 'PGE'; 'Match' ]
17 .include 'cclass.pasm'
18 .include 'interpinfo.pasm'
22 p6meta = new 'P6metaclass'
23 p6meta.'new_class'('PGE::Grammar', 'parent'=>'PGE::Match')
25 set_global '%!cache', $P0
40 .sub 'ident' :method :nsentry('ident')
41 .param pmc adverbs :slurpy :named
43 .local pmc mob, mfrom, mpos
44 .local int pos, lastpos
46 $P0 = get_hll_global ['PGE'], 'Match'
47 (mob, pos, target) = $P0.'new'(self)
49 lastpos = length target
50 $S0 = substr target, pos, 1
51 if $S0 == '_' goto ident_1
52 $I0 = is_cclass .CCLASS_ALPHABETIC, target, pos
55 pos = find_not_cclass .CCLASS_WORD, target, pos, lastpos
64 Match a single alphabetic character.
69 .param pmc adverbs :slurpy :named
71 .local pmc mob, mfrom, mpos
72 .local int pos, lastpos
74 $P0 = get_hll_global ['PGE'], 'Match'
75 (mob, pos, target) = $P0.'new'(self)
77 lastpos = length target
78 $S0 = substr target, pos, 1
79 if $S0 == '_' goto ident_1
80 $I0 = is_cclass .CCLASS_ALPHABETIC, target, pos
92 Match a single uppercase character.
97 .tailcall '!cclass'(self, .CCLASS_UPPERCASE)
103 Match a single lowercase character.
108 .tailcall '!cclass'(self, .CCLASS_LOWERCASE)
114 Match a single digit.
119 .tailcall '!cclass'(self, .CCLASS_NUMERIC)
124 Match a single alphanumeric character.
128 .sub "xdigit" :method
129 .tailcall '!cclass'(self, .CCLASS_HEXADECIMAL)
134 Match a single whitespace character.
139 .tailcall '!cclass'(self, .CCLASS_WHITESPACE)
144 Match a single printable character.
149 .tailcall '!cclass'(self, .CCLASS_PRINTING)
154 Match a single "graphical" character.
159 .tailcall '!cclass'(self, .CCLASS_GRAPHICAL)
164 Match a single "blank" character.
169 .tailcall '!cclass'(self, .CCLASS_BLANK)
174 Match a single "control" character.
179 .tailcall '!cclass'(self, .CCLASS_CONTROL)
184 Match a single punctuation character.
189 .tailcall '!cclass'(self, .CCLASS_PUNCTUATION)
194 Match a single alphanumeric character.
199 .tailcall '!cclass'(self, .CCLASS_ALPHANUMERIC)
205 Match whitespace between tokens.
211 .local pmc mob, mfrom, mpos
212 .local int rep, pos, lastpos
213 .local string nextchars
214 .const 'Sub' corou = "ws_corou"
217 $P0 = get_hll_global ['PGE'], 'Match'
218 (mob, pos, target, mfrom, mpos) = $P0.'new'(self)
219 lastpos = length target
221 if pos >= lastpos goto found
222 if pos < 1 goto ws_scan
223 $I0 = is_cclass .CCLASS_WORD, target, pos
224 if $I0 == 0 goto ws_scan
226 $I0 = is_cclass .CCLASS_WORD, target, $I1
227 if $I0 == 0 goto ws_scan
231 pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
232 if pos == $I0 goto nobacktrack
233 $I0 = length nextchars
234 if $I0 == 0 goto backtrack
235 $I1 = find_cclass .CCLASS_WHITESPACE, nextchars, 0, $I0
236 if $I1 >= $I0 goto nobacktrack
241 setattribute mob, '&!corou', $P0
242 $P0(mob, mfrom, mpos)
245 if nextchars == "" goto found
246 $S1 = substr target, pos, 1
247 $I1 = index nextchars, $S1
254 .sub "ws_corou" :anon
261 if mpos > mfrom goto loop
263 setattribute mob, '&!corou', $P0
270 Returns true if we're at a word boundary (as defined by
279 $P0 = get_hll_global ['PGE'], 'Match'
280 (mob, pos, target) = $P0.'new'(self)
281 if pos == 0 goto succeed
283 if pos == $I0 goto succeed
285 $I1 = is_cclass .CCLASS_WORD, target, $I0
286 $I2 = is_cclass .CCLASS_WORD, target, pos
287 if $I1 == $I2 goto end
295 =item C<before(PMC mob, STR pattern)>
297 Perform lookahead -- i.e., check if we're at a position where
298 C<pattern> matches. Returns a zero-width Match object on
303 .sub "before" :method
304 .param string pattern :optional
305 .param int has_pattern :opt_flag
306 .param pmc adverbs :slurpy :named
307 .local pmc mob, cache, rule
309 if has_pattern goto lookahead
313 cache = get_global '%!cache'
314 $I0 = exists cache[pattern]
315 if $I0 == 0 goto new_pattern
316 rule = cache[pattern]
319 $P0 = compreg 'PGE::Perl6Regex'
321 cache[pattern] = rule
325 $P0 = getattribute mob, '$.from'
326 $P1 = getattribute mob, '$.pos'
329 setattribute mob, '&!corou', $P0
334 =item C<after(PMC mob, STR pattern)>
336 Perform lookbehind -- i.e., check if the string before the
337 current position matches <pattern> (anchored at the end).
338 Returns a zero-width Match object on success.
340 XXX: Note that this implementation cheats in a big way.
341 S05 says that C<after> is implemented by reversing the
342 syntax tree and looking for things in opposite order going
343 to the left. This implementation just grabs the (sub)string
344 up to the current match position and tests that, anchoring
345 the pattern to the end of the substring. It's cheap and
346 potentially very inefficient, but it "works" for now.
351 .param string pattern :optional
352 .param int has_pattern :opt_flag
353 .param pmc adverbs :slurpy :named
354 .local pmc mob, cache, rule
358 if has_pattern goto lookbehind
362 pattern = concat '[', pattern
363 pattern = concat pattern, ']$'
364 cache = get_global '%!cache'
365 $I0 = exists cache[pattern]
366 if $I0 == 0 goto new_pattern
367 rule = cache[pattern]
370 $P0 = compreg 'PGE::Perl6Regex'
372 cache[pattern] = rule
374 $P0 = getattribute mob, '$.target'
376 $P0 = getattribute mob, '$.pos'
378 $S0 = substr $S0, 0, from
381 $P0 = getattribute mob, '$.from'
382 $P1 = getattribute mob, '$.pos'
386 setattribute mob, '&!corou', $P0
391 =item FAILGOAL(pmc mob, string goal [, 'dba'=>dba])
393 Throw an exception when parsing fails in goal matching.
397 .sub 'FAILGOAL' :method
399 .param pmc options :slurpy :named
407 .local string message
408 message = concat "Unable to parse ", dba
409 message .= ", couldn't find final "
416 =head2 Support subroutines
423 Force a backtrack. (Taken from A05.)
429 $P0 = get_hll_global ['PGE'], 'Match'
430 .tailcall $P0.'new'(mob)
434 =item C<!cclass(mob, cclass)>
436 Match according to character class C<cclass>.
445 $P0 = get_hll_global ['PGE'], 'Match'
446 (mob, $I0, target) = $P0.'new'(mob)
447 $I1 = is_cclass cclass, target, $I0
455 =item C<!literal(mob, literal)>
457 Match according to C<literal>.
461 .sub '!literal' :anon
463 .param string literal
466 $P0 = get_hll_global ['PGE'], 'Match'
467 (mob, pos, target) = $P0.'new'(mob)
469 $S0 = substr target, pos, $I0
470 if $S0 != literal goto end
481 Patrick Michaud (pmichaud@pobox.com) is the author and maintainer.
482 Patches and suggestions should be sent to the Perl 6 compiler list
483 (perl6-compiler@perl.org).
491 # vim: expandtab shiftwidth=4 ft=pir: