1 # Copyright (C) 2005-2009, Parrot Foundation.
# Everything that follows is defined in the PGE::P5Regex namespace.
4 .namespace [ "PGE";"P5Regex" ]
# Compiler entry point: every named option is collected into 'adverbs'.
# NOTE(review): presumably this is the "compile_p5regex" sub that the load-time
# code registers with compreg -- confirm against the .sub line.
8 .param pmc adverbs :slurpy :named
# Default the 'grammar' adverb to 'PGE::Grammar' when the caller supplied none.
10 $I0 = exists adverbs['grammar']
11 if $I0 goto have_grammar
12 adverbs['grammar'] = 'PGE::Grammar'
# Lowercase the requested target so the string comparisons below ignore case.
16 target = adverbs['target']
17 target = downcase target
19 ## If we're passed the results of a previous parse, use it.
# Jump to a fresh parse unless source is a PGE::Match and $P0 is a non-null
# PGE::Exp object.
21 $I0 = isa source, ['PGE';'Match']
22 if $I0 == 0 goto parse
24 if null $P0 goto parse
25 $I0 = isa $P0, ['PGE';'Exp']
26 if $I0 == 0 goto parse
# Look up the "p5regex" sub and run it over the source, forwarding all adverbs.
31 $P0 = get_global "p5regex"
32 match = $P0(source, adverbs :flat :named)
# Any target other than 'parse' continues at 'check'.
33 if target != 'parse' goto check
# A false match result branches to 'check_1'.
37 unless match goto check_1
40 if $S0 == $S1 goto analyze
# Run the p5analyze pass over the expression tree, then hand off to its
# 'compile' method with the same adverbs (tail call, so its result is returned).
50 exp = exp.'p5analyze'(pad)
51 .tailcall exp.'compile'(adverbs :flat :named)
# Parser entry point: runs the shared operator table's 'parse' method on 'mob'.
# NOTE(review): presumably this is the "p5regex" sub fetched with get_global by
# the compiler entry point -- confirm against the .sub line.
57 .param pmc adverbs :slurpy :named
59 .local string stop, tighter
60 .local pmc stopstack, optable
# Fetch the stop-token stack and the operator table stored as HLL globals.
62 stopstack = get_hll_global ['PGE';'P5Regex'], '@!stopstack'
63 optable = get_hll_global ["PGE";"P5Regex"], "$optable"
# Forward the caller's 'stop' and 'tighter' adverbs to the table's parse method.
65 stop = adverbs['stop']
66 tighter = adverbs['tighter']
68 $P0 = optable."parse"(mob, 'stop'=>stop, 'tighter'=>tighter)
# NOTE(review): presumably supplies the .CCLASS_* constants (e.g. .CCLASS_NUMERIC)
# passed to find_not_cclass further down -- confirm.
75 .include "cclass.pasm"
# 2147483647 is 2**31 - 1.
# NOTE(review): presumably the "no upper bound" repeat count -- confirm at its uses.
77 .const int PGE_INF = 2147483647
# Build the operator table for Perl 5 regex syntax and publish it as the
# "$optable" global in PGE::P5Regex.
82 optable = new ['PGE';'OPTable']
83 set_hll_global ["PGE";"P5Regex"], "$optable", optable
# Plain literal text: the empty 'term:' token is handled by parse_lit.
85 $P0 = get_hll_global ["PGE";"P5Regex"], "parse_lit"
86 optable.'newtok'('term:', 'precedence'=>'=', 'nows'=>1, 'parsed'=>$P0)
# Anchors \b \B ^ $ all produce PGE::Exp::Anchor nodes, declared equivalent to 'term:'.
88 optable.'newtok'('term:\b', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
89 optable.'newtok'('term:\B', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
90 optable.'newtok'('term:^', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
91 optable.'newtok'('term:$', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
# Character-class shortcuts \d \D \s \S \w \W produce PGE::Exp::CCShortcut nodes.
93 optable.'newtok'('term:\d', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
94 optable.'newtok'('term:\D', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
95 optable.'newtok'('term:\s', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
96 optable.'newtok'('term:\S', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
97 optable.'newtok'('term:\w', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
98 optable.'newtok'('term:\W', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
# Grouping: "( )" produces PGE::Exp::CGroup, "(?: )" produces PGE::Exp::Group.
100 optable.'newtok'('circumfix:( )', 'equiv'=>'term:', 'nows'=>1, 'nullterm'=>1, 'match'=>'PGE::Exp::CGroup')
101 optable.'newtok'('circumfix:(?: )', 'equiv'=>'term:', 'nows'=>1, 'nullterm'=>1, 'match'=>'PGE::Exp::Group')
# Both "[" and "." are handed to parse_enumclass.
103 $P0 = get_hll_global ['PGE';'P5Regex'], 'parse_enumclass'
104 optable.'newtok'('term:[', 'precedence'=>'=', 'nows'=>1, 'parsed'=>$P0)
105 optable.'newtok'('term:.', 'precedence'=>'=', 'nows'=>1, 'parsed'=>$P0)
# Quantifiers * + ? { share parse_quant; '*' is declared looser than 'term:'
# and the other three are declared equivalent to '*'.
107 $P0 = get_hll_global ['PGE';'P5Regex'], 'parse_quant'
108 optable.'newtok'('postfix:*', 'looser'=>'term:', 'left'=>1, 'nows'=>1, 'parsed'=>$P0)
109 optable.'newtok'('postfix:+', 'equiv'=>'postfix:*', 'left'=>1, 'nows'=>1, 'parsed'=>$P0)
110 optable.'newtok'('postfix:?', 'equiv'=>'postfix:*', 'left'=>1, 'nows'=>1, 'parsed'=>$P0)
111 optable.'newtok'('postfix:{', 'equiv'=>'postfix:*', 'left'=>1, 'nows'=>1, 'parsed'=>$P0)
# Concatenation is the empty infix (declared looser than the quantifiers);
# alternation "|" is declared looser than concatenation.
113 optable.'newtok'('infix:', 'looser'=>'postfix:*', 'right'=>1, 'nows'=>1, 'match'=>'PGE::Exp::Concat')
114 optable.'newtok'('infix:|', 'looser'=>'infix:', 'left'=>1, 'nows'=>1, 'match'=>'PGE::Exp::Alt')
# "}" is registered as a close token, declared looser than alternation.
116 optable.'newtok'('close:}', 'looser'=>'infix:|', 'nows'=>1) # XXX: hack
118 # Create a stack for holding stop tokens
119 $P0 = new 'ResizablePMCArray'
120 set_hll_global ['PGE';'P5Regex'], '@!stopstack', $P0
# Register compile_p5regex as the compiler named "PGE::P5Regex".
122 $P0 = get_hll_global ["PGE";"P5Regex"], "compile_p5regex"
123 compreg "PGE::P5Regex", $P0
# Error reporter taking a caller-supplied message string.
# NOTE(review): presumably the parse_error(mob, pos, message) routine called by
# the parse_* subs -- confirm against the .sub line.
130 .param string message
# $P0 is reused: first the match object's '$.pos' attribute, then a new Exception.
131 $P0 = getattribute mob, '$.pos'
133 $P0 = new 'Exception'
# $S0 starts out as the fixed message prefix.
134 $S0 = 'p5regex parse error: '
140 $P1 = getattribute mob, '$.target'
# Narrow $S1 to the single character at 'pos'.
142 $S1 = substr $S1, pos, 1
# Literal-text parser: creates a PGE::Exp::Literal match object and scans the
# target starting at 'pos'.
# NOTE(review): presumably the "parse_lit" sub registered for 'term:' -- confirm.
154 .local int pos, lastpos
155 .local int litstart, litlen
156 .local string initchar
157 (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Literal')
158 lastpos = length target
159 initchar = substr target, pos, 1
# A '*' in term position has nothing before it to quantify.
160 unless initchar == '*' goto initchar_ok
161 parse_error(mob, pos, "Quantifier follows nothing")
# A ')' goes straight to 'end'.
164 if initchar == ')' goto end
# Anything but a backslash is treated as ordinary literal text.
166 if initchar != "\\" goto term_literal
# Backslash: the character at 'pos' is the escaped one.
168 initchar = substr target, pos, 1
# With pos beyond lastpos the pattern ended right after the backslash.
170 if pos <= lastpos goto term_backslash_ok
171 parse_error(mob, pos, "Search pattern not terminated")
# Map the escape letters n r t e a b onto \n \r \t \e \a \b; any other
# escaped character continues at term_literal unchanged.
173 $I0 = index "nrteab", initchar
174 if $I0 < 0 goto term_literal
175 initchar = substr "\n\r\t\e\a\b", $I0, 1
# $P0 receives the '@!stopstack' global; stoplen is the length of 'stop'.
181 $P0 = get_hll_global ['PGE';'P5Regex'], '@!stopstack'
183 stoplen = length stop
# The literal ends at the end of the target, or where the text at 'pos'
# equals a non-empty stop string.
185 if pos >= lastpos goto term_literal_end
186 if stoplen == 0 goto not_stop
187 $S0 = substr target, pos, stoplen
188 if $S0 == stop goto term_literal_end
# It also ends at any of the characters [ ] ( ) { } * ? + \ | ^ $ .
190 $S0 = substr target, pos, 1
191 $I0 = index "[](){}*?+\\|^$.", $S0
192 # if not in circumfix:( ) throw error on end paren
193 if $I0 >= 0 goto term_literal_end
196 goto term_literal_loop
# A scanned length below one branches to term_literal_one.
198 if litlen < 1 goto term_literal_one
# Take $I0 characters starting at litstart and prepend initchar to them.
202 $S0 = substr target, litstart, $I0
203 $S0 = concat initchar, $S0
# Quantifier parser: creates a PGE::Exp::Quant match object and branches on
# the quantifier token in 'key'.
# NOTE(review): presumably the "parse_quant" sub registered for * + ? { -- confirm.
214 .local int min, max, backtrack
215 .local int pos, lastpos
218 (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Quant')
219 lastpos = length target
# '{' starts an explicit range; the tests below tell '+' and '?' apart.
223 if key == '{' goto quant_range
224 if key != '+' goto quant_max
227 if key != "?" goto quant_lazy
# Range lower bound: $I1 is the first non-numeric position at or after 'pos';
# if it has not advanced there are no digits here.
231 $I1 = find_not_cclass .CCLASS_NUMERIC, target, pos, lastpos
232 if $I1 <= pos goto quant_range_max
233 $S0 = substr target, pos
# Without a ',' at 'pos' the code skips ahead to quant_range_end.
238 $S0 = substr target, pos, 1
239 if $S0 != "," goto quant_range_end
# Range upper bound: same digit scan as for the lower bound.
242 $I1 = find_not_cclass .CCLASS_NUMERIC, target, pos, lastpos
243 if $I1 <= pos goto quant_range_end
244 $S0 = substr target, pos
# The range must be closed by '}'.
248 $S0 = substr target, pos, 1
249 if $S0 != "}" goto err_range
# A '?' at 'pos' selects PGE_BACKTRACK_EAGER.
252 $S0 = substr target, pos, 1
253 if $S0 != "?" goto end
254 backtrack = PGE_BACKTRACK_EAGER
# Record the chosen backtracking mode on the match object.
259 mob["backtrack"] = backtrack
263 parse_error(mob, pos, "Error in quantified range")
# Group parser: creates a PGE::Exp::CGroup match object from 'mob'.
270 .local int pos, lastpos
271 (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::CGroup')
# Peek at the two characters at 'pos'; "?:" branches to 'nocapture'.
273 $S0 = substr target, pos, 2
274 if $S0 == "?:" goto nocapture
# parse_enumclass: builds a PGE::Exp::EnumCharList match object, either from
# the '.' token or by scanning a bracketed class into 'charlist'.
283 .sub "parse_enumclass"
286 .local int pos, lastpos
288 .local string charlist
291 (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::EnumCharList')
# The '.' token is handled by the separate 'dot' branch.
292 if key == '.' goto dot
293 lastpos = length target
# NOTE(review): a leading '^' presumably marks a negated class -- confirm
# against the lines that follow this test.
297 $S0 = substr target, pos, 1
298 if $S0 != "^" goto scan_first
# Reaching the end of the target here branches to err_close.
302 if pos >= lastpos goto err_close
303 $S0 = substr target, pos, 1
305 if $S0 == "\\" goto backslash
# Scan step: look at the character at 'pos' and dispatch on it.
308 if pos >= lastpos goto err_close
309 $S0 = substr target, pos, 1
311 if $S0 == "]" goto endclass
312 if $S0 == "-" goto hyphenrange
313 if $S0 != "\\" goto addchar
# Backslash escape: letters n r t f a e 0 b map to \n \r \t \f \a \e \0 \b;
# any other escaped character is added as itself.
315 $S0 = substr target, pos, 1
317 $I0 = index "nrtfae0b", $S0
318 if $I0 == -1 goto addchar
319 $S0 = substr "\n\r\t\f\a\e\0\b", $I0, 1
# With 'isrange' set, the character is handled by 'addrange'.
321 if isrange goto addrange
# $I2 is the code of the last character currently in charlist.
326 $I2 = ord charlist, -1
# $I0 below $I2 branches to err_range.
328 if $I0 < $I2 goto err_range
331 if $I2 > $I0 goto scan
336 if isrange goto addrange
340 if isrange == 0 goto end
# Store the collected character list on the match object.
348 mob.'!make'(charlist)
# Reported from the err_close branch.
352 parse_error(mob, pos, "Unmatched [")
# Range error: the message text starts with this prefix and is reported below.
354 $S0 = 'Invalid [] range "'
361 parse_error(mob, pos, $S0)
# Methods from here on are defined in the PGE::Exp namespace.
365 .namespace [ "PGE";"Exp" ]
# p5analyze method for PGE::Exp: checks indexed elements of self and
# invokes p5analyze on $P0 with the same 'pad'.
367 .sub "p5analyze" :method
# Test whether self has a defined element at index $I0.
372 $I1 = defined self[$I0]
375 $P0 = $P0."p5analyze"(pad)
# Methods from here on are defined in the PGE::Exp::CGroup namespace.
383 .namespace [ "PGE";"Exp";"CGroup" ]
# p5analyze method for PGE::Exp::CGroup: sets the node's "iscapture" flag and
# then analyzes 'exp'.
385 .sub "p5analyze" :method
# "iscapture" defaults to 0 and becomes 1 only when self compares equal to "(".
389 self["iscapture"] = 0
390 if self != "(" goto end
391 self["iscapture"] = 1
# Run p5analyze on 'exp' with the same pad.
400 exp = exp."p5analyze"(pad)
411 # vim: expandtab shiftwidth=4 ft=pir: