compilers/pge/PGE/Perl6Regex.pir

   1 # Copyright (C) 2006-2009, Parrot Foundation.
   2 # $Id$
   3
   4 =head1 TITLE
   5
   6 Perl6Regex - compiler and parser for Perl 6 regex
   7
   8 =over 4
   9
  10 =item C<compile_perl6regex(PMC source, PMC adverbs :slurpy :named)>
  11
  12 Return the result of compiling C<source> according to Perl 6
  13 regex syntax and the associated C<adverbs>.  Normally this
  14 function is obtained using C<compreg 'PGE::Perl6Regex'> instead
  15 of calling it directly.
  16
  17 Returns the compiled regular expression.  If a C<target>
  18 named parameter is supplied, then it will return the parse tree
  19 (target='parse'), the expression tree (target='exp'),
  20 or the resulting PIR code (target='PIR').
  21
  22 =cut
  23
  24 .namespace [ 'PGE';'Perl6Regex' ]
  25
  26 .sub 'compile_perl6regex'
  27     .param pmc source
  28     .param pmc args            :slurpy
  29     .param pmc adverbs         :slurpy :named
  30
  31     unless null adverbs goto set_adverbs
  32     adverbs = new 'Hash'
  33
  34   set_adverbs:
  35     $I0 = exists adverbs['grammar']
  36     if $I0 goto with_grammar
  37     unless args goto adverb_grammar_1
  38     $S0 = shift args
  39     adverbs['grammar'] = $S0
  40     goto with_grammar
  41   adverb_grammar_1:
  42     adverbs['grammar'] = 'PGE::Grammar'
  43   with_grammar:
  44     $I0 = exists adverbs['name']
  45     if $I0 goto with_name
  46     unless args goto with_name
  47     $S0 = shift args
  48     adverbs['name'] = $S0
  49   with_name:
  50     $I0 = exists adverbs['lang']
  51     if $I0 goto with_lang
  52     adverbs['lang'] = 'PIR'
  53   with_lang:
  54     $I0 = exists adverbs['ignorecase']
  55     if $I0 goto with_ignorecase
  56     $I0 = adverbs['i']
  57     adverbs['ignorecase'] = $I0
  58   with_ignorecase:
  59     $I0 = exists adverbs['sigspace']
  60     if $I0 goto with_sigspace
  61     $I0 = exists adverbs['s']
  62     if $I0 goto with_s
  63     $I0 = exists adverbs['words']
  64     if $I0 goto with_words
  65     $I0 = adverbs['w']
  66     adverbs['sigspace'] = $I0
  67     goto with_sigspace
  68   with_s:
  69     $I0 = adverbs['s']
  70     adverbs['sigspace'] = $I0
  71     goto with_sigspace
  72   with_words:
  73     $I0 = adverbs['words']
  74     adverbs['sigspace'] = $I0
  75   with_sigspace:
  76
  77     .local string target
  78     target = adverbs['target']
  79     target = downcase target
  80
  81     ##   If we're passed the results of a previous parse,  use it.
  82     .local pmc match, exp
  83     $I0 = isa source, ['PGE';'Match']
  84     if $I0 == 0 goto parse
  85     $P0 = source['expr']
  86     if null $P0 goto parse
  87     $I0 = isa $P0, ['PGE';'Exp']
  88     if $I0 == 0 goto parse
  89     match = source
  90     goto analyze
  91
  92   parse:
  93     ##   Let's parse the source as a regex
  94     $P0 = get_global 'regex'
  95     match = $P0(source, adverbs :flat :named)
  96     if source == '' goto err_null
  97     if target != 'parse' goto check
  98     .return (match)
  99
 100   check:
 101     unless match goto check_1
 102     $S0 = source
 103     $S1 = match
 104     if $S0 == $S1 goto analyze
 105   check_1:
 106     null $P0
 107     .return ($P0)
 108
 109   analyze:
 110     .local pmc pad
 111     exp = match['expr']
 112     pad = clone adverbs
 113     $P0 = new 'Hash'
 114     pad['lexscope'] = $P0
 115     exp = exp.'perl6exp'(pad)
 116     if null exp goto err_null
 117     .tailcall exp.'compile'(adverbs :flat :named)
 118
 119   err_null:
 120     $I0 = match.'from'()
 121     'parse_error'(match, $I0, 'Null pattern illegal')
 122 .end
 123
 124
 125 =item C<regex(PMC mob, PMC adverbs :slurpy :named)>
 126
 127 Parses a regex according to Perl 6 regex syntax, and returns
 128 the corresponding parse tree.
 129
 130 =cut
 131
 132 .sub 'regex'
 133     .param pmc mob
 134     .param pmc adverbs         :slurpy :named
 135
 136     .local string stop, tighter
 137     .local pmc stopstack, optable, match
 138
 139     stopstack = get_global '@!stopstack'
 140     optable = get_global '$optable'
 141
 142     stop = adverbs['stop']
 143     tighter = adverbs['tighter']
 144     push stopstack, stop
 145     match = optable.'parse'(mob, 'stop'=>stop, 'tighter'=>tighter)
 146     $S0 = pop stopstack
 147
 148     .return (match)
 149 .end
 150
 151 =item C<p6escapes>
 152
 153 Parse and calculate various Perl 6 string escapes, such as \n, \r,
 154 \x, \o, and \c.  For the latter escapes, also handle the bracketed
 155 forms and other special forms.
 156
 157 Note that this function is used directly by PCT::Grammar and Rakudo,
 158 and someday may be refactored to a different location.
 159
 160 =cut
 161
 162 .sub 'trim'
 163     .param string s
 164     .local int rpos, lpos
 165     rpos = length s
 166     lpos = find_not_cclass .CCLASS_WHITESPACE, s, 0, rpos
 167   rtrim_loop:
 168     unless rpos > lpos goto rtrim_done
 169     dec rpos
 170     $I0 = is_cclass .CCLASS_WHITESPACE, s, rpos
 171     if $I0 goto rtrim_loop
 172   rtrim_done:
 173     inc rpos
 174     $I0 = rpos - lpos
 175     $S0 = substr s, lpos, $I0
 176     .return ($S0)
 177 .end
 178
 179
 180 .sub 'p6escapes'
 181     .param pmc mob
 182     .param pmc adverbs         :slurpy :named
 183     .local string target, backchar, literal
 184     .local int pos, lastpos
 185     $P0 = get_hll_global ['PGE'], '$!MATCH'
 186     (mob, pos, target) = $P0.'new'(mob, adverbs :flat :named)
 187     lastpos = length target
 188     if pos >= lastpos goto fail
 189     $S0 = substr target, pos, 1
 190     inc pos
 191     if $S0 != "\\" goto fail
 192     if pos >= lastpos goto fail
 193     backchar = substr target, pos, 1
 194     inc pos
 195     backchar = downcase backchar
 196     $I0 = index "\\0abefnrtxco", backchar
 197     if $I0 < 0 goto fail
 198     if $I0 >= 9 goto scan_xco
 199     literal = substr "\\\0\a\b\e\f\n\r\t", $I0, 1
 200     goto succeed
 201   scan_xco:
 202     ##  Handle \x, \c, and \o escapes.  Start by converting
 203     ##  backchar into the appropriate radix, then loop through
 204     ##  the characters that follow to compute the decimal value
 205     ##  of codepoints, and concatenate the codepoints into a
 206     ##  literal.
 207     .local int base, decnum, isbracketed
 208     base = index '        o c     x', backchar
 209     literal = ''
 210     $S0 = substr target, pos, 1
 211     isbracketed = iseq $S0, '['
 212     pos += isbracketed
 213     ##  Handle the case of \cC (control escape).
 214     if base != 10 goto scan_xco_char
 215     if isbracketed goto scan_xco_char
 216     $I0 = is_cclass .CCLASS_NUMERIC, $S0, 0
 217     if $I0 goto scan_xco_char
 218     ##  xor the 64 bit
 219     $I0 = ord $S0
 220     bxor $I0, 64
 221     literal = chr $I0
 222     inc pos
 223     goto succeed
 224   scan_xco_char:
 225     decnum = 0
 226     # inside brackets, skip leading ws
 227     unless isbracketed goto scan_xco_char_ws
 228     pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
 229   scan_xco_char_ws:
 230     if base != 10 goto scan_xco_char_digits
 231     unless isbracketed goto scan_xco_char_digits
 232     $I0 = is_cclass .CCLASS_NUMERIC, target, pos
 233     if $I0 goto scan_xco_char_digits
 234     ##  look up character by name
 235     .local int namepos
 236     namepos = index target, ']', pos
 237     if namepos < 0 goto err_missing_bracket
 238     $I0 = index target, ',', pos
 239     if $I0 < 0 goto have_namepos
 240     if namepos < $I0 goto have_namepos
 241     namepos = $I0
 242   have_namepos:
 243     $I0 = namepos - pos
 244     $S0 = substr target, pos, $I0
 245     $S0 = 'trim'($S0)
 246     $P0 = new 'CodeString'
 247     decnum = $P0.'charname_to_ord'($S0)
 248     if decnum < 0 goto err_unicode_name
 249     pos = namepos
 250     goto scan_xco_char_end
 251   scan_xco_char_digits:
 252     $S0 = substr target, pos, 1
 253     $I0 = index "0123456789abcdef0123456789ABCDEF", $S0
 254     if $I0 < 0 goto scan_xco_char_end
 255     $I0 %= 16
 256     if $I0 >= base goto scan_xco_char_end
 257     decnum *= base
 258     decnum += $I0
 259     inc pos
 260     goto scan_xco_char_digits
 261   scan_xco_char_end:
 262     $S1 = chr decnum
 263     concat literal, $S1
 264     unless isbracketed goto scan_xco_end
 265     pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
 266     $S0 = substr target, pos, 1
 267     if $S0 == ']' goto scan_xco_end
 268     if $S0 == '' goto err_missing_bracket
 269     if $S0 != ',' goto err_digit
 270     inc pos
 271     goto scan_xco_char
 272   scan_xco_end:
 273     pos += isbracketed
 274   succeed:
 275     mob.'!make'(literal)
 276     mob.'to'(pos)
 277   fail:
 278     .return (mob)
 279
 280   err_unicode_name:
 281     $S0 = concat "Unrecognized character name ", $S0
 282     'parse_error'(mob, pos, $S0)
 283   err_missing_bracket:
 284     'parse_error'(mob, pos, "Missing close bracket for \\x[...], \\o[...], or \\c[...]")
 285   err_digit:
 286     'parse_error'(mob, pos, "Invalid digit in \\x[...], \\o[...], or \\c[...]")
 287 .end
 288
 289
 290 =item C<onload()>
 291
 292 Initializes the Perl6Regex parser and other data structures
 293 needed for compiling regexes.
 294
 295 =cut
 296
 297 .include 'cclass.pasm'
 298
 299 .namespace [ 'PGE';'Perl6Regex' ]
 300
 301 .sub '__onload' :load
 302     .local pmc p6meta
 303     p6meta = new 'P6metaclass'
 304     p6meta.'new_class'('PGE::Exp::WS', 'parent'=>'PGE::Exp::Subrule')
 305     p6meta.'new_class'('PGE::Exp::Alias', 'parent'=>'PGE::Exp')
 306
 307     .local pmc optable
 308     optable = new ['PGE';'OPTable']
 309     set_global '$optable', optable
 310
 311     $P0 = get_global 'parse_term'
 312     optable.'newtok'('term:',    'precedence'=>'=', 'nows'=>1, 'parsed'=>$P0)
 313
 314     $P0 = get_global 'parse_term_ws'
 315     optable.'newtok'('term:#',   'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 316
 317     $P0 = get_global 'parse_term_backslash'
 318     optable.'newtok'("term:\\",  'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 319
 320     optable.'newtok'('term:^',   'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
 321     optable.'newtok'('term:^^',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
 322     optable.'newtok'('term:$$',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
 323     optable.'newtok'('term:\b',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
 324     optable.'newtok'('term:\B',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
 325     optable.'newtok'('term:<<',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
 326     optable.'newtok'('term:>>',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
 327     optable.'newtok'('term:<?>', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
 328     optable.'newtok'('term:<!>', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
 329     optable.'newtok'(unicode:"term:\xab", 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
 330     optable.'newtok'(unicode:"term:\xbb", 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Anchor')
 331
 332     optable.'newtok'('term:.',   'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
 333     optable.'newtok'('term:\d',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
 334     optable.'newtok'('term:\D',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
 335     optable.'newtok'('term:\s',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
 336     optable.'newtok'('term:\S',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
 337     optable.'newtok'('term:\w',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
 338     optable.'newtok'('term:\W',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
 339     optable.'newtok'('term:\N',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CCShortcut')
 340     optable.'newtok'('term:\n',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Newline')
 341
 342     $P0 = get_global 'parse_dollar'
 343     optable.'newtok'('term:$',   'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 344
 345     $P0 = get_global 'parse_subrule'
 346     optable.'newtok'('term:<',   'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 347     optable.'newtok'('term:<?',  'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 348     optable.'newtok'('term:<!',  'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 349     optable.'newtok'('term:<.',  'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 350
 351     $P0 = get_global 'parse_enumcharclass'
 352     optable.'newtok'('term:<[',  'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 353     optable.'newtok'('term:<+', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 354     optable.'newtok'('term:<-', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 355     optable.'newtok'('term:<![', 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 356
 357     $P0 = get_global 'parse_quoted_literal'
 358     optable.'newtok'("term:'",  'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 359
 360     $P0 = get_global 'parse_goal'
 361     optable.'newtok'('term:~', 'equiv'=>'term:', 'parsed'=>$P0)
 362
 363     optable.'newtok'('term:::',  'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Cut')
 364     optable.'newtok'('term::::', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Cut')
 365     optable.'newtok'('term:<cut>',    'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Cut')
 366     optable.'newtok'('term:<commit>', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Cut')
 367
 368     $P0 = get_global 'parse_closure'
 369     optable.'newtok'("term:{{",   'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 370     optable.'newtok'("term:<?{{", 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 371     optable.'newtok'("term:<!{{", 'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 372
 373     $P0 = get_global 'parse_action'
 374     optable.'newtok'("term:{*}",      'equiv'=>'term:', 'nows'=>1, 'parsed'=>$P0)
 375
 376
 377     optable.'newtok'('circumfix:[ ]', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::Group')
 378     optable.'newtok'('circumfix:( )', 'equiv'=>'term:', 'nows'=>1, 'match'=>'PGE::Exp::CGroup')
 379
 380     $P0 = get_global 'parse_quant'
 381     optable.'newtok'('postfix:*',  'looser'=>'term:', 'parsed'=>$P0)
 382     optable.'newtok'('postfix:+',  'equiv'=>'postfix:*', 'parsed'=>$P0)
 383     optable.'newtok'('postfix:?',  'equiv'=>'postfix:*', 'parsed'=>$P0)
 384     optable.'newtok'('postfix::',  'equiv'=>'postfix:*', 'parsed'=>$P0)
 385     optable.'newtok'('postfix:**', 'equiv'=>'postfix:*', 'parsed'=>$P0)
 386     $P0 = get_global 'parse_quant_error'
 387     optable.'newtok'('term:*', 'equiv'=>'term:', 'parsed'=>$P0)
 388     optable.'newtok'('term:+', 'equiv'=>'term:', 'parsed'=>$P0)
 389     optable.'newtok'('term:?', 'equiv'=>'term:', 'parsed'=>$P0)
 390
 391     optable.'newtok'('infix:',   'looser'=>'postfix:*', 'assoc'=>'list', 'nows'=>1, 'match'=>'PGE::Exp::Concat')
 392     optable.'newtok'('infix:&',  'looser'=>'infix:', 'nows'=>1, 'match'=>'PGE::Exp::Conj')
 393     optable.'newtok'('infix:|',  'looser'=>'infix:&', 'nows'=>1, 'match'=>'PGE::Exp::Alt')
 394     optable.'newtok'('prefix:|', 'equiv'=>'infix:|', 'nows'=>1, 'match'=>'PGE::Exp::Alt')
 395     optable.'newtok'('infix:||', 'equiv'=>'infix:|', 'nows'=>1, 'match'=>'PGE::Exp::Alt')
 396     optable.'newtok'('prefix:||', 'equiv'=>'infix:|', 'nows'=>1, 'match'=>'PGE::Exp::Alt')
 397
 398     optable.'newtok'('infix::=', 'tighter'=>'infix:', 'assoc'=>'right', 'match'=>'PGE::Exp::Alias')
 399     optable.'newtok'('infix:=', 'tighter'=>'infix:', 'assoc'=>'right', 'match'=>'PGE::Exp::Alias')
 400
 401     $P0 = get_global 'parse_modifier'
 402     optable.'newtok'('prefix::', 'looser'=>'infix:|', 'parsed'=>$P0)
 403
 404     optable.'newtok'('close:}',  'precedence'=>'<', 'nows'=>1)
 405
 406     .local pmc esclist
 407     esclist = new 'Hash'
 408     set_global '%esclist', esclist
 409     esclist['e'] = "\e"
 410     esclist['f'] = "\f"
 411     esclist['r'] = "\r"
 412     esclist['t'] = "\t"
 413     esclist['v'] = unicode:"\x0a\x0b\x0c\x0d\x85\u2028\u2029"
 414     esclist['h'] = unicode:"\x09\x20\xa0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000"
 415     esclist['n'] = unicode:"\x0a\x0d\x0c\x85\u2028\u2029"
 416     # See http://www.unicode.org/Public/UNIDATA/PropList.txt for above
 417
 418     # Create and store closure preprocessors in %closure_pp
 419     $P0 = new 'Hash'
 420     set_hll_global ['PGE';'Perl6Regex'], '%closure_pp', $P0
 421     $P1 = get_hll_global ['PGE';'Perl6Regex'], 'PIR_closure'
 422     $P0["PIR"] = $P1
 423
 424     # Create an array for holding stop tokens
 425     $P0 = new 'ResizablePMCArray'
 426     set_hll_global ['PGE';'Perl6Regex'], '@!stopstack', $P0
 427
 428     $P0 = get_global 'compile_perl6regex'
 429     compreg 'PGE::Perl6Regex', $P0
 430     .return ()
 431 .end
 432
 433
 434 =item C<parse_term(PMC mob [, PMC adverbs :slurpy :named])>
 435
 436 Parses literal strings and whitespace.
 437 Return a failed match if the stoptoken is found.
 438
 439 =cut
 440
 441 .sub 'parse_term'
 442     .param pmc mob
 443     .param pmc adverbs         :slurpy :named
 444
 445     .local string target
 446     .local int pos, lastpos
 447     $P0 = getattribute mob, '$.target'
 448     target = $P0
 449     $P0 = getattribute mob, '$.pos'
 450     pos = $P0
 451     lastpos = length target
 452
 453     .local string stop
 454     $P0 = get_hll_global ['PGE';'Perl6Regex'], '@!stopstack'
 455     stop = $P0[-1]
 456
 457     $I0 = is_cclass .CCLASS_WHITESPACE, target, pos
 458     if $I0 goto term_ws
 459     $I0 = length stop
 460     if $I0 == 0 goto not_stop
 461     $S0 = substr target, pos, $I0
 462     if $S0 == stop goto end_noterm
 463   not_stop:
 464     ##   find length of word character sequence
 465     .local int litlen
 466     $I0 = find_not_cclass .CCLASS_WORD, target, pos, lastpos
 467     litlen = $I0 - pos
 468
 469     ##   if we didn't find any, return no term
 470     if litlen == 0 goto end_noterm
 471
 472     ##   for multi-char unquoted literals, leave the last character
 473     ##   in case it's quantified (it gets processed as a subsequent term)
 474     if litlen < 2 goto term_literal
 475     dec litlen
 476   term_literal:
 477     $S0 = substr target, pos, litlen
 478     pos += litlen
 479     mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Literal')
 480     mob.'!make'($S0)
 481     mob.'to'(pos)
 482     .return (mob)
 483
 484   term_ws:
 485     .tailcall 'parse_term_ws'(mob)
 486
 487   end_noterm:
 488     $S0 = substr target, pos, 1
 489     if $S0 == ':' goto err_cut
 490     (mob) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Literal')
 491     .return (mob)
 492   err_cut:
 493     'parse_error'(mob, pos, 'Quantifier follows nothing in regex')
 494     .return (mob)
 495 .end
 496
 497
 498 =item C<parse_term_backslash(mob [, adverbs :slurpy :named])>
 499
 500 Parses terms beginning with backslash.
 501
 502 =cut
 503
 504 .sub 'parse_term_backslash'
 505     .param pmc mob
 506     .param pmc adverbs         :slurpy :named
 507
 508     .local string target
 509     .local int pos, lastpos, isnegated
 510     $P0 = getattribute mob, '$.target'
 511     target = $P0
 512     $P0 = getattribute mob, '$.pos'
 513     pos = $P0
 514     lastpos = length target
 515     isnegated = 0
 516
 517     .local string backchar, charlist
 518     ##  get whatever follows the backslash
 519     backchar = substr target, pos, 1
 520     charlist = backchar
 521     inc pos
 522
 523     ##  if it's not a word character, it's a quoted metachar
 524     $I0 = is_cclass .CCLASS_WORD, backchar, 0
 525     unless $I0 goto term_literal
 526
 527     ##  if it's a word character, it may be negated
 528     isnegated = is_cclass .CCLASS_UPPERCASE, backchar, 0
 529     ##  $S0 = downcase charlist
 530             $I0 = ord backchar
 531             $S0 = chr $I0
 532             backchar = downcase $S0
 533
 534     ##  if it's \x, \c, or \o, parse as string escape
 535     $I0 = index 'xco', backchar
 536     if $I0 < 0 goto meta_esclist
 537   meta_xco:
 538     $I0 = pos - 2
 539     $P0 = 'p6escapes'(mob, 'pos' => $I0)
 540     unless $P0 goto err_xcoparse
 541     pos = $P0.'to'()
 542     charlist = $P0.'ast'()
 543     unless isnegated goto term_literal
 544     $I0 = length charlist
 545     if $I0 > 1 goto err_negated_brackets
 546     goto term_charlist
 547
 548   meta_esclist:
 549     $P0 = get_global '%esclist'
 550     $I0 = exists $P0[backchar]
 551     unless $I0 goto err_reserved_metachar
 552     charlist = $P0[backchar]
 553     if isnegated goto term_charlist
 554     $I0 = length charlist
 555     if $I0 > 1 goto term_charlist
 556
 557   term_literal:
 558     mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Literal')
 559     mob.'!make'(charlist)
 560     mob.'to'(pos)
 561     .return (mob)
 562
 563   term_charlist:
 564     mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::EnumCharList')
 565     mob.'!make'(charlist)
 566     mob['isnegated'] = isnegated
 567     mob.'to'(pos)
 568     .return (mob)
 569
 570   err_xcoparse:
 571     parse_error(mob, pos, 'Unable to parse \x, \c, or \o value')
 572   err_negated_brackets:
 573     pos = mob.'from'()
 574     parse_error(mob, pos, 'Cannot use comma in \\X[...] or \\O[...]')
 575   err_reserved_metachar:
 576     parse_error(mob, pos, 'Alphanumeric metacharacters are reserved')
 577 .end
 578
 579
 580 =item C<parse_term_ws(PMC mob)>
 581
 582 Parses a whitespace term.
 583
 584 =cut
 585
 586 .sub 'parse_term_ws'
 587     .param pmc mob
 588     .local string target
 589     .local int pos, lastpos
 590     (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::WS')
 591     lastpos = length target
 592
 593   term_ws_loop:
 594     ##   scan for the next non-whitespace character
 595     pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
 596     $S0 = substr target, pos, 1
 597     if $S0 != '#' goto end
 598     ##   we have a #-comment, determine its closing delimiter
 599     inc pos
 600     .local string closedelim
 601     closedelim = "\n"
 602     $S0 = substr target, pos, 1
 603     $I0 = index '<[{(', $S0
 604     if $I0 < 0 goto term_ws_loop_1
 605     closedelim = substr '>]})', $I0, 1
 606   term_ws_loop_1:
 607     $I0 = index target, closedelim, pos
 608     pos = $I0 + 1
 609     if pos > 0 goto term_ws_loop
 610     pos = lastpos
 611   end:
 612     mob.'to'(pos)
 613     .return (mob)
 614 .end
 615
 616
 617 =item C<parse_quant(PMC mob)>
 618
 619 Parses a quantifier, such as *, +, ?, :, and all of their wondrous
 620 combinations.
 621
 622 =cut
 623
 624 .sub 'parse_quant'
 625     .param pmc mob
 626     .local string target
 627     .local pmc key
 628     .local int pos, lastpos
 629     key = mob['KEY']
 630     (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Quant')
 631     lastpos = length target
 632
 633     .local int min, max, suffixpos, sepws
 634     .local string suffix
 635     min = 1
 636     max = 1
 637     sepws = is_cclass .CCLASS_WHITESPACE, target, pos
 638     suffixpos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
 639
 640     if key == '**' goto quant_suffix
 641     if key == ':' goto quant_cut
 642     if key == '+' goto quant_max
 643     ##  quantifier is '?' or '*'
 644     min = 0
 645   quant_max:
 646     if key == '?' goto quant_suffix
 647     ##  quantifier is '+' or '*'
 648     max = PGE_INF
 649     goto quant_suffix
 650
 651   quant_cut:
 652     #   The postfix:<:> operator may bring us here when it's really a
 653     #   term:<::> term.  So, we check for that here and fail this match
 654     #   if we really have a cut term.
 655     if key != ':' goto quant_suffix
 656     $S0 = substr target, pos, 1
 657     if $S0 == ':' goto end
 658     mob['backtrack'] = PGE_BACKTRACK_NONE
 659
 660   quant_suffix:
 661     suffix = substr target, suffixpos, 2
 662     if suffix == ':?' goto quant_eager
 663     if suffix == ':!' goto quant_greedy
 664   quant_suffix_1:
 665     suffix = substr target, suffixpos, 1
 666     if suffix == '?' goto quant_eager
 667     if suffix == '!' goto quant_greedy
 668     if suffix != ':' goto quant
 669   quant_none:
 670     mob['backtrack'] = PGE_BACKTRACK_NONE
 671     goto quant_skip_suffix
 672   quant_eager:
 673     mob['backtrack'] = PGE_BACKTRACK_EAGER
 674     goto quant_skip_suffix
 675   quant_greedy:
 676     mob['backtrack'] = PGE_BACKTRACK_GREEDY
 677   quant_skip_suffix:
 678     $I0 = length suffix
 679     pos = suffixpos + $I0
 680
 681   quant:
 682     if key != '**' goto quant_set
 683   quant_closure:
 684     $I0 = is_cclass .CCLASS_WHITESPACE, target, pos
 685     sepws |= $I0
 686     pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
 687     .local int isconst
 688     isconst = is_cclass .CCLASS_NUMERIC, target, pos
 689     if isconst goto brace_skip
 690     $S0 = substr target, pos, 1
 691     if $S0 != "{" goto parse_repetition_controller
 692     inc pos
 693   brace_skip:
 694     $I1 = find_not_cclass .CCLASS_NUMERIC, target, pos, lastpos
 695     if $I1 <= pos goto err_closure
 696     $S0 = substr target, pos
 697     min = $S0
 698     max = $S0
 699     pos = $I1
 700     $S0 = substr target, pos, 2
 701     if $S0 != '..' goto quant_closure_end
 702     pos += 2
 703     max = PGE_INF
 704     $S0 = substr target, pos, 1
 705     if $S0 != '*' goto quant_range_end
 706     inc pos
 707     goto quant_closure_end
 708   quant_range_end:
 709     $I1 = find_not_cclass .CCLASS_NUMERIC, target, pos, lastpos
 710     if $I1 <= pos goto err_closure
 711     $S0 = substr target, pos
 712     max = $S0
 713     pos = $I1
 714   quant_closure_end:
 715     if isconst goto brace_skip2
 716     $S0 = substr target, pos, 1
 717     if $S0 != "}" goto err_closure
 718     inc pos
 719   brace_skip2:
 720     suffixpos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
 721
 722   quant_set:
 723     mob['min'] = min
 724     mob['max'] = max
 725     mob.'to'(pos)
 726   end:
 727     .return (mob)
 728
 729   parse_repetition_controller:
 730     .local pmc regex, repetition_controller
 731     mob.'to'(pos)
 732     regex = get_global 'regex'
 733     #parse everything down to concatenation precedence
 734     repetition_controller = regex(mob, 'tighter'=>'infix:')
 735     unless repetition_controller goto err_repetition_controller
 736
 737     #update pos to after the matched
 738     pos = repetition_controller.'to'()
 739     repetition_controller = repetition_controller['expr']
 740
 741     # if there's surrounding ws, then add WS nodes
 742     unless sepws goto sepws_done
 743     $P0 = mob.'new'(mob, 'grammar'=>'PGE::Exp::Concat')
 744     $P0.'to'(pos)
 745     $P1 = mob.'new'(mob, 'grammar'=>'PGE::Exp::WS')
 746     $P1.'to'(pos)
 747     push $P0, $P1
 748     push $P0, repetition_controller
 749     $P1 = mob.'new'(mob, 'grammar'=>'PGE::Exp::WS')
 750     $P1.'to'(pos)
 751     push $P0, $P1
 752     repetition_controller = $P0
 753   sepws_done:
 754
 755     #save the matched in the mob as sep
 756     mob['sep'] = repetition_controller
 757
 758     #force the match to be 1..Inf
 759     mob['min'] = 1
 760     mob['max'] = PGE_INF
 761
 762     #move position to after the matched
 763     mob.'to'(pos)
 764     .return (mob)
 765
 766   err_repetition_controller:
 767     'parse_error'(mob, pos, "Error in repetition controller")
 768   err_closure:
 769     'parse_error'(mob, pos, "Error in closure quantifier")
 770 .end
 771
 772
 773 =item C<parse_quant_error(mob)>
 774
 775 Throw an exception for quantifiers in term position.
 776
 777 =cut
 778
 779 .sub 'parse_quant_error'
 780     .param pmc mob
 781     .local int pos
 782     pos = mob.'to'()
 783     'parse_error'(mob, pos, "Quantifier follows nothing in regex")
 784 .end
 785
 786
 787 =item C<parse_dollar(PMC mob)>
 788
 789 Parse things that begin with a dollar sign, such as scalars,
 790 anchors, and match subscripts.
 791
 792 =cut
 793
 794 .sub "parse_dollar"
 795     .param pmc mob
 796     .local string target
 797     .local int pos, lastpos
 798     .local string cname
 799     $P0 = getattribute mob, '$.target'
 800     target = $P0
 801     $P0 = getattribute mob, '$.pos'
 802     pos = $P0
 803     lastpos = length target
 804     $S0 = substr target, pos, 1
 805     if $S0 == '<' goto name
 806     $I0 = find_not_cclass .CCLASS_NUMERIC, target, pos, lastpos
 807     if $I0 > pos goto numeric
 808     $I0 = find_not_cclass .CCLASS_WORD, target, pos, lastpos
 809     if $I0 > pos goto scalar
 810
 811   eos_anchor:
 812     mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Anchor')
 813     mob.'to'(pos)
 814     .return (mob)
 815
 816   scalar:
 817     mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Scalar')
 818     dec pos
 819     $I1 = $I0 - pos
 820     cname = substr target, pos, $I1
 821     cname = concat '"', cname
 822     cname = concat cname, '"'
 823     mob["cname"] = cname
 824     mob.'to'($I0)
 825     .return (mob)
 826
 827   numeric:
 828     mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Scalar')
 829     $I1 = $I0 - pos
 830     cname = substr target, pos, $I1
 831     mob["cname"] = cname
 832     mob.'to'($I0)
 833     .return (mob)
 834
 835   name:
 836     inc pos
 837     mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Scalar')
 838     $I0 = index target, ">", pos
 839     if $I0 < pos goto err_close
 840   name_1:
 841     $I1 = $I0 - pos
 842     cname = substr target, pos, $I1
 843     cname = escape cname
 844     cname = concat '"', cname
 845     cname = concat cname, '"'
 846     mob["cname"] = cname
 847     pos = $I0 + 1
 848     mob.'to'(pos)
 849     .return (mob)
 850
 851   err_close:
 852     parse_error(mob, pos, "Missing close '>' in scalar")
 853     .return (mob)
 854 .end
 855
 856
 857 =item C<parse_subname(STR target, INT pos)>
 858
 859 Scan C<target> starting at C<pos> looking for a subrule name
 860 (following Perl 6's identifier syntax).  Returns any subrule
 861 name found, and the ending position of the name.
 862
 863 =cut
 864
 865
 866 .sub 'parse_subname'
 867     .param string target
 868     .param int pos
 869     .local int startpos, targetlen
 870
 871     targetlen = length target
 872     startpos = pos
 873     $I0 = pos
 874   loop:
 875     $I1 = find_not_cclass .CCLASS_WORD, target, $I0, targetlen
 876     if $I1 == $I0 goto end
 877     pos = $I1
 878     $S0 = substr target, pos, 2
 879     if $S0 != '::' goto end
 880     $I0 = pos + 2
 881     goto loop
 882   end:
 883     $I0 = pos - startpos
 884     $S0 = substr target, startpos, $I0
 885     .return ($S0, pos)
 886 .end
 887
 888
 889 =item C<parse_subrule(PMC mob)>
 890
 891 Parses a subrule token.
 892
 893 =cut
 894
 895 .sub 'parse_subrule'
 896     .param pmc mob
 897     .local string target
 898     .local pmc mobsave
 899     .local int pos, lastpos
 900     .local string key
 901     key = mob['KEY']
 902     mobsave = mob
 903     (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Subrule')
 904     lastpos = length target
 905
 906     ##  default to non-capturing rule
 907     .local int iscapture
 908     iscapture = 0
 909
 910     ##  see what type of subrule this is
 911     if key == '<.' goto scan_subname
 912     if key == '<?' goto zerowidth
 913     if key == '<!' goto negated
 914
 915     ##  capturing subrule, get its name/alias
 916     iscapture = 1
 917     .local string subname, cname
 918     (subname, pos) = 'parse_subname'(target, pos)
 919     cname = subname
 920     $S0 = substr target, pos, 1
 921     unless $S0 == '=' goto subrule_arg
 922     ##  aliased subrule, skip the '=' and get the real name
 923     inc pos
 924     goto scan_subname
 925
 926   negated:
 927     mob['isnegated'] = 1
 928   zerowidth:
 929     mob['iszerowidth'] = 1
 930
 931   scan_subname:
 932     (subname, pos) = 'parse_subname'(target, pos)
 933
 934   subrule_arg:
 935     mob['subname'] = subname
 936     $S0 = substr target, pos, 1
 937     if $S0 == ':' goto subrule_text_arg
 938     if $S0 != ' ' goto subrule_end
 939   subrule_pattern_arg:
 940     inc pos
 941     mob.'to'(pos)
 942     .local pmc regex
 943     regex = get_global 'regex'
 944     $P1 = regex(mob, 'stop'=>'>')
 945     unless $P1 goto end
 946     $S0 = $P1
 947     mob['arg'] = $S0
 948     pos = $P1.'to'()
 949     mob.'to'(-1)
 950     goto subrule_end
 951   subrule_text_arg:
 952     $I0 = pos + 1
 953     pos = find_not_cclass .CCLASS_WHITESPACE, target, $I0, lastpos
 954     if pos == $I0 goto end
 955     if pos >= lastpos goto end
 956     .local string textarg, closedelim
 957     textarg = ''
 958     closedelim = '>'
 959     $S0 = substr target, pos, 1
 960     if $S0 == '"' goto subrule_text_quote
 961     if $S0 != "'" goto subrule_text_loop
 962   subrule_text_quote:
 963     closedelim = $S0
 964     inc pos
 965   subrule_text_loop:
 966     if pos >= lastpos goto end
 967     $S0 = substr target, pos, 1
 968     if $S0 == closedelim goto subrule_text_end
 969     if $S0 != "\\" goto subrule_text_add
 970     inc pos
 971     $S0 = substr target, pos, 1
 972     if $S0 == closedelim goto subrule_text_add
 973     if $S0 == "\\" goto subrule_text_add
 974     textarg .= "\\"
 975   subrule_text_add:
 976     textarg .= $S0
 977     inc pos
 978     goto subrule_text_loop
 979   subrule_text_end:
 980     mob['arg'] = textarg
 981     if closedelim == '>' goto subrule_end
 982     inc pos
 983     pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
 984   subrule_end:
 985     $S0 = substr target, pos, 1
 986     if $S0 != '>' goto end
 987     inc pos
 988     mob.'to'(pos)
 989     mob['iscapture'] = iscapture
 990     unless iscapture goto end
 991     $S0 = escape cname
 992     $S0 = concat '"', $S0
 993     $S0 = concat $S0, '"'
 994     mob['cname'] = $S0
 995   end:
 996     .return (mob)
 997 .end
 998
 999
1000 =item C<parse_enumcharclass(PMC mob)>
1001
1002 Extract an enumerated character list.
1003
1004 =cut
1005
1006 .sub 'parse_enumcharclass'
1007     .param pmc mob
1008     .param pmc adverbs         :slurpy :named
1009     .local string target
1010     .local pmc term
1011     .local string op
1012     .local int pos, lastpos
1013
1014     $P0 = getattribute mob, '$.target'
1015     target = $P0
1016     pos = mob.'to'()
1017     lastpos = length target
1018     op = mob['KEY']
1019
1020     ##   handle the case of <[, <+[, <-[, and <![ as the token
1021     ##   by converting to <, <+, <-, or <!
1022     $S0 = substr op, -1, 1
1023     if $S0 != '[' goto parse_loop
1024     op = chopn op, 1
1025     goto enum
1026
1027   parse_loop:
1028     pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
1029     if pos >= lastpos goto err_close
1030     $S0 = substr target, pos, 1
1031     if $S0 != '[' goto subrule
1032     inc pos
1033
1034   enum:
1035     .local string charlist
1036     .local int isrange
1037     charlist = ''
1038     isrange = 0
1039
1040   enum_loop:
1041     ##   skip leading whitespace and get next character
1042     pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
1043     if pos >= lastpos goto err_close
1044     $S0 = substr target, pos, 1
1045     if $S0 == ']' goto enum_close
1046     if $S0 == '-' goto err_hyphen
1047     if $S0 == '.' goto enum_dotrange
1048     if $S0 != "\\" goto enum_addchar
1049   enum_backslash:
1050     inc pos
1051     ##   get escaped character
1052     $S0 = substr target, pos, 1
1053     ##   handle metas such as \n, \t, \r, etc.
1054     $I0 = index 'nrtfae0xco', $S0
1055     if $I0 == -1 goto enum_addchar
1056     if $I0 >= 7 goto enum_xco
1057     $S0 = substr "\n\r\t\f\a\e\0", $I0, 1
1058     goto enum_addchar
1059   enum_xco:
1060     $I0 = pos - 1
1061     $P0 = 'p6escapes'(mob, 'pos'=>$I0)
1062     $S0 = $P0.'ast'()
1063     pos = $P0.'to'()
1064     goto enum_addchar_1
1065   enum_addchar:
1066     inc pos
1067   enum_addchar_1:
1068     if isrange goto enum_addrange
1069     charlist .= $S0
1070     goto enum_loop
1071   enum_dotrange:
1072     ##   check if we have a .. range marker
1073     if isrange goto enum_addrange
1074     $S1 = substr target, pos, 2
1075     if $S1 != '..' goto enum_addchar
1076     pos += 2
1077     isrange = 1
1078     goto enum_loop
1079   enum_addrange:
1080     ##   add character range to charlist
1081     isrange = 0
1082     $I2 = ord charlist, -1
1083     $I0 = ord $S0
1084   enum_addrange_1:
1085     inc $I2
1086     if $I2 > $I0 goto enum_loop
1087     $S1 = chr $I2
1088     charlist .= $S1
1089     goto enum_addrange_1
1090   enum_close:
1091     inc pos
1092     ##   create a node for the charlist
1093     term = mob.'new'(mob, 'grammar'=>'PGE::Exp::EnumCharList')
1094     term.'to'(pos)
1095     term.'!make'(charlist)
1096     goto combine
1097
1098   subrule:
1099     $I0 = pos
1100     .local string subname
1101     (subname, pos) = 'parse_subname'(target, $I0)
1102     if pos == $I0 goto err
1103     term = mob.'new'(mob, 'grammar'=>'PGE::Exp::Subrule')
1104     term.'from'($I0)
1105     term.'to'(pos)
1106     term['subname'] = subname
1107     term['iscapture'] = 0
1108
1109   combine:
1110     ##   find out what operator preceded this term
1111     if op == '+' goto combine_plus
1112     if op == '-' goto combine_minus
1113     if op == '<' goto combine_init
1114     if op == '<+' goto combine_init
1115     ##   token was '<-' or '<!'
1116     term['isnegated'] = 1
1117     term['iszerowidth'] = 1
1118     if op == '<!' goto combine_init
1119     ##   token is '<-', we need to match a char by concat dot
1120     $P0 = mob.'new'(mob, 'grammar'=>'PGE::Exp::CCShortcut')
1121     $P0.'to'(pos)
1122     $P0.'!make'('.')
1123     mob = mob.'new'(mob, 'grammar'=>'PGE::Exp::Concat')
1124     mob.'to'(pos)
1125     mob[0] = term
1126     mob[1] = $P0
1127     goto next_op
1128
1129   combine_init:
1130     mob = term
1131     goto next_op
1132
1133   combine_plus:
1134     ##   <a+b>  ==>   <a> | <b>
1135     $P0 = mob.'new'(mob, 'grammar'=>'PGE::Exp::Alt')
1136     $P0.'to'(pos)
1137     $P0[0] = mob
1138     $P0[1] = term
1139     mob = $P0
1140     goto next_op
1141
1142   combine_minus:
1143     ##   <a-b> ==>   <!b> <a>
1144     term['isnegated'] = 1
1145     term['iszerowidth'] = 1
1146     $P0 = mob.'new'(mob, 'grammar'=>'PGE::Exp::Concat')
1147     $P0.'to'(pos)
1148     $P0[0] = term
1149     $P0[1] = mob
1150     mob = $P0
1151     goto next_op
1152
1153   next_op:
1154     pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
1155     if pos >= lastpos goto err_close
1156
1157     op = substr target, pos, 1
1158     inc pos
1159     if op == '+' goto parse_loop
1160     if op == '-' goto parse_loop
1161     if op != '>' goto err
1162     mob.'to'(pos)
1163     goto end
1164
1165   err:
1166     parse_error(mob, pos, "Error parsing enumerated character class")
1167     goto end
1168   err_hyphen:
1169     parse_error(mob, pos, "Unescaped '-' in charlist (use '..' or '\\-')")
1170     goto end
1171   err_close:
1172     parse_error(mob, pos, "Missing close '>' or ']>' in enumerated character class")
1173   end:
1174     .return (mob)
1175 .end
1176
1177
1178 =item C<parse_quoted_literal>
1179
1180 Parses '...' literals.
1181
1182 =cut
1183
1184 .sub 'parse_quoted_literal'
1185     .param pmc mob
1186     .local int pos, lastpos
1187     .local string target
1188     (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Literal')
1189     lastpos = length target
1190     lastpos -= 1
1191     .local string lit
1192     lit = ''
1193   literal_iter:
1194     if pos > lastpos goto literal_error
1195     $S0 = substr target, pos, 1
1196     if $S0 == "'" goto literal_end
1197     if $S0 != "\\" goto literal_add
1198     inc pos
1199     $S0 = substr target, pos, 1
1200   literal_add:
1201     inc pos
1202     lit .= $S0
1203     goto literal_iter
1204   literal_end:
1205     inc pos
1206     mob.'!make'(lit)
1207     mob.'to'(pos)
1208     .return (mob)
1209   literal_error:
1210     parse_error(mob, pos, "No closing ' in quoted literal")
1211     .return (mob)
1212 .end
1213
1214
1215 =item C<parse_goal>
1216
1217 Parse a goal.
1218
1219 =cut
1220
1221 .sub 'parse_goal'
1222     .param pmc mob
1223     .local int pos, lastpos
1224     .local string target
1225     (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Concat')
1226     lastpos = length target
1227     ##  skip any leading whitespace before goal
1228     pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
1229     .local pmc regex, goal, expr, alt, failsub
1230     regex = get_global 'regex'
1231     ##  parse the goal, down to concatenation precedence
1232     mob.'to'(pos)
1233     goal = regex(mob, 'tighter'=>'infix:')
1234     unless goal goto fail_goal
1235     pos = goal.'to'()
1236     goal = goal['expr']
1237     ##  skip any leading whitespace before expression
1238     pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
1239     ##  parse the goal, down to concatenation precedence
1240     mob.'to'(pos)
1241     expr = regex(mob, 'tighter'=>'infix:')
1242     unless expr goto fail_expr
1243     pos = expr.'to'()
1244     expr = expr['expr']
1245     mob.'to'(pos)
1246     failsub = mob.'new'(mob, 'grammar'=>'PGE::Exp::Subrule')
1247     failsub.'to'(pos)
1248     failsub['subname'] = 'FAILGOAL'
1249     $S0 = goal.'Str'()
1250     failsub['arg'] = $S0
1251     alt = mob.'new'(mob, 'grammar'=>'PGE::Exp::Alt')
1252     alt.'to'(pos)
1253     push alt, goal
1254     push alt, failsub
1255     push mob, expr
1256     push mob, alt
1257     .return (mob)
1258   fail_goal:
1259     'parse_error'(mob, pos, 'Unable to parse goal after ~')
1260   fail_expr:
1261     'parse_error'(mob, pos, 'Unable to parse expression after ~')
1262 .end
1263
1264
1265 =item C<parse_modifier>
1266
1267 Parse a modifier.
1268
1269 =cut
1270
1271 .sub 'parse_modifier'
1272     .param pmc mob
1273     .local int pos, lastpos
1274     .local string target, value
1275     .local string key
1276     key = mob['KEY']
1277     (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Modifier')
1278     lastpos = length target
1279     value = "1"
1280     $I0 = pos
1281     pos = find_not_cclass .CCLASS_NUMERIC, target, pos, lastpos
1282     if pos == $I0 goto name
1283     $I1 = pos - $I0
1284     value = substr target, $I0, $I1
1285     $I0 = pos
1286   name:
1287     pos = find_not_cclass .CCLASS_WORD, target, pos, lastpos
1288     $I1 = pos - $I0
1289     if $I1 == 0 goto fail
1290     $S0 = substr target, $I0, $I1
1291     mob['key'] = $S0
1292     mob.'!make'(value)
1293     $S0 = substr target, pos, 1
1294     if $S0 != '(' goto end
1295     $I0 = pos + 1
1296     pos = index target, ')', pos
1297     $I1 = pos - $I0
1298     $S0 = substr target, $I0, $I1
1299     mob.'!make'($S0)
1300     inc pos
1301   end:
1302     ### XXX pos = find_not_cclass .CCLASS_WHITESPACE, target, pos, lastpos
1303     mob.'to'(pos)
1304     .return (mob)
1305   fail:
1306     .return (mob)
1307 .end
1308
1309
1310 .sub 'parse_closure'
1311     .param pmc mob
1312     .local pmc key
1313     key = mob['KEY']
1314     .local string target
1315     .local int pos, len
1316     (mob, pos, target) = mob.'new'(mob, 'grammar'=>'PGE::Exp::Closure')
1317     len = 2
1318   init:
1319     $S0 = substr target, pos, 1
1320     if $S0 != "{" goto body
1321     inc len
1322     inc pos
1323     goto init
1324   body:
1325     .local string close
1326     close = repeat "}", len
1327     if key == '<?{{' goto assert_pos
1328     if key == '<!{{' goto assert_neg
1329     goto have_close
1330   assert_neg:
1331     mob['isnegated'] = 1
1332   assert_pos:
1333     mob['iszerowidth'] = 1
1334     concat close, '>'
1335     inc len
1336   have_close:
1337     $I0 = index target, close, pos
1338     if $I0 < pos goto err_noclose
1339     $I1 = $I0 - pos
1340     $S1 = substr target, pos, $I1
1341     mob.'!make'($S1)
1342     pos = $I0 + len
1343     mob.'to'(pos)
1344     .return (mob)
1345  err_noclose:
1346     parse_error(mob, pos, "Missing closing braces for closure")
1347     .return (mob)
1348 .end
1349
1350
1351 .sub 'parse_action'
1352     .param pmc mob
1353     .local string target
1354     .local int pos, keypos
1355     (mob, pos, target) = mob.'new'(mob, 'grammar' => 'PGE::Exp::Action')
1356     keypos = index target, '#= ', pos
1357     if keypos < 0 goto end
1358     $I0 = find_cclass .CCLASS_NEWLINE, target, pos, keypos
1359     if $I0 < keypos goto end
1360     .local string actionkey
1361     keypos += 3
1362     $I0 -= keypos
1363     actionkey = substr target, keypos, $I0
1364     actionkey = 'trim'(actionkey)
1365     mob['actionkey'] = actionkey
1366   end:
1367     mob.'to'(pos)
1368     .return (mob)
1369 .end
1370
1371
1372 .sub 'parse_error'
1373     .param pmc mob
1374     .param int pos
1375     .param string message
1376     $P0 = getattribute mob, '$.pos'
1377     $P0 = pos
1378     $P0 = new 'Exception'
1379     $S0 = 'perl6regex parse error: '
1380     $S0 .= message
1381     $S0 .= ' at offset '
1382     $S1 = pos
1383     $S0 .= $S1
1384     $S0 .= ", found '"
1385     $P1 = getattribute mob, '$.target'
1386     $S1 = $P1
1387     $S1 = substr $S1, pos, 1
1388     $S0 .= $S1
1389     $S0 .= "'"
1390     $P0 = $S0
1391     throw $P0
1392     .return ()
1393 .end
1394
1395
1396
1397
1398 .namespace [ 'PGE';'Exp' ]
1399
1400 .sub 'perl6exp' :method
1401     .param pmc pad
1402     .return (self)
1403 .end
1404
1405
1406 .namespace [ 'PGE';'Exp';'Literal' ]
1407
1408 .sub 'perl6exp' :method
1409     .param pmc pad
1410     $I0 = pad['ignorecase']
1411     self['ignorecase'] = $I0
1412     .return (self)
1413 .end
1414
1415
1416 .namespace [ 'PGE';'Exp';'Concat' ]
1417
1418 .sub 'perl6exp' :method
1419     .param pmc pad
1420
1421     .local pmc array, exp
1422     .local int i, j, n
1423     array = self.'list'()
1424     n = elements array
1425     i = 0
1426     j = 0
1427   iter_loop:
1428     if i >= n goto iter_end
1429     exp = self[i]
1430     inc i
1431     exp = exp.'perl6exp'(pad)
1432     if null exp goto iter_loop
1433     self[j] = exp
1434     inc j
1435     goto iter_loop
1436   iter_end:
1437     array = j
1438     if j > 1 goto end
1439     $P0 = array[0]
1440     .return ($P0)
1441   end:
1442     .return (self)
1443 .end
1444
1445
1446 .namespace [ 'PGE';'Exp';'Quant' ]
1447
1448 .sub 'perl6exp' :method
1449     .param pmc pad
1450
1451     $I0 = exists self['backtrack']
1452     if $I0 goto backtrack_done
1453     self['backtrack'] = PGE_BACKTRACK_GREEDY
1454     $I0 = pad['ratchet']
1455     if $I0 == 0 goto backtrack_done
1456     self['backtrack'] = PGE_BACKTRACK_NONE
1457   backtrack_done:
1458
1459     .local pmc exp0, sep
1460     .local int isarray
1461     isarray = pad['isarray']
1462     pad['isarray'] = 1
1463     exp0 = self[0]
1464     $I0 = isa exp0, ['PGE';'Exp';'WS']
1465     if $I0 goto err_parse_quant
1466     exp0['isquant'] = 1
1467     exp0 = exp0.'perl6exp'(pad)
1468     self[0] = exp0
1469     sep = self['sep']
1470     if null sep goto sep_done
1471     sep = sep.'perl6exp'(pad)
1472     self['sep'] = sep
1473   sep_done:
1474     pad['isarray'] = isarray
1475     .return (self)
1476   err_parse_quant:
1477     $P0 = get_hll_global ['PGE';'Perl6Regex'], 'parse_quant_error'
1478     $P0(self)
1479     .return (self)
1480 .end
1481
1482
1483 .namespace [ 'PGE';'Exp';'Group' ]
1484
1485 .sub 'perl6exp' :method
1486     .param pmc pad
1487     .local pmc exp0
1488
1489     $I0 = self['isquant']
1490     if $I0 goto backtrack_done
1491     $I0 = exists self['backtrack']
1492     if $I0 goto backtrack_done
1493     $I0 = pad['ratchet']
1494     if $I0 == 0 goto backtrack_done
1495     self['backtrack'] = PGE_BACKTRACK_NONE
1496   backtrack_done:
1497
1498     exp0 = self[0]
1499     exp0 = exp0.'perl6exp'(pad)
1500     self[0] = exp0
1501     .return (self)
1502 .end
1503
1504
1505 .namespace [ 'PGE';'Exp';'CGroup' ]
1506
1507 .sub 'perl6exp' :method
1508     .param pmc pad
1509     .local pmc exp
1510
1511     $I0 = self['isquant']
1512     if $I0 goto backtrack_done
1513     $I0 = exists self['backtrack']
1514     if $I0 goto backtrack_done
1515     $I0 = pad['ratchet']
1516     if $I0 == 0 goto backtrack_done
1517     self['backtrack'] = PGE_BACKTRACK_NONE
1518   backtrack_done:
1519
1520     self['iscapture'] = 1
1521     $I0 = exists self['isscope']
1522     if $I0 goto set_cname
1523     self['isscope'] = 1
1524
1525   set_cname:
1526     $I0 = exists self['cname']
1527     if $I0 goto set_subpats
1528     $I0 = pad['subpats']
1529     self['cname'] = $I0
1530
1531   set_subpats:
1532     .local string cname
1533     cname = self['cname']
1534     $S0 = substr cname, 0, 1
1535     if $S0 == '"' goto set_lexicals
1536     $I0 = cname
1537     inc $I0
1538     pad['subpats'] = $I0
1539
1540   set_lexicals:
1541     .local int isarray
1542     isarray = 0
1543     .local pmc lexscope
1544     lexscope = pad['lexscope']
1545     $I0 = exists lexscope[cname]
1546     if $I0 == 0 goto set_lexicals_1
1547     $P0 = lexscope[cname]
1548     $P0['isarray'] = 1
1549     isarray = 1
1550   set_lexicals_1:
1551     lexscope[cname] = self
1552
1553     .local int padarray
1554     padarray = pad['isarray']
1555     isarray |= padarray
1556     self['isarray'] = isarray
1557     $I0 = self['isscope']
1558     if $I0 == 0 goto unscoped
1559
1560   scoped:
1561     .local int subpats
1562     subpats = pad['subpats']
1563     pad['subpats'] = 0
1564     pad['isarray'] = 0
1565     $P0 = new 'Hash'
1566     pad['lexscope'] = $P0
1567     exp = self[0]
1568     exp = exp.'perl6exp'(pad)
1569     self[0] = exp
1570     pad['lexscope'] = lexscope
1571     pad['isarray'] = padarray
1572     pad['subpats'] = subpats
1573     goto end
1574
1575   unscoped:
1576     exp = self[0]
1577     exp = exp.'perl6exp'(pad)
1578     self[0] = exp
1579   end:
1580     .return (self)
1581 .end
1582
1583
1584 .namespace [ 'PGE';'Exp';'Subrule' ]
1585
1586 .sub 'perl6exp' :method
1587     .param pmc pad
1588
1589     $I0 = self['isquant']
1590     if $I0 goto backtrack_done
1591     $I0 = exists self['backtrack']
1592     if $I0 goto backtrack_done
1593     $I0 = pad['ratchet']
1594     if $I0 == 0 goto backtrack_done
1595     self['backtrack'] = PGE_BACKTRACK_NONE
1596   backtrack_done:
1597
1598     .local int iscapture, isarray
1599     .local pmc lexscope
1600     iscapture = self['iscapture']
1601     if iscapture == 0 goto end
1602     .local string cname
1603     cname = self['cname']
1604     isarray = pad['isarray']
1605     lexscope = pad['lexscope']
1606     $I0 = exists lexscope[cname]
1607     if $I0 == 0 goto lexscope_1
1608     $P0 = lexscope[cname]
1609     $P0['isarray'] = 1
1610     isarray = 1
1611   lexscope_1:
1612     lexscope[cname] = self
1613     self['isarray'] = isarray
1614   next_cname:
1615     $S0 = substr cname, 0, 1
1616     if $S0 == '"' goto end
1617     $I0 = cname
1618     inc $I0
1619     pad['subpats'] = $I0
1620   end:
1621     $S0 = pad['dba']
1622     self['dba'] = $S0
1623     .return (self)
1624 .end
1625
1626
1627 .namespace [ 'PGE';'Exp';'WS' ]
1628
1629 .sub 'perl6exp' :method
1630     .param pmc pad
1631
1632     $I0 = pad['sigspace']
1633     if $I0 goto end
1634     null $P0
1635     .return ($P0)
1636   end:
1637     self['subname'] = 'ws'
1638     self['iscapture'] = 0
1639     $I0 = pad['ratchet']
1640     unless $I0 goto end_1
1641     self['backtrack'] = PGE_BACKTRACK_NONE
1642   end_1:
1643     .return (self)
1644 .end
1645
1646
1647 .namespace [ 'PGE';'Exp';'Alt' ]
1648
1649 .sub 'perl6exp' :method
1650     .param pmc pad
1651
1652     .local pmc exp0, exp1
1653     exp0 = self[0]
1654     exp1 = self[1]
1655
1656     ##   if we only have one operand (prefix:|),
1657     ##   reduce and return it.
1658     $I0 = defined self[1]
1659     if $I0 goto with_rhs
1660     .tailcall exp0.'perl6exp'(pad)
1661   with_rhs:
1662
1663     ##   if lhs is whitespace, then this is a prefix-alt and
1664     ##   we ignore it (by simply returning its rhs)
1665     $I0 = isa exp0, ['PGE';'Exp';'WS']
1666     if $I0 == 0 goto with_lhs
1667     .tailcall exp1.'perl6exp'(pad)
1668   with_lhs:
1669
1670     .local pmc lexscope, savescope, it
1671     lexscope = pad['lexscope']
1672     savescope = new 'Hash'
1673     it = iter lexscope
1674   iter_loop:
1675     unless it goto iter_end
1676     $P1 = shift it
1677     $P2 = it[$P1]
1678     savescope[$P1] = $P2
1679     goto iter_loop
1680   iter_end:
1681     $I0 = pad['subpats']
1682     exp0 = exp0.'perl6exp'(pad)
1683     self[0] = exp0
1684
1685     $I1 = pad['subpats']
1686     pad['subpats'] = $I0
1687     pad['lexscope'] = savescope
1688     exp1 = exp1.'perl6exp'(pad)
1689     self[1] = exp1
1690     $I0 = pad['subpats']
1691     if $I0 >= $I1 goto end
1692     pad['subpats'] = $I1
1693   end:
1694     .return (self)
1695 .end
1696
1697
1698 .namespace [ 'PGE';'Exp';'Alias' ]
1699
1700 .sub 'perl6exp' :method
1701     .param pmc pad
1702     .local string cname
1703     .local pmc exp0, exp1
1704
1705     exp0 = self[0]
1706     $I0 = isa exp0, ['PGE';'Exp';'Scalar']
1707     unless $I0 goto err_no_lvalue
1708
1709     cname = exp0['cname']
1710     exp1 = self[1]
1711
1712     ##   If we're aliasing a capture group or a quantified capture
1713     ##   group, then we just move the alias name to that group.
1714     ##   Otherwise, we need to create a capture group for this
1715     ##   alias and return that.
1716
1717     $I0 = isa exp1, ['PGE';'Exp';'CGroup']
1718     if $I0 == 1 goto make_alias
1719     $I0 = isa exp1, ['PGE';'Exp';'Subrule']
1720     if $I0 == 1 goto make_alias
1721     $I0 = isa exp1, ['PGE';'Exp';'Quant']
1722     if $I0 == 0 goto add_cgroup
1723     $P0 = exp1[0]
1724     $I0 = isa $P0, ['PGE';'Exp';'CGroup']
1725     if $I0 == 0 goto add_cgroup
1726     $P0['cname'] = cname
1727     goto end
1728
1729   add_cgroup:
1730     .local pmc cexp
1731     cexp = self.'new'(self, 'grammar'=>'PGE::Exp::CGroup')
1732     $I0 = self.'from'()
1733     cexp.'from'($I0)
1734     $I0 = self.'to'()
1735     cexp.'to'($I0)
1736     cexp[0] = exp1
1737     cexp['isscope'] = 0
1738     cexp['iscapture'] = 1
1739     cexp['cname'] = cname
1740     cexp = cexp.'perl6exp'(pad)
1741     .return (cexp)
1742
1743   make_alias:
1744     exp1['cname'] = cname
1745     exp1['iscapture'] = 1
1746   end:
1747     exp1 = exp1.'perl6exp'(pad)
1748     .return (exp1)
1749
1750   err_no_lvalue:
1751     $P0 = get_hll_global ['PGE';'Perl6Regex'], 'parse_error'
1752     $I0 = self.'from'()
1753     $P0(self, $I0, 'LHS of alias must be lvalue')
1754 .end
1755
1756
1757 .namespace [ 'PGE';'Exp';'Modifier' ]
1758
1759 .sub 'perl6exp' :method
1760     .param pmc pad
1761     .local string key
1762     .local string value
1763     key = self['key']
1764     value = self.'ast'()
1765     if key == 'words' goto sigspace
1766     if key == 's' goto sigspace
1767     if key == 'w' goto sigspace
1768     if key == 'i' goto ignorecase
1769     goto setpad
1770   sigspace:
1771     key = 'sigspace'
1772     goto setpad
1773   ignorecase:
1774     key = 'ignorecase'
1775   setpad:
1776     $P0 = pad[key]
1777     pad[key] = value
1778     .local pmc exp
1779     exp = self[0]
1780     exp = exp.'perl6exp'(pad)
1781     self[0] = exp
1782     pad[key] = $P0
1783     .return (exp)
1784 .end
1785
1786 .namespace [ 'PGE';'Exp';'Conj' ]
1787
1788 .sub 'perl6exp' :method
1789     .param pmc pad
1790     $P0 = self[0]
1791     $P0 = $P0.'perl6exp'(pad)
1792     self[0] = $P0
1793     $P1 = self[1]
1794     $P1 = $P1.'perl6exp'(pad)
1795     self[1] = $P1
1796     .return (self)
1797 .end
1798
1799
1800 .namespace [ 'PGE';'Exp';'Closure' ]
1801
1802 .sub 'perl6exp' :method
1803     .param pmc pad
1804     .local string lang
1805     .local pmc closure_pp
1806     .local pmc closure_fn
1807     lang = pad['lang']
1808     self['lang'] = lang
1809     # see if we need to do any pre-processing of the closure
1810     closure_pp = get_hll_global ['PGE';'Perl6Regex'], '%closure_pp'
1811     $I0 = defined closure_pp[lang]
1812     if $I0 == 0 goto end
1813     closure_fn = closure_pp[lang]
1814     $S1 = self.'ast'()
1815     $S1 = closure_fn($S1)
1816     self.'!make'($S1)
1817   end:
1818     .return (self)
1819 .end
1820
1821 =back
1822
1823 =head1 Functions
1824
1825 =over 4
1826
1827 =item C<PIR_closure(string code)>
1828
1829 This helper function helps with :lang(PIR) closures in rules
1830 by adding a ".sub" wrapper around the code if one isn't
1831 already present.
1832
1833 =back
1834 =cut
1835
1836 .namespace [ 'PGE';'Perl6Regex' ]
1837
1838 .sub 'PIR_closure'
1839     .param string code
1840     $I0 = index code, '.sub'
1841     if $I0 >= 0 goto end
1842     code = concat ".sub anon :anon\n.param pmc match\n", code
1843     code .= "\n.end\n"
1844   end:
1845     .return (code)
1846 .end
1847
1848
1849 .namespace [ 'PGE';'Exp';'Action' ]
1850
1851 .sub 'perl6exp' :method
1852     .param pmc pad
1853     $S0 = pad['name']
1854     self['actionname'] = $S0
1855     .return (self)
1856 .end
1857
1858
1859 .namespace [ 'PGE';'Exp';'Cut' ]
1860
1861 .sub 'perl6exp' :method
1862     .param pmc pad
1863     $S0 = self.'ast'()
1864     if $S0 == ':::' goto cut_rule
1865     if $S0 == '<commit>' goto cut_match
1866     self['cutmark'] = PGE_CUT_GROUP
1867     .return (self)
1868   cut_rule:
1869     self['cutmark'] = PGE_CUT_RULE
1870     .return (self)
1871   cut_match:
1872     self['cutmark'] = PGE_CUT_MATCH
1873     .return (self)
1874 .end
1875
1876 # Local Variables:
1877 #   mode: pir
1878 #   fill-column: 100
1879 # End:
1880 # vim: expandtab shiftwidth=4 ft=pir: