boa2/boa/parser.py
1 """Lisp syntax parser
3 This module is deliberately kept simple (and inefficient). The parse
4 tree is just a cons list (code is data), so that implementing things
5 like `quote` and `define-syntax` will be straightforward.
6 """

import re

from boa.error import *
from boa.primitives import symbol, list as cons_list
from boa.evaluator import sequence_to_exp

c = re.compile
PATTERNS = [
    ('whitespace', c(r'(\s+)')),
    ('comment', c(r'(;[^\n]*)')),
    ('(', c(r'(\()')),
    (')', c(r'(\))')),
    ('number', c(r'''( [+\-]?      ## optional sign,
                      (?:          ## followed by some
                                   ## decimals
                          \d+\.\d+
                        | \d+\.
                        | \.\d+
                        | \d+
                      )
                    )
                 ''',
                 re.VERBOSE)),
    ('symbol', c(r'''([a-zA-Z\+\=\?\!\@\#\$\%\^\&\*\-\_\/\.\>\<]
                      [\w\+\=\?\!\@\#\$\%\^\&\*\-\_\/\.\>\<]*)''',
                 re.VERBOSE)),
    ('string', c(r'''
                  "
                  (([^\"] | \\")*)
                  "
                 ''',
                 re.VERBOSE)),
40 ("'", c(r'(\')')),
41 ('`', c(r'(`)')),
42 (',', c(r'(,)')),
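
# Note that pattern order matters: 'number' is tried before 'symbol',
# so "+1" lexes as a number, while a bare "+" (a sign with no digits)
# falls through to 'symbol'.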

def tokenize(s):
    tokens = []
    while s:
        m = None
        for token_type, regex in PATTERNS:
            m = regex.match(s)
            if m:
                token = m.group(1)
                tokens.append((token_type, token))
                s = s[m.end():]
                break
        if not m:
            error("TOKENIZE error from: %s..." % s[:20])
    return tokens
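
# For instance (token types paired with the matched text):
#
#   tokenize("(+ 1 2)")
#   => [('(', '('), ('symbol', '+'), ('whitespace', ' '),
#       ('number', '1'), ('whitespace', ' '), ('number', '2'),
#       (')', ')')]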

def filter_executable_tokens(tokens):
    # Return a real list (not a lazy filter object) so that parse()
    # can call len() and index into the result under Python 3 as well.
    return [t for t in tokens
            if t[0] not in ('whitespace', 'comment')]
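
# Applied to the stream above, this drops the two whitespace tokens,
# leaving [('(', '('), ('symbol', '+'), ('number', '1'),
# ('number', '2'), (')', ')')].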

def parse(text):
    tokens = filter_executable_tokens(tokenize(text))
    sexps, n = [], 0
    while n < len(tokens):
        sexp, n = parse_sexp(tokens, n)
        sexps.append(sexp)
    return sequence_to_exp(cons_list(*sexps))
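
# With several top-level forms, e.g. parse("(define x 1) x"), the cons
# list of parsed expressions goes through sequence_to_exp, which
# presumably folds them into a single expression (a `begin` form in
# the SICP evaluator this code follows).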

def parse_sexp(tokens, n):
    if tokens[n][0] == 'string':
        return tokens[n][1], n+1
    if tokens[n][0] == 'number':
        text = tokens[n][1]
        # The 'number' pattern also admits decimals, so fall back to
        # float when the token is not a plain integer.
        return (float(text) if '.' in text else int(text)), n+1
    if tokens[n][0] == 'symbol':
        return symbol(tokens[n][1]), n+1
    if tokens[n][0] == "'":
        e, n = parse_sexp(tokens, n+1)
        return cons_list(symbol("quote"), e), n
    if tokens[n][0] == '(':
        sexps, n = [], n+1
        while tokens[n][0] != ')':
            e, n = parse_sexp(tokens, n)
            sexps.append(e)
        return cons_list(*sexps), n+1
    error("PARSE error -- Invalid/unsupported token: %s" % tokens[n][0])
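
# A minimal smoke-test sketch; how a parsed cons list actually prints
# depends on boa.primitives.
if __name__ == '__main__':
    examples = ['42', '3.14', '(+ 1 2)', "'(a b c)",
                '(display "hi") ; demo']
    for src in examples:
        print('%-22r => %r' % (src, parse(src)))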