3 This module is deliberately kept simple (and inefficient). The parse
4 tree is just a cons list (code is data), so that implementing things
5 like `quote` and `define-syntax` will be straightforward.
10 from boa
.error
import *
11 from boa
.primitives
import symbol
, list as cons_list
12 from boa
.evaluator
import sequence_to_exp
16 ('whitespace', c(r
'(\s+)')),
17 ('comment', c(r
'(;[^\n]*)')),
20 ('number', c(r
'''( [+\-]? ## optional sign,
21 (?: ## followed by some
31 ('symbol', c(r
'''([a-zA-Z\+\=\?\!\@\#\$\%\^\&\*\-\_\/\.\>\<]
32 [\w\+\=\?\!\@\#\$\%\^\&\*\-\_\/\.\>\<]*)''',
49 for type, regex in PATTERNS:
53 tokens.append((type, token))
57 error("TOKENIZE error from: %s..." % s[:20])
def filter_executable_tokens(tokens):
    """Return the tokens that matter to the parser, as a list.

    ``tokens`` is an iterable of ``(type, text)`` pairs. Pairs whose type is
    ``'whitespace'`` or ``'comment'`` are dropped; all others are kept in
    their original order.

    A real list is returned (rather than a lazy ``filter`` object) because
    the parser takes ``len()`` of the result and indexes into it.
    """
    skipped = ('whitespace', 'comment')
    return [tok for tok in tokens if tok[0] not in skipped]
66 tokens = filter_executable_tokens(tokenize(text))
68 while n < len(tokens):
69 sexp, n = parse_sexp(tokens, n)
71 return sequence_to_exp(cons_list(*sexps))
def parse_sexp(tokens, n):
    """Parse a single s-expression starting at ``tokens[n]``.

    ``tokens`` is an indexable sequence of ``(type, text)`` pairs and ``n``
    is the index of the first token to consume.

    Returns a ``(expression, next_index)`` pair, where ``next_index`` is the
    index of the first token after the parsed expression:

    * a ``string`` token yields its text unchanged;
    * a ``number`` token is converted with ``int``;
    * a ``symbol`` token is wrapped with ``symbol``;
    * a ``'`` token followed by an expression ``e`` yields the cons list
      ``(quote e)``;
    * ``(`` ... ``)`` yields a cons list of the enclosed expressions.

    Any other token type, or running out of tokens mid-expression, is
    reported through ``error``.
    """
    if n >= len(tokens):
        # Reached e.g. by a trailing "'" with nothing after it.
        # NOTE(review): assumes error() raises -- confirm in boa.error.
        error("PARSE error -- Unexpected end of input")

    # Token types are compared with ==, not `is`: identity of equal strings
    # is an interning accident and must not decide how a token is parsed.
    kind = tokens[n][0]

    if kind == 'string':
        return tokens[n][1], n + 1
    if kind == 'number':
        # NOTE(review): only integer literals survive int(); if the number
        # pattern also admits non-integers this raises ValueError -- confirm.
        return int(tokens[n][1]), n + 1
    if kind == 'symbol':
        return symbol(tokens[n][1]), n + 1
    if kind == "'":
        quoted, n = parse_sexp(tokens, n + 1)
        return cons_list(symbol("quote"), quoted), n
    if kind == '(':
        items = []
        n += 1  # step past the opening parenthesis
        while n < len(tokens) and tokens[n][0] != ')':
            item, n = parse_sexp(tokens, n)
            items.append(item)
        if n >= len(tokens):
            error("PARSE error -- Missing closing parenthesis")
        return cons_list(*items), n + 1  # n + 1 steps past ')'

    error("PARSE error -- Invalid/unsupported token: %s" % kind)