cleaner way of writing iterator loops
[pythonc.git] / syntax.py
blob405231d15a6b227436884ce38e1d4b7f00fc7fb0
1 ################################################################################
2 ##
3 ## Pythonc--Python to C++ translator
4 ##
5 ## Copyright 2011 Zach Wegner
6 ##
7 ## This file is part of Pythonc.
8 ##
9 ## Pythonc is free software: you can redistribute it and/or modify
10 ## it under the terms of the GNU General Public License as published by
11 ## the Free Software Foundation, either version 3 of the License, or
12 ## (at your option) any later version.
14 ## Pythonc is distributed in the hope that it will be useful,
15 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 ## GNU General Public License for more details.
19 ## You should have received a copy of the GNU General Public License
20 ## along with Pythonc. If not, see <http://www.gnu.org/licenses/>.
22 ################################################################################
24 import copy
def block_str(stmts):
    """Render *stmts* as a block of ';'-terminated, prefixed lines.

    Every statement is formatted with ``%s`` and given a trailing ``;``.  The
    joined text is split again on line boundaries so that each physical line
    of a multi-line statement receives the prefix individually.
    """
    terminated = ['%s;' % stmt for stmt in stmts]
    physical_lines = '\n'.join(terminated).splitlines()
    prefixed = [' %s' % line for line in physical_lines]
    return '\n'.join(prefixed)
# Every integer constant registered so far; read back by export_consts().
all_ints = set()


def register_int(value):
    """Record *value* in the module-level ``all_ints`` set."""
    all_ints.add(value)
# Maps each registered string to an (index, hash) pair.
all_strings = {}


def register_string(value):
    """Return the index assigned to *value*, registering it on first use.

    A newly seen string gets the next free index (the number of strings
    registered before it) and is stored together with its 64-bit FNV-1a hash,
    computed over ``ord()`` of each character.
    """
    known = all_strings.get(value)
    if known is not None:
        return known[0]

    # Compute hash via FNV-1a algorithm.  Python ints are unbounded, so the
    # running value is truncated to 64 bits by hand.
    mask = (1 << 64) - 1
    hashkey = 14695981039346656037
    for char in value:
        hashkey = ((hashkey ^ ord(char)) * 1099511628211) & mask

    index = len(all_strings)
    all_strings[value] = (index, hashkey)
    return index
def int_name(i):
    """Return the C++ identifier used for the singleton of integer *i*.

    Negative values are spelled with a ``neg`` marker followed by their
    magnitude, since ``-`` cannot appear in an identifier.
    """
    if i < 0:
        return 'int_singleton_neg%d' % -i
    return 'int_singleton_%d' % i
def export_consts(f):
    """Write C++ definitions for all registered int and string constants to *f*.

    One ``int_const_singleton`` line is written per entry of ``all_ints`` and
    one ``string_const_singleton`` line per entry of ``all_strings``; *f* only
    needs a ``write`` method.
    """
    for value in all_ints:
        f.write('int_const_singleton %s(%sll);\n' % (int_name(value), value))

    # Characters replaced by a backslash escape inside the emitted C++ literal.
    # NOTE(review): the closing brace of this dict was missing from the
    # reviewed copy and is restored here -- confirm against upstream.
    char_escape = {
        '"': r'\"',
        '\\': r'\\',
        '\n': r'\n',
        '\r': r'\r',
        '\t': r'\t',
    }

    for text, (index, hashkey) in all_strings.items():
        escaped = []
        for char in text:
            escaped.append(char_escape.get(char, char))
        c_str = ''.join(escaped)
        f.write('string_const_singleton string_singleton_%s("%s", %sull);\n' % (index, c_str, hashkey))
class Node:
    """Common base of the syntax nodes defined in this module."""

    def is_atom(self):
        """Return False; subclasses that are atoms override this."""
        return False
class NoneConst(Node):
    """The None constant; renders as a pointer to ``none_singleton``."""

    def __init__(self):
        pass

    def __str__(self):
        return '(&none_singleton)'

    def is_atom(self):
        return True
class BoolConst(Node):
    """A boolean constant; renders as a pointer to ``bool_singleton_<value>``."""

    def __init__(self, value):
        self.value = value

    def __str__(self):
        singleton = 'bool_singleton_%s' % self.value
        return '(&' + singleton + ')'
class IntConst(Node):
    """An integer constant; creating one registers the value via register_int."""

    def __init__(self, value):
        register_int(value)
        self.value = value

    def __str__(self):
        # int_name() supplies the singleton's identifier.
        return '(&' + int_name(self.value) + ')'

    def is_atom(self):
        return True
class StringConst(Node):
    """A string constant; ``id`` is the index returned by register_string."""

    def __init__(self, value):
        self.value = value
        self.id = register_string(value)

    def __str__(self):
        singleton = 'string_singleton_%s' % self.id
        return '(&' + singleton + ')'

    def is_atom(self):
        return True
class Identifier(Node):
    """A bare name that renders as itself."""

    def __init__(self, name):
        self.name = name

    def __str__(self):
        return self.name

    def is_atom(self):
        return True
class Ref(Node):
    """An allocation expression: ``(new(allocator) <ref_type>(<args>))``."""

    def __init__(self, ref_type, *args):
        self.ref_type = ref_type
        self.args = args

    def __str__(self):
        arg_list = ', '.join(map(str, self.args))
        return '(new(allocator) %s(%s))' % (self.ref_type, arg_list)
class UnaryOp(Node):
    """A unary operation, rendered as a no-argument method call on the operand."""

    def __init__(self, op, rhs):
        self.op = op
        self.rhs = rhs

    def __str__(self):
        return '{operand}->{method}()'.format(operand=self.rhs, method=self.op)
class BinaryOp(Node):
    """A binary operation, rendered as a method call on the left operand."""

    def __init__(self, op, lhs, rhs):
        self.op = op
        self.lhs = lhs
        self.rhs = rhs

    def __str__(self):
        return '{left}->{method}({right})'.format(
            left=self.lhs, method=self.op, right=self.rhs)
class Load(Node):
    """A variable read; *binding* is a ``(scope, idx)`` pair."""

    def __init__(self, name, binding):
        self.name = name
        self.scope, self.idx = binding

    def __str__(self):
        scope = self.scope
        if scope == 'class':
            # Class scope is addressed by quoted name, the others by index.
            return 'class_ctx->load("%s")' % self.name
        if scope == 'global':
            return 'globals->load(%s)' % self.idx
        return 'ctx.load(%s)' % self.idx
class Store(Node):
    """A variable write of *expr*; *binding* is a ``(scope, idx)`` pair."""

    def __init__(self, name, expr, binding):
        self.name = name
        self.expr = expr
        self.scope, self.idx = binding

    def __str__(self):
        scope = self.scope
        if scope == 'class':
            # Class scope is addressed by quoted name, the others by index.
            return 'class_ctx->store("%s", %s)' % (self.name, self.expr)
        if scope == 'global':
            return 'globals->store(%s, %s)' % (self.idx, self.expr)
        return 'ctx.store(%s, %s)' % (self.idx, self.expr)
class StoreAttr(Node):
    """An attribute assignment, rendered as a ``__setattr__`` call."""

    def __init__(self, name, attr, expr):
        self.name = name
        self.attr = attr
        self.expr = expr

    def __str__(self):
        return '{obj}->__setattr__({attr}, {value})'.format(
            obj=self.name, attr=self.attr, value=self.expr)
class StoreSubscript(Node):
    """An item assignment, rendered as a ``__setitem__`` call."""

    def __init__(self, expr, index, value):
        self.expr = expr
        self.index = index
        self.value = value

    def __str__(self):
        return '{obj}->__setitem__({index}, {value})'.format(
            obj=self.expr, index=self.index, value=self.value)
class DeleteSubscript(Node):
    """An item deletion, rendered as a ``__delitem__`` call."""

    def __init__(self, expr, index):
        self.expr = expr
        self.index = index

    def __str__(self):
        return '{obj}->__delitem__({index})'.format(obj=self.expr, index=self.index)
class List(Node):
    """A list literal; flatten() emits the statements that build it."""

    def __init__(self, items):
        self.items = items

    def flatten(self, ctx):
        """Append the construction statements to ``ctx.statements``.

        Two temporaries are taken from *ctx*: one for a C++ ``node *`` array
        holding the items, one for the resulting list.  Returns the latter.
        """
        array_name = ctx.get_temp()
        result_name = ctx.get_temp()
        count = len(self.items)
        # XXX HACK: add just some C++ text instead of syntax nodes...
        emitted = ['node *%s[%d]' % (array_name, count)]
        emitted += ['%s[%d] = %s' % (array_name, position, item)
                    for position, item in enumerate(self.items)]
        emitted.append(Assign(result_name, Ref('list', count, array_name), target_type='list'))
        ctx.statements += emitted
        return result_name

    def __str__(self):
        # All output is produced by flatten(); the node itself renders empty.
        return ''
class Tuple(Node):
    """A tuple, built either from a Python list of items or from an iterable node."""

    def __init__(self, items):
        self.items = items

    def flatten(self, ctx):
        """Append the construction statements to ``ctx.statements``.

        When ``items`` is a Python list, the items are stored into a C++
        ``node *`` array that is handed to the tuple constructor.  Otherwise
        ``items`` is rendered as an expression whose ``__iter__()`` result is
        drained into a ``node_list``.  Returns the temporary naming the tuple.
        """
        list_name = ctx.get_temp()
        name = ctx.get_temp()
        # XXX HACK: add just some C++ text instead of syntax nodes...
        if isinstance(self.items, list):
            count = len(self.items)
            ctx.statements += ['node *%s[%d]' % (list_name, count)]
            for i, item in enumerate(self.items):
                ctx.statements += ['%s[%d] = %s' % (list_name, i, item)]
            ctx.statements += [Assign(name, Ref('tuple', count, list_name), target_type='tuple')]
        else:
            iter_name = ctx.get_temp()
            # NOTE(review): the closing bracket of this list was missing from
            # the reviewed copy and is restored here -- confirm against upstream.
            ctx.statements += [
                'node_list %s' % list_name,
                'node *%s = %s->__iter__()' % (iter_name, self.items),
                # Exactly two placeholders: the iterator and the target list.
                # Passing iter_name twice raised "not all arguments converted".
                'while (node *item = %s->next()) %s.push_back(item)' % (iter_name, list_name),
                Assign(name, Ref('tuple', list_name), target_type='tuple'),
            ]
        return name

    def __str__(self):
        # All output is produced by flatten(); the node itself renders empty.
        return ''
class Dict(Node):
    """A dict literal given as parallel sequences of keys and values."""

    def __init__(self, keys, values):
        self.keys = keys
        self.values = values

    def flatten(self, ctx):
        """Append statements creating the dict and storing each pair; return its temp."""
        name = ctx.get_temp()
        # XXX HACK: add just some C++ text instead of syntax nodes...
        stores = ['%s->__setitem__(%s, %s)' % (name, key, value)
                  for key, value in zip(self.keys, self.values)]
        ctx.statements += [Assign(name, Ref('dict'), target_type='dict')] + stores
        return name

    def __str__(self):
        # All output is produced by flatten(); the node itself renders empty.
        return ''
class Set(Node):
    """A set literal; flatten() emits the statements that build it."""

    def __init__(self, items):
        self.items = items

    def flatten(self, ctx):
        """Append statements creating the set and adding each item; return its temp."""
        name = ctx.get_temp()
        # XXX HACK: add just some C++ text instead of syntax nodes...
        additions = ['%s->add(%s)' % (name, item) for item in self.items]
        ctx.statements += [Assign(name, Ref('set'), target_type='set')] + additions
        return name

    def __str__(self):
        # All output is produced by flatten(); the node itself renders empty.
        return ''
class Slice(Node):
    """A slice expression, rendered as a ``__slice__`` call."""

    def __init__(self, expr, start, end, step):
        self.expr = expr
        self.start = start
        self.end = end
        self.step = step

    def __str__(self):
        return '{obj}->__slice__({start}, {end}, {step})'.format(
            obj=self.expr, start=self.start, end=self.end, step=self.step)
class Subscript(Node):
    """An item read, rendered as a ``__getitem__`` call."""

    def __init__(self, expr, index):
        self.expr = expr
        self.index = index

    def __str__(self):
        return '{obj}->__getitem__({index})'.format(obj=self.expr, index=self.index)
class Attribute(Node):
    """An attribute read, rendered as a ``__getattr__`` call."""

    def __init__(self, expr, attr):
        self.expr = expr
        self.attr = attr

    def __str__(self):
        return '{obj}->__getattr__({attr})'.format(obj=self.expr, attr=self.attr)
class Call(Node):
    """A call, rendered as ``__call__`` with the globals, the context and both argument objects."""

    def __init__(self, func, args, kwargs):
        self.func = func
        self.args = args
        self.kwargs = kwargs

    def __str__(self):
        return '{callee}->__call__(globals, &ctx, {args}, {kwargs})'.format(
            callee=self.func, args=self.args, kwargs=self.kwargs)
class IfExp(Node):
    """A conditional expression whose result is delivered through a temporary."""

    def __init__(self, expr, true_stmts, true_expr, false_stmts, false_expr):
        self.expr = expr
        self.true_stmts = true_stmts
        self.true_expr = true_expr
        self.false_stmts = false_stmts
        self.false_expr = false_expr

    def flatten(self, ctx):
        """Declare the result temporary as NULL, queue this node, return the temporary."""
        self.temp = ctx.get_temp()
        ctx.statements += [Assign(self.temp, 'NULL'), self]
        return self.temp

    def __str__(self):
        # NOTE(review): the final closing-brace line of this template was
        # missing from the reviewed copy and is restored here -- confirm.
        template = (
            'if ({expr}->bool_value()) {{\n'
            '{true_stmts}\n'
            '{temp} = {true_expr};\n'
            '}} else {{\n'
            '{false_stmts}\n'
            '{temp} = {false_expr};\n'
            '}}\n'
        )
        return template.format(
            expr=self.expr,
            temp=self.temp.name,
            true_stmts=block_str(self.true_stmts),
            true_expr=self.true_expr,
            false_stmts=block_str(self.false_stmts),
            false_expr=self.false_expr,
        )
class BoolOp(Node):
    """A short-circuit ``and``/``or`` whose result is delivered through a temporary."""

    def __init__(self, op, lhs_expr, rhs_stmts, rhs_expr):
        self.op = op
        self.lhs_expr = lhs_expr
        self.rhs_stmts = rhs_stmts
        self.rhs_expr = rhs_expr

    # XXX hack
    def flatten(self, ctx, statements):
        """Assign the left operand to a fresh temporary, queue this node on
        *statements* (not on ``ctx.statements``), and return the temporary."""
        self.temp = ctx.get_temp()
        queued = [Assign(self.temp, self.lhs_expr), self]
        statements += queued
        return self.temp

    def __str__(self):
        # For 'or' the right side runs only when the left side is false, so
        # the test is negated; any other op leaves it as is.
        negation = '!' if self.op == 'or' else ''
        # NOTE(review): the closing-brace line of this template was missing
        # from the reviewed copy and is restored here -- confirm.
        template = (
            'if ({op}{lhs_expr}->bool_value()) {{\n'
            '{rhs_stmts}\n'
            '{temp} = {rhs_expr};\n'
            '}}\n'
        )
        return template.format(
            op=negation,
            lhs_expr=self.lhs_expr,
            temp=self.temp.name,
            rhs_stmts=block_str(self.rhs_stmts),
            rhs_expr=self.rhs_expr,
        )
class Assign(Node):
    """An assignment; with a *target_type* it also declares the target as a pointer."""

    def __init__(self, target, expr, target_type='node'):
        self.target = target
        self.expr = expr
        self.target_type = target_type

    def __str__(self):
        assignment = '%s = %s' % (self.target, self.expr)
        if self.target_type is None:
            # Plain re-assignment of an already declared name.
            return assignment
        return '%s *%s' % (self.target_type, assignment)
class If(Node):
    """An ``if`` statement with an optional else block."""

    def __init__(self, expr, stmts, else_block):
        self.expr = expr
        self.stmts = stmts
        self.else_block = else_block

    def __str__(self):
        # NOTE(review): the closing-brace lines of both templates were missing
        # from the reviewed copy and are restored here -- confirm.
        then_template = (
            'if ({expr}->bool_value()) {{\n'
            '{stmts}\n'
            '}}\n'
        )
        body = then_template.format(expr=self.expr, stmts=block_str(self.stmts))
        if not self.else_block:
            return body
        else_template = (
            'else {{\n'
            '{stmts}\n'
            '}}\n'
        )
        return body + else_template.format(stmts=block_str(self.else_block))
class Comprehension(Node):
    """A list, set or dict comprehension, rendered as a loop filling a container."""

    def __init__(self, comp_type, target, iter, iter_name, iter_binding,
            cond_stmts, cond, expr_stmts, expr, expr2):
        self.comp_type = comp_type
        self.target = target
        self.iter = iter
        self.iter_name = iter_name
        self.iter_binding = iter_binding
        self.cond_stmts = cond_stmts
        self.cond = cond
        self.expr_stmts = expr_stmts
        self.expr = expr
        self.expr2 = expr2

    def flatten(self, ctx):
        """Queue the empty container, the iterator assignment and this node.

        Returns the temporary naming the container being filled.
        """
        if self.comp_type == 'set':
            container = Set([])
        elif self.comp_type == 'dict':
            container = Dict([], [])
        else:
            container = List([])
        self.temp = container.flatten(ctx)
        iterator_init = Assign(self.iter_name, '%s->__iter__()' % self.iter)
        ctx.statements += [iterator_init, self]
        # HACK: prevent iterator from being garbage collected
        self.iter_store = Store(self.iter_name, self.iter_name, self.iter_binding)
        return self.temp

    def __str__(self):
        cond_stmts = block_str(self.cond_stmts)
        expr_stmts = block_str(self.expr_stmts)

        # XXX HACK
        if isinstance(self.target[0], tuple):
            # Several (name, binding) pairs: unpack the item by position.
            unpack = [Store(name, 'item->__getitem__(%s)' % position, binding)
                      for position, (name, binding) in enumerate(self.target)]
        else:
            name, binding = self.target
            unpack = [Store(name, 'item', binding)]
        arg_unpacking = block_str(unpack)

        cond = ''
        if self.cond:
            cond = 'if (!(%s)->bool_value()) continue;' % self.cond

        if self.comp_type == 'set':
            adder = '%s->add(%s);' % (self.temp, self.expr)
        elif self.comp_type == 'dict':
            adder = '%s->__setitem__(%s, %s);' % (self.temp, self.expr, self.expr2)
        else:
            adder = '%s->append(%s);' % (self.temp, self.expr)

        # NOTE(review): the closing-brace line of this template was missing
        # from the reviewed copy and is restored here -- confirm.
        template = (
            '\n'
            '{iter_store};\n'
            'while (node *item = {iter}->next()) {{\n'
            '{arg_unpacking}\n'
            '{cond_stmts}\n'
            '{cond}\n'
            '{expr_stmts}\n'
            '{adder}\n'
            '}}\n'
        )
        return template.format(
            iter=self.iter_name,
            iter_store=self.iter_store,
            arg_unpacking=arg_unpacking,
            cond_stmts=cond_stmts,
            cond=cond,
            expr_stmts=expr_stmts,
            adder=adder,
        )
class Break(Node):
    """A ``break`` statement."""

    def __init__(self):
        pass

    def __str__(self):
        return 'break'

    def is_atom(self):
        return True
class Continue(Node):
    """A ``continue`` statement."""

    def __init__(self):
        pass

    def __str__(self):
        return 'continue'

    def is_atom(self):
        return True
class For(Node):
    """A ``for`` loop, rendered as a ``while`` loop pulling items from an iterator."""

    def __init__(self, target, iter, stmts, iter_name, iter_binding):
        self.target = target
        self.iter = iter
        self.stmts = stmts
        self.iter_name = Identifier(iter_name)
        self.iter_binding = iter_binding

    def flatten(self, ctx):
        """Queue the iterator assignment on ``ctx.statements`` and return this node."""
        iterator_init = Assign(self.iter_name, '%s->__iter__()' % self.iter)
        ctx.statements += [iterator_init]
        # HACK: prevent iterator from being garbage collected
        self.iter_store = Store(self.iter_name, self.iter_name, self.iter_binding)
        return self

    def __str__(self):
        stmts = block_str(self.stmts)

        if isinstance(self.target, list):
            # Several (name, binding) pairs: unpack the item by position.
            unpack = [Store(name, 'item->__getitem__(%s)' % position, binding)
                      for position, (name, binding) in enumerate(self.target)]
        else:
            name, binding = self.target
            unpack = [Store(name, 'item', binding)]
        arg_unpacking = block_str(unpack)

        # XXX sorta weird?
        # NOTE(review): the closing-brace line of this template was missing
        # from the reviewed copy and is restored here -- confirm.
        template = (
            '\n'
            '{iter_store};\n'
            'while (node *item = {iter}->next()) {{\n'
            '{arg_unpacking}\n'
            '{stmts}\n'
            'collect_garbage(&ctx, NULL);\n'
            '}}\n'
        )
        return template.format(
            iter=self.iter_name,
            iter_store=self.iter_store,
            arg_unpacking=arg_unpacking,
            stmts=stmts,
        )
class While(Node):
    """A ``while`` loop whose test statements are emitted before and inside the loop."""

    def __init__(self, test_stmts, test, stmts):
        self.test_stmts = test_stmts
        self.test = test
        self.stmts = stmts

    def __str__(self):
        # XXX Super hack: too lazy to do this properly now
        # The test statements run once before the loop and again at the end of
        # each iteration.  The repeated copy has the target_type of its last
        # statement cleared, so that Assign renders without a type prefix.
        repeated = copy.deepcopy(self.test_stmts)
        final_stmt = repeated[-1]
        assert isinstance(final_stmt, Assign)
        final_stmt.target_type = None

        # NOTE(review): the lines holding the loop's opening and closing brace
        # were missing from the reviewed copy and are restored here -- confirm.
        template = (
            '\n'
            '{test_stmts}\n'
            'while ({test}->bool_value())\n'
            '{{\n'
            '{stmts}\n'
            'collect_garbage(&ctx, NULL);\n'
            '{dup_test_stmts}\n'
            '}}\n'
        )
        return template.format(
            test_stmts=block_str(self.test_stmts),
            dup_test_stmts=block_str(repeated),
            test=self.test,
            stmts=block_str(self.stmts),
        )
class Return(Node):
    """A ``return`` statement; a missing value is replaced by NoneConst()."""

    def __init__(self, value):
        self.value = NoneConst() if value is None else value

    def __str__(self):
        # The returned value is also passed to collect_garbage().
        template = (
            '\n'
            'collect_garbage(&ctx, %s);\n'
            'return %s;\n'
        )
        return template % (self.value, self.value)
class Assert(Node):
    """An ``assert`` statement that reports the source line number on failure."""

    def __init__(self, expr, lineno):
        self.expr = expr
        self.lineno = lineno

    def __str__(self):
        # NOTE(review): the closing-brace line of this template was missing
        # from the reviewed copy and is restored here -- confirm.
        template = (
            'if (!{expr}->bool_value()) {{\n'
            'error("assert failed at line {lineno}");\n'
            '}}\n'
        )
        return template.format(expr=self.expr, lineno=self.lineno)
class Arguments(Node):
    """A function's parameter list with per-parameter bindings and defaults."""

    def __init__(self, args, binding, defaults):
        self.args = args
        self.binding = binding
        self.defaults = defaults

    def flatten(self, ctx):
        """Left-pad ``defaults`` with None to the length of ``args``, create a
        StringConst per parameter name, and return this node."""
        missing = len(self.args) - len(self.defaults)
        self.defaults = [None] * missing + self.defaults
        self.name_strings = [StringConst(arg) for arg in self.args]
        return self

    def __str__(self):
        stores = []
        columns = zip(self.args, self.binding, self.defaults, self.name_strings)
        for position, (arg, binding, default, name) in enumerate(columns):
            if default:
                # Keyword argument first, then the positional one, then the default.
                source = ('kwargs->lookup(%s) ? kwargs->lookup(%s) '
                          ': (args->len() > %s ? args->__getitem__(%s) : %s)' %
                          (name, name, position, position, default))
            else:
                source = 'args->__getitem__(%s)' % position
            stores.append(Store(arg, source, binding))
        return block_str(stores)
class FunctionDef(Node):
    """A function definition, rendered as a C++ function taking args and kwargs."""

    def __init__(self, name, args, stmts, exp_name, binding, local_count):
        self.name = name
        exported = exp_name if exp_name else name
        self.exp_name = 'fn_%s' % exported # make sure no name collisions
        self.args = args
        self.stmts = stmts
        self.binding = binding
        self.local_count = local_count

    def flatten(self, ctx):
        """Register this function on ``ctx.functions`` and return the statement
        list that stores a ``function_def`` reference under the function's name."""
        ctx.functions += [self]
        reference = Ref('function_def', Identifier(self.exp_name))
        return [Store(self.name, reference, self.binding)]

    def __str__(self):
        stmts = block_str(self.stmts)
        arg_unpacking = str(self.args)
        template = (
            '\n'
            'node *{name}(context *globals, context *parent_ctx, tuple *args, dict *kwargs) {{\n'
            'node *local_syms[{local_count}];\n'
            'context ctx(parent_ctx, {local_count}, local_syms);\n'
            '{arg_unpacking}\n'
            '{stmts}\n'
            'return &none_singleton;\n'
            '}}'
        )
        return template.format(
            name=self.exp_name,
            local_count=self.local_count,
            arg_unpacking=arg_unpacking,
            stmts=stmts,
        )
class ClassDef(Node):
    """A class definition, rendered as a C++ ``_<name>__create__`` function."""

    def __init__(self, name, binding, stmts):
        self.name = name
        self.binding = binding
        self.stmts = stmts

    def flatten(self, ctx):
        """Register this class on ``ctx.functions`` and return the statement
        list that stores a ``class_def`` reference under the class's name."""
        ctx.functions += [self]
        quoted_name = '"%s"' % self.name
        creator = Identifier('_%s__create__' % self.name)
        return [Store(self.name, Ref('class_def', quoted_name, creator), self.binding)]

    def __str__(self):
        template = (
            '\n'
            'void _{name}__create__(class_def *class_ctx) {{\n'
            '{stmts}\n'
            '}}'
        )
        return template.format(name=self.name, stmts=block_str(self.stmts))