Removed the notion of a "large" context. For simplicity, all contexts
[panda.git] / src / st-generator.c
blob71b09182c5b6b7352eaf9f3a33dfef5d4622be0a
1 /*
2 * st-generator.c
4 * Copyright (C) 2008 Vincent Geddes
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 #include "st-compiler.h"
27 #include "st-types.h"
28 #include "st-object.h"
29 #include "st-symbol.h"
30 #include "st-dictionary.h"
31 #include "st-method.h"
32 #include "st-array.h"
33 #include "st-array.h"
34 #include "st-universe.h"
35 #include "st-behavior.h"
36 #include "st-character.h"
37 #include "st-unicode.h"
39 #include <string.h>
40 #include <stdlib.h>
41 #include <setjmp.h>
43 #define DEFAULT_CODE_SIZE 20
45 #define CSTRING(string) ((char *) st_byte_array_bytes (string))
47 typedef struct
49 st_oop class;
51 jmp_buf jmploc;
53 st_compiler_error *error;
55 /* names of temporaries, in order of appearance */
56 st_list *temporaries;
57 /* names of instvars, in order they were defined */
58 st_list *instvars;
59 /* literal frame for the compiled code */
60 st_list *literals;
62 } Generator;
64 typedef struct
66 st_uchar *buffer;
67 st_uint size;
68 st_uint alloc;
69 st_uint max_stack_depth;
70 } st_bytecode;
72 static st_uint sizes[255] = { 0, };
75 // setup global data for compiler
76 static void
77 check_init (void)
79 static bool initialized = false;
81 if (initialized)
82 return;
83 initialized = true;
85 /* The size (in bytes) of each bytecode instruction
87 sizes[PUSH_TEMP] = 2;
88 sizes[PUSH_INSTVAR] = 2;
89 sizes[PUSH_LITERAL_CONST] = 2;
90 sizes[PUSH_LITERAL_VAR] = 2;
91 sizes[PUSH_SELF] = 1;
92 sizes[PUSH_NIL] = 1;
93 sizes[PUSH_TRUE] = 1;
94 sizes[PUSH_FALSE] = 1;
95 sizes[PUSH_INTEGER] = 2;
96 sizes[STORE_LITERAL_VAR] = 2;
97 sizes[STORE_TEMP] = 2;
98 sizes[STORE_INSTVAR] = 2;
99 sizes[STORE_POP_LITERAL_VAR] = 2;
100 sizes[STORE_POP_TEMP] = 2;
101 sizes[STORE_POP_INSTVAR] = 2;
102 sizes[RETURN_STACK_TOP] = 1;
103 sizes[BLOCK_RETURN] = 1;
104 sizes[POP_STACK_TOP] = 1;
105 sizes[DUPLICATE_STACK_TOP] = 1;
106 sizes[PUSH_ACTIVE_CONTEXT] = 1;
107 sizes[BLOCK_COPY] = 2;
108 sizes[JUMP_TRUE] = 3;
109 sizes[JUMP_FALSE] = 3;
110 sizes[JUMP] = 3;
111 sizes[SEND] = 3;
112 sizes[SEND_SUPER] = 3;
113 sizes[SEND_PLUS] = 1;
114 sizes[SEND_MINUS] = 1;
115 sizes[SEND_LT] = 1;
116 sizes[SEND_GT] = 1;
117 sizes[SEND_LE] = 1;
118 sizes[SEND_GE] = 1;
119 sizes[SEND_EQ] = 1;
120 sizes[SEND_NE] = 1;
121 sizes[SEND_MUL] = 1;
122 sizes[SEND_DIV] = 1;
123 sizes[SEND_MOD] = 1;
124 sizes[SEND_BITSHIFT] = 1;
125 sizes[SEND_BITAND] = 1;
126 sizes[SEND_BITOR] = 1;
127 sizes[SEND_BITXOR] = 1;
128 sizes[SEND_AT] = 1;
129 sizes[SEND_AT_PUT] = 1;
130 sizes[SEND_SIZE] = 1;
131 sizes[SEND_VALUE] = 1;
132 sizes[SEND_VALUE_ARG] = 1;
133 sizes[SEND_IDENTITY_EQ] = 1;
134 sizes[SEND_CLASS] = 1;
135 sizes[SEND_NEW] = 1;
136 sizes[SEND_NEW_ARG] = 1;
139 static int size_message (Generator *gt, st_node *node);
140 static int size_expression (Generator *gt, st_node *node);
141 static int size_statements (Generator *gt, st_node *node);
144 static void generate_expression (Generator *gt, st_bytecode *code, st_node *node);
145 static void generate_statements (Generator *gt, st_bytecode *code, st_node *statements);
147 static void
148 generation_error (Generator *gt, const char *message, st_node *node)
150 if (gt->error) {
151 strncpy (gt->error->message, message, 255);
152 gt->error->line = node->line;
153 gt->error->column = 0;
156 longjmp (gt->jmploc, 0);
159 static void
160 bytecode_init (st_bytecode *code)
162 code->buffer = st_malloc (DEFAULT_CODE_SIZE);
163 code->alloc = DEFAULT_CODE_SIZE;
164 code->size = 0;
165 code->max_stack_depth = 0;
168 static void
169 bytecode_destroy (st_bytecode *code)
171 st_free (code->buffer);
175 static st_list *
176 get_temporaries (Generator *gt,
177 st_list *instvars,
178 st_node *arguments,
179 st_node *temporaries)
181 st_list *temps = NULL;
183 for (st_node *n = arguments; n; n = n->next) {
184 for (st_list *l = instvars; l; l = l->next) {
185 if (streq (n->variable.name, (char *) l->data))
186 generation_error (gt, "name is already defined", arguments);
188 temps = st_list_prepend (temps, (st_pointer) n->variable.name);
191 for (st_node *n = temporaries; n; n = n->next) {
192 for (st_list *l = instvars; l; l = l->next) {
193 if (streq (n->variable.name, (char *) l->data))
194 generation_error (gt, "name is already defined", temporaries);
196 temps = st_list_prepend (temps, (st_pointer) n->variable.name);
199 return st_list_reverse (temps);
202 static Generator *
203 generator_new (void)
205 Generator *gt;
207 gt = st_new0 (Generator);
209 gt->class = 0;
210 gt->instvars = NULL;
211 gt->literals = NULL;
212 gt->temporaries = NULL;
214 return gt;
217 static void
218 generator_destroy (Generator *gt)
220 st_list_foreach (gt->instvars, st_free);
221 st_list_destroy (gt->instvars);
222 st_list_destroy (gt->temporaries);
223 st_list_destroy (gt->literals);
225 st_free (gt);
228 static st_oop
229 create_literals_array (Generator *gt)
231 st_oop literals;
232 st_uint i;
234 gt->literals = st_list_append (gt->literals, (st_pointer) gt->class);
235 literals = st_object_new_arrayed (ST_ARRAY_CLASS, st_list_length (gt->literals));
237 i = 1;
238 for (st_list *l = gt->literals; l; l = l->next) {
239 st_array_at_put (literals, i, (st_oop) l->data);
240 i++;
243 return literals;
246 static st_oop
247 create_bytecode_array (st_bytecode *code)
249 st_oop array;
251 if (code->size == 0)
252 return ST_NIL;
254 array = st_object_new_arrayed (ST_BYTE_ARRAY_CLASS, code->size);
255 memcpy (st_byte_array_bytes (array), code->buffer, code->size);
257 return array;
260 static void
261 emit (st_bytecode *code, st_uchar value)
263 if (++code->size > code->alloc) {
264 code->alloc += code->alloc;
265 code->buffer = st_realloc (code->buffer, code->alloc);
268 code->buffer[code->size - 1] = value;
271 static int
272 find_instvar (Generator *gt, char *name)
274 int i = 0;
275 for (st_list *l = gt->instvars; l; l = l->next) {
277 if (streq (name, (char *) l->data))
278 return i;
279 i++;
281 return -1;
284 static int
285 find_temporary (Generator *gt, char *name)
287 int i = 0;
289 for (st_list *l = gt->temporaries; l; l = l->next) {
291 if (streq (name, (char *) l->data))
292 return i;
293 i++;
295 return -1;
298 static int
299 find_literal_const (Generator *gt, st_oop literal)
301 int i = 0;
302 for (st_list *l = gt->literals; l; l = l->next) {
304 if (st_object_equal (literal, (st_oop) l->data))
305 return i;
306 i++;
308 gt->literals = st_list_append (gt->literals, (void *) literal);
309 return i;
312 static int
313 find_literal_var (Generator *gt, char *name)
315 st_oop assoc;
317 assoc = st_dictionary_association_at (ST_GLOBALS, st_symbol_new (name));
318 if (assoc == ST_NIL)
319 return -1;
321 int i = 0;
322 for (st_list *l = gt->literals; l; l = l->next) {
323 if (st_object_equal (assoc, (st_oop) l->data))
324 return i;
325 i++;
327 gt->literals = st_list_append (gt->literals, (st_pointer) assoc);
328 return i;
331 static void
332 jump_offset (Generator *gt, st_bytecode *code, int offset)
334 st_assert (offset <= INT16_MAX);
336 emit (code, JUMP);
338 /* push low byte */
339 emit (code, offset & 0xFF);
340 /* push high byte */
341 emit (code, (offset >> 8) & 0xFF);
345 static void
346 assign_temp (Generator *gt, st_bytecode *code, int index, bool pop)
348 st_assert (index <= 255);
350 if (pop)
351 emit (code, STORE_POP_TEMP);
352 else
353 emit (code, STORE_TEMP);
354 emit (code, (st_uchar) index);
357 static void
358 assign_instvar (Generator *gt, st_bytecode *code, int index, bool pop)
360 st_assert (index <= 255);
362 if (pop)
363 emit (code, STORE_POP_INSTVAR);
364 else
365 emit (code, STORE_INSTVAR);
366 emit (code, (st_uchar) index);
369 static void
370 assign_literal_var (Generator *gt, st_bytecode *code, int index, bool pop)
372 st_assert (index <= 255);
374 if (pop)
375 emit (code, STORE_POP_LITERAL_VAR);
376 else
377 emit (code, STORE_LITERAL_VAR);
378 emit (code, (st_uchar) index);
381 static void
382 push (Generator *gt, st_bytecode *code, st_uchar value, st_uchar index)
384 emit (code, value);
385 emit (code, index);
387 code->max_stack_depth++;
391 static void
392 push_special (Generator *gt, st_bytecode *code, st_uchar value)
394 emit (code, value);
395 code->max_stack_depth++;
398 static void
399 generate_assign (Generator *gt, st_bytecode *code, st_node *node, bool pop)
401 int index;
403 generate_expression (gt, code, node->assign.expression);
405 index = find_temporary (gt, node->assign.assignee->variable.name);
406 if (index >= 0) {
407 assign_temp (gt, code, index, pop);
408 return;
411 index = find_instvar (gt, node->assign.assignee->variable.name);
412 if (index >= 0) {
413 assign_instvar (gt, code, index, pop);
414 return;
417 index = find_literal_var (gt, node->assign.assignee->variable.name);
418 if (index >= 0) {
419 assign_literal_var (gt, code, index, pop);
420 return;
423 generation_error (gt, "unknown variable", node);
427 static int
428 size_assign (Generator *gt, st_node *node)
430 st_bytecode code;
432 bytecode_init (&code);
433 generate_assign (gt, &code, node, true);
434 bytecode_destroy (&code);
436 return code.size;
439 static void
440 generate_return (Generator *gt, st_bytecode *code, st_node *node)
442 generate_expression (gt, code, node->retrn.expression);
444 emit (code, RETURN_STACK_TOP);
447 static int
448 size_return (Generator *gt, st_node *node)
450 st_bytecode code;
452 bytecode_init (&code);
453 generate_return (gt, &code, node);
454 bytecode_destroy (&code);
456 return code.size;
459 static st_list *
460 get_block_temporaries (Generator *gt, st_node *temporaries)
462 st_list *temps = NULL;
464 for (st_node *node = temporaries; node; node = node->next) {
466 for (st_list *l = gt->instvars; l; l = l->next) {
467 if (streq (node->variable.name, (char *) l->data))
468 generation_error (gt, "name is already defined", node);
471 for (st_list *l = gt->temporaries; l; l = l->next) {
472 if (streq (node->variable.name, (char *) l->data))
473 generation_error (gt, "name already used in method", node);
476 temps = st_list_prepend (temps, (st_pointer) node->variable.name);
480 return st_list_reverse (temps);
483 static void
484 generate_block (Generator *gt, st_bytecode *code, st_node *node)
486 int index, size = 0;
487 st_uint i, argcount;
488 st_node *l;
490 argcount = st_node_list_length (node->block.arguments);
492 emit (code, BLOCK_COPY);
493 emit (code, argcount);
495 // get size of block code and then jump around that code
496 size = 2 * argcount + size_statements (gt, node->block.statements) + sizes[BLOCK_RETURN];
497 jump_offset (gt, code, size);
499 /* Store all block arguments into the temporary frame.
500 Note that upon a block activation, the stack pointer sits
501 above the last argument to to the block */
502 i = st_node_list_length (node->block.arguments);
503 for (; i > 0; --i) {
504 l = st_node_list_at (node->block.arguments, i - 1);
505 index = find_temporary (gt, l->variable.name);
506 st_assert (index >= 0);
507 assign_temp (gt, code, index, true);
510 generate_statements (gt, code, node->block.statements);
511 emit (code, BLOCK_RETURN);
514 static int
515 size_block (Generator *gt, st_node *node)
517 st_bytecode code;
519 bytecode_init (&code);
520 generate_block (gt, &code, node);
521 bytecode_destroy (&code);
523 return code.size;
526 /* #ifTrue:
528 static bool
529 match_ifTrue (Generator *gt, st_node *node)
531 st_node *block;
533 if (strcmp (CSTRING (node->message.selector), "ifTrue:") != 0)
534 return false;
536 block = node->message.arguments;
537 if (block->type != ST_BLOCK_NODE || block->block.arguments != NULL)
538 return false;
540 return true;
543 static void
544 generate_ifTrue (Generator *gt, st_bytecode *code, st_node *node)
546 st_node *block;
547 int size;
549 block = node->message.arguments;
551 generate_expression (gt, code, node->message.receiver);
552 size = size_statements (gt, block->block.statements);
553 size += node->message.is_statement ? sizes[POP_STACK_TOP] : sizes[JUMP];
554 emit (code, JUMP_FALSE);
555 emit (code, size & 0xFF);
556 emit (code, (size >> 8) & 0xFF);
557 generate_statements (gt, code, block->block.statements);
559 if (node->message.is_statement) {
560 emit (code, POP_STACK_TOP);
561 } else {
562 emit (code, JUMP);
563 emit (code, 1);
564 emit (code, 0);
565 emit (code, PUSH_NIL);
569 /* #ifFalse:
571 static bool
572 match_ifFalse (Generator *gt, st_node *node)
574 st_node *block;
576 if (strcmp (CSTRING (node->message.selector), "ifFalse:") != 0)
577 return false;
579 block = node->message.arguments;
580 if (block->type != ST_BLOCK_NODE || block->block.arguments != NULL)
581 return false;
583 return true;
586 static void
587 generate_ifFalse (Generator *gt, st_bytecode *code, st_node *node)
589 st_node *block;
590 int size;
592 block = node->message.arguments;
594 generate_expression (gt, code, node->message.receiver);
595 size = size_statements (gt, block->block.statements);
596 size += node->message.is_statement ? sizes[POP_STACK_TOP] : sizes[JUMP];
597 emit (code, JUMP_TRUE);
598 emit (code, size & 0xFF);
599 emit (code, (size >> 8) & 0xFF);
600 generate_statements (gt, code, block->block.statements);
602 if (node->message.is_statement) {
603 emit (code, POP_STACK_TOP);
604 } else {
605 emit (code, JUMP);
606 emit (code, 1);
607 emit (code, 0);
608 emit (code, PUSH_NIL);
612 /* #ifTrueifFalse:
614 static bool
615 match_ifTrueifFalse (Generator *gt, st_node *node)
617 st_node *true_block, *false_block;
619 if (strcmp (CSTRING (node->message.selector), "ifTrue:ifFalse:") != 0)
620 return false;
622 true_block = node->message.arguments;
623 if (true_block->type != ST_BLOCK_NODE || true_block->block.arguments != NULL)
624 return false;
626 false_block = node->message.arguments->next;
627 if (false_block->type != ST_BLOCK_NODE || false_block->block.arguments != NULL)
628 return false;
630 return true;
633 static void
634 generate_ifTrueifFalse (Generator *gt, st_bytecode *code, st_node *node)
636 int size;
638 st_node *true_block, *false_block;
640 true_block = node->message.arguments;
641 false_block = node->message.arguments->next;
643 generate_expression (gt, code, node->message.receiver);
644 size = size_statements (gt, true_block->block.statements) + sizes[JUMP];
645 emit (code, JUMP_FALSE);
646 emit (code, size & 0xFF);
647 emit (code, (size >> 8) & 0xFF);
648 generate_statements (gt, code, true_block->block.statements);
649 size = size_statements (gt, false_block->block.statements);
650 emit (code, JUMP);
651 emit (code, size & 0xFF);
652 emit (code, (size >> 8) & 0xFF);
653 generate_statements (gt, code, false_block->block.statements);
655 if (node->message.is_statement)
656 emit (code, POP_STACK_TOP);
659 /* #ifFalseifTrue:
661 static bool
662 match_ifFalseifTrue (Generator *gt, st_node *node)
664 st_node *true_block, *false_block;
666 if (strcmp (CSTRING (node->message.selector), "ifFalse:ifTrue:") != 0)
667 return false;
669 true_block = node->message.arguments;
670 if (true_block->type != ST_BLOCK_NODE || true_block->block.arguments != NULL)
671 return false;
673 false_block = node->message.arguments->next;
674 if (false_block->type != ST_BLOCK_NODE || false_block->block.arguments != NULL)
675 return false;
677 return true;
680 static void
681 generate_ifFalseifTrue (Generator *gt, st_bytecode *code, st_node *node)
683 st_node *true_block, *false_block;
684 int size;
686 true_block = node->message.arguments;
687 false_block = node->message.arguments->next;
689 generate_expression (gt, code, node->message.receiver);
690 size = size_statements (gt, true_block->block.statements) + sizes[JUMP];
691 emit (code, JUMP_TRUE);
692 emit (code, size & 0xFF);
693 emit (code, (size >> 8) & 0xFF);
694 generate_statements (gt, code, true_block->block.statements);
695 size = size_statements (gt, false_block->block.statements);
696 emit (code, JUMP);
697 emit (code, size & 0xFF);
698 emit (code, (size >> 8) & 0xFF);
699 generate_statements (gt, code, false_block->block.statements);
701 if (node->message.is_statement)
702 emit (code, POP_STACK_TOP);
705 /* #whileTrue
707 static bool
708 match_whileTrue (Generator *gt, st_node *node)
710 st_node *block;
712 if (strcmp (CSTRING (node->message.selector), "whileTrue") != 0)
713 return false;
715 block = node->message.receiver;
716 if (block->type != ST_BLOCK_NODE || block->block.arguments != NULL)
717 return false;
719 return true;
722 static void
723 generate_whileTrue (Generator *gt, st_bytecode *code, st_node *node)
725 st_node *block;
726 int size;
728 block = node->message.receiver;
730 generate_statements (gt, code, block->block.statements);
732 emit (code, JUMP_FALSE);
733 emit (code, 3);
734 emit (code, 0);
735 size = size_statements (gt, block->block.statements);
736 size += sizes[JUMP_FALSE];
737 size = -(size + 3);
738 emit (code, JUMP);
739 emit (code, size & 0xFF);
740 emit (code, (size >> 8) & 0xFF);
742 if (!node->message.is_statement)
743 emit (code, PUSH_NIL);
746 /* #whileFalse
748 static bool
749 match_whileFalse (Generator *gt, st_node *node)
751 st_node *block;
753 if (strcmp (CSTRING (node->message.selector), "whileFalse") != 0)
754 return false;
756 block = node->message.receiver;
757 if (block->type != ST_BLOCK_NODE || block->block.arguments != NULL)
758 return false;
760 return true;
763 static void
764 generate_whileFalse (Generator *gt, st_bytecode *code, st_node *node)
766 st_node *block;
767 int size;
769 block = node->message.receiver;
770 generate_statements (gt, code, block->block.statements);
772 emit (code, JUMP_TRUE);
773 emit (code, 3);
774 emit (code, 0);
775 size = size_statements (gt, block->block.statements);
776 size += sizes[JUMP_FALSE];
777 size = -(size + 3);
778 emit (code, JUMP);
779 emit (code, size & 0xFF);
780 emit (code, (size >> 8) & 0xFF);
782 if (!node->message.is_statement)
783 emit (code, PUSH_NIL);
786 /* #whileTrueArg
788 static bool
789 match_whileTrueArg (Generator *gt, st_node *node)
791 st_node *block;
793 if (strcmp (CSTRING (node->message.selector), "whileTrue:") != 0)
794 return false;
796 block = node->message.receiver;
797 if (block->type != ST_BLOCK_NODE || block->block.arguments != NULL)
798 return false;
800 block = node->message.arguments;
801 if (block->type != ST_BLOCK_NODE || block->block.arguments != NULL)
802 return false;
804 return true;
807 static void
808 generate_whileTrueArg (Generator *gt, st_bytecode *code, st_node *node)
810 st_node *block;
811 int size;
813 generate_statements (gt, code, node->message.receiver->block.statements);
814 emit (code, JUMP_FALSE);
815 size = size_statements (gt, node->message.arguments->block.statements);
816 size += sizes[POP_STACK_TOP] + sizes[JUMP];
818 emit (code, size & 0xFF);
819 emit (code, (size >> 8) & 0xFF);
821 generate_statements (gt, code, node->message.arguments->block.statements);
823 size += size_statements (gt, node->message.receiver->block.statements);
824 size = -(size + 3);
825 emit (code, POP_STACK_TOP);
826 emit (code, JUMP);
827 emit (code, size & 0xFF);
828 emit (code, (size >> 8) & 0xFF);
830 if (!node->message.is_statement)
831 emit (code, PUSH_NIL);
834 /* #whileFalseArg
836 static bool
837 match_whileFalseArg (Generator *gt, st_node *node)
839 st_node *block;
841 if (strcmp (CSTRING (node->message.selector), "whileFalse:") != 0)
842 return false;
844 block = node->message.receiver;
845 if (block->type != ST_BLOCK_NODE || block->block.arguments != NULL)
846 return false;
848 block = node->message.arguments;
849 if (block->type != ST_BLOCK_NODE || block->block.arguments != NULL)
850 return false;
852 return true;
855 static void
856 generate_whileFalseArg (Generator *gt, st_bytecode *code, st_node *node)
858 st_node *block;
859 int size;
861 generate_statements (gt, code, node->message.receiver->block.statements);
862 emit (code, JUMP_TRUE);
863 size = size_statements (gt, node->message.arguments->block.statements);
864 size += sizes[POP_STACK_TOP] + sizes[JUMP];
866 emit (code, size & 0xFF);
867 emit (code, (size >> 8) & 0xFF);
869 generate_statements (gt, code, node->message.arguments->block.statements);
871 size += size_statements (gt, node->message.receiver->block.statements);
872 size = -(size + 3);
874 emit (code, POP_STACK_TOP);
875 emit (code, JUMP);
876 emit (code, size & 0xFF);
877 emit (code, (size >> 8) & 0xFF);
879 if (!node->message.is_statement)
880 emit (code, PUSH_NIL);
883 /* #and:
885 static bool
886 match_and (Generator *gt, st_node *node)
888 st_node *block;
890 if (strcmp (CSTRING (node->message.selector), "and:") != 0)
891 return false;
893 block = node->message.arguments;
894 if (block->type != ST_BLOCK_NODE || block->block.arguments != NULL)
895 return false;
897 return true;
900 static void
901 generate_and (Generator *gt, st_bytecode *code, st_node *node)
903 st_node *block;
904 int size;
906 block = node->message.arguments;
907 generate_expression (gt, code, node->message.receiver);
908 size = size_statements (gt, block->block.statements) + sizes[JUMP];
910 emit (code, JUMP_FALSE);
911 emit (code, size & 0xFF);
912 emit (code, (size >> 8) & 0xFF);
914 generate_statements (gt, code, block->block.statements);
916 emit (code, JUMP);
917 emit (code, 1);
918 emit (code, 0);
919 emit (code, PUSH_FALSE);
921 if (node->message.is_statement)
922 emit (code, POP_STACK_TOP);
925 /* #or:
927 static bool
928 match_or (Generator *gt, st_node *node)
930 st_node *block;
932 if (strcmp (CSTRING (node->message.selector), "or:") != 0)
933 return false;
935 block = node->message.arguments;
936 if (block->type != ST_BLOCK_NODE || block->block.arguments != NULL)
937 return false;
939 return true;
942 static void
943 generate_or (Generator *gt, st_bytecode *code, st_node *node)
945 st_node *block;
946 int size;
948 block = node->message.arguments;
949 generate_expression (gt, code, node->message.receiver);
950 size = size_statements (gt, block->block.statements) + sizes[JUMP];
952 emit (code, JUMP_TRUE);
953 emit (code, size & 0xFF);
954 emit (code, (size >> 8) & 0xFF);
956 generate_statements (gt, code, block->block.statements);
958 emit (code, JUMP);
959 emit (code, 1);
960 emit (code, 0);
961 emit (code, PUSH_TRUE);
962 if (node->message.is_statement)
963 emit (code, POP_STACK_TOP);
966 typedef void (* CodeGenerationFunc) (Generator *gt, st_bytecode *code, st_node *node);
967 typedef bool (* OptimisationMatchFunc) (Generator *gt, st_node *node);
969 static const struct optimisers {
971 CodeGenerationFunc generation_func;
972 OptimisationMatchFunc match_func;
974 } optimisers[] =
976 { generate_ifTrue, match_ifTrue },
977 { generate_ifFalse, match_ifFalse },
978 { generate_ifTrueifFalse, match_ifTrueifFalse },
979 { generate_ifFalseifTrue, match_ifFalseifTrue },
980 { generate_whileTrue, match_whileTrue },
981 { generate_whileFalse, match_whileFalse },
982 { generate_whileTrueArg, match_whileTrueArg },
983 { generate_whileFalseArg, match_whileFalseArg },
984 { generate_and, match_and },
985 { generate_or, match_or }
988 static void
989 generate_message_send (Generator *gt, st_bytecode *code, st_node *node)
991 st_node *args;
992 st_uint argcount = 0;
993 int index;
995 /* generate arguments */
996 args = node->message.arguments;
997 for (; args; args = args->next) {
998 generate_expression (gt, code, args);
999 argcount++;
1002 /* check if message is a special */
1003 for (int i = 0; i < ST_N_ELEMENTS (__machine.selectors); i++) {
1004 if (!node->message.super_send && node->message.selector == __machine.selectors[i]) {
1005 emit (code, SEND_PLUS + i);
1006 goto out;
1010 if (node->message.super_send)
1011 emit (code, SEND_SUPER);
1012 else
1013 emit (code, SEND);
1015 index = find_literal_const (gt, node->message.selector);
1017 emit (code, (st_uchar) argcount);
1018 emit (code, (st_uchar) index);
1020 out:
1021 if (node->message.is_statement)
1022 emit (code, POP_STACK_TOP);
1025 static int
1026 size_message_send (Generator *gt, st_node *node)
1028 st_bytecode code;
1030 bytecode_init (&code);
1031 generate_message_send (gt, &code, node);
1032 bytecode_destroy (&code);
1034 return code.size;
1037 static void
1038 generate_cascade (Generator *gt, st_bytecode *code, st_node *node)
1040 st_uint i, j;
1042 generate_expression (gt, code, node->cascade.receiver);
1043 emit (code, DUPLICATE_STACK_TOP);
1045 for (st_list *l = node->cascade.messages; l; l = l->next) {
1047 generate_message_send (gt, code, (st_node*) l->data);
1049 if (l->next || node->cascade.is_statement)
1050 emit (code, POP_STACK_TOP);
1052 if (l->next && l->next->next)
1053 emit (code, DUPLICATE_STACK_TOP);
1057 static int
1058 size_cascade (Generator *gt, st_node *node)
1060 st_bytecode code;
1062 bytecode_init (&code);
1063 generate_cascade (gt, &code, node);
1064 bytecode_destroy (&code);
1066 return code.size;
1069 static void
1070 generate_message (Generator *gt, st_bytecode *code, st_node *node)
1072 st_uint i, j;
1074 for (i = 0; i < ST_N_ELEMENTS (optimisers); i++) {
1075 if (optimisers[i].match_func (gt, node)) {
1076 optimisers[i].generation_func (gt, code, node);
1077 return;
1081 generate_expression (gt, code, node->message.receiver);
1082 generate_message_send (gt, code, node);
1085 static int
1086 size_message (Generator *gt, st_node *node)
1088 st_bytecode code;
1090 bytecode_init (&code);
1091 generate_message (gt, &code, node);
1092 bytecode_destroy (&code);
1094 return code.size;
1097 static void
1098 generate_expression (Generator *gt, st_bytecode *code, st_node *node)
1100 int index;
1102 switch (node->type) {
1103 case ST_VARIABLE_NODE:
1105 const char *name = node->variable.name;
1107 if (streq (name, "self")) {
1108 push_special (gt, code, PUSH_SELF);
1109 break;
1110 } else if (streq (name, "super")) {
1111 push_special (gt, code, PUSH_SELF);
1112 break;
1113 } else if (streq (name, "true")) {
1114 push_special (gt, code, PUSH_TRUE);
1115 break;
1116 } else if (streq (name, "false")) {
1117 push_special (gt, code, PUSH_FALSE);
1118 break;
1119 } else if (streq (name, "nil")) {
1120 push_special (gt, code, PUSH_NIL);
1121 break;
1122 } else if (streq (name, "thisContext")) {
1123 push_special (gt, code, PUSH_ACTIVE_CONTEXT);
1124 break;
1127 index = find_temporary (gt, node->variable.name);
1128 if (index >= 0) {
1129 push (gt, code, PUSH_TEMP, index);
1130 break;
1132 index = find_instvar (gt, node->variable.name);
1133 if (index >= 0) {
1134 push (gt, code, PUSH_INSTVAR, index);
1135 break;
1137 index = find_literal_var (gt, node->variable.name);
1138 if (index >= 0) {
1139 push (gt, code, PUSH_LITERAL_VAR, index);
1140 break;
1142 generation_error (gt, "unknown variable", node);
1145 case ST_LITERAL_NODE:
1147 /* use optimized PUSH_INTEGER for smis in the range of -127..127 */
1148 if (st_object_is_smi (node->literal.value) &&
1149 ((st_smi_value (node->literal.value) >= -127) &&
1150 (st_smi_value (node->literal.value) <= 127))) {
1151 emit (code, PUSH_INTEGER);
1152 emit (code, st_smi_value (node->literal.value));
1153 } else {
1154 index = find_literal_const (gt, node->literal.value);
1155 push (gt, code, PUSH_LITERAL_CONST, index);
1157 break;
1159 case ST_ASSIGN_NODE:
1160 generate_assign (gt, code, node, false);
1161 break;
1163 case ST_BLOCK_NODE:
1164 generate_block (gt, code, node);
1165 break;
1167 case ST_MESSAGE_NODE:
1168 generate_message (gt, code, node);
1169 break;
1171 case ST_CASCADE_NODE:
1172 generate_cascade (gt, code, node);
1173 break;
1175 default:
1176 st_assert_not_reached ();
1180 static int
1181 size_expression (Generator *gt, st_node *node)
1183 st_bytecode code;
1185 bytecode_init (&code);
1186 generate_expression (gt, &code, node);
1187 bytecode_destroy (&code);
1189 return code.size;
1192 static void
1193 generate_statements (Generator *gt, st_bytecode *code, st_node *statements)
1195 if (statements == NULL) {
1196 emit (code, PUSH_NIL);
1199 for (st_node *node = statements; node; node = node->next) {
1201 switch (node->type) {
1203 case ST_VARIABLE_NODE:
1204 case ST_LITERAL_NODE:
1205 case ST_BLOCK_NODE:
1207 * don't generate anything in this case since we would end up with a constant
1208 * expression with no side-effects.
1210 * However, in a block with no explicit return, the value of the last statement is the implicit
1211 * value of the block.
1213 if (node->next == NULL)
1214 generate_expression (gt, code, node);
1215 break;
1217 case ST_ASSIGN_NODE:
1219 /* don't use STORE_POP if this is the last statement */
1220 if (node->next == NULL)
1221 generate_assign (gt, code, node, false);
1222 else
1223 generate_assign (gt, code, node, true);
1224 break;
1226 case ST_RETURN_NODE:
1228 st_assert (node->next == NULL);
1229 generate_return (gt, code, node);
1230 return;
1232 case ST_MESSAGE_NODE:
1234 generate_message (gt, code, node);
1235 break;
1237 case ST_CASCADE_NODE:
1239 generate_cascade (gt, code, node);
1240 break;
1242 default:
1243 st_assert_not_reached ();
1248 static int
1249 size_statements (Generator *gt, st_node *statements)
1251 st_bytecode code;
1253 bytecode_init (&code);
1254 generate_statements (gt, &code, statements);
1255 bytecode_destroy (&code);
1257 return code.size;
1260 static void
1261 generate_method_statements (Generator *gt, st_bytecode *code, st_node *statements)
1263 for (st_node *node = statements; node; node = node->next) {
1265 switch (node->type) {
1267 case ST_VARIABLE_NODE:
1268 case ST_LITERAL_NODE:
1269 case ST_BLOCK_NODE:
1270 break;
1272 case ST_ASSIGN_NODE:
1274 generate_assign (gt, code, node, true);
1275 break;
1277 case ST_RETURN_NODE:
1279 st_assert (node->next == NULL);
1280 generate_return (gt, code, node);
1281 return;
1283 case ST_MESSAGE_NODE:
1285 generate_message (gt, code, node);
1286 break;
1288 case ST_CASCADE_NODE:
1290 generate_cascade (gt, code, node);
1291 break;
1293 default:
1294 st_assert_not_reached ();
1298 emit (code, PUSH_SELF);
1299 emit (code, RETURN_STACK_TOP);
1302 static st_list *
1303 collect_temporaries (Generator *gt, st_node *node)
1305 st_list *temps = NULL;
1307 if (node == NULL)
1308 return NULL;
1310 if (node->type == ST_BLOCK_NODE) {
1311 temps = st_list_concat (get_block_temporaries (gt, node->block.arguments),
1312 get_block_temporaries (gt, node->block.temporaries));
1315 switch (node->type) {
1316 case ST_BLOCK_NODE:
1317 temps = st_list_concat (temps, collect_temporaries (gt, node->block.statements));
1318 break;
1319 case ST_ASSIGN_NODE:
1320 temps = st_list_concat (temps, collect_temporaries (gt, node->assign.expression));
1321 break;
1322 case ST_RETURN_NODE:
1323 temps = st_list_concat (temps, collect_temporaries (gt, node->retrn.expression));
1324 break;
1325 case ST_MESSAGE_NODE:
1326 temps = st_list_concat (temps, collect_temporaries (gt, node->message.receiver));
1327 temps = st_list_concat (temps, collect_temporaries (gt, node->message.arguments));
1328 break;
1329 break;
1330 case ST_CASCADE_NODE:
1331 temps = st_list_concat (temps, collect_temporaries (gt, node->cascade.receiver));
1332 for (st_list *l = node->cascade.messages; l; l = l->next)
1333 temps = st_list_concat (temps, collect_temporaries (gt, (st_node *) l->data));
1335 break;
1337 case ST_METHOD_NODE:
1338 case ST_VARIABLE_NODE:
1339 case ST_LITERAL_NODE:
1340 break;
1343 temps = st_list_concat (temps, collect_temporaries (gt, node->next));
1345 return temps;
1348 st_oop
1349 st_generate_method (st_oop class, st_node *node, st_compiler_error *error)
1351 Generator *gt;
1352 st_oop method;
1353 st_uint argcount;
1354 st_uint tempcount;
1355 st_bytecode code;
1357 st_assert (class != ST_NIL);
1358 st_assert (node != NULL && node->type == ST_METHOD_NODE);
1360 check_init ();
1362 gt = generator_new ();
1363 gt->error = error;
1365 if (setjmp (gt->jmploc)) {
1366 generator_destroy (gt);
1367 return ST_NIL;
1370 gt->class = class;
1371 gt->instvars = st_behavior_all_instance_variables (class);
1372 gt->temporaries = get_temporaries (gt, gt->instvars, node->method.arguments, node->method.temporaries);
1374 /* collect all block-level temporaries */
1375 gt->temporaries = st_list_concat (gt->temporaries, collect_temporaries (gt, node->method.statements));
1377 bytecode_init (&code);
1378 generate_method_statements (gt, &code, node->method.statements);
1379 method = st_object_new (ST_COMPILED_METHOD_CLASS);
1381 argcount = st_node_list_length (node->method.arguments);
1382 tempcount = st_list_length (gt->temporaries) - st_node_list_length (node->method.arguments);
1384 ST_METHOD_HEADER (method) = st_smi_new (0);
1385 st_method_set_arg_count (method, argcount);
1386 st_method_set_temp_count (method, tempcount);
1387 st_method_set_primitive_index (method, node->method.primitive);
1389 if (node->method.primitive >= 0) {
1390 st_method_set_flags (method, ST_METHOD_PRIMITIVE);
1391 } else {
1392 st_method_set_flags (method, ST_METHOD_NORMAL);
1395 ST_METHOD_LITERALS (method) = create_literals_array (gt);
1396 ST_METHOD_BYTECODE (method) = create_bytecode_array (&code);
1397 ST_METHOD_SELECTOR (method) = node->method.selector;
1399 generator_destroy (gt);
1400 bytecode_destroy (&code);
1402 return method;
1405 #define NEXT(ip) \
1406 ip += sizes[*ip]; \
1407 break
1409 #define FORMAT(ip) (formats[sizes[*ip]-1])
1411 static void
1412 print_bytecodes (st_oop literals, st_uchar *codes, int len)
1414 st_uchar *ip;
1416 static const char * const formats[] = {
1417 "<%02x> ",
1418 "<%02x %02x> ",
1419 "<%02x %02x %02x> ",
1422 ip = codes;
1423 while ((ip - codes) < len) {
1425 printf ("%3li ", ip - codes);
1427 switch ((Code) *ip) {
1429 case PUSH_TEMP:
1430 printf (FORMAT (ip), ip[0], ip[1]);
1431 printf ("pushTemp: %i", ip[1]);
1433 NEXT (ip);
1435 case PUSH_INSTVAR:
1436 printf (FORMAT (ip), ip[0], ip[1]);
1437 printf ("pushInstvar: %i", ip[1]);
1439 NEXT (ip);
1441 case PUSH_LITERAL_CONST:
1442 printf (FORMAT (ip), ip[0], ip[1]);
1443 printf ("pushConst: %i", ip[1]);
1445 NEXT (ip);
1447 case PUSH_LITERAL_VAR:
1448 printf (FORMAT (ip), ip[0], ip[1]);
1449 printf ("pushLit: %i", ip[1]);
1451 NEXT (ip);
1453 case PUSH_SELF:
1454 printf (FORMAT (ip), ip[0]);
1455 printf ("push: self");
1457 NEXT (ip);
1459 case PUSH_TRUE:
1460 printf (FORMAT (ip), ip[0]);
1461 printf ("pushConst: true");
1463 NEXT (ip);
1465 case PUSH_FALSE:
1466 printf (FORMAT (ip), ip[0]);
1467 printf ("pushConst: false");
1469 NEXT (ip);
1471 case PUSH_NIL:
1472 printf (FORMAT (ip), ip[0]);
1473 printf ("pushConst: nil");
1475 NEXT (ip);
1477 case PUSH_INTEGER:
1478 printf (FORMAT (ip), ip[0], ip[1]);
1479 printf ("pushConst: %i", (signed char) ip[1]);
1481 NEXT (ip);
1483 case PUSH_ACTIVE_CONTEXT:
1484 printf (FORMAT (ip), ip[0]);
1485 printf ("push: thisContext");
1487 NEXT (ip);
1489 case STORE_TEMP:
1490 printf (FORMAT (ip), ip[0], ip[1]);
1491 printf ("storeTemp: %i", ip[1]);
1493 NEXT (ip);
1495 case STORE_INSTVAR:
1496 printf (FORMAT (ip), ip[0], ip[1]);
1497 printf ("storeInstvar: %i", ip[1]);
1499 NEXT (ip);
1501 case STORE_LITERAL_VAR:
1502 printf (FORMAT (ip), ip[0], ip[1]);
1503 printf ("storeLiteral: %i", ip[1]);
1505 NEXT (ip);
1507 case STORE_POP_TEMP:
1508 printf (FORMAT (ip), ip[0], ip[1]);
1509 printf ("popIntoTemp: %i", ip[1]);
1511 NEXT (ip);
1513 case STORE_POP_INSTVAR:
1514 printf (FORMAT (ip), ip[0], ip[1]);
1515 printf ("popIntoInstvar: %i", ip[1]);
1517 NEXT (ip);
1519 case STORE_POP_LITERAL_VAR:
1520 printf (FORMAT (ip), ip[0], ip[1]);
1521 printf ("popIntoLiteral: %i", ip[1]);
1523 NEXT (ip);
1525 case BLOCK_COPY:
1526 printf (FORMAT (ip), ip[0], ip[1]);
1527 printf ("blockCopy: %i", ip[1]);
1529 NEXT (ip);
1531 case JUMP:
1532 printf (FORMAT (ip), ip[0], ip[1], ip[2]);
1534 short offset = *((short *) (ip + 1));
1535 printf ("jump: %li", 3 + (ip - codes) + offset);
1537 NEXT (ip);
1539 case JUMP_TRUE:
1540 printf (FORMAT (ip), ip[0], ip[1], ip[2]);
1541 printf ("jumpTrue: %li", 3 + (ip - codes) + *((short *) (ip + 1)));
1543 NEXT (ip);
1545 case JUMP_FALSE:
1546 printf (FORMAT (ip), ip[0], ip[1], ip[2]);
1547 printf ("jumpFalse: %li", 3 + (ip - codes) + *((short *) (ip + 1)));
1549 NEXT (ip);
1551 case POP_STACK_TOP:
1552 printf (FORMAT (ip), ip[0]);
1553 printf ("pop");
1555 NEXT (ip);
1557 case DUPLICATE_STACK_TOP:
1558 printf (FORMAT (ip), ip[0]);
1559 printf ("dup");
1561 NEXT (ip);
1563 case RETURN_STACK_TOP:
1564 printf (FORMAT (ip), ip[0]);
1565 printf ("returnTop");
1567 ip += 1;
1568 break;
1570 NEXT (ip);
1572 case BLOCK_RETURN:
1573 printf (FORMAT (ip), ip[0]);
1574 printf ("blockReturn");
1576 NEXT (ip);
1578 case SEND:
1580 st_oop selector;
1582 selector = st_array_at (literals, ip[2] + 1);
1584 printf (FORMAT (ip), ip[0], ip[1], ip[2]);
1586 printf ("send: #%s", (char *) st_byte_array_bytes (selector));
1588 NEXT (ip);
1590 case SEND_SUPER:
1592 st_oop selector;
1594 selector = st_array_at (literals, ip[2] + 1);
1596 printf (FORMAT (ip), ip[0], ip[1], ip[2]);
1598 printf ("sendSuper: #%s", (char *) st_byte_array_bytes (selector));
1600 NEXT (ip);
1603 case SEND_PLUS:
1604 case SEND_MINUS:
1605 case SEND_LT:
1606 case SEND_GT:
1607 case SEND_LE:
1608 case SEND_GE:
1609 case SEND_EQ:
1610 case SEND_NE:
1611 case SEND_MUL:
1612 case SEND_DIV:
1613 case SEND_MOD:
1614 case SEND_BITSHIFT:
1615 case SEND_BITAND:
1616 case SEND_BITOR:
1617 case SEND_BITXOR:
1618 case SEND_AT:
1619 case SEND_AT_PUT:
1620 case SEND_SIZE:
1621 case SEND_VALUE:
1622 case SEND_VALUE_ARG:
1623 case SEND_IDENTITY_EQ:
1624 case SEND_CLASS:
1625 case SEND_NEW:
1626 case SEND_NEW_ARG:
1628 printf (FORMAT (ip), ip[0]);
1629 printf ("sendSpecial: #%s", st_byte_array_bytes (__machine.selectors[ip[0] - SEND_PLUS]));
1631 NEXT (ip);
1634 printf ("\n");
1639 static void
1640 print_literal (st_oop lit)
1642 if (st_object_is_smi (lit)) {
1644 printf ("%li", st_smi_value (lit));
1646 } else if (st_object_is_symbol (lit)) {
1648 printf ("#%s", (char *) st_byte_array_bytes (lit));
1650 } else if (st_object_class (lit) == ST_STRING_CLASS) {
1652 printf ("'%s'", (char *) st_byte_array_bytes (lit));
1654 } else if (st_object_class (lit) == ST_CHARACTER_CLASS) {
1656 char outbuf[6] = { 0 };
1657 st_unichar_to_utf8 (st_character_value (lit), outbuf);
1658 printf ("$%s", outbuf);
1663 static void
1664 print_literals (st_oop literals)
1666 if (literals == ST_NIL)
1667 return;
1669 printf ("literals: ");
1671 for (int i = 1; i <= st_smi_value (ST_ARRAYED_OBJECT (literals)->size); i++) {
1673 st_oop lit = st_array_at (literals, i);
1675 print_literal (lit);
1677 printf (" ");
1680 printf ("\n");
1683 void
1684 st_print_method (st_oop method)
1686 st_oop literals;
1687 st_uchar *bytecodes;
1688 int size;
1690 printf ("flags: %i; ", st_method_get_flags (method));
1691 printf ("arg-count: %i; ", st_method_get_arg_count (method));
1692 printf ("temp-count: %i; ", st_method_get_temp_count (method));
1693 printf ("primitive: %i;\n", st_method_get_primitive_index (method));
1695 printf ("\n");
1697 literals = ST_METHOD_LITERALS (method);
1698 bytecodes = st_method_bytecode_bytes (method);
1699 size = st_smi_value (st_arrayed_object_size (ST_METHOD_BYTECODE (method)));
1701 print_bytecodes (literals, bytecodes, size);
1703 print_literals (literals);