4 * Copyright (C) 2008 Vincent Geddes
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 #include "st-compiler.h"
29 #include "st-primitives.h"
31 #include "st-symbol.h"
33 #include "st-large-integer.h"
34 #include "st-universe.h"
35 #include "st-object.h"
37 #include "st-character.h"
38 #include "st-unicode.h"
39 #include "st-behavior.h"
52 st_compiler_error
*error
;
57 parse_error (st_parser
*parser
,
62 strncpy (parser
->error
->message
, message
, 255);
63 parser
->error
->line
= st_token_get_line (token
);
64 parser
->error
->column
= st_token_get_column (token
);
67 longjmp (parser
->jmploc
, 0);
70 /* Adaptor for st_lexer_next_token(): catches lexer errors and filters out comments. */
/*
 * Fetches a token from parser->lexer with st_lexer_next_token() and inspects
 * its type.
 *
 * A token of type ST_TOKEN_INVALID is reported through parse_error(), using
 * the message supplied by st_lexer_error_message(). Tokens of type
 * ST_TOKEN_COMMENT take a separate branch whose body is not visible in this
 * listing (presumably the comment is skipped -- TODO confirm).
 *
 * Callers throughout this file assign the result to their token variable.
 */
73 next (st_parser
*parser
)
78 token
= st_lexer_next_token (parser
->lexer
);
79 type
= st_token_get_type (token
);
81 if (type
== ST_TOKEN_COMMENT
)
83 else if (type
== ST_TOKEN_INVALID
)
84 parse_error (parser
, st_lexer_error_message (parser
->lexer
), token
);
/*
 * Thin wrapper: returns whatever st_lexer_current_token() reports for the
 * given lexer.
 */
90 current (st_lexer
*lexer
)
92 return st_lexer_current_token (lexer
);
/*
 * Forward declarations for parse routines that are called before their
 * definitions appear; parse_block(), for example, calls parse_temporaries()
 * and parse_statements(), both defined further down.
 */
95 static st_node
*parse_statements (st_parser
*parser
);
96 static st_node
*parse_temporaries (st_parser
*parser
);
97 static st_node
*parse_subexpression (st_parser
*parser
);
98 static st_node
*parse_expression (st_parser
*parser
);
99 static int parse_primitive (st_parser
*parser
);
/*
 * Parses the formal arguments of a block: a run of ':' identifier pairs.
 *
 * While the token is ST_TOKEN_COLON, the token after it must be an
 * identifier (otherwise "expected identifier" is raised). Each identifier
 * becomes an ST_VARIABLE_NODE carrying the token's line and a copy of its
 * text, appended to the argument list.
 *
 * The token following the arguments must be the binary selector "|";
 * anything else raises "expected ':' or '|'".
 */
102 parse_block_arguments (st_parser
*parser
)
105 st_node
*arguments
= NULL
, *node
;
107 token
= current (parser
->lexer
);
109 while (st_token_get_type (token
) == ST_TOKEN_COLON
) {
111 token
= next (parser
);
112 if (st_token_get_type (token
) != ST_TOKEN_IDENTIFIER
)
113 parse_error (parser
,"expected identifier", token
);
115 node
= st_node_new (ST_VARIABLE_NODE
);
116 node
->line
= st_token_get_line (token
);
117 node
->variable
.name
= st_strdup (st_token_get_text (token
));
118 arguments
= st_node_list_append (arguments
, node
);
120 token
= next (parser
);
123 if (st_token_get_type (token
) != ST_TOKEN_BINARY_SELECTOR
||
124 !streq (st_token_get_text (token
), "|"))
125 parse_error (parser
,"expected ':' or '|'", token
);
/*
 * Parses a block literal into an ST_BLOCK_NODE.
 *
 * Steps visible here:
 *   - advance one token and record its line on the node;
 *   - if that token is ':', parse the block arguments;
 *   - if the current token is the binary selector "|", parse temporaries;
 *   - remember parser->in_block in `nested`, set it to true, and parse the
 *     statements;
 *   - set parser->in_block back to false;
 *   - require ST_TOKEN_BLOCK_END, raising "expected ']'" otherwise.
 *
 * NOTE(review): the reset writes the constant false while `nested` holds the
 * previous value. Any condition guarding the reset is not visible in this
 * listing -- confirm that an enclosing block keeps in_block == true.
 */
134 parse_block (st_parser
*parser
)
140 node
= st_node_new (ST_BLOCK_NODE
);
142 // parse block arguments
143 token
= next (parser
);
145 node
->line
= st_token_get_line (token
);
147 if (st_token_get_type (token
) == ST_TOKEN_COLON
)
148 node
->block
.arguments
= parse_block_arguments (parser
);
150 token
= current (parser
->lexer
);
151 if (st_token_get_type (token
) == ST_TOKEN_BINARY_SELECTOR
152 && streq (st_token_get_text (token
), "|"))
153 node
->block
.temporaries
= parse_temporaries (parser
);
155 nested
= parser
->in_block
;
156 parser
->in_block
= true;
158 node
->block
.statements
= parse_statements (parser
);
161 parser
->in_block
= false;
163 token
= current (parser
->lexer
);
164 if (st_token_get_type (token
) != ST_TOKEN_BLOCK_END
)
165 parse_error (parser
,"expected ']'", token
);
/*
 * Converts the current number token into an ST_LITERAL_NODE.
 *
 * The token supplies a sign, a radix, an exponent and the digit string.
 * The digit string is scanned for a '.':
 *
 *   - If a '.' is present or the exponent is non-zero, the literal is a
 *     float. The text is rebuilt as "<digits>e<exponent>", converted with
 *     strtod(), multiplied by the sign and wrapped with st_float_new().
 *     "only base-10 floats are supported at the moment" is raised under a
 *     condition not visible in this listing (presumably radix != 10).
 *
 *   - Otherwise the digits are converted with strtol() in the token's radix.
 *     A value outside ST_SMALL_INTEGER_MIN..ST_SMALL_INTEGER_MAX (an
 *     additional overflow condition is not visible here) is re-read as a
 *     multi-precision integer with mp_init()/mp_read_radix(), optionally
 *     negated with mp_neg(), and wrapped with st_large_integer_new().
 *     Everything else becomes a small integer via st_smi_new().
 *
 * NOTE(review): a failing mp_read_radix() is reported with the same
 * "memory exhausted" text as a failing mp_init(); confirm that allocation
 * failure is the only way it can fail.
 * NOTE(review): no release of `format` is visible in this listing -- confirm
 * the string returned by st_strdup_printf() is freed.
 */
172 parse_number (st_parser
*parser
)
181 token
= current (parser
->lexer
);
183 sign
= st_number_token_negative (token
) ? -1 : 1;
184 radix
= st_number_token_radix (token
);
185 exponent
= st_number_token_exponent (token
);
187 p
= number
= st_number_token_number (token
);
189 node
= st_node_new (ST_LITERAL_NODE
);
190 node
->line
= st_token_get_line (token
);
192 /* check if there is a decimal point */
193 while (*p
&& *p
!= '.')
196 if (*p
== '.' || exponent
!= 0) {
201 parse_error (parser
,"only base-10 floats are supported at the moment", token
);
203 format
= st_strdup_printf ("%se%i", number
, exponent
);
205 node
->literal
.value
= st_float_new (sign
* strtod (format
, NULL
));
212 long int integer
= sign
* strtol (number
, NULL
, radix
);
214 /* check for overflow */
216 || integer
< ST_SMALL_INTEGER_MIN
|| integer
> ST_SMALL_INTEGER_MAX
) {
221 result
= mp_init (&value
);
222 if (result
!= MP_OKAY
)
223 parse_error (parser
,"memory exhausted while trying parse LargeInteger", token
);
225 result
= mp_read_radix (&value
, number
, radix
);
226 if (result
!= MP_OKAY
)
227 parse_error (parser
,"memory exhausted while trying parse LargeInteger", token
);
230 mp_neg (&value
, &value
);
232 node
->literal
.value
= st_large_integer_new (&value
);
235 node
->literal
.value
= st_smi_new (integer
);
/* Forward declaration: parse_tuple() calls parse_primary() before it is defined. */
244 static st_node
*parse_primary (st_parser
*parser
);
/*
 * Parses a literal array (tuple) into an ST_LITERAL_NODE whose value is an
 * instance of ST_ARRAY_CLASS.
 *
 * Elements are dispatched on token type:
 *   - number, string, symbol and character constants go through
 *     parse_primary();
 *   - ST_TOKEN_LPAREN starts a nested tuple, parsed recursively.
 * In both cases only the literal value is kept: it is prepended to `items`
 * and the temporary node is destroyed.
 *
 * ST_TOKEN_RPAREN is checked for explicitly, and "expected ')'" is raised on
 * a path whose condition is not visible in this listing.
 *
 * Because items were prepended, the list is reversed before the array is
 * allocated with st_list_length (items) slots and filled in order using
 * st_array_at_put(). The list itself is destroyed afterwards.
 */
247 parse_tuple (st_parser
*parser
)
251 st_list
*items
= NULL
;
253 token
= next (parser
);
256 switch (st_token_get_type (token
)) {
257 case ST_TOKEN_NUMBER_CONST
:
258 case ST_TOKEN_STRING_CONST
:
259 case ST_TOKEN_SYMBOL_CONST
:
260 case ST_TOKEN_CHARACTER_CONST
:
261 node
= parse_primary (parser
);
262 items
= st_list_prepend (items
, (st_pointer
) node
->literal
.value
);
263 st_node_destroy (node
);
266 case ST_TOKEN_LPAREN
:
267 node
= parse_tuple (parser
);
268 items
= st_list_prepend (items
, (st_pointer
) node
->literal
.value
);
269 st_node_destroy (node
);
273 if (st_token_get_type (token
) == ST_TOKEN_RPAREN
) {
276 parse_error (parser
,"expected ')'", token
);
279 token
= current (parser
->lexer
);
283 token
= next (parser
);
286 items
= st_list_reverse (items
);
288 tuple
= st_object_new_arrayed (ST_ARRAY_CLASS
, st_list_length (items
));
291 for (st_list
*l
= items
; l
; l
= l
->next
)
292 st_array_at_put (tuple
, i
++, (st_oop
) l
->data
);
294 node
= st_node_new (ST_LITERAL_NODE
);
295 node
->literal
.value
= tuple
;
296 node
->line
= st_token_get_line (token
);
298 st_list_destroy (items
);
304 /* identifiers, literals, blocks */
/*
 * Parses a primary: the smallest unit of an expression. Dispatches on the
 * type of the current token:
 *
 *   ST_TOKEN_IDENTIFIER       ST_VARIABLE_NODE with a copy of the token text
 *   ST_TOKEN_NUMBER_CONST     delegated to parse_number()
 *   ST_TOKEN_STRING_CONST     literal built with st_string_new()
 *   ST_TOKEN_SYMBOL_CONST     literal built with st_symbol_new()
 *   ST_TOKEN_CHARACTER_CONST  literal built with st_character_new() from the
 *                             code point st_utf8_get_unichar() yields
 *   ST_TOKEN_BLOCK_BEGIN      delegated to parse_block()
 *   ST_TOKEN_TUPLE_BEGIN      delegated to parse_tuple()
 *
 * "expected expression" is raised on the remaining path (presumably the
 * switch default -- its label is not visible in this listing).
 */
306 parse_primary (st_parser
*parser
)
308 st_node
*node
= NULL
;
311 token
= current (parser
->lexer
);
313 switch (st_token_get_type (token
)) {
315 case ST_TOKEN_IDENTIFIER
:
317 node
= st_node_new (ST_VARIABLE_NODE
);
318 node
->line
= st_token_get_line (token
);
319 node
->variable
.name
= st_strdup (st_token_get_text (token
));
324 case ST_TOKEN_NUMBER_CONST
:
326 node
= parse_number (parser
);
329 case ST_TOKEN_STRING_CONST
:
331 node
= st_node_new (ST_LITERAL_NODE
);
332 node
->line
= st_token_get_line (token
);
333 node
->literal
.value
= st_string_new (st_token_get_text (token
));
338 case ST_TOKEN_SYMBOL_CONST
:
340 node
= st_node_new (ST_LITERAL_NODE
);
341 node
->line
= st_token_get_line (token
);
342 node
->literal
.value
= st_symbol_new (st_token_get_text (token
));
347 case ST_TOKEN_CHARACTER_CONST
:
349 node
= st_node_new (ST_LITERAL_NODE
);
350 node
->line
= st_token_get_line (token
);
351 node
->literal
.value
= st_character_new (st_utf8_get_unichar (st_token_get_text (token
)));
356 case ST_TOKEN_BLOCK_BEGIN
:
358 node
= parse_block (parser
);
361 case ST_TOKEN_TUPLE_BEGIN
:
363 node
= parse_tuple (parser
);
367 parse_error (parser
,"expected expression", token
);
/*
 * Builds an ST_MESSAGE_NODE for a unary send.
 *
 * The selector is the text of the current token, interned with
 * st_symbol_new(). The node gets ST_UNARY_PRECEDENCE, the given receiver,
 * the token's line and an empty argument list.
 */
374 parse_unary_message (st_parser
*parser
, st_node
*receiver
)
379 token
= current (parser
->lexer
);
381 node
= st_node_new (ST_MESSAGE_NODE
);
382 node
->line
= st_token_get_line (token
);
383 node
->message
.precedence
= ST_UNARY_PRECEDENCE
;
384 node
->message
.receiver
= receiver
;
385 node
->message
.selector
= st_symbol_new (st_token_get_text (token
));
386 node
->message
.arguments
= NULL
;
/*
 * Applies the unary messages that follow the argument of a binary send.
 *
 * When the current token is an identifier, `receiver` is wrapped in a unary
 * message node and the function recurses on that node, so a chain of unary
 * sends is folded left to right.
 *
 * The branch taken for a non-identifier token is not visible in this listing
 * (presumably it returns `receiver` unchanged -- TODO confirm).
 */
394 parse_binary_argument (st_parser
*parser
, st_node
*receiver
)
399 token
= current (parser
->lexer
);
400 if (st_token_get_type (token
) != ST_TOKEN_IDENTIFIER
)
403 node
= parse_unary_message (parser
, receiver
);
405 return parse_binary_argument (parser
, node
);
/*
 * Builds an ST_MESSAGE_NODE for a binary send.
 *
 * The selector text is read from the current token, then the parser advances
 * to the argument. The argument is parsed with parse_subexpression() when it
 * starts with ST_TOKEN_LPAREN and with parse_primary() on the other path,
 * after which parse_binary_argument() attaches any trailing unary sends.
 * The resulting node carries ST_BINARY_PRECEDENCE, the given receiver, the
 * interned selector and the single argument.
 *
 * NOTE(review): `selector` is obtained before next() is called and is only
 * interned afterwards; this relies on the token text staying valid once the
 * lexer has moved on -- confirm.
 */
409 parse_binary_message (st_parser
*parser
, st_node
*receiver
)
411 st_node
*node
, *argument
;
415 token
= current (parser
->lexer
);
417 selector
= st_token_get_text (token
);
419 /* parse the primary */
420 token
= next (parser
);
421 if (st_token_get_type (token
) == ST_TOKEN_LPAREN
)
422 argument
= parse_subexpression (parser
);
424 argument
= parse_primary (parser
);
426 argument
= parse_binary_argument (parser
, argument
);
428 node
= st_node_new (ST_MESSAGE_NODE
);
430 node
->message
.precedence
= ST_BINARY_PRECEDENCE
;
431 node
->message
.receiver
= receiver
;
432 node
->message
.selector
= st_symbol_new (selector
);
433 node
->message
.arguments
= argument
;
/*
 * Parses one argument of a keyword send, including the unary and binary
 * sends applied to it.
 *
 * Called with receiver == NULL to start: the primary is parsed first, via
 * parse_subexpression() for ST_TOKEN_LPAREN and via parse_primary() on the
 * other path. On later rounds an identifier extends the argument with a
 * unary send, and a binary selector other than "!" extends it with a binary
 * send. The function then recurses with the extended node.
 *
 * The terminating branch (any other token) is not visible in this listing;
 * presumably it returns `receiver` -- TODO confirm.
 */
440 parse_keyword_argument (st_parser
*parser
, st_node
*receiver
)
444 token
= current (parser
->lexer
);
446 if (receiver
== NULL
) {
447 /* parse the primary */
448 if (st_token_get_type (token
) == ST_TOKEN_LPAREN
)
449 receiver
= parse_subexpression (parser
);
451 receiver
= parse_primary (parser
);
453 } else if (st_token_get_type (token
) == ST_TOKEN_IDENTIFIER
) {
454 receiver
= parse_unary_message (parser
, receiver
);
456 } else if (st_token_get_type (token
) == ST_TOKEN_BINARY_SELECTOR
&& !streq (st_token_get_text (token
), "!")) {
457 receiver
= parse_binary_message (parser
, receiver
);
463 return parse_keyword_argument (parser
, receiver
);
/*
 * Builds an ST_MESSAGE_NODE for a keyword send.
 *
 * For every consecutive ST_TOKEN_KEYWORD_SELECTOR token, the keyword text is
 * concatenated onto the selector accumulated so far, the parser advances,
 * and one argument is parsed with parse_keyword_argument (parser, NULL) and
 * appended to the argument list.
 *
 * The finished node carries ST_KEYWORD_PRECEDENCE, the given receiver, the
 * interned concatenated selector and the collected arguments.
 *
 * NOTE(review): how `temp` replaces `string`, and whether the intermediate
 * strings are released, is not visible in this listing -- confirm.
 */
467 parse_keyword_message (st_parser
*parser
, st_node
*receiver
)
470 st_node
*node
, *arguments
= NULL
, *arg
;
471 char *temp
, *string
= st_strdup ("");
473 token
= current (parser
->lexer
);
475 while (st_token_get_type (token
) == ST_TOKEN_KEYWORD_SELECTOR
) {
477 temp
= st_strconcat (string
, st_token_get_text (token
), NULL
);
481 token
= next (parser
);
482 arg
= parse_keyword_argument (parser
, NULL
);
483 arguments
= st_node_list_append (arguments
, arg
);
485 token
= current (parser
->lexer
);
488 node
= st_node_new (ST_MESSAGE_NODE
);
490 node
->message
.precedence
= ST_KEYWORD_PRECEDENCE
;
491 node
->message
.receiver
= receiver
;
492 node
->message
.selector
= st_symbol_new (string
);
493 node
->message
.arguments
= arguments
;
501 /* Parses an expression from left to right by recursively parsing subexpressions. */
/*
 * Parses the chain of message sends applied to `receiver`.
 *
 * The current token decides what happens:
 *   - a period, right parenthesis, semicolon, end of input, block end, or
 *     the binary selector "!" takes the first branch, whose body is not
 *     visible in this listing (presumably the chain ends and the receiver
 *     is returned -- TODO confirm);
 *   - an identifier starts a unary send;
 *   - a binary selector starts a binary send;
 *   - a keyword selector starts a keyword send;
 *   - "nothing more expected" is raised on the remaining path.
 *
 * After a send has been parsed the function recurses with the new message
 * node as the receiver, which yields left-to-right chaining.
 */
504 parse_message (st_parser
*parser
, st_node
*receiver
)
506 st_node
*message
= NULL
;
510 /* Before parsing messages, check if expression is simply a variable.
511 * This is the case if token is ')' or '.'
513 token
= current (parser
->lexer
);
514 type
= st_token_get_type (token
);
516 if (type
== ST_TOKEN_PERIOD
|| type
== ST_TOKEN_RPAREN
|| type
== ST_TOKEN_SEMICOLON
517 || type
== ST_TOKEN_EOF
|| type
== ST_TOKEN_BLOCK_END
518 || (type
== ST_TOKEN_BINARY_SELECTOR
&& streq (st_token_get_text (token
), "!")))
521 if (type
== ST_TOKEN_IDENTIFIER
)
522 message
= parse_unary_message (parser
, receiver
);
524 else if (type
== ST_TOKEN_BINARY_SELECTOR
)
525 message
= parse_binary_message (parser
, receiver
);
527 else if (type
== ST_TOKEN_KEYWORD_SELECTOR
)
528 message
= parse_keyword_message (parser
, receiver
);
531 parse_error (parser
,"nothing more expected", token
);
533 return parse_message (parser
, message
);
/*
 * Builds an ST_ASSIGN_NODE for "assignee := expression".
 *
 * parse_expression() calls this while the current token is ST_TOKEN_ASSIGN,
 * so the initial next() moves past the assignment operator. The right-hand
 * side is parsed with parse_expression(). The node's line is taken from the
 * token that follows the operator.
 */
537 parse_assign (st_parser
*parser
, st_node
*assignee
)
540 st_node
*node
, *expression
;
542 token
= next (parser
);
544 expression
= parse_expression (parser
);
546 node
= st_node_new (ST_ASSIGN_NODE
);
547 node
->line
= st_token_get_line (token
);
548 node
->assign
.assignee
= assignee
;
549 node
->assign
.expression
= expression
;
/*
 * Builds an ST_CASCADE_NODE from a first message followed by ';'-separated
 * messages that share its receiver.
 *
 * The receiver of `first_message` is moved onto the cascade node
 * (first_message's own receiver is cleared to NULL), and first_message
 * becomes the first entry of the cascade's message list.
 *
 * While the current token is ST_TOKEN_SEMICOLON, a further message is parsed
 * with parse_message (parser, NULL), given the same super_send flag as the
 * first message, and appended. "expected cascade" is raised under a
 * condition not visible in this listing.
 */
555 parse_cascade (st_parser
*parser
, st_node
*first_message
)
558 st_node
*message
, *node
;
559 bool super_send
= first_message
->message
.super_send
;
561 token
= current (parser
->lexer
);
563 node
= st_node_new (ST_CASCADE_NODE
);
564 node
->line
= st_token_get_line (token
);
566 node
->cascade
.receiver
= first_message
->message
.receiver
;
567 node
->cascade
.messages
= st_list_append (node
->cascade
.messages
, first_message
);
569 first_message
->message
.receiver
= NULL
;
571 while (st_token_get_type (token
) == ST_TOKEN_SEMICOLON
) {
575 message
= parse_message (parser
, NULL
);
578 parse_error (parser
,"expected cascade", token
);
580 message
->message
.super_send
= super_send
;
582 node
->cascade
.messages
= st_list_append (node
->cascade
.messages
, message
);
583 token
= current (parser
->lexer
);
/*
 * Parses one expression: a receiver followed by its message sends, an
 * assignment, or a cascade.
 *
 * The receiver is chosen from the current token:
 *   - number, string, symbol, character, block and tuple tokens go through
 *     parse_primary();
 *   - an identifier also goes through parse_primary(); if ST_TOKEN_ASSIGN
 *     follows, the whole expression is an assignment and parse_assign()'s
 *     result is returned;
 *   - ST_TOKEN_LPAREN goes through parse_subexpression();
 *   - "expected expression" is raised on the remaining path.
 *
 * A receiver that is the variable "super" is detected explicitly; the
 * statement run in that case is not visible in this listing (presumably it
 * sets super_send -- TODO confirm). The sends are parsed with
 * parse_message() and tagged with super_send. If a semicolon follows, the
 * result is handed to parse_cascade().
 *
 * NOTE(review): message->message.super_send is written without a visible
 * check that parse_message() produced a message node -- confirm this is
 * valid when the expression is a bare receiver.
 */
590 parse_expression (st_parser
*parser
)
592 st_node
*receiver
= NULL
;
593 st_node
*message
, *cascade
;
595 bool super_send
=false;
597 token
= current (parser
->lexer
);
599 switch (st_token_get_type (token
)) {
600 case ST_TOKEN_NUMBER_CONST
:
601 case ST_TOKEN_STRING_CONST
:
602 case ST_TOKEN_SYMBOL_CONST
:
603 case ST_TOKEN_CHARACTER_CONST
:
604 case ST_TOKEN_BLOCK_BEGIN
:
605 case ST_TOKEN_TUPLE_BEGIN
:
607 receiver
= parse_primary (parser
);
610 case ST_TOKEN_IDENTIFIER
:
612 receiver
= parse_primary (parser
);
614 if (st_token_get_type (current (parser
->lexer
)) == ST_TOKEN_ASSIGN
)
615 return parse_assign (parser
, receiver
);
619 case ST_TOKEN_LPAREN
:
621 receiver
= parse_subexpression (parser
);
625 parse_error (parser
,"expected expression", token
);
628 /* check if receiver is pseudo-variable 'super' */
629 if (receiver
->type
== ST_VARIABLE_NODE
630 && streq (receiver
->variable
.name
, "super"))
633 message
= parse_message (parser
, receiver
);
634 message
->message
.super_send
= super_send
;
636 token
= current (parser
->lexer
);
637 if (st_token_get_type (token
) == ST_TOKEN_SEMICOLON
)
638 return parse_cascade (parser
, message
);
/*
 * Parses a parenthesised expression.
 *
 * The inner expression is parsed with parse_expression(); the token that
 * follows must be ST_TOKEN_RPAREN, otherwise "expected ')' after expression"
 * is raised. Handling of the opening and closing parentheses themselves is
 * not visible in this listing.
 */
644 parse_subexpression (st_parser
*parser
)
651 expression
= parse_expression (parser
);
653 token
= current (parser
->lexer
);
654 if (st_token_get_type (token
) != ST_TOKEN_RPAREN
)
655 parse_error (parser
,"expected ')' after expression", token
);
/*
 * Builds an ST_RETURN_NODE.
 *
 * parse_statement() calls this while the current token is ST_TOKEN_RETURN,
 * so the initial next() moves past it. The node records the line of the
 * following token and the expression parsed by parse_expression().
 */
663 parse_return (st_parser
*parser
)
668 token
= next (parser
);
670 node
= st_node_new (ST_RETURN_NODE
);
671 node
->line
= st_token_get_line (token
);
672 node
->retrn
.expression
= parse_expression (parser
);
/*
 * Parses a single statement: a return statement when the current token is
 * ST_TOKEN_RETURN, otherwise an ordinary expression.
 */
678 parse_statement (st_parser
*parser
)
682 token
= current (parser
->lexer
);
684 if (st_token_get_type (token
) == ST_TOKEN_RETURN
)
685 return parse_return (parser
);
687 return parse_expression (parser
);
/*
 * Parses a sequence of statements up to end of input or ST_TOKEN_BLOCK_END
 * and returns them as a node list.
 *
 * A statement that follows a return statement is still parsed, so that
 * errors inside it are reported first, and then "statement is unreachable"
 * is raised. A period after a statement is consumed when present.
 *
 * A final pass sets the is_statement flag on message and cascade nodes:
 *   - inside a block (parser->in_block), on every such node except the last
 *     one in the list;
 *   - outside a block, on every such node.
 */
691 parse_statements (st_parser
*parser
)
694 st_node
*expression
= NULL
, *statements
= NULL
;
696 token
= current (parser
->lexer
);
698 while (st_token_get_type (token
) != ST_TOKEN_EOF
699 && st_token_get_type (token
) != ST_TOKEN_BLOCK_END
) {
701 /* check for unreachable statements */
702 if (expression
&& expression
->type
== ST_RETURN_NODE
) {
703 /* first check that unreachable statement is valid ! */
704 parse_statement (parser
);
705 parse_error (parser
,"statement is unreachable", token
);
708 expression
= parse_statement (parser
);
709 statements
= st_node_list_append (statements
, expression
);
711 /* Consume statement delimiter ('.') if there is one.
713 * If the current token is a wrongly placed/mismatched
714 * closing token (')' or ']'), then parse_expression() will handle
717 token
= current (parser
->lexer
);
718 if (st_token_get_type (token
) == ST_TOKEN_PERIOD
) {
719 token
= next (parser
);
724 for (st_node
*node
= statements
; node
; node
= node
->next
) {
725 if (parser
->in_block
&& node
->type
== ST_MESSAGE_NODE
&& node
->next
!= NULL
)
726 node
->message
.is_statement
= true;
727 else if ((!parser
->in_block
) && node
->type
== ST_MESSAGE_NODE
)
728 node
->message
.is_statement
= true;
730 if (parser
->in_block
&& node
->type
== ST_CASCADE_NODE
&& node
->next
!= NULL
)
731 node
->cascade
.is_statement
= true;
732 else if ((!parser
->in_block
) && node
->type
== ST_CASCADE_NODE
)
733 node
->cascade
.is_statement
= true;
/*
 * Parses an optional primitive declaration and yields its index.
 *
 * The declaration starts with the binary selector "<"; the branch taken when
 * it is absent is not visible in this listing (parse_method() pre-sets the
 * primitive field to -1, so presumably -1 is returned -- TODO confirm).
 *
 * After "<" the keyword "primitive:" is required, otherwise "expected
 * primitive declaration" is raised. It must be followed by a string literal
 * ("expected string literal"), which is looked up with
 * st_primitive_index_for_name(); "unknown primitive" is raised under a
 * condition not visible here. The declaration must end with the binary
 * selector ">" ("expected '>'").
 */
741 parse_primitive (st_parser
*parser
)
746 token
= current (parser
->lexer
);
747 if (st_token_get_type (token
) != ST_TOKEN_BINARY_SELECTOR
748 || !streq (st_token_get_text (token
), "<"))
751 token
= next (parser
);
752 if (st_token_get_type (token
) == ST_TOKEN_KEYWORD_SELECTOR
753 && streq (st_token_get_text (token
), "primitive:")) {
755 token
= next (parser
);
756 if (st_token_get_type (token
) != ST_TOKEN_STRING_CONST
)
757 parse_error (parser
,"expected string literal", token
);
759 index
= st_primitive_index_for_name (st_token_get_text (token
));
761 parse_error (parser
,"unknown primitive", token
);
763 token
= next (parser
);
764 if (st_token_get_type (token
) != ST_TOKEN_BINARY_SELECTOR
765 || !streq (st_token_get_text (token
), ">"))
766 parse_error (parser
,"expected '>'", token
);
771 parse_error (parser
,"expected primitive declaration", token
);
778 /* Temporaries grammar: '|' identifier* '|' */
/*
 * Parses a temporaries declaration: identifiers enclosed in a pair of "|"
 * binary selectors.
 *
 * When the current token is not "|", the first branch is taken; its body is
 * not visible in this listing (presumably there are no temporaries and NULL
 * is returned -- TODO confirm).
 *
 * Each identifier becomes an ST_VARIABLE_NODE with the token's line and a
 * copy of its text, appended to the list. The token after the identifiers
 * must be "|", otherwise "expected '|'" is raised; the parser then advances
 * past it.
 */
781 parse_temporaries (st_parser
*parser
)
784 st_node
*temporaries
= NULL
, *temp
;
786 token
= current (parser
->lexer
);
788 if (st_token_get_type (token
) != ST_TOKEN_BINARY_SELECTOR
789 || !streq (st_token_get_text (token
), "|"))
792 token
= next (parser
);
793 while (st_token_get_type (token
) == ST_TOKEN_IDENTIFIER
) {
795 temp
= st_node_new (ST_VARIABLE_NODE
);
796 temp
->line
= st_token_get_line (token
);
797 temp
->variable
.name
= st_strdup (st_token_get_text (token
));
799 temporaries
= st_node_list_append (temporaries
, temp
);
801 token
= next (parser
);
804 if (st_token_get_type (token
) != ST_TOKEN_BINARY_SELECTOR
805 || !streq (st_token_get_text (token
), "|"))
806 parse_error (parser
,"expected '|'", token
);
808 token
= next (parser
);
/*
 * Parses the message pattern (selector plus argument names) at the start of
 * a method and stores the result in `method`.
 *
 * The first token decides the form:
 *   - identifier: unary pattern; the selector is the identifier and there
 *     are no arguments;
 *   - binary selector: the selector is the operator text and exactly one
 *     identifier must follow ("argument name expected after binary
 *     selector");
 *   - keyword selector: keywords are concatenated into one selector, and
 *     each keyword must be followed by an identifier ("argument name
 *     expected after keyword"), which is appended to the argument list;
 *   - "invalid message pattern" is raised on the remaining path.
 *
 * method->method.selector, .precedence and .arguments are filled in
 * accordingly; argument names are stored as ST_VARIABLE_NODE entries.
 */
814 parse_message_pattern (st_parser
*parser
, st_node
*method
)
818 st_node
*arguments
= NULL
;
820 token
= next (parser
);
821 type
= st_token_get_type (token
);
823 if (type
== ST_TOKEN_IDENTIFIER
) {
825 method
->method
.selector
= st_symbol_new (st_token_get_text (token
));
826 method
->method
.precedence
= ST_UNARY_PRECEDENCE
;
830 } else if (type
== ST_TOKEN_BINARY_SELECTOR
) {
832 method
->method
.selector
= st_symbol_new (st_token_get_text (token
));
834 token
= next (parser
);
835 if (st_token_get_type (token
) != ST_TOKEN_IDENTIFIER
)
836 parse_error (parser
,"argument name expected after binary selector", token
);
838 arguments
= st_node_new (ST_VARIABLE_NODE
);
839 arguments
->line
= st_token_get_line (token
);
840 arguments
->variable
.name
= st_strdup (st_token_get_text (token
));
842 method
->method
.precedence
= ST_BINARY_PRECEDENCE
;
846 } else if (type
== ST_TOKEN_KEYWORD_SELECTOR
) {
848 char *temp
, *string
= st_strdup ("");
851 while (st_token_get_type (token
) == ST_TOKEN_KEYWORD_SELECTOR
) {
853 temp
= st_strconcat (string
, st_token_get_text (token
), NULL
);
857 token
= next (parser
);
858 if (st_token_get_type (token
) != ST_TOKEN_IDENTIFIER
)
859 parse_error (parser
,"argument name expected after keyword", token
);
861 arg
= st_node_new (ST_VARIABLE_NODE
);
862 arg
->line
= st_token_get_line (token
);
863 arg
->variable
.name
= st_strdup (st_token_get_text (token
));
864 arguments
= st_node_list_append (arguments
, arg
);
866 token
= next (parser
);
869 method
->method
.selector
= st_symbol_new (string
);
870 method
->method
.precedence
= ST_KEYWORD_PRECEDENCE
;
874 parse_error (parser
,"invalid message pattern", token
);
877 method
->method
.arguments
= arguments
;
/*
 * Parses a complete method into an ST_METHOD_NODE.
 *
 * The parser starts outside any block (in_block = false) and the primitive
 * field starts at -1. The parts are parsed in source order: message pattern,
 * temporaries, primitive declaration, statements. The closing assertion
 * checks that the node is still of type ST_METHOD_NODE.
 */
881 parse_method (st_parser
*parser
)
885 parser
->in_block
= false;
887 node
= st_node_new (ST_METHOD_NODE
);
889 node
->method
.primitive
= -1;
891 parse_message_pattern (parser
, node
);
893 node
->method
.temporaries
= parse_temporaries (parser
);
894 node
->method
.primitive
= parse_primitive (parser
);
895 node
->method
.statements
= parse_statements (parser
);
897 st_assert (node
->type
== ST_METHOD_NODE
);
903 st_parser_parse (st_lexer
*lexer
,
904 st_compiler_error
*error
)
909 st_assert (lexer
!= NULL
);
911 parser
= st_new0 (st_parser
);
913 parser
->lexer
= lexer
;
914 parser
->error
= error
;
915 parser
->in_block
= false;
917 if (!setjmp (parser
->jmploc
)) {
918 method
= parse_method (parser
);