Updated kn translations
[gcalctool.git] / src / equation-lexer.vala
blobf012168111e9de40e5efe85aecab8863d06faf7b
/*
 * Copyright (C) 2012 Arth Patel
 * Copyright (C) 2012 Robert Ancell
 *
 * This program is free software: you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 2 of the License, or (at your option) any later
 * version. See http://www.gnu.org/copyleft/gpl.html for the full text of the
 * license.
 */
/*
 * Enum for tokens generated by pre-lexer and lexer.
 *
 * The PL_* values classify single characters and are produced by the
 * pre-lexer; the remaining values are the token kinds produced by the lexer.
 * Member order is significant because it determines the integer values.
 */
public enum LexerTokenType
{
    UNKNOWN,            /* Unknown */

    /* These are all Pre-Lexer tokens, returned by pre-lexer */
    PL_DECIMAL,         /* Decimal separator */
    PL_DIGIT,           /* Decimal digit */
    PL_HEX,             /* A-F of Hex digits */
    PL_SUPER_DIGIT,     /* Super digits */
    PL_SUPER_MINUS,     /* Super minus */
    PL_SUB_DIGIT,       /* Sub digits */
    PL_FRACTION,        /* Fractions */
    PL_DEGREE,          /* Degree */
    PL_MINUTE,          /* Minutes */
    PL_SECOND,          /* Seconds */
    PL_LETTER,          /* Alphabets */
    PL_EOS,             /* End of stream */
    PL_SKIP,            /* Skip this symbol (whitespace or newline). */

    /* These are all tokens, returned by Lexer. */
    ADD,                /* Plus */
    SUBTRACT,           /* Minus */
    MULTIPLY,           /* Multiply */
    DIVIDE,             /* Divide */
    MOD,                /* Modulus */
    L_FLOOR,            /* Floor ( Left ) */
    R_FLOOR,            /* Floor ( Right ) */
    L_CEILING,          /* Ceiling ( Left ) */
    R_CEILING,          /* Ceiling ( Right ) */
    ROOT,               /* Square root */
    ROOT_3,             /* Cube root */
    ROOT_4,             /* Fourth root */
    NOT,                /* Bitwise NOT */
    AND,                /* Bitwise AND */
    OR,                 /* Bitwise OR */
    XOR,                /* Bitwise XOR */
    IN,                 /* IN ( for converter ) */
    NUMBER,             /* Number */
    SUP_NUMBER,         /* Super Number */
    NSUP_NUMBER,        /* Negative Super Number */
    SUB_NUMBER,         /* Sub Number */
    FUNCTION,           /* Function */
    VARIABLE,           /* Variable name */
    ASSIGN,             /* = */
    L_R_BRACKET,        /* ( */
    R_R_BRACKET,        /* ) */
    L_S_BRACKET,        /* [ */
    R_S_BRACKET,        /* ] */
    L_C_BRACKET,        /* { */
    R_C_BRACKET,        /* } */
    ABS,                /* | */
    POWER,              /* ^ */
    FACTORIAL,          /* ! */
    PERCENTAGE          /* % */
}
// FIXME: Merge into lexer
/*
 * Character-level scanner used by Lexer.
 *
 * Reads `stream` one unichar at a time and classifies each character as a
 * LexerTokenType. It also keeps a marker so the text scanned since the marker
 * can be extracted as a substring.
 */
public class PreLexer
{
    public string stream;       /* String being scanned */
    public int index;           /* Current scan position in stream, moved by get_next_char ()/get_prev_char () */
    public int mark_index;      /* Location, last marked. Useful for getting substrings as part of highlighting */
    private bool eos = false;   /* True when the last get_next_token () ran out of characters */

    /* Start scanning `input` from its beginning, with the marker at the start. */
    public PreLexer (string input)
    {
        stream = input;
        index = 0;
        mark_index = 0;
    }

    /* Roll back last scanned unichar. */
    public void roll_back ()
    {
        /* The last read hit end of stream and consumed nothing, so there is
         * no character to step back over: only clear the flag. */
        if (eos)
        {
            eos = false;
            return;
        }

        unichar c;
        stream.get_prev_char (ref index, out c);
    }

    /* Set marker index. To be used for highlighting and error reporting. */
    public void set_marker ()
    {
        mark_index = index;
    }

    /* Get marked substring (from the marker up to the current position). To be used for error reporting. */
    public string get_marked_substring ()
    {
        return stream.substring (mark_index, index - mark_index);
    }

    /*
     * Pre-Lexer tokenizer. To be called only by Lexer.
     *
     * Reads the next character and returns its classification. Returns
     * PL_EOS when no character is left and UNKNOWN for a character that
     * matches none of the recognised classes.
     */
    public LexerTokenType get_next_token ()
    {
        unichar c;
        if (!stream.get_next_char (ref index, out c))
        {
            // We have to flag if we ran out of chars, as roll_back from PL_EOS should have no effect
            eos = true;
            return LexerTokenType.PL_EOS;
        }
        eos = false;

        if (c == ',' || c == '.')
            return LexerTokenType.PL_DECIMAL;

        if (c.isdigit ())
            return LexerTokenType.PL_DIGIT;

        /* Tested before isalpha () below, so a-f/A-F are PL_HEX rather than PL_LETTER. */
        if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
            return LexerTokenType.PL_HEX;

        if (c == '⁰' || c == '¹' || c == '²' || c == '³' || c == '⁴' || c == '⁵' || c == '⁶' || c == '⁷' || c == '⁸' || c == '⁹')
            return LexerTokenType.PL_SUPER_DIGIT;

        if (c == '⁻')
            return LexerTokenType.PL_SUPER_MINUS;

        if (c == '₀' || c == '₁' || c == '₂' || c == '₃' || c == '₄' || c == '₅' || c == '₆' || c == '₇' || c == '₈' || c == '₉')
            return LexerTokenType.PL_SUB_DIGIT;

        if (c == '½' || c == '⅓' || c == '⅔' || c == '¼' || c == '¾' || c == '⅕' || c == '⅖' || c == '⅗' || c == '⅘' || c == '⅙' || c == '⅚' || c == '⅛' || c == '⅜' || c == '⅝' || c == '⅞')
            return LexerTokenType.PL_FRACTION;

        if (c == '°')
            return LexerTokenType.PL_DEGREE;

        if (c == '\'')
            return LexerTokenType.PL_MINUTE;

        if (c == '"')
            return LexerTokenType.PL_SECOND;

        if (c.isalpha () || c == '_')
            return LexerTokenType.PL_LETTER;

        if (c == '∧')
            return LexerTokenType.AND;

        if (c == '∨')
            return LexerTokenType.OR;

        if (c == '⊻' || c == '⊕')
            return LexerTokenType.XOR;

        if (c == '¬' || c == '~')
            return LexerTokenType.NOT;

        if (c == '+')
            return LexerTokenType.ADD;

        if (c == '-' || c == '−' || c == '–')
            return LexerTokenType.SUBTRACT;

        if (c == '*' || c == '×')
            return LexerTokenType.MULTIPLY;

        if (c == '/' || c == '∕' || c == '÷')
            return LexerTokenType.DIVIDE;

        if (c == '⌊')
            return LexerTokenType.L_FLOOR;

        if (c == '⌋')
            return LexerTokenType.R_FLOOR;

        if (c == '⌈')
            return LexerTokenType.L_CEILING;

        if (c == '⌉')
            return LexerTokenType.R_CEILING;

        if (c == '√')
            return LexerTokenType.ROOT;

        if (c == '∛')
            return LexerTokenType.ROOT_3;

        if (c == '∜')
            return LexerTokenType.ROOT_4;

        if (c == '=')
            return LexerTokenType.ASSIGN;

        if (c == '(')
            return LexerTokenType.L_R_BRACKET;

        if (c == ')')
            return LexerTokenType.R_R_BRACKET;

        if (c == '[')
            return LexerTokenType.L_S_BRACKET;

        if (c == ']')
            return LexerTokenType.R_S_BRACKET;

        if (c == '{')
            return LexerTokenType.L_C_BRACKET;

        if (c == '}')
            return LexerTokenType.R_C_BRACKET;

        if (c == '|')
            return LexerTokenType.ABS;

        if (c == '^')
            return LexerTokenType.POWER;

        if (c == '!')
            return LexerTokenType.FACTORIAL;

        if (c == '%')
            return LexerTokenType.PERCENTAGE;

        if (c == ' ' || c == '\r' || c == '\t' || c == '\n')
            return LexerTokenType.PL_SKIP;

        return LexerTokenType.UNKNOWN;
    }
}
/* Structure to hold single token. */
public class LexerToken
{
    public string text;             /* Copy of token string. */
    public uint start_index;        /* Start index in original stream. */
    public uint end_index;          /* End index in original stream. */
    public LexerTokenType type;     /* Type of token. */
}
/*
 * Structure to hold lexer state and all the tokens.
 *
 * scan () turns the whole input into a list of LexerTokens; the parser then
 * walks that list with get_next_token () and roll_back ().
 */
public class Lexer
{
    private Parser parser;              /* Parser used for error reporting and function lookup. */
    private PreLexer prelexer;          /* Pre-lexer. Pre-lexer is part of lexer. */
    public List<LexerToken> tokens;     /* Tokens produced by scan (), in input order. */
    private uint next_token;            /* Index of next, to be sent, token. */
    private int number_base;            /* Base passed to mp_set_from_string () when testing for numbers. */

    /* Create a lexer over `input`; nothing is scanned until scan () is called. */
    public Lexer (string input, Parser parser, int number_base = 10)
    {
        prelexer = new PreLexer (input);
        tokens = new List<LexerToken> ();
        next_token = 0;
        this.parser = parser;
        this.number_base = number_base;
    }

    /* Tokenize the whole input. The final PL_EOS token is appended too. */
    public void scan ()
    {
        while (true)
        {
            var token = insert_next_token ();
            tokens.append (token);
            if (token.type == LexerTokenType.PL_EOS)
                break;
        }
    }

    /* Get next token interface. Will be called by parser to get pointer to next token in token stream. */
    public LexerToken get_next_token ()
    {
        var token = tokens.nth_data (next_token);
        next_token++;
        /* Never let the cursor run further than one past the last token. */
        if (next_token >= tokens.length ())
            next_token = tokens.length ();

        return token;
    }

    /* Roll back one lexer token. */
    public void roll_back ()
    {
        if (next_token > 0)
            next_token--;
    }

    /* True if the currently marked text is a function name known to the parser. */
    private bool check_if_function ()
    {
        var name = prelexer.get_marked_substring ();

        return parser.function_is_defined (name);
    }

    /*
     * True if the marked text, or a prefix of it, parses as a number in
     * number_base. When a shorter prefix is what parses, the pre-lexer is left
     * rolled back to the end of that prefix; when nothing parses, the
     * pre-lexer position is restored.
     */
    private bool check_if_number ()
    {
        int count = 0;
        var text = prelexer.get_marked_substring ();

        var tmp = mp_set_from_string (text, number_base);
        if (tmp != null)
            return true;
        else
        {
            /* Try to rollback several characters to see, if that yields any number. */
            while (text != "")
            {
                tmp = mp_set_from_string (text, number_base);
                if (tmp != null)
                    return true;
                count++;
                prelexer.roll_back ();
                text = prelexer.get_marked_substring ();
            }

            /* Undo all rollbacks. */
            while (count-- > 0)
                prelexer.get_next_token ();

            return false;
        }
    }

    /* Build a token of the given type from the currently marked text. The caller (scan ()) stores it. */
    private LexerToken insert_token (LexerTokenType type)
    {
        var token = new LexerToken ();
        token.text = prelexer.get_marked_substring ();
        token.start_index = prelexer.mark_index;
        token.end_index = prelexer.index;
        token.type = type;

        return token;
    }

    /* Generates next token from pre-lexer stream and calls insert_token () to build it. */
    private LexerToken insert_next_token ()
    {
        /* Mark start of next token */
        prelexer.set_marker ();

        /* Ignore whitespace */
        var type = prelexer.get_next_token ();
        while (type == LexerTokenType.PL_SKIP)
        {
            prelexer.set_marker ();
            type = prelexer.get_next_token ();
        }

        /* Single-character tokens map straight through. */
        if (type == LexerTokenType.AND
            || type == LexerTokenType.OR
            || type == LexerTokenType.XOR
            || type == LexerTokenType.NOT
            || type == LexerTokenType.ADD
            || type == LexerTokenType.SUBTRACT
            || type == LexerTokenType.MULTIPLY
            || type == LexerTokenType.DIVIDE
            || type == LexerTokenType.L_FLOOR
            || type == LexerTokenType.R_FLOOR
            || type == LexerTokenType.L_CEILING
            || type == LexerTokenType.R_CEILING
            || type == LexerTokenType.ROOT
            || type == LexerTokenType.ROOT_3
            || type == LexerTokenType.ROOT_4
            || type == LexerTokenType.ASSIGN
            || type == LexerTokenType.L_R_BRACKET
            || type == LexerTokenType.R_R_BRACKET
            || type == LexerTokenType.L_S_BRACKET
            || type == LexerTokenType.R_S_BRACKET
            || type == LexerTokenType.L_C_BRACKET
            || type == LexerTokenType.R_C_BRACKET
            || type == LexerTokenType.ABS
            || type == LexerTokenType.POWER
            || type == LexerTokenType.FACTORIAL
            || type == LexerTokenType.PERCENTAGE)
            return insert_token (type);

        /* [LexerTokenType.PL_SUPER_MINUS][LexerTokenType.PL_SUPER_DIGIT]+ */
        if (type == LexerTokenType.PL_SUPER_MINUS)
        {
            if ((type = prelexer.get_next_token ()) != LexerTokenType.PL_SUPER_DIGIT)
            {
                /* ERROR: expected LexerTokenType.PL_SUP_DIGIT */
                parser.set_error (ErrorCode.MP, prelexer.get_marked_substring (), prelexer.mark_index, prelexer.index);
                return insert_token (LexerTokenType.UNKNOWN);
            }

            /* Get all LexerTokenType.PL_SUPER_DIGITs. */
            while (prelexer.get_next_token () == LexerTokenType.PL_SUPER_DIGIT);
            prelexer.roll_back ();

            return insert_token (LexerTokenType.NSUP_NUMBER);
        }

        /* [LexerTokenType.PL_SUPER_DIGIT]+ */
        if (type == LexerTokenType.PL_SUPER_DIGIT)
        {
            while (prelexer.get_next_token () == LexerTokenType.PL_SUPER_DIGIT);
            prelexer.roll_back ();

            return insert_token (LexerTokenType.SUP_NUMBER);
        }

        /* [LexerTokenType.PL_SUB_DIGIT]+ */
        if (type == LexerTokenType.PL_SUB_DIGIT)
        {
            while (prelexer.get_next_token () == LexerTokenType.PL_SUB_DIGIT);
            prelexer.roll_back ();

            return insert_token (LexerTokenType.SUB_NUMBER);
        }

        /* [LexerTokenType.PL_FRACTION] */
        if (type == LexerTokenType.PL_FRACTION)
            return insert_token (LexerTokenType.NUMBER);

        if (type == LexerTokenType.PL_DIGIT)
            return insert_digit ();

        if (type == LexerTokenType.PL_DECIMAL)
            return insert_decimal ();

        if (type == LexerTokenType.PL_HEX)
            return insert_hex ();

        if (type == LexerTokenType.PL_LETTER)
            return insert_letter ();

        if (type == LexerTokenType.PL_EOS)
            return insert_token (LexerTokenType.PL_EOS);

        /* ERROR: Unexpected token */
        parser.set_error (ErrorCode.INVALID, prelexer.get_marked_substring (), prelexer.mark_index, prelexer.index);

        return insert_token (LexerTokenType.UNKNOWN);
    }

    /* Scan a token that started with a decimal digit: plain numbers, digit+fraction, subscript suffixes and angles. */
    private LexerToken insert_digit ()
    {
        var type = prelexer.get_next_token ();
        while (type == LexerTokenType.PL_DIGIT)
            type = prelexer.get_next_token ();

        if (type == LexerTokenType.PL_FRACTION)
            return insert_token (LexerTokenType.NUMBER);
        else if (type == LexerTokenType.PL_SUB_DIGIT)
        {
            while (prelexer.get_next_token () == LexerTokenType.PL_SUB_DIGIT);
            prelexer.roll_back ();
            return insert_token (LexerTokenType.NUMBER);
        }
        else if (type == LexerTokenType.PL_DEGREE)
        {
            type = prelexer.get_next_token ();
            if (type == LexerTokenType.PL_DIGIT)
            {
                while ((type = prelexer.get_next_token ()) == LexerTokenType.PL_DIGIT);
                if (type == LexerTokenType.PL_DECIMAL)
                    return insert_angle_num_dm ();
                else if (type == LexerTokenType.PL_MINUTE)
                {
                    type = prelexer.get_next_token ();
                    if (type == LexerTokenType.PL_DIGIT)
                    {
                        while ((type = prelexer.get_next_token ()) == LexerTokenType.PL_DIGIT);
                        if (type == LexerTokenType.PL_DECIMAL)
                            return insert_angle_num_dms ();
                        else if (type == LexerTokenType.PL_SECOND)
                            return insert_token (LexerTokenType.NUMBER);
                        else
                        {
                            /* ERROR: expected LexerTokenType.PL_SECOND */
                            parser.set_error (ErrorCode.MP, prelexer.get_marked_substring (), prelexer.mark_index, prelexer.index);
                            return insert_token (LexerTokenType.UNKNOWN);
                        }
                    }
                    else if (type == LexerTokenType.PL_DECIMAL)
                        return insert_angle_num_dms ();
                    else
                    {
                        prelexer.roll_back ();
                        return insert_token (LexerTokenType.NUMBER);
                    }
                }
                else
                {
                    /* ERROR: expected LexerTokenType.PL_MINUTE | LexerTokenType.PL_DIGIT */
                    parser.set_error (ErrorCode.MP, prelexer.get_marked_substring (), prelexer.mark_index, prelexer.index);
                    return insert_token (LexerTokenType.UNKNOWN);
                }
            }
            else if (type == LexerTokenType.PL_DECIMAL)
                return insert_angle_num_dm ();
            else
            {
                /* The character after the degree sign is not part of the
                 * angle: give it back so it starts the next token instead of
                 * being swallowed into this number's text. */
                prelexer.roll_back ();
                return insert_token (LexerTokenType.NUMBER);
            }
        }
        else if (type == LexerTokenType.PL_DECIMAL)
            return insert_decimal ();
        else if (type == LexerTokenType.PL_HEX)
            return insert_hex_dec ();
        else
        {
            prelexer.roll_back ();
            return insert_token (LexerTokenType.NUMBER);
        }
    }

    /* Scan the fractional minutes of a degrees/minutes angle; called after the decimal separator. Expects digits then a minute mark. */
    private LexerToken insert_angle_num_dm ()
    {
        var type = prelexer.get_next_token ();
        if (type != LexerTokenType.PL_DIGIT)
        {
            /* ERROR: expected LexerTokenType.PL_DIGIT */
            parser.set_error (ErrorCode.MP, prelexer.get_marked_substring (), prelexer.mark_index, prelexer.index);
            return insert_token (LexerTokenType.UNKNOWN);
        }

        /* Consume the remaining digits; `type` ends up as the first non-digit. */
        while (type == LexerTokenType.PL_DIGIT)
            type = prelexer.get_next_token ();

        if (type == LexerTokenType.PL_MINUTE)
            return insert_token (LexerTokenType.NUMBER);
        else
        {
            /* ERROR: expected LexerTokenType.PL_MINUTE */
            parser.set_error (ErrorCode.MP, prelexer.get_marked_substring (), prelexer.mark_index, prelexer.index);
            return insert_token (LexerTokenType.UNKNOWN);
        }
    }

    /* Scan the fractional seconds of a degrees/minutes/seconds angle; called after the decimal separator. Expects digits then a second mark. */
    private LexerToken insert_angle_num_dms ()
    {
        var type = prelexer.get_next_token ();
        if (type != LexerTokenType.PL_DIGIT)
        {
            /* ERROR: expected LexerTokenType.PL_DIGIT */
            parser.set_error (ErrorCode.MP, prelexer.get_marked_substring (), prelexer.mark_index, prelexer.index);
            return insert_token (LexerTokenType.UNKNOWN);
        }
        while ((type = prelexer.get_next_token ()) == LexerTokenType.PL_DIGIT);
        if (type == LexerTokenType.PL_SECOND)
            return insert_token (LexerTokenType.NUMBER);
        else
        {
            /* ERROR: expected LexerTokenType.PL_SECOND */
            parser.set_error (ErrorCode.MP, prelexer.get_marked_substring (), prelexer.mark_index, prelexer.index);
            return insert_token (LexerTokenType.UNKNOWN);
        }
    }

    /* Scan what follows a decimal separator: digits (optionally ending in a degree sign, hex digits or subscript digits) or hex digits. */
    private LexerToken insert_decimal ()
    {
        var type = prelexer.get_next_token ();
        if (type == LexerTokenType.PL_DIGIT)
        {
            while ((type = prelexer.get_next_token ()) == LexerTokenType.PL_DIGIT);
            if (type == LexerTokenType.PL_DEGREE)
                return insert_token (LexerTokenType.NUMBER);
            else if (type == LexerTokenType.PL_HEX)
                return insert_decimal_hex ();
            else if (type == LexerTokenType.PL_SUB_DIGIT)
            {
                while (prelexer.get_next_token () == LexerTokenType.PL_SUB_DIGIT);
                prelexer.roll_back ();
                return insert_token (LexerTokenType.NUMBER);
            }
            else
            {
                prelexer.roll_back ();
                return insert_token (LexerTokenType.NUMBER);
            }
        }
        else if (type == LexerTokenType.PL_HEX)
            return insert_decimal_hex ();
        else
        {
            /* ERROR: expected LexerTokenType.PL_DIGIT | LexerTokenType.PL_HEX */
            parser.set_error (ErrorCode.MP, prelexer.get_marked_substring (), prelexer.mark_index, prelexer.index);
            return insert_token (LexerTokenType.UNKNOWN);
        }
    }

    /* Scan a token that started with a hex letter (a-f/A-F); it may turn out to be a number, a function or a variable. */
    private LexerToken insert_hex ()
    {
        var type = prelexer.get_next_token ();
        while (type == LexerTokenType.PL_HEX)
            type = prelexer.get_next_token ();

        if (type == LexerTokenType.PL_DIGIT)
            return insert_hex_dec ();
        else if (type == LexerTokenType.PL_DECIMAL)
            return insert_decimal_hex ();
        else if (type == LexerTokenType.PL_SUB_DIGIT)
        {
            while (prelexer.get_next_token () == LexerTokenType.PL_SUB_DIGIT);
            prelexer.roll_back ();

            if (check_if_number ())
                return insert_token (LexerTokenType.NUMBER);
            else
            {
                if (check_if_function ())
                    return insert_token (LexerTokenType.FUNCTION);
                else
                    return insert_token (LexerTokenType.VARIABLE);
            }
        }
        else if (type == LexerTokenType.PL_LETTER)
            return insert_letter ();
        else
        {
            prelexer.roll_back ();
            if (check_if_number ())
                return insert_token (LexerTokenType.NUMBER);
            else
            {
                if (check_if_function ())
                    return insert_token (LexerTokenType.FUNCTION);
                else
                    return insert_token (LexerTokenType.VARIABLE);
            }
        }
    }

    /* Scan a run of mixed decimal and hex digits (integer part of a number). */
    private LexerToken insert_hex_dec ()
    {
        var type = prelexer.get_next_token ();
        while (type == LexerTokenType.PL_DIGIT || type == LexerTokenType.PL_HEX)
            type = prelexer.get_next_token ();

        if (type == LexerTokenType.PL_DECIMAL)
            return insert_decimal_hex ();
        else if (type == LexerTokenType.PL_SUB_DIGIT)
        {
            while (prelexer.get_next_token () == LexerTokenType.PL_SUB_DIGIT);
            prelexer.roll_back ();
            return insert_token (LexerTokenType.NUMBER);
        }
        else
        {
            /* No roll back here: check_if_number () itself backs off
             * trailing characters until the marked text parses. */
            if (check_if_number ())
                return insert_token (LexerTokenType.NUMBER);
            /* ERROR: expected LexerTokenType.PL_DECIMAL | LexerTokenType.PL_DIGIT | LexerTokenType.PL_HEX */
            parser.set_error (ErrorCode.MP, prelexer.get_marked_substring (), prelexer.mark_index, prelexer.index);
            return insert_token (LexerTokenType.UNKNOWN);
        }
    }

    /* Scan the fractional part of a number made of decimal and hex digits, with an optional subscript suffix. */
    private LexerToken insert_decimal_hex ()
    {
        /* Make up of digits and hexadecimal characters */
        var type = prelexer.get_next_token ();
        while (type == LexerTokenType.PL_DIGIT || type == LexerTokenType.PL_HEX)
            type = prelexer.get_next_token ();

        /* Allow a subdigit suffix */
        while (type == LexerTokenType.PL_SUB_DIGIT)
            type = prelexer.get_next_token ();

        prelexer.roll_back ();

        return insert_token (LexerTokenType.NUMBER);
    }

    /* Scan a word and classify it as a keyword operator (mod/and/or/xor/not/in, case-insensitive), a function or a variable. */
    private LexerToken insert_letter ()
    {
        /* Get string of letters */
        var type = prelexer.get_next_token ();
        while (type == LexerTokenType.PL_LETTER || type == LexerTokenType.PL_HEX)
            type = prelexer.get_next_token ();

        /* Allow a subdigit suffix */
        while (type == LexerTokenType.PL_SUB_DIGIT)
            type = prelexer.get_next_token ();

        prelexer.roll_back ();

        var name = prelexer.get_marked_substring ().down ();
        if (name == "mod")
            return insert_token (LexerTokenType.MOD);
        if (name == "and")
            return insert_token (LexerTokenType.AND);
        if (name == "or")
            return insert_token (LexerTokenType.OR);
        if (name == "xor")
            return insert_token (LexerTokenType.XOR);
        if (name == "not")
            return insert_token (LexerTokenType.NOT);
        if (name == "in")
            return insert_token (LexerTokenType.IN);
        if (check_if_function ())
            return insert_token (LexerTokenType.FUNCTION);
        else
            return insert_token (LexerTokenType.VARIABLE);
    }
}