vfs: check userland buffers before reading them.
[haiku.git] / src / kits / shared / ExpressionParser.cpp
blob078e060752cbe232e58bccc97b1c8e23dc7bb3db
1 /*
2 * Copyright 2006-2012 Haiku, Inc. All Rights Reserved.
3 * Distributed under the terms of the MIT License.
5 * Authors:
6 * Stephan Aßmus <superstippi@gmx.de>
7 * John Scipione <jscipione@gmail.com>
8 * Ingo Weinhold <bonefish@cs.tu-berlin.de>
9 */
11 #include <ExpressionParser.h>
13 #include <ctype.h>
14 #include <math.h>
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <strings.h>
19 #include <m_apm.h>
22 static const int32 kMaxDecimalPlaces = 32;
25 enum {
26 TOKEN_NONE = 0,
27 TOKEN_IDENTIFIER,
28 TOKEN_CONSTANT,
30 TOKEN_END_OF_LINE = '\n',
32 TOKEN_PLUS = '+',
33 TOKEN_MINUS = '-',
35 TOKEN_STAR = '*',
36 TOKEN_SLASH = '/',
37 TOKEN_MODULO = '%',
39 TOKEN_POWER = '^',
40 TOKEN_FACTORIAL = '!',
42 TOKEN_OPENING_BRACKET = '(',
43 TOKEN_CLOSING_BRACKET = ')',
45 TOKEN_AND = '&',
46 TOKEN_OR = '|',
47 TOKEN_NOT = '~'
51 struct ExpressionParser::Token {
52 Token()
53 : string(""),
54 type(TOKEN_NONE),
55 value(0),
56 position(0)
60 Token(const Token& other)
61 : string(other.string),
62 type(other.type),
63 value(other.value),
64 position(other.position)
68 Token(const char* string, int32 length, int32 position, int32 type)
69 : string(string, length),
70 type(type),
71 value(0),
72 position(position)
76 Token& operator=(const Token& other)
78 string = other.string;
79 type = other.type;
80 value = other.value;
81 position = other.position;
82 return *this;
85 BString string;
86 int32 type;
87 MAPM value;
89 int32 position;
93 class ExpressionParser::Tokenizer {
94 public:
95 Tokenizer()
96 : fString(""),
97 fCurrentChar(NULL),
98 fCurrentToken(),
99 fReuseToken(false),
100 fHexSupport(false)
104 void SetSupportHexInput(bool enabled)
106 fHexSupport = enabled;
109 void SetTo(const char* string)
111 fString = string;
112 fCurrentChar = fString.String();
113 fCurrentToken = Token();
114 fReuseToken = false;
117 const Token& NextToken()
119 if (fCurrentToken.type == TOKEN_END_OF_LINE)
120 return fCurrentToken;
122 if (fReuseToken) {
123 fReuseToken = false;
124 //printf("next token (recycled): '%s'\n", fCurrentToken.string.String());
125 return fCurrentToken;
128 while (*fCurrentChar != 0 && isspace(*fCurrentChar))
129 fCurrentChar++;
131 if (*fCurrentChar == 0)
132 return fCurrentToken = Token("", 0, _CurrentPos(), TOKEN_END_OF_LINE);
134 bool decimal = *fCurrentChar == '.' || *fCurrentChar == ',';
136 if (decimal || isdigit(*fCurrentChar)) {
137 if (fHexSupport && *fCurrentChar == '0' && fCurrentChar[1] == 'x')
138 return _ParseHexNumber();
140 BString temp;
142 const char* begin = fCurrentChar;
144 // optional digits before the comma
145 while (isdigit(*fCurrentChar)) {
146 temp << *fCurrentChar;
147 fCurrentChar++;
150 // optional post comma part
151 // (required if there are no digits before the comma)
152 if (*fCurrentChar == '.' || *fCurrentChar == ',') {
153 temp << '.';
154 fCurrentChar++;
156 // optional post comma digits
157 while (isdigit(*fCurrentChar)) {
158 temp << *fCurrentChar;
159 fCurrentChar++;
163 // optional exponent part
164 if (*fCurrentChar == 'E') {
165 temp << *fCurrentChar;
166 fCurrentChar++;
168 // optional exponent sign
169 if (*fCurrentChar == '+' || *fCurrentChar == '-') {
170 temp << *fCurrentChar;
171 fCurrentChar++;
174 // required exponent digits
175 if (!isdigit(*fCurrentChar)) {
176 throw ParseException("missing exponent in constant",
177 fCurrentChar - begin);
180 while (isdigit(*fCurrentChar)) {
181 temp << *fCurrentChar;
182 fCurrentChar++;
186 int32 length = fCurrentChar - begin;
187 BString test = temp;
188 test << "&_";
189 double value;
190 char t[2];
191 int32 matches = sscanf(test.String(), "%lf&%s", &value, t);
192 if (matches != 2) {
193 throw ParseException("error in constant",
194 _CurrentPos() - length);
197 fCurrentToken = Token(begin, length, _CurrentPos() - length,
198 TOKEN_CONSTANT);
199 fCurrentToken.value = temp.String();
200 } else if (isalpha(*fCurrentChar) && *fCurrentChar != 'x') {
201 const char* begin = fCurrentChar;
202 while (*fCurrentChar != 0 && (isalpha(*fCurrentChar)
203 || isdigit(*fCurrentChar))) {
204 fCurrentChar++;
206 int32 length = fCurrentChar - begin;
207 fCurrentToken = Token(begin, length, _CurrentPos() - length,
208 TOKEN_IDENTIFIER);
209 } else if (strncmp(fCurrentChar, "π", 2) == 0) {
210 fCurrentToken = Token(fCurrentChar, 2, _CurrentPos() - 1,
211 TOKEN_IDENTIFIER);
212 fCurrentChar += 2;
213 } else {
214 int32 type = TOKEN_NONE;
216 switch (*fCurrentChar) {
217 case TOKEN_PLUS:
218 case TOKEN_MINUS:
219 case TOKEN_STAR:
220 case TOKEN_SLASH:
221 case TOKEN_MODULO:
222 case TOKEN_POWER:
223 case TOKEN_FACTORIAL:
224 case TOKEN_OPENING_BRACKET:
225 case TOKEN_CLOSING_BRACKET:
226 case TOKEN_AND:
227 case TOKEN_OR:
228 case TOKEN_NOT:
229 case TOKEN_END_OF_LINE:
230 type = *fCurrentChar;
231 break;
233 case '\\':
234 case ':':
235 type = TOKEN_SLASH;
236 break;
238 case 'x':
239 if (!fHexSupport) {
240 type = TOKEN_STAR;
241 break;
243 // fall through
245 default:
246 throw ParseException("unexpected character", _CurrentPos());
248 fCurrentToken = Token(fCurrentChar, 1, _CurrentPos(), type);
249 fCurrentChar++;
252 //printf("next token: '%s'\n", fCurrentToken.string.String());
253 return fCurrentToken;
256 void RewindToken()
258 fReuseToken = true;
261 private:
262 static bool _IsHexDigit(char c)
264 return isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
267 Token& _ParseHexNumber()
269 const char* begin = fCurrentChar;
270 fCurrentChar += 2;
271 // skip "0x"
273 if (!_IsHexDigit(*fCurrentChar))
274 throw ParseException("expected hex digit", _CurrentPos());
276 fCurrentChar++;
277 while (_IsHexDigit(*fCurrentChar))
278 fCurrentChar++;
280 int32 length = fCurrentChar - begin;
281 fCurrentToken = Token(begin, length, _CurrentPos() - length,
282 TOKEN_CONSTANT);
284 // MAPM has no conversion from long long, so we need to improvise.
285 uint64 value = strtoll(fCurrentToken.string.String(), NULL, 0);
286 if (value <= 0x7fffffff) {
287 fCurrentToken.value = (long)value;
288 } else {
289 fCurrentToken.value = (int)(value >> 60);
290 fCurrentToken.value *= 1 << 30;
291 fCurrentToken.value += (int)((value >> 30) & 0x3fffffff);
292 fCurrentToken.value *= 1 << 30;
293 fCurrentToken.value += (int)(value& 0x3fffffff);
296 return fCurrentToken;
299 int32 _CurrentPos() const
301 return fCurrentChar - fString.String();
304 BString fString;
305 const char* fCurrentChar;
306 Token fCurrentToken;
307 bool fReuseToken;
308 bool fHexSupport;
312 ExpressionParser::ExpressionParser()
313 : fTokenizer(new Tokenizer()),
314 fDegreeMode(false)
319 ExpressionParser::~ExpressionParser()
321 delete fTokenizer;
325 bool
326 ExpressionParser::DegreeMode()
328 return fDegreeMode;
332 void
333 ExpressionParser::SetDegreeMode(bool degrees)
335 fDegreeMode = degrees;
339 void
340 ExpressionParser::SetSupportHexInput(bool enabled)
342 fTokenizer->SetSupportHexInput(enabled);
346 BString
347 ExpressionParser::Evaluate(const char* expressionString)
349 fTokenizer->SetTo(expressionString);
351 MAPM value = _ParseBinary();
352 Token token = fTokenizer->NextToken();
353 if (token.type != TOKEN_END_OF_LINE)
354 throw ParseException("parse error", token.position);
356 if (value == 0)
357 return BString("0");
359 char* buffer = value.toFixPtStringExp(kMaxDecimalPlaces, '.', 0, 0);
360 if (buffer == NULL)
361 throw ParseException("out of memory", 0);
363 // remove surplus zeros
364 int32 lastChar = strlen(buffer) - 1;
365 if (strchr(buffer, '.')) {
366 while (buffer[lastChar] == '0')
367 lastChar--;
368 if (buffer[lastChar] == '.')
369 lastChar--;
372 BString result(buffer, lastChar + 1);
373 free(buffer);
374 return result;
378 int64
379 ExpressionParser::EvaluateToInt64(const char* expressionString)
381 fTokenizer->SetTo(expressionString);
383 MAPM value = _ParseBinary();
384 Token token = fTokenizer->NextToken();
385 if (token.type != TOKEN_END_OF_LINE)
386 throw ParseException("parse error", token.position);
388 char buffer[128];
389 value.toIntegerString(buffer);
391 return strtoll(buffer, NULL, 0);
395 double
396 ExpressionParser::EvaluateToDouble(const char* expressionString)
398 fTokenizer->SetTo(expressionString);
400 MAPM value = _ParseBinary();
401 Token token = fTokenizer->NextToken();
402 if (token.type != TOKEN_END_OF_LINE)
403 throw ParseException("parse error", token.position);
405 char buffer[1024];
406 value.toString(buffer, sizeof(buffer) - 4);
408 return strtod(buffer, NULL);
412 MAPM
413 ExpressionParser::_ParseBinary()
415 return _ParseSum();
416 // binary operation appearantly not supported by m_apm library,
417 // should not be too hard to implement though....
419 // double value = _ParseSum();
421 // while (true) {
422 // Token token = fTokenizer->NextToken();
423 // switch (token.type) {
424 // case TOKEN_AND:
425 // value = (uint64)value & (uint64)_ParseSum();
426 // break;
427 // case TOKEN_OR:
428 // value = (uint64)value | (uint64)_ParseSum();
429 // break;
431 // default:
432 // fTokenizer->RewindToken();
433 // return value;
434 // }
435 // }
439 MAPM
440 ExpressionParser::_ParseSum()
442 // TODO: check isnan()...
443 MAPM value = _ParseProduct();
445 while (true) {
446 Token token = fTokenizer->NextToken();
447 switch (token.type) {
448 case TOKEN_PLUS:
449 value = value + _ParseProduct();
450 break;
451 case TOKEN_MINUS:
452 value = value - _ParseProduct();
453 break;
455 default:
456 fTokenizer->RewindToken();
457 return _ParseFactorial(value);
463 MAPM
464 ExpressionParser::_ParseProduct()
466 // TODO: check isnan()...
467 MAPM value = _ParsePower();
469 while (true) {
470 Token token = fTokenizer->NextToken();
471 switch (token.type) {
472 case TOKEN_STAR:
473 value = value * _ParsePower();
474 break;
475 case TOKEN_SLASH: {
476 MAPM rhs = _ParsePower();
477 if (rhs == MAPM(0))
478 throw ParseException("division by zero", token.position);
479 value = value / rhs;
480 break;
482 case TOKEN_MODULO: {
483 MAPM rhs = _ParsePower();
484 if (rhs == MAPM(0))
485 throw ParseException("modulo by zero", token.position);
486 value = value % rhs;
487 break;
490 default:
491 fTokenizer->RewindToken();
492 return _ParseFactorial(value);
498 MAPM
499 ExpressionParser::_ParsePower()
501 MAPM value = _ParseUnary();
503 while (true) {
504 Token token = fTokenizer->NextToken();
505 if (token.type != TOKEN_POWER) {
506 fTokenizer->RewindToken();
507 return _ParseFactorial(value);
509 value = value.pow(_ParseUnary());
514 MAPM
515 ExpressionParser::_ParseUnary()
517 Token token = fTokenizer->NextToken();
518 if (token.type == TOKEN_END_OF_LINE)
519 throw ParseException("unexpected end of expression", token.position);
521 switch (token.type) {
522 case TOKEN_PLUS:
523 return _ParseUnary();
524 case TOKEN_MINUS:
525 return -_ParseUnary();
526 // TODO: Implement !
527 // case TOKEN_NOT:
528 // return ~(uint64)_ParseUnary();
530 case TOKEN_IDENTIFIER:
531 return _ParseFunction(token);
533 default:
534 fTokenizer->RewindToken();
535 return _ParseAtom();
538 return MAPM(0);
542 struct Function {
543 const char* name;
544 int argumentCount;
545 void* function;
546 MAPM value;
550 void
551 ExpressionParser::_InitArguments(MAPM values[], int32 argumentCount)
553 _EatToken(TOKEN_OPENING_BRACKET);
555 for (int32 i = 0; i < argumentCount; i++)
556 values[i] = _ParseBinary();
558 _EatToken(TOKEN_CLOSING_BRACKET);
562 MAPM
563 ExpressionParser::_ParseFunction(const Token& token)
565 if (token.string == "e")
566 return _ParseFactorial(MAPM(MM_E));
567 else if (token.string.ICompare("pi") == 0 || token.string == "π")
568 return _ParseFactorial(MAPM(MM_PI));
570 // hard coded cases for different count of arguments
571 // supports functions with 3 arguments at most
573 MAPM values[3];
575 if (strcasecmp("abs", token.string.String()) == 0) {
576 _InitArguments(values, 1);
577 return _ParseFactorial(values[0].abs());
578 } else if (strcasecmp("acos", token.string.String()) == 0) {
579 _InitArguments(values, 1);
580 if (fDegreeMode)
581 values[0] = values[0] * MM_PI / 180;
583 if (values[0] < -1 || values[0] > 1)
584 throw ParseException("out of domain", token.position);
586 return _ParseFactorial(values[0].acos());
587 } else if (strcasecmp("asin", token.string.String()) == 0) {
588 _InitArguments(values, 1);
589 if (fDegreeMode)
590 values[0] = values[0] * MM_PI / 180;
592 if (values[0] < -1 || values[0] > 1)
593 throw ParseException("out of domain", token.position);
595 return _ParseFactorial(values[0].asin());
596 } else if (strcasecmp("atan", token.string.String()) == 0) {
597 _InitArguments(values, 1);
598 if (fDegreeMode)
599 values[0] = values[0] * MM_PI / 180;
601 return _ParseFactorial(values[0].atan());
602 } else if (strcasecmp("atan2", token.string.String()) == 0) {
603 _InitArguments(values, 2);
605 if (fDegreeMode) {
606 values[0] = values[0] * MM_PI / 180;
607 values[1] = values[1] * MM_PI / 180;
610 return _ParseFactorial(values[0].atan2(values[1]));
611 } else if (strcasecmp("cbrt", token.string.String()) == 0) {
612 _InitArguments(values, 1);
613 return _ParseFactorial(values[0].cbrt());
614 } else if (strcasecmp("ceil", token.string.String()) == 0) {
615 _InitArguments(values, 1);
616 return _ParseFactorial(values[0].ceil());
617 } else if (strcasecmp("cos", token.string.String()) == 0) {
618 _InitArguments(values, 1);
619 if (fDegreeMode)
620 values[0] = values[0] * MM_PI / 180;
622 return _ParseFactorial(values[0].cos());
623 } else if (strcasecmp("cosh", token.string.String()) == 0) {
624 _InitArguments(values, 1);
625 // This function always uses radians
626 return _ParseFactorial(values[0].cosh());
627 } else if (strcasecmp("exp", token.string.String()) == 0) {
628 _InitArguments(values, 1);
629 return _ParseFactorial(values[0].exp());
630 } else if (strcasecmp("floor", token.string.String()) == 0) {
631 _InitArguments(values, 1);
632 return _ParseFactorial(values[0].floor());
633 } else if (strcasecmp("ln", token.string.String()) == 0) {
634 _InitArguments(values, 1);
635 if (values[0] <= 0)
636 throw ParseException("out of domain", token.position);
638 return _ParseFactorial(values[0].log());
639 } else if (strcasecmp("log", token.string.String()) == 0) {
640 _InitArguments(values, 1);
641 if (values[0] <= 0)
642 throw ParseException("out of domain", token.position);
644 return _ParseFactorial(values[0].log10());
645 } else if (strcasecmp("pow", token.string.String()) == 0) {
646 _InitArguments(values, 2);
647 return _ParseFactorial(values[0].pow(values[1]));
648 } else if (strcasecmp("sin", token.string.String()) == 0) {
649 _InitArguments(values, 1);
650 if (fDegreeMode)
651 values[0] = values[0] * MM_PI / 180;
653 return _ParseFactorial(values[0].sin());
654 } else if (strcasecmp("sinh", token.string.String()) == 0) {
655 _InitArguments(values, 1);
656 // This function always uses radians
657 return _ParseFactorial(values[0].sinh());
658 } else if (strcasecmp("sqrt", token.string.String()) == 0) {
659 _InitArguments(values, 1);
660 if (values[0] < 0)
661 throw ParseException("out of domain", token.position);
663 return _ParseFactorial(values[0].sqrt());
664 } else if (strcasecmp("tan", token.string.String()) == 0) {
665 _InitArguments(values, 1);
666 if (fDegreeMode)
667 values[0] = values[0] * MM_PI / 180;
669 MAPM divided_by_half_pi = values[0] / MM_HALF_PI;
670 if (divided_by_half_pi.is_integer() && divided_by_half_pi.is_odd())
671 throw ParseException("out of domain", token.position);
673 return _ParseFactorial(values[0].tan());
674 } else if (strcasecmp("tanh", token.string.String()) == 0) {
675 _InitArguments(values, 1);
676 // This function always uses radians
677 return _ParseFactorial(values[0].tanh());
680 throw ParseException("unknown identifier", token.position);
684 MAPM
685 ExpressionParser::_ParseAtom()
687 Token token = fTokenizer->NextToken();
688 if (token.type == TOKEN_END_OF_LINE)
689 throw ParseException("unexpected end of expression", token.position);
691 if (token.type == TOKEN_CONSTANT)
692 return _ParseFactorial(token.value);
694 fTokenizer->RewindToken();
696 _EatToken(TOKEN_OPENING_BRACKET);
698 MAPM value = _ParseBinary();
700 _EatToken(TOKEN_CLOSING_BRACKET);
702 return _ParseFactorial(value);
706 MAPM
707 ExpressionParser::_ParseFactorial(MAPM value)
709 if (fTokenizer->NextToken().type == TOKEN_FACTORIAL) {
710 fTokenizer->RewindToken();
711 _EatToken(TOKEN_FACTORIAL);
712 if (value < 1000)
713 return value.factorial();
714 else {
715 // Use Stirling's approximation (9 term expansion)
716 // http://en.wikipedia.org/wiki/Stirling%27s_approximation
717 // http://www.wolframalpha.com/input/?i=stirling%27s+series
718 // all constants must fit in a signed long for MAPM
719 // (LONG_MAX = 2147483647)
720 return value.pow(value) / value.exp()
721 * (MAPM(2) * MAPM(MM_PI) * value).sqrt()
722 * (MAPM(1) + (MAPM(1) / (MAPM(12) * value))
723 + (MAPM(1) / (MAPM(288) * value.pow(2)))
724 - (MAPM(139) / (MAPM(51840) * value.pow(3)))
725 - (MAPM(571) / (MAPM(2488320) * value.pow(4)))
726 + (MAPM(163879) / (MAPM(209018880) * value.pow(5)))
727 // 2147483647 * 35 + 84869155 = 75246796800
728 + (MAPM(5246819) / ((MAPM(2147483647) * MAPM(35)
729 + MAPM(84869155)) * value.pow(6)))
730 // 2147483647 * 420 + 1018429860 = 902961561600
731 - (MAPM(534703531) / ((MAPM(2147483647) * MAPM(420)
732 + MAPM(1018429860)) * value.pow(7)))
733 // 2147483647 * 2 + 188163965 = 4483131259
734 // 2147483647 * 40366 + 985018798 = 86686309913600
735 - ((MAPM(2147483647) * MAPM(2) + MAPM(188163965))
736 / ((MAPM(2147483647) * MAPM(40366) + MAPM(985018798))
737 * value.pow(8)))
738 // 2147483647 * 201287 + 1380758682 = 432261921612371
739 // 2147483647 * 239771232 + 1145740896
740 // = 514904800886784000
741 + ((MAPM(2147483647) * MAPM(201287) + MAPM(1380758682))
742 / ((MAPM(2147483647) * MAPM(239771232)
743 + MAPM(1145740896))
744 * value.pow(9))));
748 fTokenizer->RewindToken();
749 return value;
753 void
754 ExpressionParser::_EatToken(int32 type)
756 Token token = fTokenizer->NextToken();
757 if (token.type != type) {
758 BString expected;
759 switch (type) {
760 case TOKEN_IDENTIFIER:
761 expected = "an identifier";
762 break;
764 case TOKEN_CONSTANT:
765 expected = "a constant";
766 break;
768 case TOKEN_PLUS:
769 case TOKEN_MINUS:
770 case TOKEN_STAR:
771 case TOKEN_MODULO:
772 case TOKEN_POWER:
773 case TOKEN_FACTORIAL:
774 case TOKEN_OPENING_BRACKET:
775 case TOKEN_CLOSING_BRACKET:
776 case TOKEN_AND:
777 case TOKEN_OR:
778 case TOKEN_NOT:
779 expected << "'" << (char)type << "'";
780 break;
782 case TOKEN_SLASH:
783 expected = "'/', '\\', or ':'";
784 break;
786 case TOKEN_END_OF_LINE:
787 expected = "'\\n'";
788 break;
790 BString temp;
791 temp << "Expected " << expected.String() << " got '" << token.string << "'";
792 throw ParseException(temp.String(), token.position);