2 * Copyright 2006-2012 Haiku, Inc. All Rights Reserved.
3 * Distributed under the terms of the MIT License.
6 * Stephan Aßmus <superstippi@gmx.de>
7 * John Scipione <jscipione@gmail.com>
8 * Ingo Weinhold <bonefish@cs.tu-berlin.de>
11 #include <ExpressionParser.h>
22 static const int32 kMaxDecimalPlaces
= 32;
30 TOKEN_END_OF_LINE
= '\n',
40 TOKEN_FACTORIAL
= '!',
42 TOKEN_OPENING_BRACKET
= '(',
43 TOKEN_CLOSING_BRACKET
= ')',
51 struct ExpressionParser::Token
{
60 Token(const Token
& other
)
61 : string(other
.string
),
64 position(other
.position
)
68 Token(const char* string
, int32 length
, int32 position
, int32 type
)
69 : string(string
, length
),
76 Token
& operator=(const Token
& other
)
78 string
= other
.string
;
81 position
= other
.position
;
93 class ExpressionParser::Tokenizer
{
104 void SetSupportHexInput(bool enabled
)
106 fHexSupport
= enabled
;
109 void SetTo(const char* string
)
112 fCurrentChar
= fString
.String();
113 fCurrentToken
= Token();
117 const Token
& NextToken()
119 if (fCurrentToken
.type
== TOKEN_END_OF_LINE
)
120 return fCurrentToken
;
124 //printf("next token (recycled): '%s'\n", fCurrentToken.string.String());
125 return fCurrentToken
;
128 while (*fCurrentChar
!= 0 && isspace(*fCurrentChar
))
131 if (*fCurrentChar
== 0)
132 return fCurrentToken
= Token("", 0, _CurrentPos(), TOKEN_END_OF_LINE
);
134 bool decimal
= *fCurrentChar
== '.' || *fCurrentChar
== ',';
136 if (decimal
|| isdigit(*fCurrentChar
)) {
137 if (fHexSupport
&& *fCurrentChar
== '0' && fCurrentChar
[1] == 'x')
138 return _ParseHexNumber();
142 const char* begin
= fCurrentChar
;
144 // optional digits before the comma
145 while (isdigit(*fCurrentChar
)) {
146 temp
<< *fCurrentChar
;
150 // optional post comma part
151 // (required if there are no digits before the comma)
152 if (*fCurrentChar
== '.' || *fCurrentChar
== ',') {
156 // optional post comma digits
157 while (isdigit(*fCurrentChar
)) {
158 temp
<< *fCurrentChar
;
163 // optional exponent part
164 if (*fCurrentChar
== 'E') {
165 temp
<< *fCurrentChar
;
168 // optional exponent sign
169 if (*fCurrentChar
== '+' || *fCurrentChar
== '-') {
170 temp
<< *fCurrentChar
;
174 // required exponent digits
175 if (!isdigit(*fCurrentChar
)) {
176 throw ParseException("missing exponent in constant",
177 fCurrentChar
- begin
);
180 while (isdigit(*fCurrentChar
)) {
181 temp
<< *fCurrentChar
;
186 int32 length
= fCurrentChar
- begin
;
191 int32 matches
= sscanf(test
.String(), "%lf&%s", &value
, t
);
193 throw ParseException("error in constant",
194 _CurrentPos() - length
);
197 fCurrentToken
= Token(begin
, length
, _CurrentPos() - length
,
199 fCurrentToken
.value
= temp
.String();
200 } else if (isalpha(*fCurrentChar
) && *fCurrentChar
!= 'x') {
201 const char* begin
= fCurrentChar
;
202 while (*fCurrentChar
!= 0 && (isalpha(*fCurrentChar
)
203 || isdigit(*fCurrentChar
))) {
206 int32 length
= fCurrentChar
- begin
;
207 fCurrentToken
= Token(begin
, length
, _CurrentPos() - length
,
209 } else if (strncmp(fCurrentChar
, "π", 2) == 0) {
210 fCurrentToken
= Token(fCurrentChar
, 2, _CurrentPos() - 1,
214 int32 type
= TOKEN_NONE
;
216 switch (*fCurrentChar
) {
223 case TOKEN_FACTORIAL
:
224 case TOKEN_OPENING_BRACKET
:
225 case TOKEN_CLOSING_BRACKET
:
229 case TOKEN_END_OF_LINE
:
230 type
= *fCurrentChar
;
246 throw ParseException("unexpected character", _CurrentPos());
248 fCurrentToken
= Token(fCurrentChar
, 1, _CurrentPos(), type
);
252 //printf("next token: '%s'\n", fCurrentToken.string.String());
253 return fCurrentToken
;
262 static bool _IsHexDigit(char c
)
264 return isdigit(c
) || (c
>= 'a' && c
<= 'f') || (c
>= 'A' && c
<= 'F');
267 Token
& _ParseHexNumber()
269 const char* begin
= fCurrentChar
;
273 if (!_IsHexDigit(*fCurrentChar
))
274 throw ParseException("expected hex digit", _CurrentPos());
277 while (_IsHexDigit(*fCurrentChar
))
280 int32 length
= fCurrentChar
- begin
;
281 fCurrentToken
= Token(begin
, length
, _CurrentPos() - length
,
284 // MAPM has no conversion from long long, so we need to improvise.
285 uint64 value
= strtoll(fCurrentToken
.string
.String(), NULL
, 0);
286 if (value
<= 0x7fffffff) {
287 fCurrentToken
.value
= (long)value
;
289 fCurrentToken
.value
= (int)(value
>> 60);
290 fCurrentToken
.value
*= 1 << 30;
291 fCurrentToken
.value
+= (int)((value
>> 30) & 0x3fffffff);
292 fCurrentToken
.value
*= 1 << 30;
293 fCurrentToken
.value
+= (int)(value
& 0x3fffffff);
296 return fCurrentToken
;
299 int32
_CurrentPos() const
301 return fCurrentChar
- fString
.String();
305 const char* fCurrentChar
;
312 ExpressionParser::ExpressionParser()
313 : fTokenizer(new Tokenizer()),
319 ExpressionParser::~ExpressionParser()
326 ExpressionParser::DegreeMode()
333 ExpressionParser::SetDegreeMode(bool degrees
)
335 fDegreeMode
= degrees
;
340 ExpressionParser::SetSupportHexInput(bool enabled
)
342 fTokenizer
->SetSupportHexInput(enabled
);
347 ExpressionParser::Evaluate(const char* expressionString
)
349 fTokenizer
->SetTo(expressionString
);
351 MAPM value
= _ParseBinary();
352 Token token
= fTokenizer
->NextToken();
353 if (token
.type
!= TOKEN_END_OF_LINE
)
354 throw ParseException("parse error", token
.position
);
359 char* buffer
= value
.toFixPtStringExp(kMaxDecimalPlaces
, '.', 0, 0);
361 throw ParseException("out of memory", 0);
363 // remove surplus zeros
364 int32 lastChar
= strlen(buffer
) - 1;
365 if (strchr(buffer
, '.')) {
366 while (buffer
[lastChar
] == '0')
368 if (buffer
[lastChar
] == '.')
372 BString
result(buffer
, lastChar
+ 1);
379 ExpressionParser::EvaluateToInt64(const char* expressionString
)
381 fTokenizer
->SetTo(expressionString
);
383 MAPM value
= _ParseBinary();
384 Token token
= fTokenizer
->NextToken();
385 if (token
.type
!= TOKEN_END_OF_LINE
)
386 throw ParseException("parse error", token
.position
);
389 value
.toIntegerString(buffer
);
391 return strtoll(buffer
, NULL
, 0);
396 ExpressionParser::EvaluateToDouble(const char* expressionString
)
398 fTokenizer
->SetTo(expressionString
);
400 MAPM value
= _ParseBinary();
401 Token token
= fTokenizer
->NextToken();
402 if (token
.type
!= TOKEN_END_OF_LINE
)
403 throw ParseException("parse error", token
.position
);
406 value
.toString(buffer
, sizeof(buffer
) - 4);
408 return strtod(buffer
, NULL
);
413 ExpressionParser::_ParseBinary()
416 // binary operation appearantly not supported by m_apm library,
417 // should not be too hard to implement though....
419 // double value = _ParseSum();
422 // Token token = fTokenizer->NextToken();
423 // switch (token.type) {
425 // value = (uint64)value & (uint64)_ParseSum();
428 // value = (uint64)value | (uint64)_ParseSum();
432 // fTokenizer->RewindToken();
440 ExpressionParser::_ParseSum()
442 // TODO: check isnan()...
443 MAPM value
= _ParseProduct();
446 Token token
= fTokenizer
->NextToken();
447 switch (token
.type
) {
449 value
= value
+ _ParseProduct();
452 value
= value
- _ParseProduct();
456 fTokenizer
->RewindToken();
457 return _ParseFactorial(value
);
464 ExpressionParser::_ParseProduct()
466 // TODO: check isnan()...
467 MAPM value
= _ParsePower();
470 Token token
= fTokenizer
->NextToken();
471 switch (token
.type
) {
473 value
= value
* _ParsePower();
476 MAPM rhs
= _ParsePower();
478 throw ParseException("division by zero", token
.position
);
483 MAPM rhs
= _ParsePower();
485 throw ParseException("modulo by zero", token
.position
);
491 fTokenizer
->RewindToken();
492 return _ParseFactorial(value
);
499 ExpressionParser::_ParsePower()
501 MAPM value
= _ParseUnary();
504 Token token
= fTokenizer
->NextToken();
505 if (token
.type
!= TOKEN_POWER
) {
506 fTokenizer
->RewindToken();
507 return _ParseFactorial(value
);
509 value
= value
.pow(_ParseUnary());
515 ExpressionParser::_ParseUnary()
517 Token token
= fTokenizer
->NextToken();
518 if (token
.type
== TOKEN_END_OF_LINE
)
519 throw ParseException("unexpected end of expression", token
.position
);
521 switch (token
.type
) {
523 return _ParseUnary();
525 return -_ParseUnary();
528 // return ~(uint64)_ParseUnary();
530 case TOKEN_IDENTIFIER
:
531 return _ParseFunction(token
);
534 fTokenizer
->RewindToken();
551 ExpressionParser::_InitArguments(MAPM values
[], int32 argumentCount
)
553 _EatToken(TOKEN_OPENING_BRACKET
);
555 for (int32 i
= 0; i
< argumentCount
; i
++)
556 values
[i
] = _ParseBinary();
558 _EatToken(TOKEN_CLOSING_BRACKET
);
563 ExpressionParser::_ParseFunction(const Token
& token
)
565 if (token
.string
== "e")
566 return _ParseFactorial(MAPM(MM_E
));
567 else if (token
.string
.ICompare("pi") == 0 || token
.string
== "π")
568 return _ParseFactorial(MAPM(MM_PI
));
570 // hard coded cases for different count of arguments
571 // supports functions with 3 arguments at most
575 if (strcasecmp("abs", token
.string
.String()) == 0) {
576 _InitArguments(values
, 1);
577 return _ParseFactorial(values
[0].abs());
578 } else if (strcasecmp("acos", token
.string
.String()) == 0) {
579 _InitArguments(values
, 1);
581 values
[0] = values
[0] * MM_PI
/ 180;
583 if (values
[0] < -1 || values
[0] > 1)
584 throw ParseException("out of domain", token
.position
);
586 return _ParseFactorial(values
[0].acos());
587 } else if (strcasecmp("asin", token
.string
.String()) == 0) {
588 _InitArguments(values
, 1);
590 values
[0] = values
[0] * MM_PI
/ 180;
592 if (values
[0] < -1 || values
[0] > 1)
593 throw ParseException("out of domain", token
.position
);
595 return _ParseFactorial(values
[0].asin());
596 } else if (strcasecmp("atan", token
.string
.String()) == 0) {
597 _InitArguments(values
, 1);
599 values
[0] = values
[0] * MM_PI
/ 180;
601 return _ParseFactorial(values
[0].atan());
602 } else if (strcasecmp("atan2", token
.string
.String()) == 0) {
603 _InitArguments(values
, 2);
606 values
[0] = values
[0] * MM_PI
/ 180;
607 values
[1] = values
[1] * MM_PI
/ 180;
610 return _ParseFactorial(values
[0].atan2(values
[1]));
611 } else if (strcasecmp("cbrt", token
.string
.String()) == 0) {
612 _InitArguments(values
, 1);
613 return _ParseFactorial(values
[0].cbrt());
614 } else if (strcasecmp("ceil", token
.string
.String()) == 0) {
615 _InitArguments(values
, 1);
616 return _ParseFactorial(values
[0].ceil());
617 } else if (strcasecmp("cos", token
.string
.String()) == 0) {
618 _InitArguments(values
, 1);
620 values
[0] = values
[0] * MM_PI
/ 180;
622 return _ParseFactorial(values
[0].cos());
623 } else if (strcasecmp("cosh", token
.string
.String()) == 0) {
624 _InitArguments(values
, 1);
625 // This function always uses radians
626 return _ParseFactorial(values
[0].cosh());
627 } else if (strcasecmp("exp", token
.string
.String()) == 0) {
628 _InitArguments(values
, 1);
629 return _ParseFactorial(values
[0].exp());
630 } else if (strcasecmp("floor", token
.string
.String()) == 0) {
631 _InitArguments(values
, 1);
632 return _ParseFactorial(values
[0].floor());
633 } else if (strcasecmp("ln", token
.string
.String()) == 0) {
634 _InitArguments(values
, 1);
636 throw ParseException("out of domain", token
.position
);
638 return _ParseFactorial(values
[0].log());
639 } else if (strcasecmp("log", token
.string
.String()) == 0) {
640 _InitArguments(values
, 1);
642 throw ParseException("out of domain", token
.position
);
644 return _ParseFactorial(values
[0].log10());
645 } else if (strcasecmp("pow", token
.string
.String()) == 0) {
646 _InitArguments(values
, 2);
647 return _ParseFactorial(values
[0].pow(values
[1]));
648 } else if (strcasecmp("sin", token
.string
.String()) == 0) {
649 _InitArguments(values
, 1);
651 values
[0] = values
[0] * MM_PI
/ 180;
653 return _ParseFactorial(values
[0].sin());
654 } else if (strcasecmp("sinh", token
.string
.String()) == 0) {
655 _InitArguments(values
, 1);
656 // This function always uses radians
657 return _ParseFactorial(values
[0].sinh());
658 } else if (strcasecmp("sqrt", token
.string
.String()) == 0) {
659 _InitArguments(values
, 1);
661 throw ParseException("out of domain", token
.position
);
663 return _ParseFactorial(values
[0].sqrt());
664 } else if (strcasecmp("tan", token
.string
.String()) == 0) {
665 _InitArguments(values
, 1);
667 values
[0] = values
[0] * MM_PI
/ 180;
669 MAPM divided_by_half_pi
= values
[0] / MM_HALF_PI
;
670 if (divided_by_half_pi
.is_integer() && divided_by_half_pi
.is_odd())
671 throw ParseException("out of domain", token
.position
);
673 return _ParseFactorial(values
[0].tan());
674 } else if (strcasecmp("tanh", token
.string
.String()) == 0) {
675 _InitArguments(values
, 1);
676 // This function always uses radians
677 return _ParseFactorial(values
[0].tanh());
680 throw ParseException("unknown identifier", token
.position
);
685 ExpressionParser::_ParseAtom()
687 Token token
= fTokenizer
->NextToken();
688 if (token
.type
== TOKEN_END_OF_LINE
)
689 throw ParseException("unexpected end of expression", token
.position
);
691 if (token
.type
== TOKEN_CONSTANT
)
692 return _ParseFactorial(token
.value
);
694 fTokenizer
->RewindToken();
696 _EatToken(TOKEN_OPENING_BRACKET
);
698 MAPM value
= _ParseBinary();
700 _EatToken(TOKEN_CLOSING_BRACKET
);
702 return _ParseFactorial(value
);
707 ExpressionParser::_ParseFactorial(MAPM value
)
709 if (fTokenizer
->NextToken().type
== TOKEN_FACTORIAL
) {
710 fTokenizer
->RewindToken();
711 _EatToken(TOKEN_FACTORIAL
);
713 return value
.factorial();
715 // Use Stirling's approximation (9 term expansion)
716 // http://en.wikipedia.org/wiki/Stirling%27s_approximation
717 // http://www.wolframalpha.com/input/?i=stirling%27s+series
718 // all constants must fit in a signed long for MAPM
719 // (LONG_MAX = 2147483647)
720 return value
.pow(value
) / value
.exp()
721 * (MAPM(2) * MAPM(MM_PI
) * value
).sqrt()
722 * (MAPM(1) + (MAPM(1) / (MAPM(12) * value
))
723 + (MAPM(1) / (MAPM(288) * value
.pow(2)))
724 - (MAPM(139) / (MAPM(51840) * value
.pow(3)))
725 - (MAPM(571) / (MAPM(2488320) * value
.pow(4)))
726 + (MAPM(163879) / (MAPM(209018880) * value
.pow(5)))
727 // 2147483647 * 35 + 84869155 = 75246796800
728 + (MAPM(5246819) / ((MAPM(2147483647) * MAPM(35)
729 + MAPM(84869155)) * value
.pow(6)))
730 // 2147483647 * 420 + 1018429860 = 902961561600
731 - (MAPM(534703531) / ((MAPM(2147483647) * MAPM(420)
732 + MAPM(1018429860)) * value
.pow(7)))
733 // 2147483647 * 2 + 188163965 = 4483131259
734 // 2147483647 * 40366 + 985018798 = 86686309913600
735 - ((MAPM(2147483647) * MAPM(2) + MAPM(188163965))
736 / ((MAPM(2147483647) * MAPM(40366) + MAPM(985018798))
738 // 2147483647 * 201287 + 1380758682 = 432261921612371
739 // 2147483647 * 239771232 + 1145740896
740 // = 514904800886784000
741 + ((MAPM(2147483647) * MAPM(201287) + MAPM(1380758682))
742 / ((MAPM(2147483647) * MAPM(239771232)
748 fTokenizer
->RewindToken();
754 ExpressionParser::_EatToken(int32 type
)
756 Token token
= fTokenizer
->NextToken();
757 if (token
.type
!= type
) {
760 case TOKEN_IDENTIFIER
:
761 expected
= "an identifier";
765 expected
= "a constant";
773 case TOKEN_FACTORIAL
:
774 case TOKEN_OPENING_BRACKET
:
775 case TOKEN_CLOSING_BRACKET
:
779 expected
<< "'" << (char)type
<< "'";
783 expected
= "'/', '\\', or ':'";
786 case TOKEN_END_OF_LINE
:
791 temp
<< "Expected " << expected
.String() << " got '" << token
.string
<< "'";
792 throw ParseException(temp
.String(), token
.position
);