1 /* expr -- evaluate expressions.
2 Copyright (C) 1986-2017 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Author: Mike Parker.
18 Modified for arbitrary-precision calculation by James Youngman.
20 This program evaluates expressions. Each token (operator, operand,
21 parenthesis) of the expression must be a separate argument. The
22 parser used is a reasonably general one, though any incarnation of
23 it is language-specific. It is especially nice for expressions.
25 No parse tree is needed; a new node is evaluated immediately.
26 One function can handle multiple operators all of equal precedence,
27 provided they all associate ((x op x) op x).
29 Define EVAL_TRACE to print an evaluation trace. */
33 #include <sys/types.h>
39 #include "long-options.h"
41 #include "strnumcmp.h"
44 /* Various parts of this code assume size_t fits into unsigned long
45 int, the widest unsigned type that GMP supports. */
46 verify (SIZE_MAX
<= ULONG_MAX
);
55 static void integer_overflow (char) ATTRIBUTE_NORETURN
;
56 /* Approximate gmp.h well enough for expr.c's purposes. */
/* A one-element array of intmax_t, so that an mpz_t function argument
   decays to a pointer to its single element.  */
57 typedef intmax_t mpz_t
[1];
58 static void mpz_clear (mpz_t z
) { (void) z
; }
59 static void mpz_init_set_ui (mpz_t z
, unsigned long int i
) { z
[0] = i
; }
61 mpz_init_set_str (mpz_t z
, char *s
, int base
)
63 return xstrtoimax (s
, NULL
, base
, z
, NULL
) == LONGINT_OK
? 0 : -1;
66 mpz_add (mpz_t r
, mpz_t a0
, mpz_t b0
)
71 if ((val
< a
) != (b
< 0))
72 integer_overflow ('+');
76 mpz_sub (mpz_t r
, mpz_t a0
, mpz_t b0
)
81 if ((a
< val
) != (b
< 0))
82 integer_overflow ('-');
86 mpz_mul (mpz_t r
, mpz_t a0
, mpz_t b0
)
91 if (! (a
== 0 || b
== 0
92 || ((val
< 0) == ((a
< 0) ^ (b
< 0)) && val
/ a
== b
)))
93 integer_overflow ('*');
97 mpz_tdiv_q (mpz_t r
, mpz_t a0
, mpz_t b0
)
102 /* Some x86-style hosts raise an exception for INT_MIN / -1. */
103 if (a
< - INTMAX_MAX
&& b
== -1)
104 integer_overflow ('/');
108 mpz_tdiv_r (mpz_t r
, mpz_t a0
, mpz_t b0
)
113 /* Some x86-style hosts raise an exception for INT_MIN % -1. */
114 r
[0] = a
< - INTMAX_MAX
&& b
== -1 ? 0 : a
% b
;
117 mpz_get_str (char const *str
, int base
, mpz_t z
)
119 (void) str
; (void) base
;
120 char buf
[INT_BUFSIZE_BOUND (intmax_t)];
121 return xstrdup (imaxtostr (z
[0], buf
));
126 return z
[0] < 0 ? -1 : 0 < z
[0];
129 mpz_fits_ulong_p (mpz_t z
)
131 return 0 <= z
[0] && z
[0] <= ULONG_MAX
;
133 static unsigned long int
139 mpz_out_str (FILE *stream
, int base
, mpz_t z
)
142 char buf
[INT_BUFSIZE_BOUND (intmax_t)];
143 return fputs (imaxtostr (z
[0], buf
), stream
) != EOF
;
147 /* The official name of this program (e.g., no 'g' prefix). */
148 #define PROGRAM_NAME "expr"
151 proper_name ("Mike Parker"), \
152 proper_name ("James Youngman"), \
153 proper_name ("Paul Eggert")
158 /* Invalid expression: e.g., its form does not conform to the
159 grammar for expressions. Our grammar is an extension of the
163 /* An internal error occurred, e.g., arithmetic overflow, storage
168 /* The kinds of value we can have. */
174 typedef enum valtype TYPE
;
179 TYPE type
; /* Which kind. */
181 { /* The value itself. */
186 typedef struct valinfo VALUE
;
188 /* The arguments given to the program, minus the program name. */
191 static VALUE
*eval (bool);
192 static bool nomoreargs (void);
193 static bool null (VALUE
*v
);
194 static void printv (VALUE
*v
);
198 Find the first occurrence in the character string STRING of any character
199 in the character string ACCEPT.
201 Copied from gnulib's mbscspn, with two differences:
202 1. Returns 1-based position of first found character, or zero if not found.
203 2. Returned value is the logical character index, NOT byte offset.
206 mbs_logical_cspn ('hello','a') => 0
207 mbs_logical_cspn ('hello','h') => 1
208 mbs_logical_cspn ('hello','oe') => 2
209 mbs_logical_cspn ('hello','lo') => 3
211 In UTF-8 \xCE\xB1 is a single character (greek alpha):
212 mbs_logical_cspn ('\xCE\xB1bc','\xCE\xB1') => 1
213 mbs_logical_cspn ('\xCE\xB1bc','c') => 3 */
215 mbs_logical_cspn (const char *s
, const char *accept
)
219 if (accept
[0] == '\0')
225 mbui_iterator_t iter
;
227 for (mbui_init (iter
, s
); mbui_avail (iter
); mbui_advance (iter
))
230 if (mb_len (mbui_cur (iter
)) == 1)
232 if (mbschr (accept
, *mbui_cur_ptr (iter
)))
237 mbui_iterator_t aiter
;
239 for (mbui_init (aiter
, accept
);
241 mbui_advance (aiter
))
242 if (mb_equal (mbui_cur (aiter
), mbui_cur (iter
)))
252 /* single-byte locale,
253 convert returned byte offset to 1-based index or zero if not found. */
254 size_t i
= strcspn (s
, accept
);
255 return (s
[i
] ? i
+ 1 : 0);
259 /* Extract the substring of S, starting at logical character
260 position POS and spanning LEN characters.
261 The first character position is 1.
262 POS and LEN refer to logical characters, not octets.
264 Upon exit, sets v->s to the new string.
265 The new string might be empty if POS/LEN are invalid. */
267 mbs_logical_substr (const char *s
, size_t pos
, size_t len
)
271 size_t blen
= strlen (s
); /* byte length */
272 size_t llen
= (MB_CUR_MAX
> 1) ? mbslen (s
) : blen
; /* logical length */
274 if (llen
< pos
|| pos
== 0 || len
== 0 || len
== SIZE_MAX
)
277 /* characters to copy */
278 size_t vlen
= MIN (len
, llen
- pos
+ 1);
282 /* Single-byte case */
283 v
= xmalloc (vlen
+ 1);
284 vlim
= mempcpy (v
, s
+ pos
- 1, vlen
);
290 /* FIXME: this is wasteful. Some memory can be saved by counting
291 how many bytes the matching characters occupy. */
292 vlim
= v
= xmalloc (blen
+ 1);
294 mbui_iterator_t iter
;
296 for (mbui_init (iter
, s
);
297 mbui_avail (iter
) && vlen
> 0;
298 mbui_advance (iter
), ++idx
)
300 /* Skip until we reach the starting position */
304 /* Copy one character */
306 vlim
= mempcpy (vlim
, mbui_cur_ptr (iter
), mb_len (mbui_cur (iter
)));
313 /* Return the number of logical characters (possibly multibyte)
314 that are in string S in the first OFS octets.
317 "\xE2\x9D\xA7" is "U+2767 ROTATED FLORAL HEART BULLET".
318 In the string below, there are only two characters
319 up to the first 4 bytes (The U+2767 which occupies 3 bytes and 'x'):
320 mbs_offset_to_chars ("\xE2\x9D\xA7xyz", 4) => 2 */
322 mbs_offset_to_chars (const char *s
, size_t ofs
)
324 mbui_iterator_t iter
;
326 for (mbui_init (iter
, s
); mbui_avail (iter
); mbui_advance (iter
))
328 ptrdiff_t d
= mbui_cur_ptr (iter
) - s
;
341 if (status
!= EXIT_SUCCESS
)
346 Usage: %s EXPRESSION\n\
349 program_name
, program_name
);
351 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
352 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
355 Print the value of EXPRESSION to standard output. A blank line below\n\
356 separates increasing precedence groups. EXPRESSION may be:\n\
358 ARG1 | ARG2 ARG1 if it is neither null nor 0, otherwise ARG2\n\
360 ARG1 & ARG2 ARG1 if neither argument is null or 0, otherwise 0\n\
364 ARG1 < ARG2 ARG1 is less than ARG2\n\
365 ARG1 <= ARG2 ARG1 is less than or equal to ARG2\n\
366 ARG1 = ARG2 ARG1 is equal to ARG2\n\
367 ARG1 != ARG2 ARG1 is unequal to ARG2\n\
368 ARG1 >= ARG2 ARG1 is greater than or equal to ARG2\n\
369 ARG1 > ARG2 ARG1 is greater than ARG2\n\
373 ARG1 + ARG2 arithmetic sum of ARG1 and ARG2\n\
374 ARG1 - ARG2 arithmetic difference of ARG1 and ARG2\n\
376 /* Tell xgettext that the "% A" below is not a printf-style
377 format string: xgettext:no-c-format */
380 ARG1 * ARG2 arithmetic product of ARG1 and ARG2\n\
381 ARG1 / ARG2 arithmetic quotient of ARG1 divided by ARG2\n\
382 ARG1 % ARG2 arithmetic remainder of ARG1 divided by ARG2\n\
386 STRING : REGEXP anchored pattern match of REGEXP in STRING\n\
388 match STRING REGEXP same as STRING : REGEXP\n\
389 substr STRING POS LENGTH substring of STRING, POS counted from 1\n\
390 index STRING CHARS index in STRING where any CHARS is found, or 0\n\
391 length STRING length of STRING\n\
394 + TOKEN interpret TOKEN as a string, even if it is a\n\
395 keyword like 'match' or an operator like '/'\n\
397 ( EXPRESSION ) value of EXPRESSION\n\
401 Beware that many operators need to be escaped or quoted for shells.\n\
402 Comparisons are arithmetic if both ARGs are numbers, else lexicographical.\n\
403 Pattern matches return the string matched between \\( and \\) or null; if\n\
404 \\( and \\) are not used, they return the number of characters matched or 0.\n\
408 Exit status is 0 if EXPRESSION is neither null nor 0, 1 if EXPRESSION is null\n\
409 or 0, 2 if EXPRESSION is syntactically invalid, and 3 if an error occurred.\n\
411 emit_ancillary_info (PROGRAM_NAME
);
418 /* Report an integer overflow for operation OP and exit. */
420 integer_overflow (char op
)
422 die (EXPR_FAILURE
, ERANGE
, "%c", op
);
427 main (int argc
, char **argv
)
431 initialize_main (&argc
, &argv
);
432 set_program_name (argv
[0]);
433 setlocale (LC_ALL
, "");
434 bindtextdomain (PACKAGE
, LOCALEDIR
);
435 textdomain (PACKAGE
);
437 initialize_exit_failure (EXPR_FAILURE
);
438 atexit (close_stdout
);
440 parse_long_options (argc
, argv
, PROGRAM_NAME
, PACKAGE_NAME
, VERSION
,
441 usage
, AUTHORS
, (char const *) NULL
);
443 /* The above handles --help and --version.
444 Since there is no other invocation of getopt, handle '--' here. */
445 unsigned int u_argc
= argc
;
446 if (1 < u_argc
&& STREQ (argv
[1], "--"))
454 error (0, 0, _("missing operand"));
455 usage (EXPR_INVALID
);
462 die (EXPR_INVALID
, 0, _("syntax error: unexpected argument %s"),
463 quotearg_n_style (0, locale_quoting_style
, *args
));
470 /* Return a VALUE for I. */
473 int_value (unsigned long int i
)
475 VALUE
*v
= xmalloc (sizeof *v
);
477 mpz_init_set_ui (v
->u
.i
, i
);
481 /* Return a VALUE for S. */
484 str_value (char const *s
)
486 VALUE
*v
= xmalloc (sizeof *v
);
488 v
->u
.s
= xstrdup (s
);
492 /* Free VALUE V, including structure components. */
497 if (v
->type
== string
)
512 mpz_out_str (stdout
, 10, v
->u
.i
);
523 /* Return true if V is a null-string or zero-number. */
525 static bool _GL_ATTRIBUTE_PURE
531 return mpz_sgn (v
->u
.i
) == 0;
534 char const *cp
= v
->u
.s
;
554 /* Return true if CP takes the form of an integer. */
556 static bool _GL_ATTRIBUTE_PURE
557 looks_like_integer (char const *cp
)
569 /* Coerce V to a string value (can't fail). */
578 char *s
= mpz_get_str (NULL
, 10, v
->u
.i
);
591 /* Coerce V to an integer value. Return true on success, false on failure. */
604 if (! looks_like_integer (s
))
606 if (mpz_init_set_str (v
->u
.i
, s
, 10) != 0 && !HAVE_GMP
)
607 die (EXPR_FAILURE
, ERANGE
, "%s", (s
));
617 /* Extract a size_t value from an integer value I.
618 If the value is negative, return SIZE_MAX.
619 If the value is too large, return SIZE_MAX - 1. */
625 if (mpz_fits_ulong_p (i
))
627 unsigned long int ul
= mpz_get_ui (i
);
634 /* Return true and advance if the next token matches STR exactly.
635 STR must not be NULL. */
638 nextarg (char const *str
)
644 bool r
= STREQ (*args
, str
);
650 /* Return true if there are no more tokens. */
658 /* Report missing operand.
659 There is an implicit assumption that there was a previous argument,
660 and (args-1) is valid. */
662 require_more_args (void)
665 die (EXPR_INVALID
, 0, _("syntax error: missing argument after %s"),
666 quotearg_n_style (0, locale_quoting_style
, *(args
-1)));
671 /* Print evaluation trace and args remaining. */
680 for (a
= args
; *a
; a
++)
686 /* Do the : operator.
687 SV is the VALUE for the lhs (the string),
688 PV is the VALUE for the rhs (the pattern). */
691 docolon (VALUE
*sv
, VALUE
*pv
)
693 VALUE
*v
IF_LINT ( = NULL
);
695 struct re_pattern_buffer re_buffer
;
696 char fastmap
[UCHAR_MAX
+ 1];
697 struct re_registers re_regs
;
703 re_regs
.num_regs
= 0;
704 re_regs
.start
= NULL
;
707 re_buffer
.buffer
= NULL
;
708 re_buffer
.allocated
= 0;
709 re_buffer
.fastmap
= fastmap
;
710 re_buffer
.translate
= NULL
;
712 RE_SYNTAX_POSIX_BASIC
& ~RE_CONTEXT_INVALID_DUP
& ~RE_NO_EMPTY_RANGES
;
713 errmsg
= re_compile_pattern (pv
->u
.s
, strlen (pv
->u
.s
), &re_buffer
);
715 die (EXPR_INVALID
, 0, "%s", (errmsg
));
716 re_buffer
.newline_anchor
= 0;
718 matchlen
= re_match (&re_buffer
, sv
->u
.s
, strlen (sv
->u
.s
), 0, &re_regs
);
721 /* Were \(...\) used? */
722 if (re_buffer
.re_nsub
> 0)
724 sv
->u
.s
[re_regs
.end
[1]] = '\0';
725 v
= str_value (sv
->u
.s
+ re_regs
.start
[1]);
729 /* In multibyte locales, convert the matched offset (=number of bytes)
730 to the number of matched characters. */
731 size_t i
= (MB_CUR_MAX
== 1
733 : mbs_offset_to_chars (sv
->u
.s
, matchlen
));
737 else if (matchlen
== -1)
739 /* Match failed -- return the right kind of null. */
740 if (re_buffer
.re_nsub
> 0)
747 (matchlen
== -2 ? errno
: EOVERFLOW
),
748 _("error in regular expression matcher"));
750 if (0 < re_regs
.num_regs
)
752 free (re_regs
.start
);
755 re_buffer
.fastmap
= NULL
;
756 regfree (&re_buffer
);
760 /* Handle bare operands and ( expr ) syntax. */
763 eval7 (bool evaluate
)
770 require_more_args ();
776 die (EXPR_INVALID
, 0, _("syntax error: expecting ')' after %s"),
777 quotearg_n_style (0, locale_quoting_style
, *(args
-1)));
779 die (EXPR_INVALID
, 0, _("syntax error: expecting ')' instead of %s"),
780 quotearg_n_style (0, locale_quoting_style
, *args
));
785 die (EXPR_INVALID
, 0, _("syntax error: unexpected ')'"));
787 return str_value (*args
++);
790 /* Handle match, substr, index, and length keywords, and quoting "+". */
793 eval6 (bool evaluate
)
806 require_more_args ();
807 return str_value (*args
++);
809 else if (nextarg ("length"))
811 r
= eval6 (evaluate
);
813 v
= int_value (mbslen (r
->u
.s
));
817 else if (nextarg ("match"))
819 l
= eval6 (evaluate
);
820 r
= eval6 (evaluate
);
831 else if (nextarg ("index"))
835 l
= eval6 (evaluate
);
836 r
= eval6 (evaluate
);
839 pos
= mbs_logical_cspn (l
->u
.s
, r
->u
.s
);
845 else if (nextarg ("substr"))
847 l
= eval6 (evaluate
);
848 i1
= eval6 (evaluate
);
849 i2
= eval6 (evaluate
);
852 if (!toarith (i1
) || !toarith (i2
))
856 size_t pos
= getsize (i1
->u
.i
);
857 size_t len
= getsize (i2
->u
.i
);
859 char *s
= mbs_logical_substr (l
->u
.s
, pos
, len
);
869 return eval7 (evaluate
);
872 /* Handle : operator (pattern matching).
873 Calls docolon to do the real work. */
876 eval5 (bool evaluate
)
885 l
= eval6 (evaluate
);
890 r
= eval6 (evaluate
);
904 /* Handle *, /, % operators. */
907 eval4 (bool evaluate
)
911 enum { multiply
, divide
, mod
} fxn
;
916 l
= eval5 (evaluate
);
921 else if (nextarg ("/"))
923 else if (nextarg ("%"))
927 r
= eval5 (evaluate
);
930 if (!toarith (l
) || !toarith (r
))
931 die (EXPR_INVALID
, 0, _("non-integer argument"));
932 if (fxn
!= multiply
&& mpz_sgn (r
->u
.i
) == 0)
933 die (EXPR_INVALID
, 0, _("division by zero"));
934 ((fxn
== multiply
? mpz_mul
935 : fxn
== divide
? mpz_tdiv_q
937 (l
->u
.i
, l
->u
.i
, r
->u
.i
));
943 /* Handle +, - operators. */
946 eval3 (bool evaluate
)
950 enum { plus
, minus
} fxn
;
955 l
= eval4 (evaluate
);
960 else if (nextarg ("-"))
964 r
= eval4 (evaluate
);
967 if (!toarith (l
) || !toarith (r
))
968 die (EXPR_INVALID
, 0, _("non-integer argument"));
969 (fxn
== plus
? mpz_add
: mpz_sub
) (l
->u
.i
, l
->u
.i
, r
->u
.i
);
975 /* Handle comparisons. */
978 eval2 (bool evaluate
)
985 l
= eval3 (evaluate
);
991 less_than
, less_equal
, equal
, not_equal
, greater_equal
, greater_than
997 else if (nextarg ("<="))
999 else if (nextarg ("=") || nextarg ("=="))
1001 else if (nextarg ("!="))
1003 else if (nextarg (">="))
1004 fxn
= greater_equal
;
1005 else if (nextarg (">"))
1009 r
= eval3 (evaluate
);
1017 if (looks_like_integer (l
->u
.s
) && looks_like_integer (r
->u
.s
))
1018 cmp
= strintcmp (l
->u
.s
, r
->u
.s
);
1022 cmp
= strcoll (l
->u
.s
, r
->u
.s
);
1026 error (0, errno
, _("string comparison failed"));
1027 error (0, 0, _("set LC_ALL='C' to work around the problem"));
1028 die (EXPR_INVALID
, 0,
1029 _("the strings compared were %s and %s"),
1030 quotearg_n_style (0, locale_quoting_style
, l
->u
.s
),
1031 quotearg_n_style (1, locale_quoting_style
, r
->u
.s
));
1037 case less_than
: val
= (cmp
< 0); break;
1038 case less_equal
: val
= (cmp
<= 0); break;
1039 case equal
: val
= (cmp
== 0); break;
1040 case not_equal
: val
= (cmp
!= 0); break;
1041 case greater_equal
: val
= (cmp
>= 0); break;
1042 case greater_than
: val
= (cmp
> 0); break;
1049 l
= int_value (val
);
1056 eval1 (bool evaluate
)
1064 l
= eval2 (evaluate
);
1069 r
= eval2 (evaluate
&& !null (l
));
1070 if (null (l
) || null (r
))
1087 eval (bool evaluate
)
1095 l
= eval1 (evaluate
);
1100 r
= eval1 (evaluate
&& null (l
));