1 /* expr -- evaluate expressions.
2 Copyright (C) 1986-2024 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Author: Mike Parker.
18 Modified for arbitrary-precision calculation by James Youngman.
20 This program evaluates expressions. Each token (operator, operand,
21 parenthesis) of the expression must be a separate argument. The
22 parser used is a reasonably general one, though any incarnation of
23 it is language-specific. It is especially nice for expressions.
25 No parse tree is needed; a new node is evaluated immediately.
26 One function can handle multiple operators all of equal precedence,
27 provided they all associate ((x op x) op x).
29 Define EVAL_TRACE to print an evaluation trace. */
33 #include <sys/types.h>
38 #include "long-options.h"
40 #include "strnumcmp.h"
43 /* Various parts of this code assume size_t fits into unsigned long
44 int, the widest unsigned type that GMP supports. */
45 static_assert (SIZE_MAX
<= ULONG_MAX
);
47 /* The official name of this program (e.g., no 'g' prefix). */
48 #define PROGRAM_NAME "expr"
51 proper_name ("Mike Parker"), \
52 proper_name ("James Youngman"), \
53 proper_name ("Paul Eggert")
58 /* Invalid expression: e.g., its form does not conform to the
59 grammar for expressions. Our grammar is an extension of the
63 /* An internal error occurred, e.g., arithmetic overflow, storage
68 /* The kinds of value we can have. */
74 typedef enum valtype TYPE
;
79 TYPE type
; /* Which kind. */
81 { /* The value itself. */
86 typedef struct valinfo VALUE
;
88 /* The arguments given to the program, minus the program name. */
91 static VALUE
*eval (bool);
92 static bool nomoreargs (void);
93 static bool null (VALUE
*v
);
94 static void printv (VALUE
*v
);
98 Find the first occurrence in the character string S of any character
99 in the character string ACCEPT.
101 Copied from gnulib's mbscspn, with two differences:
102 1. Returns 1-based position of first found character, or zero if not found.
103 2. Returned value is the logical character index, NOT byte offset.
106 mbs_logical_cspn ('hello','a') => 0
107 mbs_logical_cspn ('hello','h') => 1
108 mbs_logical_cspn ('hello','oe') => 2
109 mbs_logical_cspn ('hello','lo') => 3
111 In UTF-8 \xCE\xB1 is a single character (greek alpha):
112 mbs_logical_cspn ('\xCE\xB1bc','\xCE\xB1') => 1
113 mbs_logical_cspn ('\xCE\xB1bc','c') => 3 */
115 mbs_logical_cspn (char const *s
, char const *accept
)
119 if (accept
[0] == '\0')
125 for (char const *p
= s
; *p
; )
128 mcel_t g
= mcel_scanz (p
);
131 if (mbschr (accept
, *p
))
135 for (char const *a
= accept
; *a
; )
137 mcel_t h
= mcel_scanz (a
);
138 if (mcel_cmp (g
, h
) == 0)
147 /* single-byte locale,
148 convert returned byte offset to 1-based index or zero if not found. */
149 size_t i
= strcspn (s
, accept
);
158 /* Extract the substring of S, starting at logical character
159 position POS and spanning LEN characters.
160 The first character position is 1.
161 POS and LEN refer to logical characters, not octets.
163 Return the new string, a copy made with ximemdup0.
164 The new string might be empty if POS/LEN are invalid. */
166 mbs_logical_substr (char const *s
, size_t pos
, size_t len
)
168 size_t mb_cur_max
= MB_CUR_MAX
;
169 idx_t llen
= mb_cur_max
<= 1 ? strlen (s
) : mbslen (s
); /* logical length */
171 /* characters to copy */
172 size_t vlen
= MIN (len
, pos
<= llen
? llen
- pos
+ 1 : 0);
174 char const *substart
= s
;
176 if (pos
== 0 || len
== SIZE_MAX
)
178 /* The request is invalid. Silently yield an empty string. */
180 else if (mb_cur_max
<= 1)
186 for (idx_t idx
= 1; *s
&& vlen
; idx
++)
188 idx_t char_bytes
= mcel_scanz (s
).len
;
190 /* Skip until we reach the starting position. */
196 /* Add one character's length in bytes. */
198 sublen
+= char_bytes
;
204 return ximemdup0 (substart
, sublen
);
207 /* Return the number of logical characters (possibly multibyte)
208 that are in string S in the first OFS octets.
211 "\xE2\x9D\xA7" is "U+2767 ROTATED FLORAL HEART BULLET".
212 In the string below, there are only two characters
213 up to the first 4 bytes (The U+2767 which occupies 3 bytes and 'x'):
214 mbs_offset_to_chars ("\xE2\x9D\xA7xyz", 4) => 2 */
216 mbs_offset_to_chars (char const *s
, size_t ofs
)
219 for (size_t d
= 0; d
< ofs
&& s
[d
]; d
+= mcel_scanz (s
+ d
).len
)
229 if (status
!= EXIT_SUCCESS
)
234 Usage: %s EXPRESSION\n\
237 program_name
, program_name
);
239 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
240 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
243 Print the value of EXPRESSION to standard output. A blank line below\n\
244 separates increasing precedence groups. EXPRESSION may be:\n\
246 ARG1 | ARG2 ARG1 if it is neither null nor 0, otherwise ARG2\n\
248 ARG1 & ARG2 ARG1 if neither argument is null or 0, otherwise 0\n\
252 ARG1 < ARG2 ARG1 is less than ARG2\n\
253 ARG1 <= ARG2 ARG1 is less than or equal to ARG2\n\
254 ARG1 = ARG2 ARG1 is equal to ARG2\n\
255 ARG1 != ARG2 ARG1 is unequal to ARG2\n\
256 ARG1 >= ARG2 ARG1 is greater than or equal to ARG2\n\
257 ARG1 > ARG2 ARG1 is greater than ARG2\n\
261 ARG1 + ARG2 arithmetic sum of ARG1 and ARG2\n\
262 ARG1 - ARG2 arithmetic difference of ARG1 and ARG2\n\
264 /* Tell xgettext that the "% A" below is not a printf-style
265 format string: xgettext:no-c-format */
268 ARG1 * ARG2 arithmetic product of ARG1 and ARG2\n\
269 ARG1 / ARG2 arithmetic quotient of ARG1 divided by ARG2\n\
270 ARG1 % ARG2 arithmetic remainder of ARG1 divided by ARG2\n\
274 STRING : REGEXP anchored pattern match of REGEXP in STRING\n\
276 match STRING REGEXP same as STRING : REGEXP\n\
277 substr STRING POS LENGTH substring of STRING, POS counted from 1\n\
278 index STRING CHARS index in STRING where any CHARS is found, or 0\n\
279 length STRING length of STRING\n\
282 + TOKEN interpret TOKEN as a string, even if it is a\n\
283 keyword like 'match' or an operator like '/'\n\
285 ( EXPRESSION ) value of EXPRESSION\n\
289 Beware that many operators need to be escaped or quoted for shells.\n\
290 Comparisons are arithmetic if both ARGs are numbers, else lexicographical.\n\
291 Pattern matches return the string matched between \\( and \\) or null; if\n\
292 \\( and \\) are not used, they return the number of characters matched or 0.\n\
296 Exit status is 0 if EXPRESSION is neither null nor 0, 1 if EXPRESSION is null\n\
297 or 0, 2 if EXPRESSION is syntactically invalid, and 3 if an error occurred.\n\
299 emit_ancillary_info (PROGRAM_NAME
);
306 main (int argc
, char **argv
)
310 initialize_main (&argc
, &argv
);
311 set_program_name (argv
[0]);
312 setlocale (LC_ALL
, "");
313 bindtextdomain (PACKAGE
, LOCALEDIR
);
314 textdomain (PACKAGE
);
316 initialize_exit_failure (EXPR_FAILURE
);
317 atexit (close_stdout
);
319 parse_long_options (argc
, argv
, PROGRAM_NAME
, PACKAGE_NAME
, VERSION
,
320 usage
, AUTHORS
, (char const *) nullptr);
322 /* The above handles --help and --version.
323 Since there is no other invocation of getopt, handle '--' here. */
324 if (1 < argc
&& STREQ (argv
[1], "--"))
332 error (0, 0, _("missing operand"));
333 usage (EXPR_INVALID
);
340 error (EXPR_INVALID
, 0, _("syntax error: unexpected argument %s"),
341 quotearg_n_style (0, locale_quoting_style
, *args
));
345 main_exit (null (v
));
348 /* Return a VALUE for I. */
351 int_value (unsigned long int i
)
353 VALUE
*v
= xmalloc (sizeof *v
);
355 mpz_init_set_ui (v
->u
.i
, i
);
359 /* Return a VALUE for S. */
362 str_value (char const *s
)
364 VALUE
*v
= xmalloc (sizeof *v
);
366 v
->u
.s
= xstrdup (s
);
370 /* Free VALUE V, including structure components. */
375 if (v
->type
== string
)
390 mpz_out_str (stdout
, 10, v
->u
.i
);
401 /* Return true if V is a null-string or zero-number. */
410 return mpz_sgn (v
->u
.i
) == 0;
413 char const *cp
= v
->u
.s
;
433 /* Return true if CP takes the form of an integer. */
437 looks_like_integer (char const *cp
)
449 /* Coerce V to a string value (can't fail). */
458 char *s
= mpz_get_str (nullptr, 10, v
->u
.i
);
471 /* Coerce V to an integer value. Return true on success, false on failure. */
484 if (! looks_like_integer (s
))
486 if (mpz_init_set_str (v
->u
.i
, s
, 10) != 0)
487 error (EXPR_FAILURE
, ERANGE
, "%s", (s
));
497 /* Extract a size_t value from an integer value I.
498 If the value is negative, return SIZE_MAX.
499 If the value is too large, return SIZE_MAX - 1. */
505 if (mpz_fits_ulong_p (i
))
507 unsigned long int ul
= mpz_get_ui (i
);
514 /* Return true and advance if the next token matches STR exactly.
515 STR must not be null. */
518 nextarg (char const *str
)
520 if (*args
== nullptr)
524 bool r
= STREQ (*args
, str
);
530 /* Return true if there are no more tokens. */
538 /* Report missing operand.
539 There is an implicit assumption that there was a previous argument,
540 and (args-1) is valid. */
542 require_more_args (void)
545 error (EXPR_INVALID
, 0, _("syntax error: missing argument after %s"),
546 quotearg_n_style (0, locale_quoting_style
, *(args
- 1)));
551 /* Print evaluation trace and args remaining. */
560 for (a
= args
; *a
; a
++)
566 /* Do the : operator.
567 SV is the VALUE for the lhs (the string),
568 PV is the VALUE for the rhs (the pattern). */
571 docolon (VALUE
*sv
, VALUE
*pv
)
575 struct re_pattern_buffer re_buffer
;
576 char fastmap
[UCHAR_MAX
+ 1];
577 struct re_registers re_regs
;
583 re_regs
.num_regs
= 0;
584 re_regs
.start
= nullptr;
585 re_regs
.end
= nullptr;
587 re_buffer
.buffer
= nullptr;
588 re_buffer
.allocated
= 0;
589 re_buffer
.fastmap
= fastmap
;
590 re_buffer
.translate
= nullptr;
592 RE_SYNTAX_POSIX_BASIC
& ~RE_CONTEXT_INVALID_DUP
& ~RE_NO_EMPTY_RANGES
;
593 errmsg
= re_compile_pattern (pv
->u
.s
, strlen (pv
->u
.s
), &re_buffer
);
595 error (EXPR_INVALID
, 0, "%s", (errmsg
));
596 re_buffer
.newline_anchor
= 0;
598 matchlen
= re_match (&re_buffer
, sv
->u
.s
, strlen (sv
->u
.s
), 0, &re_regs
);
601 /* Were \(...\) used? */
602 if (re_buffer
.re_nsub
> 0)
604 if (re_regs
.end
[1] < 0)
608 sv
->u
.s
[re_regs
.end
[1]] = '\0';
609 v
= str_value (sv
->u
.s
+ re_regs
.start
[1]);
614 /* In multibyte locales, convert the matched offset (=number of bytes)
615 to the number of matched characters. */
616 size_t i
= (MB_CUR_MAX
== 1
618 : mbs_offset_to_chars (sv
->u
.s
, matchlen
));
622 else if (matchlen
== -1)
624 /* Match failed -- return the right kind of null. */
625 if (re_buffer
.re_nsub
> 0)
632 matchlen
== -2 ? errno
: EOVERFLOW
,
633 _("error in regular expression matcher"));
635 if (0 < re_regs
.num_regs
)
637 free (re_regs
.start
);
640 re_buffer
.fastmap
= nullptr;
641 regfree (&re_buffer
);
645 /* Handle bare operands and ( expr ) syntax. */
648 eval7 (bool evaluate
)
655 require_more_args ();
661 error (EXPR_INVALID
, 0, _("syntax error: expecting ')' after %s"),
662 quotearg_n_style (0, locale_quoting_style
, *(args
- 1)));
664 error (EXPR_INVALID
, 0, _("syntax error: expecting ')' instead of %s"),
665 quotearg_n_style (0, locale_quoting_style
, *args
));
670 error (EXPR_INVALID
, 0, _("syntax error: unexpected ')'"));
672 return str_value (*args
++);
675 /* Handle match, substr, index, and length keywords, and quoting "+". */
678 eval6 (bool evaluate
)
691 require_more_args ();
692 return str_value (*args
++);
694 else if (nextarg ("length"))
696 r
= eval6 (evaluate
);
698 v
= int_value (mbslen (r
->u
.s
));
702 else if (nextarg ("match"))
704 l
= eval6 (evaluate
);
705 r
= eval6 (evaluate
);
716 else if (nextarg ("index"))
720 l
= eval6 (evaluate
);
721 r
= eval6 (evaluate
);
724 pos
= mbs_logical_cspn (l
->u
.s
, r
->u
.s
);
730 else if (nextarg ("substr"))
732 l
= eval6 (evaluate
);
733 i1
= eval6 (evaluate
);
734 i2
= eval6 (evaluate
);
737 if (!toarith (i1
) || !toarith (i2
))
741 size_t pos
= getsize (i1
->u
.i
);
742 size_t len
= getsize (i2
->u
.i
);
744 char *s
= mbs_logical_substr (l
->u
.s
, pos
, len
);
754 return eval7 (evaluate
);
757 /* Handle : operator (pattern matching).
758 Calls docolon to do the real work. */
761 eval5 (bool evaluate
)
770 l
= eval6 (evaluate
);
775 r
= eval6 (evaluate
);
789 /* Handle *, /, % operators. */
792 eval4 (bool evaluate
)
796 enum { multiply
, divide
, mod
} fxn
;
801 l
= eval5 (evaluate
);
806 else if (nextarg ("/"))
808 else if (nextarg ("%"))
812 r
= eval5 (evaluate
);
815 if (!toarith (l
) || !toarith (r
))
816 error (EXPR_INVALID
, 0, _("non-integer argument"));
817 if (fxn
!= multiply
&& mpz_sgn (r
->u
.i
) == 0)
818 error (EXPR_INVALID
, 0, _("division by zero"));
819 ((fxn
== multiply
? mpz_mul
820 : fxn
== divide
? mpz_tdiv_q
822 (l
->u
.i
, l
->u
.i
, r
->u
.i
));
828 /* Handle +, - operators. */
831 eval3 (bool evaluate
)
835 enum { plus
, minus
} fxn
;
840 l
= eval4 (evaluate
);
845 else if (nextarg ("-"))
849 r
= eval4 (evaluate
);
852 if (!toarith (l
) || !toarith (r
))
853 error (EXPR_INVALID
, 0, _("non-integer argument"));
854 (fxn
== plus
? mpz_add
: mpz_sub
) (l
->u
.i
, l
->u
.i
, r
->u
.i
);
860 /* Handle comparisons. */
863 eval2 (bool evaluate
)
870 l
= eval3 (evaluate
);
876 less_than
, less_equal
, equal
, not_equal
, greater_equal
, greater_than
882 else if (nextarg ("<="))
884 else if (nextarg ("=") || nextarg ("=="))
886 else if (nextarg ("!="))
888 else if (nextarg (">="))
890 else if (nextarg (">"))
894 r
= eval3 (evaluate
);
902 if (looks_like_integer (l
->u
.s
) && looks_like_integer (r
->u
.s
))
903 cmp
= strintcmp (l
->u
.s
, r
->u
.s
);
907 cmp
= strcoll (l
->u
.s
, r
->u
.s
);
911 error (0, errno
, _("string comparison failed"));
912 error (0, 0, _("set LC_ALL='C' to work around the problem"));
913 error (EXPR_INVALID
, 0,
914 _("the strings compared were %s and %s"),
915 quotearg_n_style (0, locale_quoting_style
, l
->u
.s
),
916 quotearg_n_style (1, locale_quoting_style
, r
->u
.s
));
922 case less_than
: val
= (cmp
< 0); break;
923 case less_equal
: val
= (cmp
<= 0); break;
924 case equal
: val
= (cmp
== 0); break;
925 case not_equal
: val
= (cmp
!= 0); break;
926 case greater_equal
: val
= (cmp
>= 0); break;
927 case greater_than
: val
= (cmp
> 0); break;
928 default: unreachable ();
941 eval1 (bool evaluate
)
949 l
= eval2 (evaluate
);
954 r
= eval2 (evaluate
&& !null (l
));
955 if (null (l
) || null (r
))
980 l
= eval1 (evaluate
);
985 r
= eval1 (evaluate
&& null (l
));