improve of cmpl.
[bush.git] / src / lxrgmr / braces.c
blobf57f3cdd95829844c96b3f6cfb0dd10a80f488d5
1 /* braces.c -- code for doing word expansion in curly braces. */
3 /* Copyright (C) 1987-2020 Free Software Foundation, Inc.
5 This file is part of GNU Bush, the Bourne Again SHell.
7 Bush is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 Bush is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Bush. If not, see <http://www.gnu.org/licenses/>.
21 /* Stuff in curly braces gets expanded before all other shell expansions. */
23 #include "config.h"
25 #if defined (BRACE_EXPANSION)
27 #if defined (HAVE_UNISTD_H)
28 # ifdef _MINIX
29 # include <sys/types.h>
30 # endif
31 # include <unistd.h>
32 #endif
34 #include <errno.h>
36 #include "bushansi.h"
37 #include "bushintl.h"
39 #if defined (SHELL)
40 # include "shell.h"
41 #else
42 # if defined (TEST)
43 typedef char *WORD_DESC;
44 typedef char **WORD_LIST;
45 #define _(X) X
46 # endif /* TEST */
47 #endif /* SHELL */
49 #include "typemax.h" /* INTMAX_MIN, INTMAX_MAX */
50 #include "general.h"
51 #include "shmbutil.h"
52 #include "chartypes.h"
54 #ifndef errno
55 extern int errno;
56 #endif
58 #define brace_whitespace(c) (!(c) || (c) == ' ' || (c) == '\t' || (c) == '\n')
60 #define BRACE_SEQ_SPECIFIER ".."
62 extern int asprintf PARAMS((char **, const char *, ...)) __attribute__((__format__ (printf, 2, 3)));
64 /* Basic idea:
66 Segregate the text into 3 sections: preamble (stuff before an open brace),
67 postamble (stuff after the matching close brace) and amble (stuff after
68 preamble, and before postamble). Expand amble, and then tack on the
69 expansions to preamble. Expand postamble, and tack on the expansions to
70 the result so far.
73 /* The character which is used to separate arguments. */
74 static const int brace_arg_separator = ',';
76 #if defined (PARAMS)
77 static int brace_gobbler PARAMS((char *, size_t, int *, int));
78 static char **expand_amble PARAMS((char *, size_t, int));
79 static char **expand_seqterm PARAMS((char *, size_t));
80 static char **mkseq PARAMS((intmax_t, intmax_t, intmax_t, int, int));
81 static char **array_concat PARAMS((char **, char **));
82 #else
83 static int brace_gobbler ();
84 static char **expand_amble ();
85 static char **expand_seqterm ();
86 static char **mkseq();
87 static char **array_concat ();
88 #endif
#if 0
/* Debugging aid, compiled out: print each string of the NULL-terminated
   vector A together with its index. */
static void
dump_result (a)
     char **a;
{
  int idx;

  idx = 0;
  while (a[idx] != (char *)NULL)
    {
      printf ("dump_result: a[%d] = -%s-\n", idx, a[idx]);
      idx++;
    }
}
#endif
102 /* Return an array of strings; the brace expansion of TEXT. */
103 char **
104 brace_expand (text)
105 char *text;
107 register int start;
108 size_t tlen;
109 char *preamble, *postamble, *amble;
110 size_t alen;
111 char **tack, **result;
112 int i, j, c, c1;
114 DECLARE_MBSTATE;
116 /* Find the text of the preamble. */
117 tlen = strlen (text);
118 i = 0;
119 #if defined (CSH_BRACE_COMPAT)
120 c = brace_gobbler (text, tlen, &i, '{'); /* } */
121 #else
122 /* Make sure that when we exit this loop, c == 0 or text[i] begins a
123 valid brace expansion sequence. */
126 c = brace_gobbler (text, tlen, &i, '{'); /* } */
127 c1 = c;
128 /* Verify that c begins a valid brace expansion word. If it doesn't, we
129 go on. Loop stops when there are no more open braces in the word. */
130 if (c)
132 start = j = i + 1; /* { */
133 c = brace_gobbler (text, tlen, &j, '}');
134 if (c == 0) /* it's not */
136 i++;
137 c = c1;
138 continue;
140 else /* it is */
142 c = c1;
143 break;
146 else
147 break;
149 while (c);
150 #endif /* !CSH_BRACE_COMPAT */
152 preamble = (char *)xmalloc (i + 1);
153 if (i > 0)
154 strncpy (preamble, text, i);
155 preamble[i] = '\0';
157 result = (char **)xmalloc (2 * sizeof (char *));
158 result[0] = preamble;
159 result[1] = (char *)NULL;
161 /* Special case. If we never found an exciting character, then
162 the preamble is all of the text, so just return that. */
163 if (c != '{')
164 return (result);
166 /* Find the amble. This is the stuff inside this set of braces. */
167 start = ++i;
168 c = brace_gobbler (text, tlen, &i, '}');
170 /* What if there isn't a matching close brace? */
171 if (c == 0)
173 #if defined (NOTDEF)
174 /* Well, if we found an unquoted BRACE_ARG_SEPARATOR between START
175 and I, then this should be an error. Otherwise, it isn't. */
176 j = start;
177 while (j < i)
179 if (text[j] == '\\')
181 j++;
182 ADVANCE_CHAR (text, tlen, j);
183 continue;
186 if (text[j] == brace_arg_separator)
187 { /* { */
188 strvec_dispose (result);
189 set_exit_status (EXECUTION_FAILURE);
190 report_error ("no closing `%c' in %s", '}', text);
191 throw_to_top_level ();
193 ADVANCE_CHAR (text, tlen, j);
195 #endif
196 free (preamble); /* Same as result[0]; see initialization. */
197 result[0] = savestring (text);
198 return (result);
201 #if defined (SHELL)
202 amble = substring (text, start, i);
203 alen = i - start;
204 #else
205 amble = (char *)xmalloc (1 + (i - start));
206 strncpy (amble, &text[start], (i - start));
207 alen = i - start;
208 amble[alen] = '\0';
209 #endif
211 #if defined (SHELL)
212 INITIALIZE_MBSTATE;
214 /* If the amble does not contain an unquoted BRACE_ARG_SEPARATOR, then
215 just return without doing any expansion. */
216 j = 0;
217 while (amble[j])
219 if (amble[j] == '\\')
221 j++;
222 ADVANCE_CHAR (amble, alen, j);
223 continue;
226 if (amble[j] == brace_arg_separator)
227 break;
229 ADVANCE_CHAR (amble, alen, j);
232 if (amble[j] == 0)
234 tack = expand_seqterm (amble, alen);
235 if (tack)
236 goto add_tack;
237 else if (text[i + 1])
239 /* If the sequence expansion fails (e.g., because the integers
240 overflow), but there is more in the string, try and process
241 the rest of the string, which may contain additional brace
242 expansions. Treat the unexpanded sequence term as a simple
243 string (including the braces). */
244 tack = strvec_create (2);
245 tack[0] = savestring (text+start-1);
246 tack[0][i-start+2] = '\0';
247 tack[1] = (char *)0;
248 goto add_tack;
250 else
252 free (amble);
253 free (preamble);
254 result[0] = savestring (text);
255 return (result);
258 #endif /* SHELL */
260 tack = expand_amble (amble, alen, 0);
261 add_tack:
262 result = array_concat (result, tack);
263 free (amble);
264 if (tack != result)
265 strvec_dispose (tack);
267 postamble = text + i + 1;
269 if (postamble && *postamble)
271 tack = brace_expand (postamble);
272 result = array_concat (result, tack);
273 if (tack != result)
274 strvec_dispose (tack);
277 return (result);
280 /* Expand the text found inside of braces. We simply try to split the
281 text at BRACE_ARG_SEPARATORs into separate strings. We then brace
282 expand each slot which needs it, until there are no more slots which
283 need it. */
284 static char **
285 expand_amble (text, tlen, flags)
286 char *text;
287 size_t tlen;
288 int flags;
290 char **result, **partial, **tresult;
291 char *tem;
292 int start, i, c;
294 #if defined (SHELL)
295 DECLARE_MBSTATE;
296 #endif
298 result = (char **)NULL;
300 start = i = 0;
301 c = 1;
302 while (c)
304 c = brace_gobbler (text, tlen, &i, brace_arg_separator);
305 #if defined (SHELL)
306 tem = substring (text, start, i);
307 #else
308 tem = (char *)xmalloc (1 + (i - start));
309 strncpy (tem, &text[start], (i - start));
310 tem[i - start] = '\0';
311 #endif
313 partial = brace_expand (tem);
315 if (!result)
316 result = partial;
317 else
319 register int lr, lp, j;
321 lr = strvec_len (result);
322 lp = strvec_len (partial);
324 tresult = strvec_mresize (result, lp + lr + 1);
325 if (tresult == 0)
327 internal_error (_("brace expansion: cannot allocate memory for %s"), tem);
328 free (tem);
329 strvec_dispose (partial);
330 strvec_dispose (result);
331 result = (char **)NULL;
332 return result;
334 else
335 result = tresult;
337 for (j = 0; j < lp; j++)
338 result[lr + j] = partial[j];
340 result[lr + j] = (char *)NULL;
341 free (partial);
343 free (tem);
344 #if defined (SHELL)
345 ADVANCE_CHAR (text, tlen, i);
346 #else
347 i++;
348 #endif
349 start = i;
351 return (result);
354 #define ST_BAD 0
355 #define ST_INT 1
356 #define ST_CHAR 2
357 #define ST_ZINT 3
359 static char **
360 mkseq (start, end, incr, type, width)
361 intmax_t start, end, incr;
362 int type, width;
364 intmax_t n, prevn;
365 int i, nelem;
366 char **result, *t;
368 if (incr == 0)
369 incr = 1;
371 if (start > end && incr > 0)
372 incr = -incr;
373 else if (start < end && incr < 0)
375 if (incr == INTMAX_MIN) /* Don't use -INTMAX_MIN */
376 return ((char **)NULL);
377 incr = -incr;
380 /* Check that end-start will not overflow INTMAX_MIN, INTMAX_MAX. The +3
381 and -2, not strictly necessary, are there because of the way the number
382 of elements and value passed to strvec_create() are calculated below. */
383 if (SUBOVERFLOW (end, start, INTMAX_MIN+3, INTMAX_MAX-2))
384 return ((char **)NULL);
386 prevn = sh_imaxabs (end - start);
387 /* Need to check this way in case INT_MAX == INTMAX_MAX */
388 if (INT_MAX == INTMAX_MAX && (ADDOVERFLOW (prevn, 2, INT_MIN, INT_MAX)))
389 return ((char **)NULL);
390 /* Make sure the assignment to nelem below doesn't end up <= 0 due to
391 intmax_t overflow */
392 else if (ADDOVERFLOW ((prevn/sh_imaxabs(incr)), 1, INTMAX_MIN, INTMAX_MAX))
393 return ((char **)NULL);
395 /* XXX - TOFIX: potentially allocating a lot of extra memory if
396 imaxabs(incr) != 1 */
397 /* Instead of a simple nelem = prevn + 1, something like:
398 nelem = (prevn / imaxabs(incr)) + 1;
399 would work */
400 if ((prevn / sh_imaxabs (incr)) > INT_MAX - 3) /* check int overflow */
401 return ((char **)NULL);
402 nelem = (prevn / sh_imaxabs(incr)) + 1;
403 result = strvec_mcreate (nelem + 1);
404 if (result == 0)
406 internal_error (_("brace expansion: failed to allocate memory for %u elements"), (unsigned int)nelem);
407 return ((char **)NULL);
410 /* Make sure we go through the loop at least once, so {3..3} prints `3' */
411 i = 0;
412 n = start;
415 #if defined (SHELL)
416 if (ISINTERRUPT)
418 result[i] = (char *)NULL;
419 strvec_dispose (result);
420 result = (char **)NULL;
422 QUIT;
423 #endif
424 if (type == ST_INT)
425 result[i++] = t = itos (n);
426 else if (type == ST_ZINT)
428 int len, arg;
429 arg = n;
430 len = asprintf (&t, "%0*d", width, arg);
431 result[i++] = t;
433 else
435 if (t = (char *)malloc (2))
437 t[0] = n;
438 t[1] = '\0';
440 result[i++] = t;
443 /* We failed to allocate memory for this number, so we bail. */
444 if (t == 0)
446 char *p, lbuf[INT_STRLEN_BOUND(intmax_t) + 1];
448 /* Easier to do this than mess around with various intmax_t printf
449 formats (%ld? %lld? %jd?) and PRIdMAX. */
450 p = inttostr (n, lbuf, sizeof (lbuf));
451 internal_error (_("brace expansion: failed to allocate memory for `%s'"), p);
452 strvec_dispose (result);
453 return ((char **)NULL);
456 /* Handle overflow and underflow of n+incr */
457 if (ADDOVERFLOW (n, incr, INTMAX_MIN, INTMAX_MAX))
458 break;
460 n += incr;
462 if ((incr < 0 && n < end) || (incr > 0 && n > end))
463 break;
465 while (1);
467 result[i] = (char *)0;
468 return (result);
471 static char **
472 expand_seqterm (text, tlen)
473 char *text;
474 size_t tlen;
476 char *t, *lhs, *rhs;
477 int lhs_t, rhs_t, lhs_l, rhs_l, width;
478 intmax_t lhs_v, rhs_v, incr;
479 intmax_t tl, tr;
480 char **result, *ep, *oep;
482 t = strstr (text, BRACE_SEQ_SPECIFIER);
483 if (t == 0)
484 return ((char **)NULL);
486 lhs_l = t - text; /* index of start of BRACE_SEQ_SPECIFIER */
487 lhs = substring (text, 0, lhs_l);
488 rhs = substring (text, lhs_l + sizeof(BRACE_SEQ_SPECIFIER) - 1, tlen);
490 if (lhs[0] == 0 || rhs[0] == 0)
492 free (lhs);
493 free (rhs);
494 return ((char **)NULL);
497 /* Now figure out whether LHS and RHS are integers or letters. Both
498 sides have to match. */
499 lhs_t = (legal_number (lhs, &tl)) ? ST_INT :
500 ((ISALPHA (lhs[0]) && lhs[1] == 0) ? ST_CHAR : ST_BAD);
502 /* Decide on rhs and whether or not it looks like the user specified
503 an increment */
504 ep = 0;
505 if (ISDIGIT (rhs[0]) || ((rhs[0] == '+' || rhs[0] == '-') && ISDIGIT (rhs[1])))
507 rhs_t = ST_INT;
508 errno = 0;
509 tr = strtoimax (rhs, &ep, 10);
510 if (errno == ERANGE || (ep && *ep != 0 && *ep != '.'))
511 rhs_t = ST_BAD; /* invalid */
513 else if (ISALPHA (rhs[0]) && (rhs[1] == 0 || rhs[1] == '.'))
515 rhs_t = ST_CHAR;
516 ep = rhs + 1;
518 else
520 rhs_t = ST_BAD;
521 ep = 0;
524 incr = 1;
525 if (rhs_t != ST_BAD)
527 oep = ep;
528 errno = 0;
529 if (ep && *ep == '.' && ep[1] == '.' && ep[2])
530 incr = strtoimax (ep + 2, &ep, 10);
531 if (*ep != 0 || errno == ERANGE)
532 rhs_t = ST_BAD; /* invalid incr or overflow */
533 tlen -= ep - oep;
536 if (lhs_t != rhs_t || lhs_t == ST_BAD || rhs_t == ST_BAD)
538 free (lhs);
539 free (rhs);
540 return ((char **)NULL);
543 /* OK, we have something. It's either a sequence of integers, ascending
544 or descending, or a sequence or letters, ditto. Generate the sequence,
545 put it into a string vector, and return it. */
547 if (lhs_t == ST_CHAR)
549 lhs_v = (unsigned char)lhs[0];
550 rhs_v = (unsigned char)rhs[0];
551 width = 1;
553 else
555 lhs_v = tl; /* integer truncation */
556 rhs_v = tr;
558 /* Decide whether or not the terms need zero-padding */
559 rhs_l = tlen - lhs_l - sizeof (BRACE_SEQ_SPECIFIER) + 1;
560 width = 0;
561 if (lhs_l > 1 && lhs[0] == '0')
562 width = lhs_l, lhs_t = ST_ZINT;
563 if (lhs_l > 2 && lhs[0] == '-' && lhs[1] == '0')
564 width = lhs_l, lhs_t = ST_ZINT;
565 if (rhs_l > 1 && rhs[0] == '0' && width < rhs_l)
566 width = rhs_l, lhs_t = ST_ZINT;
567 if (rhs_l > 2 && rhs[0] == '-' && rhs[1] == '0' && width < rhs_l)
568 width = rhs_l, lhs_t = ST_ZINT;
570 if (width < lhs_l && lhs_t == ST_ZINT)
571 width = lhs_l;
572 if (width < rhs_l && lhs_t == ST_ZINT)
573 width = rhs_l;
576 result = mkseq (lhs_v, rhs_v, incr, lhs_t, width);
578 free (lhs);
579 free (rhs);
581 return (result);
584 /* Start at INDEX, and skip characters in TEXT. Set INDEX to the
585 index of the character matching SATISFY. This understands about
586 quoting. Return the character that caused us to stop searching;
587 this is either the same as SATISFY, or 0. */
588 /* If SATISFY is `}', we are looking for a brace expression, so we
589 should enforce the rules that govern valid brace expansions:
590 1) to count as an arg separator, a comma or `..' has to be outside
591 an inner set of braces.
593 static int
594 brace_gobbler (text, tlen, indx, satisfy)
595 char *text;
596 size_t tlen;
597 int *indx;
598 int satisfy;
600 register int i, c, quoted, level, commas, pass_next;
601 #if defined (SHELL)
602 int si;
603 char *t;
604 #endif
605 DECLARE_MBSTATE;
607 level = quoted = pass_next = 0;
608 #if defined (CSH_BRACE_COMPAT)
609 commas = 1;
610 #else
611 commas = (satisfy == '}') ? 0 : 1;
612 #endif
614 i = *indx;
615 while (c = text[i])
617 if (pass_next)
619 pass_next = 0;
620 #if defined (SHELL)
621 ADVANCE_CHAR (text, tlen, i);
622 #else
623 i++;
624 #endif
625 continue;
628 /* A backslash escapes the next character. This allows backslash to
629 escape the quote character in a double-quoted string. */
630 if (c == '\\' && (quoted == 0 || quoted == '"' || quoted == '`'))
632 pass_next = 1;
633 i++;
634 continue;
637 #if defined (SHELL)
638 /* If compiling for the shell, treat ${...} like \{...} */
639 if (c == '$' && text[i+1] == '{' && quoted != '\'') /* } */
641 pass_next = 1;
642 i++;
643 if (quoted == 0)
644 level++;
645 continue;
647 #endif
649 if (quoted)
651 if (c == quoted)
652 quoted = 0;
653 #if defined (SHELL)
654 /* The shell allows quoted command substitutions */
655 if (quoted == '"' && c == '$' && text[i+1] == '(') /*)*/
656 goto comsub;
657 #endif
658 #if defined (SHELL)
659 ADVANCE_CHAR (text, tlen, i);
660 #else
661 i++;
662 #endif
663 continue;
666 if (c == '"' || c == '\'' || c == '`')
668 quoted = c;
669 i++;
670 continue;
673 #if defined (SHELL)
674 /* Pass new-style command and process substitutions through unchanged. */
675 if ((c == '$' || c == '<' || c == '>') && text[i+1] == '(') /* ) */
677 comsub:
678 si = i + 2;
679 t = extract_command_subst (text, &si, 0);
680 i = si;
681 free (t);
682 i++;
683 continue;
685 #endif
687 if (c == satisfy && level == 0 && quoted == 0 && commas > 0)
689 /* We ignore an open brace surrounded by whitespace, and also
690 an open brace followed immediately by a close brace preceded
691 by whitespace. */
692 if (c == '{' &&
693 ((!i || brace_whitespace (text[i - 1])) &&
694 (brace_whitespace (text[i + 1]) || text[i + 1] == '}')))
696 i++;
697 continue;
700 break;
703 if (c == '{')
704 level++;
705 else if (c == '}' && level)
706 level--;
707 #if !defined (CSH_BRACE_COMPAT)
708 else if (satisfy == '}' && c == brace_arg_separator && level == 0)
709 commas++;
710 else if (satisfy == '}' && STREQN (text+i, BRACE_SEQ_SPECIFIER, 2) &&
711 text[i+2] != satisfy && level == 0)
712 commas++;
713 #endif
715 #if defined (SHELL)
716 ADVANCE_CHAR (text, tlen, i);
717 #else
718 i++;
719 #endif
722 *indx = i;
723 return (c);
/* Return an array of strings which is the result of appending each string
   in ARR2 to each string in ARR1.  The resultant array is
   len (arr1) * len (arr2) long.

   Ownership: in the general case a new array is returned and ARR1 and its
   strings are freed, while ARR2 is left for the caller.  There are four
   shortcuts that return one of the arguments itself rather than a copy:
     - ARR1 is NULL: ARR2 is returned;
     - ARR2 is NULL: ARR1 is returned;
     - ARR1 is a single empty string: ARR1 is freed and ARR2 is returned;
     - ARR2 is a single empty string: ARR1 is returned.
   Callers therefore have to compare the return value with ARR2 before
   disposing of ARR2.

   Returns NULL, after freeing ARR1, if the result array cannot be
   allocated or its size is not representable. */
static char **
array_concat (arr1, arr2)
     char **arr1, **arr2;
{
  size_t n1, n2, max_slots, len, i, j;
  char **result;

  if (arr1 == 0)
    return (arr2);

  if (arr2 == 0)
    return (arr1);

  /* We can only short-circuit if the array consists of a single null element;
     otherwise we need to replicate the contents of the other array and
     prefix (or append, below) an empty element to each one. */
  if (arr1[0] && arr1[0][0] == 0 && arr1[1] == 0)
    {
      strvec_dispose (arr1);
      return (arr2);
    }

  if (arr2[0] && arr2[0][0] == 0 && arr2[1] == 0)
    return (arr1);

  n1 = (size_t)strvec_len (arr1);
  n2 = (size_t)strvec_len (arr2);

  /* Refuse a product that would overflow the allocation size instead of
     allocating a vector that is too small. */
  max_slots = (size_t)-1 / sizeof (char *);
  if (n2 != 0 && n1 > (max_slots - 1) / n2)
    result = (char **)NULL;
  else
    result = (char **)malloc ((1 + (n1 * n2)) * sizeof (char *));

  if (result == 0)
    {
      /* The caller replaces its pointer to ARR1 with our return value, so
	 ARR1 has to be released here or it is lost. */
      strvec_dispose (arr1);
      return ((char **)NULL);
    }

  len = 0;
  for (i = 0; i < n1; i++)
    {
      size_t l1;

      l1 = strlen (arr1[i]);
      for (j = 0; j < n2; j++)
	{
	  size_t l2;
	  char *s;

	  l2 = strlen (arr2[j]);
	  s = (char *)xmalloc (1 + l1 + l2);
	  memcpy (s, arr1[i], l1);
	  memcpy (s + l1, arr2[j], l2 + 1);	/* copies the NUL as well */
	  result[len++] = s;
	}
      free (arr1[i]);
    }
  free (arr1);

  result[len] = (char *)NULL;
  return (result);
}
#if defined (TEST)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Stand-alone replacement for the shell's xmalloc: allocate N bytes or
   exit.  The callers in this file use the result without checking it. */
void *
xmalloc(n)
    size_t n;
{
  void *p;

  p = malloc (n ? n : 1);
  if (p == 0)
    {
      fprintf (stderr, "xmalloc: cannot allocate %lu bytes\n", (unsigned long)n);
      exit (2);
    }
  return (p);
}

/* Stand-alone replacement for the shell's xrealloc: resize P to N bytes
   or exit. */
void *
xrealloc(p, n)
     void *p;
     size_t n;
{
  void *q;

  q = realloc (p, n ? n : 1);
  if (q == 0)
    {
      fprintf (stderr, "xrealloc: cannot allocate %lu bytes\n", (unsigned long)n);
      exit (2);
    }
  return (q);
}

/* Stand-alone replacement for the shell's internal_error: print FORMAT,
   expanded with ARG1 and ARG2, and a newline on standard error.  Always
   returns 0. */
int
internal_error (format, arg1, arg2)
     char *format, *arg1, *arg2;
{
  fprintf (stderr, format, arg1, arg2);
  fprintf (stderr, "\n");
  return (0);
}

/* Interactive driver: read lines from standard input, brace-expand each
   one, and print the expansions one per line.  Stops at end of file or at
   a line beginning with "quit". */
int
main ()
{
  char example[256];
  char **result;
  int i;

  for (;;)
    {
      fprintf (stderr, "brace_expand> ");

      if (fgets (example, sizeof (example), stdin) == 0 ||
	  strncmp (example, "quit", 4) == 0)
	break;

      /* Remove the line terminator only if there is one.  Input that was
	 cut short, or a final line without a newline, keeps its last
	 character. */
      example[strcspn (example, "\n")] = '\0';

      result = brace_expand (example);
      if (result == 0)		/* expansion failed; nothing to print */
	continue;

      for (i = 0; result[i]; i++)
	printf ("%s\n", result[i]);

      strvec_dispose (result);
    }

  return (0);
}

/*
 * Local variables:
 * compile-command: "gcc -g -Bstatic -DTEST -o brace_expand braces.c general.o"
 * end:
 */

#endif /* TEST */
843 #endif /* BRACE_EXPANSION */