1 /* wordsplit - a word splitter
2 Copyright (C) 2009-2018 Sergey Poznyakoff
4 This program is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 3 of the License, or (at your
7 option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License along
15 with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include <wordsplit.h>
28 #include <stdckdint.h>
34 #include <attribute.h>
41 # define gettext(msgid) msgid
43 #define _(msgid) gettext (msgid)
44 #define N_(msgid) msgid
46 #define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
47 #define ISDELIM(ws, c) ((c) && strchr ((ws)->ws_delim, c))
/* ISVARBEG (C) is true if C may start a variable name; ISVARCHR (C) is
   true if C may appear after the first character of one.  C is expanded
   twice, so it must be free of side effects; it is parenthesized so that
   any expression can be passed safely.  */
#define ISVARBEG(c) (c_isalpha (c) || (c) == '_')
#define ISVARCHR(c) (c_isalnum (c) || (c) == '_')
52 #define WSP_RETURN_DELIMS(wsp) \
53 ((wsp)->ws_flags & WRDSF_RETURN_DELIMS || ((wsp)->ws_options & WRDSO_MAXWORDS))
55 /* Set escape option F in WS for words (Q==0) or quoted strings (Q==1). */
56 #define WRDSO_ESC_SET(ws,q,f) ((ws)->ws_options |= ((f) << 4*(q)))
57 /* Test WS for escape option F for words (Q==0) or quoted strings (Q==1). */
58 #define WRDSO_ESC_TEST(ws,q,f) ((ws)->ws_options & ((f) << 4*(q)))
60 /* When printing diagnostics with %.*s, output at most this many bytes.
61 Users typically don't want super-long strings in diagnostics,
62 and anyway printf fails if it outputs more than INT_MAX bytes.
63 printflen (LEN) returns LEN as an int, but at most PRINTMAX;
64 printfdots (LEN) returns "..." if LEN exceeds PRINTMAX, "" otherwise. */
66 enum { PRINTMAX
= 10 * 1024 };
71 return len
<= PRINTMAX
? len
: PRINTMAX
;
75 printfdots (idx_t len
)
77 return &"..."[3 * (len
<= PRINTMAX
)];
82 _wsplt_alloc_die (struct wordsplit
*wsp
)
84 wsp
->ws_error ("%s", _("memory exhausted"));
88 static void ATTRIBUTE_FORMAT ((__printf__
, 1, 2))
89 _wsplt_error (const char *fmt
, ...)
94 vfprintf (stderr
, fmt
, ap
);
99 #ifdef _WORDSPLIT_EXTRAS
100 # define WORDSPLIT_EXTRAS_extern extern
102 # define WORDSPLIT_EXTRAS_extern static
103 static void wordsplit_clearerr (struct wordsplit
*);
104 static void wordsplit_free_envbuf (struct wordsplit
*);
105 static void wordsplit_free_words (struct wordsplit
*);
106 static void wordsplit_perror (struct wordsplit
*);
109 static void wordsplit_free_nodes (struct wordsplit
*);
112 _wsplt_seterr (struct wordsplit
*wsp
, int ec
)
115 if (wsp
->ws_flags
& WRDSF_SHOWERR
)
116 wordsplit_perror (wsp
);
121 _wsplt_nomem (struct wordsplit
*wsp
)
124 wsp
->ws_errno
= WRDSE_NOSPACE
;
125 if (wsp
->ws_flags
& WRDSF_ENOMEMABRT
)
126 wsp
->ws_alloc_die (wsp
);
127 if (wsp
->ws_flags
& WRDSF_SHOWERR
)
128 wordsplit_perror (wsp
);
129 if (!(wsp
->ws_flags
& WRDSF_REUSE
))
130 wordsplit_free (wsp
);
131 wordsplit_free_nodes (wsp
);
132 return wsp
->ws_errno
;
135 static int wordsplit_init (struct wordsplit
*wsp
, char const *input
,
136 idx_t len
, unsigned flags
);
137 static int wordsplit_process_list (struct wordsplit
*wsp
, idx_t start
);
138 static int wordsplit_finish (struct wordsplit
*wsp
);
141 _wsplt_subsplit (struct wordsplit
*wsp
, struct wordsplit
*wss
,
142 char const *str
, idx_t len
,
143 unsigned flags
, bool finalize
)
147 wss
->ws_delim
= wsp
->ws_delim
;
148 wss
->ws_debug
= wsp
->ws_debug
;
149 wss
->ws_error
= wsp
->ws_error
;
150 wss
->ws_alloc_die
= wsp
->ws_alloc_die
;
152 if (!(flags
& WRDSF_NOVAR
))
154 wss
->ws_env
= wsp
->ws_env
;
155 wss
->ws_getvar
= wsp
->ws_getvar
;
156 flags
|= wsp
->ws_flags
& (WRDSF_ENV
| WRDSF_ENV_KV
| WRDSF_GETVAR
);
158 if (!(flags
& WRDSF_NOCMD
))
160 wss
->ws_command
= wsp
->ws_command
;
163 if ((flags
& (WRDSF_NOVAR
|WRDSF_NOCMD
)) != (WRDSF_NOVAR
|WRDSF_NOCMD
))
165 wss
->ws_closure
= wsp
->ws_closure
;
166 flags
|= wsp
->ws_flags
& WRDSF_CLOSURE
;
169 wss
->ws_options
= wsp
->ws_options
;
175 | (wsp
->ws_flags
& (WRDSF_SHOWDBG
| WRDSF_SHOWERR
| WRDSF_OPTIONS
));
177 rc
= wordsplit_init (wss
, str
, len
, flags
);
181 rc
= wordsplit_process_list (wss
, 0);
184 wordsplit_free_nodes (wss
);
189 rc
= wordsplit_finish (wss
);
190 wordsplit_free_nodes (wss
);
196 _wsplt_seterr_sub (struct wordsplit
*wsp
, struct wordsplit
*wss
)
198 if (wsp
->ws_errno
== WRDSE_USERERR
)
199 free (wsp
->ws_usererr
);
200 wsp
->ws_errno
= wss
->ws_errno
;
201 if (wss
->ws_errno
== WRDSE_USERERR
)
203 wsp
->ws_usererr
= wss
->ws_usererr
;
204 wss
->ws_errno
= WRDSE_EOF
;
205 wss
->ws_usererr
= NULL
;
210 wordsplit_init0 (struct wordsplit
*wsp
)
212 if (wsp
->ws_flags
& WRDSF_REUSE
)
214 if (!(wsp
->ws_flags
& WRDSF_APPEND
))
215 wordsplit_free_words (wsp
);
216 wordsplit_clearerr (wsp
);
220 wsp
->ws_wordv
= NULL
;
228 static char wordsplit_c_escape_tab
[] = "\\\\\"\"a\ab\bf\fn\nr\rt\tv\v";
231 wordsplit_init (struct wordsplit
*wsp
, char const *input
, idx_t len
,
234 wsp
->ws_flags
= flags
;
236 if (!(wsp
->ws_flags
& WRDSF_ALLOC_DIE
))
237 wsp
->ws_alloc_die
= _wsplt_alloc_die
;
238 if (!(wsp
->ws_flags
& WRDSF_ERROR
))
239 wsp
->ws_error
= _wsplt_error
;
241 if (!(wsp
->ws_flags
& WRDSF_NOVAR
))
243 /* These will be initialized on first variable assignment */
244 wsp
->ws_envidx
= wsp
->ws_envsiz
= 0;
245 wsp
->ws_envbuf
= NULL
;
248 if (!(wsp
->ws_flags
& WRDSF_NOCMD
))
250 if (!wsp
->ws_command
)
252 _wsplt_seterr (wsp
, WRDSE_USAGE
);
254 return wsp
->ws_errno
;
258 if (wsp
->ws_flags
& WRDSF_SHOWDBG
)
260 if (!(wsp
->ws_flags
& WRDSF_DEBUG
))
262 if (wsp
->ws_flags
& WRDSF_ERROR
)
263 wsp
->ws_debug
= wsp
->ws_error
;
264 else if (wsp
->ws_flags
& WRDSF_SHOWERR
)
265 wsp
->ws_debug
= _wsplt_error
;
267 wsp
->ws_flags
&= ~WRDSF_SHOWDBG
;
271 wsp
->ws_input
= input
;
274 if (!(wsp
->ws_flags
& WRDSF_DOOFFS
))
277 if (!(wsp
->ws_flags
& WRDSF_DELIM
))
278 wsp
->ws_delim
= " \t\n";
280 if (!(wsp
->ws_flags
& WRDSF_COMMENT
))
281 wsp
->ws_comment
= NULL
;
283 if (!(wsp
->ws_flags
& WRDSF_CLOSURE
))
284 wsp
->ws_closure
= NULL
;
286 if (!(wsp
->ws_flags
& WRDSF_OPTIONS
))
289 if (wsp
->ws_flags
& WRDSF_ESCAPE
)
291 if (!wsp
->ws_escape
[WRDSX_WORD
])
292 wsp
->ws_escape
[WRDSX_WORD
] = "";
293 if (!wsp
->ws_escape
[WRDSX_QUOTE
])
294 wsp
->ws_escape
[WRDSX_QUOTE
] = "";
298 if (wsp
->ws_flags
& WRDSF_CESCAPES
)
300 wsp
->ws_escape
[WRDSX_WORD
] = wordsplit_c_escape_tab
;
301 wsp
->ws_escape
[WRDSX_QUOTE
] = wordsplit_c_escape_tab
;
302 wsp
->ws_options
|= WRDSO_OESC_QUOTE
| WRDSO_OESC_WORD
303 | WRDSO_XESC_QUOTE
| WRDSO_XESC_WORD
;
307 wsp
->ws_escape
[WRDSX_WORD
] = "";
308 wsp
->ws_escape
[WRDSX_QUOTE
] = "\\\\\"\"";
309 wsp
->ws_options
|= WRDSO_BSKEEP_QUOTE
;
316 if (wsp
->ws_flags
& WRDSF_REUSE
)
317 wordsplit_free_nodes (wsp
);
318 wsp
->ws_head
= wsp
->ws_tail
= NULL
;
320 wordsplit_init0 (wsp
);
326 alloc_space (struct wordsplit
*wsp
, idx_t count
)
328 idx_t offs_plus_count
;
329 if (ckd_add (&offs_plus_count
, count
,
330 wsp
->ws_flags
& WRDSF_DOOFFS
? wsp
->ws_offs
: 0))
331 return _wsplt_nomem (wsp
);
338 /* The default largest "small, fast" request for glibc malloc. */
339 enum { DEFAULT_MXFAST
= 64 * sizeof (size_t) / 4 };
341 enum { ALLOC_INIT
= DEFAULT_MXFAST
/ sizeof *wordv
};
342 wordn
= offs_plus_count
<= ALLOC_INIT
? ALLOC_INIT
: offs_plus_count
;
344 /* Use calloc so that the initial ws_offs words are zero. */
345 wordv
= icalloc (wordn
, sizeof *wordv
);
349 idx_t wordroom
= wsp
->ws_wordn
- wsp
->ws_wordc
;
350 if (offs_plus_count
<= wordroom
)
353 /* Grow the allocation by at least MININCR. To avoid quadratic
354 behavior, also grow it by at least 50% if possible. */
355 idx_t minincr
= offs_plus_count
- wordroom
;
356 idx_t halfn
= wsp
->ws_wordn
>> 1;
357 wordv
= (halfn
<= minincr
|| ckd_add (&wordn
, wsp
->ws_wordn
, halfn
)
359 : ireallocarray (wsp
->ws_wordv
, wordn
, sizeof *wordv
));
361 wordv
= (ckd_add (&wordn
, wsp
->ws_wordn
, minincr
)
363 : ireallocarray (wsp
->ws_wordv
, wordn
, sizeof *wordv
));
367 return _wsplt_nomem (wsp
);
368 wsp
->ws_wordn
= wordn
;
369 wsp
->ws_wordv
= wordv
;
374 /* Node state flags */
375 #define _WSNF_NULL 0x01 /* null node (a noop) */
376 #define _WSNF_WORD 0x02 /* node contains word in v.word */
377 #define _WSNF_QUOTE 0x04 /* text is quoted */
378 #define _WSNF_NOEXPAND 0x08 /* text is not subject to expansion */
379 #define _WSNF_JOIN 0x10 /* node must be joined with the next node */
380 #define _WSNF_SEXP 0x20 /* is a sed expression */
381 #define _WSNF_DELIM 0x40 /* node is a delimiter */
383 #define _WSNF_EMPTYOK 0x0100 /* special flag indicating that
384 wordsplit_add_segm must add the
385 segment even if it is empty */
387 struct wordsplit_node
389 struct wordsplit_node
*prev
; /* Previous element */
390 struct wordsplit_node
*next
; /* Next element */
391 unsigned flags
; /* Node flags */
396 idx_t beg
; /* Start of word in ws_input */
397 idx_t end
; /* End of word in ws_input */
404 wsnode_flagstr (unsigned flags
)
406 static char retbuf
[7];
409 if (flags
& _WSNF_WORD
)
411 else if (flags
& _WSNF_NULL
)
415 if (flags
& _WSNF_QUOTE
)
419 if (flags
& _WSNF_NOEXPAND
)
423 if (flags
& _WSNF_JOIN
)
427 if (flags
& _WSNF_SEXP
)
431 if (flags
& _WSNF_DELIM
)
440 wsnode_ptr (struct wordsplit
*wsp
, struct wordsplit_node
*p
)
442 if (p
->flags
& _WSNF_NULL
)
444 else if (p
->flags
& _WSNF_WORD
)
447 return wsp
->ws_input
+ p
->v
.segm
.beg
;
451 wsnode_len (struct wordsplit_node
*p
)
453 if (p
->flags
& _WSNF_NULL
)
455 else if (p
->flags
& _WSNF_WORD
)
456 return strlen (p
->v
.word
);
458 return p
->v
.segm
.end
- p
->v
.segm
.beg
;
461 static struct wordsplit_node
*
462 wsnode_new (struct wordsplit
*wsp
)
464 struct wordsplit_node
*node
= calloc (1, sizeof *node
);
471 wsnode_free (struct wordsplit_node
*p
)
473 if (p
->flags
& _WSNF_WORD
)
479 wsnode_append (struct wordsplit
*wsp
, struct wordsplit_node
*node
)
482 node
->prev
= wsp
->ws_tail
;
484 wsp
->ws_tail
->next
= node
;
491 wsnode_remove (struct wordsplit
*wsp
, struct wordsplit_node
*node
)
493 struct wordsplit_node
*p
;
498 p
->next
= node
->next
;
500 p
->flags
&= ~_WSNF_JOIN
;
503 wsp
->ws_head
= node
->next
;
507 p
->prev
= node
->prev
;
509 wsp
->ws_tail
= node
->prev
;
511 node
->next
= node
->prev
= NULL
;
514 static struct wordsplit_node
*
515 wsnode_tail (struct wordsplit_node
*p
)
523 wsnode_insert (struct wordsplit
*wsp
, struct wordsplit_node
*node
,
524 struct wordsplit_node
*anchor
)
528 node
->next
= node
->prev
= NULL
;
529 wsp
->ws_head
= wsp
->ws_tail
= node
;
533 struct wordsplit_node
*p
;
534 struct wordsplit_node
*tail
= wsnode_tail (node
);
548 wordsplit_add_segm (struct wordsplit
*wsp
, idx_t beg
, idx_t end
, unsigned flg
)
550 if (end
== beg
&& !(flg
& _WSNF_EMPTYOK
))
552 struct wordsplit_node
*node
= wsnode_new (wsp
);
555 node
->flags
= flg
& ~(_WSNF_WORD
| _WSNF_EMPTYOK
);
556 node
->v
.segm
.beg
= beg
;
557 node
->v
.segm
.end
= end
;
558 wsnode_append (wsp
, node
);
563 wordsplit_free_nodes (struct wordsplit
*wsp
)
565 struct wordsplit_node
*p
;
567 for (p
= wsp
->ws_head
; p
;)
569 struct wordsplit_node
*next
= p
->next
;
573 wsp
->ws_head
= wsp
->ws_tail
= NULL
;
577 wordsplit_dump_nodes (struct wordsplit
*wsp
)
580 for (struct wordsplit_node
*p
= wsp
->ws_head
; p
; p
= p
->next
, n
++)
582 if (p
->flags
& _WSNF_WORD
)
583 wsp
->ws_debug ("(%02td) %4jd: %p: %#04x (%s):%s;",
585 n
, p
, p
->flags
, wsnode_flagstr (p
->flags
), p
->v
.word
);
588 idx_t seglen
= p
->v
.segm
.end
- p
->v
.segm
.beg
;
589 wsp
->ws_debug ("(%02td) %4jd: %p: %#04x (%s):%.*s%s;",
591 n
, p
, p
->flags
, wsnode_flagstr (p
->flags
),
592 printflen (seglen
), wsp
->ws_input
+ p
->v
.segm
.beg
,
593 printfdots (seglen
));
599 coalesce_segment (struct wordsplit
*wsp
, struct wordsplit_node
*node
)
601 struct wordsplit_node
*p
, *end
;
605 if (!(node
->flags
& _WSNF_JOIN
))
608 for (p
= node
; p
&& (p
->flags
& _WSNF_JOIN
); p
= p
->next
)
610 len
+= wsnode_len (p
);
613 len
+= wsnode_len (p
);
616 buf
= imalloc (len
+ 1);
618 return _wsplt_nomem (wsp
);
624 struct wordsplit_node
*next
= p
->next
;
625 const char *str
= wsnode_ptr (wsp
, p
);
626 idx_t slen
= wsnode_len (p
);
628 memcpy (cur
, str
, slen
);
632 node
->flags
|= p
->flags
& _WSNF_QUOTE
;
633 wsnode_remove (wsp
, p
);
636 /* Call wsnode_free separately to work around GCC bug 106427. */
647 node
->flags
&= ~_WSNF_JOIN
;
649 if (node
->flags
& _WSNF_WORD
)
652 node
->flags
|= _WSNF_WORD
;
657 static void wordsplit_string_unquote_copy (struct wordsplit
*ws
, bool inquote
,
658 char *dst
, const char *src
,
662 wsnode_quoteremoval (struct wordsplit
*wsp
)
664 struct wordsplit_node
*p
;
666 for (p
= wsp
->ws_head
; p
; p
= p
->next
)
668 bool unquote
= (wsp
->ws_flags
& WRDSF_QUOTE
669 && !(p
->flags
& _WSNF_NOEXPAND
));
673 const char *str
= wsnode_ptr (wsp
, p
);
674 idx_t slen
= wsnode_len (p
);
676 if (!(p
->flags
& _WSNF_WORD
))
678 char *newstr
= imalloc (slen
+ 1);
680 return _wsplt_nomem (wsp
);
681 memcpy (newstr
, str
, slen
);
684 p
->flags
|= _WSNF_WORD
;
687 wordsplit_string_unquote_copy (wsp
, !!(p
->flags
& _WSNF_QUOTE
),
688 p
->v
.word
, str
, slen
);
695 wsnode_coalesce (struct wordsplit
*wsp
)
697 struct wordsplit_node
*p
;
699 for (p
= wsp
->ws_head
; p
; p
= p
->next
)
701 if (p
->flags
& _WSNF_JOIN
)
702 if (coalesce_segment (wsp
, p
))
709 wsnode_tail_coalesce (struct wordsplit
*wsp
, struct wordsplit_node
*p
)
713 struct wordsplit_node
*np
= p
;
714 while (np
&& np
->next
)
716 np
->flags
|= _WSNF_JOIN
;
719 if (coalesce_segment (wsp
, p
))
725 static idx_t
skip_delim (struct wordsplit
*wsp
);
728 wordsplit_finish (struct wordsplit
*wsp
)
730 struct wordsplit_node
*p
;
734 /* Postprocess delimiters. It would be rather simple, if it weren't for
735 the incremental operation.
737 Nodes of type _WSNF_DELIM get inserted to the node list if either
738 WRDSF_RETURN_DELIMS flag or WRDSO_MAXWORDS option is set.
740 The following cases should be distinguished:
742 1. If both WRDSF_SQUEEZE_DELIMS and WRDSF_RETURN_DELIMS are set, compress
743 any runs of similar delimiter nodes to a single node. The nodes are
744 'similar' if they point to the same delimiter character.
746 If WRDSO_MAXWORDS option is set, stop compressing when
747 ws_wordi + 1 == ws_maxwords, and coalesce the rest of nodes into
750 2. If WRDSO_MAXWORDS option is set, but WRDSF_RETURN_DELIMS is not,
751 remove any delimiter nodes. Stop operation when
752 ws_wordi + 1 == ws_maxwords, and coalesce the rest of nodes into
755 3. If incremental operation is in progress, restart the loop any time
756 a delimiter node is about to be returned, unless WRDSF_RETURN_DELIMS
760 delim
= '\0'; /* Delimiter being processed (if any) */
761 n
= 0; /* Number of words processed so far */
762 p
= wsp
->ws_head
; /* Current node */
766 struct wordsplit_node
*next
= p
->next
;
767 if (p
->flags
& _WSNF_DELIM
)
769 if (wsp
->ws_flags
& WRDSF_RETURN_DELIMS
)
771 if (wsp
->ws_flags
& WRDSF_SQUEEZE_DELIMS
)
773 char const *s
= wsnode_ptr (wsp
, p
);
778 wsnode_remove (wsp
, p
);
785 n
++; /* Count this node; it will be returned */
796 else if (wsp
->ws_options
& WRDSO_MAXWORDS
)
798 wsnode_remove (wsp
, p
);
807 /* Last node was a delimiter or a compressed run of delimiters;
808 Count it, and clear the delimiter marker */
812 if (wsp
->ws_options
& WRDSO_MAXWORDS
)
814 if (wsp
->ws_wordi
+ n
+ 1 == wsp
->ws_maxwords
)
819 if (wsp
->ws_flags
& WRDSF_INCREMENTAL
)
820 p
= NULL
; /* Break the loop */
827 /* We're here if WRDSO_MAXWORDS is in effect and wsp->ws_maxwords
828 words have already been collected. Reconstruct a single final
829 node from the remaining nodes. */
830 if (wsnode_tail_coalesce (wsp
, p
))
831 return wsp
->ws_errno
;
835 if (n
== 0 && (wsp
->ws_flags
& WRDSF_INCREMENTAL
))
837 /* The loop above have eliminated all nodes. Restart the
838 processing, if there's any input left. */
839 if (wsp
->ws_endp
< wsp
->ws_len
)
842 if (wsp
->ws_flags
& WRDSF_SHOWDBG
)
843 wsp
->ws_debug (_("Restarting"));
844 rc
= wordsplit_process_list (wsp
, skip_delim (wsp
));
850 wsp
->ws_error
= WRDSE_EOF
;
856 if (alloc_space (wsp
, n
+ 1))
857 return wsp
->ws_errno
;
861 const char *str
= wsnode_ptr (wsp
, wsp
->ws_head
);
862 idx_t slen
= wsnode_len (wsp
->ws_head
);
863 char *newstr
= imalloc (slen
+ 1);
865 /* Assign newstr first, even if it is NULL. This way
866 wordsplit_free will work even if we return
868 wsp
->ws_wordv
[wsp
->ws_offs
+ wsp
->ws_wordc
] = newstr
;
870 return _wsplt_nomem (wsp
);
871 memcpy (newstr
, str
, slen
);
874 wsnode_remove (wsp
, wsp
->ws_head
);
879 if (wsp
->ws_flags
& WRDSF_INCREMENTAL
)
882 wsp
->ws_wordv
[wsp
->ws_offs
+ wsp
->ws_wordc
] = NULL
;
886 #ifdef _WORDSPLIT_EXTRAS
888 wordsplit_append (wordsplit_t
*wsp
, int argc
, char **argv
)
893 rc
= alloc_space (wsp
, wsp
->ws_wordc
+ argc
+ 1);
896 for (i
= 0; i
< argc
; i
++)
898 char *newstr
= strdup (argv
[i
]);
903 free (wsp
->ws_wordv
[wsp
->ws_offs
+ wsp
->ws_wordc
+ i
- 1]);
904 wsp
->ws_wordv
[wsp
->ws_offs
+ wsp
->ws_wordc
+ i
- 1] = NULL
;
907 return _wsplt_nomem (wsp
);
909 wsp
->ws_wordv
[wsp
->ws_offs
+ wsp
->ws_wordc
+ i
] = newstr
;
912 wsp
->ws_wordv
[wsp
->ws_offs
+ wsp
->ws_wordc
] = NULL
;
917 /* Variable expansion */
919 node_split_prefix (struct wordsplit
*wsp
,
920 struct wordsplit_node
**ptail
,
921 struct wordsplit_node
*node
,
922 idx_t beg
, idx_t len
, unsigned flg
)
926 struct wordsplit_node
*newnode
= wsnode_new (wsp
);
929 wsnode_insert (wsp
, newnode
, *ptail
);
930 if (node
->flags
& _WSNF_WORD
)
932 const char *str
= wsnode_ptr (wsp
, node
);
933 char *newstr
= imalloc (len
+ 1);
935 return _wsplt_nomem (wsp
);
936 memcpy (newstr
, str
+ beg
, len
);
938 newnode
->flags
= _WSNF_WORD
;
939 newnode
->v
.word
= newstr
;
943 newnode
->v
.segm
.beg
= node
->v
.segm
.beg
+ beg
;
944 newnode
->v
.segm
.end
= newnode
->v
.segm
.beg
+ len
;
946 newnode
->flags
|= flg
;
952 find_closing_paren (char const *str
, idx_t i
, idx_t len
, idx_t
*poff
,
955 enum { st_init
, st_squote
, st_dquote
} state
= st_init
;
966 if (str
[i
] == paren
[0])
971 else if (str
[i
] == paren
[1])
1000 else if (str
[i
] == '"')
1009 wordsplit_find_env (struct wordsplit
*wsp
, char const *name
, idx_t len
,
1014 if (!(wsp
->ws_flags
& WRDSF_ENV
))
1017 if (wsp
->ws_flags
& WRDSF_ENV_KV
)
1019 /* A key-value pair environment */
1020 for (i
= 0; wsp
->ws_env
[i
]; i
++)
1022 idx_t elen
= strlen (wsp
->ws_env
[i
]);
1023 if (elen
== len
&& memcmp (wsp
->ws_env
[i
], name
, elen
) == 0)
1025 *ret
= wsp
->ws_env
[i
+ 1];
1028 /* Skip the value. Break the loop if it is NULL. */
1030 if (wsp
->ws_env
[i
] == NULL
)
1034 else if (wsp
->ws_env
)
1036 /* Usual (A=B) environment. */
1037 for (i
= 0; wsp
->ws_env
[i
]; i
++)
1040 const char *var
= wsp
->ws_env
[i
];
1042 for (j
= 0; j
< len
; j
++)
1043 if (name
[j
] != var
[j
])
1045 if (j
== len
&& var
[j
] == '=')
1055 /* Initial size for ws_env, if allocated automatically */
1056 enum { WORDSPLIT_ENV_INIT
= 16 };
1059 wsplt_assign_var (struct wordsplit
*wsp
, char const *name
, idx_t namelen
,
1062 int n
= (wsp
->ws_flags
& WRDSF_ENV_KV
) ? 2 : 1;
1065 if (wsp
->ws_envsiz
- wsp
->ws_envidx
<= n
)
1070 if (!wsp
->ws_envbuf
)
1072 if (wsp
->ws_flags
& WRDSF_ENV
)
1078 for (; wsp
->ws_env
[i
]; i
++)
1084 newenv
= icalloc (sz
, sizeof *newenv
);
1086 return _wsplt_nomem (wsp
);
1088 for (j
= 0; j
< i
; j
++)
1090 newenv
[j
] = strdup (wsp
->ws_env
[j
]);
1097 return _wsplt_nomem (wsp
);
1102 wsp
->ws_envbuf
= newenv
;
1104 wsp
->ws_envsiz
= sz
;
1105 wsp
->ws_env
= (char const **) newenv
;
1109 newenv
= calloc (WORDSPLIT_ENV_INIT
, sizeof *newenv
);
1111 return _wsplt_nomem (wsp
);
1112 wsp
->ws_envbuf
= newenv
;
1114 wsp
->ws_envsiz
= WORDSPLIT_ENV_INIT
;
1115 wsp
->ws_env
= (char const **) newenv
;
1116 wsp
->ws_flags
|= WRDSF_ENV
;
1121 idx_t envsiz
, envsiz2
;
1122 if (ckd_add (&envsiz
, wsp
->ws_envidx
, n
+ 1))
1123 return _wsplt_nomem (wsp
);
1124 newenv
= ((!ckd_add (&envsiz2
, wsp
->ws_envsiz
, wsp
->ws_envsiz
>> 1)
1125 && envsiz
< envsiz2
)
1126 ? ireallocarray (wsp
->ws_envbuf
, envsiz2
, sizeof *newenv
)
1132 newenv
= ireallocarray (wsp
->ws_envbuf
, envsiz
, sizeof *newenv
);
1134 return _wsplt_nomem (wsp
);
1137 wsp
->ws_envbuf
= newenv
;
1138 wsp
->ws_envsiz
= envsiz
;
1139 wsp
->ws_env
= (char const **) wsp
->ws_envbuf
;
1143 if (wsp
->ws_flags
& WRDSF_ENV_KV
)
1145 /* A key-value pair environment */
1146 char *p
= imalloc (namelen
+ 1);
1148 return _wsplt_nomem (wsp
);
1149 memcpy (p
, name
, namelen
);
1156 return _wsplt_nomem (wsp
);
1158 wsp
->ws_env
[wsp
->ws_envidx
++] = p
;
1159 wsp
->ws_env
[wsp
->ws_envidx
++] = v
;
1163 v
= imalloc (namelen
+ strlen (value
) + 2);
1165 return _wsplt_nomem (wsp
);
1166 memcpy (v
, name
, namelen
);
1168 strcpy (v
+ namelen
, value
);
1169 wsp
->ws_env
[wsp
->ws_envidx
++] = v
;
1171 wsp
->ws_env
[wsp
->ws_envidx
++] = NULL
;
1176 expvar (struct wordsplit
*wsp
, char const *str
, idx_t len
,
1177 struct wordsplit_node
**ptail
, const char **pend
, unsigned flg
)
1180 const char *defstr
= NULL
;
1183 struct wordsplit_node
*newnode
;
1184 const char *start
= str
- 1;
1186 struct wordsplit ws
;
1188 if (ISVARBEG (str
[0]))
1190 for (i
= 1; i
< len
; i
++)
1191 if (!ISVARCHR (str
[i
]))
1193 *pend
= str
+ i
- 1;
1195 else if (str
[0] == '{')
1199 for (i
= 1; i
< len
; i
++)
1205 defstr
= str
+ i
+ 1;
1206 if (find_closing_paren (str
, i
+ 1, len
, &j
, "{}"))
1207 return _wsplt_seterr (wsp
, WRDSE_CBRACE
);
1211 else if (str
[i
] == '}')
1217 else if (str
[i
] && strchr ("-+?=", str
[i
]))
1222 if (find_closing_paren (str
, i
, len
, &j
, "{}"))
1223 return _wsplt_seterr (wsp
, WRDSE_CBRACE
);
1229 return _wsplt_seterr (wsp
, WRDSE_CBRACE
);
1233 newnode
= wsnode_new (wsp
);
1236 wsnode_insert (wsp
, newnode
, *ptail
);
1238 newnode
->flags
= _WSNF_WORD
| flg
;
1239 newnode
->v
.word
= malloc (3);
1240 if (!newnode
->v
.word
)
1241 return _wsplt_nomem (wsp
);
1242 newnode
->v
.word
[0] = '$';
1243 newnode
->v
.word
[1] = str
[0];
1244 newnode
->v
.word
[2] = '\0';
1249 /* Actually expand the variable */
1250 /* str - start of the variable name
1252 defstr - default replacement str */
1254 if (defstr
&& ! (defstr
[0] && strchr ("-+?=", defstr
[0])))
1261 rc
= wordsplit_find_env (wsp
, str
, i
, &vptr
);
1266 value
= strdup (vptr
);
1273 else if (wsp
->ws_flags
& WRDSF_GETVAR
)
1274 rc
= wsp
->ws_getvar (&value
, str
, i
, wsp
->ws_closure
);
1279 && !(value
&& *value
)
1280 && defstr
&& defstr
[-1] == ':')
1290 if (defstr
&& *defstr
== '+')
1292 idx_t size
= *pend
- ++defstr
;
1294 rc
= _wsplt_subsplit (wsp
, &ws
, defstr
, size
,
1295 WRDSF_NOSPLIT
| WRDSF_WS
| WRDSF_QUOTE
|
1297 (WRDSF_NOVAR
| WRDSF_NOCMD
)),
1302 value
= ws
.ws_wordv
[0];
1303 ws
.ws_wordv
[0] = NULL
;
1304 wordsplit_free (&ws
);
1312 if (*defstr
== '-' || *defstr
== '=')
1314 size
= *pend
- ++defstr
;
1316 rc
= _wsplt_subsplit (wsp
, &ws
, defstr
, size
,
1317 WRDSF_NOSPLIT
| WRDSF_WS
| WRDSF_QUOTE
|
1319 (WRDSF_NOVAR
| WRDSF_NOCMD
)),
1324 value
= ws
.ws_wordv
[0];
1325 ws
.ws_wordv
[0] = NULL
;
1326 wordsplit_free (&ws
);
1328 if (defstr
[-1] == '=')
1329 wsplt_assign_var (wsp
, str
, i
, value
);
1335 size
= *pend
- ++defstr
;
1337 wsp
->ws_error (_("%.*s%s: variable null or not set"),
1338 printflen (i
), str
, printfdots (i
));
1341 rc
= _wsplt_subsplit (wsp
, &ws
, defstr
, size
,
1342 WRDSF_NOSPLIT
| WRDSF_WS
|
1345 (WRDSF_NOVAR
| WRDSF_NOCMD
)),
1348 wsp
->ws_error ("%.*s%s: %s",
1349 printflen (i
), str
, printfdots (i
),
1352 wsp
->ws_error ("%.*s%s: %.*s%s",
1353 printflen (i
), str
, printfdots (i
),
1354 printflen (size
), defstr
,
1356 wordsplit_free (&ws
);
1362 else if (wsp
->ws_flags
& WRDSF_UNDEF
)
1364 _wsplt_seterr (wsp
, WRDSE_UNDEF
);
1369 if (wsp
->ws_flags
& WRDSF_WARNUNDEF
)
1370 wsp
->ws_error (_("warning: undefined variable '%.*s%s'"),
1371 printflen (i
), str
, printfdots (i
));
1372 if (wsp
->ws_flags
& WRDSF_KEEPUNDEF
)
1376 value
= strdup ("");
1378 return _wsplt_nomem (wsp
);
1384 return _wsplt_nomem (wsp
);
1387 if (wsp
->ws_errno
== WRDSE_USERERR
)
1388 free (wsp
->ws_usererr
);
1389 wsp
->ws_usererr
= value
;
1392 _wsplt_seterr (wsp
, rc
);
1398 if (flg
& _WSNF_QUOTE
)
1400 newnode
= wsnode_new (wsp
);
1406 wsnode_insert (wsp
, newnode
, *ptail
);
1408 newnode
->flags
= _WSNF_WORD
| _WSNF_NOEXPAND
| flg
;
1409 newnode
->v
.word
= value
;
1414 /* Empty string is a special case */
1415 newnode
= wsnode_new (wsp
);
1418 wsnode_insert (wsp
, newnode
, *ptail
);
1420 newnode
->flags
= _WSNF_NULL
;
1424 struct wordsplit ws
;
1427 rc
= _wsplt_subsplit (wsp
, &ws
, value
, strlen (value
),
1428 (WRDSF_NOVAR
| WRDSF_NOCMD
| WRDSF_QUOTE
1429 | (WSP_RETURN_DELIMS (wsp
)
1430 ? WRDSF_RETURN_DELIMS
: 0)),
1435 _wsplt_seterr_sub (wsp
, &ws
);
1436 wordsplit_free (&ws
);
1439 wsnode_insert (wsp
, ws
.ws_head
, *ptail
);
1440 *ptail
= ws
.ws_tail
;
1441 ws
.ws_head
= ws
.ws_tail
= NULL
;
1442 wordsplit_free (&ws
);
1445 else if (wsp
->ws_flags
& WRDSF_KEEPUNDEF
)
1447 idx_t size
= *pend
- start
+ 1;
1449 newnode
= wsnode_new (wsp
);
1452 wsnode_insert (wsp
, newnode
, *ptail
);
1454 newnode
->flags
= _WSNF_WORD
| _WSNF_NOEXPAND
| flg
;
1455 newnode
->v
.word
= imalloc (size
+ 1);
1456 if (!newnode
->v
.word
)
1457 return _wsplt_nomem (wsp
);
1458 memcpy (newnode
->v
.word
, start
, size
);
1459 newnode
->v
.word
[size
] = '\0';
1463 newnode
= wsnode_new (wsp
);
1466 wsnode_insert (wsp
, newnode
, *ptail
);
1468 newnode
->flags
= _WSNF_NULL
;
/* Return true if C, the character following a '$', can begin a
   variable reference: either an opening brace or a character accepted
   by ISVARBEG.  */
static bool
begin_var_p (char c)
{
  if (c == '{')
    return true;
  return ISVARBEG (c);
}
1480 node_expand (struct wordsplit
*wsp
, struct wordsplit_node
*node
,
1481 bool (*beg_p
) (char),
1482 int (*ws_exp_fn
) (struct wordsplit
*wsp
,
1483 char const *str
, idx_t len
,
1484 struct wordsplit_node
**ptail
,
1488 const char *str
= wsnode_ptr (wsp
, node
);
1489 idx_t slen
= wsnode_len (node
);
1490 const char *end
= str
+ slen
;
1493 struct wordsplit_node
*tail
= node
;
1495 for (p
= str
; p
< end
; p
++)
1502 if (*p
== '$' && beg_p (p
[1]))
1507 tail
->flags
|= _WSNF_JOIN
;
1508 if (node_split_prefix (wsp
, &tail
, node
, off
, n
, _WSNF_JOIN
))
1511 if (ws_exp_fn (wsp
, p
, slen
- n
, &tail
, &p
,
1512 node
->flags
& (_WSNF_JOIN
| _WSNF_QUOTE
)))
1521 tail
->flags
|= _WSNF_JOIN
;
1522 if (node_split_prefix (wsp
, &tail
, node
, off
, p
- str
,
1523 node
->flags
& (_WSNF_JOIN
|_WSNF_QUOTE
)))
1528 wsnode_remove (wsp
, node
);
1534 /* Remove NULL nodes from the list */
1536 wsnode_nullelim (struct wordsplit
*wsp
)
1538 struct wordsplit_node
*p
;
1540 for (p
= wsp
->ws_head
; p
;)
1542 struct wordsplit_node
*next
= p
->next
;
1543 if (p
->flags
& _WSNF_DELIM
&& p
->prev
)
1544 p
->prev
->flags
&= ~_WSNF_JOIN
;
1545 if (p
->flags
& _WSNF_NULL
)
1547 wsnode_remove (wsp
, p
);
1555 wordsplit_varexp (struct wordsplit
*wsp
)
1557 struct wordsplit_node
*p
;
1559 for (p
= wsp
->ws_head
; p
;)
1561 struct wordsplit_node
*next
= p
->next
;
1562 if (!(p
->flags
& (_WSNF_NOEXPAND
|_WSNF_DELIM
)))
1563 if (!node_expand (wsp
, p
, begin_var_p
, expvar
))
1568 wsnode_nullelim (wsp
);
1573 begin_cmd_p (char c
)
1579 expcmd (struct wordsplit
*wsp
, char const *str
, idx_t len
,
1580 struct wordsplit_node
**ptail
, const char **pend
, unsigned flg
)
1585 struct wordsplit_node
*newnode
;
1590 if (find_closing_paren (str
, 0, len
, &j
, "()"))
1592 _wsplt_seterr (wsp
, WRDSE_PAREN
);
1597 if (wsp
->ws_options
& WRDSO_ARGV
)
1599 struct wordsplit ws
;
1601 rc
= _wsplt_subsplit (wsp
, &ws
, str
, j
, WRDSF_WS
| WRDSF_QUOTE
, true);
1604 _wsplt_seterr_sub (wsp
, &ws
);
1605 wordsplit_free (&ws
);
1608 rc
= wsp
->ws_command (&value
, str
, j
, ws
.ws_wordv
, wsp
->ws_closure
);
1609 wordsplit_free (&ws
);
1612 rc
= wsp
->ws_command (&value
, str
, j
, NULL
, wsp
->ws_closure
);
1614 if (rc
== WRDSE_NOSPACE
)
1615 return _wsplt_nomem (wsp
);
1618 if (rc
== WRDSE_USERERR
)
1620 if (wsp
->ws_errno
== WRDSE_USERERR
)
1621 free (wsp
->ws_usererr
);
1622 wsp
->ws_usererr
= value
;
1624 _wsplt_seterr (wsp
, rc
);
1630 if (flg
& _WSNF_QUOTE
)
1632 newnode
= wsnode_new (wsp
);
1635 wsnode_insert (wsp
, newnode
, *ptail
);
1637 newnode
->flags
= _WSNF_WORD
| _WSNF_NOEXPAND
| flg
;
1638 newnode
->v
.word
= value
;
1643 /* Empty string is a special case */
1644 newnode
= wsnode_new (wsp
);
1647 wsnode_insert (wsp
, newnode
, *ptail
);
1649 newnode
->flags
= _WSNF_NULL
;
1653 struct wordsplit ws
;
1656 rc
= _wsplt_subsplit (wsp
, &ws
, value
, strlen (value
),
1657 (WRDSF_NOVAR
| WRDSF_NOCMD
| WRDSF_WS
1659 | (WSP_RETURN_DELIMS (wsp
)
1660 ? WRDSF_RETURN_DELIMS
: 0)),
1665 _wsplt_seterr_sub (wsp
, &ws
);
1666 wordsplit_free (&ws
);
1669 wsnode_insert (wsp
, ws
.ws_head
, *ptail
);
1670 *ptail
= ws
.ws_tail
;
1671 ws
.ws_head
= ws
.ws_tail
= NULL
;
1672 wordsplit_free (&ws
);
1677 newnode
= wsnode_new (wsp
);
1680 wsnode_insert (wsp
, newnode
, *ptail
);
1682 newnode
->flags
= _WSNF_NULL
;
1688 wordsplit_cmdexp (struct wordsplit
*wsp
)
1690 struct wordsplit_node
*p
;
1692 for (p
= wsp
->ws_head
; p
;)
1694 struct wordsplit_node
*next
= p
->next
;
1695 if (!(p
->flags
& _WSNF_NOEXPAND
))
1696 if (!node_expand (wsp
, p
, begin_cmd_p
, expcmd
))
1701 wsnode_nullelim (wsp
);
1705 /* Strip off any leading and trailing whitespace. This function is called
1706 right after the initial scanning, therefore it assumes that every
1707 node in the list is a text reference node. */
1709 wordsplit_trimws (struct wordsplit
*wsp
)
1711 struct wordsplit_node
*p
;
1713 for (p
= wsp
->ws_head
; p
; p
= p
->next
)
1717 if (!(p
->flags
& _WSNF_QUOTE
))
1719 /* Skip leading whitespace: */
1720 for (n
= p
->v
.segm
.beg
; n
< p
->v
.segm
.end
&& ISWS (wsp
->ws_input
[n
]);
1726 while (p
->next
&& (p
->flags
& _WSNF_JOIN
))
1729 if (p
->flags
& _WSNF_QUOTE
)
1732 /* Trim trailing whitespace */
1733 for (n
= p
->v
.segm
.end
;
1734 n
> p
->v
.segm
.beg
&& ISWS (wsp
->ws_input
[n
- 1]); n
--);
1736 if (p
->v
.segm
.beg
== p
->v
.segm
.end
)
1737 p
->flags
|= _WSNF_NULL
;
1740 wsnode_nullelim (wsp
);
1745 wordsplit_tildexpand (struct wordsplit
*wsp
)
1747 struct wordsplit_node
*p
;
1751 for (p
= wsp
->ws_head
; p
; p
= p
->next
)
1755 if (p
->flags
& _WSNF_QUOTE
)
1758 str
= wsnode_ptr (wsp
, p
);
1762 idx_t slen
= wsnode_len (p
);
1765 for (i
= 1; i
< slen
&& str
[i
] != '/'; i
++)
1773 if (ckd_add (&usize
, usize
, usize
>> 1) || usize
< i
)
1775 char *p
= irealloc (uname
, usize
);
1779 return _wsplt_nomem (wsp
);
1784 memcpy (uname
, str
+ 1, i
);
1786 pw
= getpwnam (uname
);
1789 pw
= getpwuid (getuid ());
1794 idx_t dlen
= strlen (pw
->pw_dir
);
1795 idx_t size
= slen
- i
+ dlen
;
1796 char *newstr
= imalloc (size
);
1800 return _wsplt_nomem (wsp
);
1804 memcpy (newstr
, pw
->pw_dir
, dlen
);
1805 memcpy (newstr
+ dlen
, str
+ i
+ 1, slen
- i
- 1);
1806 newstr
[size
] = '\0';
1807 if (p
->flags
& _WSNF_WORD
)
1810 p
->flags
|= _WSNF_WORD
;
1818 isglob (char const *s
, idx_t l
)
1820 for (ptrdiff_t i
= l
; i
--; )
1823 if (c
&& strchr ("*?[", c
))
1830 wordsplit_pathexpand (struct wordsplit
*wsp
)
1832 struct wordsplit_node
*p
, *next
;
1837 if (wsp
->ws_options
& WRDSO_DOTGLOB
)
1838 flags
= GLOB_PERIOD
;
1841 for (p
= wsp
->ws_head
; p
; p
= next
)
1847 if (p
->flags
& _WSNF_QUOTE
)
1850 str
= wsnode_ptr (wsp
, p
);
1851 slen
= wsnode_len (p
);
1853 if (isglob (str
, slen
))
1857 struct wordsplit_node
*prev
;
1859 char *pattern
= imalloc (slen
+ 1);
1861 return _wsplt_nomem (wsp
);
1862 memcpy (pattern
, str
, slen
);
1863 pattern
[slen
] = '\0';
1865 switch (glob (pattern
, flags
, NULL
, &g
))
1873 return _wsplt_nomem (wsp
);
1876 if (wsp
->ws_options
& WRDSO_NULLGLOB
)
1878 wsnode_remove (wsp
, p
);
1881 else if (wsp
->ws_options
& WRDSO_FAILGLOB
)
1883 if (wsp
->ws_errno
== WRDSE_USERERR
)
1884 free (wsp
->ws_usererr
);
1885 char const *msg
= _("no files match pattern ");
1886 idx_t msglen
= strlen (msg
);
1887 char *usererr
= irealloc (pattern
, msglen
+ slen
+ 1);
1891 return _wsplt_nomem (wsp
);
1893 memmove (usererr
+ msglen
, usererr
, slen
+ 1);
1894 wsp
->ws_usererr
= memcpy (usererr
, msg
, msglen
);
1895 return _wsplt_seterr (wsp
, WRDSE_USERERR
);
1902 return _wsplt_seterr (wsp
, WRDSE_GLOBERR
);
1906 for (i
= 0; i
< g
.gl_pathc
; i
++)
1908 struct wordsplit_node
*newnode
= wsnode_new (wsp
);
1913 newstr
= strdup (g
.gl_pathv
[i
]);
1916 wsnode_free (newnode
);
1917 return _wsplt_nomem (wsp
);
1919 newnode
->v
.word
= newstr
;
1920 newnode
->flags
|= _WSNF_WORD
|_WSNF_QUOTE
;
1921 wsnode_insert (wsp
, newnode
, prev
);
1926 wsnode_remove (wsp
, p
);
1934 skip_sed_expr (char const *command
, idx_t i
, idx_t len
)
1942 if (command
[i
] == ';')
1944 if (!(command
[i
] == 's' && i
+ 3 < len
&& c_ispunct (command
[i
+ 1])))
1947 delim
= command
[++i
];
1949 for (i
++; i
< len
; i
++)
1953 if (command
[i
] == delim
|| !c_isalnum (command
[i
]))
1956 else if (command
[i
] == '\\')
1958 else if (command
[i
] == delim
)
1962 while (state
== 3 && i
< len
&& command
[i
] == ';');
1966 /* wsp->ws_endp points to a delimiter character. If RETURN_DELIMS
1967 is true, return its value, otherwise return the index past it. */
1969 skip_delim_internal (struct wordsplit
*wsp
, bool return_delims
)
1971 return wsp
->ws_endp
+ !return_delims
;
1975 skip_delim (struct wordsplit
*wsp
)
1977 return skip_delim_internal (wsp
, WSP_RETURN_DELIMS (wsp
));
1981 skip_delim_real (struct wordsplit
*wsp
)
1983 return skip_delim_internal (wsp
, !!(wsp
->ws_flags
& WRDSF_RETURN_DELIMS
));
1991 scan_qstring (struct wordsplit
*wsp
, idx_t start
, idx_t
*end
)
1994 const char *command
= wsp
->ws_input
;
1995 idx_t len
= wsp
->ws_len
;
1996 char q
= command
[start
];
1998 for (j
= start
+ 1; j
< len
&& command
[j
] != q
; j
++)
1999 if (q
== '"' && command
[j
] == '\\')
2001 if (j
< len
&& command
[j
] == q
)
2003 unsigned flags
= _WSNF_QUOTE
| _WSNF_EMPTYOK
;
2005 flags
|= _WSNF_NOEXPAND
;
2006 if (!wordsplit_add_segm (wsp
, start
+ 1, j
, flags
))
2012 wsp
->ws_endp
= start
;
2013 _wsplt_seterr (wsp
, WRDSE_QUOTE
);
2020 scan_word (struct wordsplit
*wsp
, idx_t start
, bool consume_all
)
2022 idx_t len
= wsp
->ws_len
;
2023 const char *command
= wsp
->ws_input
;
2024 const char *comment
= wsp
->ws_comment
;
2027 struct wordsplit_node
*np
= wsp
->ws_tail
;
2033 wsp
->ws_errno
= WRDSE_EOF
;
2039 if (wsp
->ws_flags
& WRDSF_SED_EXPR
2040 && command
[i
] == 's' && i
+ 3 < len
&& c_ispunct (command
[i
+ 1]))
2043 i
= skip_sed_expr (command
, i
, len
);
2045 else if (consume_all
|| !ISDELIM (wsp
, command
[i
]))
2049 if (comment
&& command
[i
] && strchr (comment
, command
[i
]) != NULL
)
2052 for (j
= i
+ 1; j
< len
&& command
[j
] != '\n'; j
++)
2054 if (!wordsplit_add_segm (wsp
, start
, i
, 0))
2060 if (wsp
->ws_flags
& WRDSF_QUOTE
)
2062 if (command
[i
] == '\\')
2070 if (((wsp
->ws_flags
& WRDSF_SQUOTE
) && command
[i
] == '\'') ||
2071 ((wsp
->ws_flags
& WRDSF_DQUOTE
) && command
[i
] == '"'))
2073 if (join
&& wsp
->ws_tail
)
2074 wsp
->ws_tail
->flags
|= _WSNF_JOIN
;
2075 if (!wordsplit_add_segm (wsp
, start
, i
, _WSNF_JOIN
))
2077 if (!scan_qstring (wsp
, i
, &i
))
2084 if (command
[i
] == '$')
2086 if (!(wsp
->ws_flags
& WRDSF_NOVAR
)
2087 && command
[i
+1] == '{'
2088 && !find_closing_paren (command
, i
+ 2, len
, &i
, "{}"))
2090 if (!(wsp
->ws_flags
& WRDSF_NOCMD
)
2091 && command
[i
+1] == '('
2092 && !find_closing_paren (command
, i
+ 2, len
, &i
, "()"))
2096 if (!consume_all
&& ISDELIM (wsp
, command
[i
]))
2102 else if (WSP_RETURN_DELIMS (wsp
))
2105 flags
|= _WSNF_DELIM
;
2107 else if (!(wsp
->ws_flags
& WRDSF_SQUEEZE_DELIMS
))
2108 flags
|= _WSNF_EMPTYOK
;
2110 if (join
&& i
> start
&& wsp
->ws_tail
)
2111 wsp
->ws_tail
->flags
|= _WSNF_JOIN
;
2112 if (!wordsplit_add_segm (wsp
, start
, i
, flags
))
2115 if (wsp
->ws_flags
& WRDSF_INCREMENTAL
)
2124 np
->flags
|= _WSNF_QUOTE
;
2133 xtonum (char *pval
, char const *src
, int base
, int cnt
)
2136 unsigned char val
= 0;
2138 /* The maximum value that a prefix of a number can represent.
2139 This is 31 if base is 8 and UCHAR_MAX == 255,
2140 so that "\400" is treated as "\40" followed by "0", not as "\000". */
2141 unsigned char max_prefix
= UCHAR_MAX
/ base
;
2143 for (i
= 0; i
< cnt
&& val
<= max_prefix
; i
++)
2145 unsigned char c
= src
[i
];
2146 unsigned char digit
;
2150 else if (c_isxdigit (c
))
2151 digit
= c_toupper (c
) - 'A' + 10;
2157 val
= val
* base
+ digit
;
2163 #ifdef _WORDSPLIT_EXTRAS
2165 wordsplit_c_quoted_length (const char *str
, bool quote_hex
, bool *quote
)
2172 if (strchr (" \"", *str
))
2177 else if (*str
== '"')
2179 else if (*str
!= '\t' && *str
!= '\\' && c_isprint (*str
))
2185 if (wordsplit_c_quote_char (*str
))
2196 wsplt_unquote_char (const char *transtab
, char c
)
2198 while (*transtab
&& transtab
[1])
2200 if (*transtab
++ == c
)
2207 #ifdef _WORDSPLIT_EXTRAS
2209 wsplt_quote_char (const char *transtab
, char c
)
2211 for (; *transtab
&& transtab
[1]; transtab
+= 2)
2213 if (transtab
[1] == c
)
2220 wordsplit_c_unquote_char (char c
)
2222 return wsplt_unquote_char (wordsplit_c_escape_tab
, c
);
2226 wordsplit_c_quote_char (char c
)
2228 return wsplt_quote_char (wordsplit_c_escape_tab
, c
);
2233 wordsplit_string_unquote_copy (struct wordsplit
*ws
, bool inquote
,
2234 char *dst
, char const *src
, idx_t n
)
2236 for (idx_t i
= 0; i
< n
; )
2242 if (WRDSO_ESC_TEST (ws
, inquote
, WRDSO_XESC
)
2243 && (src
[i
] == 'x' || src
[i
] == 'X'))
2252 int off
= xtonum (&c
, src
+ i
+ 1, 16, 2);
2265 else if (WRDSO_ESC_TEST (ws
, inquote
, WRDSO_OESC
)
2266 && c_isdigit (src
[i
]))
2275 int off
= xtonum (&c
, src
+ i
, 8, 3);
2288 else if ((c
= wsplt_unquote_char (ws
->ws_escape
[inquote
], src
[i
])))
2295 if (WRDSO_ESC_TEST (ws
, inquote
, WRDSO_BSKEEP
))
2306 #ifdef _WORDSPLIT_EXTRAS
2308 wordsplit_c_quote_copy (char *dst
, const char *src
, bool quote_hex
)
2317 else if (*src
!= '\t' && *src
!= '\\' && c_isprint (*src
))
2321 unsigned char uc
= *src
;
2325 static char const hexdigit
[16] = "0123456789ABCDEF";
2327 for (int i
= 4; 0 <= i
; i
-= 4)
2328 *dst
++ = hexdigit
[(uc
>> i
) & 0xf];
2332 char c
= wordsplit_c_quote_char (*src
);
2337 for (int i
= 6; 0 <= i
; i
-= 3)
2338 *dst
++ = '0' + ((uc
>> i
) & 7);
2346 /* This structure describes a single expansion phase */
2349 char const *descr
; /* Textual description (for debugging) */
2350 int flag
; /* WRDSF_ bit that controls this phase */
2351 int opt
; /* Entry-specific options (see EXPOPT_ flags below */
2352 int (*expansion
) (struct wordsplit
*wsp
); /* expansion function */
2355 /* The following options control expansions: */
2356 /* Normally the exptab entry is run if its flag bit is set in struct
2357 wordsplit. The EXPOPT_NEG option negates this test so that expansion
2358 is performed if its associated flag bit is not set in struct wordsplit. */
2359 #define EXPOPT_NEG 0x01
2360 /* All bits in flag must be set in order for entry to match */
2361 #define EXPORT_ALLOF 0x02
2362 /* Coalesce the input list before running the expansion. */
2363 #define EXPOPT_COALESCE 0x04
2365 static struct exptab exptab
[] = {
2366 { N_("WS trimming"), WRDSF_WS
, 0,
2368 { N_("command substitution"), WRDSF_NOCMD
, EXPOPT_NEG
|EXPOPT_COALESCE
,
2370 { N_("coalesce list"), 0, EXPOPT_NEG
|EXPOPT_COALESCE
,
2372 { N_("tilde expansion"), WRDSF_PATHEXPAND
, 0,
2373 wordsplit_tildexpand
},
2374 { N_("variable expansion"), WRDSF_NOVAR
, EXPOPT_NEG
,
2376 { N_("quote removal"), 0, EXPOPT_NEG
,
2377 wsnode_quoteremoval
},
2378 { N_("coalesce list"), 0, EXPOPT_NEG
|EXPOPT_COALESCE
,
2380 { N_("path expansion"), WRDSF_PATHEXPAND
, 0,
2381 wordsplit_pathexpand
},
2386 exptab_matches (struct exptab
*p
, struct wordsplit
*wsp
)
2390 result
= (wsp
->ws_flags
& p
->flag
);
2391 if (p
->opt
& EXPORT_ALLOF
)
2392 result
= result
== p
->flag
;
2393 if (p
->opt
& EXPOPT_NEG
)
2400 wordsplit_process_list (struct wordsplit
*wsp
, idx_t start
)
2404 if (wsp
->ws_flags
& WRDSF_SHOWDBG
)
2405 wsp
->ws_debug (_("(%02td) Input:%.*s%s;"),
2406 wsp
->ws_lvl
, printflen (wsp
->ws_len
), wsp
->ws_input
,
2407 printfdots (wsp
->ws_len
));
2409 if ((wsp
->ws_flags
& WRDSF_NOSPLIT
)
2410 || ((wsp
->ws_options
& WRDSO_MAXWORDS
)
2411 && wsp
->ws_wordi
+ 1 == wsp
->ws_maxwords
))
2413 /* Treat entire input as a single word */
2414 if (scan_word (wsp
, start
, true) == _WRDS_ERR
)
2415 return wsp
->ws_errno
;
2421 while ((rc
= scan_word (wsp
, start
, false)) == _WRDS_OK
)
2422 start
= skip_delim (wsp
);
2423 /* Make sure tail element is not joinable */
2425 wsp
->ws_tail
->flags
&= ~_WSNF_JOIN
;
2426 if (rc
== _WRDS_ERR
)
2427 return wsp
->ws_errno
;
2430 if (wsp
->ws_flags
& WRDSF_SHOWDBG
)
2432 wsp
->ws_debug ("(%02td) %s", wsp
->ws_lvl
, _("Initial list:"));
2433 wordsplit_dump_nodes (wsp
);
2436 for (p
= exptab
; p
->descr
; p
++)
2438 if (exptab_matches (p
, wsp
))
2440 if (p
->opt
& EXPOPT_COALESCE
)
2442 if (wsnode_coalesce (wsp
))
2444 if (wsp
->ws_flags
& WRDSF_SHOWDBG
)
2446 wsp
->ws_debug ("(%02td) %s", wsp
->ws_lvl
,
2447 _("Coalesced list:"));
2448 wordsplit_dump_nodes (wsp
);
2453 if (p
->expansion (wsp
))
2455 if (wsp
->ws_flags
& WRDSF_SHOWDBG
)
2457 wsp
->ws_debug ("(%02td) %s", wsp
->ws_lvl
, _(p
->descr
));
2458 wordsplit_dump_nodes (wsp
);
2463 return wsp
->ws_errno
;
2466 WORDSPLIT_EXTRAS_extern
2468 wordsplit_len (char const *command
, idx_t length
, struct wordsplit
*wsp
,
2476 if (!(flags
& WRDSF_INCREMENTAL
))
2477 return _wsplt_seterr (wsp
, WRDSE_USAGE
);
2480 return wordsplit_finish (wsp
);
2482 start
= skip_delim_real (wsp
);
2483 if (wsp
->ws_endp
== wsp
->ws_len
)
2484 return _wsplt_seterr (wsp
, WRDSE_NOINPUT
);
2486 wsp
->ws_flags
|= WRDSF_REUSE
;
2487 wordsplit_init0 (wsp
);
2492 rc
= wordsplit_init (wsp
, command
, length
, flags
);
2498 rc
= wordsplit_process_list (wsp
, start
);
2501 return wordsplit_finish (wsp
);
2505 wordsplit (const char *command
, struct wordsplit
*ws
, unsigned flags
)
2507 return wordsplit_len (command
, command
? strlen (command
) : 0, ws
, flags
);
2510 WORDSPLIT_EXTRAS_extern
2512 wordsplit_free_words (struct wordsplit
*ws
)
2516 for (i
= 0; i
< ws
->ws_wordc
; i
++)
2518 char *p
= ws
->ws_wordv
[ws
->ws_offs
+ i
];
2522 ws
->ws_wordv
[ws
->ws_offs
+ i
] = NULL
;
2528 WORDSPLIT_EXTRAS_extern
2530 wordsplit_free_envbuf (struct wordsplit
*ws
)
2532 if (ws
->ws_flags
& WRDSF_NOCMD
)
2538 for (i
= 0; ws
->ws_envbuf
[i
]; i
++)
2539 free (ws
->ws_envbuf
[i
]);
2540 free (ws
->ws_envbuf
);
2541 ws
->ws_envidx
= ws
->ws_envsiz
= 0;
2542 ws
->ws_envbuf
= NULL
;
2546 WORDSPLIT_EXTRAS_extern
2548 wordsplit_clearerr (struct wordsplit
*ws
)
2550 if (ws
->ws_errno
== WRDSE_USERERR
)
2551 free (ws
->ws_usererr
);
2552 ws
->ws_usererr
= NULL
;
2553 ws
->ws_errno
= WRDSE_OK
;
2557 wordsplit_free (struct wordsplit
*ws
)
2559 wordsplit_free_nodes (ws
);
2560 wordsplit_free_words (ws
);
2561 free (ws
->ws_wordv
);
2562 ws
->ws_wordv
= NULL
;
2563 wordsplit_free_envbuf (ws
);
2566 #ifdef _WORDSPLIT_EXTRAS
2568 wordsplit_get_words (struct wordsplit
*ws
, idx_t
*wordc
, char ***wordv
)
2570 /* Tell the memory manager that ws->ws_wordv can be shrunk. */
2571 char **p
= irealloc (ws
->ws_wordv
,
2572 (ws
->ws_wordc
+ 1) * sizeof (ws
->ws_wordv
[0]));
2573 *wordv
= p
? p
: ws
->ws_wordv
;
2574 *wordc
= ws
->ws_wordc
;
2576 ws
->ws_wordv
= NULL
;
2582 static char const *const wordsplit_errstr
[] = {
2584 N_("missing closing quote"),
2585 N_("memory exhausted"),
2586 N_("invalid wordsplit usage"),
2587 N_("unbalanced curly brace"),
2588 N_("undefined variable"),
2589 N_("input exhausted"),
2590 N_("unbalanced parenthesis"),
2591 N_("globbing error")
2593 enum { wordsplit_nerrs
= sizeof wordsplit_errstr
/ sizeof *wordsplit_errstr
};
2596 wordsplit_strerror (struct wordsplit
const *ws
)
2598 if (ws
->ws_errno
== WRDSE_USERERR
)
2599 return ws
->ws_usererr
;
2600 if (ws
->ws_errno
< wordsplit_nerrs
)
2601 return wordsplit_errstr
[ws
->ws_errno
];
2602 return N_("unknown error");
2605 WORDSPLIT_EXTRAS_extern
2607 wordsplit_perror (struct wordsplit
*wsp
)
2609 switch (wsp
->ws_errno
)
2612 wsp
->ws_error (_("missing closing %c (start near #%td)"),
2613 wsp
->ws_input
[wsp
->ws_endp
],
2618 wsp
->ws_error ("%s", wordsplit_strerror (wsp
));