2 Parse string into tokens.
5 Free Software Foundation, Inc.
8 Andrew Borodin <aborodin@vmail.ru> 2010-2024
10 The str_tokenize() and str_tokenize_word() routines are mostly from
13 This file is part of the Midnight Commander.
15 The Midnight Commander is free software: you can redistribute it
16 and/or modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation, either version 3 of the License,
18 or (at your option) any later version.
20 The Midnight Commander is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with this program. If not, see <http://www.gnu.org/licenses/>.
30 * \brief Source: parse string into tokens.
38 #include "lib/global.h"
39 #include "lib/util.h" /* whiteness() */
41 #include "lib/strutil.h"
43 /*** global variables ****************************************************************************/
45 /*** file scope macro definitions ****************************************************************/
/* Characters tested for as word delimiters while no quote/nesting delimiter is active. */
#define WORD_DELIMITERS " \t\n;&()|<>"
/* Characters that become the active delimiter when met outside a quoted region. */
#define QUOTE_CHARACTERS "\"'`"

/* Set consulted by the backslash handling while inside double quotes. */
#define slashify_in_quotes "\\`\"$"

/*
 * Non-zero when character C occurs in the NUL-terminated string S, zero otherwise.
 *
 * NUL is rejected explicitly because strchr() would otherwise match the
 * terminator of S.  Both arguments are fully parenthesized so that expression
 * arguments (for example "ch & mask") are evaluated as a unit.
 * C is still evaluated twice: do not pass expressions with side effects.
 */
#define member(c, s) (((c) != '\0') && (strchr ((s), (c)) != NULL))
54 /*** file scope type declarations ****************************************************************/
56 /*** forward declarations (file scope functions) *************************************************/
58 /*** file scope variables ************************************************************************/
60 /* --------------------------------------------------------------------------------------------- */
61 /*** file scope functions ************************************************************************/
62 /* --------------------------------------------------------------------------------------------- */
65 * Based on history_tokenize_word() from GNU readline-8.2
/*
 * Scan one shell-style token of STRING, beginning at index START.
 *
 * NOTE(review): this listing is incomplete.  The return type, the braces, the
 * declarations of i, j and nestdelim, and the bodies of most branches are not
 * visible here.  The comments below describe only the visible conditions; what
 * each branch does has to be confirmed against the complete file.
 *
 * @param string text being scanned; it is indexed until a NUL byte is found
 * @param start  index in STRING where the scan begins
 *
 * The visible caller, str_tokenize(), stores the result in its scan index and
 * uses it as the exclusive end of the token it copies.
 */
68 str_tokenize_word (const char *string
, int start
)
/* Active quote or closing delimiter; NUL means that none is active. */
71 char delimiter
= '\0';
/* Character compared against while nestdelim is non-zero (see the loop below). */
72 char delimopen
= '\0';
/* A parenthesis or a newline at the current position is tested for first. */
75 if (member (string
[i
], "()\n")) /* XXX - included \n, but why? been here forever */
/* The token starts with a digit. */
78 if (g_ascii_isdigit (string
[i
]))
/* Advance j to the first non-digit, or to the terminator. */
82 for (j
= i
; string
[j
] != '\0' && g_ascii_isdigit (string
[j
]); j
++)
/* The digit run reaches the end of the string. */
85 if (string
[j
] == '\0')
/* The digit run is directly followed by a redirection character. */
88 if (string
[j
] == '<' || string
[j
] == '>')
89 i
= j
; /* digit sequence is a file descriptor */
92 i
= j
; /* digit sequence is part of a word */
/* Operator characters: redirections, semicolon, ampersand and pipe. */
97 if (member (string
[i
], "<>;&|"))
/* member() rejects NUL, so string[i] is not the terminator and reading
   one character ahead stays inside the terminated string. */
99 char peek
= string
[i
+ 1];
/* The operator character is doubled: <<, >>, ;;, && or ||. */
101 if (peek
== string
[i
])
/* Three-character forms that start with <<: either <<- or <<<. */
103 if (peek
== '<' && (string
[i
+ 2] == '-' || string
[i
+ 2] == '<'))
/* Descriptor duplication: <& or >&. */
108 if (peek
== '&' && (string
[i
] == '>' || string
[i
] == '<'))
112 /* file descriptor */
113 for (j
= i
+ 2; string
[j
] != '\0' && g_ascii_isdigit (string
[j
]); j
++)
115 if (string
[j
] == '-') /* <&[digits]-, >&[digits]- */
/* Two-character operators &> and >|. */
120 if ((peek
== '>' && string
[i
] == '&') || (peek
== '|' && string
[i
] == '>'))
123 /* XXX - process substitution -- separated out for later -- bash-4.2 */
124 if (peek
== '(' && (string
[i
] == '>' || string
[i
] == '<'))
138 /* Get word from string + i; */
/* The word starts with a quote character: it becomes the active delimiter. */
140 if (delimiter
== '\0' && member (string
[i
], QUOTE_CHARACTERS
))
142 delimiter
= string
[i
];
/* Examine the remaining characters one at a time. */
146 for (; string
[i
] != '\0'; i
++)
/* Backslash immediately followed by a newline. */
148 if (string
[i
] == '\\' && string
[i
+ 1] == '\n')
/* Backslash while not inside single quotes.
   NOTE(review): string[i] is already known to be a backslash here, and the
   backslash is listed in slashify_in_quotes, so the member() test is always
   true and the double-quote test has no effect.  Possibly string[i + 1] was
   intended -- confirm against the readline original before changing it. */
154 if (string
[i
] == '\\' && delimiter
!= '\'' &&
155 (delimiter
!= '"' || member (string
[i
], slashify_in_quotes
)))
161 /* delimiter must be set and set to something other than a quote if
162 nestdelim is set, so these tests are safe. */
/* The character equals delimopen while nesting is active. */
163 if (nestdelim
!= 0 && string
[i
] == delimopen
)
/* The character equals the active delimiter while nesting is active. */
168 if (nestdelim
!= 0 && string
[i
] == delimiter
)
/* The active delimiter character is met again. */
176 if (delimiter
!= '\0' && string
[i
] == delimiter
)
182 /* Command and process substitution; shell extended globbing patterns */
183 if (nestdelim
== 0 && delimiter
== '\0' && member (string
[i
], "<>$!@?+*")
184 && string
[i
+ 1] == '(')
204 /* --------------------------------------------------------------------------------------------- */
205 /*** public functions ****************************************************************************/
206 /* --------------------------------------------------------------------------------------------- */
208 /* Parse string into tokens.
210 * Based on history_tokenize_internal() from GNU readline-8.2
/*
 * Split STRING into tokens and collect copies of them in a GPtrArray.
 *
 * NOTE(review): this listing is incomplete.  The return type, the braces, the
 * declarations of i and start, the assignment of start, the return statements
 * and any guard around the statements below are not visible here.
 *
 * @param string text to tokenize; it is indexed until a NUL byte is found
 *
 * Every token is copied with g_strndup() and appended with g_ptr_array_add().
 * RESULT starts as NULL and the only visible assignment creates it with
 * g_ptr_array_new(), so NULL is presumably what the caller gets when no token
 * is found -- TODO confirm.  g_ptr_array_new() installs no element free
 * function, so releasing the copied strings is presumably left to the caller.
 */
213 str_tokenize (const char *string
)
/* Starts out as NULL; the only visible assignment is g_ptr_array_new() below. */
215 GPtrArray
*result
= NULL
;
218 /* Get a token, and stuff it into RESULT. The tokens are split
219 exactly where the shell would split them. */
220 while (string
[i
] != '\0')
224 /* Skip leading whitespace */
225 for (; string
[i
] != '\0' && whiteness (string
[i
]); i
++)
/* Only whitespace was left before the end of the string. */
228 if (string
[i
] == '\0')
/* Scan one token; the value returned by str_tokenize_word() becomes the new
   scan index and is used below as the exclusive end of the token. */
232 i
= str_tokenize_word (string
, start
);
234 /* If we have a non-whitespace delimiter character (which would not be
235 skipped by the loop above), use it and any adjacent delimiters to
236 make a separate field. Any adjacent white space will be skipped the
237 next time through the loop. */
/* Step over the current character, then over every following character that
   is listed in WORD_DELIMITERS. */
239 for (i
++; string
[i
] != '\0' && member (string
[i
], WORD_DELIMITERS
); i
++)
243 result
= g_ptr_array_new ();
/* Append a copy of the i - start characters that begin at string + start. */
245 g_ptr_array_add (result
, g_strndup (string
+ start
, i
- start
));
251 /* --------------------------------------------------------------------------------------------- */