(calc_copy_file_progress): rename from copy_file_file_display_progress().
[midnight-commander.git] / lib / strutil / tokenize.c
blob4aa17e66020ab710204470781ae7dcf0696ba7c3
1 /*
2 Parse string into tokens.
4 Copyright (C) 2024
5 Free Software Foundation, Inc.
7 Written by:
8 Andrew Borodin <aborodin@vmail.ru> 2010-2024
10 The str_tokenize() and str_tokenize_word() routines are mostly from
11 GNU readline-8.2.
13 This file is part of the Midnight Commander.
15 The Midnight Commander is free software: you can redistribute it
16 and/or modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation, either version 3 of the License,
18 or (at your option) any later version.
20 The Midnight Commander is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with this program. If not, see <http://www.gnu.org/licenses/>.
29 /** \file tokenize.c
30 * \brief Source: parse string into tokens.
33 #include <config.h>
35 #include <stdlib.h>
36 #include <string.h>
38 #include "lib/global.h"
39 #include "lib/util.h" /* whiteness() */
41 #include "lib/strutil.h"
43 /*** global variables ****************************************************************************/
45 /*** file scope macro definitions ****************************************************************/
/* Characters that terminate an unquoted word. */
#define WORD_DELIMITERS " \t\n;&()|<>"
/* Characters that open (and close) a quoted section. */
#define QUOTE_CHARACTERS "\"'`"

/* Characters that a backslash may escape inside double quotes. */
#define slashify_in_quotes "\\`\"$"

/* TRUE if character C is not NUL and occurs in string S.
 * Both parameters are fully parenthesized so that any expression can be passed.
 * Note: C is evaluated twice, so do not pass an argument with side effects. */
#define member(c, s) (((c) != '\0') ? (strchr ((s), (c)) != NULL) : FALSE)
54 /*** file scope type declarations ****************************************************************/
56 /*** forward declarations (file scope functions) *************************************************/
58 /*** file scope variables ************************************************************************/
60 /* --------------------------------------------------------------------------------------------- */
61 /*** file scope functions ************************************************************************/
62 /* --------------------------------------------------------------------------------------------- */
65 * Based on history_tokenize_word() from GNU readline-8.2
67 static int
68 str_tokenize_word (const char *string, int start)
70 int i = start;
71 char delimiter = '\0';
72 char delimopen = '\0';
73 int nestdelim = 0;
75 if (member (string[i], "()\n")) /* XXX - included \n, but why? been here forever */
76 return (i + 1);
78 if (g_ascii_isdigit (string[i]))
80 int j;
82 for (j = i; string[j] != '\0' && g_ascii_isdigit (string[j]); j++)
85 if (string[j] == '\0')
86 return j;
88 if (string[j] == '<' || string[j] == '>')
89 i = j; /* digit sequence is a file descriptor */
90 else
92 i = j; /* digit sequence is part of a word */
93 goto get_word;
97 if (member (string[i], "<>;&|"))
99 char peek = string[i + 1];
101 if (peek == string[i])
103 if (peek == '<' && (string[i + 2] == '-' || string[i + 2] == '<'))
104 i++;
105 return (i + 2);
108 if (peek == '&' && (string[i] == '>' || string[i] == '<'))
110 int j;
112 /* file descriptor */
113 for (j = i + 2; string[j] != '\0' && g_ascii_isdigit (string[j]); j++)
115 if (string[j] == '-') /* <&[digits]-, >&[digits]- */
116 j++;
117 return j;
120 if ((peek == '>' && string[i] == '&') || (peek == '|' && string[i] == '>'))
121 return (i + 2);
123 /* XXX - process substitution -- separated out for later -- bash-4.2 */
124 if (peek == '(' && (string[i] == '>' || string[i] == '<'))
126 /* ) */
127 i += 2;
128 delimopen = '(';
129 delimiter = ')';
130 nestdelim = 1;
131 goto get_word;
134 return (i + 1);
137 get_word:
138 /* Get word from string + i; */
140 if (delimiter == '\0' && member (string[i], QUOTE_CHARACTERS))
142 delimiter = string[i];
143 i++;
146 for (; string[i] != '\0'; i++)
148 if (string[i] == '\\' && string[i + 1] == '\n')
150 i++;
151 continue;
154 if (string[i] == '\\' && delimiter != '\'' &&
155 (delimiter != '"' || member (string[i], slashify_in_quotes)))
157 i++;
158 continue;
161 /* delimiter must be set and set to something other than a quote if
162 nestdelim is set, so these tests are safe. */
163 if (nestdelim != 0 && string[i] == delimopen)
165 nestdelim++;
166 continue;
168 if (nestdelim != 0 && string[i] == delimiter)
170 nestdelim--;
171 if (nestdelim == 0)
172 delimiter = '\0';
173 continue;
176 if (delimiter != '\0' && string[i] == delimiter)
178 delimiter = '\0';
179 continue;
182 /* Command and process substitution; shell extended globbing patterns */
183 if (nestdelim == 0 && delimiter == '\0' && member (string[i], "<>$!@?+*")
184 && string[i + 1] == '(')
186 /* ) */
187 i += 2;
188 delimopen = '(';
189 delimiter = ')';
190 nestdelim = 1;
191 continue;
194 if (delimiter == '\0' && member (string[i], WORD_DELIMITERS))
195 break;
197 if (delimiter == '\0' && member (string[i], QUOTE_CHARACTERS))
198 delimiter = string[i];
201 return i;
204 /* --------------------------------------------------------------------------------------------- */
205 /*** public functions ****************************************************************************/
206 /* --------------------------------------------------------------------------------------------- */
208 /* Parse string into tokens.
210 * Based on history_tokenize_internal() from GNU readline-8.2
212 GPtrArray *
213 str_tokenize (const char *string)
215 GPtrArray *result = NULL;
216 int i = 0;
218 /* Get a token, and stuff it into RESULT. The tokens are split
219 exactly where the shell would split them. */
220 while (string[i] != '\0')
222 int start;
224 /* Skip leading whitespace */
225 for (; string[i] != '\0' && whiteness (string[i]); i++)
228 if (string[i] == '\0')
229 return result;
231 start = i;
232 i = str_tokenize_word (string, start);
234 /* If we have a non-whitespace delimiter character (which would not be
235 skipped by the loop above), use it and any adjacent delimiters to
236 make a separate field. Any adjacent white space will be skipped the
237 next time through the loop. */
238 if (i == start)
239 for (i++; string[i] != '\0' && member (string[i], WORD_DELIMITERS); i++)
242 if (result == NULL)
243 result = g_ptr_array_new ();
245 g_ptr_array_add (result, g_strndup (string + start, i - start));
248 return result;
251 /* --------------------------------------------------------------------------------------------- */