*** empty log message ***
[coreutils.git] / lib / quotearg.c
blobd5fbc9e60384ea4b7c0b823535c7d18ea79f8d53
1 /* quotearg.c - quote arguments for output
2 Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Paul Eggert <eggert@twinsun.com> */
20 #if HAVE_CONFIG_H
21 # include <config.h>
22 #endif
24 #include <sys/types.h>
25 #include <quotearg.h>
26 #include <xalloc.h>
28 #include <ctype.h>
30 #if ENABLE_NLS
31 # include <libintl.h>
32 # define _(text) gettext (text)
33 #else
34 # define _(text) text
35 #endif
36 #define N_(text) text
38 #if HAVE_LIMITS_H
39 # include <limits.h>
40 #endif
41 #ifndef CHAR_BIT
42 # define CHAR_BIT 8
43 #endif
44 #ifndef UCHAR_MAX
45 # define UCHAR_MAX ((unsigned char) -1)
46 #endif
48 #if HAVE_C_BACKSLASH_A
49 # define ALERT_CHAR '\a'
50 #else
51 # define ALERT_CHAR '\7'
52 #endif
54 #if HAVE_STDLIB_H
55 # include <stdlib.h>
56 #endif
58 #if HAVE_STRING_H
59 # include <string.h>
60 #endif
62 #if HAVE_WCHAR_H
63 # include <wchar.h>
64 #endif
/* Multibyte-conversion shims.  The rest of this file calls mbrtowc,
   mbsinit and iswprint unconditionally; the definitions below make
   those calls valid on every configuration.  */
#if HAVE_MBRTOWC
size_t mbrtowc ();
/* If mbstate_t is a macro (presumably supplied by config.h on hosts
   that lack the real type -- TODO confirm), call the real mbrtowc
   with a null state pointer and treat every state as initial.  */
# ifdef mbstate_t
# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
# define mbsinit(ps) 1
# endif
#else
/* Disable multibyte processing entirely.  Since MB_CUR_MAX is 1, the
   other macros are defined only for documentation and to satisfy C
   syntax.  */
# undef MB_CUR_MAX
# define MB_CUR_MAX 1
/* Copy one byte to *PWC; yield 0 for the null byte and 1 otherwise.  */
# define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
# define mbsinit(ps) 1
# define iswprint(wc) ISPRINT ((unsigned char) (wc))
#endif
/* Make sure iswprint is usable.  If it is not already a macro, pull
   in <wctype.h> when available; if it is still not a macro and the
   configuration does not report an iswprint function, fall back to
   treating every wide character as printable.  */
#ifndef iswprint
# if HAVE_WCTYPE_H
# include <wctype.h>
# endif
# if !defined iswprint && !HAVE_ISWPRINT
# define iswprint(wc) 1
# endif
#endif
/* Number of bits in an int, i.e. in one element of the
   quote_these_too bit vector of struct quoting_options.  */
#define INT_BITS (sizeof (int) * CHAR_BIT)

/* IN_CTYPE_DOMAIN (C) guards calls to the <ctype.h> classification
   macros.  It is always true when STDC_HEADERS is defined or when no
   isascii is known; otherwise it defers to isascii.  */
#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
# define IN_CTYPE_DOMAIN(c) 1
#else
# define IN_CTYPE_DOMAIN(c) isascii(c)
#endif

/* Undefine to protect against the definition in wctype.h of solaris2.6.  */
#undef ISPRINT
/* Nonzero if C is inside the ctype domain and isprint accepts it.
   C is evaluated more than once.  */
#define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
/* The settings that control how an argument is quoted: a basic style
   plus a per-byte set of characters that must be quoted as well.  */
struct quoting_options
{
  /* Basic quoting style.  */
  enum quoting_style style;

  /* Quote the characters indicated by this bit vector even if the
     quoting style would not normally require them to be quoted.
     The bit for unsigned char value UC is bit UC % INT_BITS of
     element UC / INT_BITS.  */
  int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
};
/* Names of quoting styles.  The entries are listed in the same order
   as quoting_style_vals, so that entry I here names the style stored
   at index I there.  The list ends with a null pointer.  */
char const *const quoting_style_args[] =
{
  "literal",
  "shell",
  "shell-always",
  "c",
  "escape",
  "locale",
  "clocale",
  0
};
/* Correspondences to quoting style names: entry I is the enum value
   for the name at index I of quoting_style_args, so the two tables
   must be kept in the same order.  */
enum quoting_style const quoting_style_vals[] =
{
  literal_quoting_style,
  shell_quoting_style,
  shell_always_quoting_style,
  c_quoting_style,
  escape_quoting_style,
  locale_quoting_style,
  clocale_quoting_style
};
/* The default quoting options, used whenever a caller passes a null
   options pointer.  Being a static object it starts out all zero:
   the style whose enum value is 0 and an empty quote_these_too set.
   set_quoting_style and set_char_quoting modify it when given a null
   options pointer.  */
static struct quoting_options default_quoting_options;
142 /* Allocate a new set of quoting options, with contents initially identical
143 to O if O is not null, or to the default if O is null.
144 It is the caller's responsibility to free the result. */
145 struct quoting_options *
146 clone_quoting_options (struct quoting_options *o)
148 struct quoting_options *p
149 = (struct quoting_options *) xmalloc (sizeof (struct quoting_options));
150 *p = *(o ? o : &default_quoting_options);
151 return p;
154 /* Get the value of O's quoting style. If O is null, use the default. */
155 enum quoting_style
156 get_quoting_style (struct quoting_options *o)
158 return (o ? o : &default_quoting_options)->style;
161 /* In O (or in the default if O is null),
162 set the value of the quoting style to S. */
163 void
164 set_quoting_style (struct quoting_options *o, enum quoting_style s)
166 (o ? o : &default_quoting_options)->style = s;
169 /* In O (or in the default if O is null),
170 set the value of the quoting options for character C to I.
171 Return the old value. Currently, the only values defined for I are
172 0 (the default) and 1 (which means to quote the character even if
173 it would not otherwise be quoted). */
175 set_char_quoting (struct quoting_options *o, char c, int i)
177 unsigned char uc = c;
178 int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
179 int shift = uc % INT_BITS;
180 int r = (*p >> shift) & 1;
181 *p ^= ((i & 1) ^ r) << shift;
182 return r;
185 /* MSGID approximates a quotation mark. Return its translation if it
186 has one; otherwise, return either it or "\"", depending on S. */
187 static char const *
188 gettext_quote (char const *msgid, enum quoting_style s)
190 char const *translation = _(msgid);
191 if (translation == msgid && s == clocale_quoting_style)
192 translation = "\"";
193 return translation;
196 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
197 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
198 non-quoting-style part of O to control quoting.
199 Terminate the output with a null character, and return the written
200 size of the output, not counting the terminating null.
201 If BUFFERSIZE is too small to store the output string, return the
202 value that would have been returned had BUFFERSIZE been large enough.
203 If ARGSIZE is -1, use the string length of the argument for ARGSIZE.
205 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
206 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
207 style specified by O, and O may not be null. */
209 static size_t
210 quotearg_buffer_restyled (char *buffer, size_t buffersize,
211 char const *arg, size_t argsize,
212 enum quoting_style quoting_style,
213 struct quoting_options const *o)
215 size_t i;
216 size_t len = 0;
217 char const *quote_string = 0;
218 size_t quote_string_len = 0;
219 int backslash_escapes = 0;
220 int unibyte_locale = MB_CUR_MAX == 1;
222 #define STORE(c) \
223 do \
225 if (len < buffersize) \
226 buffer[len] = (c); \
227 len++; \
229 while (0)
231 switch (quoting_style)
233 case c_quoting_style:
234 STORE ('"');
235 backslash_escapes = 1;
236 quote_string = "\"";
237 quote_string_len = 1;
238 break;
240 case escape_quoting_style:
241 backslash_escapes = 1;
242 break;
244 case locale_quoting_style:
245 case clocale_quoting_style:
247 /* Get translations for open and closing quotation marks.
249 The message catalog should translate "`" to a left
250 quotation mark suitable for the locale, and similarly for
251 "'". If the catalog has no translation,
252 locale_quoting_style quotes `like this', and
253 clocale_quoting_style quotes "like this".
255 For example, an American English Unicode locale should
256 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
257 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
258 MARK). A British English Unicode locale should instead
259 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
260 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. */
262 char const *left = gettext_quote (N_("`"), quoting_style);
263 char const *right = gettext_quote (N_("'"), quoting_style);
264 for (quote_string = left; *quote_string; quote_string++)
265 STORE (*quote_string);
266 backslash_escapes = 1;
267 quote_string = right;
268 quote_string_len = strlen (quote_string);
270 break;
272 case shell_always_quoting_style:
273 STORE ('\'');
274 quote_string = "'";
275 quote_string_len = 1;
276 break;
278 default:
279 break;
282 for (i = 0; ! (argsize == (size_t) -1 ? arg[i] == '\0' : i == argsize); i++)
284 unsigned char c;
285 unsigned char esc;
287 if (backslash_escapes
288 && quote_string_len
289 && i + quote_string_len <= argsize
290 && memcmp (arg + i, quote_string, quote_string_len) == 0)
291 STORE ('\\');
293 c = arg[i];
294 switch (c)
296 case '?':
297 switch (quoting_style)
299 case shell_quoting_style:
300 goto use_shell_always_quoting_style;
302 case c_quoting_style:
303 if (i + 2 < argsize && arg[i + 1] == '?')
304 switch (arg[i + 2])
306 case '!': case '\'':
307 case '(': case ')': case '-': case '/':
308 case '<': case '=': case '>':
309 /* Escape the second '?' in what would otherwise be
310 a trigraph. */
311 i += 2;
312 c = arg[i + 2];
313 STORE ('?');
314 STORE ('\\');
315 STORE ('?');
316 break;
318 break;
320 default:
321 break;
323 break;
325 case ALERT_CHAR: esc = 'a'; goto c_escape;
326 case '\b': esc = 'b'; goto c_escape;
327 case '\f': esc = 'f'; goto c_escape;
328 case '\n': esc = 'n'; goto c_and_shell_escape;
329 case '\r': esc = 'r'; goto c_and_shell_escape;
330 case '\t': esc = 't'; goto c_and_shell_escape;
331 case '\v': esc = 'v'; goto c_escape;
332 case '\\': esc = c; goto c_and_shell_escape;
334 c_and_shell_escape:
335 if (quoting_style == shell_quoting_style)
336 goto use_shell_always_quoting_style;
337 c_escape:
338 if (backslash_escapes)
340 c = esc;
341 goto store_escape;
343 break;
345 case '#': case '~':
346 if (i != 0)
347 break;
348 /* Fall through. */
349 case ' ':
350 case '!': /* special in bash */
351 case '"': case '$': case '&':
352 case '(': case ')': case '*': case ';':
353 case '<': case '>': case '[':
354 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
355 case '`': case '|':
356 /* A shell special character. In theory, '$' and '`' could
357 be the first bytes of multibyte characters, which means
358 we should check them with mbrtowc, but in practice this
359 doesn't happen so it's not worth worrying about. */
360 if (quoting_style == shell_quoting_style)
361 goto use_shell_always_quoting_style;
362 break;
364 case '\'':
365 switch (quoting_style)
367 case shell_quoting_style:
368 goto use_shell_always_quoting_style;
370 case shell_always_quoting_style:
371 STORE ('\'');
372 STORE ('\\');
373 STORE ('\'');
374 break;
376 default:
377 break;
379 break;
381 case '%': case '+': case ',': case '-': case '.': case '/':
382 case '0': case '1': case '2': case '3': case '4': case '5':
383 case '6': case '7': case '8': case '9': case ':': case '=':
384 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
385 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
386 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
387 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
388 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
389 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
390 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
391 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
392 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
393 case '{': case '}':
394 /* These characters don't cause problems, no matter what the
395 quoting style is. They cannot start multibyte sequences. */
396 break;
398 default:
399 /* If we have a multibyte sequence, copy it until we reach
400 its end, find an error, or come back to the initial shift
401 state. For C-like styles, if the sequence has
402 unprintable characters, escape the whole sequence, since
403 we can't easily escape single characters within it. */
405 /* Length of multibyte sequence found so far. */
406 size_t m;
408 int printable;
410 if (unibyte_locale)
412 m = 1;
413 printable = ISPRINT (c);
415 else
417 mbstate_t mbstate;
418 memset (&mbstate, 0, sizeof mbstate);
420 m = 0;
421 printable = 1;
422 if (argsize == (size_t) -1)
423 argsize = strlen (arg);
427 wchar_t w;
428 size_t bytes = mbrtowc (&w, &arg[i + m],
429 argsize - (i + m), &mbstate);
430 if (bytes == 0)
431 break;
432 else if (bytes == (size_t) -1)
434 printable = 0;
435 break;
437 else if (bytes == (size_t) -2)
439 printable = 0;
440 while (i + m < argsize && arg[i + m])
441 m++;
442 break;
444 else
446 if (! iswprint (w))
447 printable = 0;
448 m += bytes;
451 while (! mbsinit (&mbstate));
454 if (1 < m || (backslash_escapes && ! printable))
456 /* Output a multibyte sequence, or an escaped
457 unprintable unibyte character. */
458 size_t ilim = i + m;
460 for (;;)
462 if (backslash_escapes && ! printable)
464 STORE ('\\');
465 STORE ('0' + (c >> 6));
466 STORE ('0' + ((c >> 3) & 7));
467 c = '0' + (c & 7);
469 if (ilim <= i + 1)
470 break;
471 STORE (c);
472 c = arg[++i];
475 goto store_c;
480 if (! (backslash_escapes
481 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
482 goto store_c;
484 store_escape:
485 STORE ('\\');
487 store_c:
488 STORE (c);
491 if (quote_string)
492 for (; *quote_string; quote_string++)
493 STORE (*quote_string);
495 if (len < buffersize)
496 buffer[len] = '\0';
497 return len;
499 use_shell_always_quoting_style:
500 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
501 shell_always_quoting_style, o);
504 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
505 argument ARG (of size ARGSIZE), using O to control quoting.
506 If O is null, use the default.
507 Terminate the output with a null character, and return the written
508 size of the output, not counting the terminating null.
509 If BUFFERSIZE is too small to store the output string, return the
510 value that would have been returned had BUFFERSIZE been large enough.
511 If ARGSIZE is -1, use the string length of the argument for ARGSIZE. */
512 size_t
513 quotearg_buffer (char *buffer, size_t buffersize,
514 char const *arg, size_t argsize,
515 struct quoting_options const *o)
517 struct quoting_options const *p = o ? o : &default_quoting_options;
518 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
519 p->style, p);
/* Use storage slot N to return a quoted version of the string ARG.
   OPTIONS specifies the quoting options.
   The returned value points to static storage that can be
   reused by the next call to this function with the same value of N.
   N must be nonnegative.  N is deliberately declared with type "int"
   to allow for future extensions (using negative values).

   The slot vector grows on demand.  Each slot's buffer is enlarged
   with xrealloc whenever the quoted result does not fit.  The program
   aborts if N is negative or if the slot vector size would overflow.  */
static char *
quotearg_n_options (int n, char const *arg,
                    struct quoting_options const *options)
{
  /* Preallocate a slot 0 buffer, so that the caller can always quote
     one small component of a "memory exhausted" message in slot 0.  */
  static char slot0[256];
  static unsigned int nslots = 1;
  struct slotvec
    {
      size_t size;
      char *val;
    };
  static struct slotvec slotvec0 = {sizeof slot0, slot0};
  static struct slotvec *slotvec = &slotvec0;

  if (nslots <= n)
    {
      int n1 = n + 1;
      size_t s = n1 * sizeof (struct slotvec);
      /* Reject a negative N and a byte count that overflowed.  */
      if (! (0 < n1 && n1 == s / sizeof (struct slotvec)))
        abort ();
      /* The initial one-element vector is static and cannot be given
         to xrealloc, so move it to the heap first.  */
      if (slotvec == &slotvec0)
        {
          slotvec = (struct slotvec *) xmalloc (sizeof (struct slotvec));
          *slotvec = slotvec0;
        }
      slotvec = (struct slotvec *) xrealloc (slotvec, s);
      memset (slotvec + nslots, 0, (n1 - nslots) * sizeof (struct slotvec));
      /* The vector now holds N1 slots.  Recording N here instead
         would make the next call with the same N grow the vector
         again and zero slot N, losing its buffer.  */
      nslots = n1;
    }

  {
    size_t size = slotvec[n].size;
    char *val = slotvec[n].val;
    size_t qsize = quotearg_buffer (val, size, arg, (size_t) -1, options);

    if (size <= qsize)
      {
        /* The result did not fit: enlarge the slot to the exact size
           needed and quote again.  SLOT0 is static, so it is
           replaced by a fresh allocation rather than reallocated.  */
        slotvec[n].size = size = qsize + 1;
        slotvec[n].val = val = xrealloc (val == slot0 ? 0 : val, size);
        quotearg_buffer (val, size, arg, (size_t) -1, options);
      }

    return val;
  }
}
576 char *
577 quotearg_n (unsigned int n, char const *arg)
579 return quotearg_n_options (n, arg, &default_quoting_options);
/* Quote ARG into storage slot 0 using the default quoting options,
   and return the slot's buffer.  */
char *
quotearg (char const *arg)
{
  char *quoted = quotearg_n (0, arg);

  return quoted;
}
588 char *
589 quotearg_n_style (unsigned int n, enum quoting_style s, char const *arg)
591 struct quoting_options o;
592 o.style = s;
593 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
594 return quotearg_n_options (n, arg, &o);
597 char *
598 quotearg_style (enum quoting_style s, char const *arg)
600 return quotearg_n_style (0, s, arg);
603 char *
604 quotearg_char (char const *arg, char ch)
606 struct quoting_options options;
607 options = default_quoting_options;
608 set_char_quoting (&options, ch, 1);
609 return quotearg_n_options (0, arg, &options);
/* Quote ARG into storage slot 0 like quotearg_char does, with ':' as
   the character that is always quoted.  */
char *
quotearg_colon (char const *arg)
{
  char const colon = ':';

  return quotearg_char (arg, colon);
}