Sync usage with man page.
[netbsd-mini2440.git] / gnu / dist / gettext / gettext-tools / lib / quotearg.c
blob35dfde73ab576c053aa47f588e8bffc21fcc5be1
1 /* quotearg.c - quote arguments for output
3 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004 Free Software
4 Foundation, Inc.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20 /* Written by Paul Eggert <eggert@twinsun.com> */
22 #if HAVE_CONFIG_H
23 # include <config.h>
24 #endif
26 #include "quotearg.h"
28 #include "xalloc.h"
30 #include <ctype.h>
31 #include <errno.h>
32 #include <limits.h>
33 #include <stdbool.h>
34 #include <stdlib.h>
35 #include <string.h>
37 #include "gettext.h"
38 #define _(msgid) gettext (msgid)
39 #define N_(msgid) msgid
41 #if HAVE_WCHAR_H
43 /* BSD/OS 4.1 wchar.h requires FILE and struct tm to be declared. */
44 # include <stdio.h>
45 # include <time.h>
47 # include <wchar.h>
48 #endif
50 #if !HAVE_MBRTOWC
51 /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
52 other macros are defined only for documentation and to satisfy C
53 syntax. */
54 # undef MB_CUR_MAX
55 # define MB_CUR_MAX 1
56 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
57 # define iswprint(wc) isprint ((unsigned char) (wc))
58 # undef HAVE_MBSINIT
59 #endif
61 #if !defined mbsinit && !HAVE_MBSINIT
62 # define mbsinit(ps) 1
63 #endif
65 #ifndef iswprint
66 # if HAVE_WCTYPE_H
67 # include <wctype.h>
68 # endif
69 # if !defined iswprint && !HAVE_ISWPRINT
70 # define iswprint(wc) 1
71 # endif
72 #endif
74 #ifndef SIZE_MAX
75 # define SIZE_MAX ((size_t) -1)
76 #endif
78 #define INT_BITS (sizeof (int) * CHAR_BIT)
80 struct quoting_options
82 /* Basic quoting style. */
83 enum quoting_style style;
85 /* Quote the characters indicated by this bit vector even if the
86 quoting style would not normally require them to be quoted. */
87 unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
/* Names of quoting styles.  The entries are listed in the same order as
   the values in quoting_style_vals, and the list is terminated by a null
   pointer so that it can be scanned without a separate length.  */
char const *const quoting_style_args[] =
{
  "literal",
  "shell",
  "shell-always",
  "c",
  "escape",
  "locale",
  "clocale",
  0
};
103 /* Correspondences to quoting style names. */
104 enum quoting_style const quoting_style_vals[] =
106 literal_quoting_style,
107 shell_quoting_style,
108 shell_always_quoting_style,
109 c_quoting_style,
110 escape_quoting_style,
111 locale_quoting_style,
112 clocale_quoting_style
115 /* The default quoting options. */
116 static struct quoting_options default_quoting_options;
118 /* Allocate a new set of quoting options, with contents initially identical
119 to O if O is not null, or to the default if O is null.
120 It is the caller's responsibility to free the result. */
121 struct quoting_options *
122 clone_quoting_options (struct quoting_options *o)
124 int e = errno;
125 struct quoting_options *p = xmalloc (sizeof *p);
126 *p = *(o ? o : &default_quoting_options);
127 errno = e;
128 return p;
131 /* Get the value of O's quoting style. If O is null, use the default. */
132 enum quoting_style
133 get_quoting_style (struct quoting_options *o)
135 return (o ? o : &default_quoting_options)->style;
138 /* In O (or in the default if O is null),
139 set the value of the quoting style to S. */
140 void
141 set_quoting_style (struct quoting_options *o, enum quoting_style s)
143 (o ? o : &default_quoting_options)->style = s;
146 /* In O (or in the default if O is null),
147 set the value of the quoting options for character C to I.
148 Return the old value. Currently, the only values defined for I are
149 0 (the default) and 1 (which means to quote the character even if
150 it would not otherwise be quoted). */
152 set_char_quoting (struct quoting_options *o, char c, int i)
154 unsigned char uc = c;
155 unsigned int *p =
156 (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
157 int shift = uc % INT_BITS;
158 int r = (*p >> shift) & 1;
159 *p ^= ((i & 1) ^ r) << shift;
160 return r;
163 /* MSGID approximates a quotation mark. Return its translation if it
164 has one; otherwise, return either it or "\"", depending on S. */
165 static char const *
166 gettext_quote (char const *msgid, enum quoting_style s)
168 char const *translation = _(msgid);
169 if (translation == msgid && s == clocale_quoting_style)
170 translation = "\"";
171 return translation;
174 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
175 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
176 non-quoting-style part of O to control quoting.
177 Terminate the output with a null character, and return the written
178 size of the output, not counting the terminating null.
179 If BUFFERSIZE is too small to store the output string, return the
180 value that would have been returned had BUFFERSIZE been large enough.
181 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
183 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
184 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
185 style specified by O, and O may not be null. */
187 static size_t
188 quotearg_buffer_restyled (char *buffer, size_t buffersize,
189 char const *arg, size_t argsize,
190 enum quoting_style quoting_style,
191 struct quoting_options const *o)
193 size_t i;
194 size_t len = 0;
195 char const *quote_string = 0;
196 size_t quote_string_len = 0;
197 bool backslash_escapes = false;
198 bool unibyte_locale = MB_CUR_MAX == 1;
200 #define STORE(c) \
201 do \
203 if (len < buffersize) \
204 buffer[len] = (c); \
205 len++; \
207 while (0)
209 switch (quoting_style)
211 case c_quoting_style:
212 STORE ('"');
213 backslash_escapes = true;
214 quote_string = "\"";
215 quote_string_len = 1;
216 break;
218 case escape_quoting_style:
219 backslash_escapes = true;
220 break;
222 case locale_quoting_style:
223 case clocale_quoting_style:
225 /* Get translations for open and closing quotation marks.
227 The message catalog should translate "`" to a left
228 quotation mark suitable for the locale, and similarly for
229 "'". If the catalog has no translation,
230 locale_quoting_style quotes `like this', and
231 clocale_quoting_style quotes "like this".
233 For example, an American English Unicode locale should
234 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
235 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
236 MARK). A British English Unicode locale should instead
237 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
238 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. */
240 char const *left = gettext_quote (N_("`"), quoting_style);
241 char const *right = gettext_quote (N_("'"), quoting_style);
242 for (quote_string = left; *quote_string; quote_string++)
243 STORE (*quote_string);
244 backslash_escapes = true;
245 quote_string = right;
246 quote_string_len = strlen (quote_string);
248 break;
250 case shell_always_quoting_style:
251 STORE ('\'');
252 quote_string = "'";
253 quote_string_len = 1;
254 break;
256 default:
257 break;
260 for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++)
262 unsigned char c;
263 unsigned char esc;
265 if (backslash_escapes
266 && quote_string_len
267 && i + quote_string_len <= argsize
268 && memcmp (arg + i, quote_string, quote_string_len) == 0)
269 STORE ('\\');
271 c = arg[i];
272 switch (c)
274 case '\0':
275 if (backslash_escapes)
277 STORE ('\\');
278 STORE ('0');
279 STORE ('0');
280 c = '0';
282 break;
284 case '?':
285 switch (quoting_style)
287 case shell_quoting_style:
288 goto use_shell_always_quoting_style;
290 case c_quoting_style:
291 if (i + 2 < argsize && arg[i + 1] == '?')
292 switch (arg[i + 2])
294 case '!': case '\'':
295 case '(': case ')': case '-': case '/':
296 case '<': case '=': case '>':
297 /* Escape the second '?' in what would otherwise be
298 a trigraph. */
299 c = arg[i + 2];
300 i += 2;
301 STORE ('?');
302 STORE ('\\');
303 STORE ('?');
304 break;
306 break;
308 default:
309 break;
311 break;
313 case '\a': esc = 'a'; goto c_escape;
314 case '\b': esc = 'b'; goto c_escape;
315 case '\f': esc = 'f'; goto c_escape;
316 case '\n': esc = 'n'; goto c_and_shell_escape;
317 case '\r': esc = 'r'; goto c_and_shell_escape;
318 case '\t': esc = 't'; goto c_and_shell_escape;
319 case '\v': esc = 'v'; goto c_escape;
320 case '\\': esc = c; goto c_and_shell_escape;
322 c_and_shell_escape:
323 if (quoting_style == shell_quoting_style)
324 goto use_shell_always_quoting_style;
325 c_escape:
326 if (backslash_escapes)
328 c = esc;
329 goto store_escape;
331 break;
333 case '{': case '}': /* sometimes special if isolated */
334 if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
335 break;
336 /* Fall through. */
337 case '#': case '~':
338 if (i != 0)
339 break;
340 /* Fall through. */
341 case ' ':
342 case '!': /* special in bash */
343 case '"': case '$': case '&':
344 case '(': case ')': case '*': case ';':
345 case '<':
346 case '=': /* sometimes special in 0th or (with "set -k") later args */
347 case '>': case '[':
348 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
349 case '`': case '|':
350 /* A shell special character. In theory, '$' and '`' could
351 be the first bytes of multibyte characters, which means
352 we should check them with mbrtowc, but in practice this
353 doesn't happen so it's not worth worrying about. */
354 if (quoting_style == shell_quoting_style)
355 goto use_shell_always_quoting_style;
356 break;
358 case '\'':
359 switch (quoting_style)
361 case shell_quoting_style:
362 goto use_shell_always_quoting_style;
364 case shell_always_quoting_style:
365 STORE ('\'');
366 STORE ('\\');
367 STORE ('\'');
368 break;
370 default:
371 break;
373 break;
375 case '%': case '+': case ',': case '-': case '.': case '/':
376 case '0': case '1': case '2': case '3': case '4': case '5':
377 case '6': case '7': case '8': case '9': case ':':
378 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
379 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
380 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
381 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
382 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
383 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
384 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
385 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
386 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
387 /* These characters don't cause problems, no matter what the
388 quoting style is. They cannot start multibyte sequences. */
389 break;
391 default:
392 /* If we have a multibyte sequence, copy it until we reach
393 its end, find an error, or come back to the initial shift
394 state. For C-like styles, if the sequence has
395 unprintable characters, escape the whole sequence, since
396 we can't easily escape single characters within it. */
398 /* Length of multibyte sequence found so far. */
399 size_t m;
401 bool printable;
403 if (unibyte_locale)
405 m = 1;
406 printable = isprint (c) != 0;
408 else
410 mbstate_t mbstate;
411 memset (&mbstate, 0, sizeof mbstate);
413 m = 0;
414 printable = true;
415 if (argsize == SIZE_MAX)
416 argsize = strlen (arg);
420 wchar_t w;
421 size_t bytes = mbrtowc (&w, &arg[i + m],
422 argsize - (i + m), &mbstate);
423 if (bytes == 0)
424 break;
425 else if (bytes == (size_t) -1)
427 printable = false;
428 break;
430 else if (bytes == (size_t) -2)
432 printable = false;
433 while (i + m < argsize && arg[i + m])
434 m++;
435 break;
437 else
439 /* Work around a bug with older shells that "see" a '\'
440 that is really the 2nd byte of a multibyte character.
441 In practice the problem is limited to ASCII
442 chars >= '@' that are shell special chars. */
443 if ('[' == 0x5b && quoting_style == shell_quoting_style)
445 size_t j;
446 for (j = 1; j < bytes; j++)
447 switch (arg[i + m + j])
449 case '[': case '\\': case '^':
450 case '`': case '|':
451 goto use_shell_always_quoting_style;
455 if (! iswprint (w))
456 printable = false;
457 m += bytes;
460 while (! mbsinit (&mbstate));
463 if (1 < m || (backslash_escapes && ! printable))
465 /* Output a multibyte sequence, or an escaped
466 unprintable unibyte character. */
467 size_t ilim = i + m;
469 for (;;)
471 if (backslash_escapes && ! printable)
473 STORE ('\\');
474 STORE ('0' + (c >> 6));
475 STORE ('0' + ((c >> 3) & 7));
476 c = '0' + (c & 7);
478 if (ilim <= i + 1)
479 break;
480 STORE (c);
481 c = arg[++i];
484 goto store_c;
489 if (! (backslash_escapes
490 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
491 goto store_c;
493 store_escape:
494 STORE ('\\');
496 store_c:
497 STORE (c);
500 if (i == 0 && quoting_style == shell_quoting_style)
501 goto use_shell_always_quoting_style;
503 if (quote_string)
504 for (; *quote_string; quote_string++)
505 STORE (*quote_string);
507 if (len < buffersize)
508 buffer[len] = '\0';
509 return len;
511 use_shell_always_quoting_style:
512 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
513 shell_always_quoting_style, o);
516 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
517 argument ARG (of size ARGSIZE), using O to control quoting.
518 If O is null, use the default.
519 Terminate the output with a null character, and return the written
520 size of the output, not counting the terminating null.
521 If BUFFERSIZE is too small to store the output string, return the
522 value that would have been returned had BUFFERSIZE been large enough.
523 If ARGSIZE is SIZE_MAX, use the string length of the argument for
524 ARGSIZE. */
525 size_t
526 quotearg_buffer (char *buffer, size_t buffersize,
527 char const *arg, size_t argsize,
528 struct quoting_options const *o)
530 struct quoting_options const *p = o ? o : &default_quoting_options;
531 int e = errno;
532 size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
533 p->style, p);
534 errno = e;
535 return r;
/* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly
   allocated storage containing the quoted string.
   errno is restored to its value on entry before returning.  */
char *
quotearg_alloc (char const *arg, size_t argsize,
                struct quoting_options const *o)
{
  int saved_errno = errno;
  /* A first pass with an empty buffer only measures the output; add one
     for the terminating null.  */
  size_t needed = quotearg_buffer (0, 0, arg, argsize, o) + 1;
  char *result = xmalloc (needed);

  quotearg_buffer (result, needed, arg, argsize, o);

  errno = saved_errno;
  return result;
}
/* Use storage slot N to return a quoted version of argument ARG.
   ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a
   null-terminated string.
   OPTIONS specifies the quoting options.
   The returned value points to static storage that can be
   reused by the next call to this function with the same value of N.
   N must be nonnegative; the function aborts otherwise.  N is
   deliberately declared with type "int" to allow for future extensions
   (using negative values).
   errno is restored to its value on entry before returning.  */
static char *
quotearg_n_options (int n, char const *arg, size_t argsize,
                    struct quoting_options const *options)
{
  int saved_errno = errno;

  /* Preallocate a slot 0 buffer, so that the caller can always quote
     one small component of a "memory exhausted" message in slot 0.  */
  static char slot0[256];
  static unsigned int nslots = 1;
  unsigned int slot = n;
  struct slotvec
    {
      size_t size;
      char *val;
    };
  static struct slotvec slotvec0 = {sizeof slot0, slot0};
  static struct slotvec *slotvec = &slotvec0;
  static size_t sz = sizeof(*slotvec);
  struct slotvec *entry;
  char *buf;
  size_t needed;

  if (n < 0)
    abort ();

  /* Grow the slot table so that it includes slot N.  */
  if (nslots <= slot)
    {
      unsigned int wanted = slot + 1;

      if (xalloc_oversized (wanted, sz))
        xalloc_die ();

      /* The initial one-entry table is a static object and cannot be
         passed to xrealloc, so move it to the heap first.  */
      if (slotvec == &slotvec0)
        {
          slotvec = xmalloc (sizeof *slotvec);
          *slotvec = slotvec0;
        }
      slotvec = xrealloc (slotvec, wanted * sizeof *slotvec);
      memset (slotvec + nslots, 0, (wanted - nslots) * sizeof *slotvec);
      nslots = wanted;
    }

  /* Try the slot's current buffer; if the result does not fit, replace
     the buffer with one of exactly the right size and quote again.  */
  entry = &slotvec[n];
  buf = entry->val;
  needed = quotearg_buffer (buf, entry->size, arg, argsize, options);

  if (entry->size <= needed)
    {
      entry->size = needed + 1;
      if (buf != slot0)
        free (buf);
      entry->val = buf = xmalloc (entry->size);
      quotearg_buffer (buf, entry->size, arg, argsize, options);
    }

  errno = saved_errno;
  return buf;
}
619 char *
620 quotearg_n (int n, char const *arg)
622 return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
/* Like quotearg_n (0, ARG): quote ARG into storage slot 0.  */
char *
quotearg (char const *arg)
{
  char *quoted = quotearg_n (0, arg);

  return quoted;
}
631 /* Return quoting options for STYLE, with no extra quoting. */
632 static struct quoting_options
633 quoting_options_from_style (enum quoting_style style)
635 struct quoting_options o;
636 o.style = style;
637 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
638 return o;
641 char *
642 quotearg_n_style (int n, enum quoting_style s, char const *arg)
644 struct quoting_options const o = quoting_options_from_style (s);
645 return quotearg_n_options (n, arg, SIZE_MAX, &o);
648 char *
649 quotearg_n_style_mem (int n, enum quoting_style s,
650 char const *arg, size_t argsize)
652 struct quoting_options const o = quoting_options_from_style (s);
653 return quotearg_n_options (n, arg, argsize, &o);
656 char *
657 quotearg_style (enum quoting_style s, char const *arg)
659 return quotearg_n_style (0, s, arg);
662 char *
663 quotearg_char (char const *arg, char ch)
665 struct quoting_options options;
666 options = default_quoting_options;
667 set_char_quoting (&options, ch, 1);
668 return quotearg_n_options (0, arg, SIZE_MAX, &options);
/* Like quotearg_char (ARG, ':').  */
char *
quotearg_colon (char const *arg)
{
  char const colon = ':';

  return quotearg_char (arg, colon);
}