/* Web-view header retained from the page this file was extracted from:
     commit title: "utf8: add unit test for g_utf8_make_valid"
     path:         [glib.git] / glib / gnulib / printf-parse.c
     blob:         30828a8cb979cbdd7927877ef320f408cce278d1  */
/* Formatted output to strings.
   Copyright (C) 1999-2000, 2002-2003, 2006-2015 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License along
   with this program; if not, see <http://www.gnu.org/licenses/>.  */

/* This file can be parametrized with the following macros:
     CHAR_T             The element type of the format string.
     CHAR_T_ONLY_ASCII  Set to 1 to enable verification that all characters
                        in the format string are ASCII.
     DIRECTIVE          Structure denoting a format directive.
                        Depends on CHAR_T.
     DIRECTIVES         Structure denoting the set of format directives of a
                        format string.  Depends on CHAR_T.
     PRINTF_PARSE       Function that parses a format string.
                        Depends on CHAR_T.
     STATIC             Set to 'static' to declare the function static.
     ENABLE_UNISTDIO    Set to 1 to enable the unistdio extensions.  */
30 #ifdef HAVE_CONFIG_H
31 # include <config.h>
32 #endif
34 #include "g-gnulib.h"
36 /* Specification. */
37 #ifndef PRINTF_PARSE
38 # include "printf-parse.h"
39 #endif
41 #include "xsize.h"
43 /* Default parameters. */
44 #ifndef PRINTF_PARSE
45 # define PRINTF_PARSE printf_parse
46 # define CHAR_T char
47 # define DIRECTIVE char_directive
48 # define DIRECTIVES char_directives
49 #endif
51 /* Get size_t, NULL. */
52 #include <stddef.h>
54 /* Get intmax_t. */
55 #if defined IN_LIBINTL || defined IN_LIBASPRINTF
56 # if HAVE_STDINT_H_WITH_UINTMAX
57 # include <stdint.h>
58 # endif
59 # if HAVE_INTTYPES_H_WITH_UINTMAX
60 # include <inttypes.h>
61 # endif
62 #else
63 # if !defined (_MSC_VER) || (_MSC_VER >= 1600)
64 # include <stdint.h>
65 # else
66 typedef signed __int64 intmax_t;
67 # endif
68 #endif
70 /* malloc(), realloc(), free(). */
71 #include <stdlib.h>
73 /* memcpy(). */
74 #include <string.h>
76 /* errno. */
77 #include <errno.h>
79 #if CHAR_T_ONLY_ASCII
80 /* c_isascii(). */
81 # include "c-ctype.h"
82 #endif
84 #ifdef STATIC
85 STATIC
86 #endif
87 int
88 PRINTF_PARSE (const CHAR_T *format, DIRECTIVES *d, arguments *a)
90 const CHAR_T *cp = format; /* pointer into format */
91 size_t arg_posn = 0; /* number of regular arguments consumed */
92 size_t d_allocated; /* allocated elements of d->dir */
93 size_t a_allocated; /* allocated elements of a->arg */
94 size_t max_width_length = 0;
95 size_t max_precision_length = 0;
97 d->count = 0;
98 d_allocated = N_DIRECT_ALLOC_DIRECTIVES;
99 d->dir = d->direct_alloc_dir;
101 a->count = 0;
102 a_allocated = N_DIRECT_ALLOC_ARGUMENTS;
103 a->arg = a->direct_alloc_arg;
105 #define REGISTER_ARG(_index_,_type_) \
107 size_t n = (_index_); \
108 if (n >= a_allocated) \
110 size_t memory_size; \
111 argument *memory; \
113 a_allocated = xtimes (a_allocated, 2); \
114 if (a_allocated <= n) \
115 a_allocated = xsum (n, 1); \
116 memory_size = xtimes (a_allocated, sizeof (argument)); \
117 if (size_overflow_p (memory_size)) \
118 /* Overflow, would lead to out of memory. */ \
119 goto out_of_memory; \
120 memory = (argument *) (a->arg != a->direct_alloc_arg \
121 ? realloc (a->arg, memory_size) \
122 : malloc (memory_size)); \
123 if (memory == NULL) \
124 /* Out of memory. */ \
125 goto out_of_memory; \
126 if (a->arg == a->direct_alloc_arg) \
127 memcpy (memory, a->arg, a->count * sizeof (argument)); \
128 a->arg = memory; \
130 while (a->count <= n) \
131 a->arg[a->count++].type = TYPE_NONE; \
132 if (a->arg[n].type == TYPE_NONE) \
133 a->arg[n].type = (_type_); \
134 else if (a->arg[n].type != (_type_)) \
135 /* Ambiguous type for positional argument. */ \
136 goto error; \
139 while (*cp != '\0')
141 CHAR_T c = *cp++;
142 if (c == '%')
144 size_t arg_index = ARG_NONE;
145 DIRECTIVE *dp = &d->dir[d->count]; /* pointer to next directive */
147 /* Initialize the next directive. */
148 dp->dir_start = cp - 1;
149 dp->flags = 0;
150 dp->width_start = NULL;
151 dp->width_end = NULL;
152 dp->width_arg_index = ARG_NONE;
153 dp->precision_start = NULL;
154 dp->precision_end = NULL;
155 dp->precision_arg_index = ARG_NONE;
156 dp->arg_index = ARG_NONE;
158 /* Test for positional argument. */
159 if (*cp >= '0' && *cp <= '9')
161 const CHAR_T *np;
163 for (np = cp; *np >= '0' && *np <= '9'; np++)
165 if (*np == '$')
167 size_t n = 0;
169 for (np = cp; *np >= '0' && *np <= '9'; np++)
170 n = xsum (xtimes (n, 10), *np - '0');
171 if (n == 0)
172 /* Positional argument 0. */
173 goto error;
174 if (size_overflow_p (n))
175 /* n too large, would lead to out of memory later. */
176 goto error;
177 arg_index = n - 1;
178 cp = np + 1;
182 /* Read the flags. */
183 for (;;)
185 if (*cp == '\'')
187 dp->flags |= FLAG_GROUP;
188 cp++;
190 else if (*cp == '-')
192 dp->flags |= FLAG_LEFT;
193 cp++;
195 else if (*cp == '+')
197 dp->flags |= FLAG_SHOWSIGN;
198 cp++;
200 else if (*cp == ' ')
202 dp->flags |= FLAG_SPACE;
203 cp++;
205 else if (*cp == '#')
207 dp->flags |= FLAG_ALT;
208 cp++;
210 else if (*cp == '0')
212 dp->flags |= FLAG_ZERO;
213 cp++;
215 #if __GLIBC__ >= 2 && !defined __UCLIBC__
216 else if (*cp == 'I')
218 dp->flags |= FLAG_LOCALIZED;
219 cp++;
221 #endif
222 else
223 break;
226 /* Parse the field width. */
227 if (*cp == '*')
229 dp->width_start = cp;
230 cp++;
231 dp->width_end = cp;
232 if (max_width_length < 1)
233 max_width_length = 1;
235 /* Test for positional argument. */
236 if (*cp >= '0' && *cp <= '9')
238 const CHAR_T *np;
240 for (np = cp; *np >= '0' && *np <= '9'; np++)
242 if (*np == '$')
244 size_t n = 0;
246 for (np = cp; *np >= '0' && *np <= '9'; np++)
247 n = xsum (xtimes (n, 10), *np - '0');
248 if (n == 0)
249 /* Positional argument 0. */
250 goto error;
251 if (size_overflow_p (n))
252 /* n too large, would lead to out of memory later. */
253 goto error;
254 dp->width_arg_index = n - 1;
255 cp = np + 1;
258 if (dp->width_arg_index == ARG_NONE)
260 dp->width_arg_index = arg_posn++;
261 if (dp->width_arg_index == ARG_NONE)
262 /* arg_posn wrapped around. */
263 goto error;
265 REGISTER_ARG (dp->width_arg_index, TYPE_INT);
267 else if (*cp >= '0' && *cp <= '9')
269 size_t width_length;
271 dp->width_start = cp;
272 for (; *cp >= '0' && *cp <= '9'; cp++)
274 dp->width_end = cp;
275 width_length = dp->width_end - dp->width_start;
276 if (max_width_length < width_length)
277 max_width_length = width_length;
280 /* Parse the precision. */
281 if (*cp == '.')
283 cp++;
284 if (*cp == '*')
286 dp->precision_start = cp - 1;
287 cp++;
288 dp->precision_end = cp;
289 if (max_precision_length < 2)
290 max_precision_length = 2;
292 /* Test for positional argument. */
293 if (*cp >= '0' && *cp <= '9')
295 const CHAR_T *np;
297 for (np = cp; *np >= '0' && *np <= '9'; np++)
299 if (*np == '$')
301 size_t n = 0;
303 for (np = cp; *np >= '0' && *np <= '9'; np++)
304 n = xsum (xtimes (n, 10), *np - '0');
305 if (n == 0)
306 /* Positional argument 0. */
307 goto error;
308 if (size_overflow_p (n))
309 /* n too large, would lead to out of memory
310 later. */
311 goto error;
312 dp->precision_arg_index = n - 1;
313 cp = np + 1;
316 if (dp->precision_arg_index == ARG_NONE)
318 dp->precision_arg_index = arg_posn++;
319 if (dp->precision_arg_index == ARG_NONE)
320 /* arg_posn wrapped around. */
321 goto error;
323 REGISTER_ARG (dp->precision_arg_index, TYPE_INT);
325 else
327 size_t precision_length;
329 dp->precision_start = cp - 1;
330 for (; *cp >= '0' && *cp <= '9'; cp++)
332 dp->precision_end = cp;
333 precision_length = dp->precision_end - dp->precision_start;
334 if (max_precision_length < precision_length)
335 max_precision_length = precision_length;
340 arg_type type;
342 /* Parse argument type/size specifiers. */
344 int flags = 0;
346 for (;;)
348 if (*cp == 'h')
350 flags |= (1 << (flags & 1));
351 cp++;
353 else if (*cp == 'L')
355 flags |= 4;
356 cp++;
358 else if (*cp == 'l')
360 flags += 8;
361 cp++;
363 else if (*cp == 'j')
365 if (sizeof (intmax_t) > sizeof (long))
367 /* intmax_t = long long */
368 flags += 16;
370 else if (sizeof (intmax_t) > sizeof (int))
372 /* intmax_t = long */
373 flags += 8;
375 cp++;
377 else if (*cp == 'z' || *cp == 'Z')
379 /* 'z' is standardized in ISO C 99, but glibc uses 'Z'
380 because the warning facility in gcc-2.95.2 understands
381 only 'Z' (see gcc-2.95.2/gcc/c-common.c:1784). */
382 if (sizeof (size_t) > sizeof (long))
384 /* size_t = long long */
385 flags += 16;
387 else if (sizeof (size_t) > sizeof (int))
389 /* size_t = long */
390 flags += 8;
392 cp++;
394 else if (*cp == 't')
396 if (sizeof (ptrdiff_t) > sizeof (long))
398 /* ptrdiff_t = long long */
399 flags += 16;
401 else if (sizeof (ptrdiff_t) > sizeof (int))
403 /* ptrdiff_t = long */
404 flags += 8;
406 cp++;
408 #if defined __APPLE__ && defined __MACH__
409 /* On Mac OS X 10.3, PRIdMAX is defined as "qd".
410 We cannot change it to "lld" because PRIdMAX must also
411 be understood by the system's printf routines. */
412 else if (*cp == 'q')
414 if (64 / 8 > sizeof (long))
416 /* int64_t = long long */
417 flags += 16;
419 else
421 /* int64_t = long */
422 flags += 8;
424 cp++;
426 #endif
427 #if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__
428 /* On native Windows, PRIdMAX is defined as "I64d".
429 We cannot change it to "lld" because PRIdMAX must also
430 be understood by the system's printf routines. */
431 else if (*cp == 'I' && cp[1] == '6' && cp[2] == '4')
433 if (64 / 8 > sizeof (long))
435 /* __int64 = long long */
436 flags += 16;
438 else
440 /* __int64 = long */
441 flags += 8;
443 cp += 3;
445 #endif
446 else
447 break;
450 /* Read the conversion character. */
451 c = *cp++;
452 switch (c)
454 case 'd': case 'i':
455 #if HAVE_LONG_LONG
456 /* If 'long long' exists and is larger than 'long': */
457 if (flags >= 16 || (flags & 4))
458 type = TYPE_LONGLONGINT;
459 else
460 #endif
461 /* If 'long long' exists and is the same as 'long', we parse
462 "lld" into TYPE_LONGINT. */
463 if (flags >= 8)
464 type = TYPE_LONGINT;
465 else if (flags & 2)
466 type = TYPE_SCHAR;
467 else if (flags & 1)
468 type = TYPE_SHORT;
469 else
470 type = TYPE_INT;
471 break;
472 case 'o': case 'u': case 'x': case 'X':
473 #if HAVE_LONG_LONG
474 /* If 'long long' exists and is larger than 'long': */
475 if (flags >= 16 || (flags & 4))
476 type = TYPE_ULONGLONGINT;
477 else
478 #endif
479 /* If 'unsigned long long' exists and is the same as
480 'unsigned long', we parse "llu" into TYPE_ULONGINT. */
481 if (flags >= 8)
482 type = TYPE_ULONGINT;
483 else if (flags & 2)
484 type = TYPE_UCHAR;
485 else if (flags & 1)
486 type = TYPE_USHORT;
487 else
488 type = TYPE_UINT;
489 break;
490 case 'f': case 'F': case 'e': case 'E': case 'g': case 'G':
491 case 'a': case 'A':
492 if (flags >= 16 || (flags & 4))
493 type = TYPE_LONGDOUBLE;
494 else
495 type = TYPE_DOUBLE;
496 break;
497 case 'c':
498 if (flags >= 8)
499 #if HAVE_WINT_T
500 type = TYPE_WIDE_CHAR;
501 #else
502 goto error;
503 #endif
504 else
505 type = TYPE_CHAR;
506 break;
507 #if HAVE_WINT_T
508 case 'C':
509 type = TYPE_WIDE_CHAR;
510 c = 'c';
511 break;
512 #endif
513 case 's':
514 if (flags >= 8)
515 #if HAVE_WCHAR_T
516 type = TYPE_WIDE_STRING;
517 #else
518 goto error;
519 #endif
520 else
521 type = TYPE_STRING;
522 break;
523 #if HAVE_WCHAR_T
524 case 'S':
525 type = TYPE_WIDE_STRING;
526 c = 's';
527 break;
528 #endif
529 case 'p':
530 type = TYPE_POINTER;
531 break;
532 case 'n':
533 #if HAVE_LONG_LONG
534 /* If 'long long' exists and is larger than 'long': */
535 if (flags >= 16 || (flags & 4))
536 type = TYPE_COUNT_LONGLONGINT_POINTER;
537 else
538 #endif
539 /* If 'long long' exists and is the same as 'long', we parse
540 "lln" into TYPE_COUNT_LONGINT_POINTER. */
541 if (flags >= 8)
542 type = TYPE_COUNT_LONGINT_POINTER;
543 else if (flags & 2)
544 type = TYPE_COUNT_SCHAR_POINTER;
545 else if (flags & 1)
546 type = TYPE_COUNT_SHORT_POINTER;
547 else
548 type = TYPE_COUNT_INT_POINTER;
549 break;
550 #if ENABLE_UNISTDIO
551 /* The unistdio extensions. */
552 case 'U':
553 if (flags >= 16)
554 type = TYPE_U32_STRING;
555 else if (flags >= 8)
556 type = TYPE_U16_STRING;
557 else
558 type = TYPE_U8_STRING;
559 break;
560 #endif
561 case '%':
562 type = TYPE_NONE;
563 break;
564 default:
565 /* Unknown conversion character. */
566 goto error;
570 if (type != TYPE_NONE)
572 dp->arg_index = arg_index;
573 if (dp->arg_index == ARG_NONE)
575 dp->arg_index = arg_posn++;
576 if (dp->arg_index == ARG_NONE)
577 /* arg_posn wrapped around. */
578 goto error;
580 REGISTER_ARG (dp->arg_index, type);
582 dp->conversion = c;
583 dp->dir_end = cp;
586 d->count++;
587 if (d->count >= d_allocated)
589 size_t memory_size;
590 DIRECTIVE *memory;
592 d_allocated = xtimes (d_allocated, 2);
593 memory_size = xtimes (d_allocated, sizeof (DIRECTIVE));
594 if (size_overflow_p (memory_size))
595 /* Overflow, would lead to out of memory. */
596 goto out_of_memory;
597 memory = (DIRECTIVE *) (d->dir != d->direct_alloc_dir
598 ? realloc (d->dir, memory_size)
599 : malloc (memory_size));
600 if (memory == NULL)
601 /* Out of memory. */
602 goto out_of_memory;
603 if (d->dir == d->direct_alloc_dir)
604 memcpy (memory, d->dir, d->count * sizeof (DIRECTIVE));
605 d->dir = memory;
608 #if CHAR_T_ONLY_ASCII
609 else if (!c_isascii (c))
611 /* Non-ASCII character. Not supported. */
612 goto error;
614 #endif
616 d->dir[d->count].dir_start = cp;
618 d->max_width_length = max_width_length;
619 d->max_precision_length = max_precision_length;
620 return 0;
622 error:
623 if (a->arg != a->direct_alloc_arg)
624 free (a->arg);
625 if (d->dir != d->direct_alloc_dir)
626 free (d->dir);
627 errno = EINVAL;
628 return -1;
630 out_of_memory:
631 if (a->arg != a->direct_alloc_arg)
632 free (a->arg);
633 if (d->dir != d->direct_alloc_dir)
634 free (d->dir);
635 errno = ENOMEM;
636 return -1;
/* Drop the parametrization macros so this file can be included again with
   different parameters.  */
#undef PRINTF_PARSE
#undef DIRECTIVES
#undef DIRECTIVE
#undef CHAR_T_ONLY_ASCII
#undef CHAR_T