Use size_t for string length variable.
[glibc/history.git] / stdio-common / vfscanf.c
blob1ea9bc4e07d4273862daf22d4ccfe36cf28e3914
1 /* Copyright (C) 1991-2002, 2003, 2004, 2005, 2006
2 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307 USA. */
20 #include <assert.h>
21 #include <errno.h>
22 #include <limits.h>
23 #include <ctype.h>
24 #include <stdarg.h>
25 #include <stdio.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <wchar.h>
30 #include <wctype.h>
31 #include <bits/libc-lock.h>
32 #include <locale/localeinfo.h>
/* LONGLONG names the integer type used where a `long long' is wanted:
   it expands to `long long' when compiling with GNU C (in which case
   HAVE_LONGLONG is also defined) and to plain `long' otherwise.  */
34 #ifdef __GNUC__
35 # define HAVE_LONGLONG
36 # define LONGLONG long long
37 #else
38 # define LONGLONG long
39 #endif
41 /* Determine whether we have to handle `long long' at all.
   need_longlong is 0 when LONG_MAX equals LONG_LONG_MAX (the two types
   have the same range) and 1 otherwise.  It is used as a constant in
   conditions such as `need_longlong && (flags & LONGDBL)' to guard the
   `long long' branch of a conversion.  */
42 #if LONG_MAX == LONG_LONG_MAX
43 # define need_longlong 0
44 #else
45 # define need_longlong 1
46 #endif
48 /* Determine whether we have to handle `long'.
   need_long is 0 when INT_MAX equals LONG_MAX (int and long have the
   same range) and 1 otherwise; it guards the `long' branch in the same
   way need_longlong guards the `long long' branch.  */
49 #if INT_MAX == LONG_MAX
50 # define need_long 0
51 #else
52 # define need_long 1
53 #endif
55 /* These are the flag bits collected in the local variable `flags'
   while one conversion specification of the format string is parsed.
   Each bit is distinct so that several can be OR-ed together.  */
56 #define LONG 0x001 /* l: long or double */
57 #define LONGDBL 0x002 /* L, ll, q: long long or long double */
58 #define SHORT 0x004 /* h: short */
59 #define SUPPRESS 0x008 /* *: suppress assignment */
60 #define POINTER 0x010 /* weird %p pointer (`fake hex') */
61 #define NOSKIP 0x020 /* do not skip blanks */
62 #define WIDTH 0x040 /* width was given */
63 #define GROUP 0x080 /* ': group numbers */
64 #define MALLOC 0x100 /* a: store string in a malloc'd buffer */
65 #define CHAR 0x200 /* hh: char */
66 #define I18N 0x400 /* I: use locale's digits */
69 #include <locale/localeinfo.h>
70 #include <libioP.h>
71 #include <libio.h>
/* From here on every use of `va_list' in this file means libio's
   `_IO_va_list'.  */
73 #undef va_list
74 #define va_list _IO_va_list
/* Character-level primitives, defined in two flavours so that the same
   function body can be compiled for wide streams (COMPILE_WSCANF
   defined) and for byte streams (otherwise).

   The macros below are not self-contained: they read and modify local
   variables of the function that expands them, namely `c' (last
   character read), `read_in' (count of characters consumed),
   `inchar_errno' (errno saved from the last failed read), `s' (the
   stream) and, in the byte flavour, `loc' (the locale handle).

   ungetc (c, s)         push C back onto S and decrement read_in,
                         unless C is the end-of-file value.
   ungetc_not_eof (c, s) the same without the end-of-file test; the
                         caller must know C is a real character.
   inchar ()             read the next character into `c' and return it.
                         On success read_in is incremented; on
                         end-of-file errno is remembered in
                         inchar_errno.  Once `c' already holds the
                         end-of-file value no further read is attempted:
                         errno is restored from inchar_errno and the
                         end-of-file value is returned again.  The
                         (size_t) cast only gives both arms of ?: the
                         same type as ++read_in.  */
76 #ifdef COMPILE_WSCANF
77 # define ungetc(c, s) ((void) (c == WEOF \
78 || (--read_in, \
79 INTUSE(_IO_sputbackwc) (s, c))))
80 # define ungetc_not_eof(c, s) ((void) (--read_in, \
81 INTUSE(_IO_sputbackwc) (s, c)))
82 # define inchar() (c == WEOF ? ((errno = inchar_errno), WEOF) \
83 : ((c = _IO_getwc_unlocked (s)), \
84 (void) (c != WEOF \
85 ? ++read_in \
86 : (size_t) (inchar_errno = errno)), c))
/* Wide flavour of the copy and classification helpers.  */
88 # define MEMCPY(d, s, n) __wmemcpy (d, s, n)
89 # define ISSPACE(Ch) iswspace (Ch)
90 # define ISDIGIT(Ch) iswdigit (Ch)
91 # define ISXDIGIT(Ch) iswxdigit (Ch)
92 # define TOLOWER(Ch) towlower (Ch)
/* Return WEOF from the expanding function unless _IO_fwide (s, 1)
   yields 1 (presumably: the stream is, or could be made, wide
   oriented -- confirm against libio).  */
93 # define ORIENT if (_IO_fwide (s, 1) != 1) return WEOF
/* Redirect the narrow string-to-number internals to their wide
   counterparts so the conversion code can use one set of names.  */
94 # define __strtoll_internal __wcstoll_internal
95 # define __strtoull_internal __wcstoull_internal
96 # define __strtol_internal __wcstol_internal
97 # define __strtoul_internal __wcstoul_internal
98 # define __strtold_internal __wcstold_internal
99 # define __strtod_internal __wcstod_internal
100 # define __strtof_internal __wcstof_internal
/* L_() turns a literal into a wide literal; CHAR_T, UCHAR_T and WINT_T
   are the character, unsigned-character and "character or EOF" types
   of this flavour.  EOF is redefined to WEOF so that shared code can
   compare against EOF in both flavours.  */
102 # define L_(Str) L##Str
103 # define CHAR_T wchar_t
104 # define UCHAR_T unsigned int
105 # define WINT_T wint_t
106 # undef EOF
107 # define EOF WEOF
108 #else
/* Byte flavour: same contracts as above, using the byte-stream
   primitives.  Pushed-back characters are converted to unsigned char
   first.  */
109 # define ungetc(c, s) ((void) ((int) c == EOF \
110 || (--read_in, \
111 INTUSE(_IO_sputbackc) (s, (unsigned char) c))))
112 # define ungetc_not_eof(c, s) ((void) (--read_in, \
113 INTUSE(_IO_sputbackc) (s, (unsigned char) c)))
114 # define inchar() (c == EOF ? ((errno = inchar_errno), EOF) \
115 : ((c = _IO_getc_unlocked (s)), \
116 (void) (c != EOF \
117 ? ++read_in \
118 : (size_t) (inchar_errno = errno)), c))
/* Classification goes through the explicit-locale variants and
   therefore needs `loc' in scope.  */
119 # define MEMCPY(d, s, n) memcpy (d, s, n)
120 # define ISSPACE(Ch) __isspace_l (Ch, loc)
121 # define ISDIGIT(Ch) __isdigit_l (Ch, loc)
122 # define ISXDIGIT(Ch) __isxdigit_l (Ch, loc)
123 # define TOLOWER(Ch) __tolower_l ((unsigned char) (Ch), loc)
/* Return EOF from the expanding function when _IO_vtable_offset (s) is
   0 and _IO_fwide (s, -1) does not yield -1 (presumably: the stream
   cannot be used byte-oriented -- confirm against libio).  */
124 # define ORIENT if (_IO_vtable_offset (s) == 0 \
125 && _IO_fwide (s, -1) != -1) \
126 return EOF
/* L_() leaves literals unchanged; character types are the plain byte
   types.  */
128 # define L_(Str) Str
129 # define CHAR_T char
130 # define UCHAR_T unsigned char
131 # define WINT_T int
132 #endif
/* Error exits.  Each macro records a distinct code in the local
   variable `errval' and jumps to the label `errout' of the expanding
   function; all three therefore require `errval' and `errout' (and,
   for input_error, `done') to be in scope.

   encode_error: errval = 4, and errno is set to EILSEQ.  */
134 #define encode_error() do { \
135 errval = 4; \
136 __set_errno (EILSEQ); \
137 goto errout; \
138 } while (0)
/* conv_error: errval = 2; nothing else is changed.  */
139 #define conv_error() do { \
140 errval = 2; \
141 goto errout; \
142 } while (0)
/* input_error: errval = 1; if no assignment has been counted yet
   (done == 0) the result is turned into EOF.  */
143 #define input_error() do { \
144 errval = 1; \
145 if (done == 0) done = EOF; \
146 goto errout; \
147 } while (0)
/* Validate the arguments of the scanning function and return EOF from
   the expanding function when they are unusable:
     - CHECK_FILE (s, EOF) is applied to the stream first;
     - if the stream's flags contain _IO_NO_READS, errno is set to
       EBADF;
     - otherwise, if FORMAT is NULL, MAYBE_SET_EINVAL is invoked.
   NOTE(review): the lines of this macro that held only braces are
   missing from this copy of the file.  */
148 #define ARGCHECK(s, format) \
149 do \
151 /* Check file argument for consistency. */ \
152 CHECK_FILE (s, EOF); \
153 if (s->_flags & _IO_NO_READS) \
155 __set_errno (EBADF); \
156 return EOF; \
158 else if (format == NULL) \
160 MAYBE_SET_EINVAL; \
161 return EOF; \
163 } while (0)
/* LOCK_STREAM (S) starts a libc cleanup region whose handler is
   _IO_funlockfile applied to S, then locks S with _IO_flockfile.
   It expands to two statements and is deliberately not wrapped in
   do { } while (0); presumably __libc_cleanup_region_start opens a
   scope that __libc_cleanup_region_end closes -- confirm against
   bits/libc-lock.h.  Use it only as a full statement and always pair
   it with UNLOCK_STREAM in the same block.  */
164 #define LOCK_STREAM(S) \
165 __libc_cleanup_region_start (1, (void (*) (void *)) &_IO_funlockfile, (S)); \
166 _IO_flockfile (S)
/* UNLOCK_STREAM (S) unlocks S and ends the cleanup region started by
   LOCK_STREAM; the 0 argument is passed through to
   __libc_cleanup_region_end unchanged.  */
167 #define UNLOCK_STREAM(S) \
168 _IO_funlockfile (S); \
169 __libc_cleanup_region_end (0)
172 /* Read formatted input from S according to the format string
173 FORMAT, using the argument list in ARG.
174 Return the number of assignments made, or -1 for an input error. */
175 #ifdef COMPILE_WSCANF
177 _IO_vfwscanf (_IO_FILE *s, const wchar_t *format, _IO_va_list argptr,
178 int *errp)
179 #else
181 _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
182 int *errp)
183 #endif
185 va_list arg;
186 register const CHAR_T *f = format;
187 register UCHAR_T fc; /* Current character of the format. */
188 register WINT_T done = 0; /* Assignments done. */
189 register size_t read_in = 0; /* Chars read in. */
190 register WINT_T c = 0; /* Last char read. */
191 register int width; /* Maximum field width. */
192 register int flags; /* Modifiers for current format element. */
193 int errval = 0;
194 #ifndef COMPILE_WSCANF
195 __locale_t loc = _NL_CURRENT_LOCALE;
196 struct locale_data *const curctype = loc->__locales[LC_CTYPE];
197 #endif
199 /* Errno of last failed inchar call. */
200 int inchar_errno = 0;
201 /* Status for reading F-P nums. */
202 char got_dot, got_e, negative;
203 /* If a [...] is a [^...]. */
204 CHAR_T not_in;
205 #define exp_char not_in
206 /* Base for integral numbers. */
207 int base;
208 /* Signedness for integral numbers. */
209 int number_signed;
210 #define is_hexa number_signed
211 /* Decimal point character. */
212 #ifdef COMPILE_WSCANF
213 wint_t decimal;
214 #else
215 const char *decimal;
216 #endif
217 /* The thousands character of the current locale. */
218 #ifdef COMPILE_WSCANF
219 wint_t thousands;
220 #else
221 const char *thousands;
222 #endif
223 /* State for the conversions. */
224 mbstate_t state;
225 /* Integral holding variables. */
226 union
228 long long int q;
229 unsigned long long int uq;
230 long int l;
231 unsigned long int ul;
232 } num;
233 /* Character-buffer pointer. */
234 char *str = NULL;
235 wchar_t *wstr = NULL;
236 char **strptr = NULL;
237 ssize_t strsize = 0;
238 /* We must not react on white spaces immediately because they can
239 possibly be matched even if in the input stream no character is
240 available anymore. */
241 int skip_space = 0;
242 /* Nonzero if we are reading a pointer. */
243 int read_pointer;
244 /* Workspace. */
245 CHAR_T *tw; /* Temporary pointer. */
246 CHAR_T *wp = NULL; /* Workspace. */
247 size_t wpmax = 0; /* Maximal size of workspace. */
248 size_t wpsize; /* Currently used bytes in workspace. */
249 #define ADDW(Ch) \
250 do \
252 if (wpsize == wpmax) \
254 CHAR_T *old = wp; \
255 wpmax = (UCHAR_MAX + 1 > 2 * wpmax ? UCHAR_MAX + 1 : 2 * wpmax); \
256 wp = (CHAR_T *) alloca (wpmax * sizeof (wchar_t)); \
257 if (old != NULL) \
258 MEMCPY (wp, old, wpsize); \
260 wp[wpsize++] = (Ch); \
262 while (0)
264 #ifdef __va_copy
265 __va_copy (arg, argptr);
266 #else
267 arg = (va_list) argptr;
268 #endif
270 #ifdef ORIENT
271 ORIENT;
272 #endif
274 ARGCHECK (s, format);
277 #ifndef COMPILE_WSCANF
278 struct locale_data *const curnumeric = loc->__locales[LC_NUMERIC];
279 #endif
281 /* Figure out the decimal point character. */
282 #ifdef COMPILE_WSCANF
283 decimal = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_DECIMAL_POINT_WC);
284 #else
285 decimal = curnumeric->values[_NL_ITEM_INDEX (DECIMAL_POINT)].string;
286 #endif
287 /* Figure out the thousands separator character. */
288 #ifdef COMPILE_WSCANF
289 thousands = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_THOUSANDS_SEP_WC);
290 #else
291 thousands = curnumeric->values[_NL_ITEM_INDEX (THOUSANDS_SEP)].string;
292 if (*thousands == '\0')
293 thousands = NULL;
294 #endif
297 /* Lock the stream. */
298 LOCK_STREAM (s);
301 #ifndef COMPILE_WSCANF
302 /* From now on we use `state' to convert the format string. */
303 memset (&state, '\0', sizeof (state));
304 #endif
306 /* Run through the format string. */
307 while (*f != '\0')
309 unsigned int argpos;
310 /* Extract the next argument, which is of type TYPE.
311 For a %N$... spec, this is the Nth argument from the beginning;
312 otherwise it is the next argument after the state now in ARG. */
313 #ifdef __va_copy
314 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
315 ({ unsigned int pos = argpos; \
316 va_list arg; \
317 __va_copy (arg, argptr); \
318 while (--pos > 0) \
319 (void) va_arg (arg, void *); \
320 va_arg (arg, type); \
322 #else
323 # if 0
324 /* XXX Possible optimization. */
325 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
326 ({ va_list arg = (va_list) argptr; \
327 arg = (va_list) ((char *) arg \
328 + (argpos - 1) \
329 * __va_rounded_size (void *)); \
330 va_arg (arg, type); \
332 # else
333 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
334 ({ unsigned int pos = argpos; \
335 va_list arg = (va_list) argptr; \
336 while (--pos > 0) \
337 (void) va_arg (arg, void *); \
338 va_arg (arg, type); \
340 # endif
341 #endif
343 #ifndef COMPILE_WSCANF
344 if (!isascii ((unsigned char) *f))
346 /* Non-ASCII, may be a multibyte. */
347 int len = __mbrlen (f, strlen (f), &state);
348 if (len > 0)
352 c = inchar ();
353 if (__builtin_expect (c == EOF, 0))
354 input_error ();
355 else if (c != (unsigned char) *f++)
357 ungetc_not_eof (c, s);
358 conv_error ();
361 while (--len > 0);
362 continue;
365 #endif
367 fc = *f++;
368 if (fc != '%')
370 /* Remember to skip spaces. */
371 if (ISSPACE (fc))
373 skip_space = 1;
374 continue;
377 /* Read a character. */
378 c = inchar ();
380 /* Characters other than format specs must just match. */
381 if (__builtin_expect (c == EOF, 0))
382 input_error ();
384 /* We saw white space char as the last character in the format
385 string. Now it's time to skip all leading white space. */
386 if (skip_space)
388 while (ISSPACE (c))
389 if (__builtin_expect (inchar () == EOF, 0))
390 input_error ();
391 skip_space = 0;
394 if (__builtin_expect (c != fc, 0))
396 ungetc (c, s);
397 conv_error ();
400 continue;
403 /* This is the start of the conversion string. */
404 flags = 0;
406 /* Not yet decided whether we read a pointer or not. */
407 read_pointer = 0;
409 /* Initialize state of modifiers. */
410 argpos = 0;
412 /* Prepare temporary buffer. */
413 wpsize = 0;
415 /* Check for a positional parameter specification. */
416 if (ISDIGIT ((UCHAR_T) *f))
418 argpos = (UCHAR_T) *f++ - L_('0');
419 while (ISDIGIT ((UCHAR_T) *f))
420 argpos = argpos * 10 + ((UCHAR_T) *f++ - L_('0'));
421 if (*f == L_('$'))
422 ++f;
423 else
425 /* Oops; that was actually the field width. */
426 width = argpos;
427 flags |= WIDTH;
428 argpos = 0;
429 goto got_width;
433 /* Check for the assignment-suppressing, the number grouping flag,
434 and the signal to use the locale's digit representation. */
435 while (*f == L_('*') || *f == L_('\'') || *f == L_('I'))
436 switch (*f++)
438 case L_('*'):
439 flags |= SUPPRESS;
440 break;
441 case L_('\''):
442 flags |= GROUP;
443 break;
444 case L_('I'):
445 flags |= I18N;
446 break;
449 /* We have seen width. */
450 if (ISDIGIT ((UCHAR_T) *f))
451 flags |= WIDTH;
453 /* Find the maximum field width. */
454 width = 0;
455 while (ISDIGIT ((UCHAR_T) *f))
457 width *= 10;
458 width += (UCHAR_T) *f++ - L_('0');
460 got_width:
461 if (width == 0)
462 width = -1;
464 /* Check for type modifiers. */
465 switch (*f++)
467 case L_('h'):
468 /* ints are short ints or chars. */
469 if (*f == L_('h'))
471 ++f;
472 flags |= CHAR;
474 else
475 flags |= SHORT;
476 break;
477 case L_('l'):
478 if (*f == L_('l'))
480 /* A double `l' is equivalent to an `L'. */
481 ++f;
482 flags |= LONGDBL | LONG;
484 else
485 /* ints are long ints. */
486 flags |= LONG;
487 break;
488 case L_('q'):
489 case L_('L'):
490 /* doubles are long doubles, and ints are long long ints. */
491 flags |= LONGDBL | LONG;
492 break;
493 case L_('a'):
494 /* The `a' is used as a flag only if followed by `s', `S' or
495 `['. */
496 if (*f != L_('s') && *f != L_('S') && *f != L_('['))
498 --f;
499 break;
501 /* String conversions (%s, %[) take a `char **'
502 arg and fill it in with a malloc'd pointer. */
503 flags |= MALLOC;
504 break;
505 case L_('z'):
506 if (need_longlong && sizeof (size_t) > sizeof (unsigned long int))
507 flags |= LONGDBL;
508 else if (sizeof (size_t) > sizeof (unsigned int))
509 flags |= LONG;
510 break;
511 case L_('j'):
512 if (need_longlong && sizeof (uintmax_t) > sizeof (unsigned long int))
513 flags |= LONGDBL;
514 else if (sizeof (uintmax_t) > sizeof (unsigned int))
515 flags |= LONG;
516 break;
517 case L_('t'):
518 if (need_longlong && sizeof (ptrdiff_t) > sizeof (long int))
519 flags |= LONGDBL;
520 else if (sizeof (ptrdiff_t) > sizeof (int))
521 flags |= LONG;
522 break;
523 default:
524 /* Not a recognized modifier. Backup. */
525 --f;
526 break;
529 /* End of the format string? */
530 if (__builtin_expect (*f == L_('\0'), 0))
531 conv_error ();
533 /* Find the conversion specifier. */
534 fc = *f++;
535 if (skip_space || (fc != L_('[') && fc != L_('c')
536 && fc != L_('C') && fc != L_('n')))
538 /* Eat whitespace. */
539 int save_errno = errno;
540 errno = 0;
542 if (__builtin_expect (inchar () == EOF && errno == EINTR, 0))
543 input_error ();
544 while (ISSPACE (c));
545 errno = save_errno;
546 ungetc (c, s);
547 skip_space = 0;
550 switch (fc)
552 case L_('%'): /* Must match a literal '%'. */
553 c = inchar ();
554 if (__builtin_expect (c == EOF, 0))
555 input_error ();
556 if (__builtin_expect (c != fc, 0))
558 ungetc_not_eof (c, s);
559 conv_error ();
561 break;
563 case L_('n'): /* Answer number of assignments done. */
564 /* Corrigendum 1 to ISO C 1990 describes the allowed flags
565 with the 'n' conversion specifier. */
566 if (!(flags & SUPPRESS))
568 /* Don't count the read-ahead. */
569 if (need_longlong && (flags & LONGDBL))
570 *ARG (long long int *) = read_in;
571 else if (need_long && (flags & LONG))
572 *ARG (long int *) = read_in;
573 else if (flags & SHORT)
574 *ARG (short int *) = read_in;
575 else if (!(flags & CHAR))
576 *ARG (int *) = read_in;
577 else
578 *ARG (char *) = read_in;
580 #ifdef NO_BUG_IN_ISO_C_CORRIGENDUM_1
581 /* We have a severe problem here. The ISO C standard
582 contradicts itself in explaining the effect of the %n
583 format in `scanf'. While in ISO C:1990 and the ISO C
584 Amendment 1:1995 the result is described as
586 Execution of a %n directive does not effect the
587 assignment count returned at the completion of
588 execution of the f(w)scanf function.
590 in ISO C Corrigendum 1:1994 the following was added:
592 Subclause 7.9.6.2
593 Add the following fourth example:
595 #include <stdio.h>
596 int d1, d2, n1, n2, i;
597 i = sscanf("123", "%d%n%n%d", &d1, &n1, &n2, &d2);
598 the value 123 is assigned to d1 and the value 3 to n1.
599 Because %n can never get an input failure the value
600 of 3 is also assigned to n2. The value of d2 is not
601 affected. The value 3 is assigned to i.
603 We go for now with the historically correct code from ISO C,
604 i.e., we don't count the %n assignments. When it ever
605 should prove to be wrong just remove the #ifdef above. */
606 ++done;
607 #endif
609 break;
611 case L_('c'): /* Match characters. */
612 if ((flags & LONG) == 0)
614 if (!(flags & SUPPRESS))
616 str = ARG (char *);
617 if (str == NULL)
618 conv_error ();
621 c = inchar ();
622 if (__builtin_expect (c == EOF, 0))
623 input_error ();
625 if (width == -1)
626 width = 1;
628 #ifdef COMPILE_WSCANF
629 /* We have to convert the wide character(s) into multibyte
630 characters and store the result. */
631 memset (&state, '\0', sizeof (state));
635 size_t n;
637 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state);
638 if (__builtin_expect (n == (size_t) -1, 0))
639 /* No valid wide character. */
640 input_error ();
642 /* Increment the output pointer. Even if we don't
643 write anything. */
644 str += n;
646 while (--width > 0 && inchar () != EOF);
647 #else
648 if (!(flags & SUPPRESS))
651 *str++ = c;
652 while (--width > 0 && inchar () != EOF);
654 else
655 while (--width > 0 && inchar () != EOF);
656 #endif
658 if (!(flags & SUPPRESS))
659 ++done;
661 break;
663 /* FALLTHROUGH */
664 case L_('C'):
665 if (!(flags & SUPPRESS))
667 wstr = ARG (wchar_t *);
668 if (wstr == NULL)
669 conv_error ();
672 c = inchar ();
673 if (__builtin_expect (c == EOF, 0))
674 input_error ();
676 #ifdef COMPILE_WSCANF
677 /* Just store the incoming wide characters. */
678 if (!(flags & SUPPRESS))
681 *wstr++ = c;
682 while (--width > 0 && inchar () != EOF);
684 else
685 while (--width > 0 && inchar () != EOF);
686 #else
688 /* We have to convert the multibyte input sequence to wide
689 characters. */
690 char buf[1];
691 mbstate_t cstate;
693 memset (&cstate, '\0', sizeof (cstate));
697 /* This is what we present the mbrtowc function first. */
698 buf[0] = c;
700 while (1)
702 size_t n;
704 n = __mbrtowc (!(flags & SUPPRESS) ? wstr : NULL,
705 buf, 1, &cstate);
707 if (n == (size_t) -2)
709 /* Possibly correct character, just not enough
710 input. */
711 if (__builtin_expect (inchar () == EOF, 0))
712 encode_error ();
714 buf[0] = c;
715 continue;
718 if (__builtin_expect (n != 1, 0))
719 encode_error ();
721 /* We have a match. */
722 break;
725 /* Advance the result pointer. */
726 ++wstr;
728 while (--width > 0 && inchar () != EOF);
730 #endif
732 if (!(flags & SUPPRESS))
733 ++done;
735 break;
737 case L_('s'): /* Read a string. */
738 if (!(flags & LONG))
740 #define STRING_ARG(Str, Type) \
741 do if (!(flags & SUPPRESS)) \
743 if (flags & MALLOC) \
745 /* The string is to be stored in a malloc'd buffer. */ \
746 strptr = ARG (char **); \
747 if (strptr == NULL) \
748 conv_error (); \
749 /* Allocate an initial buffer. */ \
750 strsize = 100; \
751 *strptr = (char *) malloc (strsize * sizeof (Type)); \
752 Str = (Type *) *strptr; \
754 else \
755 Str = ARG (Type *); \
756 if (Str == NULL) \
757 conv_error (); \
758 } while (0)
759 STRING_ARG (str, char);
761 c = inchar ();
762 if (__builtin_expect (c == EOF, 0))
763 input_error ();
765 #ifdef COMPILE_WSCANF
766 memset (&state, '\0', sizeof (state));
767 #endif
771 if (ISSPACE (c))
773 ungetc_not_eof (c, s);
774 break;
777 #ifdef COMPILE_WSCANF
778 /* This is quite complicated. We have to convert the
779 wide characters into multibyte characters and then
780 store them. */
782 size_t n;
784 if (!(flags & SUPPRESS) && (flags & MALLOC)
785 && str + MB_CUR_MAX >= *strptr + strsize)
787 /* We have to enlarge the buffer if the `a' flag
788 was given. */
789 size_t strleng = str - *strptr;
790 char *newstr;
792 newstr = (char *) realloc (*strptr, strsize * 2);
793 if (newstr == NULL)
795 /* Can't allocate that much. Last-ditch
796 effort. */
797 newstr = (char *) realloc (*strptr,
798 strleng + MB_CUR_MAX);
799 if (newstr == NULL)
801 /* We lose. Oh well. Terminate the
802 string and stop converting,
803 so at least we don't skip any input. */
804 ((char *) (*strptr))[strleng] = '\0';
805 ++done;
806 conv_error ();
808 else
810 *strptr = newstr;
811 str = newstr + strleng;
812 strsize = strleng + MB_CUR_MAX;
815 else
817 *strptr = newstr;
818 str = newstr + strleng;
819 strsize *= 2;
823 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c,
824 &state);
825 if (__builtin_expect (n == (size_t) -1, 0))
826 encode_error ();
828 assert (n <= MB_CUR_MAX);
829 str += n;
831 #else
832 /* This is easy. */
833 if (!(flags & SUPPRESS))
835 *str++ = c;
836 if ((flags & MALLOC)
837 && (char *) str == *strptr + strsize)
839 /* Enlarge the buffer. */
840 str = (char *) realloc (*strptr, 2 * strsize);
841 if (str == NULL)
843 /* Can't allocate that much. Last-ditch
844 effort. */
845 str = (char *) realloc (*strptr, strsize + 1);
846 if (str == NULL)
848 /* We lose. Oh well. Terminate the
849 string and stop converting,
850 so at least we don't skip any input. */
851 ((char *) (*strptr))[strsize - 1] = '\0';
852 ++done;
853 conv_error ();
855 else
857 *strptr = (char *) str;
858 str += strsize;
859 ++strsize;
862 else
864 *strptr = (char *) str;
865 str += strsize;
866 strsize *= 2;
870 #endif
872 while ((width <= 0 || --width > 0) && inchar () != EOF);
874 if (!(flags & SUPPRESS))
876 #ifdef COMPILE_WSCANF
877 /* We have to emit the code to get into the initial
878 state. */
879 char buf[MB_LEN_MAX];
880 size_t n = __wcrtomb (buf, L'\0', &state);
881 if (n > 0 && (flags & MALLOC)
882 && str + n >= *strptr + strsize)
884 /* Enlarge the buffer. */
885 size_t strleng = str - *strptr;
886 char *newstr;
888 newstr = (char *) realloc (*strptr, strleng + n + 1);
889 if (newstr == NULL)
891 /* We lose. Oh well. Terminate the string
892 and stop converting, so at least we don't
893 skip any input. */
894 ((char *) (*strptr))[strleng] = '\0';
895 ++done;
896 conv_error ();
898 else
900 *strptr = newstr;
901 str = newstr + strleng;
902 strsize = strleng + n + 1;
906 str = __mempcpy (str, buf, n);
907 #endif
908 *str++ = '\0';
910 if ((flags & MALLOC) && str - *strptr != strsize)
912 char *cp = (char *) realloc (*strptr, str - *strptr);
913 if (cp != NULL)
914 *strptr = cp;
917 ++done;
919 break;
921 /* FALLTHROUGH */
923 case L_('S'):
925 #ifndef COMPILE_WSCANF
926 mbstate_t cstate;
927 #endif
929 /* Wide character string. */
930 STRING_ARG (wstr, wchar_t);
932 c = inchar ();
933 if (__builtin_expect (c == EOF, 0))
934 input_error ();
936 #ifndef COMPILE_WSCANF
937 memset (&cstate, '\0', sizeof (cstate));
938 #endif
942 if (ISSPACE (c))
944 ungetc_not_eof (c, s);
945 break;
948 #ifdef COMPILE_WSCANF
949 /* This is easy. */
950 if (!(flags & SUPPRESS))
952 *wstr++ = c;
953 if ((flags & MALLOC)
954 && wstr == (wchar_t *) *strptr + strsize)
956 /* Enlarge the buffer. */
957 wstr = (wchar_t *) realloc (*strptr,
958 (2 * strsize)
959 * sizeof (wchar_t));
960 if (wstr == NULL)
962 /* Can't allocate that much. Last-ditch
963 effort. */
964 wstr = (wchar_t *) realloc (*strptr,
965 (strsize + 1)
966 * sizeof (wchar_t));
967 if (wstr == NULL)
969 /* We lose. Oh well. Terminate the string
970 and stop converting, so at least we don't
971 skip any input. */
972 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
973 ++done;
974 conv_error ();
976 else
978 *strptr = (char *) wstr;
979 wstr += strsize;
980 ++strsize;
983 else
985 *strptr = (char *) wstr;
986 wstr += strsize;
987 strsize *= 2;
991 #else
993 char buf[1];
995 buf[0] = c;
997 while (1)
999 size_t n;
1001 n = __mbrtowc (!(flags & SUPPRESS) ? wstr : NULL,
1002 buf, 1, &cstate);
1004 if (n == (size_t) -2)
1006 /* Possibly correct character, just not enough
1007 input. */
1008 if (__builtin_expect (inchar () == EOF, 0))
1009 encode_error ();
1011 buf[0] = c;
1012 continue;
1015 if (__builtin_expect (n != 1, 0))
1016 encode_error ();
1018 /* We have a match. */
1019 ++wstr;
1020 break;
1023 if (!(flags & SUPPRESS) && (flags & MALLOC)
1024 && wstr == (wchar_t *) *strptr + strsize)
1026 /* Enlarge the buffer. */
1027 wstr = (wchar_t *) realloc (*strptr,
1028 (2 * strsize
1029 * sizeof (wchar_t)));
1030 if (wstr == NULL)
1032 /* Can't allocate that much. Last-ditch effort. */
1033 wstr = (wchar_t *) realloc (*strptr,
1034 ((strsize + 1)
1035 * sizeof (wchar_t)));
1036 if (wstr == NULL)
1038 /* We lose. Oh well. Terminate the
1039 string and stop converting, so at
1040 least we don't skip any input. */
1041 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
1042 ++done;
1043 conv_error ();
1045 else
1047 *strptr = (char *) wstr;
1048 wstr += strsize;
1049 ++strsize;
1052 else
1054 *strptr = (char *) wstr;
1055 wstr += strsize;
1056 strsize *= 2;
1060 #endif
1062 while ((width <= 0 || --width > 0) && inchar () != EOF);
1064 if (!(flags & SUPPRESS))
1066 *wstr++ = L'\0';
1068 if ((flags & MALLOC) && wstr - (wchar_t *) *strptr != strsize)
1070 wchar_t *cp = (wchar_t *) realloc (*strptr,
1071 ((wstr
1072 - (wchar_t *) *strptr)
1073 * sizeof(wchar_t)));
1074 if (cp != NULL)
1075 *strptr = (char *) cp;
1078 ++done;
1081 break;
1083 case L_('x'): /* Hexadecimal integer. */
1084 case L_('X'): /* Ditto. */
1085 base = 16;
1086 number_signed = 0;
1087 goto number;
1089 case L_('o'): /* Octal integer. */
1090 base = 8;
1091 number_signed = 0;
1092 goto number;
1094 case L_('u'): /* Unsigned decimal integer. */
1095 base = 10;
1096 number_signed = 0;
1097 goto number;
1099 case L_('d'): /* Signed decimal integer. */
1100 base = 10;
1101 number_signed = 1;
1102 goto number;
1104 case L_('i'): /* Generic number. */
1105 base = 0;
1106 number_signed = 1;
1108 number:
1109 c = inchar ();
1110 if (__builtin_expect (c == EOF, 0))
1111 input_error ();
1113 /* Check for a sign. */
1114 if (c == L_('-') || c == L_('+'))
1116 ADDW (c);
1117 if (width > 0)
1118 --width;
1119 c = inchar ();
1122 /* Look for a leading indication of base. */
1123 if (width != 0 && c == L_('0'))
1125 if (width > 0)
1126 --width;
1128 ADDW (c);
1129 c = inchar ();
1131 if (width != 0 && TOLOWER (c) == L_('x'))
1133 if (base == 0)
1134 base = 16;
1135 if (base == 16)
1137 if (width > 0)
1138 --width;
1139 c = inchar ();
1142 else if (base == 0)
1143 base = 8;
1146 if (base == 0)
1147 base = 10;
1149 if (base == 10 && __builtin_expect ((flags & I18N) != 0, 0))
1151 int from_level;
1152 int to_level;
1153 int level;
1154 #ifdef COMPILE_WSCANF
1155 const wchar_t *wcdigits[10];
1156 const wchar_t *wcdigits_extended[10];
1157 #else
1158 const char *mbdigits[10];
1159 const char *mbdigits_extended[10];
1160 #endif
1161 /* "to_inpunct" is a map from ASCII digits to their
1162 equivalent in locale. This is defined for locales
1163 which use an extra digits set. */
1164 wctrans_t map = __wctrans ("to_inpunct");
1165 int n;
1167 from_level = 0;
1168 #ifdef COMPILE_WSCANF
1169 to_level = _NL_CURRENT_WORD (LC_CTYPE,
1170 _NL_CTYPE_INDIGITS_WC_LEN) - 1;
1171 #else
1172 to_level = (uint32_t) curctype->values[_NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN)].word - 1;
1173 #endif
1175 /* Get the alternative digit forms if there are any. */
1176 if (__builtin_expect (map != NULL, 0))
1178 /* Adding new level for extra digits set in locale file. */
1179 ++to_level;
1181 for (n = 0; n < 10; ++n)
1183 #ifdef COMPILE_WSCANF
1184 wcdigits[n] = (const wchar_t *)
1185 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
1187 wchar_t *wc_extended = (wchar_t *)
1188 alloca ((to_level + 2) * sizeof (wchar_t));
1189 __wmemcpy (wc_extended, wcdigits[n], to_level);
1190 wc_extended[to_level] = __towctrans (L'0' + n, map);
1191 wc_extended[to_level + 1] = '\0';
1192 wcdigits_extended[n] = wc_extended;
1193 #else
1194 mbdigits[n]
1195 = curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string;
1197 /* Get the equivalent wide char in map. */
1198 wint_t extra_wcdigit = __towctrans (L'0' + n, map);
1200 /* Convert it to multibyte representation. */
1201 mbstate_t state;
1202 memset (&state, '\0', sizeof (state));
1204 char extra_mbdigit[MB_LEN_MAX];
1205 size_t mblen
1206 = __wcrtomb (extra_mbdigit, extra_wcdigit, &state);
1208 if (mblen == (size_t) -1)
1210 /* Ignore this new level. */
1211 map = NULL;
1212 break;
1215 /* Calculate the length of mbdigits[n]. */
1216 const char *last_char = mbdigits[n];
1217 for (level = 0; level < to_level; ++level)
1218 last_char = strchr (last_char, '\0') + 1;
1220 size_t mbdigits_len = last_char - mbdigits[n];
1222 /* Allocate memory for extended multibyte digit. */
1223 char *mb_extended;
1224 mb_extended = (char *) alloca (mbdigits_len + mblen + 1);
1226 /* And get the mbdigits + extra_digit string. */
1227 *(char *) __mempcpy (__mempcpy (mb_extended, mbdigits[n],
1228 mbdigits_len),
1229 extra_mbdigit, mblen) = '\0';
1230 mbdigits_extended[n] = mb_extended;
1231 #endif
1235 /* Read the number into workspace. */
1236 while (c != EOF && width != 0)
1238 /* In this round we get the pointer to the digit strings
1239 and also perform the first round of comparisons. */
1240 for (n = 0; n < 10; ++n)
1242 /* Get the string for the digits with value N. */
1243 #ifdef COMPILE_WSCANF
1244 if (__builtin_expect (map != NULL, 0))
1245 wcdigits[n] = wcdigits_extended[n];
1246 else
1247 wcdigits[n] = (const wchar_t *)
1248 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
1249 wcdigits[n] += from_level;
1251 if (c == (wint_t) *wcdigits[n])
1253 to_level = from_level;
1254 break;
1257 /* Advance the pointer to the next string. */
1258 ++wcdigits[n];
1259 #else
1260 const char *cmpp;
1261 int avail = width > 0 ? width : INT_MAX;
1263 if (__builtin_expect (map != NULL, 0))
1264 mbdigits[n] = mbdigits_extended[n];
1265 else
1266 mbdigits[n]
1267 = curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string;
1269 for (level = 0; level < from_level; level++)
1270 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1272 cmpp = mbdigits[n];
1273 while ((unsigned char) *cmpp == c && avail > 0)
1275 if (*++cmpp == '\0')
1276 break;
1277 else
1279 if ((c = inchar ()) == EOF)
1280 break;
1281 --avail;
1285 if (*cmpp == '\0')
1287 if (width > 0)
1288 width = avail;
1289 to_level = from_level;
1290 break;
1293 /* We are pushing all read characters back. */
1294 if (cmpp > mbdigits[n])
1296 ungetc (c, s);
1297 while (--cmpp > mbdigits[n])
1298 ungetc_not_eof ((unsigned char) *cmpp, s);
1299 c = (unsigned char) *cmpp;
1302 /* Advance the pointer to the next string. */
1303 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1304 #endif
1307 if (n == 10)
1309 /* Have not yet found the digit. */
1310 for (level = from_level + 1; level <= to_level; ++level)
1312 /* Search all ten digits of this level. */
1313 for (n = 0; n < 10; ++n)
1315 #ifdef COMPILE_WSCANF
1316 if (c == (wint_t) *wcdigits[n])
1317 break;
1319 /* Advance the pointer to the next string. */
1320 ++wcdigits[n];
1321 #else
1322 const char *cmpp;
1323 int avail = width > 0 ? width : INT_MAX;
1325 cmpp = mbdigits[n];
1326 while ((unsigned char) *cmpp == c && avail > 0)
1328 if (*++cmpp == '\0')
1329 break;
1330 else
1332 if ((c = inchar ()) == EOF)
1333 break;
1334 --avail;
1338 if (*cmpp == '\0')
1340 if (width > 0)
1341 width = avail;
1342 break;
1345 /* We are pushing all read characters back. */
1346 if (cmpp > mbdigits[n])
1348 ungetc (c, s);
1349 while (--cmpp > mbdigits[n])
1350 ungetc_not_eof ((unsigned char) *cmpp, s);
1351 c = (unsigned char) *cmpp;
1354 /* Advance the pointer to the next string. */
1355 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1356 #endif
1359 if (n < 10)
1361 /* Found it. */
1362 from_level = level;
1363 to_level = level;
1364 break;
1369 if (n < 10)
1370 c = L_('0') + n;
1371 else if ((flags & GROUP)
1372 #ifdef COMPILE_WSCANF
1373 && thousands != L'\0'
1374 #else
1375 && thousands != NULL
1376 #endif
1379 /* Try matching against the thousands separator. */
1380 #ifdef COMPILE_WSCANF
1381 if (c != thousands)
1382 break;
1383 #else
1384 const char *cmpp = thousands;
1385 int avail = width > 0 ? width : INT_MAX;
1387 while ((unsigned char) *cmpp == c && avail > 0)
1389 ADDW (c);
1390 if (*++cmpp == '\0')
1391 break;
1392 else
1394 if ((c = inchar ()) == EOF)
1395 break;
1396 --avail;
1400 if (*cmpp != '\0')
1402 /* We are pushing all read characters back. */
1403 if (cmpp > thousands)
1405 wpsize -= cmpp - thousands;
1406 ungetc (c, s);
1407 while (--cmpp > thousands)
1408 ungetc_not_eof ((unsigned char) *cmpp, s);
1409 c = (unsigned char) *cmpp;
1411 break;
1414 if (width > 0)
1415 width = avail;
1417 /* The last thousands character will be added back by
1418 the ADDW below. */
1419 --wpsize;
1420 #endif
1422 else
1423 break;
1425 ADDW (c);
1426 if (width > 0)
1427 --width;
1429 c = inchar ();
1432 else
1433 /* Read the number into workspace. */
1434 while (c != EOF && width != 0)
1436 if (base == 16)
1438 if (!ISXDIGIT (c))
1439 break;
1441 else if (!ISDIGIT (c) || (int) (c - L_('0')) >= base)
1443 if (base == 10 && (flags & GROUP)
1444 #ifdef COMPILE_WSCANF
1445 && thousands != L'\0'
1446 #else
1447 && thousands != NULL
1448 #endif
1451 /* Try matching against the thousands separator. */
1452 #ifdef COMPILE_WSCANF
1453 if (c != thousands)
1454 break;
1455 #else
1456 const char *cmpp = thousands;
1457 int avail = width > 0 ? width : INT_MAX;
1459 while ((unsigned char) *cmpp == c && avail > 0)
1461 ADDW (c);
1462 if (*++cmpp == '\0')
1463 break;
1464 else
1466 if ((c = inchar ()) == EOF)
1467 break;
1468 --avail;
1472 if (*cmpp != '\0')
1474 /* We are pushing all read characters back. */
1475 if (cmpp > thousands)
1477 wpsize -= cmpp - thousands;
1478 ungetc (c, s);
1479 while (--cmpp > thousands)
1480 ungetc_not_eof ((unsigned char) *cmpp, s);
1481 c = (unsigned char) *cmpp;
1483 break;
1486 if (width > 0)
1487 width = avail;
1489 /* The last thousands character will be added back by
1490 the ADDW below. */
1491 --wpsize;
1492 #endif
1494 else
1495 break;
1497 ADDW (c);
1498 if (width > 0)
1499 --width;
1501 c = inchar ();
1504 if (wpsize == 0
1505 || (wpsize == 1 && (wp[0] == L_('+') || wp[0] == L_('-'))))
1507 /* There was no number. If we are supposed to read a pointer
1508 we must recognize "(nil)" as well. */
1509 if (__builtin_expect (wpsize == 0
1510 && read_pointer
1511 && (width < 0 || width >= 0)
1512 && c == '('
1513 && TOLOWER (inchar ()) == L_('n')
1514 && TOLOWER (inchar ()) == L_('i')
1515 && TOLOWER (inchar ()) == L_('l')
1516 && inchar () == L_(')'), 1))
1517 /* We must produce the value of a NULL pointer. A single
1518 '0' digit is enough. */
1519 ADDW (L_('0'));
1520 else
1522 /* The last read character is not part of the number
1523 anymore. */
1524 ungetc (c, s);
1526 conv_error ();
1529 else
1530 /* The just read character is not part of the number anymore. */
1531 ungetc (c, s);
1533 /* Convert the number. */
1534 ADDW (L_('\0'));
1535 if (need_longlong && (flags & LONGDBL))
1537 if (number_signed)
1538 num.q = __strtoll_internal (wp, &tw, base, flags & GROUP);
1539 else
1540 num.uq = __strtoull_internal (wp, &tw, base, flags & GROUP);
1542 else
1544 if (number_signed)
1545 num.l = __strtol_internal (wp, &tw, base, flags & GROUP);
1546 else
1547 num.ul = __strtoul_internal (wp, &tw, base, flags & GROUP);
1549 if (__builtin_expect (wp == tw, 0))
1550 conv_error ();
1552 if (!(flags & SUPPRESS))
1554 if (! number_signed)
1556 if (need_longlong && (flags & LONGDBL))
1557 *ARG (unsigned LONGLONG int *) = num.uq;
1558 else if (need_long && (flags & LONG))
1559 *ARG (unsigned long int *) = num.ul;
1560 else if (flags & SHORT)
1561 *ARG (unsigned short int *)
1562 = (unsigned short int) num.ul;
1563 else if (!(flags & CHAR))
1564 *ARG (unsigned int *) = (unsigned int) num.ul;
1565 else
1566 *ARG (unsigned char *) = (unsigned char) num.ul;
1568 else
1570 if (need_longlong && (flags & LONGDBL))
1571 *ARG (LONGLONG int *) = num.q;
1572 else if (need_long && (flags & LONG))
1573 *ARG (long int *) = num.l;
1574 else if (flags & SHORT)
1575 *ARG (short int *) = (short int) num.l;
1576 else if (!(flags & CHAR))
1577 *ARG (int *) = (int) num.l;
1578 else
1579 *ARG (signed char *) = (signed char) num.ul;
1581 ++done;
1583 break;
1585 case L_('e'): /* Floating-point numbers. */
1586 case L_('E'):
1587 case L_('f'):
1588 case L_('F'):
1589 case L_('g'):
1590 case L_('G'):
1591 case L_('a'):
1592 case L_('A'):
1593 c = inchar ();
1594 if (__builtin_expect (c == EOF, 0))
1595 input_error ();
1597 got_dot = got_e = 0;
1599 /* Check for a sign. */
1600 if (c == L_('-') || c == L_('+'))
1602 negative = c == L_('-');
1603 if (__builtin_expect (width == 0 || inchar () == EOF, 0))
1604 /* EOF is only an input error before we read any chars. */
1605 conv_error ();
1606 if (! ISDIGIT (c) && TOLOWER (c) != L_('i')
1607 && TOLOWER (c) != L_('n'))
1609 #ifdef COMPILE_WSCANF
1610 if (__builtin_expect (c != decimal, 0))
1612 /* This is no valid number. */
1613 ungetc (c, s);
1614 conv_error ();
1616 #else
1617 /* Match against the decimal point. At this point
1618 we are taking advantage of the fact that we can
1619 push more than one character back. This is
1620 (almost) never necessary since the decimal point
1621 string hopefully never contains more than one
1622 byte. */
1623 const char *cmpp = decimal;
1624 int avail = width > 0 ? width : INT_MAX;
1626 while ((unsigned char) *cmpp == c && avail-- > 0)
1627 if (*++cmpp == '\0')
1628 break;
1629 else
1631 if (inchar () == EOF)
1632 break;
1635 if (__builtin_expect (*cmpp != '\0', 0))
1637 /* This is no valid number. */
1638 while (1)
1640 ungetc (c, s);
1641 if (cmpp == decimal)
1642 break;
1643 c = (unsigned char) *--cmpp;
1646 conv_error ();
1648 else
1650 /* Add all the characters. */
1651 for (cmpp = decimal; *cmpp != '\0'; ++cmpp)
1652 ADDW ((unsigned char) *cmpp);
1653 if (width > 0)
1654 width = avail;
1655 got_dot = 1;
1657 c = inchar ();
1659 if (width > 0)
1660 width = avail;
1661 #endif
1663 if (width > 0)
1664 --width;
1666 else
1667 negative = 0;
1669 /* Take care for the special arguments "nan" and "inf". */
1670 if (TOLOWER (c) == L_('n'))
1672 /* Maybe "nan". */
1673 ADDW (c);
1674 if (__builtin_expect (width == 0
1675 || inchar () == EOF
1676 || TOLOWER (c) != L_('a'), 0))
1677 conv_error ();
1678 if (width > 0)
1679 --width;
1680 ADDW (c);
1681 if (__builtin_expect (width == 0
1682 || inchar () == EOF
1683 || TOLOWER (c) != L_('n'), 0))
1684 conv_error ();
1685 if (width > 0)
1686 --width;
1687 ADDW (c);
1688 /* It is "nan". */
1689 goto scan_float;
1691 else if (TOLOWER (c) == L_('i'))
1693 /* Maybe "inf" or "infinity". */
1694 ADDW (c);
1695 if (__builtin_expect (width == 0
1696 || inchar () == EOF
1697 || TOLOWER (c) != L_('n'), 0))
1698 conv_error ();
1699 if (width > 0)
1700 --width;
1701 ADDW (c);
1702 if (__builtin_expect (width == 0
1703 || inchar () == EOF
1704 || TOLOWER (c) != L_('f'), 0))
1705 conv_error ();
1706 if (width > 0)
1707 --width;
1708 ADDW (c);
1709 /* It is at least "inf". */
1710 if (width != 0 && inchar () != EOF)
1712 if (TOLOWER (c) == L_('i'))
1714 if (width > 0)
1715 --width;
1716 /* Now we have to read the rest as well. */
1717 ADDW (c);
1718 if (__builtin_expect (width == 0
1719 || inchar () == EOF
1720 || TOLOWER (c) != L_('n'), 0))
1721 conv_error ();
1722 if (width > 0)
1723 --width;
1724 ADDW (c);
1725 if (__builtin_expect (width == 0
1726 || inchar () == EOF
1727 || TOLOWER (c) != L_('i'), 0))
1728 conv_error ();
1729 if (width > 0)
1730 --width;
1731 ADDW (c);
1732 if (__builtin_expect (width == 0
1733 || inchar () == EOF
1734 || TOLOWER (c) != L_('t'), 0))
1735 conv_error ();
1736 if (width > 0)
1737 --width;
1738 ADDW (c);
1739 if (__builtin_expect (width == 0
1740 || inchar () == EOF
1741 || TOLOWER (c) != L_('y'), 0))
1742 conv_error ();
1743 if (width > 0)
1744 --width;
1745 ADDW (c);
1747 else
1748 /* Never mind. */
1749 ungetc (c, s);
1751 goto scan_float;
1754 is_hexa = 0;
1755 exp_char = L_('e');
1756 if (width != 0 && c == L_('0'))
1758 ADDW (c);
1759 c = inchar ();
1760 if (width > 0)
1761 --width;
1762 if (width != 0 && TOLOWER (c) == L_('x'))
1764 /* It is a number in hexadecimal format. */
1765 ADDW (c);
1767 is_hexa = 1;
1768 exp_char = L_('p');
1770 /* Grouping is not allowed. */
1771 flags &= ~GROUP;
1772 c = inchar ();
1773 if (width > 0)
1774 --width;
1780 if (ISDIGIT (c))
1781 ADDW (c);
1782 else if (!got_e && is_hexa && ISXDIGIT (c))
1783 ADDW (c);
1784 else if (got_e && wp[wpsize - 1] == exp_char
1785 && (c == L_('-') || c == L_('+')))
1786 ADDW (c);
1787 else if (wpsize > 0 && !got_e
1788 && (CHAR_T) TOLOWER (c) == exp_char)
1790 ADDW (exp_char);
1791 got_e = got_dot = 1;
1793 else
1795 #ifdef COMPILE_WSCANF
1796 if (! got_dot && c == decimal)
1798 ADDW (c);
1799 got_dot = 1;
1801 else if ((flags & GROUP) != 0 && thousands != L'\0'
1802 && ! got_dot && c == thousands)
1803 ADDW (c);
1804 else
1806 /* The last read character is not part of the number
1807 anymore. */
1808 ungetc (c, s);
1809 break;
1811 #else
1812 const char *cmpp = decimal;
1813 int avail = width > 0 ? width : INT_MAX;
1815 if (! got_dot)
1817 while ((unsigned char) *cmpp == c && avail > 0)
1818 if (*++cmpp == '\0')
1819 break;
1820 else
1822 if (inchar () == EOF)
1823 break;
1824 --avail;
1828 if (*cmpp == '\0')
1830 /* Add all the characters. */
1831 for (cmpp = decimal; *cmpp != '\0'; ++cmpp)
1832 ADDW ((unsigned char) *cmpp);
1833 if (width > 0)
1834 width = avail;
1835 got_dot = 1;
1837 else
1839 /* Figure out whether it is a thousands separator.
1840 There is one problem: we possibly read more than
1841 one character. We cannot push them back but since
1842 we know that parts of the `decimal' string matched,
1843 we can compare against it. */
1844 const char *cmp2p = thousands;
1846 if ((flags & GROUP) != 0 && thousands != NULL
1847 && ! got_dot)
1849 while (cmp2p - thousands < cmpp - decimal
1850 && *cmp2p == decimal[cmp2p - thousands])
1851 ++cmp2p;
1852 if (cmp2p - thousands == cmpp - decimal)
1854 while ((unsigned char) *cmp2p == c && avail > 0)
1855 if (*++cmp2p == '\0')
1856 break;
1857 else
1859 if (inchar () == EOF)
1860 break;
1861 --avail;
1866 if (cmp2p != NULL && *cmp2p == '\0')
1868 /* Add all the characters. */
1869 for (cmpp = thousands; *cmpp != '\0'; ++cmpp)
1870 ADDW ((unsigned char) *cmpp);
1871 if (width > 0)
1872 width = avail;
1874 else
1876 /* The last read character is not part of the number
1877 anymore. */
1878 ungetc (c, s);
1879 break;
1882 #endif
1884 if (width > 0)
1885 --width;
1887 while (width != 0 && inchar () != EOF);
1889 /* Have we read any character? If we try to read a number
1890 in hexadecimal notation and we have read only the `0x'
1891 prefix or no exponent this is an error. */
1892 if (__builtin_expect (wpsize == 0
1893 || (is_hexa && (wpsize == 2 || ! got_e)), 0))
1894 conv_error ();
1896 scan_float:
1897 /* Convert the number. */
1898 ADDW (L_('\0'));
1899 if ((flags & LONGDBL) && !__ldbl_is_dbl)
1901 long double d = __strtold_internal (wp, &tw, flags & GROUP);
1902 if (!(flags & SUPPRESS) && tw != wp)
1903 *ARG (long double *) = negative ? -d : d;
1905 else if (flags & (LONG | LONGDBL))
1907 double d = __strtod_internal (wp, &tw, flags & GROUP);
1908 if (!(flags & SUPPRESS) && tw != wp)
1909 *ARG (double *) = negative ? -d : d;
1911 else
1913 float d = __strtof_internal (wp, &tw, flags & GROUP);
1914 if (!(flags & SUPPRESS) && tw != wp)
1915 *ARG (float *) = negative ? -d : d;
1918 if (__builtin_expect (tw == wp, 0))
1919 conv_error ();
1921 if (!(flags & SUPPRESS))
1922 ++done;
1923 break;
1925 case L_('['): /* Character class. */
1926 if (flags & LONG)
1927 STRING_ARG (wstr, wchar_t);
1928 else
1929 STRING_ARG (str, char);
1931 if (*f == L_('^'))
1933 ++f;
1934 not_in = 1;
1936 else
1937 not_in = 0;
1939 if (width < 0)
1940 /* There is no width given so there is also no limit on the
1941 number of characters we read. Therefore we set width to
1942 a very high value to make the algorithm easier. */
1943 width = INT_MAX;
1945 #ifdef COMPILE_WSCANF
1946 /* Find the beginning and the end of the scanlist. We are not
1947 creating a lookup table since it would have to be too large.
1948 Instead we search each time through the string. This is not
1949 a constant lookup time but who uses this feature deserves to
1950 be punished. */
1951 tw = (wchar_t *) f; /* Marks the beginning. */
1953 if (*f == L']')
1954 ++f;
1956 while ((fc = *f++) != L'\0' && fc != L']');
1958 if (__builtin_expect (fc == L'\0', 0))
1959 conv_error ();
1960 wp = (wchar_t *) f - 1;
1961 #else
1962 /* Fill WP with byte flags indexed by character.
1963 We will use this flag map for matching input characters. */
1964 if (wpmax < UCHAR_MAX + 1)
1966 wpmax = UCHAR_MAX + 1;
1967 wp = (char *) alloca (wpmax);
1969 memset (wp, '\0', UCHAR_MAX + 1);
1971 fc = *f;
1972 if (fc == ']' || fc == '-')
1974 /* If ] or - appears before any char in the set, it is not
1975 the terminator or separator, but the first char in the
1976 set. */
1977 wp[fc] = 1;
1978 ++f;
1981 while ((fc = *f++) != '\0' && fc != ']')
1982 if (fc == '-' && *f != '\0' && *f != ']'
1983 && (unsigned char) f[-2] <= (unsigned char) *f)
1985 /* Add all characters from the one before the '-'
1986 up to (but not including) the next format char. */
1987 for (fc = (unsigned char) f[-2]; fc < (unsigned char) *f; ++fc)
1988 wp[fc] = 1;
1990 else
1991 /* Add the character to the flag map. */
1992 wp[fc] = 1;
1994 if (__builtin_expect (fc == '\0', 0))
1995 conv_error();
1996 #endif
1998 if (flags & LONG)
2000 size_t now = read_in;
2001 #ifdef COMPILE_WSCANF
2002 if (__builtin_expect (inchar () == WEOF, 0))
2003 input_error ();
2007 wchar_t *runp;
2009 /* Test whether it's in the scanlist. */
2010 runp = tw;
2011 while (runp < wp)
2013 if (runp[0] == L'-' && runp[1] != '\0' && runp + 1 != wp
2014 && runp != tw
2015 && (unsigned int) runp[-1] <= (unsigned int) runp[1])
2017 /* Match against all characters in between the
2018 first and last character of the sequence. */
2019 wchar_t wc;
2021 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc)
2022 if ((wint_t) wc == c)
2023 break;
2025 if (wc <= runp[1] && !not_in)
2026 break;
2027 if (wc <= runp[1] && not_in)
2029 /* The current character is not in the
2030 scanset. */
2031 ungetc (c, s);
2032 goto out;
2035 runp += 2;
2037 else
2039 if ((wint_t) *runp == c && !not_in)
2040 break;
2041 if ((wint_t) *runp == c && not_in)
2043 ungetc (c, s);
2044 goto out;
2047 ++runp;
2051 if (runp == wp && !not_in)
2053 ungetc (c, s);
2054 goto out;
2057 if (!(flags & SUPPRESS))
2059 *wstr++ = c;
2061 if ((flags & MALLOC)
2062 && wstr == (wchar_t *) *strptr + strsize)
2064 /* Enlarge the buffer. */
2065 wstr = (wchar_t *) realloc (*strptr,
2066 (2 * strsize)
2067 * sizeof (wchar_t));
2068 if (wstr == NULL)
2070 /* Can't allocate that much. Last-ditch
2071 effort. */
2072 wstr = (wchar_t *)
2073 realloc (*strptr, (strsize + 1)
2074 * sizeof (wchar_t));
2075 if (wstr == NULL)
2077 /* We lose. Oh well. Terminate the string
2078 and stop converting, so at least we don't
2079 skip any input. */
2080 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
2081 ++done;
2082 conv_error ();
2084 else
2086 *strptr = (char *) wstr;
2087 wstr += strsize;
2088 ++strsize;
2091 else
2093 *strptr = (char *) wstr;
2094 wstr += strsize;
2095 strsize *= 2;
2100 while (--width > 0 && inchar () != WEOF);
2101 out:
2102 #else
2103 char buf[MB_LEN_MAX];
2104 size_t cnt = 0;
2105 mbstate_t cstate;
2107 if (__builtin_expect (inchar () == EOF, 0))
2108 input_error ();
2110 memset (&cstate, '\0', sizeof (cstate));
2114 if (wp[c] == not_in)
2116 ungetc_not_eof (c, s);
2117 break;
2120 /* This is easy. */
2121 if (!(flags & SUPPRESS))
2123 size_t n;
2125 /* Convert it into a wide character. */
2126 buf[0] = c;
2127 n = __mbrtowc (wstr, buf, 1, &cstate);
2129 if (n == (size_t) -2)
2131 /* Possibly correct character, just not enough
2132 input. */
2133 ++cnt;
2134 assert (cnt < MB_CUR_MAX);
2135 continue;
2137 cnt = 0;
2139 ++wstr;
2140 if ((flags & MALLOC)
2141 && wstr == (wchar_t *) *strptr + strsize)
2143 /* Enlarge the buffer. */
2144 wstr = (wchar_t *) realloc (*strptr,
2145 (2 * strsize
2146 * sizeof (wchar_t)));
2147 if (wstr == NULL)
2149 /* Can't allocate that much. Last-ditch
2150 effort. */
2151 wstr = (wchar_t *)
2152 realloc (*strptr, ((strsize + 1)
2153 * sizeof (wchar_t)));
2154 if (wstr == NULL)
2156 /* We lose. Oh well. Terminate the
2157 string and stop converting,
2158 so at least we don't skip any input. */
2159 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
2160 ++done;
2161 conv_error ();
2163 else
2165 *strptr = (char *) wstr;
2166 wstr += strsize;
2167 ++strsize;
2170 else
2172 *strptr = (char *) wstr;
2173 wstr += strsize;
2174 strsize *= 2;
2179 if (--width <= 0)
2180 break;
2182 while (inchar () != EOF);
2184 if (__builtin_expect (cnt != 0, 0))
2185 /* We stopped in the middle of recognizing another
2186 character. That's a problem. */
2187 encode_error ();
2188 #endif
2190 if (__builtin_expect (now == read_in, 0))
2191 /* We haven't successfully read any character. */
2192 conv_error ();
2194 if (!(flags & SUPPRESS))
2196 *wstr++ = L'\0';
2198 if ((flags & MALLOC)
2199 && wstr - (wchar_t *) *strptr != strsize)
2201 wchar_t *cp = (wchar_t *)
2202 realloc (*strptr, ((wstr - (wchar_t *) *strptr)
2203 * sizeof(wchar_t)));
2204 if (cp != NULL)
2205 *strptr = (char *) cp;
2208 ++done;
2211 else
2213 size_t now = read_in;
2215 if (__builtin_expect (inchar () == EOF, 0))
2216 input_error ();
2218 #ifdef COMPILE_WSCANF
2220 memset (&state, '\0', sizeof (state));
2224 wchar_t *runp;
2225 size_t n;
2227 /* Test whether it's in the scanlist. */
2228 runp = tw;
2229 while (runp < wp)
2231 if (runp[0] == L'-' && runp[1] != '\0' && runp + 1 != wp
2232 && runp != tw
2233 && (unsigned int) runp[-1] <= (unsigned int) runp[1])
2235 /* Match against all characters in between the
2236 first and last character of the sequence. */
2237 wchar_t wc;
2239 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc)
2240 if ((wint_t) wc == c)
2241 break;
2243 if (wc <= runp[1] && !not_in)
2244 break;
2245 if (wc <= runp[1] && not_in)
2247 /* The current character is not in the
2248 scanset. */
2249 ungetc (c, s);
2250 goto out2;
2253 runp += 2;
2255 else
2257 if ((wint_t) *runp == c && !not_in)
2258 break;
2259 if ((wint_t) *runp == c && not_in)
2261 ungetc (c, s);
2262 goto out2;
2265 ++runp;
2269 if (runp == wp && !not_in)
2271 ungetc (c, s);
2272 goto out2;
2275 if (!(flags & SUPPRESS))
2277 if ((flags & MALLOC)
2278 && str + MB_CUR_MAX >= *strptr + strsize)
2280 /* Enlarge the buffer. */
2281 size_t strleng = str - *strptr;
2282 char *newstr;
2284 newstr = (char *) realloc (*strptr, 2 * strsize);
2285 if (newstr == NULL)
2287 /* Can't allocate that much. Last-ditch
2288 effort. */
2289 newstr = (char *) realloc (*strptr,
2290 strleng + MB_CUR_MAX);
2291 if (newstr == NULL)
2293 /* We lose. Oh well. Terminate the string
2294 and stop converting, so at least we don't
2295 skip any input. */
2296 ((char *) (*strptr))[strleng] = '\0';
2297 ++done;
2298 conv_error ();
2300 else
2302 *strptr = newstr;
2303 str = newstr + strleng;
2304 strsize = strleng + MB_CUR_MAX;
2307 else
2309 *strptr = newstr;
2310 str = newstr + strleng;
2311 strsize *= 2;
2316 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state);
2317 if (__builtin_expect (n == (size_t) -1, 0))
2318 encode_error ();
2320 assert (n <= MB_CUR_MAX);
2321 str += n;
2323 while (--width > 0 && inchar () != WEOF);
2324 out2:
2325 #else
2328 if (wp[c] == not_in)
2330 ungetc_not_eof (c, s);
2331 break;
2334 /* This is easy. */
2335 if (!(flags & SUPPRESS))
2337 *str++ = c;
2338 if ((flags & MALLOC)
2339 && (char *) str == *strptr + strsize)
2341 /* Enlarge the buffer. */
2342 size_t newsize = 2 * strsize;
2344 allocagain:
2345 str = (char *) realloc (*strptr, newsize);
2346 if (str == NULL)
2348 /* Can't allocate that much. Last-ditch
2349 effort. */
2350 if (newsize > strsize + 1)
2352 newsize = strsize + 1;
2353 goto allocagain;
2355 /* We lose. Oh well. Terminate the
2356 string and stop converting,
2357 so at least we don't skip any input. */
2358 ((char *) (*strptr))[strsize - 1] = '\0';
2359 ++done;
2360 conv_error ();
2362 else
2364 *strptr = (char *) str;
2365 str += strsize;
2366 strsize = newsize;
2371 while (--width > 0 && inchar () != EOF);
2372 #endif
2374 if (__builtin_expect (now == read_in, 0))
2375 /* We haven't successfully read any character. */
2376 conv_error ();
2378 if (!(flags & SUPPRESS))
2380 #ifdef COMPILE_WSCANF
2381 /* We have to emit the code to get into the initial
2382 state. */
2383 char buf[MB_LEN_MAX];
2384 size_t n = __wcrtomb (buf, L'\0', &state);
2385 if (n > 0 && (flags & MALLOC)
2386 && str + n >= *strptr + strsize)
2388 /* Enlarge the buffer. */
2389 size_t strleng = str - *strptr;
2390 char *newstr;
2392 newstr = (char *) realloc (*strptr, strleng + n + 1);
2393 if (newstr == NULL)
2395 /* We lose. Oh well. Terminate the string
2396 and stop converting, so at least we don't
2397 skip any input. */
2398 ((char *) (*strptr))[strleng] = '\0';
2399 ++done;
2400 conv_error ();
2402 else
2404 *strptr = newstr;
2405 str = newstr + strleng;
2406 strsize = strleng + n + 1;
2410 str = __mempcpy (str, buf, n);
2411 #endif
2412 *str++ = '\0';
2414 if ((flags & MALLOC) && str - *strptr != strsize)
2416 char *cp = (char *) realloc (*strptr, str - *strptr);
2417 if (cp != NULL)
2418 *strptr = cp;
2421 ++done;
2424 break;
2426 case L_('p'): /* Generic pointer. */
2427 base = 16;
2428 /* A PTR must be the same size as a `long int'. */
2429 flags &= ~(SHORT|LONGDBL);
2430 if (need_long)
2431 flags |= LONG;
2432 number_signed = 0;
2433 read_pointer = 1;
2434 goto number;
2436 default:
2437 /* If this is an unknown format character punt. */
2438 conv_error ();
2442 /* The last thing we saw in the format string was a white space.
2443 Consume the last white spaces. */
2444 if (skip_space)
2447 c = inchar ();
2448 while (ISSPACE (c));
2449 ungetc (c, s);
2452 errout:
2453 /* Unlock stream. */
2454 UNLOCK_STREAM (s);
2456 if (errp != NULL)
2457 *errp |= errval;
2459 return done;
2462 #ifdef COMPILE_WSCANF
/* Wide-character entry point.  Forwards S, FORMAT and ARGPTR unchanged
   to _IO_vfwscanf and returns whatever that call returns.  NULL is
   passed as the final argument; presumably this is the optional error
   out-parameter of the scanner -- its signature is not visible here,
   so confirm against the definition.  */
2464 __vfwscanf (FILE *s, const wchar_t *format, va_list argptr)
2466 return _IO_vfwscanf (s, format, argptr, NULL);
/* NOTE(review): presumably makes vfwscanf a weak alias of __vfwscanf;
   ldbl_weak_alias is defined elsewhere, so verify the exact linkage
   it produces.  */
2468 ldbl_weak_alias (__vfwscanf, vfwscanf)
2469 #else
/* Narrow-character entry point.  Forwards S, FORMAT and ARGPTR
   unchanged to _IO_vfscanf_internal and returns whatever that call
   returns.  NULL is passed as the final argument; presumably this is
   the optional error out-parameter of the scanner -- its signature is
   not visible here, so confirm against the definition.  */
2471 ___vfscanf (FILE *s, const char *format, va_list argptr)
2473 return _IO_vfscanf_internal (s, format, argptr, NULL);
/* NOTE(review): the ldbl_* macros are defined elsewhere.  Judging by
   their names and arguments they publish _IO_vfscanf as a strong alias
   of _IO_vfscanf_internal, and __vfscanf (strong, with a hidden
   definition) and vfscanf (weak) as aliases of ___vfscanf -- verify
   against the macro definitions.  */
2475 ldbl_strong_alias (_IO_vfscanf_internal, _IO_vfscanf)
2476 ldbl_strong_alias (___vfscanf, __vfscanf)
2477 ldbl_hidden_def (___vfscanf, __vfscanf)
2478 ldbl_weak_alias (___vfscanf, vfscanf)
2479 #endif