1 /* Copyright (C) 1991, 92, 93, 94, 95, 96, 97 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Library General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
14 You should have received a copy of the GNU Library General Public
15 License along with the GNU C Library; see the file COPYING.LIB. If not,
16 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 Boston, MA 02111-1307, USA. */
19 #include "../locale/localeinfo.h"
28 #include <libc-lock.h>
32 #define LONGLONG long long
37 /* Those are flags in the conversion format. */
38 # define LONG 0x001 /* l: long or double */
39 # define LONGDBL 0x002 /* L: long long or long double */
40 # define SHORT 0x004 /* h: short */
41 # define SUPPRESS 0x008 /* *: suppress assignment */
42 # define POINTER 0x010 /* weird %p pointer (`fake hex') */
43 # define NOSKIP 0x020 /* do not skip blanks */
44 # define WIDTH 0x040 /* width was given */
45 # define GROUP 0x080 /* ': group numbers */
46 # define MALLOC 0x100 /* a: malloc strings */
48 # define TYPEMOD (LONG|LONGDBL|SHORT)
56 # define va_list _IO_va_list
57 # define ungetc(c, s) (--read_in, _IO_ungetc (c, s))
58 # define inchar() ((c = _IO_getc_unlocked (s)), (void) ++read_in, c)
59 # define encode_error() do { \
60 if (errp != NULL) *errp |= 4; \
61 _IO_funlockfile (s); \
62 __set_errno (EILSEQ); \
65 # define conv_error() do { \
66 if (errp != NULL) *errp |= 2; \
67 _IO_funlockfile (s); \
70 # define input_error() do { \
71 _IO_funlockfile (s); \
72 if (errp != NULL) *errp |= 1; \
75 # define memory_error() do { \
76 _IO_funlockfile (s); \
77 __set_errno (ENOMEM); \
80 # define ARGCHECK(s, format) \
83 /* Check file argument for consistency. */ \
84 CHECK_FILE (s, EOF); \
85 if (s->_flags & _IO_NO_READS) \
87 __set_errno (EBADF); \
90 else if (format == NULL) \
96 # define LOCK_STREAM(S) \
97 __libc_cleanup_region_start ((void (*) (void *)) &_IO_funlockfile, (S)); \
99 # define UNLOCK_STREAM __libc_cleanup_region_end (1)
101 # define ungetc(c, s) (--read_in, ungetc (c, s))
102 # define inchar() ((c = getc (s)), (void) ++read_in, c)
103 # define encode_error() do { \
105 __set_errno (EILSEQ); \
108 # define conv_error() do { \
112 # define input_error() do { \
114 return done ?: EOF; \
116 # define memory_error() do { \
118 __set_errno (ENOMEM); \
121 # define ARGCHECK(s, format) \
124 /* Check file argument for consistency. */ \
125 if (!__validfp (s) || !s->__mode.__read) \
127 __set_errno (EBADF); \
130 else if (format == NULL) \
132 __set_errno (EINVAL); \
137 /* XXX For now !!! */
138 # define flockfile(S) /* nothing */
139 # define funlockfile(S) /* nothing */
140 # define LOCK_STREAM(S)
141 # define UNLOCK_STREAM
143 # define LOCK_STREAM(S) \
144 __libc_cleanup_region_start (&__funlockfile, (S)); \
146 # define UNLOCK_STREAM __libc_cleanup_region_end (1)
151 /* Read formatted input from S according to the format string
152 FORMAT, using the argument list in ARG.
153 Return the number of assignments made, or -1 for an input error. */
156 _IO_vfscanf (s
, format
, argptr
, errp
)
163 __vfscanf (FILE *s
, const char *format
, va_list argptr
)
166 va_list arg
= (va_list) argptr
;
168 register const char *f
= format
;
169 register unsigned char fc
; /* Current character of the format. */
170 register size_t done
= 0; /* Assignments done. */
171 register size_t read_in
= 0; /* Chars read in. */
172 register int c
= 0; /* Last char read. */
173 register int width
; /* Maximum field width. */
174 register int flags
; /* Modifiers for current format element. */
176 /* Status for reading F-P nums. */
177 char got_dot
, got_e
, negative
;
178 /* If a [...] is a [^...]. */
180 /* Base for integral numbers. */
182 /* Signedness for integral numbers. */
184 /* Decimal point character. */
186 /* The thousands character of the current locale. */
188 /* Integral holding variables. */
192 unsigned long long int uq
;
194 unsigned long int ul
;
196 /* Character-buffer pointer. */
198 wchar_t *wstr
= NULL
;
199 char **strptr
= NULL
;
201 /* We must not react to white space immediately because it can
202 possibly be matched even if no character is available in the
203 input stream anymore. */
206 char *tw
; /* Temporary pointer. */
207 char *wp
= NULL
; /* Workspace. */
208 size_t wpmax
= 0; /* Maximal size of workspace. */
209 size_t wpsize
; /* Currently used bytes in workspace. */
213 if (wpsize == wpmax) \
216 wpmax = UCHAR_MAX > 2 * wpmax ? UCHAR_MAX : 2 * wpmax; \
217 wp = (char *) alloca (wpmax); \
219 memcpy (wp, old, wpsize); \
221 wp[wpsize++] = (Ch); \
225 ARGCHECK (s
, format
);
227 /* Figure out the decimal point character. */
228 if (mbtowc (&decimal
, _NL_CURRENT (LC_NUMERIC
, DECIMAL_POINT
),
229 strlen (_NL_CURRENT (LC_NUMERIC
, DECIMAL_POINT
))) <= 0)
230 decimal
= (wchar_t) *_NL_CURRENT (LC_NUMERIC
, DECIMAL_POINT
);
231 /* Figure out the thousands separator character. */
232 if (mbtowc (&thousands
, _NL_CURRENT (LC_NUMERIC
, THOUSANDS_SEP
),
233 strlen (_NL_CURRENT (LC_NUMERIC
, THOUSANDS_SEP
))) <= 0)
234 thousands
= (wchar_t) *_NL_CURRENT (LC_NUMERIC
, THOUSANDS_SEP
);
236 /* Lock the stream. */
239 /* Run through the format string. */
243 /* Extract the next argument, which is of type TYPE.
244 For a %N$... spec, this is the Nth argument from the beginning;
245 otherwise it is the next argument after the state now in ARG. */
247 /* XXX Possible optimization. */
248 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
249 ({ va_list arg = (va_list) argptr; \
250 arg = (va_list) ((char *) arg \
252 * __va_rounded_size (void *)); \
253 va_arg (arg, type); \
256 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
257 ({ unsigned int pos = argpos; \
258 va_list arg = (va_list) argptr; \
260 (void) va_arg (arg, void *); \
261 va_arg (arg, type); \
267 /* Non-ASCII, may be a multibyte. */
268 int len
= mblen (f
, strlen (f
));
290 /* Remember to skip spaces. */
297 /* Read a character. */
300 /* Characters other than format specs must just match. */
304 /* We saw white space char as the last character in the format
305 string. Now it's time to skip all leading white space. */
309 if (inchar () == EOF
&& errno
== EINTR
)
323 /* This is the start of the conversion string. */
326 /* Initialize state of modifiers. */
329 /* Prepare temporary buffer. */
332 /* Check for a positional parameter specification. */
337 argpos
= argpos
* 10 + (*f
++ - '0');
342 /* Oops; that was actually the field width. */
350 /* Check for the assignment-suppressing and the number grouping flag. */
351 while (*f
== '*' || *f
== '\'')
362 /* We have seen width. */
366 /* Find the maximum field width. */
377 /* Check for type modifiers. */
378 while (*f
== 'h' || *f
== 'l' || *f
== 'L' || *f
== 'a' || *f
== 'q')
382 /* int's are short int's. */
384 /* Signal illegal format element. */
389 if (flags
& (SHORT
|LONGDBL
))
391 else if (flags
& LONG
)
393 /* A double `l' is equivalent to an `L'. */
398 /* int's are long int's. */
403 /* double's are long double's, and int's are long long int's. */
405 /* Signal illegal format element. */
411 /* Signal illegal format element. */
413 /* String conversions (%s, %[) take a `char **'
414 arg and fill it in with a malloc'd pointer. */
419 /* End of the format string? */
423 /* We must take care of EINTR errors. */
424 if (c
== EOF
&& errno
== EINTR
)
427 /* Find the conversion specifier. */
429 if (skip_space
|| (fc
!= '[' && fc
!= 'c' && fc
!= 'C' && fc
!= 'n'))
431 /* Eat whitespace. */
433 if (inchar () == EOF
&& errno
== EINTR
)
442 case '%': /* Must match a literal '%'. */
451 case 'n': /* Answer number of assignments done. */
452 /* Corrigendum 1 to ISO C 1990 describes the allowed flags
453 with the 'n' conversion specifier. */
454 if (!(flags
& SUPPRESS
))
456 /* Don't count the read-ahead. */
458 *ARG (long long int *) = read_in
;
459 else if (flags
& LONG
)
460 *ARG (long int *) = read_in
;
461 else if (flags
& SHORT
)
462 *ARG (short int *) = read_in
;
464 *ARG (int *) = read_in
;
466 #ifdef NO_BUG_IN_ISO_C_CORRIGENDUM_1
467 /* We have a severe problem here. The ISO C standard
468 contradicts itself in explaining the effect of the %n
469 format in `scanf'. While in ISO C:1990 and the ISO C
470 Amendment 1:1995 the result is described as
472 Execution of a %n directive does not affect the
473 assignment count returned at the completion of
474 execution of the f(w)scanf function.
476 in ISO C Corrigendum 1:1994 the following was added:
479 Add the following fourth example:
482 int d1, d2, n1, n2, i;
483 i = sscanf("123", "%d%n%n%d", &d1, &n1, &n2, &d2);
484 the value 123 is assigned to d1 and the value 3 to n1.
485 Because %n can never get an input failure the value
486 of 3 is also assigned to n2. The value of d2 is not
487 affected. The value 3 is assigned to i.
489 We go for now with the historically correct code from ISO C,
490 i.e., we don't count the %n assignments. If it ever
491 should prove to be wrong, just remove the #ifdef above. */
497 case 'c': /* Match characters. */
498 if ((flags
& LONG
) == 0)
500 if (!(flags
& SUPPRESS
))
514 if (!(flags
& SUPPRESS
))
518 while (--width
> 0 && inchar () != EOF
);
521 while (--width
> 0 && inchar () != EOF
);
524 /* I.e., EOF was read. */
527 if (!(flags
& SUPPRESS
))
534 /* Get UTF-8 encoded wide character. Here we assume (as in
535 other parts of the libc) that we only have to handle
542 if (!(flags
& SUPPRESS
))
544 wstr
= ARG (wchar_t *);
551 #define NEXT_WIDE_CHAR(First) \
554 /* EOF is only an error for the first character. */ \
565 if ((c & 0xc0) == 0x80 || (c & 0xfe) == 0xfe) \
567 if ((c & 0xe0) == 0xc0) \
569 /* We expect two bytes. */ \
573 else if ((c & 0xf0) == 0xe0) \
575 /* We expect three bytes. */ \
579 else if ((c & 0xf8) == 0xf0) \
581 /* We expect four bytes. */ \
585 else if ((c & 0xfc) == 0xf8) \
587 /* We expect five bytes. */ \
593 /* We expect six bytes. */ \
602 || (c & 0xc0) == 0x80 || (c & 0xfe) == 0xfe) \
610 if (!(flags & SUPPRESS)) \
614 NEXT_WIDE_CHAR (first
);
619 /* I.e., EOF was read. */
622 if (!(flags
& SUPPRESS
))
627 case 's': /* Read a string. */
629 /* We have to process a wide character string. */
630 goto wide_char_string
;
632 #define STRING_ARG(Str, Type) \
633 if (!(flags & SUPPRESS)) \
635 if (flags & MALLOC) \
637 /* The string is to be stored in a malloc'd buffer. */ \
638 strptr = ARG (char **); \
639 if (strptr == NULL) \
641 /* Allocate an initial buffer. */ \
643 *strptr = malloc (strsize * sizeof (Type)); \
644 Str = (Type *) *strptr; \
647 Str = ARG (Type *); \
651 STRING_ARG (str
, char);
664 #define STRING_ADD_CHAR(Str, c, Type) \
665 if (!(flags & SUPPRESS)) \
668 if ((flags & MALLOC) && (char *) Str == *strptr + strsize) \
670 /* Enlarge the buffer. */ \
671 Str = realloc (*strptr, strsize * 2 * sizeof (Type)); \
674 /* Can't allocate that much. Last-ditch effort. */\
675 Str = realloc (*strptr, \
676 (strsize + 1) * sizeof (Type)); \
679 /* We lose. Oh well. \
680 Terminate the string and stop converting, \
681 so at least we don't skip any input. */ \
682 ((Type *) (*strptr))[strsize] = '\0'; \
688 *strptr = (char *) Str; \
689 Str = ((Type *) *strptr) + strsize; \
695 *strptr = (char *) Str; \
696 Str = ((Type *) *strptr) + strsize; \
701 STRING_ADD_CHAR (str
, c
, char);
702 } while ((width
<= 0 || --width
> 0) && inchar () != EOF
);
707 if (!(flags
& SUPPRESS
))
715 /* Wide character string. */
720 STRING_ARG (wstr
, wchar_t);
725 NEXT_WIDE_CHAR (first
);
729 /* XXX We would have to push back the whole wide char
730 with possibly many bytes. But since scanf does
731 not make a difference for white space characters
732 we can simply push back a simple <SP> which is
733 guaranteed to be in the [:space:] class. */
738 STRING_ADD_CHAR (wstr
, val
, wchar_t);
741 while (width
<= 0 || --width
> 0);
743 if (!(flags
& SUPPRESS
))
751 case 'x': /* Hexadecimal integer. */
752 case 'X': /* Ditto. */
757 case 'o': /* Octal integer. */
762 case 'u': /* Unsigned decimal integer. */
767 case 'd': /* Signed decimal integer. */
772 case 'i': /* Generic number. */
781 /* Check for a sign. */
782 if (c
== '-' || c
== '+')
790 /* Look for a leading indication of base. */
791 if (width
!= 0 && c
== '0')
799 if (width
!= 0 && tolower (c
) == 'x')
817 /* Read the number into workspace. */
818 while (c
!= EOF
&& width
!= 0)
820 if (base
== 16 ? !isxdigit (c
) :
821 ((!isdigit (c
) || c
- '0' >= base
) &&
822 !((flags
& GROUP
) && base
== 10 && c
== thousands
)))
831 /* The just read character is not part of the number anymore. */
835 (wpsize
== 1 && (wp
[0] == '+' || wp
[0] == '-')))
836 /* There was no number. */
839 /* Convert the number. */
844 num
.q
= __strtoq_internal (wp
, &tw
, base
, flags
& GROUP
);
846 num
.uq
= __strtouq_internal (wp
, &tw
, base
, flags
& GROUP
);
851 num
.l
= __strtol_internal (wp
, &tw
, base
, flags
& GROUP
);
853 num
.ul
= __strtoul_internal (wp
, &tw
, base
, flags
& GROUP
);
858 if (!(flags
& SUPPRESS
))
863 *ARG (unsigned LONGLONG
int *) = num
.uq
;
864 else if (flags
& LONG
)
865 *ARG (unsigned long int *) = num
.ul
;
866 else if (flags
& SHORT
)
867 *ARG (unsigned short int *)
868 = (unsigned short int) num
.ul
;
870 *ARG (unsigned int *) = (unsigned int) num
.ul
;
875 *ARG (LONGLONG
int *) = num
.q
;
876 else if (flags
& LONG
)
877 *ARG (long int *) = num
.l
;
878 else if (flags
& SHORT
)
879 *ARG (short int *) = (short int) num
.l
;
881 *ARG (int *) = (int) num
.l
;
887 case 'e': /* Floating-point numbers. */
896 /* Check for a sign. */
897 if (c
== '-' || c
== '+')
900 if (inchar () == EOF
)
901 /* EOF is only an input error before we read any chars. */
914 else if (got_e
&& wp
[wpsize
- 1] == 'e'
915 && (c
== '-' || c
== '+'))
917 else if (wpsize
> 0 && !got_e
&& tolower (c
) == 'e')
922 else if (c
== decimal
&& !got_dot
)
927 else if ((flags
& GROUP
) && c
== thousands
&& !got_dot
)
934 while (inchar () != EOF
&& width
!= 0);
936 /* The last read character is not part of the number anymore. */
942 /* Convert the number. */
946 long double d
= __strtold_internal (wp
, &tw
, flags
& GROUP
);
947 if (!(flags
& SUPPRESS
) && tw
!= wp
)
948 *ARG (long double *) = negative
? -d
: d
;
950 else if (flags
& LONG
)
952 double d
= __strtod_internal (wp
, &tw
, flags
& GROUP
);
953 if (!(flags
& SUPPRESS
) && tw
!= wp
)
954 *ARG (double *) = negative
? -d
: d
;
958 float d
= __strtof_internal (wp
, &tw
, flags
& GROUP
);
959 if (!(flags
& SUPPRESS
) && tw
!= wp
)
960 *ARG (float *) = negative
? -d
: d
;
966 if (!(flags
& SUPPRESS
))
970 case '[': /* Character class. */
973 STRING_ARG (wstr
, wchar_t);
974 c
= '\0'; /* This is to keep gcc quiet. */
978 STRING_ARG (str
, char);
993 /* Fill WP with byte flags indexed by character.
994 We will use this flag map for matching input characters. */
995 if (wpmax
< UCHAR_MAX
)
998 wp
= (char *) alloca (wpmax
);
1000 memset (wp
, 0, UCHAR_MAX
);
1003 if (fc
== ']' || fc
== '-')
1005 /* If ] or - appears before any char in the set, it is not
1006 the terminator or separator, but the first char in the
1012 while ((fc
= *f
++) != '\0' && fc
!= ']')
1014 if (fc
== '-' && *f
!= '\0' && *f
!= ']' &&
1015 (unsigned char) f
[-2] <= (unsigned char) *f
)
1017 /* Add all characters from the one before the '-'
1018 up to (but not including) the next format char. */
1019 for (fc
= f
[-2]; fc
< *f
; ++fc
)
1023 /* Add the character to the flag map. */
1028 if (!(flags
& LONG
))
1041 NEXT_WIDE_CHAR (first
);
1042 if (val
> 255 || wp
[val
] == not_in
)
1044 /* XXX We have a problem here. We read a wide
1045 character and this possibly took several
1046 bytes. But we can only push back one single
1047 character. To be sure we don't create wrong
1048 input we push it back only in case it is
1049 representable within one byte. */
1054 STRING_ADD_CHAR (wstr
, val
, wchar_t);
1064 if (!(flags
& SUPPRESS
))
1072 num
.ul
= read_in
- 1; /* -1 because we already read one char. */
1075 if (wp
[c
] == not_in
)
1080 STRING_ADD_CHAR (str
, c
, char);
1084 while (width
!= 0 && inchar () != EOF
);
1086 if (read_in
== num
.ul
)
1089 if (!(flags
& SUPPRESS
))
1097 case 'p': /* Generic pointer. */
1099 /* A PTR must be the same size as a `long int'. */
1100 flags
&= ~(SHORT
|LONGDBL
);
1107 /* The last thing we saw in the format string was a white space.
1108 Consume the last white spaces. */
1113 while (isspace (c
));
1117 /* Unlock stream. */
1125 __vfscanf (FILE *s
, const char *format
, va_list argptr
)
1127 return _IO_vfscanf (s
, format
, argptr
, NULL
);
1131 weak_alias (__vfscanf
, vfscanf
)