1 /* Copyright (C) 1991, 92, 93, 94, 95, 96, 97 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Library General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
14 You should have received a copy of the GNU Library General Public
15 License along with the GNU C Library; see the file COPYING.LIB. If not,
16 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 Boston, MA 02111-1307, USA. */
19 #include "../locale/localeinfo.h"
28 #include <bits/libc-lock.h>
32 #define LONGLONG long long
37 /* Those are flags in the conversion format. */
38 # define LONG 0x001 /* l: long or double */
39 # define LONGDBL 0x002 /* L: long long or long double */
40 # define SHORT 0x004 /* h: short */
41 # define SUPPRESS 0x008 /* *: suppress assignment */
42 # define POINTER 0x010 /* weird %p pointer (`fake hex') */
43 # define NOSKIP 0x020 /* do not skip blanks */
44 # define WIDTH 0x040 /* width was given */
45 # define GROUP 0x080 /* ': group numbers */
46 # define MALLOC 0x100 /* a: malloc strings */
48 # define TYPEMOD (LONG|LONGDBL|SHORT)
56 # define va_list _IO_va_list
57 # define ungetc(c, s) ((void) ((int) c != EOF && --read_in), \
59 # define inchar() ((c = _IO_getc_unlocked (s)), \
60 (void) (c != EOF && ++read_in), c)
61 # define encode_error() do { \
62 if (errp != NULL) *errp |= 4; \
63 _IO_funlockfile (s); \
64 __set_errno (EILSEQ); \
67 # define conv_error() do { \
68 if (errp != NULL) *errp |= 2; \
69 _IO_funlockfile (s); \
72 # define input_error() do { \
73 _IO_funlockfile (s); \
74 if (errp != NULL) *errp |= 1; \
77 # define memory_error() do { \
78 _IO_funlockfile (s); \
79 __set_errno (ENOMEM); \
82 # define ARGCHECK(s, format) \
85 /* Check file argument for consistency. */ \
86 CHECK_FILE (s, EOF); \
87 if (s->_flags & _IO_NO_READS) \
89 __set_errno (EBADF); \
92 else if (format == NULL) \
98 # define LOCK_STREAM(S) \
99 __libc_cleanup_region_start ((void (*) (void *)) &_IO_funlockfile, (S)); \
101 # define UNLOCK_STREAM __libc_cleanup_region_end (1)
103 # define ungetc(c, s) ((void) (c != EOF && --read_in), ungetc (c, s))
104 # define inchar() ((c = getc (s)), (void) (c != EOF && ++read_in), c)
105 # define encode_error() do { \
107 __set_errno (EILSEQ); \
110 # define conv_error() do { \
114 # define input_error() do { \
116 return done ?: EOF; \
118 # define memory_error() do { \
120 __set_errno (ENOMEM); \
123 # define ARGCHECK(s, format) \
126 /* Check file argument for consistency. */ \
127 if (!__validfp (s) || !s->__mode.__read) \
129 __set_errno (EBADF); \
132 else if (format == NULL) \
134 __set_errno (EINVAL); \
139 /* XXX For now !!! */
140 # define flockfile(S) /* nothing */
141 # define funlockfile(S) /* nothing */
142 # define LOCK_STREAM(S)
143 # define UNLOCK_STREAM
145 # define LOCK_STREAM(S) \
146 __libc_cleanup_region_start (&__funlockfile, (S)); \
148 # define UNLOCK_STREAM __libc_cleanup_region_end (1)
153 /* Read formatted input from S according to the format string
154 FORMAT, using the argument list in ARG.
155 Return the number of assignments made, or -1 for an input error. */
158 _IO_vfscanf (s
, format
, argptr
, errp
)
165 __vfscanf (FILE *s
, const char *format
, va_list argptr
)
169 register const char *f
= format
;
170 register unsigned char fc
; /* Current character of the format. */
171 register size_t done
= 0; /* Assignments done. */
172 register size_t read_in
= 0; /* Chars read in. */
173 register int c
= 0; /* Last char read. */
174 register int width
; /* Maximum field width. */
175 register int flags
; /* Modifiers for current format element. */
177 /* Status for reading F-P nums. */
178 char got_dot
, got_e
, negative
;
179 /* If a [...] is a [^...]. */
181 #define exp_char not_in
182 /* Base for integral numbers. */
184 /* Signedness for integral numbers. */
186 #define is_hexa number_signed
187 /* Decimal point character. */
189 /* The thousands character of the current locale. */
191 /* Integral holding variables. */
195 unsigned long long int uq
;
197 unsigned long int ul
;
199 /* Character-buffer pointer. */
201 wchar_t *wstr
= NULL
;
202 char **strptr
= NULL
;
204 /* We must not react to white spaces immediately because they can
205 possibly be matched even if in the input stream no character is
206 available anymore. */
209 char *tw
; /* Temporary pointer. */
210 char *wp
= NULL
; /* Workspace. */
211 size_t wpmax
= 0; /* Maximal size of workspace. */
212 size_t wpsize
; /* Currently used bytes in workspace. */
216 if (wpsize == wpmax) \
219 wpmax = UCHAR_MAX > 2 * wpmax ? UCHAR_MAX : 2 * wpmax; \
220 wp = (char *) alloca (wpmax); \
222 memcpy (wp, old, wpsize); \
224 wp[wpsize++] = (Ch); \
229 __va_copy (arg
, argptr
);
231 arg
= (va_list) argptr
;
234 ARGCHECK (s
, format
);
236 /* Figure out the decimal point character. */
237 if (mbtowc (&decimal
, _NL_CURRENT (LC_NUMERIC
, DECIMAL_POINT
),
238 strlen (_NL_CURRENT (LC_NUMERIC
, DECIMAL_POINT
))) <= 0)
239 decimal
= (wchar_t) *_NL_CURRENT (LC_NUMERIC
, DECIMAL_POINT
);
240 /* Figure out the thousands separator character. */
241 if (mbtowc (&thousands
, _NL_CURRENT (LC_NUMERIC
, THOUSANDS_SEP
),
242 strlen (_NL_CURRENT (LC_NUMERIC
, THOUSANDS_SEP
))) <= 0)
243 thousands
= (wchar_t) *_NL_CURRENT (LC_NUMERIC
, THOUSANDS_SEP
);
245 /* Lock the stream. */
248 /* Run through the format string. */
252 /* Extract the next argument, which is of type TYPE.
253 For a %N$... spec, this is the Nth argument from the beginning;
254 otherwise it is the next argument after the state now in ARG. */
256 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
257 ({ unsigned int pos = argpos; \
259 __va_copy (arg, argptr); \
261 (void) va_arg (arg, void *); \
262 va_arg (arg, type); \
266 /* XXX Possible optimization. */
267 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
268 ({ va_list arg = (va_list) argptr; \
269 arg = (va_list) ((char *) arg \
271 * __va_rounded_size (void *)); \
272 va_arg (arg, type); \
275 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
276 ({ unsigned int pos = argpos; \
277 va_list arg = (va_list) argptr; \
279 (void) va_arg (arg, void *); \
280 va_arg (arg, type); \
287 /* Non-ASCII, may be a multibyte. */
288 int len
= mblen (f
, strlen (f
));
310 /* Remember to skip spaces. */
317 /* Read a character. */
320 /* Characters other than format specs must just match. */
324 /* We saw white space char as the last character in the format
325 string. Now it's time to skip all leading white space. */
329 if (inchar () == EOF
&& errno
== EINTR
)
343 /* This is the start of the conversion string. */
346 /* Initialize state of modifiers. */
349 /* Prepare temporary buffer. */
352 /* Check for a positional parameter specification. */
357 argpos
= argpos
* 10 + (*f
++ - '0');
362 /* Oops; that was actually the field width. */
370 /* Check for the assignment-suppressing and the number grouping flag. */
371 while (*f
== '*' || *f
== '\'')
382 /* We have seen width. */
386 /* Find the maximum field width. */
397 /* Check for type modifiers. */
398 while (*f
== 'h' || *f
== 'l' || *f
== 'L' || *f
== 'a' || *f
== 'q')
402 /* int's are short int's. */
404 /* Signal illegal format element. */
409 if (flags
& (SHORT
|LONGDBL
))
411 else if (flags
& LONG
)
413 /* A double `l' is equivalent to an `L'. */
418 /* int's are long int's. */
423 /* double's are long double's, and int's are long long int's. */
425 /* Signal illegal format element. */
431 /* Signal illegal format element. */
433 /* String conversions (%s, %[) take a `char **'
434 arg and fill it in with a malloc'd pointer. */
439 /* End of the format string? */
443 /* We must take care of EINTR errors. */
444 if (c
== EOF
&& errno
== EINTR
)
447 /* Find the conversion specifier. */
449 if (skip_space
|| (fc
!= '[' && fc
!= 'c' && fc
!= 'C' && fc
!= 'n'))
451 /* Eat whitespace. */
453 if (inchar () == EOF
&& errno
== EINTR
)
462 case '%': /* Must match a literal '%'. */
471 case 'n': /* Answer number of assignments done. */
472 /* Corrigendum 1 to ISO C 1990 describes the allowed flags
473 with the 'n' conversion specifier. */
474 if (!(flags
& SUPPRESS
))
476 /* Don't count the read-ahead. */
478 *ARG (long long int *) = read_in
;
479 else if (flags
& LONG
)
480 *ARG (long int *) = read_in
;
481 else if (flags
& SHORT
)
482 *ARG (short int *) = read_in
;
484 *ARG (int *) = read_in
;
486 #ifdef NO_BUG_IN_ISO_C_CORRIGENDUM_1
487 /* We have a severe problem here. The ISO C standard
488 contradicts itself in explaining the effect of the %n
489 format in `scanf'. While in ISO C:1990 and the ISO C
490 Amendment 1:1995 the result is described as
492 Execution of a %n directive does not affect the
493 assignment count returned at the completion of
494 execution of the f(w)scanf function.
496 in ISO C Corrigendum 1:1994 the following was added:
499 Add the following fourth example:
502 int d1, d2, n1, n2, i;
503 i = sscanf("123", "%d%n%n%d", &d1, &n1, &n2, &d2);
504 the value 123 is assigned to d1 and the value 3 to n1.
505 Because %n can never get an input failure the value
506 of 3 is also assigned to n2. The value of d2 is not
507 affected. The value 3 is assigned to i.
509 We go for now with the historically correct code from ISO C,
510 i.e., we don't count the %n assignments. If this should
511 ever prove to be wrong just remove the #ifdef above. */
517 case 'c': /* Match characters. */
518 if ((flags
& LONG
) == 0)
520 if (!(flags
& SUPPRESS
))
534 if (!(flags
& SUPPRESS
))
538 while (--width
> 0 && inchar () != EOF
);
541 while (--width
> 0 && inchar () != EOF
);
544 /* I.e., EOF was read. */
547 if (!(flags
& SUPPRESS
))
554 /* Get UTF-8 encoded wide character. Here we assume (as in
555 other parts of the libc) that we only have to handle
562 if (!(flags
& SUPPRESS
))
564 wstr
= ARG (wchar_t *);
571 #define NEXT_WIDE_CHAR(First) \
574 /* EOF is only an error for the first character. */ \
585 if ((c & 0xc0) == 0x80 || (c & 0xfe) == 0xfe) \
587 if ((c & 0xe0) == 0xc0) \
589 /* We expect two bytes. */ \
593 else if ((c & 0xf0) == 0xe0) \
595 /* We expect three bytes. */ \
599 else if ((c & 0xf8) == 0xf0) \
601 /* We expect four bytes. */ \
605 else if ((c & 0xfc) == 0xf8) \
607 /* We expect five bytes. */ \
613 /* We expect six bytes. */ \
622 || (c & 0xc0) == 0x80 || (c & 0xfe) == 0xfe) \
630 if (!(flags & SUPPRESS)) \
634 NEXT_WIDE_CHAR (first
);
639 /* I.e., EOF was read. */
642 if (!(flags
& SUPPRESS
))
647 case 's': /* Read a string. */
649 /* We have to process a wide character string. */
650 goto wide_char_string
;
652 #define STRING_ARG(Str, Type) \
653 if (!(flags & SUPPRESS)) \
655 if (flags & MALLOC) \
657 /* The string is to be stored in a malloc'd buffer. */ \
658 strptr = ARG (char **); \
659 if (strptr == NULL) \
661 /* Allocate an initial buffer. */ \
663 *strptr = malloc (strsize * sizeof (Type)); \
664 Str = (Type *) *strptr; \
667 Str = ARG (Type *); \
671 STRING_ARG (str
, char);
684 #define STRING_ADD_CHAR(Str, c, Type) \
685 if (!(flags & SUPPRESS)) \
688 if ((flags & MALLOC) && (char *) Str == *strptr + strsize) \
690 /* Enlarge the buffer. */ \
691 Str = realloc (*strptr, strsize * 2 * sizeof (Type)); \
694 /* Can't allocate that much. Last-ditch effort. */\
695 Str = realloc (*strptr, \
696 (strsize + 1) * sizeof (Type)); \
699 /* We lose. Oh well. \
700 Terminate the string and stop converting, \
701 so at least we don't skip any input. */ \
702 ((Type *) (*strptr))[strsize] = '\0'; \
708 *strptr = (char *) Str; \
709 Str = ((Type *) *strptr) + strsize; \
715 *strptr = (char *) Str; \
716 Str = ((Type *) *strptr) + strsize; \
721 STRING_ADD_CHAR (str
, c
, char);
722 } while ((width
<= 0 || --width
> 0) && inchar () != EOF
);
724 if (!(flags
& SUPPRESS
))
732 /* Wide character string. */
737 STRING_ARG (wstr
, wchar_t);
742 NEXT_WIDE_CHAR (first
);
746 /* XXX We would have to push back the whole wide char
747 with possibly many bytes. But since scanf does
748 not make a difference for white space characters
749 we can simply push back a simple <SP> which is
750 guaranteed to be in the [:space:] class. */
755 STRING_ADD_CHAR (wstr
, val
, wchar_t);
758 while (width
<= 0 || --width
> 0);
760 if (!(flags
& SUPPRESS
))
768 case 'x': /* Hexadecimal integer. */
769 case 'X': /* Ditto. */
774 case 'o': /* Octal integer. */
779 case 'u': /* Unsigned decimal integer. */
784 case 'd': /* Signed decimal integer. */
789 case 'i': /* Generic number. */
798 /* Check for a sign. */
799 if (c
== '-' || c
== '+')
807 /* Look for a leading indication of base. */
808 if (width
!= 0 && c
== '0')
816 if (width
!= 0 && tolower (c
) == 'x')
834 /* Read the number into workspace. */
835 while (c
!= EOF
&& width
!= 0)
837 if (base
== 16 ? !isxdigit (c
) :
838 ((!isdigit (c
) || c
- '0' >= base
) &&
839 !((flags
& GROUP
) && base
== 10 && c
== thousands
)))
848 /* The just read character is not part of the number anymore. */
852 (wpsize
== 1 && (wp
[0] == '+' || wp
[0] == '-')))
853 /* There was no number. */
856 /* Convert the number. */
861 num
.q
= __strtoll_internal (wp
, &tw
, base
, flags
& GROUP
);
863 num
.uq
= __strtoull_internal (wp
, &tw
, base
, flags
& GROUP
);
868 num
.l
= __strtol_internal (wp
, &tw
, base
, flags
& GROUP
);
870 num
.ul
= __strtoul_internal (wp
, &tw
, base
, flags
& GROUP
);
875 if (!(flags
& SUPPRESS
))
880 *ARG (unsigned LONGLONG
int *) = num
.uq
;
881 else if (flags
& LONG
)
882 *ARG (unsigned long int *) = num
.ul
;
883 else if (flags
& SHORT
)
884 *ARG (unsigned short int *)
885 = (unsigned short int) num
.ul
;
887 *ARG (unsigned int *) = (unsigned int) num
.ul
;
892 *ARG (LONGLONG
int *) = num
.q
;
893 else if (flags
& LONG
)
894 *ARG (long int *) = num
.l
;
895 else if (flags
& SHORT
)
896 *ARG (short int *) = (short int) num
.l
;
898 *ARG (int *) = (int) num
.l
;
904 case 'e': /* Floating-point numbers. */
914 /* Check for a sign. */
915 if (c
== '-' || c
== '+')
918 if (inchar () == EOF
)
919 /* EOF is only an input error before we read any chars. */
933 if (tolower (c
) == 'x')
935 /* It is a number in hexadecimal format. */
941 /* Grouping is not allowed. */
952 else if (!got_e
&& is_hexa
&& isxdigit (c
))
954 else if (got_e
&& wp
[wpsize
- 1] == exp_char
955 && (c
== '-' || c
== '+'))
957 else if (wpsize
> 0 && !got_e
&& tolower (c
) == exp_char
)
962 else if (c
== decimal
&& !got_dot
)
967 else if ((flags
& GROUP
) && c
== thousands
&& !got_dot
)
971 /* The last read character is not part of the number
979 while (width
!= 0 && inchar () != EOF
);
981 /* Have we read any character? If we try to read a number
982 in hexadecimal notation and we have read only the `0x'
983 prefix this is an error. */
984 if (wpsize
== 0 || (is_hexa
&& wpsize
== 2))
987 /* Convert the number. */
991 long double d
= __strtold_internal (wp
, &tw
, flags
& GROUP
);
992 if (!(flags
& SUPPRESS
) && tw
!= wp
)
993 *ARG (long double *) = negative
? -d
: d
;
995 else if (flags
& LONG
)
997 double d
= __strtod_internal (wp
, &tw
, flags
& GROUP
);
998 if (!(flags
& SUPPRESS
) && tw
!= wp
)
999 *ARG (double *) = negative
? -d
: d
;
1003 float d
= __strtof_internal (wp
, &tw
, flags
& GROUP
);
1004 if (!(flags
& SUPPRESS
) && tw
!= wp
)
1005 *ARG (float *) = negative
? -d
: d
;
1011 if (!(flags
& SUPPRESS
))
1015 case '[': /* Character class. */
1018 STRING_ARG (wstr
, wchar_t);
1019 c
= '\0'; /* This is to keep gcc quiet. */
1023 STRING_ARG (str
, char);
1038 /* Fill WP with byte flags indexed by character.
1039 We will use this flag map for matching input characters. */
1040 if (wpmax
< UCHAR_MAX
)
1043 wp
= (char *) alloca (wpmax
);
1045 memset (wp
, 0, UCHAR_MAX
);
1048 if (fc
== ']' || fc
== '-')
1050 /* If ] or - appears before any char in the set, it is not
1051 the terminator or separator, but the first char in the
1057 while ((fc
= *f
++) != '\0' && fc
!= ']')
1059 if (fc
== '-' && *f
!= '\0' && *f
!= ']' &&
1060 (unsigned char) f
[-2] <= (unsigned char) *f
)
1062 /* Add all characters from the one before the '-'
1063 up to (but not including) the next format char. */
1064 for (fc
= f
[-2]; fc
< *f
; ++fc
)
1068 /* Add the character to the flag map. */
1073 if (!(flags
& LONG
))
1086 NEXT_WIDE_CHAR (first
);
1087 if (val
> 255 || wp
[val
] == not_in
)
1089 /* XXX We have a problem here. We read a wide
1090 character and this possibly took several
1091 bytes. But we can only push back one single
1092 character. To be sure we don't create wrong
1093 input we push it back only in case it is
1094 representable within one byte. */
1099 STRING_ADD_CHAR (wstr
, val
, wchar_t);
1109 if (!(flags
& SUPPRESS
))
1117 num
.ul
= read_in
- 1; /* -1 because we already read one char. */
1120 if (wp
[c
] == not_in
)
1125 STRING_ADD_CHAR (str
, c
, char);
1129 while (width
!= 0 && inchar () != EOF
);
1131 if (read_in
== num
.ul
)
1134 if (!(flags
& SUPPRESS
))
1142 case 'p': /* Generic pointer. */
1144 /* A PTR must be the same size as a `long int'. */
1145 flags
&= ~(SHORT
|LONGDBL
);
1152 /* The last thing we saw in the format string was a white space.
1153 Consume the last white spaces. */
1158 while (isspace (c
));
1162 /* Unlock stream. */
1170 __vfscanf (FILE *s
, const char *format
, va_list argptr
)
1172 return _IO_vfscanf (s
, format
, argptr
, NULL
);
1176 weak_alias (__vfscanf
, vfscanf
)