1 /* Copyright (C) 1991, 92, 93, 94, 95, 96, 97 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Library General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
14 You should have received a copy of the GNU Library General Public
15 License along with the GNU C Library; see the file COPYING.LIB. If not,
16 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 Boston, MA 02111-1307, USA. */
19 #include "../locale/localeinfo.h"
28 #include <bits/libc-lock.h>
32 #define LONGLONG long long
37 /* These are the flags recorded for the conversion specification
   currently being parsed. */
38 # define LONG 0x001 /* l: long or double */
39 # define LONGDBL 0x002 /* L: long long or long double */
40 # define SHORT 0x004 /* h: short */
41 # define SUPPRESS 0x008 /* *: suppress assignment */
42 # define POINTER 0x010 /* weird %p pointer (`fake hex') */
43 # define NOSKIP 0x020 /* do not skip blanks */
44 # define WIDTH 0x040 /* width was given */
45 # define GROUP 0x080 /* ': group numbers */
46 # define MALLOC 0x100 /* a: malloc strings */
47 # define CHAR 0x200 /* hh: char */
49 # define TYPEMOD (LONG|LONGDBL|SHORT|CHAR) /* mask of the size-modifier flags (l, L, h, hh) */
57 # define va_list _IO_va_list
58 # define ungetc(c, s) ((void) ((int) c != EOF && --read_in), \
60 # define inchar() ((c = _IO_getc_unlocked (s)), \
61 (void) (c != EOF && ++read_in), c)
62 # define encode_error() do { \
63 if (errp != NULL) *errp |= 4; \
64 _IO_funlockfile (s); \
65 __set_errno (EILSEQ); \
68 # define conv_error() do { \
69 if (errp != NULL) *errp |= 2; \
70 _IO_funlockfile (s); \
73 # define input_error() do { \
74 _IO_funlockfile (s); \
75 if (errp != NULL) *errp |= 1; \
78 # define memory_error() do { \
79 _IO_funlockfile (s); \
80 __set_errno (ENOMEM); \
83 # define ARGCHECK(s, format) \
86 /* Check file argument for consistency. */ \
87 CHECK_FILE (s, EOF); \
88 if (s->_flags & _IO_NO_READS) \
90 __set_errno (EBADF); \
93 else if (format == NULL) \
99 # define LOCK_STREAM(S) \
100 __libc_cleanup_region_start ((void (*) (void *)) &_IO_funlockfile, (S)); \
102 # define UNLOCK_STREAM __libc_cleanup_region_end (1)
104 # define ungetc(c, s) ((void) (c != EOF && --read_in), ungetc (c, s))
105 # define inchar() ((c = getc (s)), (void) (c != EOF && ++read_in), c)
106 # define encode_error() do { \
108 __set_errno (EILSEQ); \
111 # define conv_error() do { \
115 # define input_error() do { \
117 return done ?: EOF; \
119 # define memory_error() do { \
121 __set_errno (ENOMEM); \
124 # define ARGCHECK(s, format) \
127 /* Check file argument for consistency. */ \
128 if (!__validfp (s) || !s->__mode.__read) \
130 __set_errno (EBADF); \
133 else if (format == NULL) \
135 __set_errno (EINVAL); \
140 /* XXX For now !!! */
141 # define flockfile(S) /* nothing */
142 # define funlockfile(S) /* nothing */
143 # define LOCK_STREAM(S)
144 # define UNLOCK_STREAM
146 # define LOCK_STREAM(S) \
147 __libc_cleanup_region_start (&__funlockfile, (S)); \
149 # define UNLOCK_STREAM __libc_cleanup_region_end (1)
154 /* Read formatted input from S according to the format string
155 FORMAT, using the argument list in ARG.
156 Return the number of assignments made, or EOF for an input error. */
159 _IO_vfscanf (s
, format
, argptr
, errp
)
166 __vfscanf (FILE *s
, const char *format
, va_list argptr
)
170 register const char *f
= format
;
171 register unsigned char fc
; /* Current character of the format. */
172 register size_t done
= 0; /* Assignments done. */
173 register size_t read_in
= 0; /* Chars read in. */
174 register int c
= 0; /* Last char read. */
175 register int width
; /* Maximum field width. */
176 register int flags
; /* Modifiers for current format element. */
178 /* Status for reading F-P nums. */
179 char got_dot
, got_e
, negative
;
180 /* If a [...] is a [^...]. */
182 #define exp_char not_in
183 /* Base for integral numbers. */
185 /* Signedness for integral numbers. */
187 #define is_hexa number_signed
188 /* Decimal point character. */
190 /* The thousands character of the current locale. */
192 /* Integral holding variables. */
196 unsigned long long int uq
;
198 unsigned long int ul
;
200 /* Character-buffer pointer. */
202 wchar_t *wstr
= NULL
;
203 char **strptr
= NULL
;
205 /* We must not react to white space immediately because it can
206 be matched even if no character is available in the input
207 stream anymore. */
210 char *tw
; /* Temporary pointer. */
211 char *wp
= NULL
; /* Workspace. */
212 size_t wpmax
= 0; /* Maximal size of workspace. */
213 size_t wpsize
; /* Currently used bytes in workspace. */
217 if (wpsize == wpmax) \
220 wpmax = UCHAR_MAX > 2 * wpmax ? UCHAR_MAX : 2 * wpmax; \
221 wp = (char *) alloca (wpmax); \
223 memcpy (wp, old, wpsize); \
225 wp[wpsize++] = (Ch); \
230 __va_copy (arg
, argptr
);
232 arg
= (va_list) argptr
;
235 ARGCHECK (s
, format
);
237 /* Figure out the decimal point character. */
238 if (mbtowc (&decimal
, _NL_CURRENT (LC_NUMERIC
, DECIMAL_POINT
),
239 strlen (_NL_CURRENT (LC_NUMERIC
, DECIMAL_POINT
))) <= 0)
240 decimal
= (wchar_t) *_NL_CURRENT (LC_NUMERIC
, DECIMAL_POINT
);
241 /* Figure out the thousands separator character. */
242 if (mbtowc (&thousands
, _NL_CURRENT (LC_NUMERIC
, THOUSANDS_SEP
),
243 strlen (_NL_CURRENT (LC_NUMERIC
, THOUSANDS_SEP
))) <= 0)
244 thousands
= (wchar_t) *_NL_CURRENT (LC_NUMERIC
, THOUSANDS_SEP
);
246 /* Lock the stream. */
249 /* Run through the format string. */
253 /* Extract the next argument, which is of type TYPE.
254 For a %N$... spec, this is the Nth argument from the beginning;
255 otherwise it is the next argument after the state now in ARG. */
257 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
258 ({ unsigned int pos = argpos; \
260 __va_copy (arg, argptr); \
262 (void) va_arg (arg, void *); \
263 va_arg (arg, type); \
267 /* XXX Possible optimization. */
268 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
269 ({ va_list arg = (va_list) argptr; \
270 arg = (va_list) ((char *) arg \
272 * __va_rounded_size (void *)); \
273 va_arg (arg, type); \
276 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
277 ({ unsigned int pos = argpos; \
278 va_list arg = (va_list) argptr; \
280 (void) va_arg (arg, void *); \
281 va_arg (arg, type); \
288 /* Non-ASCII, may be a multibyte. */
289 int len
= mblen (f
, strlen (f
));
311 /* Remember to skip spaces. */
318 /* Read a character. */
321 /* Characters other than format specs must just match. */
325 /* We saw white space char as the last character in the format
326 string. Now it's time to skip all leading white space. */
330 if (inchar () == EOF
&& errno
== EINTR
)
344 /* This is the start of the conversion string. */
347 /* Initialize state of modifiers. */
350 /* Prepare temporary buffer. */
353 /* Check for a positional parameter specification. */
358 argpos
= argpos
* 10 + (*f
++ - '0');
363 /* Oops; that was actually the field width. */
371 /* Check for the assignment-suppressing and the number grouping flag. */
372 while (*f
== '*' || *f
== '\'')
383 /* We have seen width. */
387 /* Find the maximum field width. */
398 /* Check for type modifiers. */
399 while (*f
== 'h' || *f
== 'l' || *f
== 'L' || *f
== 'a' || *f
== 'q')
403 /* int's are short int's. */
404 if (flags
& (LONG
|LONGDBL
|CHAR
))
405 /* Signal illegal format element. */
416 if (flags
& (SHORT
|LONGDBL
|CHAR
))
418 else if (flags
& LONG
)
420 /* A double `l' is equivalent to an `L'. */
425 /* int's are long int's. */
430 /* double's are long double's, and int's are long long int's. */
432 /* Signal illegal format element. */
437 /* The `a' is used as a flag only if followed by `s', `S' or
439 if (*f
!= 's' && *f
!= 'S' && *f
!= '[')
445 /* Signal illegal format element. */
447 /* String conversions (%s, %[) take a `char **'
448 arg and fill it in with a malloc'd pointer. */
453 /* End of the format string? */
457 /* We must take care of EINTR errors. */
458 if (c
== EOF
&& errno
== EINTR
)
461 /* Find the conversion specifier. */
463 if (skip_space
|| (fc
!= '[' && fc
!= 'c' && fc
!= 'C' && fc
!= 'n'))
465 /* Eat whitespace. */
467 if (inchar () == EOF
&& errno
== EINTR
)
476 case '%': /* Must match a literal '%'. */
485 case 'n': /* Answer number of assignments done. */
486 /* Corrigendum 1 to ISO C 1990 describes the allowed flags
487 with the 'n' conversion specifier. */
488 if (!(flags
& SUPPRESS
))
490 /* Don't count the read-ahead. */
492 *ARG (long long int *) = read_in
;
493 else if (flags
& LONG
)
494 *ARG (long int *) = read_in
;
495 else if (flags
& SHORT
)
496 *ARG (short int *) = read_in
;
498 *ARG (int *) = read_in
;
500 #ifdef NO_BUG_IN_ISO_C_CORRIGENDUM_1
501 /* We have a severe problem here. The ISO C standard
502 contradicts itself in explaining the effect of the %n
503 format in `scanf'. While in ISO C:1990 and the ISO C
504 Amendment 1:1995 the result is described as
506 Execution of a %n directive does not affect the
507 assignment count returned at the completion of
508 execution of the f(w)scanf function.
510 in ISO C Corrigendum 1:1994 the following was added:
513 Add the following fourth example:
516 int d1, d2, n1, n2, i;
517 i = sscanf("123", "%d%n%n%d", &d1, &n1, &n2, &d2);
518 the value 123 is assigned to d1 and the value 3 to n1.
519 Because %n can never get an input failure the value
520 of 3 is also assigned to n2. The value of d2 is not
521 affected. The value 3 is assigned to i.
523 We go for now with the historically correct code from ISO C,
524 i.e., we don't count the %n assignments. If it ever
525 should prove to be wrong just remove the #ifdef above. */
531 case 'c': /* Match characters. */
532 if ((flags
& LONG
) == 0)
534 if (!(flags
& SUPPRESS
))
548 if (!(flags
& SUPPRESS
))
552 while (--width
> 0 && inchar () != EOF
);
555 while (--width
> 0 && inchar () != EOF
);
558 /* I.e., EOF was read. */
561 if (!(flags
& SUPPRESS
))
568 /* Get UTF-8 encoded wide character. Here we assume (as in
569 other parts of the libc) that we only have to handle
576 if (!(flags
& SUPPRESS
))
578 wstr
= ARG (wchar_t *);
585 #define NEXT_WIDE_CHAR(First) \
588 /* EOF is only an error for the first character. */ \
599 if ((c & 0xc0) == 0x80 || (c & 0xfe) == 0xfe) \
601 if ((c & 0xe0) == 0xc0) \
603 /* We expect two bytes. */ \
607 else if ((c & 0xf0) == 0xe0) \
609 /* We expect three bytes. */ \
613 else if ((c & 0xf8) == 0xf0) \
615 /* We expect four bytes. */ \
619 else if ((c & 0xfc) == 0xf8) \
621 /* We expect five bytes. */ \
627 /* We expect six bytes. */ \
636 || (c & 0xc0) == 0x80 || (c & 0xfe) == 0xfe) \
644 if (!(flags & SUPPRESS)) \
648 NEXT_WIDE_CHAR (first
);
653 /* I.e., EOF was read. */
656 if (!(flags
& SUPPRESS
))
661 case 's': /* Read a string. */
663 /* We have to process a wide character string. */
664 goto wide_char_string
;
666 #define STRING_ARG(Str, Type) \
667 if (!(flags & SUPPRESS)) \
669 if (flags & MALLOC) \
671 /* The string is to be stored in a malloc'd buffer. */ \
672 strptr = ARG (char **); \
673 if (strptr == NULL) \
675 /* Allocate an initial buffer. */ \
677 *strptr = malloc (strsize * sizeof (Type)); \
678 Str = (Type *) *strptr; \
681 Str = ARG (Type *); \
685 STRING_ARG (str
, char);
698 #define STRING_ADD_CHAR(Str, c, Type) \
699 if (!(flags & SUPPRESS)) \
702 if ((flags & MALLOC) && (char *) Str == *strptr + strsize) \
704 /* Enlarge the buffer. */ \
705 Str = realloc (*strptr, strsize * 2 * sizeof (Type)); \
708 /* Can't allocate that much. Last-ditch effort. */\
709 Str = realloc (*strptr, \
710 (strsize + 1) * sizeof (Type)); \
713 /* We lose. Oh well. \
714 Terminate the string and stop converting, \
715 so at least we don't skip any input. */ \
716 ((Type *) (*strptr))[strsize] = '\0'; \
722 *strptr = (char *) Str; \
723 Str = ((Type *) *strptr) + strsize; \
729 *strptr = (char *) Str; \
730 Str = ((Type *) *strptr) + strsize; \
735 STRING_ADD_CHAR (str
, c
, char);
736 } while ((width
<= 0 || --width
> 0) && inchar () != EOF
);
738 if (!(flags
& SUPPRESS
))
746 /* Wide character string. */
751 STRING_ARG (wstr
, wchar_t);
756 NEXT_WIDE_CHAR (first
);
760 /* XXX We would have to push back the whole wide char
761 with possibly many bytes. But since scanf does
762 not distinguish between white space characters
763 we can simply push back a single <SP> which is
764 guaranteed to be in the [:space:] class. */
769 STRING_ADD_CHAR (wstr
, val
, wchar_t);
772 while (width
<= 0 || --width
> 0);
774 if (!(flags
& SUPPRESS
))
782 case 'x': /* Hexadecimal integer. */
783 case 'X': /* Ditto. */
788 case 'o': /* Octal integer. */
793 case 'u': /* Unsigned decimal integer. */
798 case 'd': /* Signed decimal integer. */
803 case 'i': /* Generic number. */
812 /* Check for a sign. */
813 if (c
== '-' || c
== '+')
821 /* Look for a leading indication of base. */
822 if (width
!= 0 && c
== '0')
830 if (width
!= 0 && tolower (c
) == 'x')
848 /* Read the number into workspace. */
849 while (c
!= EOF
&& width
!= 0)
851 if (base
== 16 ? !isxdigit (c
) :
852 ((!isdigit (c
) || c
- '0' >= base
) &&
853 !((flags
& GROUP
) && base
== 10 && c
== thousands
)))
862 /* The just read character is not part of the number anymore. */
866 (wpsize
== 1 && (wp
[0] == '+' || wp
[0] == '-')))
867 /* There was no number. */
870 /* Convert the number. */
875 num
.q
= __strtoll_internal (wp
, &tw
, base
, flags
& GROUP
);
877 num
.uq
= __strtoull_internal (wp
, &tw
, base
, flags
& GROUP
);
882 num
.l
= __strtol_internal (wp
, &tw
, base
, flags
& GROUP
);
884 num
.ul
= __strtoul_internal (wp
, &tw
, base
, flags
& GROUP
);
889 if (!(flags
& SUPPRESS
))
894 *ARG (unsigned LONGLONG
int *) = num
.uq
;
895 else if (flags
& LONG
)
896 *ARG (unsigned long int *) = num
.ul
;
897 else if (flags
& SHORT
)
898 *ARG (unsigned short int *)
899 = (unsigned short int) num
.ul
;
900 else if (flags
& CHAR
)
901 *ARG (unsigned char *) = (unsigned char) num
.ul
;
903 *ARG (unsigned int *) = (unsigned int) num
.ul
;
908 *ARG (LONGLONG
int *) = num
.q
;
909 else if (flags
& LONG
)
910 *ARG (long int *) = num
.l
;
911 else if (flags
& SHORT
)
912 *ARG (short int *) = (short int) num
.l
;
913 else if (flags
& CHAR
)
914 *ARG (signed char *) = (signed char) num
.ul
;
916 *ARG (int *) = (int) num
.l
;
922 case 'e': /* Floating-point numbers. */
933 /* Check for a sign. */
934 if (c
== '-' || c
== '+')
937 if (inchar () == EOF
)
938 /* EOF is only an input error before we read any chars. */
952 if (tolower (c
) == 'x')
954 /* It is a number in hexadecimal format. */
960 /* Grouping is not allowed. */
971 else if (!got_e
&& is_hexa
&& isxdigit (c
))
973 else if (got_e
&& wp
[wpsize
- 1] == exp_char
974 && (c
== '-' || c
== '+'))
976 else if (wpsize
> 0 && !got_e
&& tolower (c
) == exp_char
)
981 else if (c
== decimal
&& !got_dot
)
986 else if ((flags
& GROUP
) && c
== thousands
&& !got_dot
)
990 /* The last read character is not part of the number
998 while (width
!= 0 && inchar () != EOF
);
1000 /* Have we read any character? If we try to read a number
1001 in hexadecimal notation and we have read only the `0x'
1002 prefix this is an error. */
1003 if (wpsize
== 0 || (is_hexa
&& wpsize
== 2))
1006 /* Convert the number. */
1008 if (flags
& LONGDBL
)
1010 long double d
= __strtold_internal (wp
, &tw
, flags
& GROUP
);
1011 if (!(flags
& SUPPRESS
) && tw
!= wp
)
1012 *ARG (long double *) = negative
? -d
: d
;
1014 else if (flags
& LONG
)
1016 double d
= __strtod_internal (wp
, &tw
, flags
& GROUP
);
1017 if (!(flags
& SUPPRESS
) && tw
!= wp
)
1018 *ARG (double *) = negative
? -d
: d
;
1022 float d
= __strtof_internal (wp
, &tw
, flags
& GROUP
);
1023 if (!(flags
& SUPPRESS
) && tw
!= wp
)
1024 *ARG (float *) = negative
? -d
: d
;
1030 if (!(flags
& SUPPRESS
))
1034 case '[': /* Character class. */
1037 STRING_ARG (wstr
, wchar_t);
1038 c
= '\0'; /* This is to keep gcc quiet. */
1042 STRING_ARG (str
, char);
1057 /* Fill WP with byte flags indexed by character.
1058 We will use this flag map for matching input characters. */
1059 if (wpmax
< UCHAR_MAX
)
1062 wp
= (char *) alloca (wpmax
);
1064 memset (wp
, 0, UCHAR_MAX
);
1067 if (fc
== ']' || fc
== '-')
1069 /* If ] or - appears before any char in the set, it is not
1070 the terminator or separator, but the first char in the
1076 while ((fc
= *f
++) != '\0' && fc
!= ']')
1078 if (fc
== '-' && *f
!= '\0' && *f
!= ']' &&
1079 (unsigned char) f
[-2] <= (unsigned char) *f
)
1081 /* Add all characters from the one before the '-'
1082 up to (but not including) the next format char. */
1083 for (fc
= f
[-2]; fc
< *f
; ++fc
)
1087 /* Add the character to the flag map. */
1092 if (!(flags
& LONG
))
1105 NEXT_WIDE_CHAR (first
);
1106 if (val
> 255 || wp
[val
] == not_in
)
1108 /* XXX We have a problem here. We read a wide
1109 character and this possibly took several
1110 bytes. But we can only push back one single
1111 character. To be sure we don't create wrong
1112 input we push it back only in case it is
1113 representable within one byte. */
1118 STRING_ADD_CHAR (wstr
, val
, wchar_t);
1128 if (!(flags
& SUPPRESS
))
1136 num
.ul
= read_in
- 1; /* -1 because we already read one char. */
1139 if (wp
[c
] == not_in
)
1144 STRING_ADD_CHAR (str
, c
, char);
1148 while (width
!= 0 && inchar () != EOF
);
1150 if (read_in
== num
.ul
)
1153 if (!(flags
& SUPPRESS
))
1161 case 'p': /* Generic pointer. */
1163 /* A PTR must be the same size as a `long int'. */
1164 flags
&= ~(SHORT
|LONGDBL
);
1171 /* The last thing we saw in the format string was a white space.
1172 Consume the last white spaces. */
1177 while (isspace (c
));
1181 /* Unlock stream. */
/* Forward to _IO_vfscanf, passing NULL as the error-flag pointer so
   that no error bits are reported back through it. */
1189 __vfscanf (FILE *s
, const char *format
, va_list argptr
)
1191 return _IO_vfscanf (s
, format
, argptr
, NULL
);
1195 weak_alias (__vfscanf
, vfscanf
)