nspr: import 3.0 RC1 cutoff from CVS
[mozilla-nspr.git] / nsprpub / pr / src / io / prscanf.c
blob618f184f5f9bc8d2fe7a006e5a269a0d5e46b855
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
15 * The Original Code is the Netscape Portable Runtime (NSPR).
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1998-2000
20 * the Initial Developer. All Rights Reserved.
22 * Contributor(s):
24 * Alternatively, the contents of this file may be used under the terms of
25 * either the GNU General Public License Version 2 or later (the "GPL"), or
26 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
36 * ***** END LICENSE BLOCK ***** */
/*
 * Scan functions for NSPR types
 *
 * Author: Wan-Teh Chang
 *
 * Acknowledgment: The implementation is inspired by the source code
 * in P.J. Plauger's "The Standard C Library," Prentice-Hall, 1992.
 */

47 #include <limits.h>
48 #include <ctype.h>
49 #include <string.h>
50 #include <stdlib.h>
51 #ifdef SUNOS4
52 #include "md/sunos4.h" /* for strtoul */
53 #endif
54 #include "prprf.h"
55 #include "prdtoa.h"
56 #include "prlog.h"
57 #include "prerror.h"
/*
 * A function that reads a character from 'stream'.
 * Returns the character read, or EOF if end of stream is reached.
 */
typedef int (*_PRGetCharFN)(void *stream);

/*
 * A function that pushes the character 'ch' back to 'stream'.
 */
typedef void (*_PRUngetCharFN)(void *stream, int ch);

/*
 * The size specifier for the integer and floating point number
 * conversions in format control strings.
 */
typedef enum {
    _PR_size_none,  /* No size specifier is given */
    _PR_size_h,     /* The 'h' specifier, suggesting "short" */
    _PR_size_l,     /* The 'l' specifier, suggesting "long" */
    _PR_size_L,     /* The 'L' specifier, meaning a 'long double' */
    _PR_size_ll     /* The 'll' specifier, suggesting "long long" */
} _PRSizeSpec;

83 * The collection of data that is passed between the scan function
84 * and its subordinate functions. The fields of this structure
85 * serve as the input or output arguments for these functions.
87 typedef struct {
88 _PRGetCharFN get; /* get a character from input stream */
89 _PRUngetCharFN unget; /* unget (push back) a character */
90 void *stream; /* argument for get and unget */
91 va_list ap; /* the variable argument list */
92 int nChar; /* number of characters read from 'stream' */
94 PRBool assign; /* assign, or suppress assignment? */
95 int width; /* field width */
96 _PRSizeSpec sizeSpec; /* 'h', 'l', 'L', or 'll' */
98 PRBool converted; /* is the value actually converted? */
99 } ScanfState;
/*
 * GET reads one character through the 'get' callback of 'state' and
 * counts it in 'nChar'.  UNGET pushes 'ch' back through the 'unget'
 * callback and takes it off the count again.  Both evaluate 'state'
 * more than once, so the argument must not have side effects.
 */
#define GET(state) ((state)->nChar++, (state)->get((state)->stream))
#define UNGET(state, ch) \
        ((state)->nChar--, (state)->unget((state)->stream, (ch)))

/*
 * The following two macros, GET_IF_WITHIN_WIDTH and WITHIN_WIDTH,
 * are always used together.
 *
 * GET_IF_WITHIN_WIDTH calls the GET macro and assigns its return
 * value to 'ch' only if we have not exceeded the field width of
 * 'state'.  Therefore, after GET_IF_WITHIN_WIDTH, the value of
 * 'ch' is valid only if the macro WITHIN_WIDTH evaluates to true.
 *
 * GET_IF_WITHIN_WIDTH is wrapped in do { } while (0) so that it acts
 * as a single statement, including when it is the unbraced body of an
 * if that has an else.
 */

#define GET_IF_WITHIN_WIDTH(state, ch) \
        do { \
            if (--(state)->width >= 0) { \
                (ch) = GET(state); \
            } \
        } while (0)
#define WITHIN_WIDTH(state) ((state)->width >= 0)

122 * _pr_strtoull:
123 * Convert a string to an unsigned 64-bit integer. The string
124 * 'str' is assumed to be a representation of the integer in
125 * base 'base'.
127 * Warning:
128 * - Only handle base 8, 10, and 16.
129 * - No overflow checking.
132 static PRUint64
133 _pr_strtoull(const char *str, char **endptr, int base)
135 static const int BASE_MAX = 16;
136 static const char digits[] = "0123456789abcdef";
137 char *digitPtr;
138 PRUint64 x; /* return value */
139 PRInt64 base64;
140 const char *cPtr;
141 PRBool negative;
142 const char *digitStart;
144 PR_ASSERT(base == 0 || base == 8 || base == 10 || base == 16);
145 if (base < 0 || base == 1 || base > BASE_MAX) {
146 if (endptr) {
147 *endptr = (char *) str;
148 return LL_ZERO;
152 cPtr = str;
153 while (isspace(*cPtr)) {
154 ++cPtr;
157 negative = PR_FALSE;
158 if (*cPtr == '-') {
159 negative = PR_TRUE;
160 cPtr++;
161 } else if (*cPtr == '+') {
162 cPtr++;
165 if (base == 16) {
166 if (*cPtr == '0' && (cPtr[1] == 'x' || cPtr[1] == 'X')) {
167 cPtr += 2;
169 } else if (base == 0) {
170 if (*cPtr != '0') {
171 base = 10;
172 } else if (cPtr[1] == 'x' || cPtr[1] == 'X') {
173 base = 16;
174 cPtr += 2;
175 } else {
176 base = 8;
179 PR_ASSERT(base != 0);
180 LL_I2L(base64, base);
181 digitStart = cPtr;
183 /* Skip leading zeros */
184 while (*cPtr == '0') {
185 cPtr++;
188 LL_I2L(x, 0);
189 while ((digitPtr = (char*)memchr(digits, tolower(*cPtr), base)) != NULL) {
190 PRUint64 d;
192 LL_I2L(d, (digitPtr - digits));
193 LL_MUL(x, x, base64);
194 LL_ADD(x, x, d);
195 cPtr++;
198 if (cPtr == digitStart) {
199 if (endptr) {
200 *endptr = (char *) str;
202 return LL_ZERO;
205 if (negative) {
206 #ifdef HAVE_LONG_LONG
207 /* The cast to a signed type is to avoid a compiler warning */
208 x = -(PRInt64)x;
209 #else
210 LL_NEG(x, x);
211 #endif
214 if (endptr) {
215 *endptr = (char *) cPtr;
217 return x;
/*
 * The maximum field width (in number of characters) that is enough
 * (may be more than necessary) to represent a 64-bit integer or
 * floating point number.
 */
#define FMAX 31
#define DECIMAL_POINT '.'

228 static PRStatus
229 GetInt(ScanfState *state, int code)
231 char buf[FMAX + 1], *p;
232 int ch;
233 static const char digits[] = "0123456789abcdefABCDEF";
234 PRBool seenDigit = PR_FALSE;
235 int base;
236 int dlen;
238 switch (code) {
239 case 'd': case 'u':
240 base = 10;
241 break;
242 case 'i':
243 base = 0;
244 break;
245 case 'x': case 'X': case 'p':
246 base = 16;
247 break;
248 case 'o':
249 base = 8;
250 break;
251 default:
252 return PR_FAILURE;
254 if (state->width == 0 || state->width > FMAX) {
255 state->width = FMAX;
257 p = buf;
258 GET_IF_WITHIN_WIDTH(state, ch);
259 if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) {
260 *p++ = ch;
261 GET_IF_WITHIN_WIDTH(state, ch);
263 if (WITHIN_WIDTH(state) && ch == '0') {
264 seenDigit = PR_TRUE;
265 *p++ = ch;
266 GET_IF_WITHIN_WIDTH(state, ch);
267 if (WITHIN_WIDTH(state)
268 && (ch == 'x' || ch == 'X')
269 && (base == 0 || base == 16)) {
270 base = 16;
271 *p++ = ch;
272 GET_IF_WITHIN_WIDTH(state, ch);
273 } else if (base == 0) {
274 base = 8;
277 if (base == 0 || base == 10) {
278 dlen = 10;
279 } else if (base == 8) {
280 dlen = 8;
281 } else {
282 PR_ASSERT(base == 16);
283 dlen = 16 + 6; /* 16 digits, plus 6 in uppercase */
285 while (WITHIN_WIDTH(state) && memchr(digits, ch, dlen)) {
286 *p++ = ch;
287 GET_IF_WITHIN_WIDTH(state, ch);
288 seenDigit = PR_TRUE;
290 if (WITHIN_WIDTH(state)) {
291 UNGET(state, ch);
293 if (!seenDigit) {
294 return PR_FAILURE;
296 *p = '\0';
297 if (state->assign) {
298 if (code == 'd' || code == 'i') {
299 if (state->sizeSpec == _PR_size_ll) {
300 PRInt64 llval = _pr_strtoull(buf, NULL, base);
301 *va_arg(state->ap, PRInt64 *) = llval;
302 } else {
303 long lval = strtol(buf, NULL, base);
305 if (state->sizeSpec == _PR_size_none) {
306 *va_arg(state->ap, PRIntn *) = lval;
307 } else if (state->sizeSpec == _PR_size_h) {
308 *va_arg(state->ap, PRInt16 *) = (PRInt16)lval;
309 } else if (state->sizeSpec == _PR_size_l) {
310 *va_arg(state->ap, PRInt32 *) = lval;
311 } else {
312 return PR_FAILURE;
315 } else {
316 if (state->sizeSpec == _PR_size_ll) {
317 PRUint64 llval = _pr_strtoull(buf, NULL, base);
318 *va_arg(state->ap, PRUint64 *) = llval;
319 } else {
320 unsigned long lval = strtoul(buf, NULL, base);
322 if (state->sizeSpec == _PR_size_none) {
323 *va_arg(state->ap, PRUintn *) = lval;
324 } else if (state->sizeSpec == _PR_size_h) {
325 *va_arg(state->ap, PRUint16 *) = (PRUint16)lval;
326 } else if (state->sizeSpec == _PR_size_l) {
327 *va_arg(state->ap, PRUint32 *) = lval;
328 } else {
329 return PR_FAILURE;
333 state->converted = PR_TRUE;
335 return PR_SUCCESS;
338 static PRStatus
339 GetFloat(ScanfState *state)
341 char buf[FMAX + 1], *p;
342 int ch;
343 PRBool seenDigit = PR_FALSE;
345 if (state->width == 0 || state->width > FMAX) {
346 state->width = FMAX;
348 p = buf;
349 GET_IF_WITHIN_WIDTH(state, ch);
350 if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) {
351 *p++ = ch;
352 GET_IF_WITHIN_WIDTH(state, ch);
354 while (WITHIN_WIDTH(state) && isdigit(ch)) {
355 *p++ = ch;
356 GET_IF_WITHIN_WIDTH(state, ch);
357 seenDigit = PR_TRUE;
359 if (WITHIN_WIDTH(state) && ch == DECIMAL_POINT) {
360 *p++ = ch;
361 GET_IF_WITHIN_WIDTH(state, ch);
362 while (WITHIN_WIDTH(state) && isdigit(ch)) {
363 *p++ = ch;
364 GET_IF_WITHIN_WIDTH(state, ch);
365 seenDigit = PR_TRUE;
370 * This is not robust. For example, "1.2e+" would confuse
371 * the code below to read 'e' and '+', only to realize that
372 * it should have stopped at "1.2". But we can't push back
373 * more than one character, so there is nothing I can do.
376 /* Parse exponent */
377 if (WITHIN_WIDTH(state) && (ch == 'e' || ch == 'E') && seenDigit) {
378 *p++ = ch;
379 GET_IF_WITHIN_WIDTH(state, ch);
380 if (WITHIN_WIDTH(state) && (ch == '+' || ch == '-')) {
381 *p++ = ch;
382 GET_IF_WITHIN_WIDTH(state, ch);
384 while (WITHIN_WIDTH(state) && isdigit(ch)) {
385 *p++ = ch;
386 GET_IF_WITHIN_WIDTH(state, ch);
389 if (WITHIN_WIDTH(state)) {
390 UNGET(state, ch);
392 if (!seenDigit) {
393 return PR_FAILURE;
395 *p = '\0';
396 if (state->assign) {
397 PRFloat64 dval = PR_strtod(buf, NULL);
399 state->converted = PR_TRUE;
400 if (state->sizeSpec == _PR_size_l) {
401 *va_arg(state->ap, PRFloat64 *) = dval;
402 } else if (state->sizeSpec == _PR_size_L) {
403 #if defined(OSF1) || defined(IRIX)
404 *va_arg(state->ap, double *) = dval;
405 #else
406 *va_arg(state->ap, long double *) = dval;
407 #endif
408 } else {
409 *va_arg(state->ap, float *) = (float) dval;
412 return PR_SUCCESS;
416 * Convert, and return the end of the conversion spec.
417 * Return NULL on error.
420 static const char *
421 Convert(ScanfState *state, const char *fmt)
423 const char *cPtr;
424 int ch;
425 char *cArg = NULL;
427 state->converted = PR_FALSE;
428 cPtr = fmt;
429 if (*cPtr != 'c' && *cPtr != 'n' && *cPtr != '[') {
430 do {
431 ch = GET(state);
432 } while (isspace(ch));
433 UNGET(state, ch);
435 switch (*cPtr) {
436 case 'c':
437 if (state->assign) {
438 cArg = va_arg(state->ap, char *);
440 if (state->width == 0) {
441 state->width = 1;
443 for (; state->width > 0; state->width--) {
444 ch = GET(state);
445 if (ch == EOF) {
446 return NULL;
447 } else if (state->assign) {
448 *cArg++ = ch;
451 if (state->assign) {
452 state->converted = PR_TRUE;
454 break;
455 case 'p':
456 case 'd': case 'i': case 'o':
457 case 'u': case 'x': case 'X':
458 if (GetInt(state, *cPtr) == PR_FAILURE) {
459 return NULL;
461 break;
462 case 'e': case 'E': case 'f':
463 case 'g': case 'G':
464 if (GetFloat(state) == PR_FAILURE) {
465 return NULL;
467 break;
468 case 'n':
469 /* do not consume any input */
470 if (state->assign) {
471 switch (state->sizeSpec) {
472 case _PR_size_none:
473 *va_arg(state->ap, PRIntn *) = state->nChar;
474 break;
475 case _PR_size_h:
476 *va_arg(state->ap, PRInt16 *) = state->nChar;
477 break;
478 case _PR_size_l:
479 *va_arg(state->ap, PRInt32 *) = state->nChar;
480 break;
481 case _PR_size_ll:
482 LL_I2L(*va_arg(state->ap, PRInt64 *), state->nChar);
483 break;
484 default:
485 PR_ASSERT(0);
488 break;
489 case 's':
490 if (state->width == 0) {
491 state->width = INT_MAX;
493 if (state->assign) {
494 cArg = va_arg(state->ap, char *);
496 for (; state->width > 0; state->width--) {
497 ch = GET(state);
498 if ((ch == EOF) || isspace(ch)) {
499 UNGET(state, ch);
500 break;
502 if (state->assign) {
503 *cArg++ = ch;
506 if (state->assign) {
507 *cArg = '\0';
508 state->converted = PR_TRUE;
510 break;
511 case '%':
512 ch = GET(state);
513 if (ch != '%') {
514 UNGET(state, ch);
515 return NULL;
517 break;
518 case '[':
520 PRBool complement = PR_FALSE;
521 const char *closeBracket;
522 size_t n;
524 if (*++cPtr == '^') {
525 complement = PR_TRUE;
526 cPtr++;
528 closeBracket = strchr(*cPtr == ']' ? cPtr + 1 : cPtr, ']');
529 if (closeBracket == NULL) {
530 return NULL;
532 n = closeBracket - cPtr;
533 if (state->width == 0) {
534 state->width = INT_MAX;
536 if (state->assign) {
537 cArg = va_arg(state->ap, char *);
539 for (; state->width > 0; state->width--) {
540 ch = GET(state);
541 if ((ch == EOF)
542 || (!complement && !memchr(cPtr, ch, n))
543 || (complement && memchr(cPtr, ch, n))) {
544 UNGET(state, ch);
545 break;
547 if (state->assign) {
548 *cArg++ = ch;
551 if (state->assign) {
552 *cArg = '\0';
553 state->converted = PR_TRUE;
555 cPtr = closeBracket;
557 break;
558 default:
559 return NULL;
561 return cPtr;
564 static PRInt32
565 DoScanf(ScanfState *state, const char *fmt)
567 PRInt32 nConverted = 0;
568 const char *cPtr;
569 int ch;
571 state->nChar = 0;
572 cPtr = fmt;
573 while (1) {
574 if (isspace(*cPtr)) {
575 /* white space: skip */
576 do {
577 cPtr++;
578 } while (isspace(*cPtr));
579 do {
580 ch = GET(state);
581 } while (isspace(ch));
582 UNGET(state, ch);
583 } else if (*cPtr == '%') {
584 /* format spec: convert */
585 cPtr++;
586 state->assign = PR_TRUE;
587 if (*cPtr == '*') {
588 cPtr++;
589 state->assign = PR_FALSE;
591 for (state->width = 0; isdigit(*cPtr); cPtr++) {
592 state->width = state->width * 10 + *cPtr - '0';
594 state->sizeSpec = _PR_size_none;
595 if (*cPtr == 'h') {
596 cPtr++;
597 state->sizeSpec = _PR_size_h;
598 } else if (*cPtr == 'l') {
599 cPtr++;
600 if (*cPtr == 'l') {
601 cPtr++;
602 state->sizeSpec = _PR_size_ll;
603 } else {
604 state->sizeSpec = _PR_size_l;
606 } else if (*cPtr == 'L') {
607 cPtr++;
608 state->sizeSpec = _PR_size_L;
610 cPtr = Convert(state, cPtr);
611 if (cPtr == NULL) {
612 return (nConverted > 0 ? nConverted : EOF);
614 if (state->converted) {
615 nConverted++;
617 cPtr++;
618 } else {
619 /* others: must match */
620 if (*cPtr == '\0') {
621 return nConverted;
623 ch = GET(state);
624 if (ch != *cPtr) {
625 UNGET(state, ch);
626 return nConverted;
628 cPtr++;
/*
 * StringGetChar:
 *     The 'get' function for scanning a string.  'stream' is the
 *     address of a pointer into a NUL-terminated string.  Returns the
 *     character under the pointer as an unsigned char value and
 *     advances the pointer, or returns EOF without advancing when the
 *     pointer is at the terminating NUL.
 */
static int
StringGetChar(void *stream)
{
    const char **cursor = (const char **) stream;
    const char *cPtr = *cursor;

    if (*cPtr == '\0') {
        return EOF;
    } else {
        *cursor = cPtr + 1;
        return (unsigned char) *cPtr;
    }
}

/*
 * StringUngetChar:
 *     The 'unget' function for scanning a string.  'stream' is the
 *     address of a pointer into the string.  Moves the pointer back
 *     by one character unless 'ch' is EOF; StringGetChar does not
 *     advance when it returns EOF, so there is nothing to undo then.
 *     The value of 'ch' is otherwise unused.
 */
static void
StringUngetChar(void *stream, int ch)
{
    const char **cursor = (const char **) stream;

    if (ch != EOF) {
        *cursor = *cursor - 1;
    }
}

656 PR_IMPLEMENT(PRInt32)
657 PR_sscanf(const char *buf, const char *fmt, ...)
659 PRInt32 rv;
660 ScanfState state;
662 state.get = &StringGetChar;
663 state.unget = &StringUngetChar;
664 state.stream = (void *) &buf;
665 va_start(state.ap, fmt);
666 rv = DoScanf(&state, fmt);
667 va_end(state.ap);
668 return rv;