import less(1)
[unleashed/tickless.git] / usr / src / lib / libc / port / fp / char_to_decimal.h
blob8e841c5032f7ae714419c78700a183c807a09bb2
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
20 * CDDL HEADER END
23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 #pragma ident "%Z%%M% %I% %E% SMI"
30 * This file contains the common part of the functions string_to_decimal,
31 * func_to_decimal, and file_to_decimal. Much of this code has been dupli-
32 * cated in wstring_to_decimal (see wstod.c) with some simplifications and
33 * appropriate modifications for wide characters. DO NOT fix a bug here
34 * without fixing the same bug in wstring_to_decimal, if it applies.
36 * The code below makes the following assumptions.
38 * 1. The first six parameters to the function are declared with the
39 * following names and types:
41 * char **ppc;
42 * int nmax;
43 * int fortran_conventions;
44 * decimal_record *pd;
45 * enum decimal_string_form *pform;
46 * char **pechar;
48 * 2. Before this file is #included, the following variables have been
49 * defined and initialized as shown:
51 * char *cp;
52 * char *good = *ppc - 1;
53 * int current;
54 * int nread;
56 * If the first character can be read successfully, then current is set
57 * to the value of the first character, cp is set to *ppc, (char)current
58 * is stored at *cp, and nread = 1. If the first character cannot be
59 * read successfully, then current = EOF and nread = 0.
61 * 3. The macro NEXT is defined to expand to code that implements
62 * the following logic:
64 * if (nread < nmax) {
65 * current = <next character>;
66 * if (current != EOF) {
67 * *++cp = (char)current;
68 * nread++;
69 * }
70 * } else
71 * current = EOF;
73 * Note that nread always reflects the number of characters successfully
74 * read, the buffer pointed to by *ppc gets filled only with characters
75 * that have been successfully read, and cp always points to the location
76 * in the buffer that was filled by the last character successfully read.
77 * current == EOF if and only if we can't read any more, either because
78 * we've reached the end of the input file or the buffer is full (i.e.,
79 * we've read nmax characters).
81 * 4. After this file is #included, the following variables may be used
82 * and will have the specified values:
84 * *ppc, *pd, *pform, and *pechar will be set as documented in the
85 * manual page;
86 * nmax and fortran_conventions will be unchanged;
87 * nread will be the number of characters actually read;
88 * cp will point to the last character actually read, provided at least
89 * one character was read successfully (in which case cp >= *ppc).
92 #define UCASE(c) ((('a' <= c) && (c <= 'z'))? c - 32 : c)
94 #define NZDIGIT(c) (('1' <= c && c <= '9') || ((int)form < 0 && \
95 (('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'))))
98 static const char *infstring = "INFINITY";
99 static const char *nanstring = "NAN";
101 int sigfound, spacefound = 0;
102 int ids = 0;
103 int i, agree;
104 int nzbp = 0; /* number of zeros before point */
105 int nzap = 0; /* number of zeros after point */
106 char decpt;
107 int nfast, nfastlimit;
108 char *pfast;
109 int e, esign;
110 int expshift = 0;
111 enum decimal_string_form form;
114 * This routine assumes that the radix point is a single
115 * ASCII character, so that following this assignment, the
116 * condition (current == decpt) will correctly detect it.
118 if (fortran_conventions > 0)
119 decpt = '.';
120 else
121 decpt = *(localeconv()->decimal_point);
123 /* input is invalid until we find something */
124 pd->fpclass = fp_signaling;
125 pd->sign = 0;
126 pd->exponent = 0;
127 pd->ds[0] = '\0';
128 pd->more = 0;
129 pd->ndigits = 0;
130 *pform = form = invalid_form;
131 *pechar = NULL;
133 /* skip white space */
134 while (isspace(current)) {
135 spacefound = 1;
136 NEXT;
139 if (fortran_conventions >= 2 && spacefound) {
141 * We found at least one white space character. For
142 * Fortran formatted input, accept this; if we don't
143 * find anything else, we'll interpret it as a valid zero.
145 pd->fpclass = fp_zero;
146 form = whitespace_form;
147 sigfound = 0; /* 0 = only zeros found so far */
148 if (current == EOF) {
149 good = cp;
150 goto done;
151 } else {
152 good = cp - 1;
154 } else {
155 sigfound = -1; /* -1 = no digits found yet */
158 /* look for optional leading sign */
159 if (current == '+') {
160 NEXT;
161 } else if (current == '-') {
162 pd->sign = 1;
163 NEXT;
167 * Admissible first non-white-space, non-sign characters are
168 * 0-9, i, I, n, N, or the radix point.
170 if ('1' <= current && current <= '9') {
171 good = cp;
172 pd->fpclass = fp_normal;
173 form = fixed_int_form;
174 sigfound = 1; /* 1 = significant digits found */
175 pd->ds[ids++] = (char)current;
176 NEXT;
177 } else {
178 switch (current) {
179 case ' ':
180 if (fortran_conventions < 2)
181 goto done;
183 * When fortran_conventions >= 2, treat leading
184 * blanks the same as leading zeroes.
186 /*FALLTHRU*/
188 case '0':
190 * Accept the leading zero and set pd->fpclass
191 * accordingly, but don't set sigfound until we
192 * determine that this isn't a "fake" hex string
193 * (i.e., 0x.p...).
195 good = cp;
196 pd->fpclass = fp_zero;
197 if (fortran_conventions < 0) {
198 /* look for a hex fp string */
199 NEXT;
200 if (current == 'X' || current == 'x') {
201 /* assume hex fp form */
202 form = (enum decimal_string_form)-1;
203 expshift = 2;
204 NEXT;
206 * Only a digit or radix point can
207 * follow "0x".
209 if (NZDIGIT(current)) {
210 pd->fpclass = fp_normal;
211 good = cp;
212 sigfound = 1;
213 pd->ds[ids++] = (char)current;
214 NEXT;
215 break;
216 } else if (current == decpt) {
217 NEXT;
218 goto afterpoint;
219 } else if (current != '0') {
220 /* not hex fp after all */
221 form = fixed_int_form;
222 expshift = 0;
223 goto done;
225 } else {
226 form = fixed_int_form;
228 } else {
229 form = fixed_int_form;
232 /* skip all leading zeros */
233 while (current == '0' || (current == ' ' &&
234 fortran_conventions >= 2)) {
235 NEXT;
237 sigfound = 0; /* 0 = only zeros found so far */
238 if (current == EOF) {
239 good = cp;
240 goto done;
241 } else {
242 good = cp - 1;
244 break;
246 case 'i':
247 case 'I':
248 /* look for inf or infinity */
249 NEXT;
250 agree = 1;
251 while (agree <= 7 &&
252 UCASE(current) == infstring[agree]) {
253 NEXT;
254 agree++;
256 if (agree < 3)
257 goto done;
258 /* found valid infinity */
259 pd->fpclass = fp_infinity;
260 sigfound = 1;
261 __inf_read = 1;
262 if (agree < 8) {
263 good = (current == EOF)? cp + 3 - agree :
264 cp + 2 - agree;
265 form = inf_form;
266 } else {
267 good = (current == EOF)? cp : cp - 1;
268 form = infinity_form;
271 * Accept trailing blanks if no extra characters
272 * intervene.
274 if (fortran_conventions >= 2 && (agree == 3 ||
275 agree == 8)) {
276 while (current == ' ') {
277 NEXT;
279 good = (current == EOF)? cp : cp - 1;
281 goto done;
283 case 'n':
284 case 'N':
285 /* look for nan or nan(string) */
286 NEXT;
287 agree = 1;
288 while (agree <= 2 &&
289 UCASE(current) == nanstring[agree]) {
290 NEXT;
291 agree++;
293 if (agree < 3)
294 goto done;
295 /* found valid NaN */
296 good = (current == EOF)? cp : cp - 1;
297 pd->fpclass = fp_quiet;
298 form = nan_form;
299 sigfound = 1;
300 __nan_read = 1;
301 if (current == '(') {
302 /* accept parenthesized string */
303 NEXT;
304 if (fortran_conventions < 0) {
305 while ((isalnum(current) ||
306 current == '_') &&
307 ids < DECIMAL_STRING_LENGTH - 1) {
308 pd->ds[ids++] = (char)current;
309 NEXT;
311 while (isalnum(current) ||
312 current == '_') {
313 pd->more = 1;
314 NEXT;
316 } else {
317 while (current > 0 && current != ')' &&
318 ids < DECIMAL_STRING_LENGTH - 1) {
319 pd->ds[ids++] = (char)current;
320 NEXT;
322 while (current > 0 && current != ')') {
323 pd->more = 1;
324 NEXT;
327 if (current != ')')
328 goto done;
329 good = cp;
330 form = nanstring_form;
331 /* prepare for loop below */
332 if (fortran_conventions >= 2) {
333 NEXT;
336 /* accept trailing blanks */
337 if (fortran_conventions >= 2) {
338 while (current == ' ') {
339 NEXT;
341 good = (current == EOF)? cp : cp - 1;
343 goto done;
345 default:
346 if (current == decpt) {
348 * Don't accept the radix point just yet;
349 * we need to see at least one digit.
351 NEXT;
352 goto afterpoint;
354 goto done;
358 nextnumber:
360 * Admissible characters after the first digit are a valid digit,
361 * an exponent delimiter (E or e for any decimal form; +, -, D, d,
362 * Q, or q when fortran_conventions >= 2; P or p for hex form),
363 * or the radix point. (Note that we can't get here unless we've
364 * already found a digit.)
366 if (NZDIGIT(current)) {
368 * Found another nonzero digit. If there's enough room
369 * in pd->ds, store any intervening zeros we've found so far
370 * and then store this digit. Otherwise, stop storing
371 * digits in pd->ds and set pd->more.
373 if (ids + nzbp + 2 < DECIMAL_STRING_LENGTH) {
374 for (i = 0; i < nzbp; i++)
375 pd->ds[ids++] = '0';
376 pd->ds[ids++] = (char)current;
377 } else {
378 pd->exponent += (nzbp + 1) << expshift;
379 pd->more = 1;
380 if (ids < DECIMAL_STRING_LENGTH) {
381 pd->ds[ids] = '\0';
382 pd->ndigits = ids;
383 /* don't store any more digits */
384 ids = DECIMAL_STRING_LENGTH;
387 pd->fpclass = fp_normal;
388 sigfound = 1;
389 nzbp = 0;
390 NEXT;
393 * Use an optimized loop to grab a consecutive sequence
394 * of nonzero digits quickly.
396 nfastlimit = DECIMAL_STRING_LENGTH - 3 - ids;
397 for (nfast = 0, pfast = &(pd->ds[ids]);
398 nfast < nfastlimit && NZDIGIT(current);
399 nfast++) {
400 *pfast++ = (char)current;
401 NEXT;
403 ids += nfast;
404 if (current == '0')
405 goto nextnumberzero; /* common case */
406 /* advance good to the last accepted digit */
407 good = (current == EOF)? cp : cp - 1;
408 goto nextnumber;
409 } else {
410 switch (current) {
411 case ' ':
412 if (fortran_conventions < 2)
413 goto done;
414 if (fortran_conventions == 2) {
415 while (current == ' ') {
416 NEXT;
418 good = (current == EOF)? cp : cp - 1;
419 goto nextnumber;
422 * When fortran_conventions > 2, treat internal
423 * blanks the same as zeroes.
425 /*FALLTHRU*/
427 case '0':
428 nextnumberzero:
430 * Count zeros before the radix point. Later we
431 * will either put these zeros into pd->ds or add
432 * nzbp to pd->exponent to account for them.
434 while (current == '0' || (current == ' ' &&
435 fortran_conventions > 2)) {
436 nzbp++;
437 NEXT;
439 good = (current == EOF)? cp : cp - 1;
440 goto nextnumber;
442 case '+':
443 case '-':
444 case 'D':
445 case 'd':
446 case 'Q':
447 case 'q':
449 * Only accept these as the start of the exponent
450 * field if fortran_conventions is positive.
452 if (fortran_conventions <= 0)
453 goto done;
454 /*FALLTHRU*/
456 case 'E':
457 case 'e':
458 if ((int)form < 0)
459 goto done;
460 goto exponent;
462 case 'P':
463 case 'p':
464 if ((int)form > 0)
465 goto done;
466 goto exponent;
468 default:
469 if (current == decpt) {
470 /* accept the radix point */
471 good = cp;
472 if (form == fixed_int_form)
473 form = fixed_intdot_form;
474 NEXT;
475 goto afterpoint;
477 goto done;
481 afterpoint:
483 * Admissible characters after the radix point are a valid digit
484 * or an exponent delimiter. (Note that it is possible to get
485 * here even though we haven't found any digits yet.)
487 if (NZDIGIT(current)) {
488 /* found a digit after the point; revise form */
489 if (form == invalid_form || form == whitespace_form)
490 form = fixed_dotfrac_form;
491 else if (form == fixed_intdot_form)
492 form = fixed_intdotfrac_form;
493 good = cp;
494 if (sigfound < 1) {
495 /* no significant digits found until now */
496 pd->fpclass = fp_normal;
497 sigfound = 1;
498 pd->ds[ids++] = (char)current;
499 pd->exponent = (-(nzap + 1)) << expshift;
500 } else {
501 /* significant digits have been found */
502 if (ids + nzbp + nzap + 2 < DECIMAL_STRING_LENGTH) {
503 for (i = 0; i < nzbp + nzap; i++)
504 pd->ds[ids++] = '0';
505 pd->ds[ids++] = (char)current;
506 pd->exponent -= (nzap + 1) << expshift;
507 } else {
508 pd->exponent += nzbp << expshift;
509 pd->more = 1;
510 if (ids < DECIMAL_STRING_LENGTH) {
511 pd->ds[ids] = '\0';
512 pd->ndigits = ids;
513 /* don't store any more digits */
514 ids = DECIMAL_STRING_LENGTH;
518 nzbp = 0;
519 nzap = 0;
520 NEXT;
523 * Use an optimized loop to grab a consecutive sequence
524 * of nonzero digits quickly.
526 nfastlimit = DECIMAL_STRING_LENGTH - 3 - ids;
527 for (nfast = 0, pfast = &(pd->ds[ids]);
528 nfast < nfastlimit && NZDIGIT(current);
529 nfast++) {
530 *pfast++ = (char)current;
531 NEXT;
533 ids += nfast;
534 pd->exponent -= nfast << expshift;
535 if (current == '0')
536 goto zeroafterpoint;
537 /* advance good to the last accepted digit */
538 good = (current == EOF)? cp : cp - 1;
539 goto afterpoint;
540 } else {
541 switch (current) {
542 case ' ':
543 if (fortran_conventions < 2)
544 goto done;
545 if (fortran_conventions == 2) {
547 * Treat a radix point followed by blanks
548 * but no digits as zero so we'll pass FCVS.
550 if (sigfound == -1) {
551 pd->fpclass = fp_zero;
552 sigfound = 0;
554 while (current == ' ') {
555 NEXT;
557 good = (current == EOF)? cp : cp - 1;
558 goto afterpoint;
561 * when fortran_conventions > 2, treat internal
562 * blanks the same as zeroes
564 /*FALLTHRU*/
566 case '0':
567 /* found a digit after the point; revise form */
568 if (form == invalid_form || form == whitespace_form)
569 form = fixed_dotfrac_form;
570 else if (form == fixed_intdot_form)
571 form = fixed_intdotfrac_form;
572 if (sigfound == -1) {
573 pd->fpclass = fp_zero;
574 sigfound = 0;
576 zeroafterpoint:
578 * Count zeros after the radix point. If we find
579 * any more nonzero digits later, we will put these
580 * zeros into pd->ds and decrease pd->exponent by
581 * nzap.
583 while (current == '0' || (current == ' ' &&
584 fortran_conventions > 2)) {
585 nzap++;
586 NEXT;
588 if (current == EOF) {
589 good = cp;
590 goto done;
591 } else {
592 good = cp - 1;
594 goto afterpoint;
596 case '+':
597 case '-':
598 case 'D':
599 case 'd':
600 case 'Q':
601 case 'q':
603 * Only accept these as the start of the exponent
604 * field if fortran_conventions is positive.
606 if (fortran_conventions <= 0)
607 goto done;
608 /*FALLTHRU*/
610 case 'E':
611 case 'e':
612 /* don't accept exponent without preceding digits */
613 if (sigfound == -1 || (int)form < 0)
614 goto done;
615 break;
617 case 'P':
618 case 'p':
619 /* don't accept exponent without preceding digits */
620 if (sigfound == -1 || (int)form > 0)
621 goto done;
622 break;
624 default:
625 goto done;
629 exponent:
631 * Set *pechar to point to the character that looks like the
632 * beginning of the exponent field, then attempt to parse it.
634 *pechar = cp;
635 if (current != '+' && current != '-') {
636 /* skip the exponent character and following blanks */
637 NEXT;
638 if (fortran_conventions >= 2 && current == ' ') {
639 while (current == ' ') {
640 NEXT;
642 if (fortran_conventions > 2)
643 good = (current == EOF)? cp : cp - 1;
647 e = 0;
648 esign = 0;
650 /* look for optional exponent sign */
651 if (current == '+') {
652 NEXT;
653 } else if (current == '-') {
654 esign = 1;
655 NEXT;
659 * Accumulate explicit exponent. Note that if we don't find at
660 * least one digit, good won't be updated and e will remain 0.
661 * Also, we keep e from getting too large so we don't overflow
662 * the range of int (but notice that the threshold is large
663 * enough that any larger e would cause the result to underflow
664 * or overflow anyway).
666 while (('0' <= current && current <= '9') || current == ' ') {
667 if (current == ' ') {
668 if (fortran_conventions < 2)
669 break;
670 if (fortran_conventions == 2) {
671 NEXT;
672 continue;
674 current = '0';
676 good = cp;
677 if (e <= 1000000)
678 e = 10 * e + current - '0';
679 NEXT;
680 if (fortran_conventions == 2 && current == ' ') {
681 /* accept trailing blanks */
682 while (current == ' ') {
683 NEXT;
685 good = (current == EOF)? cp : cp - 1;
688 if (esign == 1)
689 pd->exponent -= e;
690 else
691 pd->exponent += e;
694 * If we successfully parsed an exponent field, update form
695 * accordingly. If we didn't, don't set *pechar.
697 if (good >= *pechar) {
698 switch (form) {
699 case whitespace_form:
700 case fixed_int_form:
701 form = floating_int_form;
702 break;
704 case fixed_intdot_form:
705 form = floating_intdot_form;
706 break;
708 case fixed_dotfrac_form:
709 form = floating_dotfrac_form;
710 break;
712 case fixed_intdotfrac_form:
713 form = floating_intdotfrac_form;
714 break;
716 } else {
717 *pechar = NULL;
720 done:
722 * If we found any zeros before the radix point that were not
723 * accounted for earlier, adjust the exponent. (This is only
724 * relevant when pd->fpclass == fp_normal, but it's harmless
725 * in all other cases.)
727 pd->exponent += nzbp << expshift;
729 /* terminate pd->ds if we haven't already */
730 if (ids < DECIMAL_STRING_LENGTH) {
731 pd->ds[ids] = '\0';
732 pd->ndigits = ids;
736 * If we accepted any characters, advance *ppc to point to the
737 * first character we didn't accept; otherwise, pass back a
738 * signaling nan.
740 if (good >= *ppc) {
741 *ppc = good + 1;
742 } else {
743 pd->fpclass = fp_signaling;
744 pd->sign = 0;
745 form = invalid_form;
748 *pform = form;