Release PSPP version 1.4.0.
[pspp.git] / src / data / data-in.c
blob33eeb111384971179087e368c3bff10005ce8b54
/* PSPP - a program for statistical analysis.
   Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
17 #include <config.h>
19 #include "data-in.h"
21 #include <ctype.h>
22 #include <errno.h>
23 #include <limits.h>
24 #include <math.h>
25 #include <stdarg.h>
26 #include <stdbool.h>
27 #include <stddef.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
32 #include "calendar.h"
33 #include "dictionary.h"
34 #include "format.h"
35 #include "identifier.h"
36 #include "libpspp/assertion.h"
37 #include "libpspp/compiler.h"
38 #include "libpspp/i18n.h"
39 #include "libpspp/integer-format.h"
40 #include "libpspp/misc.h"
41 #include "libpspp/str.h"
42 #include "settings.h"
43 #include "value.h"
45 #include "gl/c-ctype.h"
46 #include "gl/c-strtod.h"
47 #include "gl/minmax.h"
48 #include "gl/xalloc.h"
50 #include "gettext.h"
51 #define _(msgid) gettext (msgid)
53 /* Information about parsing one data field. */
54 struct data_in
56 struct substring input; /* Source. */
57 enum fmt_type format; /* Input format. */
59 union value *output; /* Destination. */
60 int width; /* Output width. */
63 typedef char *data_in_parser_func (struct data_in *);
64 #define FMT(NAME, METHOD, IMIN, OMIN, IO, CATEGORY) \
65 static data_in_parser_func parse_##METHOD;
66 #include "format.def"
68 static void default_result (struct data_in *);
69 static bool trim_spaces_and_check_missing (struct data_in *);
71 static int hexit_value (int c);
73 /* Parses the characters in INPUT, which are encoded in the given
74 INPUT_ENCODING, according to FORMAT.
76 Stores the parsed representation in OUTPUT, which the caller must have
77 initialized with the given WIDTH (0 for a numeric field, otherwise the
78 string width). If FORMAT is FMT_A, then OUTPUT_ENCODING must specify the
79 correct encoding for OUTPUT (normally obtained via dict_get_encoding()).
81 If successful NULL is the return value. Otherwise a string describing
82 the problem is returned. The caller must free this string.
84 char *
85 data_in (struct substring input, const char *input_encoding,
86 enum fmt_type format,
87 union value *output, int width, const char *output_encoding)
89 static data_in_parser_func *const handlers[FMT_NUMBER_OF_FORMATS] =
91 #define FMT(NAME, METHOD, IMIN, OMIN, IO, CATEGORY) parse_##METHOD,
92 #include "format.def"
95 struct data_in i;
97 enum fmt_category cat;
98 const char *dest_encoding;
99 char *s;
100 char *error;
102 assert ((width != 0) == fmt_is_string (format));
104 i.format = format;
106 i.output = output;
107 i.width = width;
109 if (ss_is_empty (input))
111 default_result (&i);
112 return NULL;
115 cat = fmt_get_category (format);
116 if (cat & (FMT_CAT_BASIC | FMT_CAT_HEXADECIMAL | FMT_CAT_CUSTOM
117 | FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT))
119 /* We're going to parse these into numbers. For this purpose we want to
120 deal with them in the local "C" encoding. Any character not in that
121 encoding wouldn't be valid anyhow. */
122 dest_encoding = C_ENCODING;
124 else if (cat & (FMT_CAT_BINARY | FMT_CAT_LEGACY))
126 /* Don't recode these binary formats at all, since they are not text. */
127 dest_encoding = NULL;
129 else
131 assert (cat == FMT_CAT_STRING);
132 if (format == FMT_AHEX)
134 /* We want the hex digits in the local "C" encoding, even though the
135 result may not be in that encoding. */
136 dest_encoding = C_ENCODING;
138 else
140 /* Use the final output encoding. */
141 dest_encoding = output_encoding;
145 if (dest_encoding != NULL)
147 i.input = recode_substring_pool (dest_encoding, input_encoding, input,
148 NULL);
149 s = i.input.string;
151 else
153 i.input = input;
154 s = NULL;
157 error = handlers[i.format] (&i);
158 if (error != NULL)
159 default_result (&i);
161 free (s);
163 return error;
166 bool
167 data_in_msg (struct substring input, const char *input_encoding,
168 enum fmt_type format,
169 union value *output, int width, const char *output_encoding)
171 char *error = data_in (input, input_encoding, format,
172 output, width, output_encoding);
173 if (error != NULL)
175 msg (SW, _("Data is not valid as format %s: %s"),
176 fmt_name (format), error);
177 free (error);
178 return false;
180 else
181 return true;
184 static bool
185 number_has_implied_decimals (const char *s, enum fmt_type type)
187 int decimal = settings_get_style (type)->decimal;
188 bool got_digit = false;
189 for (;;)
191 switch (*s)
193 case '0': case '1': case '2': case '3': case '4':
194 case '5': case '6': case '7': case '8': case '9':
195 got_digit = true;
196 break;
198 case '+': case '-':
199 if (got_digit)
200 return false;
201 break;
203 case 'e': case 'E': case 'd': case 'D':
204 return false;
206 case '.': case ',':
207 if (*s == decimal)
208 return false;
209 break;
211 case '\0':
212 return true;
214 default:
215 break;
218 s++;
222 static bool
223 has_implied_decimals (struct substring input, const char *input_encoding,
224 enum fmt_type format)
226 bool retval;
227 char *s;
229 switch (format)
231 case FMT_F:
232 case FMT_COMMA:
233 case FMT_DOT:
234 case FMT_DOLLAR:
235 case FMT_PCT:
236 case FMT_E:
237 case FMT_Z:
238 break;
240 case FMT_N:
241 case FMT_IB:
242 case FMT_PIB:
243 case FMT_P:
244 case FMT_PK:
245 return true;
247 default:
248 return false;
251 s = recode_string (C_ENCODING, input_encoding,
252 ss_data (input), ss_length (input));
253 retval = (format == FMT_Z
254 ? strchr (s, '.') == NULL
255 : number_has_implied_decimals (s, format));
256 free (s);
258 return retval;
261 /* In some cases, when no decimal point is explicitly included in numeric
262 input, its position is implied by the number of decimal places in the input
263 format. In such a case, this function may be called just after data_in().
264 Its arguments are a subset of that function's arguments plus D, the number
265 of decimal places associated with FORMAT.
267 If it is appropriate, this function modifies the numeric value in OUTPUT. */
268 void
269 data_in_imply_decimals (struct substring input, const char *input_encoding,
270 enum fmt_type format, int d, union value *output)
272 if (d > 0 && output->f != SYSMIS
273 && has_implied_decimals (input, input_encoding, format))
274 output->f /= pow (10., d);
277 /* Format parsers. */
279 /* Parses F, COMMA, DOT, DOLLAR, PCT, and E input formats. */
280 static char *
281 parse_number (struct data_in *i)
283 const struct fmt_number_style *style =
284 settings_get_style (i->format);
286 struct string tmp;
288 int save_errno;
289 char *tail;
291 if (fmt_get_category (i->format) == FMT_CAT_CUSTOM)
293 style = settings_get_style (FMT_F);
296 /* Trim spaces and check for missing value representation. */
297 if (trim_spaces_and_check_missing (i))
298 return NULL;
300 ds_init_empty (&tmp);
301 ds_extend (&tmp, 64);
303 /* Prefix character may precede sign. */
304 if (style->prefix.s[0] != '\0')
306 ss_match_byte (&i->input, style->prefix.s[0]);
307 ss_ltrim (&i->input, ss_cstr (CC_SPACES));
310 /* Sign. */
311 if (ss_match_byte (&i->input, '-'))
313 ds_put_byte (&tmp, '-');
314 ss_ltrim (&i->input, ss_cstr (CC_SPACES));
316 else
318 ss_match_byte (&i->input, '+');
319 ss_ltrim (&i->input, ss_cstr (CC_SPACES));
322 /* Prefix character may follow sign. */
323 if (style->prefix.s[0] != '\0')
325 ss_match_byte (&i->input, style->prefix.s[0]);
326 ss_ltrim (&i->input, ss_cstr (CC_SPACES));
329 /* Digits before decimal point. */
330 while (c_isdigit (ss_first (i->input)))
332 ds_put_byte (&tmp, ss_get_byte (&i->input));
333 if (style->grouping != 0)
334 ss_match_byte (&i->input, style->grouping);
337 /* Decimal point and following digits. */
338 if (ss_match_byte (&i->input, style->decimal))
340 ds_put_byte (&tmp, '.');
341 while (c_isdigit (ss_first (i->input)))
342 ds_put_byte (&tmp, ss_get_byte (&i->input));
345 /* Exponent. */
346 if (!ds_is_empty (&tmp)
347 && !ss_is_empty (i->input)
348 && strchr ("eEdD-+", ss_first (i->input)))
350 ds_put_byte (&tmp, 'e');
352 if (strchr ("eEdD", ss_first (i->input)))
354 ss_advance (&i->input, 1);
355 ss_match_byte (&i->input, ' ');
358 if (ss_first (i->input) == '-' || ss_first (i->input) == '+')
360 if (ss_get_byte (&i->input) == '-')
361 ds_put_byte (&tmp, '-');
362 ss_match_byte (&i->input, ' ');
365 while (c_isdigit (ss_first (i->input)))
366 ds_put_byte (&tmp, ss_get_byte (&i->input));
369 /* Suffix character. */
370 if (style->suffix.s[0] != '\0')
371 ss_match_byte (&i->input, style->suffix.s[0]);
373 if (!ss_is_empty (i->input))
375 char *error;
376 if (ds_is_empty (&tmp))
377 error = xstrdup (_("Field contents are not numeric."));
378 else
379 error = xstrdup (_("Number followed by garbage."));
380 ds_destroy (&tmp);
381 return error;
384 /* Let c_strtod() do the conversion. */
385 save_errno = errno;
386 errno = 0;
387 i->output->f = c_strtod (ds_cstr (&tmp), &tail);
388 if (*tail != '\0')
390 errno = save_errno;
391 ds_destroy (&tmp);
392 return xstrdup (_("Invalid numeric syntax."));
394 else if (errno == ERANGE)
396 if (fabs (i->output->f) > 1)
398 i->output->f = SYSMIS;
399 ds_destroy (&tmp);
400 return xstrdup (_("Too-large number set to system-missing."));
402 else
404 i->output->f = 0.0;
405 ds_destroy (&tmp);
406 return xstrdup (_("Too-small number set to zero."));
409 else
410 errno = save_errno;
412 ds_destroy (&tmp);
413 return NULL;
416 /* Parses N format. */
417 static char *
418 parse_N (struct data_in *i)
420 int c;
422 i->output->f = 0;
423 while ((c = ss_get_byte (&i->input)) != EOF)
425 if (!c_isdigit (c))
426 return xstrdup (_("All characters in field must be digits."));
427 i->output->f = i->output->f * 10.0 + (c - '0');
430 return NULL;
433 /* Parses PIBHEX format. */
434 static char *
435 parse_PIBHEX (struct data_in *i)
437 double n;
438 int c;
440 n = 0.0;
442 while ((c = ss_get_byte (&i->input)) != EOF)
444 if (!c_isxdigit (c))
445 return xstrdup (_("Unrecognized character in field."));
446 n = n * 16.0 + hexit_value (c);
449 i->output->f = n;
450 return NULL;
453 /* Parses RBHEX format. */
454 static char *
455 parse_RBHEX (struct data_in *i)
457 double d;
458 size_t j;
460 memset (&d, 0, sizeof d);
461 for (j = 0; !ss_is_empty (i->input) && j < sizeof d; j++)
463 int hi = ss_get_byte (&i->input);
464 int lo = ss_get_byte (&i->input);
465 if (lo == EOF)
466 return xstrdup (_("Field must have even length."));
467 else if (!c_isxdigit (hi) || !c_isxdigit (lo))
468 return xstrdup (_("Field must contain only hex digits."));
469 ((unsigned char *) &d)[j] = 16 * hexit_value (hi) + hexit_value (lo);
472 i->output->f = d;
474 return NULL;
/* Digits for Z format.  A character's position in this string encodes both
   its value (position modulo 10) and its sign (positions 20 and up are
   negative). */
static const char z_digits[] = "0123456789{ABCDEFGHI}JKLMNOPQR";

/* Returns true if C is a Z format digit, false otherwise. */
static bool
is_z_digit (int c)
{
  /* Reject 0 explicitly, because strchr() would match the terminator. */
  if (c <= 0)
    return false;

  return strchr (z_digits, c) != NULL;
}

/* Returns the (absolute value of the) value of C as a Z format
   digit.  C must be a Z format digit. */
static int
z_digit_value (int c)
{
  const char *pos;

  assert (is_z_digit (c));
  pos = strchr (z_digits, c);
  return (pos - z_digits) % 10;
}

/* Returns true if Z format digit C represents a negative value,
   false otherwise.  C must be a Z format digit. */
static bool
is_negative_z_digit (int c)
{
  const char *pos;

  assert (is_z_digit (c));
  pos = strchr (z_digits, c);
  return (pos - z_digits) >= 20;
}
505 /* Parses Z format. */
506 static char *
507 parse_Z (struct data_in *i)
509 struct string tmp;
511 int save_errno;
513 bool got_dot = false;
514 bool got_final_digit = false;
516 /* Trim spaces and check for missing value representation. */
517 if (trim_spaces_and_check_missing (i))
518 return NULL;
520 ds_init_empty (&tmp);
521 ds_extend (&tmp, 64);
523 ds_put_byte (&tmp, '+');
524 while (!ss_is_empty (i->input))
526 int c = ss_get_byte (&i->input);
527 if (c_isdigit (c) && !got_final_digit)
528 ds_put_byte (&tmp, c);
529 else if (is_z_digit (c) && !got_final_digit)
531 ds_put_byte (&tmp, z_digit_value (c) + '0');
532 if (is_negative_z_digit (c))
533 ds_data (&tmp)[0] = '-';
534 got_final_digit = true;
536 else if (c == '.' && !got_dot)
538 ds_put_byte (&tmp, '.');
539 got_dot = true;
541 else
543 ds_destroy (&tmp);
544 return xstrdup (_("Invalid zoned decimal syntax."));
548 if (!ss_is_empty (i->input))
550 char *error;
552 if (ds_length (&tmp) == 1)
553 error = xstrdup (_("Field contents are not numeric."));
554 else
555 error = xstrdup (_("Number followed by garbage."));
557 ds_destroy (&tmp);
558 return error;
561 /* Let c_strtod() do the conversion. */
562 save_errno = errno;
563 errno = 0;
564 i->output->f = c_strtod (ds_cstr (&tmp), NULL);
565 if (errno == ERANGE)
567 if (fabs (i->output->f) > 1)
569 i->output->f = SYSMIS;
570 ds_destroy (&tmp);
571 return xstrdup (_("Too-large number set to system-missing."));
573 else
575 i->output->f = 0.0;
576 ds_destroy (&tmp);
577 return xstrdup (_("Too-small number set to zero."));
580 else
581 errno = save_errno;
583 ds_destroy (&tmp);
584 return NULL;
587 /* Parses IB format. */
588 static char *
589 parse_IB (struct data_in *i)
591 size_t bytes;
592 uint64_t value;
593 uint64_t sign_bit;
595 bytes = MIN (8, ss_length (i->input));
596 value = integer_get (settings_get_input_integer_format (), ss_data (i->input), bytes);
598 sign_bit = UINT64_C(1) << (8 * bytes - 1);
599 if (!(value & sign_bit))
600 i->output->f = value;
601 else
603 /* Sign-extend to full 64 bits. */
604 value -= sign_bit << 1;
605 i->output->f = -(double) -value;
608 return NULL;
611 /* Parses PIB format. */
612 static char *
613 parse_PIB (struct data_in *i)
615 i->output->f = integer_get (settings_get_input_integer_format (), ss_data (i->input),
616 MIN (8, ss_length (i->input)));
618 return NULL;
/* Consumes the first byte of S, which must not be empty.  Stores its high
   4 bits in *HIGH_NIBBLE and its low 4 bits in *LOW_NIBBLE. */
static void
get_nibbles (struct substring *s, int *high_nibble, int *low_nibble)
{
  int byte = ss_get_byte (s);

  assert (byte != EOF);
  *high_nibble = (byte >> 4) & 0x0f;
  *low_nibble = byte & 0x0f;
}
632 /* Parses P format. */
633 static char *
634 parse_P (struct data_in *i)
636 int high_nibble, low_nibble;
638 i->output->f = 0.0;
640 while (ss_length (i->input) > 1)
642 get_nibbles (&i->input, &high_nibble, &low_nibble);
643 if (high_nibble > 9 || low_nibble > 9)
644 return xstrdup (_("Invalid syntax for P field."));
645 i->output->f = (100 * i->output->f) + (10 * high_nibble) + low_nibble;
648 get_nibbles (&i->input, &high_nibble, &low_nibble);
649 if (high_nibble > 9)
650 return xstrdup (_("Invalid syntax for P field."));
651 i->output->f = (10 * i->output->f) + high_nibble;
652 if (low_nibble < 10)
653 i->output->f = (10 * i->output->f) + low_nibble;
654 else if (low_nibble == 0xb || low_nibble == 0xd)
655 i->output->f = -i->output->f;
657 return NULL;
660 /* Parses PK format. */
661 static char *
662 parse_PK (struct data_in *i)
664 i->output->f = 0.0;
665 while (!ss_is_empty (i->input))
667 int high_nibble, low_nibble;
669 get_nibbles (&i->input, &high_nibble, &low_nibble);
670 if (high_nibble > 9 || low_nibble > 9)
672 i->output->f = SYSMIS;
673 return NULL;
675 i->output->f = (100 * i->output->f) + (10 * high_nibble) + low_nibble;
678 return NULL;
681 /* Parses RB format. */
682 static char *
683 parse_RB (struct data_in *i)
685 enum float_format ff = settings_get_input_float_format ();
686 size_t size = float_get_size (ff);
687 if (ss_length (i->input) >= size)
688 float_convert (ff, ss_data (i->input),
689 FLOAT_NATIVE_DOUBLE, &i->output->f);
690 else
691 i->output->f = SYSMIS;
693 return NULL;
696 /* Parses A format. */
697 static char *
698 parse_A (struct data_in *i)
700 /* This is equivalent to buf_copy_rpad, except that we posibly
701 do a character set recoding in the middle. */
702 uint8_t *dst = i->output->s;
703 size_t dst_size = i->width;
704 const char *src = ss_data (i->input);
705 size_t src_size = ss_length (i->input);
707 memcpy (dst, src, MIN (src_size, dst_size));
709 if (dst_size > src_size)
710 memset (&dst[src_size], ' ', dst_size - src_size);
712 return NULL;
715 /* Parses AHEX format. */
716 static char *
717 parse_AHEX (struct data_in *i)
719 uint8_t *s = i->output->s;
720 size_t j;
722 for (j = 0; ; j++)
724 int hi = ss_get_byte (&i->input);
725 int lo = ss_get_byte (&i->input);
726 if (hi == EOF)
727 break;
728 else if (lo == EOF)
729 return xstrdup (_("Field must have even length."));
731 if (!c_isxdigit (hi) || !c_isxdigit (lo))
732 return xstrdup (_("Field must contain only hex digits."));
734 if (j < i->width)
735 s[j] = hexit_value (hi) * 16 + hexit_value (lo);
738 memset (&s[j], ' ', i->width - j);
740 return NULL;
743 /* Date & time format components. */
/* Sign of a time value, as tracked while parsing a date or time field. */
enum time_sign
  {
    SIGN_NO_TIME,       /* No time component encountered yet. */
    SIGN_POSITIVE,      /* Time is positive. */
    SIGN_NEGATIVE       /* Time is negative. */
  };
753 /* Parses a signed decimal integer from at most the first
754 MAX_DIGITS characters in I, storing the result into *RESULT.
755 Returns true if successful, false if no integer was
756 present. */
757 static char * WARN_UNUSED_RESULT
758 parse_int (struct data_in *i, long *result, size_t max_digits)
760 struct substring head = ss_head (i->input, max_digits);
761 size_t n = ss_get_long (&head, result);
762 if (n)
764 ss_advance (&i->input, n);
765 return NULL;
767 else
768 return xstrdup (_("Syntax error in date field."));
/* Parses a day of the month between 1 and 31 from I, storing it into
   *DAY.
   Returns NULL if successful, otherwise an error message that the caller
   must free. */
static char *
parse_day (struct data_in *i, long *day)
{
  char *error = parse_int (i, day, SIZE_MAX);
  if (error != NULL)
    return error;

  if (*day < 1 || *day > 31)
    return xasprintf (_("Day (%ld) must be between 1 and 31."), *day);

  return NULL;
}
786 /* If *TIME_SIGN is SIGN_NO_TIME, allows a sign to precede the
787 time and sets *TIME_SIGN. Otherwise, does not allow a sign. */
788 static void
789 parse_time_sign (struct data_in *i, enum time_sign *time_sign)
791 if (*time_sign == SIGN_NO_TIME)
793 if (ss_match_byte (&i->input, '-'))
794 *time_sign = SIGN_NEGATIVE;
795 else
797 ss_match_byte (&i->input, '+');
798 *time_sign = SIGN_POSITIVE;
/* Parses a nonnegative integer from the beginning of I and adds
   SECONDS_PER_UNIT times that integer to *TIME.
   Returns NULL if successful, otherwise (if no integer was present or it
   was negative) an error message that the caller must free. */
static char *
parse_time_units (struct data_in *i, double seconds_per_unit, double *time)

{
  long units;
  char *error = parse_int (i, &units, SIZE_MAX);

  if (error != NULL)
    return error;
  if (units < 0)
    return xstrdup (_("Syntax error in date field."));

  *time += units * seconds_per_unit;
  return NULL;
}
823 /* Parses a data delimiter from the beginning of I.
824 Returns true if successful, false if no delimiter was
825 present. */
826 static char *
827 parse_date_delimiter (struct data_in *i)
829 if (ss_ltrim (&i->input, ss_cstr ("-/.," CC_SPACES)))
830 return NULL;
832 return xstrdup (_("Delimiter expected between fields in date."));
835 /* Parses spaces at the beginning of I. */
836 static void
837 parse_spaces (struct data_in *i)
839 ss_ltrim (&i->input, ss_cstr (CC_SPACES));
842 static struct substring
843 parse_name_token (struct data_in *i)
845 struct substring token;
846 ss_get_bytes (&i->input, ss_span (i->input, ss_cstr (CC_LETTERS)), &token);
847 return token;
850 /* Reads a name from I and sets *OUTPUT to the value associated
851 with that name. If ALLOW_SUFFIXES is true, then names that
852 begin with one of the names are accepted; otherwise, only
853 exact matches (except for case) are allowed.
854 Returns true if successful, false otherwise. */
855 static bool
856 match_name (struct substring token, const char *const *names, long *output)
858 int i;
860 for (i = 1; *names != NULL; i++)
861 if (ss_equals_case (ss_cstr (*names++), token))
863 *output = i;
864 return true;
867 return false;
870 /* Parses a month name or number from the beginning of I,
871 storing the month (in range 1...12) into *MONTH.
872 Returns true if successful, false if no month was present. */
873 static char *
874 parse_month (struct data_in *i, long *month)
876 if (c_isdigit (ss_first (i->input)))
878 char *error = parse_int (i, month, SIZE_MAX);
879 if (error != NULL)
880 return error;
881 if (*month >= 1 && *month <= 12)
882 return NULL;
884 else
886 static const char *const english_names[] =
888 "jan", "feb", "mar", "apr", "may", "jun",
889 "jul", "aug", "sep", "oct", "nov", "dec",
890 NULL,
893 static const char *const roman_names[] =
895 "i", "ii", "iii", "iv", "v", "vi",
896 "vii", "viii", "ix", "x", "xi", "xii",
897 NULL,
900 struct substring token = parse_name_token (i);
901 if (match_name (ss_head (token, 3), english_names, month)
902 || match_name (ss_head (token, 4), roman_names, month))
903 return NULL;
906 return xstrdup (_("Unrecognized month format. Months may be specified "
907 "as Arabic or Roman numerals or as at least 3 letters "
908 "of their English names."));
/* Parses a year of at most MAX_DIGITS from the beginning of I,
   storing a "4-digit" year into *YEAR.  A year in the range 0...99 is
   mapped into the 100-year window that begins at settings_get_epoch().
   Returns NULL if successful, otherwise (if no year was present or the
   result is outside 1582...19999) an error message that the caller must
   free. */
static char *
parse_year (struct data_in *i, long *year, size_t max_digits)
{
  char *error = parse_int (i, year, max_digits);
  if (error != NULL)
    return error;

  if (*year >= 0 && *year <= 99)
    {
      int epoch = settings_get_epoch ();
      int epoch_century = ROUND_DOWN (epoch, 100);
      int epoch_offset = epoch - epoch_century;

      /* Years before the epoch's offset within its century belong to the
         following century. */
      *year += *year >= epoch_offset ? epoch_century : epoch_century + 100;
    }

  if (*year < 1582 || *year > 19999)
    return xasprintf (_("Year (%ld) must be between 1582 and 19999."), *year);

  return NULL;
}
936 /* Returns true if input in I has been exhausted,
937 false otherwise. */
938 static char *
939 parse_trailer (struct data_in *i)
941 if (ss_is_empty (i->input))
942 return NULL;
944 return xasprintf (_("Trailing garbage `%.*s' following date."),
945 (int) ss_length (i->input), ss_data (i->input));
948 /* Parses a 3-digit Julian day-of-year value from I into *YDAY.
949 Returns true if successful, false on failure. */
950 static char *
951 parse_yday (struct data_in *i, long *yday)
953 struct substring num_s;
954 long num;
956 ss_get_bytes (&i->input, 3, &num_s);
957 if (ss_span (num_s, ss_cstr (CC_DIGITS)) != 3)
958 return xstrdup (_("Julian day must have exactly three digits."));
959 else if (!ss_get_long (&num_s, &num) || num < 1 || num > 366)
960 return xasprintf (_("Julian day (%ld) must be between 1 and 366."), num);
962 *yday = num;
963 return NULL;
/* Parses a quarter-of-year integer between 1 and 4 from I.
   Stores the first month of that quarter (1, 4, 7, or 10) into *MONTH.
   Returns NULL if successful, otherwise an error message that the caller
   must free. */
static char *
parse_quarter (struct data_in *i, long int *month)
{
  long quarter;
  char *error = parse_int (i, &quarter, SIZE_MAX);

  if (error != NULL)
    return error;

  if (quarter < 1 || quarter > 4)
    return xasprintf (_("Quarter (%ld) must be between 1 and 4."), quarter);

  *month = (quarter - 1) * 3 + 1;
  return NULL;
}
/* Parses a week-of-year integer between 1 and 53 from I.
   Stores the day of the year on which that week starts, counting week 1 as
   starting on day 1, into *YDAY.
   Returns NULL if successful, otherwise an error message that the caller
   must free. */
static char *
parse_week (struct data_in *i, long int *yday)
{
  long week;
  char *error = parse_int (i, &week, SIZE_MAX);

  if (error != NULL)
    return error;

  if (week < 1 || week > 53)
    return xasprintf (_("Week (%ld) must be between 1 and 53."), week);

  *yday = (week - 1) * 7 + 1;
  return NULL;
}
1008 /* Parses a time delimiter from the beginning of I.
1009 Returns true if successful, false if no delimiter was
1010 present. */
1011 static char *
1012 parse_time_delimiter (struct data_in *i)
1014 if (ss_ltrim (&i->input, ss_cstr (":" CC_SPACES)) > 0)
1015 return NULL;
1017 return xstrdup (_("Delimiter expected between fields in time."));
1020 /* Parses minutes and optional seconds from the beginning of I.
1021 The time is converted into seconds, which are added to
1022 *TIME.
1023 Returns true if successful, false if an error was found. */
1024 static char *
1025 parse_minute_second (struct data_in *i, double *time)
1027 long minute;
1028 char buf[64];
1029 char *error;
1030 char *cp;
1032 /* Parse minutes. */
1033 error = parse_int (i, &minute, SIZE_MAX);
1034 if (error != NULL)
1035 return error;
1036 if (i->format != FMT_MTIME && (minute < 0 || minute > 59))
1037 return xasprintf (_("Minute (%ld) must be between 0 and 59."), minute);
1038 *time += 60. * minute;
1040 /* Check for seconds. */
1041 if (ss_ltrim (&i->input, ss_cstr (":" CC_SPACES)) == 0
1042 || !c_isdigit (ss_first (i->input)))
1043 return NULL;
1045 /* Parse seconds. */
1046 cp = buf;
1047 while (c_isdigit (ss_first (i->input)))
1048 *cp++ = ss_get_byte (&i->input);
1049 if (ss_match_byte (&i->input, settings_get_decimal_char (FMT_F)))
1050 *cp++ = '.';
1051 while (c_isdigit (ss_first (i->input)))
1052 *cp++ = ss_get_byte (&i->input);
1053 *cp = '\0';
1055 *time += c_strtod (buf, NULL);
1057 return NULL;
1060 /* Parses a weekday name from the beginning of I,
1061 storing a value of 1=Sunday...7=Saturday into *WEEKDAY.
1062 Returns true if successful, false if an error was found. */
1063 static char *
1064 parse_weekday (struct data_in *i, long *weekday)
1066 static const char *const weekday_names[] =
1068 "su", "mo", "tu", "we", "th", "fr", "sa",
1069 NULL,
1072 struct substring token = parse_name_token (i);
1073 bool ok = match_name (ss_head (token, 2), weekday_names, weekday);
1074 if (!ok)
1075 return xstrdup (_("Unrecognized weekday name. At least the first two "
1076 "letters of an English weekday name must be "
1077 "specified."));
1078 return NULL;
1081 /* Date & time formats. */
1083 /* Parses WKDAY format. */
1084 static char *
1085 parse_WKDAY (struct data_in *i)
1087 long weekday;
1088 char *error;
1090 if (trim_spaces_and_check_missing (i))
1091 return NULL;
1093 error = parse_weekday (i, &weekday);
1094 if (error == NULL)
1095 error = parse_trailer (i);
1097 i->output->f = weekday;
1098 return error;
1101 /* Parses MONTH format. */
1102 static char *
1103 parse_MONTH (struct data_in *i)
1105 long month;
1106 char *error;
1108 if (trim_spaces_and_check_missing (i))
1109 return NULL;
1111 error = parse_month (i, &month);
1112 if (error == NULL)
1113 error = parse_trailer (i);
1115 i->output->f = month;
1116 return error;
1119 /* Parses DATE, ADATE, EDATE, JDATE, SDATE, QYR, MOYR, KWYR,
1120 DATETIME, YMDHMS, MTIME, TIME, and DTIME formats. */
1121 static char *
1122 parse_date (struct data_in *i)
1124 long int year = INT_MIN;
1125 long int month = 1;
1126 long int day = 1;
1127 long int yday = 1;
1128 double time = 0, date = 0;
1129 enum time_sign time_sign = SIGN_NO_TIME;
1131 const char *template = fmt_date_template (i->format, 0);
1132 size_t template_width = strlen (template);
1133 char *error;
1135 if (trim_spaces_and_check_missing (i))
1136 return NULL;
1138 while (*template != '\0')
1140 unsigned char ch = *template;
1141 int count = 1;
1143 while (template[count] == ch)
1144 count++;
1145 template += count;
1147 switch (ch)
1149 case 'd':
1150 error = count < 3 ? parse_day (i, &day) : parse_yday (i, &yday);
1151 break;
1152 case 'm':
1153 error = parse_month (i, &month);
1154 break;
1155 case 'y':
1157 size_t max_digits;
1158 if (!c_isalpha (*template))
1159 max_digits = SIZE_MAX;
1160 else
1162 if (ss_length (i->input) >= template_width + 2)
1163 max_digits = 4;
1164 else
1165 max_digits = 2;
1167 error = parse_year (i, &year, max_digits);
1169 break;
1170 case 'q':
1171 error = parse_quarter (i, &month);
1172 break;
1173 case 'w':
1174 error = parse_week (i, &yday);
1175 break;
1176 case 'D':
1177 parse_time_sign (i, &time_sign);
1178 error = parse_time_units (i, 60. * 60. * 24., &time);
1179 break;
1180 case 'H':
1181 parse_time_sign (i, &time_sign);
1182 error = parse_time_units (i, 60. * 60., &time);
1183 break;
1184 case 'M':
1185 if (i->format == FMT_MTIME)
1186 parse_time_sign (i, &time_sign);
1187 error = parse_minute_second (i, &time);
1188 break;
1189 case '-':
1190 case '/':
1191 case '.':
1192 error = parse_date_delimiter (i);
1193 break;
1194 case ':':
1195 error = parse_time_delimiter (i);
1196 break;
1197 case ' ':
1198 if (i->format != FMT_MOYR)
1200 parse_spaces (i);
1201 error = NULL;
1203 else
1204 error = parse_date_delimiter (i);
1205 break;
1206 default:
1207 assert (count == 1);
1208 if (!ss_match_byte (&i->input, c_toupper (ch))
1209 && !ss_match_byte (&i->input, c_tolower (ch)))
1210 error = xasprintf (_("`%c' expected in date field."), ch);
1211 else
1212 error = NULL;
1213 break;
1215 if (error != NULL)
1216 return error;
1218 error = parse_trailer (i);
1219 if (error != NULL)
1220 return error;
1222 if (year != INT_MIN)
1224 char *error;
1225 double ofs;
1227 ofs = calendar_gregorian_to_offset (year, month, day, &error);
1228 if (ofs == SYSMIS)
1229 return error;
1230 date = (yday - 1 + ofs) * 60. * 60. * 24.;
1232 else
1233 date = 0.;
1234 i->output->f = date + (time_sign == SIGN_NEGATIVE ? -time : time);
1236 return NULL;
1239 /* Utility functions. */
1241 /* Sets the default result for I.
1242 For a numeric format, this is the value set on SET BLANKS
1243 (typically system-missing); for a string format, it is all
1244 spaces. */
1245 static void
1246 default_result (struct data_in *i)
1248 if (fmt_is_string (i->format))
1249 memset (i->output->s, ' ', i->width);
1250 else
1251 i->output->f = settings_get_blanks ();
1254 /* Trims leading and trailing spaces from I.
1255 If the result is empty, or a single period character, then
1256 sets the default result and returns true; otherwise, returns
1257 false. */
1258 static bool
1259 trim_spaces_and_check_missing (struct data_in *i)
1261 ss_trim (&i->input, ss_cstr (" "));
1262 if (ss_is_empty (i->input) || ss_equals (i->input, ss_cstr (".")))
1264 default_result (i);
1265 return true;
1267 return false;
/* Returns the integer value, in the range 0...15, of hex digit C, which may
   be in upper or lower case.  C must be a hex digit; callers check with
   c_isxdigit() first. */
static int
hexit_value (int c)
{
  /* Lowercase digits first, then the uppercase letters, so that positions
     0...15 are their own values and positions 16...21 map to 10...15. */
  static const char digits[] = "0123456789abcdefABCDEF";
  const char *cp;
  int position;

  /* strchr() treats the string's terminator as a match, so a null byte
     must be ruled out explicitly. */
  cp = c > 0 ? strchr (digits, c) : NULL;
  assert (cp != NULL);
  if (cp == NULL)
    return 0;

  position = cp - digits;
  return position < 16 ? position : position - 6;
}