Fix misleadingly indented statements.
[pspp.git] / src / data / data-in.c
blobace167090c57b9f2f2fe97a4d4e96cd80405764d
1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 #include <config.h>
19 #include "data-in.h"
21 #include <ctype.h>
22 #include <errno.h>
23 #include <limits.h>
24 #include <math.h>
25 #include <stdarg.h>
26 #include <stdbool.h>
27 #include <stddef.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
32 #include "calendar.h"
33 #include "dictionary.h"
34 #include "format.h"
35 #include "identifier.h"
36 #include "libpspp/assertion.h"
37 #include "libpspp/compiler.h"
38 #include "libpspp/i18n.h"
39 #include "libpspp/integer-format.h"
40 #include "libpspp/misc.h"
41 #include "libpspp/str.h"
42 #include "settings.h"
43 #include "value.h"
45 #include "gl/c-ctype.h"
46 #include "gl/c-strtod.h"
47 #include "gl/minmax.h"
48 #include "gl/xalloc.h"
50 #include "gettext.h"
51 #define _(msgid) gettext (msgid)
53 /* Information about parsing one data field. */
54 struct data_in
56 struct substring input; /* Source. */
57 enum fmt_type format; /* Input format. */
59 union value *output; /* Destination. */
60 int width; /* Output width. */
63 typedef char *data_in_parser_func (struct data_in *);
64 #define FMT(NAME, METHOD, IMIN, OMIN, IO, CATEGORY) \
65 static data_in_parser_func parse_##METHOD;
66 #include "format.def"
68 static void default_result (struct data_in *);
69 static bool trim_spaces_and_check_missing (struct data_in *);
71 static int hexit_value (int c);
73 /* Parses the characters in INPUT, which are encoded in the given
74 INPUT_ENCODING, according to FORMAT.
76 Stores the parsed representation in OUTPUT, which the caller must have
77 initialized with the given WIDTH (0 for a numeric field, otherwise the
78 string width). If FORMAT is FMT_A, then OUTPUT_ENCODING must specify the
79 correct encoding for OUTPUT (normally obtained via dict_get_encoding()). */
80 char *
81 data_in (struct substring input, const char *input_encoding,
82 enum fmt_type format,
83 union value *output, int width, const char *output_encoding)
85 static data_in_parser_func *const handlers[FMT_NUMBER_OF_FORMATS] =
87 #define FMT(NAME, METHOD, IMIN, OMIN, IO, CATEGORY) parse_##METHOD,
88 #include "format.def"
91 struct data_in i;
93 enum fmt_category cat;
94 const char *dest_encoding;
95 char *s;
96 char *error;
98 assert ((width != 0) == fmt_is_string (format));
100 i.format = format;
102 i.output = output;
103 i.width = width;
105 if (ss_is_empty (input))
107 default_result (&i);
108 return NULL;
111 cat = fmt_get_category (format);
112 if (cat & (FMT_CAT_BASIC | FMT_CAT_HEXADECIMAL
113 | FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT))
115 /* We're going to parse these into numbers. For this purpose we want to
116 deal with them in the local "C" encoding. Any character not in that
117 encoding wouldn't be valid anyhow. */
118 dest_encoding = C_ENCODING;
120 else if (cat & (FMT_CAT_BINARY | FMT_CAT_LEGACY))
122 /* Don't recode these binary formats at all, since they are not text. */
123 dest_encoding = NULL;
125 else
127 assert (cat == FMT_CAT_STRING);
128 if (format == FMT_AHEX)
130 /* We want the hex digits in the local "C" encoding, even though the
131 result may not be in that encoding. */
132 dest_encoding = C_ENCODING;
134 else
136 /* Use the final output encoding. */
137 dest_encoding = output_encoding;
141 if (dest_encoding != NULL)
143 i.input = recode_substring_pool (dest_encoding, input_encoding, input,
144 NULL);
145 s = i.input.string;
147 else
149 i.input = input;
150 s = NULL;
153 error = handlers[i.format] (&i);
154 if (error != NULL)
155 default_result (&i);
157 free (s);
159 return error;
162 bool
163 data_in_msg (struct substring input, const char *input_encoding,
164 enum fmt_type format,
165 union value *output, int width, const char *output_encoding)
167 char *error = data_in (input, input_encoding, format,
168 output, width, output_encoding);
169 if (error != NULL)
171 msg (SW,_("Data is not valid as format %s: %s"),
172 fmt_name (format), error);
173 free (error);
174 return false;
176 else
177 return true;
180 static bool
181 number_has_implied_decimals (const char *s, enum fmt_type type)
183 int decimal = settings_get_style (type)->decimal;
184 bool got_digit = false;
185 for (;;)
187 switch (*s)
189 case '0': case '1': case '2': case '3': case '4':
190 case '5': case '6': case '7': case '8': case '9':
191 got_digit = true;
192 break;
194 case '+': case '-':
195 if (got_digit)
196 return false;
197 break;
199 case 'e': case 'E': case 'd': case 'D':
200 return false;
202 case '.': case ',':
203 if (*s == decimal)
204 return false;
205 break;
207 case '\0':
208 return true;
210 default:
211 break;
214 s++;
218 static bool
219 has_implied_decimals (struct substring input, const char *input_encoding,
220 enum fmt_type format)
222 bool retval;
223 char *s;
225 switch (format)
227 case FMT_F:
228 case FMT_COMMA:
229 case FMT_DOT:
230 case FMT_DOLLAR:
231 case FMT_PCT:
232 case FMT_E:
233 case FMT_Z:
234 break;
236 case FMT_N:
237 case FMT_IB:
238 case FMT_PIB:
239 case FMT_P:
240 case FMT_PK:
241 return true;
243 default:
244 return false;
247 s = recode_string (C_ENCODING, input_encoding,
248 ss_data (input), ss_length (input));
249 retval = (format == FMT_Z
250 ? strchr (s, '.') == NULL
251 : number_has_implied_decimals (s, format));
252 free (s);
254 return retval;
257 /* In some cases, when no decimal point is explicitly included in numeric
258 input, its position is implied by the number of decimal places in the input
259 format. In such a case, this function may be called just after data_in().
260 Its arguments are a subset of that function's arguments plus D, the number
261 of decimal places associated with FORMAT.
263 If it is appropriate, this function modifies the numeric value in OUTPUT. */
264 void
265 data_in_imply_decimals (struct substring input, const char *input_encoding,
266 enum fmt_type format, int d, union value *output)
268 if (d > 0 && output->f != SYSMIS
269 && has_implied_decimals (input, input_encoding, format))
270 output->f /= pow (10., d);
273 /* Format parsers. */
275 /* Parses F, COMMA, DOT, DOLLAR, PCT, and E input formats. */
276 static char *
277 parse_number (struct data_in *i)
279 const struct fmt_number_style *style =
280 settings_get_style (i->format);
282 struct string tmp;
284 int save_errno;
285 char *tail;
287 if (fmt_get_category (i->format) == FMT_CAT_CUSTOM)
289 style = settings_get_style (FMT_F);
292 /* Trim spaces and check for missing value representation. */
293 if (trim_spaces_and_check_missing (i))
294 return NULL;
296 ds_init_empty (&tmp);
297 ds_extend (&tmp, 64);
299 /* Prefix character may precede sign. */
300 if (style->prefix.s[0] != '\0')
302 ss_match_byte (&i->input, style->prefix.s[0]);
303 ss_ltrim (&i->input, ss_cstr (CC_SPACES));
306 /* Sign. */
307 if (ss_match_byte (&i->input, '-'))
309 ds_put_byte (&tmp, '-');
310 ss_ltrim (&i->input, ss_cstr (CC_SPACES));
312 else
314 ss_match_byte (&i->input, '+');
315 ss_ltrim (&i->input, ss_cstr (CC_SPACES));
318 /* Prefix character may follow sign. */
319 if (style->prefix.s[0] != '\0')
321 ss_match_byte (&i->input, style->prefix.s[0]);
322 ss_ltrim (&i->input, ss_cstr (CC_SPACES));
325 /* Digits before decimal point. */
326 while (c_isdigit (ss_first (i->input)))
328 ds_put_byte (&tmp, ss_get_byte (&i->input));
329 if (style->grouping != 0)
330 ss_match_byte (&i->input, style->grouping);
333 /* Decimal point and following digits. */
334 if (ss_match_byte (&i->input, style->decimal))
336 ds_put_byte (&tmp, '.');
337 while (c_isdigit (ss_first (i->input)))
338 ds_put_byte (&tmp, ss_get_byte (&i->input));
341 /* Exponent. */
342 if (!ds_is_empty (&tmp)
343 && !ss_is_empty (i->input)
344 && strchr ("eEdD-+", ss_first (i->input)))
346 ds_put_byte (&tmp, 'e');
348 if (strchr ("eEdD", ss_first (i->input)))
350 ss_advance (&i->input, 1);
351 ss_match_byte (&i->input, ' ');
354 if (ss_first (i->input) == '-' || ss_first (i->input) == '+')
356 if (ss_get_byte (&i->input) == '-')
357 ds_put_byte (&tmp, '-');
358 ss_match_byte (&i->input, ' ');
361 while (c_isdigit (ss_first (i->input)))
362 ds_put_byte (&tmp, ss_get_byte (&i->input));
365 /* Suffix character. */
366 if (style->suffix.s[0] != '\0')
367 ss_match_byte (&i->input, style->suffix.s[0]);
369 if (!ss_is_empty (i->input))
371 char *error;
372 if (ds_is_empty (&tmp))
373 error = xstrdup (_("Field contents are not numeric."));
374 else
375 error = xstrdup (_("Number followed by garbage."));
376 ds_destroy (&tmp);
377 return error;
380 /* Let c_strtod() do the conversion. */
381 save_errno = errno;
382 errno = 0;
383 i->output->f = c_strtod (ds_cstr (&tmp), &tail);
384 if (*tail != '\0')
386 errno = save_errno;
387 ds_destroy (&tmp);
388 return xstrdup (_("Invalid numeric syntax."));
390 else if (errno == ERANGE)
392 if (fabs (i->output->f) > 1)
394 i->output->f = SYSMIS;
395 ds_destroy (&tmp);
396 return xstrdup (_("Too-large number set to system-missing."));
398 else
400 i->output->f = 0.0;
401 ds_destroy (&tmp);
402 return xstrdup (_("Too-small number set to zero."));
405 else
406 errno = save_errno;
408 ds_destroy (&tmp);
409 return NULL;
412 /* Parses N format. */
413 static char *
414 parse_N (struct data_in *i)
416 int c;
418 i->output->f = 0;
419 while ((c = ss_get_byte (&i->input)) != EOF)
421 if (!c_isdigit (c))
422 return xstrdup (_("All characters in field must be digits."));
423 i->output->f = i->output->f * 10.0 + (c - '0');
426 return NULL;
429 /* Parses PIBHEX format. */
430 static char *
431 parse_PIBHEX (struct data_in *i)
433 double n;
434 int c;
436 n = 0.0;
438 while ((c = ss_get_byte (&i->input)) != EOF)
440 if (!c_isxdigit (c))
441 return xstrdup (_("Unrecognized character in field."));
442 n = n * 16.0 + hexit_value (c);
445 i->output->f = n;
446 return NULL;
449 /* Parses RBHEX format. */
450 static char *
451 parse_RBHEX (struct data_in *i)
453 double d;
454 size_t j;
456 memset (&d, 0, sizeof d);
457 for (j = 0; !ss_is_empty (i->input) && j < sizeof d; j++)
459 int hi = ss_get_byte (&i->input);
460 int lo = ss_get_byte (&i->input);
461 if (lo == EOF)
462 return xstrdup (_("Field must have even length."));
463 else if (!c_isxdigit (hi) || !c_isxdigit (lo))
464 return xstrdup (_("Field must contain only hex digits."));
465 ((unsigned char *) &d)[j] = 16 * hexit_value (hi) + hexit_value (lo);
468 i->output->f = d;
470 return NULL;
/* Digits for Z format.  A character's position in this table encodes both
   its digit value (position modulo 10) and its sign (positions 20 and up
   are negative). */
static const char z_digits[] = "0123456789{ABCDEFGHI}JKLMNOPQR";

/* Returns the position of C in z_digits, or -1 if C is not a Z format
   digit. */
static ptrdiff_t
z_digit_index (int c)
{
  const char *match = c > 0 ? strchr (z_digits, c) : NULL;
  return match != NULL ? match - z_digits : -1;
}

/* Returns true if C is a Z format digit, false otherwise. */
static bool
is_z_digit (int c)
{
  return z_digit_index (c) >= 0;
}

/* Returns the (absolute value of the) value of C as a Z format
   digit. */
static int
z_digit_value (int c)
{
  ptrdiff_t idx = z_digit_index (c);
  assert (idx >= 0);
  return idx % 10;
}

/* Returns true if Z format digit C represents a negative value,
   false otherwise. */
static bool
is_negative_z_digit (int c)
{
  ptrdiff_t idx = z_digit_index (c);
  assert (idx >= 0);
  return idx >= 20;
}
501 /* Parses Z format. */
502 static char *
503 parse_Z (struct data_in *i)
505 struct string tmp;
507 int save_errno;
509 bool got_dot = false;
510 bool got_final_digit = false;
512 /* Trim spaces and check for missing value representation. */
513 if (trim_spaces_and_check_missing (i))
514 return NULL;
516 ds_init_empty (&tmp);
517 ds_extend (&tmp, 64);
519 ds_put_byte (&tmp, '+');
520 while (!ss_is_empty (i->input))
522 int c = ss_get_byte (&i->input);
523 if (c_isdigit (c) && !got_final_digit)
524 ds_put_byte (&tmp, c);
525 else if (is_z_digit (c) && !got_final_digit)
527 ds_put_byte (&tmp, z_digit_value (c) + '0');
528 if (is_negative_z_digit (c))
529 ds_data (&tmp)[0] = '-';
530 got_final_digit = true;
532 else if (c == '.' && !got_dot)
534 ds_put_byte (&tmp, '.');
535 got_dot = true;
537 else
539 ds_destroy (&tmp);
540 return xstrdup (_("Invalid zoned decimal syntax."));
544 if (!ss_is_empty (i->input))
546 char *error;
548 if (ds_length (&tmp) == 1)
549 error = xstrdup (_("Field contents are not numeric."));
550 else
551 error = xstrdup (_("Number followed by garbage."));
553 ds_destroy (&tmp);
554 return error;
557 /* Let c_strtod() do the conversion. */
558 save_errno = errno;
559 errno = 0;
560 i->output->f = c_strtod (ds_cstr (&tmp), NULL);
561 if (errno == ERANGE)
563 if (fabs (i->output->f) > 1)
565 i->output->f = SYSMIS;
566 ds_destroy (&tmp);
567 return xstrdup (_("Too-large number set to system-missing."));
569 else
571 i->output->f = 0.0;
572 ds_destroy (&tmp);
573 return xstrdup (_("Too-small number set to zero."));
576 else
577 errno = save_errno;
579 ds_destroy (&tmp);
580 return NULL;
583 /* Parses IB format. */
584 static char *
585 parse_IB (struct data_in *i)
587 size_t bytes;
588 uint64_t value;
589 uint64_t sign_bit;
591 bytes = MIN (8, ss_length (i->input));
592 value = integer_get (settings_get_input_integer_format (), ss_data (i->input), bytes);
594 sign_bit = UINT64_C(1) << (8 * bytes - 1);
595 if (!(value & sign_bit))
596 i->output->f = value;
597 else
599 /* Sign-extend to full 64 bits. */
600 value -= sign_bit << 1;
601 i->output->f = -(double) -value;
604 return NULL;
607 /* Parses PIB format. */
608 static char *
609 parse_PIB (struct data_in *i)
611 i->output->f = integer_get (settings_get_input_integer_format (), ss_data (i->input),
612 MIN (8, ss_length (i->input)));
614 return NULL;
/* Consumes the first character of S, which must not be empty.  Stores its
   high 4 bits in *HIGH_NIBBLE and its low 4 bits in *LOW_NIBBLE. */
static void
get_nibbles (struct substring *s, int *high_nibble, int *low_nibble)
{
  int byte = ss_get_byte (s);
  assert (byte != EOF);

  *low_nibble = byte & 0xf;
  *high_nibble = (byte >> 4) & 0xf;
}
628 /* Parses P format. */
629 static char *
630 parse_P (struct data_in *i)
632 int high_nibble, low_nibble;
634 i->output->f = 0.0;
636 while (ss_length (i->input) > 1)
638 get_nibbles (&i->input, &high_nibble, &low_nibble);
639 if (high_nibble > 9 || low_nibble > 9)
640 return xstrdup (_("Invalid syntax for P field."));
641 i->output->f = (100 * i->output->f) + (10 * high_nibble) + low_nibble;
644 get_nibbles (&i->input, &high_nibble, &low_nibble);
645 if (high_nibble > 9)
646 return xstrdup (_("Invalid syntax for P field."));
647 i->output->f = (10 * i->output->f) + high_nibble;
648 if (low_nibble < 10)
649 i->output->f = (10 * i->output->f) + low_nibble;
650 else if (low_nibble == 0xb || low_nibble == 0xd)
651 i->output->f = -i->output->f;
653 return NULL;
656 /* Parses PK format. */
657 static char *
658 parse_PK (struct data_in *i)
660 i->output->f = 0.0;
661 while (!ss_is_empty (i->input))
663 int high_nibble, low_nibble;
665 get_nibbles (&i->input, &high_nibble, &low_nibble);
666 if (high_nibble > 9 || low_nibble > 9)
668 i->output->f = SYSMIS;
669 return NULL;
671 i->output->f = (100 * i->output->f) + (10 * high_nibble) + low_nibble;
674 return NULL;
677 /* Parses RB format. */
678 static char *
679 parse_RB (struct data_in *i)
681 enum float_format ff = settings_get_input_float_format ();
682 size_t size = float_get_size (ff);
683 if (ss_length (i->input) >= size)
684 float_convert (ff, ss_data (i->input),
685 FLOAT_NATIVE_DOUBLE, &i->output->f);
686 else
687 i->output->f = SYSMIS;
689 return NULL;
692 /* Parses A format. */
693 static char *
694 parse_A (struct data_in *i)
696 /* This is equivalent to buf_copy_rpad, except that we posibly
697 do a character set recoding in the middle. */
698 uint8_t *dst = value_str_rw (i->output, i->width);
699 size_t dst_size = i->width;
700 const char *src = ss_data (i->input);
701 size_t src_size = ss_length (i->input);
703 memcpy (dst, src, MIN (src_size, dst_size));
705 if (dst_size > src_size)
706 memset (&dst[src_size], ' ', dst_size - src_size);
708 return NULL;
711 /* Parses AHEX format. */
712 static char *
713 parse_AHEX (struct data_in *i)
715 uint8_t *s = value_str_rw (i->output, i->width);
716 size_t j;
718 for (j = 0; ; j++)
720 int hi = ss_get_byte (&i->input);
721 int lo = ss_get_byte (&i->input);
722 if (hi == EOF)
723 break;
724 else if (lo == EOF)
725 return xstrdup (_("Field must have even length."));
727 if (!c_isxdigit (hi) || !c_isxdigit (lo))
728 return xstrdup (_("Field must contain only hex digits."));
730 if (j < i->width)
731 s[j] = hexit_value (hi) * 16 + hexit_value (lo);
734 memset (&s[j], ' ', i->width - j);
736 return NULL;
739 /* Date & time format components. */
/* Sign of a time value, tracked while the components of a time are
   parsed. */
enum time_sign
  {
    SIGN_NO_TIME,       /* No time component parsed yet; a sign is allowed. */
    SIGN_POSITIVE,      /* Time is positive. */
    SIGN_NEGATIVE       /* Time is negative. */
  };
749 /* Parses a signed decimal integer from at most the first
750 MAX_DIGITS characters in I, storing the result into *RESULT.
751 Returns true if successful, false if no integer was
752 present. */
753 static char * WARN_UNUSED_RESULT
754 parse_int (struct data_in *i, long *result, size_t max_digits)
756 struct substring head = ss_head (i->input, max_digits);
757 size_t n = ss_get_long (&head, result);
758 if (n)
760 ss_advance (&i->input, n);
761 return NULL;
763 else
764 return xstrdup (_("Syntax error in date field."));
/* Parses a day-of-month integer between 1 and 31 from I, storing it
   into *DAY.
   Returns NULL if successful, otherwise an error message that the
   caller must free. */
static char *
parse_day (struct data_in *i, long *day)
{
  char *error = parse_int (i, day, SIZE_MAX);
  if (error != NULL)
    return error;

  if (*day < 1 || *day > 31)
    return xasprintf (_("Day (%ld) must be between 1 and 31."), *day);

  return NULL;
}
782 /* Parses an integer from the beginning of I.
783 Adds SECONDS_PER_UNIT times the absolute value of the integer
784 to *TIME.
785 If *TIME_SIGN is SIGN_NO_TIME, allows a sign to precede the
786 time and sets *TIME_SIGN. Otherwise, does not allow a sign.
787 Returns true if successful, false if no integer was present. */
788 static char *
789 parse_time_units (struct data_in *i, double seconds_per_unit,
790 enum time_sign *time_sign, double *time)
793 char *error;
794 long units;
796 if (*time_sign == SIGN_NO_TIME)
798 if (ss_match_byte (&i->input, '-'))
799 *time_sign = SIGN_NEGATIVE;
800 else
802 ss_match_byte (&i->input, '+');
803 *time_sign = SIGN_POSITIVE;
806 error = parse_int (i, &units, SIZE_MAX);
807 if (error != NULL)
808 return error;
809 if (units < 0)
810 return xstrdup (_("Syntax error in date field."));
811 *time += units * seconds_per_unit;
812 return NULL;
815 /* Parses a data delimiter from the beginning of I.
816 Returns true if successful, false if no delimiter was
817 present. */
818 static char *
819 parse_date_delimiter (struct data_in *i)
821 if (ss_ltrim (&i->input, ss_cstr ("-/.," CC_SPACES)))
822 return NULL;
824 return xstrdup (_("Delimiter expected between fields in date."));
827 /* Parses spaces at the beginning of I. */
828 static void
829 parse_spaces (struct data_in *i)
831 ss_ltrim (&i->input, ss_cstr (CC_SPACES));
834 static struct substring
835 parse_name_token (struct data_in *i)
837 struct substring token;
838 ss_get_bytes (&i->input, ss_span (i->input, ss_cstr (CC_LETTERS)), &token);
839 return token;
842 /* Reads a name from I and sets *OUTPUT to the value associated
843 with that name. If ALLOW_SUFFIXES is true, then names that
844 begin with one of the names are accepted; otherwise, only
845 exact matches (except for case) are allowed.
846 Returns true if successful, false otherwise. */
847 static bool
848 match_name (struct substring token, const char *const *names, long *output)
850 int i;
852 for (i = 1; *names != NULL; i++)
853 if (ss_equals_case (ss_cstr (*names++), token))
855 *output = i;
856 return true;
859 return false;
862 /* Parses a month name or number from the beginning of I,
863 storing the month (in range 1...12) into *MONTH.
864 Returns true if successful, false if no month was present. */
865 static char *
866 parse_month (struct data_in *i, long *month)
868 if (c_isdigit (ss_first (i->input)))
870 char *error = parse_int (i, month, SIZE_MAX);
871 if (error != NULL)
872 return error;
873 if (*month >= 1 && *month <= 12)
874 return NULL;
876 else
878 static const char *const english_names[] =
880 "jan", "feb", "mar", "apr", "may", "jun",
881 "jul", "aug", "sep", "oct", "nov", "dec",
882 NULL,
885 static const char *const roman_names[] =
887 "i", "ii", "iii", "iv", "v", "vi",
888 "vii", "viii", "ix", "x", "xi", "xii",
889 NULL,
892 struct substring token = parse_name_token (i);
893 if (match_name (ss_head (token, 3), english_names, month)
894 || match_name (ss_head (token, 4), roman_names, month))
895 return NULL;
898 return xstrdup (_("Unrecognized month format. Months may be specified "
899 "as Arabic or Roman numerals or as at least 3 letters "
900 "of their English names."));
/* Parses a year of at most MAX_DIGITS from the beginning of I,
   storing a "4-digit" year into *YEAR.  A year in the range 0...99 is
   expanded relative to the epoch returned by settings_get_epoch().
   Returns NULL if successful, otherwise an error message that the
   caller must free. */
static char *
parse_year (struct data_in *i, long *year, size_t max_digits)
{
  char *error = parse_int (i, year, max_digits);
  if (error != NULL)
    return error;

  if (*year >= 0 && *year <= 99)
    {
      int epoch = settings_get_epoch ();
      int century = ROUND_DOWN (epoch, 100);
      int year_in_century = epoch - century;

      /* Years before the epoch's position within its century belong to the
         following century. */
      *year += *year >= year_in_century ? century : century + 100;
    }

  if (*year < 1582 || *year > 19999)
    return xasprintf (_("Year (%ld) must be between 1582 and 19999."),
                      *year);

  return NULL;
}
928 /* Returns true if input in I has been exhausted,
929 false otherwise. */
930 static char *
931 parse_trailer (struct data_in *i)
933 if (ss_is_empty (i->input))
934 return NULL;
936 return xasprintf (_("Trailing garbage `%.*s' following date."),
937 (int) ss_length (i->input), ss_data (i->input));
940 /* Parses a 3-digit Julian day-of-year value from I into *YDAY.
941 Returns true if successful, false on failure. */
942 static char *
943 parse_yday (struct data_in *i, long *yday)
945 struct substring num_s;
946 long num;
948 ss_get_bytes (&i->input, 3, &num_s);
949 if (ss_span (num_s, ss_cstr (CC_DIGITS)) != 3)
950 return xstrdup (_("Julian day must have exactly three digits."));
951 else if (!ss_get_long (&num_s, &num) || num < 1 || num > 366)
952 return xasprintf (_("Julian day (%ld) must be between 1 and 366."), num);
954 *yday = num;
955 return NULL;
/* Parses a quarter-of-year integer between 1 and 4 from I.
   Stores the first month of that quarter into *MONTH.
   Returns NULL if successful, otherwise an error message that the
   caller must free. */
static char *
parse_quarter (struct data_in *i, long int *month)
{
  long q;
  char *error = parse_int (i, &q, SIZE_MAX);

  if (error != NULL)
    return error;

  if (q < 1 || q > 4)
    return xasprintf (_("Quarter (%ld) must be between 1 and 4."), q);

  *month = (q - 1) * 3 + 1;
  return NULL;
}
/* Parses a week-of-year integer between 1 and 53 from I.
   Stores the day of the year on which that week starts into *YDAY.
   Returns NULL if successful, otherwise an error message that the
   caller must free. */
static char *
parse_week (struct data_in *i, long int *yday)
{
  long w;
  char *error = parse_int (i, &w, SIZE_MAX);

  if (error != NULL)
    return error;

  if (w < 1 || w > 53)
    return xasprintf (_("Week (%ld) must be between 1 and 53."), w);

  *yday = (w - 1) * 7 + 1;
  return NULL;
}
1000 /* Parses a time delimiter from the beginning of I.
1001 Returns true if successful, false if no delimiter was
1002 present. */
1003 static char *
1004 parse_time_delimiter (struct data_in *i)
1006 if (ss_ltrim (&i->input, ss_cstr (":" CC_SPACES)) > 0)
1007 return NULL;
1009 return xstrdup (_("Delimiter expected between fields in time."));
1012 /* Parses minutes and optional seconds from the beginning of I.
1013 The time is converted into seconds, which are added to
1014 *TIME.
1015 Returns true if successful, false if an error was found. */
1016 static char *
1017 parse_minute_second (struct data_in *i, double *time)
1019 long minute;
1020 char buf[64];
1021 char *error;
1022 char *cp;
1024 /* Parse minutes. */
1025 error = parse_int (i, &minute, SIZE_MAX);
1026 if (error != NULL)
1027 return error;
1028 if (minute < 0 || minute > 59)
1029 return xasprintf (_("Minute (%ld) must be between 0 and 59."), minute);
1030 *time += 60. * minute;
1032 /* Check for seconds. */
1033 if (ss_ltrim (&i->input, ss_cstr (":" CC_SPACES)) == 0
1034 || !c_isdigit (ss_first (i->input)))
1035 return NULL;
1037 /* Parse seconds. */
1038 cp = buf;
1039 while (c_isdigit (ss_first (i->input)))
1040 *cp++ = ss_get_byte (&i->input);
1041 if (ss_match_byte (&i->input, settings_get_decimal_char (FMT_F)))
1042 *cp++ = '.';
1043 while (c_isdigit (ss_first (i->input)))
1044 *cp++ = ss_get_byte (&i->input);
1045 *cp = '\0';
1047 *time += c_strtod (buf, NULL);
1049 return NULL;
1052 /* Parses a weekday name from the beginning of I,
1053 storing a value of 1=Sunday...7=Saturday into *WEEKDAY.
1054 Returns true if successful, false if an error was found. */
1055 static char *
1056 parse_weekday (struct data_in *i, long *weekday)
1058 static const char *const weekday_names[] =
1060 "su", "mo", "tu", "we", "th", "fr", "sa",
1061 NULL,
1064 struct substring token = parse_name_token (i);
1065 bool ok = match_name (ss_head (token, 2), weekday_names, weekday);
1066 if (!ok)
1067 return xstrdup (_("Unrecognized weekday name. At least the first two "
1068 "letters of an English weekday name must be "
1069 "specified."));
1070 return NULL;
1073 /* Date & time formats. */
1075 /* Parses WKDAY format. */
1076 static char *
1077 parse_WKDAY (struct data_in *i)
1079 long weekday;
1080 char *error;
1082 if (trim_spaces_and_check_missing (i))
1083 return NULL;
1085 error = parse_weekday (i, &weekday);
1086 if (error == NULL)
1087 error = parse_trailer (i);
1089 i->output->f = weekday;
1090 return error;
1093 /* Parses MONTH format. */
1094 static char *
1095 parse_MONTH (struct data_in *i)
1097 long month;
1098 char *error;
1100 if (trim_spaces_and_check_missing (i))
1101 return NULL;
1103 error = parse_month (i, &month);
1104 if (error == NULL)
1105 error = parse_trailer (i);
1107 i->output->f = month;
1108 return error;
1111 /* Parses DATE, ADATE, EDATE, JDATE, SDATE, QYR, MOYR, KWYR,
1112 DATETIME, TIME and DTIME formats. */
1113 static char *
1114 parse_date (struct data_in *i)
1116 long int year = INT_MIN;
1117 long int month = 1;
1118 long int day = 1;
1119 long int yday = 1;
1120 double time = 0, date = 0;
1121 enum time_sign time_sign = SIGN_NO_TIME;
1123 const char *template = fmt_date_template (i->format, 0);
1124 size_t template_width = strlen (template);
1125 char *error;
1127 if (trim_spaces_and_check_missing (i))
1128 return NULL;
1130 while (*template != '\0')
1132 unsigned char ch = *template;
1133 int count = 1;
1135 while (template[count] == ch)
1136 count++;
1137 template += count;
1139 switch (ch)
1141 case 'd':
1142 error = count < 3 ? parse_day (i, &day) : parse_yday (i, &yday);
1143 break;
1144 case 'm':
1145 error = parse_month (i, &month);
1146 break;
1147 case 'y':
1149 size_t max_digits;
1150 if (!c_isalpha (*template))
1151 max_digits = SIZE_MAX;
1152 else
1154 if (ss_length (i->input) >= template_width + 2)
1155 max_digits = 4;
1156 else
1157 max_digits = 2;
1159 error = parse_year (i, &year, max_digits);
1161 break;
1162 case 'q':
1163 error = parse_quarter (i, &month);
1164 break;
1165 case 'w':
1166 error = parse_week (i, &yday);
1167 break;
1168 case 'D':
1169 error = parse_time_units (i, 60. * 60. * 24., &time_sign, &time);
1170 break;
1171 case 'H':
1172 error = parse_time_units (i, 60. * 60., &time_sign, &time);
1173 break;
1174 case 'M':
1175 error = parse_minute_second (i, &time);
1176 break;
1177 case '-':
1178 case '/':
1179 case '.':
1180 error = parse_date_delimiter (i);
1181 break;
1182 case ':':
1183 error = parse_time_delimiter (i);
1184 case ' ':
1185 if (i->format != FMT_MOYR)
1187 parse_spaces (i);
1188 error = NULL;
1190 else
1191 error = parse_date_delimiter (i);
1192 break;
1193 default:
1194 assert (count == 1);
1195 if (!ss_match_byte (&i->input, c_toupper (ch))
1196 && !ss_match_byte (&i->input, c_tolower (ch)))
1197 error = xasprintf (_("`%c' expected in date field."), ch);
1198 else
1199 error = NULL;
1200 break;
1202 if (error != NULL)
1203 return error;
1205 error = parse_trailer (i);
1206 if (error != NULL)
1207 return error;
1209 if (year != INT_MIN)
1211 char *error;
1212 double ofs;
1214 ofs = calendar_gregorian_to_offset (year, month, day, &error);
1215 if (ofs == SYSMIS)
1216 return error;
1217 date = (yday - 1 + ofs) * 60. * 60. * 24.;
1219 else
1220 date = 0.;
1221 i->output->f = date + (time_sign == SIGN_NEGATIVE ? -time : time);
1223 return NULL;
1226 /* Utility functions. */
1228 /* Sets the default result for I.
1229 For a numeric format, this is the value set on SET BLANKS
1230 (typically system-missing); for a string format, it is all
1231 spaces. */
1232 static void
1233 default_result (struct data_in *i)
1235 if (fmt_is_string (i->format))
1236 memset (value_str_rw (i->output, i->width), ' ', i->width);
1237 else
1238 i->output->f = settings_get_blanks ();
1241 /* Trims leading and trailing spaces from I.
1242 If the result is empty, or a single period character, then
1243 sets the default result and returns true; otherwise, returns
1244 false. */
1245 static bool
1246 trim_spaces_and_check_missing (struct data_in *i)
1248 ss_trim (&i->input, ss_cstr (" "));
1249 if (ss_is_empty (i->input) || ss_equals (i->input, ss_cstr (".")))
1251 default_result (i);
1252 return true;
1254 return false;
/* Returns the integer value of hex digit C, which may be in either
   case.  C must be a hex digit. */
static int
hexit_value (int c)
{
  const char hexits[] = "0123456789abcdef";
  int lower = c_tolower ((unsigned char) c);
  const char *match = strchr (hexits, lower);

  assert (match != NULL);
  return match - hexits;
}