Independent Samples T-Test Dialog: Fix Crash
[pspp.git] / src / data / data-in.c
blob7d18ef34ddd0e9e68575ba95cd1ebb6d95fb1bd5
1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 #include <config.h>
19 #include "data-in.h"
21 #include <ctype.h>
22 #include <errno.h>
23 #include <limits.h>
24 #include <math.h>
25 #include <stdarg.h>
26 #include <stdbool.h>
27 #include <stddef.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
32 #include "calendar.h"
33 #include "dictionary.h"
34 #include "format.h"
35 #include "identifier.h"
36 #include "libpspp/assertion.h"
37 #include "libpspp/compiler.h"
38 #include "libpspp/i18n.h"
39 #include "libpspp/integer-format.h"
40 #include "libpspp/misc.h"
41 #include "libpspp/str.h"
42 #include "settings.h"
43 #include "value.h"
45 #include "gl/c-ctype.h"
46 #include "gl/c-strtod.h"
47 #include "gl/minmax.h"
48 #include "gl/xalloc.h"
50 #include "gettext.h"
51 #define _(msgid) gettext (msgid)
53 /* Information about parsing one data field. */
54 struct data_in
56 const struct fmt_settings *settings;
58 struct substring input; /* Source. */
59 enum fmt_type format; /* Input format. */
61 union value *output; /* Destination. */
62 int width; /* Output width. */
65 typedef char *data_in_parser_func (struct data_in *);
66 #define FMT(NAME, METHOD, IMIN, OMIN, IO, CATEGORY) \
67 static data_in_parser_func parse_##METHOD;
68 #include "format.def"
70 static void default_result (struct data_in *);
71 static bool trim_spaces_and_check_missing (struct data_in *);
73 static int hexit_value (int c);
75 /* Parses the characters in INPUT, which are encoded in the given
76 INPUT_ENCODING, according to FORMAT.
78 Stores the parsed representation in OUTPUT, which the caller must have
79 initialized with the given WIDTH (0 for a numeric field, otherwise the
80 string width). If FORMAT is FMT_A, then OUTPUT_ENCODING must specify the
81 correct encoding for OUTPUT (normally obtained via dict_get_encoding()).
83 If successful NULL is the return value. Otherwise a string describing
84 the problem is returned. The caller must free this string.
86 char *
87 data_in (struct substring input, const char *input_encoding,
88 enum fmt_type format, const struct fmt_settings *settings,
89 union value *output, int width, const char *output_encoding)
91 static data_in_parser_func *const handlers[FMT_NUMBER_OF_FORMATS] =
93 #define FMT(NAME, METHOD, IMIN, OMIN, IO, CATEGORY) parse_##METHOD,
94 #include "format.def"
97 struct data_in i;
99 enum fmt_category cat;
100 const char *dest_encoding;
101 char *s;
102 char *error;
104 assert ((width != 0) == fmt_is_string (format));
106 i.settings = settings;
108 i.format = format;
110 i.output = output;
111 i.width = width;
113 if (ss_is_empty (input))
115 default_result (&i);
116 return NULL;
119 cat = fmt_get_category (format);
120 if (cat & (FMT_CAT_BASIC | FMT_CAT_HEXADECIMAL | FMT_CAT_CUSTOM
121 | FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT))
123 /* We're going to parse these into numbers. For this purpose we want to
124 deal with them in the local "C" encoding. Any character not in that
125 encoding wouldn't be valid anyhow. */
126 dest_encoding = C_ENCODING;
128 else if (cat & (FMT_CAT_BINARY | FMT_CAT_LEGACY))
130 /* Don't recode these binary formats at all, since they are not text. */
131 dest_encoding = NULL;
133 else
135 assert (cat == FMT_CAT_STRING);
136 if (format == FMT_AHEX)
138 /* We want the hex digits in the local "C" encoding, even though the
139 result may not be in that encoding. */
140 dest_encoding = C_ENCODING;
142 else
144 /* Use the final output encoding. */
145 dest_encoding = output_encoding;
149 if (dest_encoding != NULL)
151 i.input = recode_substring_pool (dest_encoding, input_encoding, input,
152 NULL);
153 s = i.input.string;
155 else
157 i.input = input;
158 s = NULL;
161 error = handlers[i.format] (&i);
162 if (error != NULL)
163 default_result (&i);
165 free (s);
167 return error;
170 bool
171 data_in_msg (struct substring input, const char *input_encoding,
172 enum fmt_type format, const struct fmt_settings *settings,
173 union value *output, int width, const char *output_encoding)
175 char *error = data_in (input, input_encoding, format, settings,
176 output, width, output_encoding);
177 if (error != NULL)
179 msg (SW, _("Data is not valid as format %s: %s"),
180 fmt_name (format), error);
181 free (error);
182 return false;
184 else
185 return true;
188 static bool
189 number_has_implied_decimals (const struct fmt_settings *settings,
190 const char *s, enum fmt_type type)
192 int decimal = fmt_settings_get_style (settings, type)->decimal;
193 bool got_digit = false;
194 for (;;)
196 switch (*s)
198 case '0': case '1': case '2': case '3': case '4':
199 case '5': case '6': case '7': case '8': case '9':
200 got_digit = true;
201 break;
203 case '+': case '-':
204 if (got_digit)
205 return false;
206 break;
208 case 'e': case 'E': case 'd': case 'D':
209 return false;
211 case '.': case ',':
212 if (*s == decimal)
213 return false;
214 break;
216 case '\0':
217 return true;
219 default:
220 break;
223 s++;
227 static bool
228 has_implied_decimals (struct substring input, const char *input_encoding,
229 enum fmt_type format,
230 const struct fmt_settings *settings)
232 bool retval;
233 char *s;
235 switch (format)
237 case FMT_F:
238 case FMT_COMMA:
239 case FMT_DOT:
240 case FMT_DOLLAR:
241 case FMT_PCT:
242 case FMT_E:
243 case FMT_Z:
244 break;
246 case FMT_N:
247 case FMT_IB:
248 case FMT_PIB:
249 case FMT_P:
250 case FMT_PK:
251 return true;
253 default:
254 return false;
257 s = recode_string (C_ENCODING, input_encoding,
258 ss_data (input), ss_length (input));
259 retval = (format == FMT_Z
260 ? strchr (s, '.') == NULL
261 : number_has_implied_decimals (settings, s, format));
262 free (s);
264 return retval;
267 /* In some cases, when no decimal point is explicitly included in numeric
268 input, its position is implied by the number of decimal places in the input
269 format. In such a case, this function may be called just after data_in().
270 Its arguments are a subset of that function's arguments plus D, the number
271 of decimal places associated with FORMAT.
273 If it is appropriate, this function modifies the numeric value in OUTPUT. */
274 void
275 data_in_imply_decimals (struct substring input, const char *input_encoding,
276 enum fmt_type format, int d,
277 const struct fmt_settings *settings,
278 union value *output)
280 if (d > 0 && output->f != SYSMIS
281 && has_implied_decimals (input, input_encoding, format, settings))
282 output->f /= pow (10., d);
285 /* Format parsers. */
287 /* Parses F, COMMA, DOT, DOLLAR, PCT, and E input formats. */
288 static char *
289 parse_number (struct data_in *i)
291 const struct fmt_number_style *style = fmt_settings_get_style (
292 i->settings,
293 fmt_get_category (i->format) == FMT_CAT_CUSTOM ? FMT_F : i->format);
295 struct string tmp;
297 int save_errno;
298 char *tail;
300 /* Trim spaces and check for missing value representation. */
301 if (trim_spaces_and_check_missing (i))
302 return NULL;
304 ds_init_empty (&tmp);
305 ds_extend (&tmp, 64);
307 /* Prefix character may precede sign. */
308 if (style->prefix.s[0] != '\0')
310 ss_match_byte (&i->input, style->prefix.s[0]);
311 ss_ltrim (&i->input, ss_cstr (CC_SPACES));
314 /* Sign. */
315 if (ss_match_byte (&i->input, '-'))
317 ds_put_byte (&tmp, '-');
318 ss_ltrim (&i->input, ss_cstr (CC_SPACES));
320 else
322 ss_match_byte (&i->input, '+');
323 ss_ltrim (&i->input, ss_cstr (CC_SPACES));
326 /* Prefix character may follow sign. */
327 if (style->prefix.s[0] != '\0')
329 ss_match_byte (&i->input, style->prefix.s[0]);
330 ss_ltrim (&i->input, ss_cstr (CC_SPACES));
333 /* Digits before decimal point. */
334 while (c_isdigit (ss_first (i->input)))
336 ds_put_byte (&tmp, ss_get_byte (&i->input));
337 if (style->grouping != 0)
338 ss_match_byte (&i->input, style->grouping);
341 /* Decimal point and following digits. */
342 if (ss_match_byte (&i->input, style->decimal))
344 ds_put_byte (&tmp, '.');
345 while (c_isdigit (ss_first (i->input)))
346 ds_put_byte (&tmp, ss_get_byte (&i->input));
349 /* Exponent. */
350 if (!ds_is_empty (&tmp)
351 && !ss_is_empty (i->input)
352 && strchr ("eEdD-+", ss_first (i->input)))
354 ds_put_byte (&tmp, 'e');
356 if (strchr ("eEdD", ss_first (i->input)))
358 ss_advance (&i->input, 1);
359 ss_match_byte (&i->input, ' ');
362 if (ss_first (i->input) == '-' || ss_first (i->input) == '+')
364 if (ss_get_byte (&i->input) == '-')
365 ds_put_byte (&tmp, '-');
366 ss_match_byte (&i->input, ' ');
369 while (c_isdigit (ss_first (i->input)))
370 ds_put_byte (&tmp, ss_get_byte (&i->input));
373 /* Suffix character. */
374 if (style->suffix.s[0] != '\0')
375 ss_match_byte (&i->input, style->suffix.s[0]);
377 if (!ss_is_empty (i->input))
379 char *error;
380 if (ds_is_empty (&tmp))
381 error = xstrdup (_("Field contents are not numeric."));
382 else
383 error = xstrdup (_("Number followed by garbage."));
384 ds_destroy (&tmp);
385 return error;
388 /* Let c_strtod() do the conversion. */
389 save_errno = errno;
390 errno = 0;
391 i->output->f = c_strtod (ds_cstr (&tmp), &tail);
392 if (*tail != '\0')
394 errno = save_errno;
395 ds_destroy (&tmp);
396 return xstrdup (_("Invalid numeric syntax."));
398 else if (errno == ERANGE)
400 if (fabs (i->output->f) > 1)
402 i->output->f = SYSMIS;
403 ds_destroy (&tmp);
404 return xstrdup (_("Too-large number set to system-missing."));
406 else
408 i->output->f = 0.0;
409 ds_destroy (&tmp);
410 return xstrdup (_("Too-small number set to zero."));
413 else
414 errno = save_errno;
416 ds_destroy (&tmp);
417 return NULL;
420 /* Parses N format. */
421 static char *
422 parse_N (struct data_in *i)
424 int c;
426 i->output->f = 0;
427 while ((c = ss_get_byte (&i->input)) != EOF)
429 if (!c_isdigit (c))
430 return xstrdup (_("All characters in field must be digits."));
431 i->output->f = i->output->f * 10.0 + (c - '0');
434 return NULL;
437 /* Parses PIBHEX format. */
438 static char *
439 parse_PIBHEX (struct data_in *i)
441 double n;
442 int c;
444 n = 0.0;
446 while ((c = ss_get_byte (&i->input)) != EOF)
448 if (!c_isxdigit (c))
449 return xstrdup (_("Unrecognized character in field."));
450 n = n * 16.0 + hexit_value (c);
453 i->output->f = n;
454 return NULL;
457 /* Parses RBHEX format. */
458 static char *
459 parse_RBHEX (struct data_in *i)
461 double d;
462 size_t j;
464 memset (&d, 0, sizeof d);
465 for (j = 0; !ss_is_empty (i->input) && j < sizeof d; j++)
467 int hi = ss_get_byte (&i->input);
468 int lo = ss_get_byte (&i->input);
469 if (lo == EOF)
470 return xstrdup (_("Field must have even length."));
471 else if (!c_isxdigit (hi) || !c_isxdigit (lo))
472 return xstrdup (_("Field must contain only hex digits."));
473 ((unsigned char *) &d)[j] = 16 * hexit_value (hi) + hexit_value (lo);
476 i->output->f = d;
478 return NULL;
/* Digits for Z format.  Each group of ten characters stands for the digit
   values 0 through 9; characters at index 20 and above mark a negative
   number. */
static const char z_digits[] = "0123456789{ABCDEFGHI}JKLMNOPQR";

/* Returns true if C is a Z format digit, false otherwise.  Zero and
   negative values of C (including EOF) are never digits. */
static bool
is_z_digit (int c)
{
  if (c <= 0)
    return false;
  return strchr (z_digits, c) != NULL;
}
491 /* Returns the (absolute value of the) value of C as a Z format
492 digit. */
493 static int
494 z_digit_value (int c)
496 assert (is_z_digit (c));
497 return (strchr (z_digits, c) - z_digits) % 10;
500 /* Returns true if Z format digit C represents a negative value,
501 false otherwise. */
502 static bool
503 is_negative_z_digit (int c)
505 assert (is_z_digit (c));
506 return (strchr (z_digits, c) - z_digits) >= 20;
509 /* Parses Z format. */
510 static char *
511 parse_Z (struct data_in *i)
513 struct string tmp;
515 int save_errno;
517 bool got_dot = false;
518 bool got_final_digit = false;
520 /* Trim spaces and check for missing value representation. */
521 if (trim_spaces_and_check_missing (i))
522 return NULL;
524 ds_init_empty (&tmp);
525 ds_extend (&tmp, 64);
527 ds_put_byte (&tmp, '+');
528 while (!ss_is_empty (i->input))
530 int c = ss_get_byte (&i->input);
531 if (c_isdigit (c) && !got_final_digit)
532 ds_put_byte (&tmp, c);
533 else if (is_z_digit (c) && !got_final_digit)
535 ds_put_byte (&tmp, z_digit_value (c) + '0');
536 if (is_negative_z_digit (c))
537 ds_data (&tmp)[0] = '-';
538 got_final_digit = true;
540 else if (c == '.' && !got_dot)
542 ds_put_byte (&tmp, '.');
543 got_dot = true;
545 else
547 ds_destroy (&tmp);
548 return xstrdup (_("Invalid zoned decimal syntax."));
552 if (!ss_is_empty (i->input))
554 char *error;
556 if (ds_length (&tmp) == 1)
557 error = xstrdup (_("Field contents are not numeric."));
558 else
559 error = xstrdup (_("Number followed by garbage."));
561 ds_destroy (&tmp);
562 return error;
565 /* Let c_strtod() do the conversion. */
566 save_errno = errno;
567 errno = 0;
568 i->output->f = c_strtod (ds_cstr (&tmp), NULL);
569 if (errno == ERANGE)
571 if (fabs (i->output->f) > 1)
573 i->output->f = SYSMIS;
574 ds_destroy (&tmp);
575 return xstrdup (_("Too-large number set to system-missing."));
577 else
579 i->output->f = 0.0;
580 ds_destroy (&tmp);
581 return xstrdup (_("Too-small number set to zero."));
584 else
585 errno = save_errno;
587 ds_destroy (&tmp);
588 return NULL;
591 /* Parses IB format. */
592 static char *
593 parse_IB (struct data_in *i)
595 size_t bytes;
596 uint64_t value;
597 uint64_t sign_bit;
599 bytes = MIN (8, ss_length (i->input));
600 value = integer_get (settings_get_input_integer_format (), ss_data (i->input), bytes);
602 sign_bit = UINT64_C(1) << (8 * bytes - 1);
603 if (!(value & sign_bit))
604 i->output->f = value;
605 else
607 /* Sign-extend to full 64 bits. */
608 value -= sign_bit << 1;
609 i->output->f = -(double) -value;
612 return NULL;
615 /* Parses PIB format. */
616 static char *
617 parse_PIB (struct data_in *i)
619 i->output->f = integer_get (settings_get_input_integer_format (), ss_data (i->input),
620 MIN (8, ss_length (i->input)));
622 return NULL;
/* Removes the first byte from S, which must not be empty, and splits it in
   two: the upper 4 bits go into *HIGH_NIBBLE and the lower 4 bits into
   *LOW_NIBBLE. */
static void
get_nibbles (struct substring *s, int *high_nibble, int *low_nibble)
{
  int byte = ss_get_byte (s);

  assert (byte != EOF);
  *low_nibble = byte & 15;
  *high_nibble = (byte >> 4) & 15;
}
636 /* Parses P format. */
637 static char *
638 parse_P (struct data_in *i)
640 int high_nibble, low_nibble;
642 i->output->f = 0.0;
644 while (ss_length (i->input) > 1)
646 get_nibbles (&i->input, &high_nibble, &low_nibble);
647 if (high_nibble > 9 || low_nibble > 9)
648 return xstrdup (_("Invalid syntax for P field."));
649 i->output->f = (100 * i->output->f) + (10 * high_nibble) + low_nibble;
652 get_nibbles (&i->input, &high_nibble, &low_nibble);
653 if (high_nibble > 9)
654 return xstrdup (_("Invalid syntax for P field."));
655 i->output->f = (10 * i->output->f) + high_nibble;
656 if (low_nibble < 10)
657 i->output->f = (10 * i->output->f) + low_nibble;
658 else if (low_nibble == 0xb || low_nibble == 0xd)
659 i->output->f = -i->output->f;
661 return NULL;
664 /* Parses PK format. */
665 static char *
666 parse_PK (struct data_in *i)
668 i->output->f = 0.0;
669 while (!ss_is_empty (i->input))
671 int high_nibble, low_nibble;
673 get_nibbles (&i->input, &high_nibble, &low_nibble);
674 if (high_nibble > 9 || low_nibble > 9)
676 i->output->f = SYSMIS;
677 return NULL;
679 i->output->f = (100 * i->output->f) + (10 * high_nibble) + low_nibble;
682 return NULL;
685 /* Parses RB format. */
686 static char *
687 parse_RB (struct data_in *i)
689 enum float_format ff = settings_get_input_float_format ();
690 size_t size = float_get_size (ff);
691 if (ss_length (i->input) >= size)
692 float_convert (ff, ss_data (i->input),
693 FLOAT_NATIVE_DOUBLE, &i->output->f);
694 else
695 i->output->f = SYSMIS;
697 return NULL;
700 /* Parses A format. */
701 static char *
702 parse_A (struct data_in *i)
704 /* This is equivalent to buf_copy_rpad, except that we posibly
705 do a character set recoding in the middle. */
706 uint8_t *dst = i->output->s;
707 size_t dst_size = i->width;
708 const char *src = ss_data (i->input);
709 size_t src_size = ss_length (i->input);
711 memcpy (dst, src, MIN (src_size, dst_size));
713 if (dst_size > src_size)
714 memset (&dst[src_size], ' ', dst_size - src_size);
716 return NULL;
719 /* Parses AHEX format. */
720 static char *
721 parse_AHEX (struct data_in *i)
723 uint8_t *s = i->output->s;
724 size_t j;
726 for (j = 0; ; j++)
728 int hi = ss_get_byte (&i->input);
729 int lo = ss_get_byte (&i->input);
730 if (hi == EOF)
731 break;
732 else if (lo == EOF)
733 return xstrdup (_("Field must have even length."));
735 if (!c_isxdigit (hi) || !c_isxdigit (lo))
736 return xstrdup (_("Field must contain only hex digits."));
738 if (j < i->width)
739 s[j] = hexit_value (hi) * 16 + hexit_value (lo);
742 memset (&s[j], ' ', i->width - j);
744 return NULL;
747 /* Date & time format components. */
/* Sign of a time value, tracked while a date or time field is parsed. */
enum time_sign
  {
    SIGN_NO_TIME,       /* No time component has been seen yet. */
    SIGN_POSITIVE,      /* The time is positive. */
    SIGN_NEGATIVE       /* The time is negative. */
  };
757 /* Parses a signed decimal integer from at most the first
758 MAX_DIGITS characters in I, storing the result into *RESULT.
759 Returns true if successful, false if no integer was
760 present. */
761 static char * WARN_UNUSED_RESULT
762 parse_int (struct data_in *i, long *result, size_t max_digits)
764 struct substring head = ss_head (i->input, max_digits);
765 size_t n = ss_get_long (&head, result);
766 if (n)
768 ss_advance (&i->input, n);
769 return NULL;
771 else
772 return xstrdup (_("Syntax error in date field."));
775 /* Parses a date integer between 1 and 31 from I, storing it into
776 *DAY.
777 Returns true if successful, false if no date was present. */
778 static char *
779 parse_day (struct data_in *i, long *day)
781 char *error = parse_int (i, day, SIZE_MAX);
782 if (error != NULL)
783 return error;
784 if (*day >= 1 && *day <= 31)
785 return NULL;
787 return xasprintf (_("Day (%ld) must be between 1 and 31."), *day);
790 /* If *TIME_SIGN is SIGN_NO_TIME, allows a sign to precede the
791 time and sets *TIME_SIGN. Otherwise, does not allow a sign. */
792 static void
793 parse_time_sign (struct data_in *i, enum time_sign *time_sign)
795 if (*time_sign == SIGN_NO_TIME)
797 if (ss_match_byte (&i->input, '-'))
798 *time_sign = SIGN_NEGATIVE;
799 else
801 ss_match_byte (&i->input, '+');
802 *time_sign = SIGN_POSITIVE;
807 /* Parses an integer from the beginning of I.
808 Adds SECONDS_PER_UNIT times the absolute value of the integer
809 to *TIME.
810 Returns true if successful, false if no integer was present. */
811 static char *
812 parse_time_units (struct data_in *i, double seconds_per_unit, double *time)
815 char *error;
816 long units;
818 error = parse_int (i, &units, SIZE_MAX);
819 if (error != NULL)
820 return error;
821 if (units < 0)
822 return xstrdup (_("Syntax error in date field."));
823 *time += units * seconds_per_unit;
824 return NULL;
827 /* Parses a data delimiter from the beginning of I.
828 Returns true if successful, false if no delimiter was
829 present. */
830 static char *
831 parse_date_delimiter (struct data_in *i)
833 if (ss_ltrim (&i->input, ss_cstr ("-/.," CC_SPACES)))
834 return NULL;
836 return xstrdup (_("Delimiter expected between fields in date."));
839 /* Parses spaces at the beginning of I. */
840 static void
841 parse_spaces (struct data_in *i)
843 ss_ltrim (&i->input, ss_cstr (CC_SPACES));
846 static struct substring
847 parse_name_token (struct data_in *i)
849 struct substring token;
850 ss_get_bytes (&i->input, ss_span (i->input, ss_cstr (CC_LETTERS)), &token);
851 return token;
854 /* Reads a name from I and sets *OUTPUT to the value associated
855 with that name. If ALLOW_SUFFIXES is true, then names that
856 begin with one of the names are accepted; otherwise, only
857 exact matches (except for case) are allowed.
858 Returns true if successful, false otherwise. */
859 static bool
860 match_name (struct substring token, const char *const *names, long *output)
862 int i;
864 for (i = 1; *names != NULL; i++)
865 if (ss_equals_case (ss_cstr (*names++), token))
867 *output = i;
868 return true;
871 return false;
874 /* Parses a month name or number from the beginning of I,
875 storing the month (in range 1...12) into *MONTH.
876 Returns true if successful, false if no month was present. */
877 static char *
878 parse_month (struct data_in *i, long *month)
880 if (c_isdigit (ss_first (i->input)))
882 char *error = parse_int (i, month, SIZE_MAX);
883 if (error != NULL)
884 return error;
885 if (*month >= 1 && *month <= 12)
886 return NULL;
888 else
890 static const char *const english_names[] =
892 "jan", "feb", "mar", "apr", "may", "jun",
893 "jul", "aug", "sep", "oct", "nov", "dec",
894 NULL,
897 static const char *const roman_names[] =
899 "i", "ii", "iii", "iv", "v", "vi",
900 "vii", "viii", "ix", "x", "xi", "xii",
901 NULL,
904 struct substring token = parse_name_token (i);
905 if (match_name (ss_head (token, 3), english_names, month)
906 || match_name (ss_head (token, 4), roman_names, month))
907 return NULL;
910 return xstrdup (_("Unrecognized month format. Months may be specified "
911 "as Arabic or Roman numerals or as at least 3 letters "
912 "of their English names."));
915 /* Parses a year of at most MAX_DIGITS from the beginning of I,
916 storing a "4-digit" year into *YEAR. */
917 static char *
918 parse_year (struct data_in *i, long *year, size_t max_digits)
920 char *error = parse_int (i, year, max_digits);
921 if (error != NULL)
922 return error;
924 if (*year >= 0 && *year <= 99)
926 int epoch = fmt_settings_get_epoch (i->settings);
927 int epoch_century = ROUND_DOWN (epoch, 100);
928 int epoch_offset = epoch - epoch_century;
929 if (*year >= epoch_offset)
930 *year += epoch_century;
931 else
932 *year += epoch_century + 100;
934 if (*year >= 1582 && *year <= 19999)
935 return NULL;
937 return xasprintf (_("Year (%ld) must be between 1582 and 19999."), *year);
940 /* Returns true if input in I has been exhausted,
941 false otherwise. */
942 static char *
943 parse_trailer (struct data_in *i)
945 if (ss_is_empty (i->input))
946 return NULL;
948 return xasprintf (_("Trailing garbage `%.*s' following date."),
949 (int) ss_length (i->input), ss_data (i->input));
952 /* Parses a 3-digit Julian day-of-year value from I into *YDAY.
953 Returns true if successful, false on failure. */
954 static char *
955 parse_yday (struct data_in *i, long *yday)
957 struct substring num_s;
958 long num;
960 ss_get_bytes (&i->input, 3, &num_s);
961 if (ss_span (num_s, ss_cstr (CC_DIGITS)) != 3)
962 return xstrdup (_("Julian day must have exactly three digits."));
963 else if (!ss_get_long (&num_s, &num) || num < 1 || num > 366)
964 return xasprintf (_("Julian day (%ld) must be between 1 and 366."), num);
966 *yday = num;
967 return NULL;
970 /* Parses a quarter-of-year integer between 1 and 4 from I.
971 Stores the corresponding month into *MONTH.
972 Returns true if successful, false if no quarter was present. */
973 static char *
974 parse_quarter (struct data_in *i, long int *month)
976 long quarter;
977 char *error;
979 error = parse_int (i, &quarter, SIZE_MAX);
980 if (error != NULL)
981 return error;
982 if (quarter >= 1 && quarter <= 4)
984 *month = (quarter - 1) * 3 + 1;
985 return NULL;
988 return xasprintf (_("Quarter (%ld) must be between 1 and 4."), quarter);
991 /* Parses a week-of-year integer between 1 and 53 from I,
992 Stores the corresponding year-of-day into *YDAY.
993 Returns true if successful, false if no week was present. */
994 static char *
995 parse_week (struct data_in *i, long int *yday)
997 char *error;
998 long week;
1000 error = parse_int (i, &week, SIZE_MAX);
1001 if (error != NULL)
1002 return error;
1003 if (week >= 1 && week <= 53)
1005 *yday = (week - 1) * 7 + 1;
1006 return NULL;
1009 return xasprintf (_("Week (%ld) must be between 1 and 53."), week);
1012 /* Parses a time delimiter from the beginning of I.
1013 Returns true if successful, false if no delimiter was
1014 present. */
1015 static char *
1016 parse_time_delimiter (struct data_in *i)
1018 if (ss_ltrim (&i->input, ss_cstr (":" CC_SPACES)) > 0)
1019 return NULL;
1021 return xstrdup (_("Delimiter expected between fields in time."));
1024 /* Parses minutes and optional seconds from the beginning of I.
1025 The time is converted into seconds, which are added to
1026 *TIME.
1027 Returns true if successful, false if an error was found. */
1028 static char *
1029 parse_minute_second (struct data_in *i, double *time)
1031 long minute;
1032 char buf[64];
1033 char *error;
1034 char *cp;
1036 /* Parse minutes. */
1037 error = parse_int (i, &minute, SIZE_MAX);
1038 if (error != NULL)
1039 return error;
1040 if (i->format != FMT_MTIME && (minute < 0 || minute > 59))
1041 return xasprintf (_("Minute (%ld) must be between 0 and 59."), minute);
1042 *time += 60. * minute;
1044 /* Check for seconds. */
1045 if (ss_ltrim (&i->input, ss_cstr (":" CC_SPACES)) == 0
1046 || !c_isdigit (ss_first (i->input)))
1047 return NULL;
1049 /* Parse seconds. */
1050 cp = buf;
1051 while (c_isdigit (ss_first (i->input)))
1052 *cp++ = ss_get_byte (&i->input);
1053 if (ss_match_byte (&i->input, i->settings->decimal))
1054 *cp++ = '.';
1055 while (c_isdigit (ss_first (i->input)))
1056 *cp++ = ss_get_byte (&i->input);
1057 *cp = '\0';
1059 *time += c_strtod (buf, NULL);
1061 return NULL;
1064 /* Parses a weekday name from the beginning of I,
1065 storing a value of 1=Sunday...7=Saturday into *WEEKDAY.
1066 Returns true if successful, false if an error was found. */
1067 static char *
1068 parse_weekday (struct data_in *i, long *weekday)
1070 static const char *const weekday_names[] =
1072 "su", "mo", "tu", "we", "th", "fr", "sa",
1073 NULL,
1076 struct substring token = parse_name_token (i);
1077 bool ok = match_name (ss_head (token, 2), weekday_names, weekday);
1078 if (!ok)
1079 return xstrdup (_("Unrecognized weekday name. At least the first two "
1080 "letters of an English weekday name must be "
1081 "specified."));
1082 return NULL;
1085 /* Date & time formats. */
1087 /* Parses WKDAY format. */
1088 static char *
1089 parse_WKDAY (struct data_in *i)
1091 long weekday = 0;
1092 char *error;
1094 if (trim_spaces_and_check_missing (i))
1095 return NULL;
1097 error = parse_weekday (i, &weekday);
1098 if (error == NULL)
1099 error = parse_trailer (i);
1101 i->output->f = weekday;
1102 return error;
1105 /* Parses MONTH format. */
1106 static char *
1107 parse_MONTH (struct data_in *i)
1109 long month;
1110 char *error;
1112 if (trim_spaces_and_check_missing (i))
1113 return NULL;
1115 error = parse_month (i, &month);
1116 if (error == NULL)
1117 error = parse_trailer (i);
1119 i->output->f = month;
1120 return error;
1123 /* Parses DATE, ADATE, EDATE, JDATE, SDATE, QYR, MOYR, KWYR,
1124 DATETIME, YMDHMS, MTIME, TIME, and DTIME formats. */
1125 static char *
1126 parse_date (struct data_in *i)
1128 long int year = INT_MIN;
1129 long int month = 1;
1130 long int day = 1;
1131 long int yday = 1;
1132 double time = 0, date = 0;
1133 enum time_sign time_sign = SIGN_NO_TIME;
1135 const char *template = fmt_date_template (i->format, 0);
1136 size_t template_width = strlen (template);
1137 char *error;
1139 if (trim_spaces_and_check_missing (i))
1140 return NULL;
1142 while (*template != '\0')
1144 unsigned char ch = *template;
1145 int count = 1;
1147 while (template[count] == ch)
1148 count++;
1149 template += count;
1151 switch (ch)
1153 case 'd':
1154 error = count < 3 ? parse_day (i, &day) : parse_yday (i, &yday);
1155 break;
1156 case 'm':
1157 error = parse_month (i, &month);
1158 break;
1159 case 'y':
1161 size_t max_digits;
1162 if (!c_isalpha (*template))
1163 max_digits = SIZE_MAX;
1164 else
1166 if (ss_length (i->input) >= template_width + 2)
1167 max_digits = 4;
1168 else
1169 max_digits = 2;
1171 error = parse_year (i, &year, max_digits);
1173 break;
1174 case 'q':
1175 error = parse_quarter (i, &month);
1176 break;
1177 case 'w':
1178 error = parse_week (i, &yday);
1179 break;
1180 case 'D':
1181 parse_time_sign (i, &time_sign);
1182 error = parse_time_units (i, 60. * 60. * 24., &time);
1183 break;
1184 case 'H':
1185 parse_time_sign (i, &time_sign);
1186 error = parse_time_units (i, 60. * 60., &time);
1187 break;
1188 case 'M':
1189 if (i->format == FMT_MTIME)
1190 parse_time_sign (i, &time_sign);
1191 error = parse_minute_second (i, &time);
1192 break;
1193 case '-':
1194 case '/':
1195 case '.':
1196 error = parse_date_delimiter (i);
1197 break;
1198 case ':':
1199 error = parse_time_delimiter (i);
1200 break;
1201 case ' ':
1202 if (i->format != FMT_MOYR)
1204 parse_spaces (i);
1205 error = NULL;
1207 else
1208 error = parse_date_delimiter (i);
1209 break;
1210 default:
1211 assert (count == 1);
1212 if (!ss_match_byte (&i->input, c_toupper (ch))
1213 && !ss_match_byte (&i->input, c_tolower (ch)))
1214 error = xasprintf (_("`%c' expected in date field."), ch);
1215 else
1216 error = NULL;
1217 break;
1219 if (error != NULL)
1220 return error;
1222 error = parse_trailer (i);
1223 if (error != NULL)
1224 return error;
1226 if (year != INT_MIN)
1228 char *error;
1229 double ofs;
1231 ofs = calendar_gregorian_to_offset (
1232 year, month, day, settings_get_fmt_settings (), &error);
1233 if (ofs == SYSMIS)
1234 return error;
1235 date = (yday - 1 + ofs) * 60. * 60. * 24.;
1237 else
1238 date = 0.;
1239 i->output->f = date + (time_sign == SIGN_NEGATIVE ? -time : time);
1241 return NULL;
1244 /* Utility functions. */
1246 /* Sets the default result for I.
1247 For a numeric format, this is the value set on SET BLANKS
1248 (typically system-missing); for a string format, it is all
1249 spaces. */
1250 static void
1251 default_result (struct data_in *i)
1253 if (fmt_is_string (i->format))
1254 memset (i->output->s, ' ', i->width);
1255 else
1256 i->output->f = settings_get_blanks ();
1259 /* Trims leading and trailing spaces from I.
1260 If the result is empty, or a single period character, then
1261 sets the default result and returns true; otherwise, returns
1262 false. */
1263 static bool
1264 trim_spaces_and_check_missing (struct data_in *i)
1266 ss_trim (&i->input, ss_cstr (" "));
1267 if (ss_is_empty (i->input) || ss_equals (i->input, ss_cstr (".")))
1269 default_result (i);
1270 return true;
1272 return false;
/* Returns the integer value, 0 through 15, of hex digit C, which may be in
   either case.  C must be a hex digit. */
static int
hexit_value (int c)
{
  static const char hex_digits[] = "0123456789abcdef";
  const char *pos = strchr (hex_digits, c_tolower ((unsigned char) c));

  assert (pos != NULL);
  return pos - hex_digits;
}