2 * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
5 * This code was contributed to The NetBSD Foundation by Klaus Klein.
6 * Heavily optimised by David Laight
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
18 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
21 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
31 #include "ws_strptime.h"
33 #include <wsutil/time_util.h> /* For ws_localtime_r() */
34 #include <wsutil/strtoi.h>
38 #define tzname _tzname
39 #define timezone _timezone
40 #define daylight _daylight
43 static const unsigned char *conv_num(const unsigned char *, int *, unsigned, unsigned);
44 static const unsigned char *find_string(const unsigned char *, int *, const char * const *,
45 const char * const *, int);
48 #define MINSPERHOUR 60
49 #define HOURSPERDAY 24
51 #define DAYSPERNYEAR 365
52 #define DAYSPERLYEAR 366
53 #define SECSPERHOUR (SECSPERMIN * MINSPERHOUR)
54 #define SECSPERDAY ((int_fast32_t) SECSPERHOUR * HOURSPERDAY)
55 #define MONSPERYEAR 12
60 #define TM_WEDNESDAY 3
73 #define TM_SEPTEMBER 8
75 #define TM_NOVEMBER 10
76 #define TM_DECEMBER 11
78 #define TM_YEAR_BASE 1900
80 #define EPOCH_YEAR 1970
81 #define EPOCH_WDAY TM_THURSDAY
83 #define isleap(y) (((y) % 4) == 0 && (((y) % 100) != 0 || ((y) % 400) == 0))
86 ** Since everything in isleap is modulo 400 (or a factor of 400), we know that
87 ** isleap(y) == isleap(y % 400)
89 ** isleap(a + b) == isleap((a + b) % 400)
91 ** isleap(a + b) == isleap(a % 400 + b % 400)
92 ** This is true even if % means modulo rather than Fortran remainder
93 ** (which is allowed by C89 but not C99).
94 ** We use this to avoid addition overflow problems.
97 #define isleap_sum(a, b) isleap((a) % 400 + (b) % 400)
100 * We do not implement alternate representations. However, we always
101 * check whether a given modifier is allowed for a certain conversion.
105 #define LEGAL_ALT(x) { if (alt_format & ~(x)) return NULL; }
107 #define S_YEAR (1 << 0)
108 #define S_MON (1 << 1)
109 #define S_YDAY (1 << 2)
110 #define S_MDAY (1 << 3)
111 #define S_WDAY (1 << 4)
112 #define S_HOUR (1 << 5)
114 #define HAVE_MDAY(s) (s & S_MDAY)
115 #define HAVE_MON(s) (s & S_MON)
116 #define HAVE_WDAY(s) (s & S_WDAY)
117 #define HAVE_YDAY(s) (s & S_YDAY)
118 #define HAVE_YEAR(s) (s & S_YEAR)
119 #define HAVE_HOUR(s) (s & S_HOUR)
121 static const char utc
[] = { "UTC" };
122 /* RFC-822/RFC-2822 */
123 static const char * const nast
[5] = {
124 "EST", "CST", "MST", "PST", "\0\0\0"
126 static const char * const nadt
[5] = {
127 "EDT", "CDT", "MDT", "PDT", "\0\0\0"
130 static const char * const cloc_am_pm
[] = {"AM", "PM", NULL
};
132 static const char * const cloc_abday
[] = {
133 "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", NULL
136 static const char * const cloc_day
[] = {
137 "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday",
141 static const char * const cloc_abmon
[] = {
142 "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep",
143 "Oct", "Nov", "Dec", NULL
146 static const char * const cloc_mon
[] = {
147 "January", "February", "March", "April", "May", "June", "July",
148 "August", "September", "October", "November", "December", NULL
152 * Table to determine the ordinal date for the start of a month.
153 * Ref: http://en.wikipedia.org/wiki/ISO_week_date
155 static const int start_of_month
[2][13] = {
157 { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 },
159 { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
163 * Calculate the week day of the first day of a year. Valid for
164 * the Gregorian calendar, which began Sept 14, 1752 in the UK
165 * and its colonies. Ref:
166 * http://en.wikipedia.org/wiki/Determination_of_the_day_of_the_week
170 first_wday_of(int yr
)
172 return ((2 * (3 - (yr
/ 100) % 4)) + (yr
% 100) + ((yr
% 100) / 4) +
173 (isleap(yr
) ? 6 : 0) + 1) % 7;
176 #define delim(p) ((p) == '\0' || g_ascii_isspace((unsigned char)(p)))
178 #define SET_ZONEP(p, off, zone) \
179 do { if (p) { p->tm_gmtoff = off; p->tm_zone = zone; } } while (0)
182 * This is spectacularly ugly.
184 * POSIX requires that there be a variable named "timezone", which contains
185 * "the difference, in seconds, between Coordinated Universal Time (UTC)
186 * and local standard time.".
188 * Most of the platforms on which we run have this.
190 * FreeBSD, however, does not. Instead, it provides a function named
191 * "timezone", which takes two integer arguments, "zone" and "dst",
192 * and "returns a pointer to a time zone abbreviation for the specified
193 * zone and dst values. The zone argument is the number of minutes west
194 * of GMT and dst is non-zero if daylight savings time is in effect."
196 * So we need a way to get "the difference, in seconds, between Coordinated
197 * Universal Time (UTC) and local standard time."
199 * The FreeBSD Wireshark port, as of 2023-12-05, does so by handing
200 * a time_t value of 0, meaning 1970-01-01 00:00:00 UTC (the Unix Epoch),
201 * to localtime() and using the tm_gmtoff value from the resulting
202 * struct tm. That works in countries that were in standard time
203 * then, but doesn't work in countries that were not in standard time
204 * then, meaning it doesn't work correctly in countries in the Southern
205 * Hemisphere that were in Daylight Saving Time at that point, and may or
206 * may not work correctly in Ireland, depending on how "standard time"
207 * is defined (don't ask).
209 * For now, we use a similar mechanism to the one above, but we check
210 * whether tm_isdst is greater than 0 in the resulting struct tm and,
211 * if it is, use a time_t value of 86400*(365/2), in the hopes that,
212 * halfway through 1970, the location in question was in standard
215 * Also, for now, we test for FreeBSD rather than doing a configure-
216 * time check; checking whether the symbol "timezone" is defined
217 * won't work, as it's defined in FreeBSD as a function, so we'd
218 * have to check *how* it's defined.
220 * So we have a function to return the difference in question. It
221 * returns a long because timezone is defined to be a long in POSIX
222 * and because the tm_gmtoff member of a struct tm, if such a member
223 * is present, is also a long.
228 #if defined(__FreeBSD__)
230 * We only calculate the standard time UTC offset once, under the
231 * assumption that we won't change what time zone we're in.
233 * XXX - that assumption is violated if:
235 * you're running on an OS where you can set the current
236 * time zone and that will affect all running programs,
237 * or where the OS tries to determine where you're located
238 * and changes the time zone to match (for example, macOS,
239 * in which both of those are the case);
241 * you're in a location that has moved between time zones
242 * since 1970-01-01 00:00:00 UTC (there are some, and the
243 * IANA time zone database, at least, takes that into
246 * we add support for the if_iana_tzname Interface
247 * Description Block option, so that, when looking
248 * at a file with that option for one or more
249 * interfaces, and using the timezone from that
250 * option rather than the local timezone, the
251 * offset from UTC may change from file to file.
253 * This *probably* won't make much of a difference, as
254 * we have to do this sort of hackery only when parsing
255 * a date that doesn't use the "Obsolete Date and Time",
256 * as it's called in RFC 2822.
258 static bool got_utcoffset
= false;
259 static struct tm
*gtm
;
264 return gtm
->tm_gmtoff
;
266 return 0; /* localtime() failed on us */
269 gtm
= localtime(&then
);
270 got_utcoffset
= true;
273 * Oh, heck, it can't convert the Epoch. Just
274 * return 0 and say to hell with it.
278 if (gtm
->tm_isdst
> 0) {
280 * Sorry, we were in Daylight Saving Time on
281 * 1970-01-01 at 00:00:00 UTC. Try the middle
282 * of the year. (We don't bother making sure
283 * we weren't in DST then.)
285 then
= 86400*(365/2);
286 gtm
= localtime(&then
);
292 return gtm
->tm_gmtoff
;
299 ws_strptime_p(const char *buf
, const char *format
, struct tm
*tm
)
302 return strptime(buf
, format
, tm
);
304 return ws_strptime(buf
, format
, tm
, NULL
);
309 ws_strptime(const char *buf
, const char *fmt
, struct tm
*tm
, struct ws_timezone
*zonep
)
312 const unsigned char *bp
, *ep
, *zname
;
313 int alt_format
, i
, split_year
= 0, neg
= 0, state
= 0,
314 day_offset
= -1, week_offset
= 0, offs
, mandatory
;
319 bp
= (const unsigned char *)buf
;
321 while (bp
!= NULL
&& (c
= *fmt
++) != '\0') {
322 /* Clear `alternate' modifier prior to new conversion. */
326 /* Eat up white-space. */
327 if (g_ascii_isspace(c
)) {
328 while (g_ascii_isspace(*bp
))
337 again
: switch (c
= *fmt
++) {
338 case '%': /* "%%" is converted to "%". */
346 * "Alternative" modifiers. Just set the appropriate flag
347 * and start over again.
349 case 'E': /* "%E?" alternative conversion modifier. */
354 case 'O': /* "%O?" alternative conversion modifier. */
360 * "Complex" conversion rules, implemented through recursion.
362 case 'c': /* Date and time, using the locale's format. */
363 new_fmt
= "%a %b %e %H:%M:%S %Y";
364 state
|= S_WDAY
| S_MON
| S_MDAY
| S_YEAR
;
367 case 'D': /* The date as "%m/%d/%y". */
368 new_fmt
= "%m/%d/%y";
370 state
|= S_MON
| S_MDAY
| S_YEAR
;
373 case 'F': /* The date as "%Y-%m-%d". */
374 new_fmt
= "%Y-%m-%d";
376 state
|= S_MON
| S_MDAY
| S_YEAR
;
379 case 'R': /* The time as "%H:%M". */
384 case 'r': /* The time in 12-hour clock representation. */
385 new_fmt
= "%I:%M:%S %p";
389 case 'T': /* The time as "%H:%M:%S". */
390 new_fmt
= "%H:%M:%S";
394 case 'X': /* The time, using the locale's format. */
395 new_fmt
= "%H:%M:%S";
398 case 'x': /* The date, using the locale's format. */
399 new_fmt
= "%m/%d/%y";
400 state
|= S_MON
| S_MDAY
| S_YEAR
;
402 bp
= (const unsigned char *)ws_strptime((const char *)bp
,
408 * "Elementary" conversion rules.
410 case 'A': /* The day of week, using the locale's form. */
412 bp
= find_string(bp
, &tm
->tm_wday
, cloc_day
, cloc_abday
, 7);
417 case 'B': /* The month, using the locale's form. */
420 bp
= find_string(bp
, &tm
->tm_mon
, cloc_mon
, cloc_abmon
, 12);
425 case 'C': /* The century number. */
427 bp
= conv_num(bp
, &i
, 0, 99);
429 i
= i
* 100 - TM_YEAR_BASE
;
431 i
+= tm
->tm_year
% 100;
438 case 'd': /* The day of month. */
440 bp
= conv_num(bp
, &tm
->tm_mday
, 1, 31);
445 case 'k': /* The hour (24-hour clock representation). */
449 bp
= conv_num(bp
, &tm
->tm_hour
, 0, 23);
454 case 'l': /* The hour (12-hour clock representation). */
458 bp
= conv_num(bp
, &tm
->tm_hour
, 1, 12);
459 if (tm
->tm_hour
== 12)
465 case 'j': /* The day of year. */
467 bp
= conv_num(bp
, &i
, 1, 366);
473 case 'M': /* The minute. */
474 bp
= conv_num(bp
, &tm
->tm_min
, 0, 59);
478 case 'm': /* The month. */
480 bp
= conv_num(bp
, &i
, 1, 12);
486 case 'p': /* The locale's equivalent of AM/PM. */
487 bp
= find_string(bp
, &i
, cloc_am_pm
,
489 if (HAVE_HOUR(state
) && tm
->tm_hour
> 11)
491 tm
->tm_hour
+= i
* 12;
495 case 'S': /* The seconds. */
496 bp
= conv_num(bp
, &tm
->tm_sec
, 0, 61);
500 case 's': /* seconds since the epoch */
506 /* Extract the seconds as a 64-bit signed number. */
507 if (!ws_strtoi64(bp
, &endptr
, &secs
)) {
513 /* For now, reject times before the Epoch. */
519 /* Make sure it fits. */
526 if (ws_localtime_r(&sse
, tm
) == NULL
)
529 state
|= S_YDAY
| S_WDAY
|
530 S_MON
| S_MDAY
| S_YEAR
;
534 case 'U': /* The week of year, beginning on sunday. */
535 case 'W': /* The week of year, beginning on monday. */
537 * This is bogus, as we can not assume any valid
538 * information present in the tm structure at this
539 * point to calculate a real value, so save the
540 * week for now in case it can be used later.
542 bp
= conv_num(bp
, &i
, 0, 53);
545 day_offset
= TM_SUNDAY
;
547 day_offset
= TM_MONDAY
;
551 case 'w': /* The day of week, beginning on sunday. */
552 bp
= conv_num(bp
, &tm
->tm_wday
, 0, 6);
557 case 'u': /* The day of week, monday = 1. */
558 bp
= conv_num(bp
, &i
, 1, 7);
564 case 'g': /* The year corresponding to the ISO week
565 * number but without the century.
567 bp
= conv_num(bp
, &i
, 0, 99);
570 case 'G': /* The year corresponding to the ISO week
571 * number with century.
575 while (g_ascii_isdigit(*bp
));
578 case 'V': /* The ISO 8601:1988 week number as decimal */
579 bp
= conv_num(bp
, &i
, 1, 53);
582 case 'Y': /* The year. */
583 i
= TM_YEAR_BASE
; /* just for data sanity... */
584 bp
= conv_num(bp
, &i
, 0, 9999);
585 tm
->tm_year
= i
- TM_YEAR_BASE
;
590 case 'y': /* The year within 100 years of the epoch. */
591 /* LEGAL_ALT(ALT_E | ALT_O); */
592 bp
= conv_num(bp
, &i
, 0, 99);
595 /* preserve century */
596 i
+= (tm
->tm_year
/ 100) * 100;
600 i
= i
+ 2000 - TM_YEAR_BASE
;
602 i
= i
+ 1900 - TM_YEAR_BASE
;
611 mandatory
= c
== 'z';
613 * We recognize all ISO 8601 formats:
618 * We recognize all RFC-822/RFC-2822 formats:
620 * North American : UTC offsets
621 * E[DS]T = Eastern : -4 | -5
622 * C[DS]T = Central : -5 | -6
623 * M[DS]T = Mountain: -6 | -7
624 * P[DS]T = Pacific : -7 | -8
626 * [A-IL-M] = -1 ... -9 (J not used)
628 * Note: J may be used to denote non-nautical
632 while (g_ascii_isspace(*bp
))
644 else if (!delim(*bp
) && *bp
++ != 'C')
653 SET_ZONEP(zonep
, tm_gmtoff
, tm_zone
);
665 /* Nautical / Military style */
667 ((*bp
>= 'A' && *bp
<= 'I') ||
668 (*bp
>= 'L' && *bp
<= 'Y'))) {
670 if (*bp
>= 'A' && *bp
<= 'I')
672 (int)*bp
- ('A' - 1);
673 else if (*bp
>= 'L' && *bp
<= 'M')
674 tm_gmtoff
= (int)*bp
- 'A';
675 else if (*bp
>= 'N' && *bp
<= 'Y')
676 tm_gmtoff
= 'M' - (int)*bp
;
679 ws_critical("Not reached!");
682 tm_gmtoff
*= SECSPERHOUR
;
683 tm_zone
= NULL
; /* XXX */
684 SET_ZONEP(zonep
, tm_gmtoff
, tm_zone
);
688 /* 'J' is local time */
689 if (delim(bp
[1]) && *bp
== 'J') {
690 tm_gmtoff
= -utc_offset();
691 tm_zone
= NULL
; /* XXX */
692 SET_ZONEP(zonep
, tm_gmtoff
, tm_zone
);
698 * From our 3 letter hard-coded table
700 ep
= find_string(bp
, &i
, nast
, NULL
, 4);
702 tm_gmtoff
= (-5 - i
) * SECSPERHOUR
;
704 SET_ZONEP(zonep
, tm_gmtoff
, tm_zone
);
708 ep
= find_string(bp
, &i
, nadt
, NULL
, 4);
711 tm_gmtoff
= (-4 - i
) * SECSPERHOUR
;
713 SET_ZONEP(zonep
, tm_gmtoff
, tm_zone
);
718 * Our current timezone
720 ep
= find_string(bp
, &i
,
721 (const char * const *)tzname
,
725 tm_gmtoff
= -utc_offset();
727 SET_ZONEP(zonep
, tm_gmtoff
, tm_zone
);
734 for (i
= 0; i
< 4; ) {
735 if (g_ascii_isdigit(*bp
)) {
736 offs
= offs
* 10 + (*bp
++ - '0');
740 if (i
== 2 && *bp
== ':') {
746 if (g_ascii_isdigit(*bp
))
757 /* Convert minutes into decimal */
758 offs
= offs
* SECSPERHOUR
+ i
* SECSPERMIN
;
767 /* ISO 8601 & RFC 3339 limit to 23:59 max */
768 if (offs
>= (HOURSPERDAY
* SECSPERHOUR
))
772 tm
->tm_isdst
= 0; /* XXX */
774 tm_zone
= NULL
; /* XXX */
775 SET_ZONEP(zonep
, tm_gmtoff
, tm_zone
);
779 * Miscellaneous conversions.
781 case 'n': /* Any kind of white-space. */
783 while (g_ascii_isspace(*bp
))
789 default: /* Unknown/unsupported conversion. */
794 if (!HAVE_YDAY(state
) && HAVE_YEAR(state
)) {
795 if (HAVE_MON(state
) && HAVE_MDAY(state
)) {
796 /* calculate day of year (ordinal date) */
797 tm
->tm_yday
= start_of_month
[isleap_sum(tm
->tm_year
,
798 TM_YEAR_BASE
)][tm
->tm_mon
] + (tm
->tm_mday
- 1);
800 } else if (day_offset
!= -1) {
802 * Set the date to the first Sunday (or Monday)
803 * of the specified week of the year.
805 if (!HAVE_WDAY(state
)) {
806 tm
->tm_wday
= day_offset
;
810 first_wday_of(tm
->tm_year
+ TM_YEAR_BASE
) +
811 day_offset
) % 7 + (week_offset
- 1) * 7 +
812 tm
->tm_wday
- day_offset
;
817 if (HAVE_YDAY(state
) && HAVE_YEAR(state
)) {
820 if (!HAVE_MON(state
)) {
821 /* calculate month of day of year */
823 isleap
= isleap_sum(tm
->tm_year
, TM_YEAR_BASE
);
824 while (tm
->tm_yday
>= start_of_month
[isleap
][i
])
828 tm
->tm_yday
-= start_of_month
[isleap
][12];
835 if (!HAVE_MDAY(state
)) {
836 /* calculate day of month */
837 isleap
= isleap_sum(tm
->tm_year
, TM_YEAR_BASE
);
838 tm
->tm_mday
= tm
->tm_yday
-
839 start_of_month
[isleap
][tm
->tm_mon
] + 1;
843 if (!HAVE_WDAY(state
)) {
844 /* calculate day of week */
846 week_offset
= first_wday_of(tm
->tm_year
);
847 while (i
++ <= tm
->tm_yday
) {
848 if (week_offset
++ >= 6)
851 tm
->tm_wday
= week_offset
;
859 static const unsigned char *
860 conv_num(const unsigned char *buf
, int *dest
, unsigned llim
, unsigned ulim
)
865 /* The limit also determines the number of valid digits. */
866 unsigned rulim
= ulim
;
869 if (ch
< '0' || ch
> '9')
877 } while ((result
* 10 <= ulim
) && rulim
&& ch
>= '0' && ch
<= '9');
879 if (result
< llim
|| result
> ulim
)
886 static const unsigned char *
887 find_string(const unsigned char *bp
, int *tgt
, const char * const *n1
,
888 const char * const *n2
, int c
)
893 /* check full name - then abbreviated ones */
894 for (; n1
!= NULL
; n1
= n2
, n2
= NULL
) {
895 for (i
= 0; i
< c
; i
++, n1
++) {
897 if (g_ascii_strncasecmp(*n1
, (const char *)bp
, len
) == 0) {
904 /* Nothing matched */