1 //===----------------------------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // For information see https://libcxx.llvm.org/DesignDocs/TimeZone.html
18 #include "include/tzdb/time_zone_private.h"
19 #include "include/tzdb/types_private.h"
20 #include "include/tzdb/tzdb_list_private.h"
21 #include "include/tzdb/tzdb_private.h"
23 // Contains a parser for the IANA time zone data files.
25 // These files can be found at https://data.iana.org/time-zones/ and are in the
26 // public domain. Information regarding the input can be found at
27 // https://data.iana.org/time-zones/tz-how-to.html and
28 // https://man7.org/linux/man-pages/man8/zic.8.html.
30 // As indicated at https://howardhinnant.github.io/date/tz.html#Installation
31 // For Windows another file seems to be required
32 // https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml
33 // This file seems to contain the mapping of Windows time zone name to IANA
36 // However this article mentions another way to do the mapping on Windows
37 // https://devblogs.microsoft.com/oldnewthing/20210527-00/?p=105255
38 // This requires Windows 10 Version 1903, which was released in May of 2019
39 // and considered end of life in December 2020
40 // https://learn.microsoft.com/en-us/lifecycle/announcements/windows-10-1903-end-of-servicing
42 // TODO TZDB Implement the Windows mapping in tzdb::current_zone
44 _LIBCPP_BEGIN_NAMESPACE_STD
48 // This function is weak so it can be overridden in the tests. The
49 // declaration is in the test header test/support/test_tzdb.h
50 _LIBCPP_WEAK string_view
__libcpp_tzdb_directory() {
51 #if defined(__linux__)
52 return "/usr/share/zoneinfo/";
54 # error "unknown path to the IANA Time Zone Database"
58 //===----------------------------------------------------------------------===//
60 //===----------------------------------------------------------------------===//
// Returns true when __c is one of the two characters this parser accepts as
// whitespace: a space or a horizontal tab. A newline is not whitespace here.
[[nodiscard]] static bool __is_whitespace(int __c) {
  switch (__c) {
  case ' ':
  case '\t':
    return true;
  default:
    return false;
  }
}
64 static void __skip_optional_whitespace(istream
& __input
) {
65 while (chrono::__is_whitespace(__input
.peek()))
69 static void __skip_mandatory_whitespace(istream
& __input
) {
70 if (!chrono::__is_whitespace(__input
.get()))
71 std::__throw_runtime_error("corrupt tzdb: expected whitespace");
73 chrono::__skip_optional_whitespace(__input
);
// Returns true when __c ends a line: either the newline character or the
// end-of-file value of char_traits<char>.
[[nodiscard]] static bool __is_eol(int __c) {
  if (__c == '\n')
    return true;

  return __c == std::char_traits<char>::eof();
}
78 static void __skip_line(istream
& __input
) {
79 while (!chrono::__is_eol(__input
.peek())) {
85 static void __skip(istream
& __input
, char __suffix
) {
86 if (std::tolower(__input
.peek()) == __suffix
)
90 static void __skip(istream
& __input
, string_view __suffix
) {
91 for (auto __c
: __suffix
)
92 if (std::tolower(__input
.peek()) == __c
)
96 static void __matches(istream
& __input
, char __expected
) {
97 if (std::tolower(__input
.get()) != __expected
)
98 std::__throw_runtime_error((string("corrupt tzdb: expected character '") + __expected
+ '\'').c_str());
101 static void __matches(istream
& __input
, string_view __expected
) {
102 for (auto __c
: __expected
)
103 if (std::tolower(__input
.get()) != __c
)
104 std::__throw_runtime_error((string("corrupt tzdb: expected string '") + string(__expected
) + '\'').c_str());
107 [[nodiscard
]] static string
__parse_string(istream
& __input
) {
110 int __c
= __input
.get();
117 case istream::traits_type::eof():
118 if (__result
.empty())
119 std::__throw_runtime_error("corrupt tzdb: expected a string");
124 __result
.push_back(__c
);
129 [[nodiscard
]] static int64_t __parse_integral(istream
& __input
, bool __leading_zero_allowed
) {
130 int64_t __result
= __input
.get();
131 if (__leading_zero_allowed
) {
132 if (__result
< '0' || __result
> '9')
133 std::__throw_runtime_error("corrupt tzdb: expected a digit");
135 if (__result
< '1' || __result
> '9')
136 std::__throw_runtime_error("corrupt tzdb: expected a non-zero digit");
140 if (__input
.peek() < '0' || __input
.peek() > '9')
143 // In order to avoid possible overflows we limit the accepted range.
144 // Most values parsed are expected to be very small:
145 // - 8784 hours in a year
146 // - 31 days in a month
147 // - year no real maximum, these values are expected to be less than
148 // the range of the year type.
150 // However the leapseconds use a seconds after epoch value. Using an
151 // int would run into an overflow in 2038. By using a 64-bit value
152 // the range is large enough for the billions of years. Limiting that
153 // range slightly to make the code easier is not an issue.
154 if (__result
> (std::numeric_limits
<int64_t>::max() / 16))
155 std::__throw_runtime_error("corrupt tzdb: integral too large");
158 __result
+= __input
.get() - '0';
162 //===----------------------------------------------------------------------===//
164 //===----------------------------------------------------------------------===//
166 [[nodiscard
]] static day
__parse_day(istream
& __input
) {
167 unsigned __result
= chrono::__parse_integral(__input
, false);
169 std::__throw_runtime_error("corrupt tzdb day: value too large");
170 return day
{__result
};
173 [[nodiscard
]] static weekday
__parse_weekday(istream
& __input
) {
174 // TZDB allows the shortest unique name.
175 switch (std::tolower(__input
.get())) {
177 chrono::__skip(__input
, "riday");
181 chrono::__skip(__input
, "onday");
185 switch (std::tolower(__input
.get())) {
187 chrono::__skip(__input
, "turday");
191 chrono::__skip(__input
, "nday");
197 switch (std::tolower(__input
.get())) {
199 chrono::__skip(__input
, "ursday");
203 chrono::__skip(__input
, "esday");
208 chrono::__skip(__input
, "ednesday");
212 std::__throw_runtime_error("corrupt tzdb weekday: invalid name");
215 [[nodiscard
]] static month
__parse_month(istream
& __input
) {
216 // TZDB allows the shortest unique name.
217 switch (std::tolower(__input
.get())) {
219 switch (std::tolower(__input
.get())) {
221 chrono::__skip(__input
, "ril");
225 chrono::__skip(__input
, "gust");
231 chrono::__skip(__input
, "ecember");
235 chrono::__skip(__input
, "ebruary");
239 switch (std::tolower(__input
.get())) {
241 chrono::__skip(__input
, "nuary");
245 switch (std::tolower(__input
.get())) {
247 chrono::__skip(__input
, 'e');
251 chrono::__skip(__input
, 'y');
258 if (std::tolower(__input
.get()) == 'a')
259 switch (std::tolower(__input
.get())) {
264 chrono::__skip(__input
, "ch");
270 chrono::__skip(__input
, "ovember");
274 chrono::__skip(__input
, "ctober");
278 chrono::__skip(__input
, "eptember");
281 std::__throw_runtime_error("corrupt tzdb month: invalid name");
284 [[nodiscard
]] static year
__parse_year_value(istream
& __input
) {
285 bool __negative
= __input
.peek() == '-';
286 if (__negative
) [[unlikely
]]
289 int64_t __result
= __parse_integral(__input
, true);
290 if (__result
> static_cast<int>(year::max())) {
292 std::__throw_runtime_error("corrupt tzdb year: year is less than the minimum");
294 std::__throw_runtime_error("corrupt tzdb year: year is greater than the maximum");
297 return year
{static_cast<int>(__negative
? -__result
: __result
)};
300 [[nodiscard
]] static year
__parse_year(istream
& __input
) {
301 if (std::tolower(__input
.peek()) != 'm') [[likely
]]
302 return chrono::__parse_year_value(__input
);
305 switch (std::tolower(__input
.peek())) {
308 chrono::__skip(__input
, 'n');
312 // The m is minimum, even when that is ambiguous.
317 chrono::__skip(__input
, 'x');
321 std::__throw_runtime_error("corrupt tzdb year: expected 'min' or 'max'");
324 //===----------------------------------------------------------------------===//
326 //===----------------------------------------------------------------------===//
328 [[nodiscard
]] static year
__parse_to(istream
& __input
, year __only
) {
329 if (std::tolower(__input
.peek()) != 'o')
330 return chrono::__parse_year(__input
);
333 chrono::__skip(__input
, "nly");
337 [[nodiscard
]] static __tz::__constrained_weekday::__comparison_t
__parse_comparison(istream
& __input
) {
338 switch (__input
.get()) {
340 chrono::__matches(__input
, '=');
341 return __tz::__constrained_weekday::__ge
;
344 chrono::__matches(__input
, '=');
345 return __tz::__constrained_weekday::__le
;
347 std::__throw_runtime_error("corrupt tzdb on: expected '>=' or '<='");
350 [[nodiscard
]] static __tz::__on
__parse_on(istream
& __input
) {
351 if (std::isdigit(__input
.peek()))
352 return chrono::__parse_day(__input
);
354 if (std::tolower(__input
.peek()) == 'l') {
355 chrono::__matches(__input
, "last");
356 return weekday_last(chrono::__parse_weekday(__input
));
359 return __tz::__constrained_weekday
{
360 chrono::__parse_weekday(__input
), chrono::__parse_comparison(__input
), chrono::__parse_day(__input
)};
363 [[nodiscard
]] static seconds
__parse_duration(istream
& __input
) {
365 int __c
= __input
.peek();
366 bool __negative
= __c
== '-';
369 // Negative is either a negative value or a single -.
370 // The latter means 0 and the parsing is complete.
371 if (!std::isdigit(__input
.peek()))
375 __result
+= hours(__parse_integral(__input
, true));
376 if (__input
.peek() != ':')
377 return __negative
? -__result
: __result
;
380 __result
+= minutes(__parse_integral(__input
, true));
381 if (__input
.peek() != ':')
382 return __negative
? -__result
: __result
;
385 __result
+= seconds(__parse_integral(__input
, true));
386 if (__input
.peek() != '.')
387 return __negative
? -__result
: __result
;
390 (void)__parse_integral(__input
, true); // Truncate the digits.
392 return __negative
? -__result
: __result
;
395 [[nodiscard
]] static __tz::__clock
__parse_clock(istream
& __input
) {
396 switch (__input
.get()) { // case sensitive
398 return __tz::__clock::__local
;
400 return __tz::__clock::__standard
;
405 return __tz::__clock::__universal
;
409 return __tz::__clock::__local
;
412 [[nodiscard
]] static bool __parse_dst(istream
& __input
, seconds __offset
) {
413 switch (__input
.get()) { // case sensitive
422 return __offset
!= 0s
;
425 [[nodiscard
]] static __tz::__at
__parse_at(istream
& __input
) {
426 return {__parse_duration(__input
), __parse_clock(__input
)};
429 [[nodiscard
]] static __tz::__save
__parse_save(istream
& __input
) {
430 seconds __time
= chrono::__parse_duration(__input
);
431 return {__time
, chrono::__parse_dst(__input
, __time
)};
434 [[nodiscard
]] static string
__parse_letters(istream
& __input
) {
435 string __result
= __parse_string(__input
);
436 // Canonicalize "-" to "" since they are equivalent in the specification.
437 return __result
!= "-" ? __result
: "";
440 [[nodiscard
]] static __tz::__continuation::__rules_t
__parse_rules(istream
& __input
) {
441 int __c
= __input
.peek();
442 // A single - is not a SAVE but a special case.
445 if (chrono::__is_whitespace(__input
.peek()))
448 return chrono::__parse_save(__input
);
451 if (std::isdigit(__c
) || __c
== '+')
452 return chrono::__parse_save(__input
);
454 return chrono::__parse_string(__input
);
457 [[nodiscard
]] static __tz::__continuation
__parse_continuation(__tz::__rules_storage_type
& __rules
, istream
& __input
) {
458 __tz::__continuation __result
;
460 __result
.__rule_database_
= std::addressof(__rules
);
462 // Note STDOFF is specified as
463 // This field has the same format as the AT and SAVE fields of rule lines;
464 // These fields have different suffix letters, these letters seem
465 // not to be used so do not allow any of them.
467 __result
.__stdoff
= chrono::__parse_duration(__input
);
468 chrono::__skip_mandatory_whitespace(__input
);
469 __result
.__rules
= chrono::__parse_rules(__input
);
470 chrono::__skip_mandatory_whitespace(__input
);
471 __result
.__format
= chrono::__parse_string(__input
);
472 chrono::__skip_optional_whitespace(__input
);
474 if (chrono::__is_eol(__input
.peek()))
476 __result
.__year
= chrono::__parse_year(__input
);
477 chrono::__skip_optional_whitespace(__input
);
479 if (chrono::__is_eol(__input
.peek()))
481 __result
.__in
= chrono::__parse_month(__input
);
482 chrono::__skip_optional_whitespace(__input
);
484 if (chrono::__is_eol(__input
.peek()))
486 __result
.__on
= chrono::__parse_on(__input
);
487 chrono::__skip_optional_whitespace(__input
);
489 if (chrono::__is_eol(__input
.peek()))
491 __result
.__at
= __parse_at(__input
);
496 //===----------------------------------------------------------------------===//
497 // Time Zone Database entries
498 //===----------------------------------------------------------------------===//
500 static string
__parse_version(istream
& __input
) {
501 // The first line in tzdata.zi contains
503 // The parser expects this pattern
504 // #\s*version\s*\(.*)
505 // This part is not documented.
506 chrono::__matches(__input
, '#');
507 chrono::__skip_optional_whitespace(__input
);
508 chrono::__matches(__input
, "version");
509 chrono::__skip_mandatory_whitespace(__input
);
510 return chrono::__parse_string(__input
);
514 static __tz::__rule
& __create_entry(__tz::__rules_storage_type
& __rules
, const string
& __name
) {
515 auto __result
= [&]() -> __tz::__rule
& {
516 auto& __rule
= __rules
.emplace_back(__name
, vector
<__tz::__rule
>{});
517 return __rule
.second
.emplace_back();
523 // Typically rules are in contiguous order in the database.
524 // But there are exceptions, some rules are interleaved.
525 if (__rules
.back().first
== __name
)
526 return __rules
.back().second
.emplace_back();
528 if (auto __it
= ranges::find(__rules
, __name
, [](const auto& __r
) { return __r
.first
; });
529 __it
!= ranges::end(__rules
))
530 return __it
->second
.emplace_back();
535 static void __parse_rule(tzdb
& __tzdb
, __tz::__rules_storage_type
& __rules
, istream
& __input
) {
536 chrono::__skip_mandatory_whitespace(__input
);
537 string __name
= chrono::__parse_string(__input
);
539 __tz::__rule
& __rule
= __create_entry(__rules
, __name
);
541 chrono::__skip_mandatory_whitespace(__input
);
542 __rule
.__from
= chrono::__parse_year(__input
);
543 chrono::__skip_mandatory_whitespace(__input
);
544 __rule
.__to
= chrono::__parse_to(__input
, __rule
.__from
);
545 chrono::__skip_mandatory_whitespace(__input
);
546 chrono::__matches(__input
, '-');
547 chrono::__skip_mandatory_whitespace(__input
);
548 __rule
.__in
= chrono::__parse_month(__input
);
549 chrono::__skip_mandatory_whitespace(__input
);
550 __rule
.__on
= chrono::__parse_on(__input
);
551 chrono::__skip_mandatory_whitespace(__input
);
552 __rule
.__at
= __parse_at(__input
);
553 chrono::__skip_mandatory_whitespace(__input
);
554 __rule
.__save
= __parse_save(__input
);
555 chrono::__skip_mandatory_whitespace(__input
);
556 __rule
.__letters
= chrono::__parse_letters(__input
);
557 chrono::__skip_line(__input
);
560 static void __parse_zone(tzdb
& __tzdb
, __tz::__rules_storage_type
& __rules
, istream
& __input
) {
561 chrono::__skip_mandatory_whitespace(__input
);
562 auto __p
= std::make_unique
<time_zone::__impl
>(chrono::__parse_string(__input
), __rules
);
563 vector
<__tz::__continuation
>& __continuations
= __p
->__continuations();
564 chrono::__skip_mandatory_whitespace(__input
);
567 // The first line must be valid, continuations are optional.
568 __continuations
.emplace_back(__parse_continuation(__rules
, __input
));
569 chrono::__skip_line(__input
);
570 chrono::__skip_optional_whitespace(__input
);
571 } while (std::isdigit(__input
.peek()) || __input
.peek() == '-');
573 __tzdb
.zones
.emplace_back(time_zone::__create(std::move(__p
)));
576 static void __parse_link(tzdb
& __tzdb
, istream
& __input
) {
577 chrono::__skip_mandatory_whitespace(__input
);
578 string __target
= chrono::__parse_string(__input
);
579 chrono::__skip_mandatory_whitespace(__input
);
580 string __name
= chrono::__parse_string(__input
);
581 chrono::__skip_line(__input
);
583 __tzdb
.links
.emplace_back(std::__private_constructor_tag
{}, std::move(__name
), std::move(__target
));
586 static void __parse_tzdata(tzdb
& __db
, __tz::__rules_storage_type
& __rules
, istream
& __input
) {
588 int __c
= std::tolower(__input
.get());
591 case istream::traits_type::eof():
600 chrono::__skip_line(__input
);
604 chrono::__skip(__input
, "ule");
605 chrono::__parse_rule(__db
, __rules
, __input
);
609 chrono::__skip(__input
, "one");
610 chrono::__parse_zone(__db
, __rules
, __input
);
614 chrono::__skip(__input
, "ink");
615 chrono::__parse_link(__db
, __input
);
619 std::__throw_runtime_error("corrupt tzdb: unexpected input");
624 static void __parse_leap_seconds(vector
<leap_second
>& __leap_seconds
, istream
&& __input
) {
625 // The file stores dates since 1 January 1900, 00:00:00, we want
626 // seconds since 1 January 1970.
627 constexpr auto __offset
= sys_days
{1970y
/ January
/ 1} - sys_days
{1900y
/ January
/ 1};
630 sys_seconds __timestamp
;
633 vector
<__entry
> __entries
;
636 switch (__input
.peek()) {
637 case istream::traits_type::eof():
647 chrono::__skip_line(__input
);
651 sys_seconds __date
= sys_seconds
{seconds
{chrono::__parse_integral(__input
, false)}} - __offset
;
652 chrono::__skip_mandatory_whitespace(__input
);
653 seconds __value
{chrono::__parse_integral(__input
, false)};
654 chrono::__skip_line(__input
);
656 __entries
.emplace_back(__date
, __value
);
659 // The Standard requires the leap seconds to be sorted. The file
660 // leap-seconds.list usually provides them in sorted order, but that is not
661 // guaranteed so we ensure it here.
662 ranges::sort(__entries
, {}, &__entry::__timestamp
);
664 // The database should contain the number of seconds inserted by a leap
665 // second (1 or -1). So the difference between the two elements is stored.
666 // std::ranges::views::adjacent has not been implemented yet.
667 (void)ranges::adjacent_find(__entries
, [&](const __entry
& __first
, const __entry
& __second
) {
668 __leap_seconds
.emplace_back(
669 std::__private_constructor_tag
{}, __second
.__timestamp
, __second
.__value
- __first
.__value
);
674 void __init_tzdb(tzdb
& __tzdb
, __tz::__rules_storage_type
& __rules
) {
675 filesystem::path __root
= chrono::__libcpp_tzdb_directory();
676 ifstream __tzdata
{__root
/ "tzdata.zi"};
678 __tzdb
.version
= chrono::__parse_version(__tzdata
);
679 chrono::__parse_tzdata(__tzdb
, __rules
, __tzdata
);
680 ranges::sort(__tzdb
.zones
);
681 ranges::sort(__tzdb
.links
);
682 ranges::sort(__rules
, {}, [](const auto& p
) { return p
.first
; });
684 // There are two files with the leap second information
685 // - leapseconds as specified by zic
686 // - leap-seconds.list the source data
687 // The latter is much easier to parse, it seems Howard shares that
689 chrono::__parse_leap_seconds(__tzdb
.leap_seconds
, ifstream
{__root
/ "leap-seconds.list"});
693 [[nodiscard
]] static const time_zone
* __current_zone_windows(const tzdb
& tzdb
) {
694 // TODO TZDB Implement this on Windows.
695 std::__throw_runtime_error("unknown time zone");
697 #else // ifdef _WIN32
698 [[nodiscard
]] static const time_zone
* __current_zone_posix(const tzdb
& tzdb
) {
699 // On POSIX systems there are several ways to configure the time zone.
700 // In order of priority they are:
701 // - TZ environment variable
702 // https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html#tag_08
703 // The documentation is unclear whether or not it's allowed to
704 // change time zone information. For example the TZ string
706 // this is an entry in tzdata.zi. The value
708 // is also an entry. Is it allowed to use the following?
710 // Even when this is valid there is no time_zone record in the
711 // database. Since the library would need to return a valid pointer,
712 // this means the library needs to allocate and leak a pointer.
714 // - The time zone name is the target of the symlink /etc/localtime
715 // relative to /usr/share/zoneinfo/
717 // The algorithm is like this:
718 // - If the environment variable TZ is set and points to a valid
719 // record use this value.
720 // - Else use the name based on the `/etc/localtime` symlink.
722 if (const char* __tz
= getenv("TZ"))
723 if (const time_zone
* __result
= tzdb
.__locate_zone(__tz
))
726 filesystem::path __path
= "/etc/localtime";
727 if (!filesystem::exists(__path
))
728 std::__throw_runtime_error("tzdb: the symlink '/etc/localtime' does not exist");
730 if (!filesystem::is_symlink(__path
))
731 std::__throw_runtime_error("tzdb: the path '/etc/localtime' is not a symlink");
733 filesystem::path __tz
= filesystem::read_symlink(__path
);
734 // The path may be a relative path, in that case convert it to an absolute
735 // path based on the proper initial directory.
736 if (__tz
.is_relative())
737 __tz
= filesystem::canonical("/etc" / __tz
);
739 string __name
= filesystem::relative(__tz
, "/usr/share/zoneinfo/");
740 if (const time_zone
* __result
= tzdb
.__locate_zone(__name
))
743 std::__throw_runtime_error(("tzdb: the time zone '" + __name
+ "' is not found in the database").c_str());
745 #endif // ifdef _WIN32
747 //===----------------------------------------------------------------------===//
749 //===----------------------------------------------------------------------===//
751 _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI tzdb_list
& get_tzdb_list() {
752 static tzdb_list __result
{new tzdb_list::__impl()};
756 [[nodiscard
]] _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI
const time_zone
* tzdb::__current_zone() const {
758 return chrono::__current_zone_windows(*this);
760 return chrono::__current_zone_posix(*this);
764 _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI
const tzdb
& reload_tzdb() {
765 if (chrono::remote_version() == chrono::get_tzdb().version
)
766 return chrono::get_tzdb();
768 return chrono::get_tzdb_list().__implementation().__load();
771 _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI string
remote_version() {
772 filesystem::path __root
= chrono::__libcpp_tzdb_directory();
773 ifstream __tzdata
{__root
/ "tzdata.zi"};
774 return chrono::__parse_version(__tzdata
);
777 } // namespace chrono
779 _LIBCPP_END_NAMESPACE_STD