3 // Domains as used in email and as implemented (i.e., constrained) by the DNS.
9 #include "is_ascii.hpp"
18 #include <glog/logging.h>
20 #include <tao/pegtl.hpp>
21 #include <tao/pegtl/contrib/abnf.hpp>
23 using namespace tao::pegtl
;
24 using namespace tao::pegtl::abnf
;
26 #include <boost/algorithm/string/classification.hpp>
27 #include <boost/algorithm/string/split.hpp>
29 #include <fmt/format.h>
30 #include <fmt/ostream.h>
36 using dash
= one
<'-'>;
38 struct u_let_dig
: sor
<ALPHA
, DIGIT
, UTF8_non_ascii
> {};
40 struct u_ldh_tail
: star
<sor
<seq
<plus
<dash
>, u_let_dig
>, u_let_dig
>> {};
42 struct u_label
: seq
<u_let_dig
, u_ldh_tail
> {};
44 struct let_dig
: sor
<ALPHA
, DIGIT
> {};
46 struct ldh_tail
: star
<sor
<seq
<plus
<dash
>, let_dig
>, let_dig
>> {};
48 struct ldh_str
: seq
<let_dig
, ldh_tail
> {};
50 struct sub_domain
: u_label
{};
52 struct domain
: list_tail
<sub_domain
, dot
> {};
54 struct domain_only
: seq
<domain
, eof
> {};
56 }; // namespace RFC5321
// Maximum length, in octets, of a domain name in its dotted textual form.
size_t constexpr max_dom_length{253}; // RFC-1035 section 3.1
// Maximum length, in octets, of a single label within a domain name.
size_t constexpr max_lab_length{63};
65 struct fmt::formatter
<Domain
> : ostream_formatter
{};
// Checks whether `dom` looks like a fully qualified domain name and
// describes the first violation in `msg`.
//
// The visible checks split dom.ascii() on '.' and require
//   * at least two labels, and
//   * a final label (the TLD) of at least two octets.
//
// NOTE(review): the return statements and any other checks are not
// visible in this excerpt; presumably false is returned whenever `msg`
// is set -- confirm against the full source.
68 bool is_fully_qualified(Domain
const& dom
, std::string
& msg
)
// Break the ASCII form of the domain into its dot-separated labels.
75 auto labels
{std::vector
<std::string
>{}};
76 boost::algorithm::split(labels
, dom
.ascii(),
77 boost::algorithm::is_any_of("."));
// Fewer than two labels: report it through msg.
79 if (labels
.size() < 2) {
80 msg
= fmt::format("domain «{}» must have two or more labels", dom
);
// The last label is treated as the TLD and must be at least two octets.
84 if (labels
[labels
.size() - 1].length() < 2) {
85 msg
= fmt::format("TLD «{}» must be two or more octets",
86 labels
[labels
.size() - 1]);
// Deleter call operator: releases `p` with std::free().  The const_cast
// strips const from the pointee type first, so pointers to const
// objects can be handed to std::free() as well.
97 void operator()(T
* p
) const
99 std::free(const_cast<std::remove_const_t
<T
>*>(p
));
103 template <typename T
>
104 using uc_ptr
= std::unique_ptr
<T
, free_deleter
>;
105 static_assert(sizeof(char*) == sizeof(uc_ptr
<char>), ""); // to be sure
// Takes a string_view by value and returns a string_view.  The visible
// condition tests for a non-empty view whose last character is '.'.
//
// NOTE(review): the branch body and the return statement are not
// visible in this excerpt; presumably the trailing dot is dropped from
// the view -- confirm against the full source.
107 std::string_view
remove_trailing_dot(std::string_view a
)
// a.length() guards a.back(), which must not be called on an empty view.
109 if (a
.length() && (a
.back() == '.')) {
115 bool Domain::set_(std::string_view dom
, bool should_throw
, std::string
& msg
)
117 msg
.clear(); // no error
119 if (IP::is_address_literal(dom
)) {
122 is_address_literal_
= true;
126 // A dotted-quad IPv4 address will match the syntax of an RFC-5321
127 // Domain, but should not be mistaken for a DNS domain.
129 if (IP::is_address(dom
)) {
130 ascii_
= IP::to_address_literal(dom
);
132 is_address_literal_
= true;
136 dom
= remove_trailing_dot(dom
);
143 auto in
{memory_input
<>(dom
.data(), dom
.size(), "domain")};
144 if (!tao::pegtl::parse
<RFC5321::domain_only
>(in
)) {
146 throw std::invalid_argument("failed to parse domain");
148 msg
= fmt::format("failed to parse domain «{}»", dom
);
156 if (dom
.length() > max_dom_length
) {
158 throw std::invalid_argument("domain name too long");
159 msg
= fmt::format("domain name «{}» too long", dom
);
163 // Check for domain /label/ too long.
164 auto lst
= dom
.begin();
166 auto const lab
= std::find(lst
, dom
.end(), '.');
167 auto const len
= size_t(std::distance(lst
, lab
));
168 if (len
> max_lab_length
) {
170 throw std::invalid_argument("domain label too long");
171 msg
= fmt::format("domain label «{}» too long",
172 std::string_view
{lst
, len
});
175 if (lab
== dom
.end())
181 // Map domains to lower case.
183 ascii_
.reserve(dom
.length());
184 std::transform(dom
.begin(), dom
.end(), std::back_inserter(ascii_
),
185 [](unsigned char ch
) { return std::tolower(ch
); });
187 is_address_literal_
= false;
192 /* Unicode (UTF-8) case:
195 // Normalization Form KC (NFKC) Compatibility Decomposition, followed
196 // by Canonical Composition, see <http://unicode.org/reports/tr15/>
199 uc_ptr
<uint8_t> normp(
200 u8_normalize(UNINORM_NFKC
, reinterpret_cast<uint8_t const*>(dom
.data()),
201 dom
.size(), nullptr, &length
));
204 auto const errmsg
= std::strerror(errno
);
206 throw std::invalid_argument(errmsg
);
207 msg
= fmt::format("u8_normalize(\"{}\") failed: ", dom
, errmsg
);
211 std::string norm
{reinterpret_cast<char*>(normp
.get()),
212 length
}; // idn2_to_ascii_8z() needs a NUL terminated c_str
214 // idn2_to_ascii_8z() converts (ASCII) to lower case
217 auto code
= idn2_to_ascii_8z(norm
.c_str(), &ptr
, IDN2_TRANSITIONAL
);
218 if (code
!= IDN2_OK
) {
219 if (code
== IDN2_TOO_BIG_DOMAIN
) {
221 throw std::invalid_argument("domain name too long");
222 msg
= fmt::format("domain name «{}» too long", norm
);
225 if (code
== IDN2_TOO_BIG_LABEL
) {
227 throw std::invalid_argument("domain label too long");
228 msg
= fmt::format("domain label «{}» too long", norm
);
231 auto const errmsg
= idn2_strerror(code
);
233 throw std::invalid_argument(errmsg
);
235 fmt::format("idn2_to_ascii_8z(\"{}\", …, IDN2_TRANSITIONAL) failed: {}",
239 std::string ascii
{ptr
};
242 // We do an additional check since idn2_to_ascii_8z checks for >255,
243 // and we know DNS packet encoding makes the actual limit 253.
244 if (ascii
.length() > max_dom_length
) {
246 throw std::invalid_argument("domain name too long");
247 msg
= fmt::format("domain name «{}» too long", ascii
);
252 code
= idn2_to_unicode_8z8z(ascii
.c_str(), &ptr
, IDN2_TRANSITIONAL
);
253 if (code
!= IDN2_OK
) {
254 auto errmsg
= idn2_strerror(code
);
256 throw std::invalid_argument(errmsg
);
258 "idn2_to_unicode_8z8z(\"{}\", …, IDN2_TRANSITIONAL) failed: {}", ascii
,
263 std::string utf8
{ptr
};
266 // Identical byte string: not sure this can or should ever happen.
273 is_address_literal_
= false;