#include <cstdlib>

#include <glog/logging.h>

#include <tao/pegtl.hpp>
#include <tao/pegtl/contrib/abnf.hpp>
17 using namespace tao::pegtl
;
18 using namespace tao::pegtl::abnf
;
22 #include <boost/algorithm/string/classification.hpp>
23 #include <boost/algorithm/string/split.hpp>
29 using dash
= one
<'-'>;
31 struct u_let_dig
: sor
<ALPHA
, DIGIT
, UTF8_non_ascii
> {};
33 struct u_ldh_tail
: star
<sor
<seq
<plus
<dash
>, u_let_dig
>, u_let_dig
>> {};
35 struct u_label
: seq
<u_let_dig
, u_ldh_tail
> {};
37 struct let_dig
: sor
<ALPHA
, DIGIT
> {};
39 struct ldh_tail
: star
<sor
<seq
<plus
<dash
>, let_dig
>, let_dig
>> {};
41 struct ldh_str
: seq
<let_dig
, ldh_tail
> {};
43 struct sub_domain
: u_label
{};
45 struct domain
: list_tail
<sub_domain
, dot
> {};
// Upper bound, in bytes, on the domain strings handled in this file. It
// is the length limit applied to incoming domains and the capacity
// handed to the NFKC normalizer.
constexpr size_t max_length{255};
51 bool is_domain(std::string_view dom
)
53 auto in
{memory_input
<>(dom
.data(), dom
.size(), "domain")};
54 return tao::pegtl::parse
<RFC5321::domain
>(in
);
// Decides whether `dom` is acceptable as a domain name.
//
// Visible behavior: there is an early `return true` whose trailing
// comment says empty domains are allowed, and a string rejected by
// is_domain() is reported through LOG(ERROR).
//
// NOTE(review): this view is missing lines of the function (the
// condition guarding the early return, and whatever follows the LOG
// statement) -- confirm the exact control flow against the full file.
57 bool domain_check(std::string_view dom
)
60 return true; // domains in email addresses can be empty
// Syntax check against the domain grammar; failures are logged.
63 if (!is_domain(dom
)) {
64 LOG(ERROR
) << "failed to parse «" << dom
<< "» as domain";
69 * Allow "localhost" among others.
71 std::string domain(dom.data(), dom.length());
73 auto labels{std::vector<std::string>{}};
74 boost::algorithm::split(labels, domain, boost::algorithm::is_any_of("."));
76 if (labels.size() < 2) {
77 LOG(ERROR) << "domain «" << dom << "» must have two or more labels";
81 if (labels[labels.size() - 1].length() < 2) {
82 LOG(ERROR) << "TLD must be two or more chars in «" << dom << "»";
91 // Normalization Form KC (NFKC) Compatibility Decomposition, followed
92 // by Canonical Composition, see <http://unicode.org/reports/tr15/>
94 std::string
nfkc(std::string_view str
)
96 size_t length
= max_length
;
98 CHECK_LE(str
.length(), max_length
);
99 auto udata
= reinterpret_cast<uint8_t const*>(str
.data());
100 auto ubfr
= reinterpret_cast<uint8_t*>(bfr
);
101 CHECK_NOTNULL(u8_normalize(UNINORM_NFKC
, udata
, str
.size(), ubfr
, &length
));
102 return std::string
{bfr
, length
};
// Checks whether `dom` could be accepted as a Domain.
//
// Steps visible in this view, in order: a length test against
// max_length; tests for a bare IP address and for an address literal;
// removal of a trailing dot; NFKC normalization; conversion to the
// ASCII form with idn2_to_ascii_8z(); conversion of that ASCII form
// back with idn2_to_unicode_8z8z(); and finally domain_check() on the
// ASCII form.
//
// NOTE(review): the statements executed inside each branch (including
// every return) and the declaration/release of `ptr` are not visible
// here -- confirm them against the full file.
105 bool Domain::validate(std::string_view dom
)
// Over-long input is handled before any parsing is attempted.
107 if (dom
.length() > max_length
) {
111 // Handle "bare" IP addresses, without the brackets.
112 if (IP::is_address(dom
)) {
// Address literals are recognised separately from bare addresses.
116 if (IP::is_address_literal(dom
)) {
120 dom
= remove_trailing_dot(dom
);
// Normalize before handing the name to libidn2.
122 auto const norm
= nfkc(dom
);
124 // idn2_to_ascii_8z() converts (ASCII) to lower case
127 auto code
= idn2_to_ascii_8z(norm
.c_str(), &ptr
, IDN2_TRANSITIONAL
);
// Copy the converted C string into a std::string.
130 std::string
ascii(ptr
);
// Convert the ASCII form back to Unicode; `ptr` is reused as the output.
134 code
= idn2_to_unicode_8z8z(ascii
.c_str(), &ptr
, IDN2_TRANSITIONAL
);
// Final syntax check is made on the ASCII form.
139 if (!domain_check(ascii
)) {
// Stores `dom` in this Domain.
//
// Visible behavior:
//  - throws std::invalid_argument("domain name too long") when `dom` is
//    longer than max_length;
//  - a bare IP address is stored in ascii_ as the address literal
//    produced by IP::to_address_literal(), with is_address_literal_
//    set to true;
//  - an address literal is copied into ascii_ unchanged, with
//    is_address_literal_ set to true;
//  - otherwise is_address_literal_ is set to false, the trailing dot is
//    removed, the name is NFKC-normalized and run through
//    idn2_to_ascii_8z() and idn2_to_unicode_8z8z(); the throw
//    statements visible after each conversion raise
//    std::invalid_argument carrying idn2_strerror(code);
//  - throws std::invalid_argument("domain not correct") when
//    domain_check() rejects ascii_.
//
// NOTE(review): several lines are missing from this view (the
// conditions guarding the idn2 throws, the assignments from `ptr`, any
// early returns, and the end of the function) -- confirm against the
// full file.
146 void Domain::set(std::string_view dom
)
148 if (dom
.length() > max_length
) {
149 throw std::invalid_argument("domain name too long");
152 // Handle "bare" IP addresses, without the brackets.
153 if (IP::is_address(dom
)) {
154 ascii_
= IP::to_address_literal(dom
);
156 is_address_literal_
= true;
// Input that is already an address literal is stored verbatim.
160 if (IP::is_address_literal(dom
)) {
161 ascii_
= std::string(dom
.data(), dom
.length());
163 is_address_literal_
= true;
// From here on the input is treated as a domain name.
167 is_address_literal_
= false;
169 // Since all Domains are fully qualified and not just some bag of
170 // labels, the trailing dot provides no real information and will
171 // mess up name matching on certs and stuff.
173 dom
= remove_trailing_dot(dom
);
// Normalize before handing the name to libidn2.
175 auto const norm
= nfkc(dom
);
177 // idn2_to_ascii_8z() converts (ASCII) to lower case
180 auto code
= idn2_to_ascii_8z(norm
.c_str(), &ptr
, IDN2_TRANSITIONAL
);
182 throw std::invalid_argument(idn2_strerror(code
));
// Convert the stored ASCII form back to Unicode.
187 code
= idn2_to_unicode_8z8z(ascii_
.c_str(), &ptr
, IDN2_TRANSITIONAL
);
189 throw std::invalid_argument(idn2_strerror(code
));
// Final syntax check is made on the ASCII form.
193 if (!domain_check(ascii_
)) {
194 throw std::invalid_argument("domain not correct");