share
[ghsmtp.git] / Domain.cpp
blobf23730793d471f996a55569126df67d37973904e
1 #include "Domain.hpp"
3 #include "IP.hpp"
4 #include "IP4.hpp"
5 #include "IP6.hpp"
7 #include <algorithm>
9 #include <idn2.h>
10 #include <uninorm.h>
12 #include <glog/logging.h>
14 #include <tao/pegtl.hpp>
15 #include <tao/pegtl/contrib/abnf.hpp>
17 using namespace tao::pegtl;
18 using namespace tao::pegtl::abnf;
#include <cstdlib>
#include <memory>
20 #include <stdexcept>
22 #include <boost/algorithm/string/classification.hpp>
23 #include <boost/algorithm/string/split.hpp>
// PEGTL grammar rules used to recognise domain names.
// NOTE(review): the namespace name suggests these follow the RFC 5321
// Domain / sub-domain productions, extended to admit non-ASCII UTF-8 —
// confirm against the RFC and UTF8.hpp.
25 namespace RFC5321 {
// UTF8.hpp is included inside this namespace, so whatever it declares
// (UTF8_non_ascii is used below) lives in RFC5321.
26 #include "UTF8.hpp"
// Single-character rules for the label separator and the hyphen.
28 using dot = one<'.'>;
29 using dash = one<'-'>;
// One label character: an ASCII letter, an ASCII digit, or UTF8_non_ascii.
31 struct u_let_dig : sor<ALPHA, DIGIT, UTF8_non_ascii> {};
// Rest of a label: any number of u_let_dig, where each run of one or more
// dashes must be followed by a u_let_dig (so a label cannot end in '-').
33 struct u_ldh_tail : star<sor<seq<plus<dash>, u_let_dig>, u_let_dig>> {};
// A full label: one u_let_dig followed by the tail above.
35 struct u_label : seq<u_let_dig, u_ldh_tail> {};
// ASCII-only counterparts of the three rules above.  They are not
// referenced by any other rule visible in this part of the file.
37 struct let_dig : sor<ALPHA, DIGIT> {};
39 struct ldh_tail : star<sor<seq<plus<dash>, let_dig>, let_dig>> {};
41 struct ldh_str : seq<let_dig, ldh_tail> {};
// A sub-domain is exactly one (possibly non-ASCII) label.
43 struct sub_domain : u_label {};
// A domain is a dot-separated list of sub-domains; list_tail also accepts
// one trailing separator after the last element.
45 struct domain : list_tail<sub_domain, dot> {};
48 namespace {
// Largest domain string, in bytes, accepted by Domain::validate() and
// Domain::set(); also the size of the stack buffer used by nfkc().
49 size_t constexpr max_length = 255;
51 bool is_domain(std::string_view dom)
53 auto in{memory_input<>(dom.data(), dom.size(), "domain")};
54 return tao::pegtl::parse<RFC5321::domain>(in);
// Check that `dom` is acceptable as a domain name.
//
// An empty string is accepted.  Any other input must satisfy is_domain();
// a parse failure is logged at ERROR level and reported as false.
//
// NOTE(review): lines that hold only braces or comment delimiters are not
// visible in this view, so it is unclear whether the label-count and TLD
// length checks below are live code or sit inside the block comment that
// carries the "Allow localhost" note — confirm against the full file.
57 bool domain_check(std::string_view dom)
59 if (dom.empty()) {
60 return true; // domains in email addresses can be empty
63 if (!is_domain(dom)) {
64 LOG(ERROR) << "failed to parse «" << dom << "» as domain";
65 return false;
69 * Allow "localhost" amung others.
71 std::string domain(dom.data(), dom.length());
73 auto labels{std::vector<std::string>{}};
74 boost::algorithm::split(labels, domain, boost::algorithm::is_any_of("."));
76 if (labels.size() < 2) {
77 LOG(ERROR) << "domain «" << dom << "» must have two or more labels";
78 return false;
81 if (labels[labels.size() - 1].length() < 2) {
82 LOG(ERROR) << "TLD must be two or more chars in «" << dom << "»";
83 return false;
87 return true;
89 } // namespace
91 // Normalization Form KC (NFKC) Compatibility Decomposition, followed
92 // by Canonical Composition, see <http://unicode.org/reports/tr15/>
94 std::string nfkc(std::string_view str)
96 size_t length = max_length;
97 char bfr[max_length];
98 CHECK_LE(str.length(), max_length);
99 auto udata = reinterpret_cast<uint8_t const*>(str.data());
100 auto ubfr = reinterpret_cast<uint8_t*>(bfr);
101 CHECK_NOTNULL(u8_normalize(UNINORM_NFKC, udata, str.size(), ubfr, &length));
102 return std::string{bfr, length};
105 bool Domain::validate(std::string_view dom)
107 if (dom.length() > max_length) {
108 return false;
111 // Handle "bare" IP addresses, without the brackets.
112 if (IP::is_address(dom)) {
113 return true;
116 if (IP::is_address_literal(dom)) {
117 return true;
120 dom = remove_trailing_dot(dom);
122 auto const norm = nfkc(dom);
124 // idn2_to_ascii_8z() converts (ASCII) to lower case
126 char* ptr = nullptr;
127 auto code = idn2_to_ascii_8z(norm.c_str(), &ptr, IDN2_TRANSITIONAL);
128 if (code != IDN2_OK)
129 return false;
130 std::string ascii(ptr);
131 idn2_free(ptr);
133 ptr = nullptr;
134 code = idn2_to_unicode_8z8z(ascii.c_str(), &ptr, IDN2_TRANSITIONAL);
135 if (code != IDN2_OK)
136 return false;
137 idn2_free(ptr);
139 if (!domain_check(ascii)) {
140 return false;
143 return true;
146 void Domain::set(std::string_view dom)
148 if (dom.length() > max_length) {
149 throw std::invalid_argument("domain name too long");
152 // Handle "bare" IP addresses, without the brackets.
153 if (IP::is_address(dom)) {
154 ascii_ = IP::to_address_literal(dom);
155 utf8_ = ascii_;
156 is_address_literal_ = true;
157 return;
160 if (IP::is_address_literal(dom)) {
161 ascii_ = std::string(dom.data(), dom.length());
162 utf8_ = ascii_;
163 is_address_literal_ = true;
164 return;
167 is_address_literal_ = false;
169 // Since all Domains are fully qualified and not just some bag of
170 // labels, the trailing dot provides no real information and will
171 // mess up name matching on certs and stuff.
173 dom = remove_trailing_dot(dom);
175 auto const norm = nfkc(dom);
177 // idn2_to_ascii_8z() converts (ASCII) to lower case
179 char* ptr = nullptr;
180 auto code = idn2_to_ascii_8z(norm.c_str(), &ptr, IDN2_TRANSITIONAL);
181 if (code != IDN2_OK)
182 throw std::invalid_argument(idn2_strerror(code));
183 ascii_ = ptr;
184 idn2_free(ptr);
186 ptr = nullptr;
187 code = idn2_to_unicode_8z8z(ascii_.c_str(), &ptr, IDN2_TRANSITIONAL);
188 if (code != IDN2_OK)
189 throw std::invalid_argument(idn2_strerror(code));
190 utf8_ = ptr;
191 idn2_free(ptr);
193 if (!domain_check(ascii_)) {
194 throw std::invalid_argument("domain not correct");