removed
[ghsmtp.git] / Domain.cpp
blob1241b6971bef4305eb8b803660502b47365f4891
1 #include "Domain.hpp"
3 // Domains as used in email and as implemented (ie constrained) by the DNS.
5 #include "IP.hpp"
6 #include "IP4.hpp"
7 #include "IP6.hpp"
9 #include "is_ascii.hpp"
11 #include <algorithm>
12 #include <cctype>
13 #include <stdexcept>
15 #include <idn2.h>
16 #include <uninorm.h>
18 #include <glog/logging.h>
20 #include <tao/pegtl.hpp>
21 #include <tao/pegtl/contrib/abnf.hpp>
23 using namespace tao::pegtl;
24 using namespace tao::pegtl::abnf;
26 #include <boost/algorithm/string/classification.hpp>
27 #include <boost/algorithm/string/split.hpp>
29 #include <fmt/format.h>
30 #include <fmt/ostream.h>
32 namespace RFC5321 {
33 #include "UTF8.hpp"
35 using dot = one<'.'>;
36 using dash = one<'-'>;
38 struct u_let_dig : sor<ALPHA, DIGIT, UTF8_non_ascii> {};
40 struct u_ldh_tail : star<sor<seq<plus<dash>, u_let_dig>, u_let_dig>> {};
42 struct u_label : seq<u_let_dig, u_ldh_tail> {};
44 struct let_dig : sor<ALPHA, DIGIT> {};
46 struct ldh_tail : star<sor<seq<plus<dash>, let_dig>, let_dig>> {};
48 struct ldh_str : seq<let_dig, ldh_tail> {};
50 struct sub_domain : u_label {};
52 struct domain : list_tail<sub_domain, dot> {};
54 struct domain_only : seq<domain, eof> {};
56 }; // namespace RFC5321
namespace {

// Longest permitted domain name, in octets, when written in dotted text
// form (labels joined by '.'); see RFC-1035 section 3.1.
constexpr size_t max_dom_length{253};

// Longest permitted single label, in octets.
constexpr size_t max_lab_length{63};

} // namespace
64 template <>
65 struct fmt::formatter<Domain> : ostream_formatter {};
67 namespace domain {
68 bool is_fully_qualified(Domain const& dom, std::string& msg)
70 if (dom.empty()) {
71 msg = "empty domain";
72 return false;
75 auto labels{std::vector<std::string>{}};
76 boost::algorithm::split(labels, dom.ascii(),
77 boost::algorithm::is_any_of("."));
79 if (labels.size() < 2) {
80 msg = fmt::format("domain «{}» must have two or more labels", dom);
81 return false;
84 if (labels[labels.size() - 1].length() < 2) {
85 msg = fmt::format("TLD «{}» must be two or more octets",
86 labels[labels.size() - 1]);
87 return false;
90 msg.clear();
91 return true;
93 } // namespace domain
// Deleter that hands a pointer to std::free(); for use with buffers that
// a C API returned and expects the caller to free().
struct free_deleter {
  template <typename T>
  void operator()(T* p) const
  {
    // std::free() takes a non-const void*, so strip any const first.
    using mutable_t = std::remove_const_t<T>;
    std::free(const_cast<mutable_t*>(p));
  }
};

// unique_ptr whose pointee is released with std::free().
template <typename T>
using uc_ptr = std::unique_ptr<T, free_deleter>;

// The stateless deleter must not make the smart pointer any larger than
// the raw pointer it wraps.
static_assert(sizeof(uc_ptr<char>) == sizeof(char*), "");
// Returns `a` without its final character when that character is '.';
// otherwise returns `a` unchanged. At most one dot is removed.
std::string_view remove_trailing_dot(std::string_view a)
{
  if (a.empty() || a.back() != '.') {
    return a;
  }
  return a.substr(0, a.size() - 1);
}
115 bool Domain::set_(std::string_view dom, bool should_throw, std::string& msg)
117 msg.clear(); // no error
119 if (IP::is_address_literal(dom)) {
120 ascii_ = dom;
121 utf8_.clear();
122 is_address_literal_ = true;
123 return true;
126 // A dotted quad IPv4 address will match the syntax of RFC-5321
127 // Domain, but should not be confused as a DNS domain.
129 if (IP::is_address(dom)) {
130 ascii_ = IP::to_address_literal(dom);
131 utf8_.clear();
132 is_address_literal_ = true;
133 return true;
136 dom = remove_trailing_dot(dom);
138 if (dom.empty()) {
139 clear();
140 return true;
143 auto in{memory_input<>(dom.data(), dom.size(), "domain")};
144 if (!tao::pegtl::parse<RFC5321::domain_only>(in)) {
145 if (should_throw) {
146 throw std::invalid_argument("failed to parse domain");
148 msg = fmt::format("failed to parse domain «{}»", dom);
149 return false;
152 /* ASCII case:
155 if (is_ascii(dom)) {
156 if (dom.length() > max_dom_length) {
157 if (should_throw)
158 throw std::invalid_argument("domain name too long");
159 msg = fmt::format("domain name «{}» too long", dom);
160 return false;
163 // Check for domain /label/ too long.
164 auto lst = dom.begin();
165 for (;;) {
166 auto const lab = std::find(lst, dom.end(), '.');
167 auto const len = size_t(std::distance(lst, lab));
168 if (len > max_lab_length) {
169 if (should_throw)
170 throw std::invalid_argument("domain label too long");
171 msg = fmt::format("domain label «{}» too long",
172 std::string_view{lst, len});
173 return false;
175 if (lab == dom.end())
176 break;
178 lst = lab + 1;
181 // Map domains to lower case.
182 ascii_.clear();
183 ascii_.reserve(dom.length());
184 std::transform(dom.begin(), dom.end(), std::back_inserter(ascii_),
185 [](unsigned char ch) { return std::tolower(ch); });
186 utf8_.clear();
187 is_address_literal_ = false;
189 return true;
192 /* Unicode (UTF-8) case:
195 // Normalization Form KC (NFKC) Compatibility Decomposition, followed
196 // by Canonical Composition, see <http://unicode.org/reports/tr15/>
198 size_t length = 0;
199 uc_ptr<uint8_t> normp(
200 u8_normalize(UNINORM_NFKC, reinterpret_cast<uint8_t const*>(dom.data()),
201 dom.size(), nullptr, &length));
203 if (!normp) {
204 auto const errmsg = std::strerror(errno);
205 if (should_throw)
206 throw std::invalid_argument(errmsg);
207 msg = fmt::format("u8_normalize(\"{}\") failed: ", dom, errmsg);
208 return false;
211 std::string norm{reinterpret_cast<char*>(normp.get()),
212 length}; // idn2_to_ascii_8z() needs a NUL terminated c_str
214 // idn2_to_ascii_8z() converts (ASCII) to lower case
216 char* ptr = nullptr;
217 auto code = idn2_to_ascii_8z(norm.c_str(), &ptr, IDN2_TRANSITIONAL);
218 if (code != IDN2_OK) {
219 if (code == IDN2_TOO_BIG_DOMAIN) {
220 if (should_throw)
221 throw std::invalid_argument("domain name too long");
222 msg = fmt::format("domain name «{}» too long", norm);
223 return false;
225 if (code == IDN2_TOO_BIG_LABEL) {
226 if (should_throw)
227 throw std::invalid_argument("domain label too long");
228 msg = fmt::format("domain label «{}» too long", norm);
229 return false;
231 auto const errmsg = idn2_strerror(code);
232 if (should_throw)
233 throw std::invalid_argument(errmsg);
234 msg =
235 fmt::format("idn2_to_ascii_8z(\"{}\", …, IDN2_TRANSITIONAL) failed: {}",
236 norm, errmsg);
237 return false;
239 std::string ascii{ptr};
240 idn2_free(ptr);
242 // We do an additional check since idn2_to_ascii_8z checks for >255,
243 // and we know DNS packet encoding makes the actual limit 253.
244 if (ascii.length() > max_dom_length) {
245 if (should_throw)
246 throw std::invalid_argument("domain name too long");
247 msg = fmt::format("domain name «{}» too long", ascii);
248 return false;
251 ptr = nullptr;
252 code = idn2_to_unicode_8z8z(ascii.c_str(), &ptr, IDN2_TRANSITIONAL);
253 if (code != IDN2_OK) {
254 auto errmsg = idn2_strerror(code);
255 if (should_throw)
256 throw std::invalid_argument(errmsg);
257 msg = fmt::format(
258 "idn2_to_unicode_8z8z(\"{}\", …, IDN2_TRANSITIONAL) failed: {}", ascii,
259 errmsg);
260 return false;
262 CHECK_NOTNULL(ptr);
263 std::string utf8{ptr};
264 idn2_free(ptr);
266 // Identical byte string: not sure this can or should ever happen.
267 if (utf8 == ascii) {
268 utf8.clear();
271 ascii_ = ascii;
272 utf8_ = utf8;
273 is_address_literal_ = false;
275 return true;