be explicit about calling string ctor at return
[ghsmtp.git] / Domain.cpp
blobf23e4f5e48e20f571e0182d08c20a780ea7083eb
#include "Domain.hpp"

#include "IP.hpp"
#include "IP4.hpp"
#include "IP6.hpp"

#include <algorithm>
#include <cstdlib>

#include <idn2.h>
#include <uninorm.h>

#include <glog/logging.h>

#include <stdexcept>
16 namespace {
17 size_t constexpr max_length = 255;
20 // Normalization Form KC (NFKC) Compatibility Decomposition, followed
21 // by Canonical Composition, see <http://unicode.org/reports/tr15/>
23 std::string nfkc(std::string_view str)
25 size_t length = max_length;
26 char bfr[max_length];
27 CHECK_LE(str.length(), max_length);
28 auto udata = reinterpret_cast<uint8_t const*>(str.data());
29 auto ubfr = reinterpret_cast<uint8_t*>(bfr);
30 CHECK_NOTNULL(u8_normalize(UNINORM_NFKC, udata, str.size(), ubfr, &length));
31 return std::string{bfr, length};
34 bool Domain::validate(std::string_view dom)
36 if (dom.length() > max_length) {
37 return false;
40 // Handle "bare" IP addresses, without the brackets.
41 if (IP::is_address(dom)) {
42 return true;
45 if (IP::is_address_literal(dom)) {
46 return true;
49 dom = remove_trailing_dot(dom);
51 auto const norm = nfkc(dom);
53 // idn2_to_ascii_8z() converts (ASCII) to lower case
55 char* ptr = nullptr;
56 auto code = idn2_to_ascii_8z(norm.c_str(), &ptr, IDN2_TRANSITIONAL);
57 if (code != IDN2_OK)
58 return false;
59 std::string ascii(ptr);
60 idn2_free(ptr);
62 ptr = nullptr;
63 code = idn2_to_unicode_8z8z(ascii.c_str(), &ptr, IDN2_TRANSITIONAL);
64 if (code != IDN2_OK)
65 return false;
66 idn2_free(ptr);
68 // FIXME: check syntax is dot-string?
70 return true;
73 void Domain::set(std::string_view dom)
75 if (dom.length() > max_length) {
76 throw std::invalid_argument("domain name too long");
79 // Handle "bare" IP addresses, without the brackets.
80 if (IP::is_address(dom)) {
81 ascii_ = IP::to_address_literal(dom);
82 utf8_ = ascii_;
83 is_address_literal_ = true;
84 return;
87 if (IP::is_address_literal(dom)) {
88 ascii_ = std::string(dom.data(), dom.length());
89 utf8_ = ascii_;
90 is_address_literal_ = true;
91 return;
94 is_address_literal_ = false;
96 // Since all Domains are fully qualified and not just some bag of
97 // labels, the trailing dot provides no real information and will
98 // mess up name matching on certs and stuff.
100 dom = remove_trailing_dot(dom);
102 auto const norm = nfkc(dom);
104 // idn2_to_ascii_8z() converts (ASCII) to lower case
106 char* ptr = nullptr;
107 auto code = idn2_to_ascii_8z(norm.c_str(), &ptr, IDN2_TRANSITIONAL);
108 if (code != IDN2_OK)
109 throw std::invalid_argument(idn2_strerror(code));
110 ascii_ = ptr;
111 idn2_free(ptr);
113 ptr = nullptr;
114 code = idn2_to_unicode_8z8z(ascii_.c_str(), &ptr, IDN2_TRANSITIONAL);
115 if (code != IDN2_OK)
116 throw std::invalid_argument(idn2_strerror(code));
117 utf8_ = ptr;
118 idn2_free(ptr);