removed
[ghsmtp.git] / Mailbox.cpp
blob9a3807c2aa1df4b13248fcebe3b42fd253382bdd
1 #include "Mailbox.hpp"
3 #include <string>
5 #include <boost/algorithm/string/classification.hpp>
6 #include <boost/algorithm/string/split.hpp>
8 #include <tao/pegtl.hpp>
9 #include <tao/pegtl/contrib/abnf.hpp>
11 #include <glog/logging.h>
13 #include <boost/algorithm/string/classification.hpp>
14 #include <boost/algorithm/string/split.hpp>
16 #include <fmt/format.h>
17 #include <fmt/ostream.h>
19 using namespace tao::pegtl;
20 using namespace tao::pegtl::abnf;
22 #include "is_ascii.hpp"
24 namespace RFC3629 {
25 // clang-format off
27 // 4. Syntax of UTF-8 Byte Sequences
29 struct UTF8_tail : range<'\x80', '\xBF'> {};
31 struct UTF8_1 : range<0x00, 0x7F> {};
33 struct UTF8_2 : seq<range<'\xC2', '\xDF'>, UTF8_tail> {};
35 struct UTF8_3 : sor<seq<one<'\xE0'>, range<'\xA0', '\xBF'>, UTF8_tail>,
36 seq<range<'\xE1', '\xEC'>, rep<2, UTF8_tail>>,
37 seq<one<'\xED'>, range<'\x80', '\x9F'>, UTF8_tail>,
38 seq<range<'\xEE', '\xEF'>, rep<2, UTF8_tail>>> {};
40 struct UTF8_4 : sor<seq<one<'\xF0'>, range<'\x90', '\xBF'>, rep<2, UTF8_tail>>,
41 seq<range<'\xF1', '\xF3'>, rep<3, UTF8_tail>>,
42 seq<one<'\xF4'>, range<'\x80', '\x8F'>, rep<2, UTF8_tail>>> {};
44 struct non_ascii : sor<UTF8_2, UTF8_3, UTF8_4> {};
46 } // namespace RFC3629
48 namespace Chars {
49 struct VUCHAR : sor<VCHAR, RFC3629::non_ascii> {};
51 // excluded from atext: "(),.@[]"
52 struct atext : sor<ALPHA, DIGIT,
53 one<'!', '#',
54 '$', '%',
55 '&', '\'',
56 '*', '+',
57 '-', '/',
58 '=', '?',
59 '^', '_',
60 '`', '{',
61 '|', '}',
62 '~'>,
63 RFC3629::non_ascii> {};
65 } // namespace Chars
67 namespace RFC5321 {
68 // <https://tools.ietf.org/html/rfc5321>
70 using dot = one<'.'>;
71 using colon = one<':'>;
73 struct u_let_dig : sor<ALPHA, DIGIT, RFC3629::non_ascii> {};
75 struct u_ldh_tail : star<sor<seq<plus<one<'-'>>, u_let_dig>, u_let_dig>> {};
77 struct u_label : seq<u_let_dig, u_ldh_tail> {};
79 struct let_dig : sor<ALPHA, DIGIT> {};
81 struct ldh_tail : star<sor<seq<plus<one<'-'>>, let_dig>, let_dig>> {};
83 struct ldh_str : seq<let_dig, ldh_tail> {};
85 struct label : ldh_str {};
87 struct sub_domain : sor<label, u_label> {};
89 struct domain : list<sub_domain, dot> {};
91 struct dec_octet : sor<seq<string<'2','5'>, range<'0','5'>>,
92 seq<one<'2'>, range<'0','4'>, DIGIT>,
93 seq<range<'0', '1'>, rep<2, DIGIT>>,
94 rep_min_max<1, 2, DIGIT>> {};
96 struct IPv4_address_literal : seq<dec_octet, dot, dec_octet, dot, dec_octet, dot, dec_octet> {};
98 struct h16 : rep_min_max<1, 4, HEXDIG> {};
100 struct ls32 : sor<seq<h16, colon, h16>, IPv4_address_literal> {};
102 struct dcolon : two<':'> {};
104 struct IPv6address : sor<seq< rep<6, h16, colon>, ls32>,
105 seq< dcolon, rep<5, h16, colon>, ls32>,
106 seq<opt<h16 >, dcolon, rep<4, h16, colon>, ls32>,
107 seq<opt<h16, opt< colon, h16>>, dcolon, rep<3, h16, colon>, ls32>,
108 seq<opt<h16, rep_opt<2, colon, h16>>, dcolon, rep<2, h16, colon>, ls32>,
109 seq<opt<h16, rep_opt<3, colon, h16>>, dcolon, h16, colon, ls32>,
110 seq<opt<h16, rep_opt<4, colon, h16>>, dcolon, ls32>,
111 seq<opt<h16, rep_opt<5, colon, h16>>, dcolon, h16>,
112 seq<opt<h16, rep_opt<6, colon, h16>>, dcolon >> {};
114 struct IPv6_address_literal : seq<TAO_PEGTL_ISTRING("IPv6:"), IPv6address> {};
116 struct dcontent : ranges<33, 90, 94, 126> {};
118 struct standardized_tag : ldh_str {};
120 struct general_address_literal : seq<standardized_tag, colon, plus<dcontent>> {};
122 // 4.1.3. Address Literals
123 struct address_literal : seq<one<'['>,
124 sor<IPv4_address_literal,
125 IPv6_address_literal,
126 general_address_literal>,
127 one<']'>> {};
130 struct qtextSMTP : sor<ranges<32, 33, 35, 91, 93, 126>, RFC3629::non_ascii> {};
131 struct graphic : range<32, 126> {};
132 struct quoted_pairSMTP : seq<one<'\\'>, graphic> {};
133 struct qcontentSMTP : sor<qtextSMTP, quoted_pairSMTP> {};
135 struct atom : plus<Chars::atext> {};
136 struct dot_string : list<atom, dot> {};
137 struct quoted_string : seq<one<'"'>, star<qcontentSMTP>, one<'"'>> {};
138 struct local_part : sor<dot_string, quoted_string> {};
139 struct non_local_part : sor<domain, address_literal> {};
140 struct mailbox : seq<local_part, one<'@'>, non_local_part> {};
141 struct mailbox_only : seq<mailbox, eof> {};
142 struct dot_string_only : seq<dot_string, eof> {};
144 // clang-format on
145 // Actions
/// Returns a non-owning view of the characters in [in.begin(), in.end()).
/// Nothing is copied: the view refers to the same storage as the input.
template <typename Input>
static std::string_view make_view(Input const& in)
{
  auto const first = in.begin();
  auto const last  = in.end();
  return {first, static_cast<std::string_view::size_type>(std::distance(first, last))};
}
153 template <typename Rule>
154 struct action : nothing<Rule> {
157 template <>
158 struct action<dot_string> {
159 template <typename Input>
160 static void apply(Input const& in, Mailbox::parse_results& results)
162 results.local_type = Mailbox::local_types::dot_string;
166 template <>
167 struct action<quoted_string> {
168 template <typename Input>
169 static void apply(Input const& in, Mailbox::parse_results& results)
171 results.local_type = Mailbox::local_types::quoted_string;
175 template <>
176 struct action<domain> {
177 template <typename Input>
178 static void apply(Input const& in, Mailbox::parse_results& results)
180 results.domain_type = Mailbox::domain_types::domain;
184 template <>
185 struct action<IPv4_address_literal> {
186 template <typename Input>
187 static void apply(Input const& in, Mailbox::parse_results& results)
189 results.domain_type = Mailbox::domain_types::address_literal;
193 template <>
194 struct action<IPv6_address_literal> {
195 template <typename Input>
196 static void apply(Input const& in, Mailbox::parse_results& results)
198 results.domain_type = Mailbox::domain_types::address_literal;
202 template <>
203 struct action<standardized_tag> {
204 template <typename Input>
205 static void apply(Input const& in, Mailbox::parse_results& results)
207 results.standardized_tag = make_view(in);
211 template <>
212 struct action<general_address_literal> {
213 template <typename Input>
214 static void apply(Input const& in, Mailbox::parse_results& results)
216 results.domain_type = Mailbox::domain_types::general_address_literal;
220 template <>
221 struct action<local_part> {
222 template <typename Input>
223 static void apply(Input const& in, Mailbox::parse_results& results)
225 results.local = make_view(in);
229 template <>
230 struct action<non_local_part> {
231 template <typename Input>
232 static void apply(Input const& in, Mailbox::parse_results& results)
234 results.domain = make_view(in);
237 } // namespace RFC5321
239 template <>
240 struct fmt::formatter<Mailbox> : ostream_formatter {};
242 std::optional<Mailbox::parse_results> Mailbox::parse(std::string_view mailbox)
244 if (mailbox.empty())
245 return {};
247 parse_results results;
248 memory_input<> mbx_in(mailbox, "mailbox");
249 if (tao::pegtl::parse<RFC5321::mailbox_only, RFC5321::action>(mbx_in,
250 results)) {
251 return results;
253 return {};
256 std::string normalize_quoted_string(std::string_view local_part)
258 CHECK_GE(local_part.size(), 2);
259 CHECK_EQ(local_part[0], '"');
260 CHECK_EQ(local_part[local_part.length() - 1], '"');
262 // normalize local_part, 1st step is unescape
263 auto const raw = local_part.substr(1, local_part.length() - 2);
265 std::string uq;
266 uq.reserve(raw.length());
267 for (auto p = raw.begin(); p != raw.end(); ++p) {
268 if (*p == '\\') {
269 CHECK_NE(p + 1, raw.end());
270 ++p; // past the backslash
271 CHECK_LE(*p, '\x7E');
273 uq += *p;
276 Mailbox::parse_results results;
277 memory_input<> loc_in(uq, "local-part");
278 if (tao::pegtl::parse<RFC5321::dot_string_only, RFC5321::action>(loc_in,
279 results))
280 return uq;
282 // If not, (re)escape
283 std::string esc;
284 esc.reserve(local_part.length());
285 esc += '"';
286 for (auto p = uq.begin(); p != uq.end(); ++p) {
287 if (*p == '\\') {
288 esc += "\\\\";
290 else if (*p == '"') {
291 esc += "\\\"";
293 else {
294 esc += *p;
297 esc += '"';
298 return esc;
301 bool Mailbox::set_(std::string_view mailbox,
302 bool should_throw,
303 std::string& msg)
305 msg.clear();
307 if (iequal(mailbox, "Postmaster")) {
308 local_part_ = "Postmaster";
309 domain_.clear();
310 return true;
313 if (mailbox.empty()) {
314 local_part_.clear();
315 domain_.clear();
316 return true;
319 parse_results results;
320 memory_input<> mbx_in(mailbox, "mailbox");
321 if (!tao::pegtl::parse<RFC5321::mailbox_only, RFC5321::action>(mbx_in,
322 results)) {
323 if (should_throw)
324 throw std::invalid_argument("invalid mailbox syntax");
325 msg = fmt::format("invalid mailbox syntax «{}»", mailbox);
326 return false;
329 // "Impossible" errors; if the parse succeeded, the types must not
330 // be unknown.
331 CHECK(results.local_type != local_types::unknown);
332 CHECK(results.domain_type != domain_types::unknown);
334 if (results.domain_type == domain_types::general_address_literal) {
335 if (should_throw)
336 throw std::invalid_argument("general address literal in mailbox");
337 msg =
338 fmt::format("general address literal in mailbox «{}», unknown tag «{}»",
339 mailbox, results.standardized_tag);
340 return false;
343 std::string loc_part;
344 if (results.local_type == local_types::quoted_string) {
345 loc_part = normalize_quoted_string(results.local);
347 else {
348 // plain old Dot-string
349 loc_part = results.local;
352 Domain dom;
353 if (!Domain::validate(results.domain, msg, dom)) {
354 if (should_throw)
355 throw std::invalid_argument("invalid domain");
356 return false;
359 std::swap(local_part_, loc_part);
360 std::swap(domain_, dom);
362 return true;
365 size_t Mailbox::length(domain_encoding enc) const
367 if (enc == domain_encoding::ascii) {
368 if (!is_ascii(local_part_)) {
369 LOG(ERROR) << "non ascii chars in local part:" << local_part_;
370 throw std::range_error("non ascii chars in local part of mailbox");
373 auto const& d =
374 (enc == domain_encoding::utf8) ? domain().utf8() : domain().ascii();
375 return local_part_.length() + (d.length() ? (d.length() + 1) : 0);
378 std::string Mailbox::as_string(domain_encoding enc) const
380 if (enc == domain_encoding::ascii) {
381 if (!is_ascii(local_part_)) {
382 LOG(ERROR) << "non ascii chars in local part:" << local_part_;
383 throw std::range_error("non ascii chars in local part of mailbox");
386 std::string s;
387 s.reserve(length(enc));
388 s = local_part();
389 auto const& d =
390 (enc == domain_encoding::utf8) ? domain().utf8() : domain().ascii();
391 if (!d.empty()) {
392 s += '@' + d;
394 return s;