rewrite
[ghsmtp.git] / Mailbox.cpp
blob45138d7e440527ef884e6c9dbb073bbe4c1aa608
1 #include "Mailbox.hpp"
3 #include <string>
5 #include <tao/pegtl.hpp>
6 #include <tao/pegtl/contrib/abnf.hpp>
8 #include <glog/logging.h>
10 using namespace tao::pegtl;
11 using namespace tao::pegtl::abnf;
13 namespace RFC3629 {
14 // clang-format off
16 // 4. Syntax of UTF-8 Byte Sequences
18 struct UTF8_tail : range<'\x80', '\xBF'> {};
20 struct UTF8_1 : range<0x00, 0x7F> {};
22 struct UTF8_2 : seq<range<'\xC2', '\xDF'>, UTF8_tail> {};
24 struct UTF8_3 : sor<seq<one<'\xE0'>, range<'\xA0', '\xBF'>, UTF8_tail>,
25 seq<range<'\xE1', '\xEC'>, rep<2, UTF8_tail>>,
26 seq<one<'\xED'>, range<'\x80', '\x9F'>, UTF8_tail>,
27 seq<range<'\xEE', '\xEF'>, rep<2, UTF8_tail>>> {};
29 struct UTF8_4 : sor<seq<one<'\xF0'>, range<'\x90', '\xBF'>, rep<2, UTF8_tail>>,
30 seq<range<'\xF1', '\xF3'>, rep<3, UTF8_tail>>,
31 seq<one<'\xF4'>, range<'\x80', '\x8F'>, rep<2, UTF8_tail>>> {};
33 struct non_ascii : sor<UTF8_2, UTF8_3, UTF8_4> {};
35 } // namespace RFC3629
37 namespace Chars {
38 struct VUCHAR : sor<VCHAR, RFC3629::non_ascii> {};
40 // excluded from atext: "(),.@[]"
41 struct atext : sor<ALPHA, DIGIT,
42 one<'!', '#',
43 '$', '%',
44 '&', '\'',
45 '*', '+',
46 '-', '/',
47 '=', '?',
48 '^', '_',
49 '`', '{',
50 '|', '}',
51 '~'>,
52 RFC3629::non_ascii> {};
54 } // namespace Chars
56 namespace RFC5321 {
57 // <https://tools.ietf.org/html/rfc5321>
59 using dot = one<'.'>;
60 using colon = one<':'>;
62 struct u_let_dig : sor<ALPHA, DIGIT, RFC3629::non_ascii> {};
64 struct u_ldh_tail : star<sor<seq<plus<one<'-'>>, u_let_dig>, u_let_dig>> {};
66 struct u_label : seq<u_let_dig, u_ldh_tail> {};
68 struct let_dig : sor<ALPHA, DIGIT> {};
70 struct ldh_tail : star<sor<seq<plus<one<'-'>>, let_dig>, let_dig>> {};
72 struct ldh_str : seq<let_dig, ldh_tail> {};
74 struct label : ldh_str {};
76 struct sub_domain : sor<label, u_label> {};
78 struct domain : list<sub_domain, dot> {};
80 struct dec_octet : sor<seq<string<'2','5'>, range<'0','5'>>,
81 seq<one<'2'>, range<'0','4'>, DIGIT>,
82 seq<range<'0', '1'>, rep<2, DIGIT>>,
83 rep_min_max<1, 2, DIGIT>> {};
85 struct IPv4_address_literal : seq<dec_octet, dot, dec_octet, dot, dec_octet, dot, dec_octet> {};
87 struct h16 : rep_min_max<1, 4, HEXDIG> {};
89 struct ls32 : sor<seq<h16, colon, h16>, IPv4_address_literal> {};
91 struct dcolon : two<':'> {};
93 struct IPv6address : sor<seq< rep<6, h16, colon>, ls32>,
94 seq< dcolon, rep<5, h16, colon>, ls32>,
95 seq<opt<h16 >, dcolon, rep<4, h16, colon>, ls32>,
96 seq<opt<h16, opt< colon, h16>>, dcolon, rep<3, h16, colon>, ls32>,
97 seq<opt<h16, rep_opt<2, colon, h16>>, dcolon, rep<2, h16, colon>, ls32>,
98 seq<opt<h16, rep_opt<3, colon, h16>>, dcolon, h16, colon, ls32>,
99 seq<opt<h16, rep_opt<4, colon, h16>>, dcolon, ls32>,
100 seq<opt<h16, rep_opt<5, colon, h16>>, dcolon, h16>,
101 seq<opt<h16, rep_opt<6, colon, h16>>, dcolon >> {};
103 struct IPv6_address_literal : seq<TAO_PEGTL_ISTRING("IPv6:"), IPv6address> {};
105 struct dcontent : ranges<33, 90, 94, 126> {};
107 struct standardized_tag : ldh_str {};
109 struct general_address_literal : seq<standardized_tag, colon, plus<dcontent>> {};
111 // 4.1.3. Address Literals
112 struct address_literal : seq<one<'['>,
113 sor<IPv4_address_literal,
114 IPv6_address_literal,
115 general_address_literal>,
116 one<']'>> {};
119 struct qtextSMTP : sor<ranges<32, 33, 35, 91, 93, 126>, RFC3629::non_ascii> {};
120 struct graphic : range<32, 126> {};
121 struct quoted_pairSMTP : seq<one<'\\'>, graphic> {};
122 struct qcontentSMTP : sor<qtextSMTP, quoted_pairSMTP> {};
124 struct atom : plus<Chars::atext> {};
125 struct dot_string : list<atom, dot> {};
126 struct quoted_string : seq<one<'"'>, star<qcontentSMTP>, one<'"'>> {};
127 struct local_part : sor<dot_string, quoted_string> {};
128 struct non_local_part : sor<domain, address_literal> {};
129 struct mailbox : seq<local_part, one<'@'>, non_local_part> {};
130 struct mailbox_only : seq<mailbox, eof> {};
132 // clang-format on
133 // Actions
135 template <typename Rule>
136 struct action : nothing<Rule> {
139 template <>
140 struct action<local_part> {
141 template <typename Input>
142 static void apply(Input const& in, Mailbox& addr)
144 addr.set_local(in.string());
148 template <>
149 struct action<non_local_part> {
150 template <typename Input>
151 static void apply(Input const& in, Mailbox& addr)
153 addr.set_domain(in.string());
156 } // namespace RFC5321
158 bool Mailbox::validate(std::string_view mailbox)
160 Mailbox mbx;
161 memory_input<> address_in(mailbox, "address");
162 return !mailbox.empty() &&
163 parse<RFC5321::mailbox_only, RFC5321::action>(address_in, mbx);
166 bool Mailbox::validate_strict_lengths(std::string_view mailbox)
168 Mailbox mbx;
169 memory_input<> address_in(mailbox, "address");
170 return !mailbox.empty() &&
171 parse<RFC5321::mailbox_only, RFC5321::action>(address_in, mbx) &&
172 (mbx.local_part().length() <= 64) &&
173 (mbx.domain().ascii().length() <= 255);
176 Mailbox::Mailbox(std::string_view mailbox)
178 if (!mailbox.empty()) {
179 memory_input<> address_in(mailbox, "address");
180 if (!parse<RFC5321::mailbox_only, RFC5321::action>(address_in, *this)) {
181 LOG(ERROR) << "invalid mailbox syntax «" << mailbox << "»";
182 throw std::invalid_argument("invalid mailbox syntax");
186 // RFC-5321 section 4.5.3.1. Size Limits and Minimums
188 if (local_part().length() > 64) { // Section 4.5.3.1.1. Local-part
189 LOG(WARNING) << "local part > 64 octets «" << mailbox << "»";
191 if (domain().ascii().length() > 255) { // Section 4.5.3.1.2.
192 // Also RFC 2181 section 11. Name syntax
193 LOG(WARNING) << "domain > 255 octets «" << mailbox << "»";
196 // FIXME
197 // Check that each label is limited to between 1 and 63 octets.
200 size_t Mailbox::length(domain_encoding enc) const
202 if (enc == domain_encoding::ascii) {
203 for (auto ch : local_part_) {
204 if (!isascii(static_cast<unsigned char>(ch))) {
205 LOG(WARNING) << "non ascii chars in local part:" << local_part_;
206 // throw std::range_error("non ascii chars in local part of mailbox");
210 auto const& d
211 = (enc == domain_encoding::utf8) ? domain().utf8() : domain().ascii();
212 return local_part_.length() + (d.length() ? (d.length() + 1) : 0);
215 std::string Mailbox::as_string(domain_encoding enc) const
217 if (enc == domain_encoding::ascii) {
218 for (auto ch : local_part_) {
219 if (!isascii(static_cast<unsigned char>(ch))) {
220 LOG(WARNING) << "non ascii chars in local part:" << local_part_;
221 // throw std::range_error("non ascii chars in local part of mailbox");
225 std::string s;
226 s.reserve(length(enc));
227 s = local_part();
228 auto const& d
229 = (enc == domain_encoding::utf8) ? domain().utf8() : domain().ascii();
230 if (!d.empty()) {
231 s += '@';
232 s += d;
234 return s;