more Spamhaus checking and EHLO id lookup
[ghsmtp.git] / Mailbox.cpp
blob1ae0416f385551bb4d3e1dff1ae5186276302dbf
1 #include "Mailbox.hpp"
3 #include <string>
5 #include <tao/pegtl.hpp>
6 #include <tao/pegtl/contrib/abnf.hpp>
8 #include <glog/logging.h>
10 using namespace tao::pegtl;
11 using namespace tao::pegtl::abnf;
13 namespace RFC3629 {
14 // clang-format off
16 // 4. Syntax of UTF-8 Byte Sequences
18 struct UTF8_tail : range<'\x80', '\xBF'> {};
20 struct UTF8_1 : range<0x00, 0x7F> {};
22 struct UTF8_2 : seq<range<'\xC2', '\xDF'>, UTF8_tail> {};
24 struct UTF8_3 : sor<seq<one<'\xE0'>, range<'\xA0', '\xBF'>, UTF8_tail>,
25 seq<range<'\xE1', '\xEC'>, rep<2, UTF8_tail>>,
26 seq<one<'\xED'>, range<'\x80', '\x9F'>, UTF8_tail>,
27 seq<range<'\xEE', '\xEF'>, rep<2, UTF8_tail>>> {};
29 struct UTF8_4 : sor<seq<one<'\xF0'>, range<'\x90', '\xBF'>, rep<2, UTF8_tail>>,
30 seq<range<'\xF1', '\xF3'>, rep<3, UTF8_tail>>,
31 seq<one<'\xF4'>, range<'\x80', '\x8F'>, rep<2, UTF8_tail>>> {};
33 struct non_ascii : sor<UTF8_2, UTF8_3, UTF8_4> {};
35 } // namespace RFC3629
37 namespace Chars {
38 struct VUCHAR : sor<VCHAR, RFC3629::non_ascii> {};
40 // excluded from atext: "(),.@[]"
41 struct atext : sor<ALPHA, DIGIT,
42 one<'!', '#',
43 '$', '%',
44 '&', '\'',
45 '*', '+',
46 '-', '/',
47 '=', '?',
48 '^', '_',
49 '`', '{',
50 '|', '}',
51 '~'>,
52 RFC3629::non_ascii> {};
54 } // namespace Chars
56 namespace RFC5321 {
57 // <https://tools.ietf.org/html/rfc5321>
59 using dot = one<'.'>;
60 using colon = one<':'>;
62 struct u_let_dig : sor<ALPHA, DIGIT, RFC3629::non_ascii> {};
64 struct u_ldh_tail : star<sor<seq<plus<one<'-'>>, u_let_dig>, u_let_dig>> {};
66 struct u_label : seq<u_let_dig, u_ldh_tail> {};
68 struct let_dig : sor<ALPHA, DIGIT> {};
70 struct ldh_tail : star<sor<seq<plus<one<'-'>>, let_dig>, let_dig>> {};
72 struct ldh_str : seq<let_dig, ldh_tail> {};
74 struct label : ldh_str {};
76 struct sub_domain : sor<label, u_label> {};
78 struct domain : list<sub_domain, dot> {};
80 struct dec_octet : sor<seq<string<'2','5'>, range<'0','5'>>,
81 seq<one<'2'>, range<'0','4'>, DIGIT>,
82 seq<range<'0', '1'>, rep<2, DIGIT>>,
83 rep_min_max<1, 2, DIGIT>> {};
85 struct IPv4_address_literal : seq<dec_octet, dot, dec_octet, dot, dec_octet, dot, dec_octet> {};
87 struct h16 : rep_min_max<1, 4, HEXDIG> {};
89 struct ls32 : sor<seq<h16, colon, h16>, IPv4_address_literal> {};
91 struct dcolon : two<':'> {};
93 struct IPv6address : sor<seq< rep<6, h16, colon>, ls32>,
94 seq< dcolon, rep<5, h16, colon>, ls32>,
95 seq<opt<h16 >, dcolon, rep<4, h16, colon>, ls32>,
96 seq<opt<h16, opt< colon, h16>>, dcolon, rep<3, h16, colon>, ls32>,
97 seq<opt<h16, rep_opt<2, colon, h16>>, dcolon, rep<2, h16, colon>, ls32>,
98 seq<opt<h16, rep_opt<3, colon, h16>>, dcolon, h16, colon, ls32>,
99 seq<opt<h16, rep_opt<4, colon, h16>>, dcolon, ls32>,
100 seq<opt<h16, rep_opt<5, colon, h16>>, dcolon, h16>,
101 seq<opt<h16, rep_opt<6, colon, h16>>, dcolon >> {};
103 struct IPv6_address_literal : seq<TAO_PEGTL_ISTRING("IPv6:"), IPv6address> {};
105 struct dcontent : ranges<33, 90, 94, 126> {};
107 struct standardized_tag : ldh_str {};
109 struct general_address_literal : seq<standardized_tag, colon, plus<dcontent>> {};
111 // 4.1.3. Address Literals
112 struct address_literal : seq<one<'['>,
113 sor<IPv4_address_literal,
114 IPv6_address_literal,
115 general_address_literal>,
116 one<']'>> {};
119 struct qtextSMTP : sor<ranges<32, 33, 35, 91, 93, 126>, RFC3629::non_ascii> {};
120 struct graphic : range<32, 126> {};
121 struct quoted_pairSMTP : seq<one<'\\'>, graphic> {};
122 struct qcontentSMTP : sor<qtextSMTP, quoted_pairSMTP> {};
124 struct atom : plus<Chars::atext> {};
125 struct dot_string : list<atom, dot> {};
126 struct quoted_string : seq<one<'"'>, star<qcontentSMTP>, one<'"'>> {};
127 struct local_part : sor<dot_string, quoted_string> {};
128 struct non_local_part : sor<domain, address_literal> {};
129 struct mailbox : seq<local_part, one<'@'>, non_local_part> {};
130 struct mailbox_only : seq<mailbox, eof> {};
132 // clang-format on
133 // Actions
// Build a non-owning view over the characters delimited by in.begin() and
// in.end(). Nothing is copied, so the view is only valid for as long as the
// storage behind `in` is.
template <typename Input>
static std::string_view make_view(Input const& in)
{
  auto const first = in.begin();
  auto const count = std::distance(first, in.end());
  return std::string_view(first, count);
}
141 template <typename Rule>
142 struct action : nothing<Rule> {
145 template <>
146 struct action<dot_string> {
147 template <typename Input>
148 static void apply(Input const& in, Mailbox::parse_results& results)
150 results.local_type = Mailbox::local_types::dot_string;
154 template <>
155 struct action<quoted_string> {
156 template <typename Input>
157 static void apply(Input const& in, Mailbox::parse_results& results)
159 results.local_type = Mailbox::local_types::quoted_string;
163 template <>
164 struct action<domain> {
165 template <typename Input>
166 static void apply(Input const& in, Mailbox::parse_results& results)
168 results.domain_type = Mailbox::domain_types::domain;
172 template <>
173 struct action<address_literal> {
174 template <typename Input>
175 static void apply(Input const& in, Mailbox::parse_results& results)
177 results.domain_type = Mailbox::domain_types::address_literal;
181 template <>
182 struct action<local_part> {
183 template <typename Input>
184 static void apply(Input const& in, Mailbox::parse_results& results)
186 results.local = make_view(in);
190 template <>
191 struct action<non_local_part> {
192 template <typename Input>
193 static void apply(Input const& in, Mailbox::parse_results& results)
195 results.domain = make_view(in);
198 } // namespace RFC5321
200 std::optional<Mailbox::parse_results> Mailbox::parse(std::string_view mailbox)
202 parse_results results;
203 if (mailbox.empty())
204 return {};
205 memory_input<> mbx_in(mailbox, "mailbox");
206 if (tao::pegtl::parse<RFC5321::mailbox_only, RFC5321::action>(mbx_in,
207 results)) {
208 return results;
210 return {};
213 bool Mailbox::validate(std::string_view mailbox)
215 parse_results results;
216 memory_input<> mbx_in(mailbox, "mailbox");
217 return !mailbox.empty() &&
218 tao::pegtl::parse<RFC5321::mailbox_only, RFC5321::action>(mbx_in,
219 results);
222 bool Mailbox::validate_strict_lengths(std::string_view mailbox)
224 parse_results results;
225 memory_input<> mbx_in(mailbox, "mailbox");
226 return !mailbox.empty() &&
227 tao::pegtl::parse<RFC5321::mailbox_only, RFC5321::action>(mbx_in,
228 results) &&
229 (results.local.length() <= 64) && (results.domain.length() <= 255);
232 Mailbox::Mailbox(std::string_view mailbox)
234 if (mailbox.empty()) {
235 throw std::invalid_argument("empty mailbox string");
238 parse_results results;
239 memory_input<> mbx_in(mailbox, "mailbox");
240 if (!tao::pegtl::parse<RFC5321::mailbox_only, RFC5321::action>(mbx_in,
241 results)) {
242 LOG(ERROR) << "invalid mailbox syntax «" << mailbox << "»";
243 throw std::invalid_argument("invalid mailbox syntax");
246 CHECK(results.local_type != local_types::unknown);
247 CHECK(results.domain_type != domain_types::unknown);
249 // RFC-5321 section 4.5.3.1. Size Limits and Minimums
251 if (results.local.length() > 64) { // Section 4.5.3.1.1. Local-part
252 LOG(WARNING) << "local part > 64 octets «" << mailbox << "»";
254 if (results.domain.length() > 255) { // Section 4.5.3.1.2.
255 // Also RFC 2181 section 11. Name syntax
256 LOG(WARNING) << "domain > 255 octets «" << mailbox << "»";
259 set_local(results.local);
260 set_domain(results.domain);
262 // FIXME
263 // Check that each label is limited to between 1 and 63 octets.
266 size_t Mailbox::length(domain_encoding enc) const
268 if (enc == domain_encoding::ascii) {
269 for (auto ch : local_part_) {
270 if (!isascii(static_cast<unsigned char>(ch))) {
271 LOG(WARNING) << "non ascii chars in local part:" << local_part_;
272 // throw std::range_error("non ascii chars in local part of mailbox");
276 auto const& d
277 = (enc == domain_encoding::utf8) ? domain().utf8() : domain().ascii();
278 return local_part_.length() + (d.length() ? (d.length() + 1) : 0);
281 std::string Mailbox::as_string(domain_encoding enc) const
283 if (enc == domain_encoding::ascii) {
284 for (auto ch : local_part_) {
285 if (!isascii(static_cast<unsigned char>(ch))) {
286 LOG(WARNING) << "non ascii chars in local part:" << local_part_;
287 // throw std::range_error("non ascii chars in local part of mailbox");
291 std::string s;
292 s.reserve(length(enc));
293 s = local_part();
294 auto const& d
295 = (enc == domain_encoding::utf8) ? domain().utf8() : domain().ascii();
296 if (!d.empty()) {
297 s += '@';
298 s += d;
300 return s;