5 #include <tao/pegtl.hpp>
6 #include <tao/pegtl/contrib/abnf.hpp>
8 #include <glog/logging.h>
10 using namespace tao::pegtl
;
11 using namespace tao::pegtl::abnf
;
16 // 4. Syntax of UTF-8 Byte Sequences
18 struct UTF8_tail
: range
<'\x80', '\xBF'> {};
20 struct UTF8_1
: range
<0x00, 0x7F> {};
22 struct UTF8_2
: seq
<range
<'\xC2', '\xDF'>, UTF8_tail
> {};
24 struct UTF8_3
: sor
<seq
<one
<'\xE0'>, range
<'\xA0', '\xBF'>, UTF8_tail
>,
25 seq
<range
<'\xE1', '\xEC'>, rep
<2, UTF8_tail
>>,
26 seq
<one
<'\xED'>, range
<'\x80', '\x9F'>, UTF8_tail
>,
27 seq
<range
<'\xEE', '\xEF'>, rep
<2, UTF8_tail
>>> {};
29 struct UTF8_4
: sor
<seq
<one
<'\xF0'>, range
<'\x90', '\xBF'>, rep
<2, UTF8_tail
>>,
30 seq
<range
<'\xF1', '\xF3'>, rep
<3, UTF8_tail
>>,
31 seq
<one
<'\xF4'>, range
<'\x80', '\x8F'>, rep
<2, UTF8_tail
>>> {};
33 struct non_ascii
: sor
<UTF8_2
, UTF8_3
, UTF8_4
> {};
35 } // namespace RFC3629
38 struct VUCHAR
: sor
<VCHAR
, RFC3629::non_ascii
> {};
40 // excluded from atext: "(),.@[]"
41 struct atext
: sor
<ALPHA
, DIGIT
,
52 RFC3629::non_ascii
> {};
57 // <https://tools.ietf.org/html/rfc5321>
60 using colon
= one
<':'>;
62 struct u_let_dig
: sor
<ALPHA
, DIGIT
, RFC3629::non_ascii
> {};
64 struct u_ldh_tail
: star
<sor
<seq
<plus
<one
<'-'>>, u_let_dig
>, u_let_dig
>> {};
66 struct u_label
: seq
<u_let_dig
, u_ldh_tail
> {};
68 struct let_dig
: sor
<ALPHA
, DIGIT
> {};
70 struct ldh_tail
: star
<sor
<seq
<plus
<one
<'-'>>, let_dig
>, let_dig
>> {};
72 struct ldh_str
: seq
<let_dig
, ldh_tail
> {};
74 struct label
: ldh_str
{};
76 struct sub_domain
: sor
<label
, u_label
> {};
78 struct domain
: list
<sub_domain
, dot
> {};
80 struct dec_octet
: sor
<seq
<string
<'2','5'>, range
<'0','5'>>,
81 seq
<one
<'2'>, range
<'0','4'>, DIGIT
>,
82 seq
<range
<'0', '1'>, rep
<2, DIGIT
>>,
83 rep_min_max
<1, 2, DIGIT
>> {};
85 struct IPv4_address_literal
: seq
<dec_octet
, dot
, dec_octet
, dot
, dec_octet
, dot
, dec_octet
> {};
87 struct h16
: rep_min_max
<1, 4, HEXDIG
> {};
89 struct ls32
: sor
<seq
<h16
, colon
, h16
>, IPv4_address_literal
> {};
91 struct dcolon
: two
<':'> {};
93 struct IPv6address
: sor
<seq
< rep
<6, h16
, colon
>, ls32
>,
94 seq
< dcolon
, rep
<5, h16
, colon
>, ls32
>,
95 seq
<opt
<h16
>, dcolon
, rep
<4, h16
, colon
>, ls32
>,
96 seq
<opt
<h16
, opt
< colon
, h16
>>, dcolon
, rep
<3, h16
, colon
>, ls32
>,
97 seq
<opt
<h16
, rep_opt
<2, colon
, h16
>>, dcolon
, rep
<2, h16
, colon
>, ls32
>,
98 seq
<opt
<h16
, rep_opt
<3, colon
, h16
>>, dcolon
, h16
, colon
, ls32
>,
99 seq
<opt
<h16
, rep_opt
<4, colon
, h16
>>, dcolon
, ls32
>,
100 seq
<opt
<h16
, rep_opt
<5, colon
, h16
>>, dcolon
, h16
>,
101 seq
<opt
<h16
, rep_opt
<6, colon
, h16
>>, dcolon
>> {};
103 struct IPv6_address_literal
: seq
<TAO_PEGTL_ISTRING("IPv6:"), IPv6address
> {};
105 struct dcontent
: ranges
<33, 90, 94, 126> {};
107 struct standardized_tag
: ldh_str
{};
109 struct general_address_literal
: seq
<standardized_tag
, colon
, plus
<dcontent
>> {};
111 // 4.1.3. Address Literals
112 struct address_literal
: seq
<one
<'['>,
113 sor
<IPv4_address_literal
,
114 IPv6_address_literal
,
115 general_address_literal
>,
119 struct qtextSMTP
: sor
<ranges
<32, 33, 35, 91, 93, 126>, RFC3629::non_ascii
> {};
120 struct graphic
: range
<32, 126> {};
121 struct quoted_pairSMTP
: seq
<one
<'\\'>, graphic
> {};
122 struct qcontentSMTP
: sor
<qtextSMTP
, quoted_pairSMTP
> {};
124 struct atom
: plus
<Chars::atext
> {};
125 struct dot_string
: list
<atom
, dot
> {};
126 struct quoted_string
: seq
<one
<'"'>, star
<qcontentSMTP
>, one
<'"'>> {};
127 struct local_part
: sor
<dot_string
, quoted_string
> {};
128 struct non_local_part
: sor
<domain
, address_literal
> {};
129 struct mailbox
: seq
<local_part
, one
<'@'>, non_local_part
> {};
130 struct mailbox_only
: seq
<mailbox
, eof
> {};
135 template <typename Rule
>
136 struct action
: nothing
<Rule
> {
140 struct action
<local_part
> {
141 template <typename Input
>
142 static void apply(Input
const& in
, Mailbox
& addr
)
144 addr
.set_local(in
.string());
149 struct action
<non_local_part
> {
150 template <typename Input
>
151 static void apply(Input
const& in
, Mailbox
& addr
)
153 addr
.set_domain(in
.string());
156 } // namespace RFC5321
158 bool Mailbox::validate(std::string_view mailbox
)
161 memory_input
<> address_in(mailbox
, "address");
162 return !mailbox
.empty() &&
163 parse
<RFC5321::mailbox_only
, RFC5321::action
>(address_in
, mbx
);
166 bool Mailbox::validate_strict_lengths(std::string_view mailbox
)
169 memory_input
<> address_in(mailbox
, "address");
170 return !mailbox
.empty() &&
171 parse
<RFC5321::mailbox_only
, RFC5321::action
>(address_in
, mbx
) &&
172 (mbx
.local_part().length() <= 64) &&
173 (mbx
.domain().ascii().length() <= 255);
176 Mailbox::Mailbox(std::string_view mailbox
)
178 if (!mailbox
.empty()) {
179 memory_input
<> address_in(mailbox
, "address");
180 if (!parse
<RFC5321::mailbox_only
, RFC5321::action
>(address_in
, *this)) {
181 LOG(ERROR
) << "invalid mailbox syntax «" << mailbox
<< "»";
182 throw std::invalid_argument("invalid mailbox syntax");
186 // RFC-5321 section 4.5.3.1. Size Limits and Minimums
188 if (local_part().length() > 64) { // Section 4.5.3.1.1. Local-part
189 LOG(WARNING
) << "local part > 64 octets «" << mailbox
<< "»";
191 if (domain().ascii().length() > 255) { // Section 4.5.3.1.2.
192 // Also RFC 2181 section 11. Name syntax
193 LOG(WARNING
) << "domain > 255 octets «" << mailbox
<< "»";
197 // Check that each label is limited to between 1 and 63 octets.
200 size_t Mailbox::length(domain_encoding enc
) const
202 if (enc
== domain_encoding::ascii
) {
203 for (auto ch
: local_part_
) {
204 if (!isascii(static_cast<unsigned char>(ch
))) {
205 LOG(WARNING
) << "non ascii chars in local part:" << local_part_
;
206 // throw std::range_error("non ascii chars in local part of mailbox");
211 = (enc
== domain_encoding::utf8
) ? domain().utf8() : domain().ascii();
212 return local_part_
.length() + (d
.length() ? (d
.length() + 1) : 0);
215 std::string
Mailbox::as_string(domain_encoding enc
) const
217 if (enc
== domain_encoding::ascii
) {
218 for (auto ch
: local_part_
) {
219 if (!isascii(static_cast<unsigned char>(ch
))) {
220 LOG(WARNING
) << "non ascii chars in local part:" << local_part_
;
221 // throw std::range_error("non ascii chars in local part of mailbox");
226 s
.reserve(length(enc
));
229 = (enc
== domain_encoding::utf8
) ? domain().utf8() : domain().ascii();