5 #include <tao/pegtl.hpp>
6 #include <tao/pegtl/contrib/abnf.hpp>
8 #include <glog/logging.h>
10 using namespace tao::pegtl
;
11 using namespace tao::pegtl::abnf
;
16 // 4. Syntax of UTF-8 Byte Sequences
18 struct UTF8_tail
: range
<'\x80', '\xBF'> {};
20 struct UTF8_1
: range
<0x00, 0x7F> {};
22 struct UTF8_2
: seq
<range
<'\xC2', '\xDF'>, UTF8_tail
> {};
24 struct UTF8_3
: sor
<seq
<one
<'\xE0'>, range
<'\xA0', '\xBF'>, UTF8_tail
>,
25 seq
<range
<'\xE1', '\xEC'>, rep
<2, UTF8_tail
>>,
26 seq
<one
<'\xED'>, range
<'\x80', '\x9F'>, UTF8_tail
>,
27 seq
<range
<'\xEE', '\xEF'>, rep
<2, UTF8_tail
>>> {};
29 struct UTF8_4
: sor
<seq
<one
<'\xF0'>, range
<'\x90', '\xBF'>, rep
<2, UTF8_tail
>>,
30 seq
<range
<'\xF1', '\xF3'>, rep
<3, UTF8_tail
>>,
31 seq
<one
<'\xF4'>, range
<'\x80', '\x8F'>, rep
<2, UTF8_tail
>>> {};
33 struct non_ascii
: sor
<UTF8_2
, UTF8_3
, UTF8_4
> {};
35 } // namespace RFC3629
38 struct VUCHAR
: sor
<VCHAR
, RFC3629::non_ascii
> {};
40 // excluded from atext: "(),.@[]"
// Characters that may appear in an atom. The alternatives visible here are
// ALPHA, DIGIT and RFC3629::non_ascii, tried in that order.
// NOTE(review): the embedded numbering jumps from 41 to 52 inside this
// definition, so further alternatives are probably missing from this view --
// confirm against the complete file before relying on this list.
41 struct atext
: sor
<ALPHA
, DIGIT
,
52 RFC3629::non_ascii
> {};
57 // <https://tools.ietf.org/html/rfc5321>
60 using colon
= one
<':'>;
62 struct u_let_dig
: sor
<ALPHA
, DIGIT
, RFC3629::non_ascii
> {};
64 struct u_ldh_tail
: star
<sor
<seq
<plus
<one
<'-'>>, u_let_dig
>, u_let_dig
>> {};
66 struct u_label
: seq
<u_let_dig
, u_ldh_tail
> {};
68 struct let_dig
: sor
<ALPHA
, DIGIT
> {};
70 struct ldh_tail
: star
<sor
<seq
<plus
<one
<'-'>>, let_dig
>, let_dig
>> {};
72 struct ldh_str
: seq
<let_dig
, ldh_tail
> {};
74 struct label
: ldh_str
{};
76 struct sub_domain
: sor
<label
, u_label
> {};
78 struct domain
: list
<sub_domain
, dot
> {};
80 struct dec_octet
: sor
<seq
<string
<'2','5'>, range
<'0','5'>>,
81 seq
<one
<'2'>, range
<'0','4'>, DIGIT
>,
82 seq
<range
<'0', '1'>, rep
<2, DIGIT
>>,
83 rep_min_max
<1, 2, DIGIT
>> {};
85 struct IPv4_address_literal
: seq
<dec_octet
, dot
, dec_octet
, dot
, dec_octet
, dot
, dec_octet
> {};
87 struct h16
: rep_min_max
<1, 4, HEXDIG
> {};
89 struct ls32
: sor
<seq
<h16
, colon
, h16
>, IPv4_address_literal
> {};
91 struct dcolon
: two
<':'> {};
93 struct IPv6address
: sor
<seq
< rep
<6, h16
, colon
>, ls32
>,
94 seq
< dcolon
, rep
<5, h16
, colon
>, ls32
>,
95 seq
<opt
<h16
>, dcolon
, rep
<4, h16
, colon
>, ls32
>,
96 seq
<opt
<h16
, opt
< colon
, h16
>>, dcolon
, rep
<3, h16
, colon
>, ls32
>,
97 seq
<opt
<h16
, rep_opt
<2, colon
, h16
>>, dcolon
, rep
<2, h16
, colon
>, ls32
>,
98 seq
<opt
<h16
, rep_opt
<3, colon
, h16
>>, dcolon
, h16
, colon
, ls32
>,
99 seq
<opt
<h16
, rep_opt
<4, colon
, h16
>>, dcolon
, ls32
>,
100 seq
<opt
<h16
, rep_opt
<5, colon
, h16
>>, dcolon
, h16
>,
101 seq
<opt
<h16
, rep_opt
<6, colon
, h16
>>, dcolon
>> {};
103 struct IPv6_address_literal
: seq
<TAO_PEGTL_ISTRING("IPv6:"), IPv6address
> {};
105 struct dcontent
: ranges
<33, 90, 94, 126> {};
107 struct standardized_tag
: ldh_str
{};
109 struct general_address_literal
: seq
<standardized_tag
, colon
, plus
<dcontent
>> {};
111 // 4.1.3. Address Literals
// Bracketed address literal: an opening '[' followed by one of the IPv4,
// IPv6 or general address-literal forms, tried in that order.
// NOTE(review): the end of this definition is not visible in this view
// (presumably a closing ']' and the end of the seq<> follow) -- confirm
// against the complete file.
112 struct address_literal
: seq
<one
<'['>,
113 sor
<IPv4_address_literal
,
114 IPv6_address_literal
,
115 general_address_literal
>,
119 struct qtextSMTP
: sor
<ranges
<32, 33, 35, 91, 93, 126>, RFC3629::non_ascii
> {};
120 struct graphic
: range
<32, 126> {};
121 struct quoted_pairSMTP
: seq
<one
<'\\'>, graphic
> {};
122 struct qcontentSMTP
: sor
<qtextSMTP
, quoted_pairSMTP
> {};
124 struct atom
: plus
<Chars::atext
> {};
125 struct dot_string
: list
<atom
, dot
> {};
126 struct quoted_string
: seq
<one
<'"'>, star
<qcontentSMTP
>, one
<'"'>> {};
127 struct local_part
: sor
<dot_string
, quoted_string
> {};
128 struct non_local_part
: sor
<domain
, address_literal
> {};
129 struct mailbox
: seq
<local_part
, one
<'@'>, non_local_part
> {};
130 struct mailbox_only
: seq
<mailbox
, eof
> {};
// Builds a std::string_view covering the bytes from in.begin() up to
// in.end(). The length is computed with std::distance over that same pair.
// The returned view refers to the storage `in` points into; nothing is
// copied here.
135 template <typename Input
>
136 static std::string_view
make_view(Input
const& in
)
138 return std::string_view(in
.begin(), std::distance(in
.begin(), in
.end()));
// Primary action template handed to tao::pegtl::parse. It derives from
// nothing<Rule>, so a rule gets an apply() only through one of the
// specializations that follow.
141 template <typename Rule
>
142 struct action
: nothing
<Rule
> {
// Action for dot_string: records in `results` that the local part was
// written in dot-string form. The matched input `in` is not used by the
// statement visible here.
146 struct action
<dot_string
> {
147 template <typename Input
>
148 static void apply(Input
const& in
, Mailbox::parse_results
& results
)
150 results
.local_type
= Mailbox::local_types::dot_string
;
// Action for quoted_string: records in `results` that the local part was
// written in quoted-string form. The matched input `in` is not used by the
// statement visible here.
155 struct action
<quoted_string
> {
156 template <typename Input
>
157 static void apply(Input
const& in
, Mailbox::parse_results
& results
)
159 results
.local_type
= Mailbox::local_types::quoted_string
;
// Action for domain: records in `results` that the part after '@' was a
// domain name. The matched input `in` is not used by the statement visible
// here.
164 struct action
<domain
> {
165 template <typename Input
>
166 static void apply(Input
const& in
, Mailbox::parse_results
& results
)
168 results
.domain_type
= Mailbox::domain_types::domain
;
// Action for address_literal: records in `results` that the part after '@'
// was an address literal. The matched input `in` is not used by the
// statement visible here.
173 struct action
<address_literal
> {
174 template <typename Input
>
175 static void apply(Input
const& in
, Mailbox::parse_results
& results
)
177 results
.domain_type
= Mailbox::domain_types::address_literal
;
// Action for local_part: stores a view of the matched input, built with
// make_view(in), in results.local.
182 struct action
<local_part
> {
183 template <typename Input
>
184 static void apply(Input
const& in
, Mailbox::parse_results
& results
)
186 results
.local
= make_view(in
);
// Action for non_local_part: stores a view of the matched input, built with
// make_view(in), in results.domain.
191 struct action
<non_local_part
> {
192 template <typename Input
>
193 static void apply(Input
const& in
, Mailbox::parse_results
& results
)
195 results
.domain
= make_view(in
);
198 } // namespace RFC5321
// Runs the RFC5321::mailbox_only grammar with RFC5321::action over `mailbox`
// (wrapped in a memory_input whose source name is "mailbox"), using a local
// parse_results, and returns a std::optional<parse_results>.
// NOTE(review): the rest of the body is not visible in this view; presumably
// the filled-in results are returned on a successful parse and an empty
// optional otherwise -- confirm against the complete file.
200 std::optional
<Mailbox::parse_results
> Mailbox::parse(std::string_view mailbox
)
202 parse_results results
;
205 memory_input
<> mbx_in(mailbox
, "mailbox");
206 if (tao::pegtl::parse
<RFC5321::mailbox_only
, RFC5321::action
>(mbx_in
,
// Syntax check for a mailbox string. The returned expression requires
// `mailbox` to be non-empty and the RFC5321::mailbox_only parse (with
// RFC5321::action) to succeed; the emptiness test comes first, so the parser
// is not run on an empty string.
// NOTE(review): the end of the return expression is not visible in this
// view -- confirm that nothing else is and-ed into the result.
213 bool Mailbox::validate(std::string_view mailbox
)
215 parse_results results
;
216 memory_input
<> mbx_in(mailbox
, "mailbox");
217 return !mailbox
.empty() &&
218 tao::pegtl::parse
<RFC5321::mailbox_only
, RFC5321::action
>(mbx_in
,
// Like a plain syntax check, but additionally bounds the sizes of the parsed
// parts: the visible expression requires `mailbox` to be non-empty, the
// RFC5321::mailbox_only parse to succeed, results.local to be at most 64
// characters long and results.domain at most 255.
// NOTE(review): the line joining the parse call to the length tests is not
// visible in this view (presumably `&&`) -- confirm against the complete
// file.
222 bool Mailbox::validate_strict_lengths(std::string_view mailbox
)
224 parse_results results
;
225 memory_input
<> mbx_in(mailbox
, "mailbox");
226 return !mailbox
.empty() &&
227 tao::pegtl::parse
<RFC5321::mailbox_only
, RFC5321::action
>(mbx_in
,
229 (results
.local
.length() <= 64) && (results
.domain
.length() <= 255);
// Constructs a Mailbox from its textual form.
//
// Throws std::invalid_argument("empty mailbox string") when `mailbox` is
// empty, and std::invalid_argument("invalid mailbox syntax") (after logging
// the offending text at ERROR level) when it does not match
// RFC5321::mailbox_only. Over-long local or domain parts are only logged as
// warnings; they do not make construction fail in the code visible here.
232 Mailbox::Mailbox(std::string_view mailbox
)
234 if (mailbox
.empty()) {
235 throw std::invalid_argument("empty mailbox string");
238 parse_results results
;
239 memory_input
<> mbx_in(mailbox
, "mailbox");
240 if (!tao::pegtl::parse
<RFC5321::mailbox_only
, RFC5321::action
>(mbx_in
,
242 LOG(ERROR
) << "invalid mailbox syntax «" << mailbox
<< "»";
243 throw std::invalid_argument("invalid mailbox syntax");
// After a successful parse the actions must have classified both halves of
// the address; CHECK asserts that neither type is still `unknown`.
246 CHECK(results
.local_type
!= local_types::unknown
);
247 CHECK(results
.domain_type
!= domain_types::unknown
);
249 // RFC-5321 section 4.5.3.1. Size Limits and Minimums
251 if (results
.local
.length() > 64) { // Section 4.5.3.1.1. Local-part
252 LOG(WARNING
) << "local part > 64 octets «" << mailbox
<< "»";
254 if (results
.domain
.length() > 255) { // Section 4.5.3.1.2.
255 // Also RFC 2181 section 11. Name syntax
256 LOG(WARNING
) << "domain > 255 octets «" << mailbox
<< "»";
// Hand the two parsed views to the setters.
259 set_local(results
.local
);
260 set_domain(results
.domain
);
263 // Check that each label is limited to between 1 and 63 octets.
// Returns the length of the mailbox for the requested domain encoding:
// local_part_.length(), plus d.length() + 1 when the domain text `d` is
// non-empty (the extra 1 is presumably the '@' separator -- confirm).
//
// When `enc` is domain_encoding::ascii, a warning is logged if the local part
// contains a non-ASCII byte; the exception that used to be thrown there is
// commented out, so the call still returns a length.
266 size_t Mailbox::length(domain_encoding enc
) const
268 if (enc
== domain_encoding::ascii
) {
269 for (auto ch
: local_part_
) {
// The cast to unsigned char keeps the argument to isascii() non-negative
// even where plain char is signed.
270 if (!isascii(static_cast<unsigned char>(ch
))) {
271 LOG(WARNING
) << "non ascii chars in local part:" << local_part_
;
272 // throw std::range_error("non ascii chars in local part of mailbox");
// Pick the UTF-8 or the ASCII form of the domain, depending on `enc`.
277 = (enc
== domain_encoding::utf8
) ? domain().utf8() : domain().ascii();
278 return local_part_
.length() + (d
.length() ? (d
.length() + 1) : 0);
281 std::string
Mailbox::as_string(domain_encoding enc
) const
283 if (enc
== domain_encoding::ascii
) {
284 for (auto ch
: local_part_
) {
285 if (!isascii(static_cast<unsigned char>(ch
))) {
286 LOG(WARNING
) << "non ascii chars in local part:" << local_part_
;
287 // throw std::range_error("non ascii chars in local part of mailbox");
292 s
.reserve(length(enc
));
295 = (enc
== domain_encoding::utf8
) ? domain().utf8() : domain().ascii();