5 #include <boost/algorithm/string/classification.hpp>
6 #include <boost/algorithm/string/split.hpp>
8 #include <tao/pegtl.hpp>
9 #include <tao/pegtl/contrib/abnf.hpp>
11 #include <glog/logging.h>
13 #include <boost/algorithm/string/classification.hpp>
14 #include <boost/algorithm/string/split.hpp>
16 #include <fmt/format.h>
17 #include <fmt/ostream.h>
19 using namespace tao::pegtl
;
20 using namespace tao::pegtl::abnf
;
22 #include "is_ascii.hpp"
27 // 4. Syntax of UTF-8 Byte Sequences
29 struct UTF8_tail
: range
<'\x80', '\xBF'> {};
31 struct UTF8_1
: range
<0x00, 0x7F> {};
33 struct UTF8_2
: seq
<range
<'\xC2', '\xDF'>, UTF8_tail
> {};
35 struct UTF8_3
: sor
<seq
<one
<'\xE0'>, range
<'\xA0', '\xBF'>, UTF8_tail
>,
36 seq
<range
<'\xE1', '\xEC'>, rep
<2, UTF8_tail
>>,
37 seq
<one
<'\xED'>, range
<'\x80', '\x9F'>, UTF8_tail
>,
38 seq
<range
<'\xEE', '\xEF'>, rep
<2, UTF8_tail
>>> {};
40 struct UTF8_4
: sor
<seq
<one
<'\xF0'>, range
<'\x90', '\xBF'>, rep
<2, UTF8_tail
>>,
41 seq
<range
<'\xF1', '\xF3'>, rep
<3, UTF8_tail
>>,
42 seq
<one
<'\xF4'>, range
<'\x80', '\x8F'>, rep
<2, UTF8_tail
>>> {};
44 struct non_ascii
: sor
<UTF8_2
, UTF8_3
, UTF8_4
> {};
46 } // namespace RFC3629
49 struct VUCHAR
: sor
<VCHAR
, RFC3629::non_ascii
> {};
51 // excluded from atext: "(),.@[]"
52 struct atext
: sor
<ALPHA
, DIGIT
,
63 RFC3629::non_ascii
> {};
68 // <https://tools.ietf.org/html/rfc5321>
71 using colon
= one
<':'>;
73 struct u_let_dig
: sor
<ALPHA
, DIGIT
, RFC3629::non_ascii
> {};
75 struct u_ldh_tail
: star
<sor
<seq
<plus
<one
<'-'>>, u_let_dig
>, u_let_dig
>> {};
77 struct u_label
: seq
<u_let_dig
, u_ldh_tail
> {};
79 struct let_dig
: sor
<ALPHA
, DIGIT
> {};
81 struct ldh_tail
: star
<sor
<seq
<plus
<one
<'-'>>, let_dig
>, let_dig
>> {};
83 struct ldh_str
: seq
<let_dig
, ldh_tail
> {};
85 struct label
: ldh_str
{};
87 struct sub_domain
: sor
<label
, u_label
> {};
89 struct domain
: list
<sub_domain
, dot
> {};
91 struct dec_octet
: sor
<seq
<string
<'2','5'>, range
<'0','5'>>,
92 seq
<one
<'2'>, range
<'0','4'>, DIGIT
>,
93 seq
<range
<'0', '1'>, rep
<2, DIGIT
>>,
94 rep_min_max
<1, 2, DIGIT
>> {};
96 struct IPv4_address_literal
: seq
<dec_octet
, dot
, dec_octet
, dot
, dec_octet
, dot
, dec_octet
> {};
98 struct h16
: rep_min_max
<1, 4, HEXDIG
> {};
100 struct ls32
: sor
<seq
<h16
, colon
, h16
>, IPv4_address_literal
> {};
102 struct dcolon
: two
<':'> {};
104 struct IPv6address
: sor
<seq
< rep
<6, h16
, colon
>, ls32
>,
105 seq
< dcolon
, rep
<5, h16
, colon
>, ls32
>,
106 seq
<opt
<h16
>, dcolon
, rep
<4, h16
, colon
>, ls32
>,
107 seq
<opt
<h16
, opt
< colon
, h16
>>, dcolon
, rep
<3, h16
, colon
>, ls32
>,
108 seq
<opt
<h16
, rep_opt
<2, colon
, h16
>>, dcolon
, rep
<2, h16
, colon
>, ls32
>,
109 seq
<opt
<h16
, rep_opt
<3, colon
, h16
>>, dcolon
, h16
, colon
, ls32
>,
110 seq
<opt
<h16
, rep_opt
<4, colon
, h16
>>, dcolon
, ls32
>,
111 seq
<opt
<h16
, rep_opt
<5, colon
, h16
>>, dcolon
, h16
>,
112 seq
<opt
<h16
, rep_opt
<6, colon
, h16
>>, dcolon
>> {};
114 struct IPv6_address_literal
: seq
<TAO_PEGTL_ISTRING("IPv6:"), IPv6address
> {};
116 struct dcontent
: ranges
<33, 90, 94, 126> {};
118 struct standardized_tag
: ldh_str
{};
120 struct general_address_literal
: seq
<standardized_tag
, colon
, plus
<dcontent
>> {};
122 // 4.1.3. Address Literals
123 struct address_literal
: seq
<one
<'['>,
124 sor
<IPv4_address_literal
,
125 IPv6_address_literal
,
126 general_address_literal
>,
130 struct qtextSMTP
: sor
<ranges
<32, 33, 35, 91, 93, 126>, RFC3629::non_ascii
> {};
131 struct graphic
: range
<32, 126> {};
132 struct quoted_pairSMTP
: seq
<one
<'\\'>, graphic
> {};
133 struct qcontentSMTP
: sor
<qtextSMTP
, quoted_pairSMTP
> {};
135 struct atom
: plus
<Chars::atext
> {};
136 struct dot_string
: list
<atom
, dot
> {};
137 struct quoted_string
: seq
<one
<'"'>, star
<qcontentSMTP
>, one
<'"'>> {};
138 struct local_part
: sor
<dot_string
, quoted_string
> {};
139 struct non_local_part
: sor
<domain
, address_literal
> {};
140 struct mailbox
: seq
<local_part
, one
<'@'>, non_local_part
> {};
141 struct mailbox_only
: seq
<mailbox
, eof
> {};
142 struct dot_string_only
: seq
<dot_string
, eof
> {};
147 template <typename Input
>
148 static std::string_view
make_view(Input
const& in
)
150 return std::string_view(in
.begin(), std::distance(in
.begin(), in
.end()));
153 template <typename Rule
>
154 struct action
: nothing
<Rule
> {
158 struct action
<dot_string
> {
159 template <typename Input
>
160 static void apply(Input
const& in
, Mailbox::parse_results
& results
)
162 results
.local_type
= Mailbox::local_types::dot_string
;
167 struct action
<quoted_string
> {
168 template <typename Input
>
169 static void apply(Input
const& in
, Mailbox::parse_results
& results
)
171 results
.local_type
= Mailbox::local_types::quoted_string
;
176 struct action
<domain
> {
177 template <typename Input
>
178 static void apply(Input
const& in
, Mailbox::parse_results
& results
)
180 results
.domain_type
= Mailbox::domain_types::domain
;
185 struct action
<IPv4_address_literal
> {
186 template <typename Input
>
187 static void apply(Input
const& in
, Mailbox::parse_results
& results
)
189 results
.domain_type
= Mailbox::domain_types::address_literal
;
194 struct action
<IPv6_address_literal
> {
195 template <typename Input
>
196 static void apply(Input
const& in
, Mailbox::parse_results
& results
)
198 results
.domain_type
= Mailbox::domain_types::address_literal
;
203 struct action
<standardized_tag
> {
204 template <typename Input
>
205 static void apply(Input
const& in
, Mailbox::parse_results
& results
)
207 results
.standardized_tag
= make_view(in
);
212 struct action
<general_address_literal
> {
213 template <typename Input
>
214 static void apply(Input
const& in
, Mailbox::parse_results
& results
)
216 results
.domain_type
= Mailbox::domain_types::general_address_literal
;
221 struct action
<local_part
> {
222 template <typename Input
>
223 static void apply(Input
const& in
, Mailbox::parse_results
& results
)
225 results
.local
= make_view(in
);
230 struct action
<non_local_part
> {
231 template <typename Input
>
232 static void apply(Input
const& in
, Mailbox::parse_results
& results
)
234 results
.domain
= make_view(in
);
237 } // namespace RFC5321
240 struct fmt::formatter
<Mailbox
> : ostream_formatter
{};
242 std::optional
<Mailbox::parse_results
> Mailbox::parse(std::string_view mailbox
)
247 parse_results results
;
248 memory_input
<> mbx_in(mailbox
, "mailbox");
249 if (tao::pegtl::parse
<RFC5321::mailbox_only
, RFC5321::action
>(mbx_in
,
256 std::string
normalize_quoted_string(std::string_view local_part
)
258 CHECK_GE(local_part
.size(), 2);
259 CHECK_EQ(local_part
[0], '"');
260 CHECK_EQ(local_part
[local_part
.length() - 1], '"');
262 // normalize local_part, 1st step is unescape
263 auto const raw
= local_part
.substr(1, local_part
.length() - 2);
266 uq
.reserve(raw
.length());
267 for (auto p
= raw
.begin(); p
!= raw
.end(); ++p
) {
269 CHECK_NE(p
+ 1, raw
.end());
270 ++p
; // past the backslash
271 CHECK_LE(*p
, '\x7E');
276 Mailbox::parse_results results
;
277 memory_input
<> loc_in(uq
, "local-part");
278 if (tao::pegtl::parse
<RFC5321::dot_string_only
, RFC5321::action
>(loc_in
,
282 // If not, (re)escape
284 esc
.reserve(local_part
.length());
286 for (auto p
= uq
.begin(); p
!= uq
.end(); ++p
) {
290 else if (*p
== '"') {
301 bool Mailbox::set_(std::string_view mailbox
,
307 if (iequal(mailbox
, "Postmaster")) {
308 local_part_
= "Postmaster";
313 if (mailbox
.empty()) {
319 parse_results results
;
320 memory_input
<> mbx_in(mailbox
, "mailbox");
321 if (!tao::pegtl::parse
<RFC5321::mailbox_only
, RFC5321::action
>(mbx_in
,
324 throw std::invalid_argument("invalid mailbox syntax");
325 msg
= fmt::format("invalid mailbox syntax «{}»", mailbox
);
329 // "Impossible" errors; if the parse succeeded, the types must not
331 CHECK(results
.local_type
!= local_types::unknown
);
332 CHECK(results
.domain_type
!= domain_types::unknown
);
334 if (results
.domain_type
== domain_types::general_address_literal
) {
336 throw std::invalid_argument("general address literal in mailbox");
338 fmt::format("general address literal in mailbox «{}», unknown tag «{}»",
339 mailbox
, results
.standardized_tag
);
343 std::string loc_part
;
344 if (results
.local_type
== local_types::quoted_string
) {
345 loc_part
= normalize_quoted_string(results
.local
);
348 // plain old Dot-string
349 loc_part
= results
.local
;
353 if (!Domain::validate(results
.domain
, msg
, dom
)) {
355 throw std::invalid_argument("invalid domain");
359 std::swap(local_part_
, loc_part
);
360 std::swap(domain_
, dom
);
365 size_t Mailbox::length(domain_encoding enc
) const
367 if (enc
== domain_encoding::ascii
) {
368 if (!is_ascii(local_part_
)) {
369 LOG(ERROR
) << "non ascii chars in local part:" << local_part_
;
370 throw std::range_error("non ascii chars in local part of mailbox");
374 (enc
== domain_encoding::utf8
) ? domain().utf8() : domain().ascii();
375 return local_part_
.length() + (d
.length() ? (d
.length() + 1) : 0);
378 std::string
Mailbox::as_string(domain_encoding enc
) const
380 if (enc
== domain_encoding::ascii
) {
381 if (!is_ascii(local_part_
)) {
382 LOG(ERROR
) << "non ascii chars in local part:" << local_part_
;
383 throw std::range_error("non ascii chars in local part of mailbox");
387 s
.reserve(length(enc
));
390 (enc
== domain_encoding::utf8
) ? domain().utf8() : domain().ascii();