still must match obs first
[ghsmtp.git] / msg.cpp
blobfc569b7dcc3d99ba7fe588b9f75faec12091e3be
1 // Toy RFC-5322 message parser and DMARC validator.
3 #include <gflags/gflags.h>
4 namespace gflags {
5 // in case we didn't have one
8 DEFINE_bool(selftest, false, "run a self test");
10 #include <map>
11 #include <string>
12 #include <vector>
14 #include <glog/logging.h>
16 #include <fmt/format.h>
17 #include <fmt/ostream.h>
19 #include <boost/algorithm/string.hpp>
20 #include <boost/iostreams/device/mapped_file.hpp>
22 #include <iostream>
24 #include "Mailbox.hpp"
25 #include "OpenDKIM.hpp"
26 #include "OpenDMARC.hpp"
27 #include "SPF.hpp"
28 #include "esc.hpp"
29 #include "fs.hpp"
30 #include "iequal.hpp"
31 #include "osutil.hpp"
33 #include <tao/pegtl.hpp>
34 #include <tao/pegtl/contrib/abnf.hpp>
36 // #include <tao/pegtl/contrib/tracer.hpp>
38 using namespace tao::pegtl;
39 using namespace tao::pegtl::abnf;
// Compile-time element count of a built-in array.
template <typename T, std::size_t N>
constexpr auto countof(T const (&)[N]) noexcept -> std::size_t
{
  return N;
}
47 namespace RFC5322 {
// Canonical spellings of the header field names this parser treats as
// "defined".  defined_field() hands out pointers into this table.
constexpr char const* defined_fields[] = {

    // Trace Fields
    "Return-Path",
    "Received",
    "Received-SPF", // RFC 7208 added trace field

    // Sig
    "DKIM-Signature", // RFC 6376

    // Originator Fields
    "Date",
    "From",
    "Sender",
    "Reply-To",

    // Destination Address Fields
    "To",
    "Cc",
    "Bcc",

    // Identification Fields
    "Message-ID",
    "In-Reply-To",
    "References",

    // Informational Fields
    "Subject",
    "Comments",
    "Keywords",

    // Resent Fields
    "Resent-Date",
    "Resent-From",
    "Resent-Sender",
    "Resent-To",
    "Resent-Cc",
    "Resent-Bcc",
    "Resent-Message-ID",

    // MIME Fields
    "MIME-Version",

    "Content-Type",
    "Content-Transfer-Encoding",
    "Content-ID",
    "Content-Description",
};
98 bool is_defined_field(std::string_view name)
100 return std::find_if(std::begin(defined_fields), std::end(defined_fields),
101 [=](std::string_view v) { return iequal(name, v); })
102 != std::end(defined_fields);
105 char const* defined_field(std::string_view name)
107 auto df = std::find_if(std::begin(defined_fields), std::end(defined_fields),
108 [=](std::string_view v) { return iequal(name, v); });
109 if (df != std::end(defined_fields))
110 return *df;
111 return "";
// Strict weak ordering on strings that ignores case (via strcasecmp).
struct ci_less {
  bool operator()(std::string const& lhs, std::string const& rhs) const
  {
    int const order = strcasecmp(lhs.c_str(), rhs.c_str());
    return order < 0;
  }
};
121 struct Ctx {
122 OpenDKIM::verify dkv;
124 OpenDMARC::policy dmp;
126 std::string mb_loc;
127 std::string mb_dom;
129 std::vector<::Mailbox> mb_list; // temporary accumulator
131 std::vector<::Mailbox> from_list;
133 ::Mailbox sender;
135 std::string key;
136 std::string value;
138 std::vector<std::pair<std::string, std::string>> kv_list;
140 std::map<std::string, std::string, ci_less> spf_info;
141 std::string spf_result;
143 std::unordered_multimap<char const*, std::string> defined_hdrs;
144 std::multimap<std::string, std::string, ci_less> opt_hdrs;
146 std::string unstructured;
147 std::string id;
149 std::string message_id;
151 std::string opt_name;
152 std::string opt_value;
154 std::string type;
155 std::string subtype;
157 bool mime_version{false};
158 bool discrete_type{false};
159 bool composite_type{false};
161 std::vector<std::pair<std::string, std::string>> ct_parameters;
163 std::vector<std::string> msg_errors;
166 // clang-format off
168 struct UTF8_tail : range<'\x80', '\xBF'> {};
170 struct UTF8_1 : range<0x00, 0x7F> {};
172 struct UTF8_2 : seq<range<'\xC2', '\xDF'>, UTF8_tail> {};
174 struct UTF8_3 : sor<seq<one<'\xE0'>, range<'\xA0', '\xBF'>, UTF8_tail>,
175 seq<range<'\xE1', '\xEC'>, rep<2, UTF8_tail>>,
176 seq<one<'\xED'>, range<'\x80', '\x9F'>, UTF8_tail>,
177 seq<range<'\xEE', '\xEF'>, rep<2, UTF8_tail>>> {};
179 struct UTF8_4
180 : sor<seq<one<'\xF0'>, range<'\x90', '\xBF'>, rep<2, UTF8_tail>>,
181 seq<range<'\xF1', '\xF3'>, rep<3, UTF8_tail>>,
182 seq<one<'\xF4'>, range<'\x80', '\x8F'>, rep<2, UTF8_tail>>> {};
184 // UTF8_char = UTF8_1 | UTF8_2 | UTF8_3 | UTF8_4;
186 struct UTF8_non_ascii : sor<UTF8_2, UTF8_3, UTF8_4> {};
188 struct VUCHAR : sor<VCHAR, UTF8_non_ascii> {};
190 using dot = one<'.'>;
191 using colon = one<':'>;
193 struct text : sor<ranges<1, 9, 11, 12, 14, 127>, UTF8_non_ascii> {};
195 // UTF-8 except NUL (0), LF (10) and CR (13).
196 // struct body : seq<star<seq<rep_max<998, text>, eol>>, rep_max<998, text>> {};
198 // BINARYMIME allows any byte
199 struct body : until<eof> {};
201 struct FWS : seq<opt<seq<star<WSP>, eol>>, plus<WSP>> {};
203 struct qtext : sor<one<33>, ranges<35, 91, 93, 126>, UTF8_non_ascii> {};
205 struct quoted_pair : seq<one<'\\'>, sor<VUCHAR, WSP>> {};
207 struct atext : sor<ALPHA, DIGIT,
208 one<'!', '#',
209 '$', '%',
210 '&', '\'',
211 '*', '+',
212 '-', '/',
213 '=', '?',
214 '^', '_',
215 '`', '{',
216 '|', '}',
217 '~'>,
218 UTF8_non_ascii> {};
220 // ctext is ASCII not '(' or ')' or '\\'
221 struct ctext : sor<ranges<33, 39, 42, 91, 93, 126>, UTF8_non_ascii> {};
223 // <https://tools.ietf.org/html/rfc2047>
225 // especials = "(" / ")" / "<" / ">" / "@" / "," / ";" / ":" / "
226 // <"> / "/" / "[" / "]" / "?" / "." / "="
228 // token = 1*<Any CHAR except SPACE, CTLs, and especials>
230 struct tchar47 : ranges< // NUL..' '
231 33, 33, // !
232 // 34, 34, // "
233 35, 39, // #$%&'
234 // 40, 41, // ()
235 42, 43, // *+
236 // 44, 44, // ,
237 45, 45, // -
238 // 46, 47, // ./
239 48, 57, // 0123456789
240 // 58, 64, // ;:<=>?@
241 65, 90, // A..Z
242 // 91, 91, // [
243 92, 92, // '\\'
244 // 93, 93, // ]
245 94, 126 // ^_` a..z {|}~
246 // 127,127 // DEL
247 > {};
249 struct token47 : plus<tchar47> {};
251 struct charset : token47 {};
252 struct encoding : token47 {};
254 // encoded-text = 1*<Any printable ASCII character other than "?"
255 // or SPACE>
257 struct echar : ranges< // NUL..' '
258 33, 62, // !..>
259 // 63, 63, // ?
260 64, 126 // @A..Z[\]^_` a..z {|}~
261 // 127,127 // DEL
262 > {};
264 struct encoded_text : plus<echar> {};
266 // encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
268 // leading opt<FWS> is not in RFC 2047
270 struct encoded_word_book : seq<string<'=', '?'>,
271 charset, string<'?'>,
272 encoding, string<'?'>,
273 encoded_text,
274 string<'=', '?'>
275 > {};
277 struct encoded_word : seq<opt<FWS>, encoded_word_book> {};
279 struct comment;
281 struct ccontent : sor<ctext, quoted_pair, comment, encoded_word> {};
283 // from <https://tools.ietf.org/html/rfc2047>
284 // comment = "(" *(ctext / quoted-pair / comment / encoded-word) ")"
286 struct comment
287 : seq<one<'('>, star<seq<opt<FWS>, ccontent>>, opt<FWS>, one<')'>> {};
289 struct CFWS : sor<seq<plus<seq<opt<FWS>, comment>, opt<FWS>>>, FWS> {};
291 struct qcontent : sor<qtext, quoted_pair> {};
293 // Corrected in errata ID: 3135
294 struct quoted_string
295 : seq<opt<CFWS>,
296 DQUOTE,
297 sor<seq<star<seq<opt<FWS>, qcontent>>, opt<FWS>>, FWS>,
298 DQUOTE,
299 opt<CFWS>> {};
301 // *([FWS] VCHAR) *WSP
302 struct unstructured : seq<star<seq<opt<FWS>, VUCHAR>>, star<WSP>> {};
304 struct atom : seq<opt<CFWS>, plus<atext>, opt<CFWS>> {};
306 struct dot_atom_text : list<plus<atext>, dot> {};
308 struct dot_atom : seq<opt<CFWS>, dot_atom_text, opt<CFWS>> {};
310 struct word : sor<atom, quoted_string> {};
312 // obs-phrase = word *(word / "." / CFWS)
314 struct phrase : plus<sor<encoded_word, word>> {};
316 struct dec_octet : sor<seq<string<'2','5'>, range<'0','5'>>,
317 seq<one<'2'>, range<'0','4'>, DIGIT>,
318 seq<range<'0', '1'>, rep<2, DIGIT>>,
319 rep_min_max<1, 2, DIGIT>> {};
321 struct ipv4_address
322 : seq<dec_octet, dot, dec_octet, dot, dec_octet, dot, dec_octet> {};
324 struct h16 : rep_min_max<1, 4, HEXDIG> {};
326 struct ls32 : sor<seq<h16, colon, h16>, ipv4_address> {};
328 struct dcolon : two<':'> {};
330 struct ipv6_address : sor<seq< rep<6, h16, colon>, ls32>,
331 seq< dcolon, rep<5, h16, colon>, ls32>,
332 seq<opt<h16 >, dcolon, rep<4, h16, colon>, ls32>,
333 seq<opt<h16, opt< colon, h16>>, dcolon, rep<3, h16, colon>, ls32>,
334 seq<opt<h16, rep_opt<2, colon, h16>>, dcolon, rep<2, h16, colon>, ls32>,
335 seq<opt<h16, rep_opt<3, colon, h16>>, dcolon, h16, colon, ls32>,
336 seq<opt<h16, rep_opt<4, colon, h16>>, dcolon, ls32>,
337 seq<opt<h16, rep_opt<5, colon, h16>>, dcolon, h16>,
338 seq<opt<h16, rep_opt<6, colon, h16>>, dcolon >> {};
340 struct ip : sor<ipv4_address, ipv6_address> {};
342 struct local_part : sor<dot_atom, quoted_string> {};
344 struct dtext : ranges<33, 90, 94, 126> {};
346 struct domain_literal : seq<opt<CFWS>,
347 one<'['>,
348 star<seq<opt<FWS>, dtext>>,
349 opt<FWS>,
350 one<']'>,
351 opt<CFWS>> {};
353 struct domain : sor<dot_atom, domain_literal> {};
355 struct addr_spec : seq<local_part, one<'@'>, domain> {};
357 struct angle_addr : seq<opt<CFWS>, one<'<'>, addr_spec, one<'>'>, opt<CFWS>> {};
359 struct path
360 : sor<angle_addr, seq<opt<CFWS>, one<'<'>, opt<CFWS>, one<'>'>, opt<CFWS>>> {};
362 struct display_name : phrase {};
364 struct name_addr : seq<opt<display_name>, angle_addr> {};
366 struct name_addr_only : seq<name_addr, eof> {};
368 struct mailbox : sor<name_addr, addr_spec> {};
370 struct group_list;
372 struct group
373 : seq<display_name, one<':'>, opt<group_list>, one<';'>, opt<CFWS>> {};
375 struct address : sor<mailbox, group> {};
377 #define OBSOLETE_SYNTAX
379 #ifdef OBSOLETE_SYNTAX
380 // *([CFWS] ",") mailbox *("," [mailbox / CFWS])
381 struct obs_mbox_list : seq<star<seq<opt<CFWS>, one<','>>>,
382 mailbox,
383 star<one<','>, opt<sor<mailbox, CFWS>>>> {};
385 struct mailbox_list : sor<list<mailbox, one<','>>, obs_mbox_list> {};
386 #else
387 struct mailbox_list : list<mailbox, one<','>> {};
388 #endif
390 #ifdef OBSOLETE_SYNTAX
391 // *([CFWS] ",") address *("," [address / CFWS])
392 struct obs_addr_list : seq<star<seq<opt<CFWS>, one<','>>>,
393 address,
394 star<one<','>, opt<sor<address, CFWS>>>> {};
396 struct address_list : sor<list<address, one<','>>, obs_addr_list> {};
397 #else
398 struct address_list : list<address, one<','>> {};
399 #endif
401 #ifdef OBSOLETE_SYNTAX
402 // 1*([CFWS] ",") [CFWS]
403 struct obs_group_list : seq<plus<seq<opt<CFWS>, one<','>>>, opt<CFWS>> {};
405 struct group_list : sor<mailbox_list, CFWS, obs_group_list> {};
406 #else
407 struct group_list : sor<mailbox_list, CFWS> {};
408 #endif
410 // 3.3. Date and Time Specification (mostly from RFC 2822)
412 struct day : seq<opt<FWS>, rep_min_max<1, 2, DIGIT>> {};
414 struct month_name : sor<TAO_PEGTL_ISTRING("Jan"),
415 TAO_PEGTL_ISTRING("Feb"),
416 TAO_PEGTL_ISTRING("Mar"),
417 TAO_PEGTL_ISTRING("Apr"),
418 TAO_PEGTL_ISTRING("May"),
419 TAO_PEGTL_ISTRING("Jun"),
420 TAO_PEGTL_ISTRING("Jul"),
421 TAO_PEGTL_ISTRING("Aug"),
422 TAO_PEGTL_ISTRING("Sep"),
423 TAO_PEGTL_ISTRING("Oct"),
424 TAO_PEGTL_ISTRING("Nov"),
425 TAO_PEGTL_ISTRING("Dec")> {};
427 struct month : seq<FWS, month_name, FWS> {};
429 struct year : rep<4, DIGIT> {};
431 struct date : seq<day, month, year> {};
433 struct day_name : sor<TAO_PEGTL_ISTRING("Mon"),
434 TAO_PEGTL_ISTRING("Tue"),
435 TAO_PEGTL_ISTRING("Wed"),
436 TAO_PEGTL_ISTRING("Thu"),
437 TAO_PEGTL_ISTRING("Fri"),
438 TAO_PEGTL_ISTRING("Sat"),
439 TAO_PEGTL_ISTRING("Sun")> {};
441 // struct obs_day_of_week : seq<opt<CFWS>, day_name, opt<CFWS>> {
442 // };
444 // struct obs_day : seq<opt<CFWS>, rep_min_max<1, 2, DIGIT>, opt<CFWS>> {
445 // };
447 // struct obs_year : seq<opt<CFWS>, rep<2, DIGIT>, opt<CFWS>> {
448 // };
450 // struct obs_hour : seq<opt<CFWS>, rep<2, DIGIT>, opt<CFWS>> {
451 // };
453 // struct obs_minute : seq<opt<CFWS>, rep<2, DIGIT>, opt<CFWS>> {
454 // };
456 // struct obs_second : seq<opt<CFWS>, rep<2, DIGIT>, opt<CFWS>> {
457 // };
459 // struct obs_day_of_week : seq<opt<CFWS>, day_name, opt<CFWS>> {
460 // }
462 struct day_of_week : seq<opt<FWS>, day_name> {};
464 struct hour : rep<2, DIGIT> {};
466 struct minute : rep<2, DIGIT> {};
468 struct second : rep<2, DIGIT> {};
470 struct millisecond : rep<3, DIGIT> {};
472 // RFC-5322 extension is optional milliseconds
473 struct time_of_day
474 : seq<hour,
475 one<':'>,
476 minute,
477 opt<seq<one<':'>, second, opt<seq<one<'.'>, millisecond>>>>> {};
479 // struct obs_zone : sor<range<65, 73>,
480 // range<75, 90>,
481 // range<97, 105>,
482 // range<107, 122>,
483 // TAO_PEGTL_ISTRING("UT"),
484 // TAO_PEGTL_ISTRING("GMT"),
485 // TAO_PEGTL_ISTRING("EST"),
486 // TAO_PEGTL_ISTRING("EDT"),
487 // TAO_PEGTL_ISTRING("CST"),
488 // TAO_PEGTL_ISTRING("CDT"),
489 // TAO_PEGTL_ISTRING("MST"),
490 // TAO_PEGTL_ISTRING("MDT"),
491 // TAO_PEGTL_ISTRING("PST"),
492 // TAO_PEGTL_ISTRING("PDT")> {
493 // };
495 struct zone : seq<sor<one<'+'>, one<'-'>>, rep<4, DIGIT>> {};
497 struct time : seq<time_of_day, FWS, zone> {};
499 struct date_time
500 : seq<opt<seq<day_of_week, one<','>>>, date, FWS, time, opt<CFWS>> {};
502 // The Origination Date Field
503 struct orig_date : seq<TAO_PEGTL_ISTRING("Date:"), date_time, eol> {};
505 // Originator Fields
506 struct from : seq<TAO_PEGTL_ISTRING("From:"), opt<FWS>, mailbox_list, opt<FWS>, eol> {};
508 struct sender : seq<TAO_PEGTL_ISTRING("Sender:"), mailbox, eol> {};
510 struct reply_to : seq<TAO_PEGTL_ISTRING("Reply-To:"), address_list, eol> {};
512 struct address_list_or_pm : sor<TAO_PEGTL_ISTRING("Postmaster"), address_list> {};
514 // Destination Address Fields
515 struct to : seq<TAO_PEGTL_ISTRING("To:"), address_list_or_pm, eol> {};
517 struct cc : seq<TAO_PEGTL_ISTRING("Cc:"), address_list, eol> {};
519 struct bcc : seq<TAO_PEGTL_ISTRING("Bcc:"), opt<sor<address_list, CFWS>>, eol> {};
521 // Identification Fields
523 struct no_fold_literal : seq<one<'['>, star<dtext>, one<']'>> {};
525 struct id_left : dot_atom_text {};
527 struct id_right : sor<dot_atom_text, no_fold_literal> {};
529 struct msg_id
530 : seq<opt<CFWS>, one<'<'>, id_left, one<'@'>, id_right, one<'>'>, opt<CFWS>> {};
532 struct message_id : seq<TAO_PEGTL_ISTRING("Message-ID:"), msg_id, eol> {};
534 struct in_reply_to : seq<TAO_PEGTL_ISTRING("In-Reply-To:"), plus<msg_id>, eol> {};
536 struct references : seq<TAO_PEGTL_ISTRING("References:"), star<msg_id>, eol> {};
538 // Informational Fields
540 struct subject : seq<TAO_PEGTL_ISTRING("Subject:"), unstructured, eol> {};
542 struct comments : seq<TAO_PEGTL_ISTRING("Comments:"), unstructured, eol> {};
544 struct keywords
545 : seq<TAO_PEGTL_ISTRING("Keywords:"), list<phrase, one<','>>, eol> {};
547 // Resent Fields
549 struct resent_date : seq<TAO_PEGTL_ISTRING("Resent-Date:"), date_time, eol> {};
551 struct resent_from : seq<TAO_PEGTL_ISTRING("Resent-From:"), mailbox_list, eol> {};
553 struct resent_sender : seq<TAO_PEGTL_ISTRING("Resent-Sender:"), mailbox, eol> {};
555 struct resent_to : seq<TAO_PEGTL_ISTRING("Resent-To:"), address_list, eol> {};
557 struct resent_cc : seq<TAO_PEGTL_ISTRING("Resent-Cc:"), address_list, eol> {};
559 struct resent_bcc
560 : seq<TAO_PEGTL_ISTRING("Resent-Bcc:"), opt<sor<address_list, CFWS>>, eol> {};
562 struct resent_msg_id
563 : seq<TAO_PEGTL_ISTRING("Resent-Message-ID:"), msg_id, eol> {};
565 // Trace Fields
567 struct return_path : seq<TAO_PEGTL_ISTRING("Return-Path:"), opt<FWS>, path, eol> {};
569 // Facebook, among others
571 struct return_path_non_standard : seq<TAO_PEGTL_ISTRING("Return-Path:"),
572 opt<CFWS>,
573 addr_spec,
574 star<WSP>,
575 eol> {};
577 struct received_token : sor<angle_addr, addr_spec, domain, word> {};
579 struct received : seq<TAO_PEGTL_ISTRING("Received:"),
580 opt<sor<plus<received_token>, CFWS>>,
581 one<';'>,
582 date_time,
583 opt<seq<WSP, comment>>,
584 eol> {};
586 struct result : sor<TAO_PEGTL_ISTRING("Pass"),
587 TAO_PEGTL_ISTRING("Fail"),
588 TAO_PEGTL_ISTRING("SoftFail"),
589 TAO_PEGTL_ISTRING("Neutral"),
590 TAO_PEGTL_ISTRING("None"),
591 TAO_PEGTL_ISTRING("TempError"),
592 TAO_PEGTL_ISTRING("PermError")> {};
594 struct spf_key : sor<TAO_PEGTL_ISTRING("client-ip"),
595 TAO_PEGTL_ISTRING("envelope-from"),
596 TAO_PEGTL_ISTRING("helo"),
597 TAO_PEGTL_ISTRING("problem"),
598 TAO_PEGTL_ISTRING("receiver"),
599 TAO_PEGTL_ISTRING("identity"),
600 TAO_PEGTL_ISTRING("mechanism")> {};
602 // This value syntax (allowing addr_spec and angle_addr) is not in
603 // accordance with RFC 7208 (or 4408) but is what is effectivly used
604 // by libspf2 1.2.10 and before.
606 struct spf_value : sor<ip, addr_spec, dot_atom, quoted_string, angle_addr> {};
608 struct spf_key_value_pair : seq<spf_key, opt<CFWS>, one<'='>, spf_value> {};
610 struct spf_key_value_list
611 : seq<spf_key_value_pair,
612 star<seq<one<';'>, opt<CFWS>, spf_key_value_pair>>,
613 opt<one<';'>>> {};
615 struct received_spf : seq<TAO_PEGTL_ISTRING("Received-SPF:"),
616 opt<CFWS>,
617 result,
618 opt<seq<FWS, comment>>,
619 opt<seq<FWS, spf_key_value_list>>,
620 eol> {};
622 struct dkim_signature
623 : seq<TAO_PEGTL_ISTRING("DKIM-Signature:"), unstructured, eol> {};
625 struct mime_version : seq<TAO_PEGTL_ISTRING("MIME-Version:"),
626 opt<CFWS>,
627 one<'1'>,
628 opt<CFWS>,
629 one<'.'>,
630 opt<CFWS>,
631 one<'0'>,
632 opt<CFWS>,
633 eol> {};
635 // CTL := <any ASCII control ; ( 0- 37, 0.- 31.)
636 // character and DEL> ; ( 177, 127.)
638 // SPACE := 32
640 // especials := "(" / ")" / "<" / ">" / "@" /
641 // "," / ";" / ":" / "\" / <">
642 // "/" / "[" / "]" / "?" / "="
644 // ! 33
646 // 33-33
648 // " 34
650 // 35-39
652 // ( 40
653 // ) 41
655 // 42-43
657 // , 44
659 // 45-46
661 // / 47
663 // 48-57
665 // : 58
666 // ; 59
667 // < 60
668 // = 61
669 // > 62
670 // ? 63
671 // @ 64
673 // 65-90
675 // [ 91
676 // \ 92
677 // ] 93
679 // 94-126
681 // token := 1*<any (US-ASCII) CHAR except CTLs, SPACE,
682 // or tspecials>
684 struct tchar : ranges<33, 33, 35, 39, 42, 43, 45, 46, 48, 57, 65, 90, 94, 126> {};
686 struct token : plus<tchar> {};
688 struct ietf_token : token {};
690 struct x_token : seq<TAO_PEGTL_ISTRING("X-"), token> {};
692 struct extension_token : sor<x_token, ietf_token> {};
694 struct discrete_type : sor<TAO_PEGTL_ISTRING("text"),
695 TAO_PEGTL_ISTRING("image"),
696 TAO_PEGTL_ISTRING("audio"),
697 TAO_PEGTL_ISTRING("video"),
698 TAO_PEGTL_ISTRING("application"),
699 extension_token> {};
701 struct composite_type : sor<TAO_PEGTL_ISTRING("message"),
702 TAO_PEGTL_ISTRING("multipart"),
703 extension_token> {};
705 struct type : sor<discrete_type, composite_type> {};
707 struct subtype : token {};
709 // value := token / quoted-string
711 // attribute := token
713 // parameter := attribute "=" value
715 struct value : sor<token, quoted_string> {};
717 struct attribute : token {};
719 struct parameter : seq<attribute, one<'='>, value> {};
721 struct content : seq<TAO_PEGTL_ISTRING("Content-Type:"),
722 opt<CFWS>,
723 seq<type, one<'/'>, subtype>,
724 star<seq<one<';'>, opt<CFWS>, parameter>>,
725 opt<one<';'>>, // not strictly RFC 2045, but common
726 eol> {};
728 // mechanism := "7bit" / "8bit" / "binary" /
729 // "quoted-printable" / "base64" /
730 // ietf-token / x-token
732 struct mechanism : sor<TAO_PEGTL_ISTRING("7bit"),
733 TAO_PEGTL_ISTRING("8bit"),
734 TAO_PEGTL_ISTRING("binary"),
735 TAO_PEGTL_ISTRING("quoted-printable"),
736 TAO_PEGTL_ISTRING("base64"),
737 ietf_token,
738 x_token> {};
740 struct content_transfer_encoding
741 : seq<TAO_PEGTL_ISTRING("Content-Transfer-Encoding:"),
742 opt<CFWS>,
743 mechanism,
744 eol> {};
746 struct id : seq<TAO_PEGTL_ISTRING("Content-ID:"), msg_id, eol> {};
748 struct description
749 : seq<TAO_PEGTL_ISTRING("Content-Description:"), star<text>, eol> {};
751 // Optional Fields
753 struct ftext : ranges<33, 57, 59, 126> {};
755 struct field_name : plus<ftext> {};
757 struct field_value : unstructured {};
759 struct optional_field : seq<field_name, one<':'>, field_value, eol> {};
761 // message header
763 struct fields : star<sor<
764 return_path,
765 return_path_non_standard,
766 received,
767 received_spf,
769 dkim_signature,
771 orig_date,
772 from,
773 sender,
774 reply_to,
778 bcc,
780 message_id,
781 in_reply_to,
782 references,
784 subject,
785 comments,
786 keywords,
788 resent_date,
789 resent_from,
790 resent_sender,
791 resent_to,
792 resent_cc,
793 resent_bcc,
794 resent_msg_id,
796 mime_version,
797 content,
798 content_transfer_encoding,
800 description,
802 optional_field
803 >> {};
805 struct message : seq<fields, opt<seq<eol, body>>, eof> {};
807 // clang-format on
809 template <typename Rule>
810 struct action : nothing<Rule> {
813 template <>
814 struct action<fields> {
815 template <typename Input>
816 static void apply(Input const& in, Ctx& ctx)
818 // LOG(INFO) << "fields";
822 template <>
823 struct action<unstructured> {
824 template <typename Input>
825 static void apply(Input const& in, Ctx& ctx)
827 ctx.unstructured = in.string();
831 template <>
832 struct action<field_name> {
833 template <typename Input>
834 static void apply(Input const& in, Ctx& ctx)
836 ctx.opt_name = in.string();
840 template <>
841 struct action<field_value> {
842 template <typename Input>
843 static void apply(Input const& in, Ctx& ctx)
845 ctx.opt_value = in.string();
849 template <typename Input>
850 static void header(Input const& in, Ctx& ctx)
852 ctx.dkv.header(std::string_view(begin(in), end(in) - begin(in)));
855 template <>
856 struct action<optional_field> {
857 template <typename Input>
858 static void apply(Input const& in, Ctx& ctx)
860 // LOG(INFO) << "optional_field";
861 if (is_defined_field(ctx.opt_name)) {
862 // So, this is a syntax error in a defined field.
863 if (ctx.opt_name == "Received") {
864 // Go easy on Received lines, they tend to be wild and woolly.
865 // LOG(INFO) << in.string();
867 else {
868 auto const err
869 = fmt::format("syntax error in: \"{}\"", esc(in.string()));
870 ctx.msg_errors.push_back(err);
871 LOG(ERROR) << err;
873 ctx.defined_hdrs.emplace(defined_field(ctx.opt_name), ctx.opt_value);
875 else {
876 ctx.opt_hdrs.emplace(ctx.opt_name, ctx.opt_value);
878 header(in, ctx);
879 ctx.unstructured.clear();
880 ctx.mb_list.clear();
884 template <>
885 struct action<local_part> {
886 template <typename Input>
887 static void apply(Input const& in, Ctx& ctx)
889 ctx.mb_loc = in.string();
890 boost::trim(ctx.mb_loc);
894 template <>
895 struct action<domain> {
896 template <typename Input>
897 static void apply(Input const& in, Ctx& ctx)
899 ctx.mb_dom = in.string();
900 // LOG(INFO) << "domain == '" << ctx.mb_dom << "'";
904 template <>
905 struct action<mailbox> {
906 static void apply0(Ctx& ctx)
908 // LOG(INFO) << "mailbox emplace_back(" << ctx.mb_loc << '@' << ctx.mb_dom
909 // << ')';
910 ctx.mb_list.emplace_back(ctx.mb_loc, ctx.mb_dom);
914 template <>
915 struct action<orig_date> {
916 template <typename Input>
917 static void apply(const Input& in, Ctx& ctx)
919 // LOG(INFO) << "Date:";
920 header(in, ctx);
924 // Originator Fields
926 template <>
927 struct action<from> {
928 template <typename Input>
929 static void apply(const Input& in, Ctx& ctx)
931 if (!ctx.from_list.empty()) {
932 fmt::memory_buffer msg;
933 fmt::format_to(std::back_inserter(msg),
934 "multiple 'From:' address headers, previous:\n");
935 for (auto const& add : ctx.from_list) {
936 fmt::format_to(std::back_inserter(msg), " {}\n", add.as_string());
938 fmt::format_to(std::back_inserter(msg), "new: {}", in.string());
939 ctx.msg_errors.push_back(fmt::to_string(msg));
942 header(in, ctx);
943 ctx.from_list = std::move(ctx.mb_list);
944 ctx.mb_list.clear();
948 template <>
949 struct action<sender> {
950 template <typename Input>
951 static void apply(const Input& in, Ctx& ctx)
953 if (!ctx.sender.empty()) {
954 auto const err
955 = fmt::format("multiple 'Sender:' headers, previous: {}, this: {}",
956 static_cast<std::string>(ctx.sender), in.string());
957 ctx.msg_errors.push_back(err);
959 header(in, ctx);
960 CHECK_EQ(ctx.mb_list.size(), 1);
961 ctx.sender = std::move(ctx.mb_list[0]);
962 ctx.mb_list.clear();
966 template <>
967 struct action<reply_to> {
968 template <typename Input>
969 static void apply(const Input& in, Ctx& ctx)
971 header(in, ctx);
972 ctx.mb_list.clear();
976 // Destination Address Fields
978 template <>
979 struct action<to> {
980 template <typename Input>
981 static void apply(const Input& in, Ctx& ctx)
983 header(in, ctx);
984 ctx.mb_list.clear();
988 template <>
989 struct action<cc> {
990 template <typename Input>
991 static void apply(const Input& in, Ctx& ctx)
993 header(in, ctx);
994 ctx.mb_list.clear();
998 template <>
999 struct action<bcc> {
1000 template <typename Input>
1001 static void apply(const Input& in, Ctx& ctx)
1003 header(in, ctx);
1004 ctx.mb_list.clear();
1008 // Identification Fields
1010 template <>
1011 struct action<msg_id> {
1012 template <typename Input>
1013 static void apply(const Input& in, Ctx& ctx)
1015 ctx.id = in.string();
1016 boost::trim(ctx.id);
1020 template <>
1021 struct action<message_id> {
1022 template <typename Input>
1023 static void apply(const Input& in, Ctx& ctx)
1025 header(in, ctx);
1026 if (!ctx.message_id.empty()) {
1027 LOG(ERROR) << "multiple message IDs: " << ctx.message_id << " and "
1028 << ctx.id;
1030 ctx.message_id = ctx.id;
1034 template <>
1035 struct action<in_reply_to> {
1036 template <typename Input>
1037 static void apply(const Input& in, Ctx& ctx)
1039 header(in, ctx);
1043 template <>
1044 struct action<references> {
1045 template <typename Input>
1046 static void apply(const Input& in, Ctx& ctx)
1048 header(in, ctx);
1052 // Informational Fields
1054 template <>
1055 struct action<subject> {
1056 template <typename Input>
1057 static void apply(const Input& in, Ctx& ctx)
1059 header(in, ctx);
1060 ctx.unstructured.clear();
1064 template <>
1065 struct action<comments> {
1066 template <typename Input>
1067 static void apply(const Input& in, Ctx& ctx)
1069 header(in, ctx);
1070 ctx.unstructured.clear();
1074 template <>
1075 struct action<keywords> {
1076 template <typename Input>
1077 static void apply(const Input& in, Ctx& ctx)
1079 header(in, ctx);
1083 // Resent Fields
1085 template <>
1086 struct action<resent_date> {
1087 template <typename Input>
1088 static void apply(const Input& in, Ctx& ctx)
1090 header(in, ctx);
1094 template <>
1095 struct action<resent_from> {
1096 template <typename Input>
1097 static void apply(const Input& in, Ctx& ctx)
1099 header(in, ctx);
1100 ctx.mb_list.clear();
1104 template <>
1105 struct action<resent_sender> {
1106 template <typename Input>
1107 static void apply(const Input& in, Ctx& ctx)
1109 header(in, ctx);
1110 ctx.mb_list.clear();
1114 template <>
1115 struct action<resent_to> {
1116 template <typename Input>
1117 static void apply(const Input& in, Ctx& ctx)
1119 header(in, ctx);
1120 ctx.mb_list.clear();
1124 template <>
1125 struct action<resent_cc> {
1126 template <typename Input>
1127 static void apply(const Input& in, Ctx& ctx)
1129 header(in, ctx);
1130 ctx.mb_list.clear();
1134 template <>
1135 struct action<resent_bcc> {
1136 template <typename Input>
1137 static void apply(const Input& in, Ctx& ctx)
1139 header(in, ctx);
1140 ctx.mb_list.clear();
1144 template <>
1145 struct action<resent_msg_id> {
1146 template <typename Input>
1147 static void apply(const Input& in, Ctx& ctx)
1149 header(in, ctx);
1153 // Trace Fields
1155 template <>
1156 struct action<return_path> {
1157 template <typename Input>
1158 static void apply(const Input& in, Ctx& ctx)
1160 header(in, ctx);
1161 ctx.mb_list.clear();
1165 template <>
1166 struct action<return_path_non_standard> {
1167 template <typename Input>
1168 static void apply(const Input& in, Ctx& ctx)
1170 // LOG(INFO) << "Return-Path: is retarded: " << esc(in.string());
1171 header(in, ctx);
1172 ctx.mb_list.clear();
1176 template <>
1177 struct action<received> {
1178 template <typename Input>
1179 static void apply(const Input& in, Ctx& ctx)
1181 header(in, ctx);
1182 ctx.mb_list.clear();
1186 template <>
1187 struct action<result> {
1188 template <typename Input>
1189 static void apply(const Input& in, Ctx& ctx)
1191 ctx.spf_result = std::move(in.string());
1192 boost::to_lower(ctx.spf_result);
1196 template <>
1197 struct action<spf_key> {
1198 template <typename Input>
1199 static void apply(const Input& in, Ctx& ctx)
1201 ctx.key = std::move(in.string());
1205 template <>
1206 struct action<spf_value> {
1207 template <typename Input>
1208 static void apply(const Input& in, Ctx& ctx)
1210 ctx.value = std::move(in.string());
1211 boost::trim(ctx.value);
1215 template <>
1216 struct action<spf_key_value_pair> {
1217 template <typename Input>
1218 static void apply(const Input& in, Ctx& ctx)
1220 ctx.kv_list.emplace_back(ctx.key, ctx.value);
1221 ctx.key.clear();
1222 ctx.value.clear();
1226 template <>
1227 struct action<spf_key_value_list> {
1228 static void apply0(Ctx& ctx)
1230 for (auto kvp : ctx.kv_list) {
1231 ctx.spf_info[kvp.first] = kvp.second;
1236 template <>
1237 struct action<received_spf> {
1238 template <typename Input>
1239 static void apply(const Input& in, Ctx& ctx)
1241 // LOG(INFO) << "Received-SPF:";
1243 // Do a fresh check now:
1245 auto node = osutil::get_hostname();
1247 SPF::Server spf_srv(node.c_str());
1248 SPF::Request spf_req(spf_srv);
1250 spf_req.set_ip_str(ctx.spf_info["client-ip"].c_str());
1252 spf_req.set_helo_dom(ctx.spf_info["helo"].c_str());
1253 if (ctx.spf_info.find("envelope-from") != end(ctx.spf_info)) {
1254 spf_req.set_env_from(ctx.spf_info["envelope-from"].c_str());
1257 SPF::Response spf_res(spf_req);
1258 auto res = spf_res.result();
1259 CHECK_NE(res, SPF::Result::INVALID);
1261 if (ctx.spf_result != res.c_str()) {
1262 LOG(WARNING) << "SPF results changed: "
1263 << "new result is \"" << res << "\", old result is \""
1264 << ctx.spf_result << "\"";
1267 // Get result from header:
1269 int pol_spf = DMARC_POLICY_SPF_OUTCOME_PASS;
1271 // Pass is the default:
1272 // if (ctx.spf_result == "pass") {
1273 // pol_spf = DMARC_POLICY_SPF_OUTCOME_PASS;
1274 // }
1276 // if ((ctx.spf_result == "neutral") || (ctx.spf_result == "softfail")) {
1277 // // could also be a FAIL maybe...
1278 // pol_spf = DMARC_POLICY_SPF_OUTCOME_PASS;
1279 // }
1281 if (ctx.spf_result == "none") {
1282 pol_spf = DMARC_POLICY_SPF_OUTCOME_NONE;
1285 if (ctx.spf_result == "temperror") {
1286 pol_spf = DMARC_POLICY_SPF_OUTCOME_TMPFAIL;
1289 if ((ctx.spf_result == "fail") || (ctx.spf_result == "permerror")) {
1290 pol_spf = DMARC_POLICY_SPF_OUTCOME_FAIL;
1293 if (ctx.spf_info.find("client-ip") != end(ctx.spf_info)) {
1294 ctx.dmp.connect(ctx.spf_info["client-ip"].c_str());
1295 // LOG(INFO) << "SPF: ip==" << ctx.spf_info["client-ip"] << ", "
1296 // << ctx.spf_result;
1299 // Google sometimes doesn't put in anything but client-ip
1300 if (ctx.spf_info.find("envelope-from") != end(ctx.spf_info)) {
1301 auto dom = ctx.spf_info["envelope-from"];
1302 auto origin = DMARC_POLICY_SPF_ORIGIN_MAILFROM;
1304 if (dom == "<>") {
1305 dom = ctx.spf_info["helo"];
1306 origin = DMARC_POLICY_SPF_ORIGIN_HELO;
1307 LOG(INFO) << "SPF: origin HELO " << dom;
1309 else {
1310 memory_input<> addr_in(dom, "dom");
1311 if (!parse<RFC5322::addr_spec, RFC5322::action>(addr_in, ctx)) {
1312 LOG(FATAL) << "Failed to parse domain: " << dom;
1314 dom = ctx.mb_dom;
1315 origin = DMARC_POLICY_SPF_ORIGIN_MAILFROM;
1316 LOG(INFO) << "SPF: origin MAIL FROM " << dom;
1318 ctx.mb_loc.clear();
1319 ctx.mb_dom.clear();
1321 ctx.dmp.store_spf(dom.c_str(), pol_spf, origin, "are you human?");
1324 ctx.mb_list.clear();
1328 template <>
1329 struct action<dkim_signature> {
1330 template <typename Input>
1331 static void apply(const Input& in, Ctx& ctx)
1333 header(in, ctx);
1334 CHECK(ctx.dkv.sig_syntax(ctx.unstructured)) << ctx.unstructured;
1335 ctx.unstructured.clear();
1339 template <>
1340 struct action<received_token> {
1341 template <typename Input>
1342 static void apply(const Input& in, Ctx& ctx)
1347 template <>
1348 struct action<mime_version> {
1349 template <typename Input>
1350 static void apply(const Input& in, Ctx& ctx)
1352 header(in, ctx);
1353 ctx.mime_version = true;
1357 template <>
1358 struct action<content> {
1359 template <typename Input>
1360 static void apply(const Input& in, Ctx& ctx)
1362 header(in, ctx);
1363 // ctx.unstructured.clear();
1367 template <>
1368 struct action<discrete_type> {
1369 template <typename Input>
1370 static void apply(const Input& in, Ctx& ctx)
1372 ctx.discrete_type = true;
1373 ctx.type = in.string();
1377 template <>
1378 struct action<composite_type> {
1379 template <typename Input>
1380 static void apply(const Input& in, Ctx& ctx)
1382 ctx.composite_type = true;
1383 ctx.type = in.string();
1387 template <>
1388 struct action<subtype> {
1389 template <typename Input>
1390 static void apply(const Input& in, Ctx& ctx)
1392 ctx.subtype = in.string();
1396 template <>
1397 struct action<content_transfer_encoding> {
1398 template <typename Input>
1399 static void apply(const Input& in, Ctx& ctx)
1401 header(in, ctx);
1402 // ctx.unstructured.clear();
1406 template <>
1407 struct action<id> {
1408 template <typename Input>
1409 static void apply(const Input& in, Ctx& ctx)
1411 header(in, ctx);
1415 template <>
1416 struct action<description> {
1417 template <typename Input>
1418 static void apply(const Input& in, Ctx& ctx)
1420 header(in, ctx);
1424 template <>
1425 struct action<attribute> {
1426 template <typename Input>
1427 static void apply(const Input& in, Ctx& ctx)
1429 ctx.key = in.string();
1433 template <>
1434 struct action<parameter> {
1435 template <typename Input>
1436 static void apply(const Input& in, Ctx& ctx)
1438 ctx.ct_parameters.emplace_back(ctx.key, ctx.value);
1439 ctx.key.clear();
1440 ctx.value.clear();
1444 template <>
1445 struct action<value> {
1446 template <typename Input>
1447 static void apply(const Input& in, Ctx& ctx)
1449 ctx.value = in.string();
1453 template <>
1454 struct action<body> {
1455 template <typename Input>
1456 static void apply(const Input& in, Ctx& ctx)
1458 // LOG(INFO) << "Message body:";
1459 auto const body = std::string_view(begin(in), end(in) - begin(in));
1461 ctx.dkv.eoh();
1462 ctx.dkv.body(body);
1464 if (ctx.mime_version) {
1465 // std::stringstream type;
1466 // type << "Content-Type: " << ctx.type << "/" << ctx.subtype;
1467 // for (auto const& p : ctx.ct_parameters) {
1468 // if ((type.str().length() + (3 + p.first.length() +
1469 // p.second.length()))
1470 // > 78)
1471 // type << ";\r\n\t";
1472 // else
1473 // type << "; ";
1474 // type << p.first << "=" << p.second;
1475 // }
1476 // LOG(INFO) << type.str();
1478 // memory_input<> body_in(body, "body");
1479 // if (!parse_nested<RFC5322::, RFC5322::action>(in, body_in, ctx)) {
1480 // LOG(ERROR) << "bad mime body";
1481 // }
1486 template <>
1487 struct action<message> {
1488 template <typename Input>
1489 static void apply(const Input& in, Ctx& ctx)
1491 // LOG(INFO) << "message";
1492 ctx.dkv.eom();
1494 // ctx.dkv.check();
1496 Domain from_domain;
1498 if (ctx.from_list.empty()) {
1499 // RFC-5322 says message must have a 'From:' header.
1500 LOG(ERROR) << "no (correct) RFC5322.From header";
1502 auto range = ctx.defined_hdrs.equal_range(defined_field("From"));
1503 for (auto it = range.first; it != range.second; ++it) {
1504 LOG(ERROR) << "using bogus '" << it->second << "'";
1505 // ctx.from_list.push_back(Mailbox(it->second));
1508 if (ctx.from_list.empty())
1509 return;
1512 if (ctx.from_list.size() > 1) {
1514 LOG(INFO) << ctx.from_list.size() << "multiple RFC5322.From addresses";
1515 for (auto& f : ctx.from_list) {
1516 LOG(INFO) << f;
1519 if (ctx.sender.empty()) {
1520 // Must have 'Sender:' says RFC-5322 section 3.6.2.
1521 LOG(ERROR)
1522 << "no RFC5322.Sender header with multiple RFC5322.From mailboxes";
1523 return;
1526 // find sender in from list
1527 // auto s = find(begin(ctx.from_list), end(ctx.from_list), ctx.sender);
1528 // if (s == end(ctx.from_list)) {
1529 // // can't be found, not an error
1530 // LOG(ERROR) << "No 'From:' match to 'Sender:'";
1532 // // must check all From:s
1533 // LOG(FATAL) << "write code to check all From: addresses";
1534 // }
1535 // else {
1536 // from_domain = ctx.sender;
1537 // LOG(INFO) << "using 'Sender:' domain " << ctx.sender.domain();
1538 // }
1540 else {
1542 from_domain = ctx.from_list[0].domain();
1544 // if (!ctx.sender.empty()) {
1545 // if (from_domain != ctx.sender.domain()) {
1546 // LOG(INFO) << "using 'Sender:' domain " << ctx.sender.domain()
1547 // << " in place of 'From:' domain " << from_domain;
1548 // from_domain = ctx.sender.domain();
1549 // }
1550 // }
1553 ctx.dmp.store_from_domain(from_domain.ascii().c_str());
1555 ctx.dkv.foreach_sig([&ctx](char const* domain, bool passed,
1556 char const* identity, char const* selector,
1557 char const* b) {
1558 LOG(INFO) << "DKIM check for " << domain
1559 << (passed ? " passed" : " failed");
1561 int result = passed ? DMARC_POLICY_DKIM_OUTCOME_PASS
1562 : DMARC_POLICY_DKIM_OUTCOME_FAIL;
1564 ctx.dmp.store_dkim(domain, selector, result, "I am human");
1567 ctx.dmp.query_dmarc(from_domain.ascii().c_str());
1569 // LOG(INFO) << "Message-ID: " << ctx.message_id;
1570 // LOG(INFO) << "Final DMARC advice for " << from_domain << ": "
1571 // << Advice_to_string(ctx.dmp.get_advice());
1573 if (ctx.msg_errors.size()) {
1574 for (auto e : ctx.msg_errors) {
1575 LOG(ERROR) << e;
1581 template <>
1582 struct action<obs_mbox_list> {
1583 template <typename Input>
1584 static void apply(const Input& in, Ctx& ctx)
1586 LOG(INFO) << "obsolete mailbox list: " << esc(in.string());
1590 template <>
1591 struct action<obs_addr_list> {
1592 template <typename Input>
1593 static void apply(const Input& in, Ctx& ctx)
1595 LOG(INFO) << "obsolete address list: " << esc(in.string());
1599 template <>
1600 struct action<obs_group_list> {
1601 template <typename Input>
1602 static void apply(const Input& in, Ctx& ctx)
1604 LOG(INFO) << "obsolete group list: " << esc(in.string());
1608 template <>
1609 struct action<angle_addr> {
1610 template <typename Input>
1611 static void apply(const Input& in, Ctx& ctx)
1613 // LOG(INFO) << "angle_addr: " << in.string();
1616 template <>
1617 struct action<display_name> {
1618 template <typename Input>
1619 static void apply(const Input& in, Ctx& ctx)
1621 // LOG(INFO) << "display_name: " << in.string();
1624 template <>
1625 struct action<name_addr> {
1626 template <typename Input>
1627 static void apply(const Input& in, Ctx& ctx)
1629 // LOG(INFO) << "name_addr: " << in.string();
1632 template <>
1633 struct action<name_addr_only> {
1634 template <typename Input>
1635 static void apply(const Input& in, Ctx& ctx)
1637 // LOG(INFO) << "name_addr_only: " << in.string();
1640 } // namespace RFC5322
1642 void display(RFC5322::Ctx const& ctx)
1644 // for (auto const& [name, value] : ctx.defined_hdrs) {
1645 // std::cout << name << ": " << value << '\n';
1646 // }
1647 // for (auto const& [name, value] : ctx.opt_hdrs) {
1648 // std::cout << name << ": " << value << '\n';
1649 // }
1652 void selftest()
1654 const char* name_addr_list_bad[]{
1655 "Gene Hightower . <gene@digilicious.com>",
1656 "via.Relay. <noreply@relay.firefox.com>",
1657 "[via Relay] <noreply@relay.firefox.com>",
1660 for (auto i : name_addr_list_bad) {
1661 memory_input<> in(i, i);
1662 RFC5322::Ctx ctx;
1663 if (parse<RFC5322::name_addr_only,
1664 RFC5322::action /*, tao::pegtl::tracer*/>(in, ctx)) {
1665 LOG(FATAL) << "Should not parse as name_addr_only \"" << i << "\"";
1669 const char* name_addr_list_good[]{
1670 "Gene Hightower <gene@digilicious.com>",
1671 "via Relay <noreply@relay.firefox.com>",
1672 "\"Gene Hightower <gene@digilicious.com> [via Relay]\""
1673 "<noreply@relay.firefox.com>",
1674 "\"Customer Care <care@bigcompany.com> via foo.com\" <noreply@foo.com>",
1677 for (auto i : name_addr_list_good) {
1678 memory_input<> in(i, i);
1679 RFC5322::Ctx ctx;
1680 if (!parse<RFC5322::name_addr_only,
1681 RFC5322::action /*, tao::pegtl::tracer*/>(in, ctx)) {
1682 LOG(FATAL) << "Error parsing as name_addr_only \"" << i << "\"";
1686 CHECK(RFC5322::is_defined_field("Subject"));
1687 CHECK(!RFC5322::is_defined_field("X-Subject"));
1689 const char* ip_list[]{
1690 "2607:f8b0:4001:c0b::22a",
1691 "127.0.0.1",
1694 for (auto i : ip_list) {
1695 memory_input<> in(i, i);
1696 RFC5322::Ctx ctx;
1697 if (!parse<RFC5322::ip, RFC5322::action /*, tao::pegtl::tracer*/>(in,
1698 ctx)) {
1699 LOG(ERROR) << "Error parsing as ip \"" << i << "\"";
1703 const char* rec_list[]{
1704 // github
1705 "Received: from github-smtp2a-ext-cp1-prd.iad.github.net "
1706 "(github-smtp2a-ext-cp1-prd.iad.github.net [192.30.253.16])\r\n"
1707 " by ismtpd0004p1iad1.sendgrid.net (SG) with ESMTP id "
1708 "OCAkwxSQQTiPcF-T3rLS3w\r\n"
1709 " for <gene-github@digilicious.com>; Tue, 23 May 2017 "
1710 "23:01:49.124 +0000 (UTC)\r\n",
1712 // sendgrid date is shit
1713 // "Received: by filter0810p1mdw1.sendgrid.net with SMTP id "
1714 // "filter0810p1mdw1-13879-5924BDA5-34\r\n"
1715 // " 2017-05-23 22:54:29.679063164 +0000 UTC\r\n",
1719 for (auto i : rec_list) {
1720 memory_input<> in(i, i);
1721 RFC5322::Ctx ctx;
1722 if (!parse<RFC5322::received, RFC5322::action /*, tao::pegtl::tracer*/>(
1723 in, ctx)) {
1724 LOG(ERROR) << "Error parsing as Received: \"" << i << "\"";
1728 const char* date_list[]{
1729 "Date: Tue, 30 May 2017 10:52:11 +0000 (UTC)\r\n",
1730 "Date: Mon, 29 May 2017 16:47:58 -0700\r\n",
1732 // this date is shit
1733 // "Date: Mon, 29 May 2017 19:47:08 EDT\r\n",
1736 for (auto i : date_list) {
1737 memory_input<> in(i, i);
1738 RFC5322::Ctx ctx;
1739 if (!parse<RFC5322::orig_date, RFC5322::action /*, tao::pegtl::tracer*/>(
1740 in, ctx)) {
1741 LOG(ERROR) << "Error parsing as Date: \"" << i << "\"";
1745 const char* const spf_list[]{
1746 // works
1747 "Received-SPF: pass (digilicious.com: domain of gmail.com designates "
1748 "74.125.82.46 as permitted sender) client-ip=74.125.82.46; "
1749 "envelope-from=l23456789O@gmail.com; helo=mail-wm0-f46.google.com;\r\n",
1751 // also works
1752 "Received-SPF: neutral (google.com: 2607:f8b0:4001:c0b::22a is neither "
1753 "permitted nor denied by best guess record for domain of "
1754 "1234567@riscv.org) client-ip=2607:f8b0:4001:c0b::22a;\r\n",
1757 for (auto i : spf_list) {
1758 memory_input<> in(i, i);
1759 RFC5322::Ctx ctx;
1760 if (!parse<RFC5322::received_spf, RFC5322::action /*, tao::pegtl::tracer*/>(
1761 in, ctx)) {
1762 LOG(ERROR) << "Error parsing as Received-SPF: \"" << i << "\"";
1767 int main(int argc, char* argv[])
1769 { // Need to work with either namespace.
1770 using namespace gflags;
1771 using namespace google;
1772 ParseCommandLineFlags(&argc, &argv, true);
1775 if (FLAGS_selftest) {
1776 selftest();
1777 return 0;
1780 for (auto i{1}; i < argc; ++i) {
1781 auto fn{argv[i]};
1782 auto name{fs::path(fn)};
1783 auto f{boost::iostreams::mapped_file_source(name)};
1784 auto in{memory_input<>(f.data(), f.size(), fn)};
1785 LOG(INFO) << "#### file: " << fn;
1786 try {
1787 RFC5322::Ctx ctx;
1788 // ctx.defined_hdrs.reserve(countof(RFC5322::defined_fields));
1789 if (!parse<RFC5322::message, RFC5322::action>(in, ctx)) {
1790 LOG(ERROR) << "parse returned false";
1792 display(ctx);
1794 catch (parse_error const& e) {
1795 std::cerr << e.what();
1796 return 1;
1799 return 0;