remove IP allow list, depend on domain reputation from FCrDNS
[ghsmtp.git] / msg.cpp
blob9d292510794d20f3514c5a34b71e278c1ae3b9d1
1 // Toy RFC-5322 message parser and DMARC validator.
3 #include <gflags/gflags.h>
4 namespace gflags {
5 // in case we didn't have one
8 DEFINE_bool(selftest, false, "run a self test");
10 #include <map>
11 #include <string>
12 #include <vector>
14 #include <glog/logging.h>
16 #include <fmt/format.h>
17 #include <fmt/ostream.h>
19 #include <boost/algorithm/string.hpp>
20 #include <boost/iostreams/device/mapped_file.hpp>
22 #include <iostream>
24 #include "Mailbox.hpp"
25 #include "OpenDKIM.hpp"
26 #include "OpenDMARC.hpp"
27 #include "SPF.hpp"
28 #include "esc.hpp"
29 #include "fs.hpp"
30 #include "iequal.hpp"
31 #include "osutil.hpp"
33 #include <tao/pegtl.hpp>
34 #include <tao/pegtl/contrib/abnf.hpp>
36 // #include <tao/pegtl/contrib/tracer.hpp>
38 using namespace tao::pegtl;
39 using namespace tao::pegtl::abnf;
/// Number of elements of a built-in array, available at compile time.
///
/// Only accepts real arrays (by reference), so passing a pointer is a
/// compile error rather than a silent wrong answer.
template <typename T, std::size_t N>
constexpr std::size_t countof(T const (&)[N]) noexcept
{
  return N;
}
47 namespace RFC5322 {
// Canonical spellings of the header field names this parser has a dedicated
// grammar rule for. The pointers stored here double as keys of
// Ctx::defined_hdrs, so look names up through defined_field() rather than
// comparing strings.
constexpr char const* defined_fields[]{

    // Trace Fields
    "Return-Path",
    "Received",
    "Received-SPF", // RFC 7208 added trace field

    // Sig
    "DKIM-Signature", // RFC 6376

    // Originator Fields
    "Date",
    "From",
    "Sender",
    "Reply-To",

    // Destination Address Fields
    "To",
    "Cc",
    "Bcc",

    // Identification Fields
    "Message-ID",
    "In-Reply-To",
    "References",

    // Informational Fields
    "Subject",
    "Comments",
    "Keywords",

    // Resent Fields
    "Resent-Date",
    "Resent-From",
    "Resent-Sender",
    "Resent-To",
    "Resent-Cc",
    "Resent-Bcc",
    "Resent-Message-ID",

    // MIME Fields
    "MIME-Version",

    "Content-Type",
    "Content-Transfer-Encoding",
    "Content-ID",
    "Content-Description",
};
98 bool is_defined_field(std::string_view name)
100 return std::find_if(std::begin(defined_fields), std::end(defined_fields),
101 [=](std::string_view v) { return iequal(name, v); })
102 != std::end(defined_fields);
105 char const* defined_field(std::string_view name)
107 auto df = std::find_if(std::begin(defined_fields), std::end(defined_fields),
108 [=](std::string_view v) { return iequal(name, v); });
109 if (df != std::end(defined_fields))
110 return *df;
111 return "";
/// Strict weak ordering on strings that ignores case (via strcasecmp), used
/// as the comparator of the case-insensitive maps in Ctx.
struct ci_less {
  bool operator()(std::string const& lhs, std::string const& rhs) const
  {
    return strcasecmp(lhs.c_str(), rhs.c_str()) < 0;
  }
};
121 struct Ctx {
122 OpenDKIM::verify dkv;
124 OpenDMARC::policy dmp;
126 std::string mb_loc;
127 std::string mb_dom;
129 std::vector<::Mailbox> mb_list; // temporary accumulator
131 std::vector<::Mailbox> from_list;
133 ::Mailbox sender;
135 std::string key;
136 std::string value;
138 std::vector<std::pair<std::string, std::string>> kv_list;
140 std::map<std::string, std::string, ci_less> spf_info;
141 std::string spf_result;
143 std::unordered_multimap<char const*, std::string> defined_hdrs;
144 std::multimap<std::string, std::string, ci_less> opt_hdrs;
146 std::string unstructured;
147 std::string id;
149 std::string message_id;
151 std::string opt_name;
152 std::string opt_value;
154 std::string type;
155 std::string subtype;
157 bool mime_version{false};
158 bool discrete_type{false};
159 bool composite_type{false};
161 std::vector<std::pair<std::string, std::string>> ct_parameters;
163 std::vector<std::string> msg_errors;
166 // clang-format off
// UTF-8 byte-level rules. Each UTF8_n rule matches one code point encoded
// in n bytes; the ranges follow the UTF8-1..UTF8-4 productions of RFC 3629.

// Continuation byte, 0x80..0xBF.
168 struct UTF8_tail : range<'\x80', '\xBF'> {};
// One-byte (ASCII) form; not referenced by UTF8_non_ascii below.
170 struct UTF8_1 : range<0x00, 0x7F> {};
// Two-byte form; the lead byte range starts at 0xC2.
172 struct UTF8_2 : seq<range<'\xC2', '\xDF'>, UTF8_tail> {};
// Three-byte form; lead bytes 0xE0 and 0xED restrict the second byte.
174 struct UTF8_3 : sor<seq<one<'\xE0'>, range<'\xA0', '\xBF'>, UTF8_tail>,
175 seq<range<'\xE1', '\xEC'>, rep<2, UTF8_tail>>,
176 seq<one<'\xED'>, range<'\x80', '\x9F'>, UTF8_tail>,
177 seq<range<'\xEE', '\xEF'>, rep<2, UTF8_tail>>> {};
// Four-byte form; lead bytes 0xF0 and 0xF4 restrict the second byte.
179 struct UTF8_4
180 : sor<seq<one<'\xF0'>, range<'\x90', '\xBF'>, rep<2, UTF8_tail>>,
181 seq<range<'\xF1', '\xF3'>, rep<3, UTF8_tail>>,
182 seq<one<'\xF4'>, range<'\x80', '\x8F'>, rep<2, UTF8_tail>>> {};
184 // UTF8_char = UTF8_1 | UTF8_2 | UTF8_3 | UTF8_4;
// Any multi-byte (non-ASCII) UTF-8 character.
186 struct UTF8_non_ascii : sor<UTF8_2, UTF8_3, UTF8_4> {};
// ABNF VCHAR extended with non-ASCII UTF-8.
188 struct VUCHAR : sor<VCHAR, UTF8_non_ascii> {};
// Short names for the two punctuation rules used throughout the grammar.
190 using dot = one<'.'>;
191 using colon = one<':'>;
// Any byte value 1..127 except LF (10) and CR (13), or a non-ASCII UTF-8
// character.
193 struct text : sor<ranges<1, 9, 11, 12, 14, 127>, UTF8_non_ascii> {};
195 // UTF-8 except NUL (0), LF (10) and CR (13).
196 // struct body : seq<star<seq<rep_max<998, text>, eol>>, rep_max<998, text>> {};
198 // BINARYMIME allows any byte
// The body is everything up to the end of input.
199 struct body : until<eof> {};
// Folding white space: an optional line break (preceded by optional WSP)
// followed by at least one WSP.
201 struct FWS : seq<opt<seq<star<WSP>, eol>>, plus<WSP>> {};
// Characters allowed bare inside a quoted string: printable ASCII except
// '"' (34) and '\' (92), or non-ASCII UTF-8.
203 struct qtext : sor<one<33>, ranges<35, 91, 93, 126>, UTF8_non_ascii> {};
// Backslash followed by a visible character or white space.
205 struct quoted_pair : seq<one<'\\'>, sor<VUCHAR, WSP>> {};
// Characters allowed in an atom: letters, digits, the punctuation listed
// below, or non-ASCII UTF-8.
207 struct atext : sor<ALPHA, DIGIT,
208 one<'!', '#',
209 '$', '%',
210 '&', '\'',
211 '*', '+',
212 '-', '/',
213 '=', '?',
214 '^', '_',
215 '`', '{',
216 '|', '}',
217 '~'>,
218 UTF8_non_ascii> {};
220 // ctext is ASCII not '(' or ')' or '\\'
// i.e. 33..126 without 40, 41 and 92, plus non-ASCII UTF-8.
221 struct ctext : sor<ranges<33, 39, 42, 91, 93, 126>, UTF8_non_ascii> {};
223 // <https://tools.ietf.org/html/rfc2047>
225 // especials = "(" / ")" / "<" / ">" / "@" / "," / ";" / ":" / "
226 // <"> / "/" / "[" / "]" / "?" / "." / "="
228 // token = 1*<Any CHAR except SPACE, CTLs, and especials>
230 struct tchar47 : ranges< // NUL..' '
231 33, 33, // !
232 // 34, 34, // "
233 35, 39, // #$%&'
234 // 40, 41, // ()
235 42, 43, // *+
236 // 44, 44, // ,
237 45, 45, // -
238 // 46, 47, // ./
239 48, 57, // 0123456789
240 // 58, 64, // ;:<=>?@
241 65, 90, // A..Z
242 // 91, 91, // [
243 92, 92, // '\\'
244 // 93, 93, // ]
245 94, 126 // ^_` a..z {|}~
246 // 127,127 // DEL
247 > {};
249 struct token47 : plus<tchar47> {};
251 struct charset : token47 {};
252 struct encoding : token47 {};
254 // encoded-text = 1*<Any printable ASCII character other than "?"
255 // or SPACE>
257 struct echar : ranges< // NUL..' '
258 33, 62, // !..>
259 // 63, 63, // ?
260 64, 126 // @A..Z[\]^_` a..z {|}~
261 // 127,127 // DEL
262 > {};
264 struct encoded_text : plus<echar> {};
266 // encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
268 // leading opt<FWS> is not in RFC 2047
270 struct encoded_word_book : seq<string<'=', '?'>,
271 charset, string<'?'>,
272 encoding, string<'?'>,
273 encoded_text,
274 string<'=', '?'>
275 > {};
277 struct encoded_word : seq<opt<FWS>, encoded_word_book> {};
// Comments, quoted strings, atoms and phrases.

// Forward declaration: comments nest, so ccontent and comment refer to
// each other.
279 struct comment;
281 struct ccontent : sor<ctext, quoted_pair, comment, encoded_word> {};
283 // from <https://tools.ietf.org/html/rfc2047>
284 // comment = "(" *(ctext / quoted-pair / comment / encoded-word) ")"
286 struct comment
287 : seq<one<'('>, star<seq<opt<FWS>, ccontent>>, opt<FWS>, one<')'>> {};
// Comments and/or folding white space.
289 struct CFWS : sor<seq<plus<seq<opt<FWS>, comment>, opt<FWS>>>, FWS> {};
291 struct qcontent : sor<qtext, quoted_pair> {};
293 // Corrected in errata ID: 3135
// The surrounding optional CFWS is part of the match, so actions that
// capture a quoted_string also capture that white space.
294 struct quoted_string
295 : seq<opt<CFWS>,
296 DQUOTE,
297 sor<seq<star<seq<opt<FWS>, qcontent>>, opt<FWS>>, FWS>,
298 DQUOTE,
299 opt<CFWS>> {};
301 // *([FWS] VCHAR) *WSP
// Uses VUCHAR, so non-ASCII UTF-8 is accepted as well.
302 struct unstructured : seq<star<seq<opt<FWS>, VUCHAR>>, star<WSP>> {};
304 struct atom : seq<opt<CFWS>, plus<atext>, opt<CFWS>> {};
// One or more atext runs separated by single dots.
306 struct dot_atom_text : list<plus<atext>, dot> {};
308 struct dot_atom : seq<opt<CFWS>, dot_atom_text, opt<CFWS>> {};
310 struct word : sor<atom, quoted_string> {};
312 // obs-phrase = word *(word / "." / CFWS)
// encoded_word is tried before word at every position.
314 struct phrase : plus<sor<encoded_word, word>> {};
// IP address literals.

// Decimal octet. Alternatives are tried in order: 250-255, 200-249,
// three digits starting with 0 or 1, then one or two digits.
316 struct dec_octet : sor<seq<string<'2','5'>, range<'0','5'>>,
317 seq<one<'2'>, range<'0','4'>, DIGIT>,
318 seq<range<'0', '1'>, rep<2, DIGIT>>,
319 rep_min_max<1, 2, DIGIT>> {};
// Dotted quad.
321 struct ipv4_address
322 : seq<dec_octet, dot, dec_octet, dot, dec_octet, dot, dec_octet> {};
// One to four hex digits.
324 struct h16 : rep_min_max<1, 4, HEXDIG> {};
// Last 32 bits of an IPv6 address: two h16 groups or an embedded IPv4.
326 struct ls32 : sor<seq<h16, colon, h16>, ipv4_address> {};
// The "::" elision marker.
328 struct dcolon : two<':'> {};
// One alternative per position the "::" can take, from no elision at all
// down to a trailing "::".
330 struct ipv6_address : sor<seq< rep<6, h16, colon>, ls32>,
331 seq< dcolon, rep<5, h16, colon>, ls32>,
332 seq<opt<h16 >, dcolon, rep<4, h16, colon>, ls32>,
333 seq<opt<h16, opt< colon, h16>>, dcolon, rep<3, h16, colon>, ls32>,
334 seq<opt<h16, rep_opt<2, colon, h16>>, dcolon, rep<2, h16, colon>, ls32>,
335 seq<opt<h16, rep_opt<3, colon, h16>>, dcolon, h16, colon, ls32>,
336 seq<opt<h16, rep_opt<4, colon, h16>>, dcolon, ls32>,
337 seq<opt<h16, rep_opt<5, colon, h16>>, dcolon, h16>,
338 seq<opt<h16, rep_opt<6, colon, h16>>, dcolon >> {};
340 struct ip : sor<ipv4_address, ipv6_address> {};
// Address specification.

342 struct local_part : sor<dot_atom, quoted_string> {};
// Printable ASCII except '[' (91), '\' (92) and ']' (93).
344 struct dtext : ranges<33, 90, 94, 126> {};
// Bracketed domain literal such as "[192.0.2.1]"; the content is only
// checked against dtext, not against the ip rule.
346 struct domain_literal : seq<opt<CFWS>,
347 one<'['>,
348 star<seq<opt<FWS>, dtext>>,
349 opt<FWS>,
350 one<']'>,
351 opt<CFWS>> {};
353 struct domain : sor<dot_atom, domain_literal> {};
355 struct addr_spec : seq<local_part, one<'@'>, domain> {};
357 struct angle_addr : seq<opt<CFWS>, one<'<'>, addr_spec, one<'>'>, opt<CFWS>> {};
// An angle_addr, or the empty path "<>".
359 struct path
360 : sor<angle_addr, seq<opt<CFWS>, one<'<'>, opt<CFWS>, one<'>'>, opt<CFWS>>> {};
362 struct display_name : phrase {};
364 struct name_addr : seq<opt<display_name>, angle_addr> {};
// A name_addr that must consume the whole input.
366 struct name_addr_only : seq<name_addr, eof> {};
368 struct mailbox : sor<name_addr, addr_spec> {};
// Defined further down, after the mailbox and address lists.
370 struct group_list;
// display-name ":" [group-list] ";"
372 struct group
373 : seq<display_name, one<':'>, opt<group_list>, one<';'>, opt<CFWS>> {};
375 struct address : sor<mailbox, group> {};
// Mailbox, address and group lists. With OBSOLETE_SYNTAX defined (as it is
// here) each list also accepts the obsolete form quoted in the comment above
// it; the strict comma separated list is always tried first.
377 #define OBSOLETE_SYNTAX
379 #ifdef OBSOLETE_SYNTAX
380 // *([CFWS] ",") mailbox *("," [mailbox / CFWS])
381 struct obs_mbox_list : seq<star<seq<opt<CFWS>, one<','>>>,
382 mailbox,
383 star<one<','>, opt<sor<mailbox, CFWS>>>> {};
385 struct mailbox_list : sor<list<mailbox, one<','>>, obs_mbox_list> {};
386 #else
387 struct mailbox_list : list<mailbox, one<','>> {};
388 #endif
390 #ifdef OBSOLETE_SYNTAX
391 // *([CFWS] ",") address *("," [address / CFWS])
392 struct obs_addr_list : seq<star<seq<opt<CFWS>, one<','>>>,
393 address,
394 star<one<','>, opt<sor<address, CFWS>>>> {};
396 struct address_list : sor<list<address, one<','>>, obs_addr_list> {};
397 #else
398 struct address_list : list<address, one<','>> {};
399 #endif
401 #ifdef OBSOLETE_SYNTAX
402 // 1*([CFWS] ",") [CFWS]
403 struct obs_group_list : seq<plus<seq<opt<CFWS>, one<','>>>, opt<CFWS>> {};
405 struct group_list : sor<mailbox_list, CFWS, obs_group_list> {};
406 #else
407 struct group_list : sor<mailbox_list, CFWS> {};
408 #endif
410 // 3.3. Date and Time Specification (mostly from RFC 2822)
// Only the current syntax is accepted; the obsolete forms are kept below as
// commented-out reference.

// One or two digits, optionally preceded by folding white space.
412 struct day : seq<opt<FWS>, rep_min_max<1, 2, DIGIT>> {};
// Three-letter month names, matched case-insensitively.
414 struct month_name : sor<TAO_PEGTL_ISTRING("Jan"),
415 TAO_PEGTL_ISTRING("Feb"),
416 TAO_PEGTL_ISTRING("Mar"),
417 TAO_PEGTL_ISTRING("Apr"),
418 TAO_PEGTL_ISTRING("May"),
419 TAO_PEGTL_ISTRING("Jun"),
420 TAO_PEGTL_ISTRING("Jul"),
421 TAO_PEGTL_ISTRING("Aug"),
422 TAO_PEGTL_ISTRING("Sep"),
423 TAO_PEGTL_ISTRING("Oct"),
424 TAO_PEGTL_ISTRING("Nov"),
425 TAO_PEGTL_ISTRING("Dec")> {};
// The month name must have white space on both sides.
427 struct month : seq<FWS, month_name, FWS> {};
// Exactly four digits.
429 struct year : rep<4, DIGIT> {};
431 struct date : seq<day, month, year> {};
// Three-letter day names, matched case-insensitively.
433 struct day_name : sor<TAO_PEGTL_ISTRING("Mon"),
434 TAO_PEGTL_ISTRING("Tue"),
435 TAO_PEGTL_ISTRING("Wed"),
436 TAO_PEGTL_ISTRING("Thu"),
437 TAO_PEGTL_ISTRING("Fri"),
438 TAO_PEGTL_ISTRING("Sat"),
439 TAO_PEGTL_ISTRING("Sun")> {};
441 // struct obs_day_of_week : seq<opt<CFWS>, day_name, opt<CFWS>> {
442 // };
444 // struct obs_day : seq<opt<CFWS>, rep_min_max<1, 2, DIGIT>, opt<CFWS>> {
445 // };
447 // struct obs_year : seq<opt<CFWS>, rep<2, DIGIT>, opt<CFWS>> {
448 // };
450 // struct obs_hour : seq<opt<CFWS>, rep<2, DIGIT>, opt<CFWS>> {
451 // };
453 // struct obs_minute : seq<opt<CFWS>, rep<2, DIGIT>, opt<CFWS>> {
454 // };
456 // struct obs_second : seq<opt<CFWS>, rep<2, DIGIT>, opt<CFWS>> {
457 // };
459 // struct obs_day_of_week : seq<opt<CFWS>, day_name, opt<CFWS>> {
460 // }
462 struct day_of_week : seq<opt<FWS>, day_name> {};
464 struct hour : rep<2, DIGIT> {};
466 struct minute : rep<2, DIGIT> {};
468 struct second : rep<2, DIGIT> {};
470 struct millisecond : rep<3, DIGIT> {};
472 // RFC-5322 extension is optional milliseconds
// hh:mm[:ss[.mmm]]
473 struct time_of_day
474 : seq<hour,
475 one<':'>,
476 minute,
477 opt<seq<one<':'>, second, opt<seq<one<'.'>, millisecond>>>>> {};
479 // struct obs_zone : sor<range<65, 73>,
480 // range<75, 90>,
481 // range<97, 105>,
482 // range<107, 122>,
483 // TAO_PEGTL_ISTRING("UT"),
484 // TAO_PEGTL_ISTRING("GMT"),
485 // TAO_PEGTL_ISTRING("EST"),
486 // TAO_PEGTL_ISTRING("EDT"),
487 // TAO_PEGTL_ISTRING("CST"),
488 // TAO_PEGTL_ISTRING("CDT"),
489 // TAO_PEGTL_ISTRING("MST"),
490 // TAO_PEGTL_ISTRING("MDT"),
491 // TAO_PEGTL_ISTRING("PST"),
492 // TAO_PEGTL_ISTRING("PDT")> {
493 // };
// Numeric offset only: a sign followed by four digits.
495 struct zone : seq<sor<one<'+'>, one<'-'>>, rep<4, DIGIT>> {};
497 struct time : seq<time_of_day, FWS, zone> {};
// [day-of-week ","] date time, optionally followed by comments/white space.
499 struct date_time
500 : seq<opt<seq<day_of_week, one<','>>>, date, FWS, time, opt<CFWS>> {};
// Header field rules. Every rule matches the field name case-insensitively
// (including the colon) and runs through the terminating eol.
502 // The Origination Date Field
503 struct orig_date : seq<TAO_PEGTL_ISTRING("Date:"), date_time, eol> {};
505 // Originator Fields
506 struct from : seq<TAO_PEGTL_ISTRING("From:"), opt<FWS>, mailbox_list, opt<FWS>, eol> {};
508 struct sender : seq<TAO_PEGTL_ISTRING("Sender:"), mailbox, eol> {};
510 struct reply_to : seq<TAO_PEGTL_ISTRING("Reply-To:"), address_list, eol> {};
// An address list, or the bare word "Postmaster" (any case).
// NOTE(review): the "Postmaster" alternative has no leading white space
// allowance, so it only matches directly after the colon — confirm intent.
512 struct address_list_or_pm : sor<TAO_PEGTL_ISTRING("Postmaster"), address_list> {};
514 // Destination Address Fields
515 struct to : seq<TAO_PEGTL_ISTRING("To:"), address_list_or_pm, eol> {};
517 struct cc : seq<TAO_PEGTL_ISTRING("Cc:"), address_list, eol> {};
// Bcc: may be empty or contain only comments/white space.
519 struct bcc : seq<TAO_PEGTL_ISTRING("Bcc:"), opt<sor<address_list, CFWS>>, eol> {};
521 // Identification Fields
// Domain literal without folding: "[" *dtext "]".
523 struct no_fold_literal : seq<one<'['>, star<dtext>, one<']'>> {};
525 struct id_left : dot_atom_text {};
527 struct id_right : sor<dot_atom_text, no_fold_literal> {};
// "<" id-left "@" id-right ">", with optional CFWS on both sides; the CFWS
// is part of the match.
529 struct msg_id
530 : seq<opt<CFWS>, one<'<'>, id_left, one<'@'>, id_right, one<'>'>, opt<CFWS>> {};
532 struct message_id : seq<TAO_PEGTL_ISTRING("Message-ID:"), msg_id, eol> {};
// In-Reply-To: needs at least one msg-id, References: accepts none.
534 struct in_reply_to : seq<TAO_PEGTL_ISTRING("In-Reply-To:"), plus<msg_id>, eol> {};
536 struct references : seq<TAO_PEGTL_ISTRING("References:"), star<msg_id>, eol> {};
538 // Informational Fields
540 struct subject : seq<TAO_PEGTL_ISTRING("Subject:"), unstructured, eol> {};
542 struct comments : seq<TAO_PEGTL_ISTRING("Comments:"), unstructured, eol> {};
// Comma separated phrases.
544 struct keywords
545 : seq<TAO_PEGTL_ISTRING("Keywords:"), list<phrase, one<','>>, eol> {};
547 // Resent Fields
// Same value syntax as the corresponding non-resent fields, except that
// Resent-To: takes a plain address_list.
549 struct resent_date : seq<TAO_PEGTL_ISTRING("Resent-Date:"), date_time, eol> {};
551 struct resent_from : seq<TAO_PEGTL_ISTRING("Resent-From:"), mailbox_list, eol> {};
553 struct resent_sender : seq<TAO_PEGTL_ISTRING("Resent-Sender:"), mailbox, eol> {};
555 struct resent_to : seq<TAO_PEGTL_ISTRING("Resent-To:"), address_list, eol> {};
557 struct resent_cc : seq<TAO_PEGTL_ISTRING("Resent-Cc:"), address_list, eol> {};
559 struct resent_bcc
560 : seq<TAO_PEGTL_ISTRING("Resent-Bcc:"), opt<sor<address_list, CFWS>>, eol> {};
562 struct resent_msg_id
563 : seq<TAO_PEGTL_ISTRING("Resent-Message-ID:"), msg_id, eol> {};
565 // Trace Fields
567 struct return_path : seq<TAO_PEGTL_ISTRING("Return-Path:"), opt<FWS>, path, eol> {};
569 // Facebook, among others
// Return-Path: with a bare addr-spec, i.e. without the angle brackets.
571 struct return_path_non_standard : seq<TAO_PEGTL_ISTRING("Return-Path:"),
572 opt<CFWS>,
573 addr_spec,
574 star<WSP>,
575 eol> {};
577 struct received_token : sor<angle_addr, addr_spec, domain, word> {};
// Received: [tokens or CFWS] ";" date-time [WSP comment]
579 struct received : seq<TAO_PEGTL_ISTRING("Received:"),
580 opt<sor<plus<received_token>, CFWS>>,
581 one<';'>,
582 date_time,
583 opt<seq<WSP, comment>>,
584 eol> {};
// SPF result keyword, matched case-insensitively.
586 struct result : sor<TAO_PEGTL_ISTRING("Pass"),
587 TAO_PEGTL_ISTRING("Fail"),
588 TAO_PEGTL_ISTRING("SoftFail"),
589 TAO_PEGTL_ISTRING("Neutral"),
590 TAO_PEGTL_ISTRING("None"),
591 TAO_PEGTL_ISTRING("TempError"),
592 TAO_PEGTL_ISTRING("PermError")> {};
// Keys accepted in the key=value list of a Received-SPF: header.
594 struct spf_key : sor<TAO_PEGTL_ISTRING("client-ip"),
595 TAO_PEGTL_ISTRING("envelope-from"),
596 TAO_PEGTL_ISTRING("helo"),
597 TAO_PEGTL_ISTRING("problem"),
598 TAO_PEGTL_ISTRING("receiver"),
599 TAO_PEGTL_ISTRING("identity"),
600 TAO_PEGTL_ISTRING("mechanism")> {};
602 // This value syntax (allowing addr_spec and angle_addr) is not in
603 // accordance with RFC 7208 (or 4408) but is what is effectivly used
604 // by libspf2 1.2.10 and before.
606 struct spf_value : sor<ip, addr_spec, dot_atom, quoted_string, angle_addr> {};
608 struct spf_key_value_pair : seq<spf_key, opt<CFWS>, one<'='>, spf_value> {};
// Pairs separated by ";", with an optional trailing ";".
610 struct spf_key_value_list
611 : seq<spf_key_value_pair,
612 star<seq<one<';'>, opt<CFWS>, spf_key_value_pair>>,
613 opt<one<';'>>> {};
// Received-SPF: result [FWS comment] [FWS key-value-list]
615 struct received_spf : seq<TAO_PEGTL_ISTRING("Received-SPF:"),
616 opt<CFWS>,
617 result,
618 opt<seq<FWS, comment>>,
619 opt<seq<FWS, spf_key_value_list>>,
620 eol> {};
// The signature value is only matched as unstructured text here.
622 struct dkim_signature
623 : seq<TAO_PEGTL_ISTRING("DKIM-Signature:"), unstructured, eol> {};
// MIME-Version: accepts only version "1.0"; comments/white space may
// appear around the digits and the dot.
625 struct mime_version : seq<TAO_PEGTL_ISTRING("MIME-Version:"),
626 opt<CFWS>,
627 one<'1'>,
628 opt<CFWS>,
629 one<'.'>,
630 opt<CFWS>,
631 one<'0'>,
632 opt<CFWS>,
633 eol> {};
// MIME tokens, media types and parameters. The table below lists which
// ASCII codes are excluded from tokens and which ranges remain.
635 // CTL := <any ASCII control ; ( 0- 37, 0.- 31.)
636 // character and DEL> ; ( 177, 127.)
638 // SPACE := 32
640 // especials := "(" / ")" / "<" / ">" / "@" /
641 // "," / ";" / ":" / "\" / <">
642 // "/" / "[" / "]" / "?" / "="
644 // ! 33
646 // 33-33
648 // " 34
650 // 35-39
652 // ( 40
653 // ) 41
655 // 42-43
657 // , 44
659 // 45-46
661 // / 47
663 // 48-57
665 // : 58
666 // ; 59
667 // < 60
668 // = 61
669 // > 62
670 // ? 63
671 // @ 64
673 // 65-90
675 // [ 91
676 // \ 92
677 // ] 93
679 // 94-126
681 // token := 1*<any (US-ASCII) CHAR except CTLs, SPACE,
682 // or tspecials>
// The ranges kept by the table above.
684 struct tchar : ranges<33, 33, 35, 39, 42, 43, 45, 46, 48, 57, 65, 90, 94, 126> {};
686 struct token : plus<tchar> {};
688 struct ietf_token : token {};
690 struct x_token : seq<TAO_PEGTL_ISTRING("X-"), token> {};
692 struct extension_token : sor<x_token, ietf_token> {};
// NOTE(review): extension_token accepts any token, so discrete_type also
// matches "message" and "multipart"; since `type` tries discrete_type
// first, composite_type appears unreachable through `type` — confirm.
694 struct discrete_type : sor<TAO_PEGTL_ISTRING("text"),
695 TAO_PEGTL_ISTRING("image"),
696 TAO_PEGTL_ISTRING("audio"),
697 TAO_PEGTL_ISTRING("video"),
698 TAO_PEGTL_ISTRING("application"),
699 extension_token> {};
701 struct composite_type : sor<TAO_PEGTL_ISTRING("message"),
702 TAO_PEGTL_ISTRING("multipart"),
703 extension_token> {};
705 struct type : sor<discrete_type, composite_type> {};
707 struct subtype : token {};
709 // value := token / quoted-string
711 // attribute := token
713 // parameter := attribute "=" value
715 struct value : sor<token, quoted_string> {};
717 struct attribute : token {};
// No white space is allowed around the "=".
719 struct parameter : seq<attribute, one<'='>, value> {};
// Content-Type: type "/" subtype *(";" parameter) [";"]
721 struct content : seq<TAO_PEGTL_ISTRING("Content-Type:"),
722 opt<CFWS>,
723 seq<type, one<'/'>, subtype>,
724 star<seq<one<';'>, opt<CFWS>, parameter>>,
725 opt<one<';'>>, // not strictly RFC 2045, but common
726 eol> {};
728 // mechanism := "7bit" / "8bit" / "binary" /
729 // "quoted-printable" / "base64" /
730 // ietf-token / x-token
// NOTE(review): ietf_token accepts any token, including ones starting with
// "X-", so the trailing x_token alternative looks unreachable — confirm.
732 struct mechanism : sor<TAO_PEGTL_ISTRING("7bit"),
733 TAO_PEGTL_ISTRING("8bit"),
734 TAO_PEGTL_ISTRING("binary"),
735 TAO_PEGTL_ISTRING("quoted-printable"),
736 TAO_PEGTL_ISTRING("base64"),
737 ietf_token,
738 x_token> {};
740 struct content_transfer_encoding
741 : seq<TAO_PEGTL_ISTRING("Content-Transfer-Encoding:"),
742 opt<CFWS>,
743 mechanism,
744 eol> {};
// Content-ID: uses the same msg-id syntax as Message-ID:.
746 struct id : seq<TAO_PEGTL_ISTRING("Content-ID:"), msg_id, eol> {};
748 struct description
749 : seq<TAO_PEGTL_ISTRING("Content-Description:"), star<text>, eol> {};
751 // Optional Fields
// Printable ASCII except ':' (58).
753 struct ftext : ranges<33, 57, 59, 126> {};
755 struct field_name : plus<ftext> {};
757 struct field_value : unstructured {};
// Generic "name: value" header. It is the last alternative of `fields`, so
// it also picks up defined fields whose own rule failed to match.
759 struct optional_field : seq<field_name, one<':'>, field_value, eol> {};
761 // message header
763 struct fields : star<sor<
764 return_path,
765 return_path_non_standard,
766 received,
767 received_spf,
769 dkim_signature,
771 orig_date,
772 from,
773 sender,
774 reply_to,
778 bcc,
780 message_id,
781 in_reply_to,
782 references,
784 subject,
785 comments,
786 keywords,
788 resent_date,
789 resent_from,
790 resent_sender,
791 resent_to,
792 resent_cc,
793 resent_bcc,
794 resent_msg_id,
796 mime_version,
797 content,
798 content_transfer_encoding,
800 description,
802 optional_field
803 >> {};
805 struct message : seq<fields, opt<seq<eol, body>>, eof> {};
807 // clang-format on
809 template <typename Rule>
810 struct action : nothing<Rule> {
813 template <>
814 struct action<fields> {
815 template <typename Input>
816 static void apply(Input const& in, Ctx& ctx)
818 // LOG(INFO) << "fields";
822 template <>
823 struct action<unstructured> {
824 template <typename Input>
825 static void apply(Input const& in, Ctx& ctx)
827 ctx.unstructured = in.string();
831 template <>
832 struct action<field_name> {
833 template <typename Input>
834 static void apply(Input const& in, Ctx& ctx)
836 ctx.opt_name = in.string();
840 template <>
841 struct action<field_value> {
842 template <typename Input>
843 static void apply(Input const& in, Ctx& ctx)
845 ctx.opt_value = in.string();
849 template <typename Input>
850 static void header(Input const& in, Ctx& ctx)
852 ctx.dkv.header(std::string_view(begin(in), end(in) - begin(in)));
855 template <>
856 struct action<optional_field> {
857 template <typename Input>
858 static void apply(Input const& in, Ctx& ctx)
860 // LOG(INFO) << "optional_field";
861 if (is_defined_field(ctx.opt_name)) {
862 // So, this is a syntax error in a defined field.
863 if (ctx.opt_name == "Received") {
864 // Go easy on Received lines, they tend to be wild and woolly.
865 // LOG(INFO) << in.string();
867 else {
868 auto const err
869 = fmt::format("syntax error in: \"{}\"", esc(in.string()));
870 ctx.msg_errors.push_back(err);
871 LOG(ERROR) << err;
873 ctx.defined_hdrs.emplace(defined_field(ctx.opt_name), ctx.opt_value);
875 else {
876 ctx.opt_hdrs.emplace(ctx.opt_name, ctx.opt_value);
878 header(in, ctx);
879 ctx.unstructured.clear();
880 ctx.mb_list.clear();
884 template <>
885 struct action<local_part> {
886 template <typename Input>
887 static void apply(Input const& in, Ctx& ctx)
889 ctx.mb_loc = in.string();
890 boost::trim(ctx.mb_loc);
894 template <>
895 struct action<domain> {
896 template <typename Input>
897 static void apply(Input const& in, Ctx& ctx)
899 ctx.mb_dom = in.string();
900 // LOG(INFO) << "domain == '" << ctx.mb_dom << "'";
904 template <>
905 struct action<mailbox> {
906 static void apply0(Ctx& ctx)
908 // LOG(INFO) << "mailbox emplace_back(" << ctx.mb_loc << '@' << ctx.mb_dom
909 // << ')';
910 ctx.mb_list.emplace_back(ctx.mb_loc, ctx.mb_dom);
914 template <>
915 struct action<orig_date> {
916 template <typename Input>
917 static void apply(const Input& in, Ctx& ctx)
919 // LOG(INFO) << "Date:";
920 header(in, ctx);
924 // Originator Fields
926 template <>
927 struct action<from> {
928 template <typename Input>
929 static void apply(const Input& in, Ctx& ctx)
931 if (!ctx.from_list.empty()) {
932 fmt::memory_buffer msg;
933 fmt::format_to(msg, "multiple 'From:' address headers, previous:\n");
934 for (auto const& add : ctx.from_list) {
935 fmt::format_to(msg, " {}\n", add);
937 fmt::format_to(msg, "new: {}", in.string());
938 ctx.msg_errors.push_back(fmt::to_string(msg));
941 header(in, ctx);
942 ctx.from_list = std::move(ctx.mb_list);
943 ctx.mb_list.clear();
947 template <>
948 struct action<sender> {
949 template <typename Input>
950 static void apply(const Input& in, Ctx& ctx)
952 if (!ctx.sender.empty()) {
953 auto const err
954 = fmt::format("multiple 'Sender:' headers, previous: {}, this: {}",
955 static_cast<std::string>(ctx.sender), in.string());
956 ctx.msg_errors.push_back(err);
958 header(in, ctx);
959 CHECK_EQ(ctx.mb_list.size(), 1);
960 ctx.sender = std::move(ctx.mb_list[0]);
961 ctx.mb_list.clear();
965 template <>
966 struct action<reply_to> {
967 template <typename Input>
968 static void apply(const Input& in, Ctx& ctx)
970 header(in, ctx);
971 ctx.mb_list.clear();
975 // Destination Address Fields
977 template <>
978 struct action<to> {
979 template <typename Input>
980 static void apply(const Input& in, Ctx& ctx)
982 header(in, ctx);
983 ctx.mb_list.clear();
987 template <>
988 struct action<cc> {
989 template <typename Input>
990 static void apply(const Input& in, Ctx& ctx)
992 header(in, ctx);
993 ctx.mb_list.clear();
997 template <>
998 struct action<bcc> {
999 template <typename Input>
1000 static void apply(const Input& in, Ctx& ctx)
1002 header(in, ctx);
1003 ctx.mb_list.clear();
1007 // Identification Fields
1009 template <>
1010 struct action<msg_id> {
1011 template <typename Input>
1012 static void apply(const Input& in, Ctx& ctx)
1014 ctx.id = in.string();
1015 boost::trim(ctx.id);
1019 template <>
1020 struct action<message_id> {
1021 template <typename Input>
1022 static void apply(const Input& in, Ctx& ctx)
1024 header(in, ctx);
1025 if (!ctx.message_id.empty()) {
1026 LOG(ERROR) << "multiple message IDs: " << ctx.message_id << " and "
1027 << ctx.id;
1029 ctx.message_id = ctx.id;
1033 template <>
1034 struct action<in_reply_to> {
1035 template <typename Input>
1036 static void apply(const Input& in, Ctx& ctx)
1038 header(in, ctx);
1042 template <>
1043 struct action<references> {
1044 template <typename Input>
1045 static void apply(const Input& in, Ctx& ctx)
1047 header(in, ctx);
1051 // Informational Fields
1053 template <>
1054 struct action<subject> {
1055 template <typename Input>
1056 static void apply(const Input& in, Ctx& ctx)
1058 header(in, ctx);
1059 ctx.unstructured.clear();
1063 template <>
1064 struct action<comments> {
1065 template <typename Input>
1066 static void apply(const Input& in, Ctx& ctx)
1068 header(in, ctx);
1069 ctx.unstructured.clear();
1073 template <>
1074 struct action<keywords> {
1075 template <typename Input>
1076 static void apply(const Input& in, Ctx& ctx)
1078 header(in, ctx);
1082 // Resent Fields
1084 template <>
1085 struct action<resent_date> {
1086 template <typename Input>
1087 static void apply(const Input& in, Ctx& ctx)
1089 header(in, ctx);
1093 template <>
1094 struct action<resent_from> {
1095 template <typename Input>
1096 static void apply(const Input& in, Ctx& ctx)
1098 header(in, ctx);
1099 ctx.mb_list.clear();
1103 template <>
1104 struct action<resent_sender> {
1105 template <typename Input>
1106 static void apply(const Input& in, Ctx& ctx)
1108 header(in, ctx);
1109 ctx.mb_list.clear();
1113 template <>
1114 struct action<resent_to> {
1115 template <typename Input>
1116 static void apply(const Input& in, Ctx& ctx)
1118 header(in, ctx);
1119 ctx.mb_list.clear();
1123 template <>
1124 struct action<resent_cc> {
1125 template <typename Input>
1126 static void apply(const Input& in, Ctx& ctx)
1128 header(in, ctx);
1129 ctx.mb_list.clear();
1133 template <>
1134 struct action<resent_bcc> {
1135 template <typename Input>
1136 static void apply(const Input& in, Ctx& ctx)
1138 header(in, ctx);
1139 ctx.mb_list.clear();
1143 template <>
1144 struct action<resent_msg_id> {
1145 template <typename Input>
1146 static void apply(const Input& in, Ctx& ctx)
1148 header(in, ctx);
1152 // Trace Fields
1154 template <>
1155 struct action<return_path> {
1156 template <typename Input>
1157 static void apply(const Input& in, Ctx& ctx)
1159 header(in, ctx);
1160 ctx.mb_list.clear();
1164 template <>
1165 struct action<return_path_non_standard> {
1166 template <typename Input>
1167 static void apply(const Input& in, Ctx& ctx)
1169 // LOG(INFO) << "Return-Path: is retarded: " << esc(in.string());
1170 header(in, ctx);
1171 ctx.mb_list.clear();
1175 template <>
1176 struct action<received> {
1177 template <typename Input>
1178 static void apply(const Input& in, Ctx& ctx)
1180 header(in, ctx);
1181 ctx.mb_list.clear();
1185 template <>
1186 struct action<result> {
1187 template <typename Input>
1188 static void apply(const Input& in, Ctx& ctx)
1190 ctx.spf_result = std::move(in.string());
1191 boost::to_lower(ctx.spf_result);
1195 template <>
1196 struct action<spf_key> {
1197 template <typename Input>
1198 static void apply(const Input& in, Ctx& ctx)
1200 ctx.key = std::move(in.string());
1204 template <>
1205 struct action<spf_value> {
1206 template <typename Input>
1207 static void apply(const Input& in, Ctx& ctx)
1209 ctx.value = std::move(in.string());
1210 boost::trim(ctx.value);
1214 template <>
1215 struct action<spf_key_value_pair> {
1216 template <typename Input>
1217 static void apply(const Input& in, Ctx& ctx)
1219 ctx.kv_list.emplace_back(ctx.key, ctx.value);
1220 ctx.key.clear();
1221 ctx.value.clear();
1225 template <>
1226 struct action<spf_key_value_list> {
1227 static void apply0(Ctx& ctx)
1229 for (auto kvp : ctx.kv_list) {
1230 ctx.spf_info[kvp.first] = kvp.second;
1235 template <>
1236 struct action<received_spf> {
1237 template <typename Input>
1238 static void apply(const Input& in, Ctx& ctx)
1240 // LOG(INFO) << "Received-SPF:";
1242 // Do a fresh check now:
1244 auto node = osutil::get_hostname();
1246 SPF::Server spf_srv(node.c_str());
1247 SPF::Request spf_req(spf_srv);
1249 spf_req.set_ip_str(ctx.spf_info["client-ip"].c_str());
1251 spf_req.set_helo_dom(ctx.spf_info["helo"].c_str());
1252 if (ctx.spf_info.find("envelope-from") != end(ctx.spf_info)) {
1253 spf_req.set_env_from(ctx.spf_info["envelope-from"].c_str());
1256 SPF::Response spf_res(spf_req);
1257 auto res = spf_res.result();
1258 CHECK_NE(res, SPF::Result::INVALID);
1260 if (ctx.spf_result != res.c_str()) {
1261 LOG(WARNING) << "SPF results changed: "
1262 << "new result is \"" << res << "\", old result is \""
1263 << ctx.spf_result << "\"";
1266 // Get result from header:
1268 int pol_spf = DMARC_POLICY_SPF_OUTCOME_PASS;
1270 // Pass is the default:
1271 // if (ctx.spf_result == "pass") {
1272 // pol_spf = DMARC_POLICY_SPF_OUTCOME_PASS;
1273 // }
1275 // if ((ctx.spf_result == "neutral") || (ctx.spf_result == "softfail")) {
1276 // // could also be a FAIL maybe...
1277 // pol_spf = DMARC_POLICY_SPF_OUTCOME_PASS;
1278 // }
1280 if (ctx.spf_result == "none") {
1281 pol_spf = DMARC_POLICY_SPF_OUTCOME_NONE;
1284 if (ctx.spf_result == "temperror") {
1285 pol_spf = DMARC_POLICY_SPF_OUTCOME_TMPFAIL;
1288 if ((ctx.spf_result == "fail") || (ctx.spf_result == "permerror")) {
1289 pol_spf = DMARC_POLICY_SPF_OUTCOME_FAIL;
1292 if (ctx.spf_info.find("client-ip") != end(ctx.spf_info)) {
1293 ctx.dmp.connect(ctx.spf_info["client-ip"].c_str());
1294 // LOG(INFO) << "SPF: ip==" << ctx.spf_info["client-ip"] << ", "
1295 // << ctx.spf_result;
1298 // Google sometimes doesn't put in anything but client-ip
1299 if (ctx.spf_info.find("envelope-from") != end(ctx.spf_info)) {
1300 auto dom = ctx.spf_info["envelope-from"];
1301 auto origin = DMARC_POLICY_SPF_ORIGIN_MAILFROM;
1303 if (dom == "<>") {
1304 dom = ctx.spf_info["helo"];
1305 origin = DMARC_POLICY_SPF_ORIGIN_HELO;
1306 LOG(INFO) << "SPF: origin HELO " << dom;
1308 else {
1309 memory_input<> addr_in(dom, "dom");
1310 if (!parse<RFC5322::addr_spec, RFC5322::action>(addr_in, ctx)) {
1311 LOG(FATAL) << "Failed to parse domain: " << dom;
1313 dom = ctx.mb_dom;
1314 origin = DMARC_POLICY_SPF_ORIGIN_MAILFROM;
1315 LOG(INFO) << "SPF: origin MAIL FROM " << dom;
1317 ctx.mb_loc.clear();
1318 ctx.mb_dom.clear();
1320 ctx.dmp.store_spf(dom.c_str(), pol_spf, origin, "are you human?");
1323 ctx.mb_list.clear();
1327 template <>
1328 struct action<dkim_signature> {
1329 template <typename Input>
1330 static void apply(const Input& in, Ctx& ctx)
1332 header(in, ctx);
1333 CHECK(ctx.dkv.sig_syntax(ctx.unstructured)) << ctx.unstructured;
1334 ctx.unstructured.clear();
1338 template <>
1339 struct action<received_token> {
1340 template <typename Input>
1341 static void apply(const Input& in, Ctx& ctx)
1346 template <>
1347 struct action<mime_version> {
1348 template <typename Input>
1349 static void apply(const Input& in, Ctx& ctx)
1351 header(in, ctx);
1352 ctx.mime_version = true;
1356 template <>
1357 struct action<content> {
1358 template <typename Input>
1359 static void apply(const Input& in, Ctx& ctx)
1361 header(in, ctx);
1362 // ctx.unstructured.clear();
1366 template <>
1367 struct action<discrete_type> {
1368 template <typename Input>
1369 static void apply(const Input& in, Ctx& ctx)
1371 ctx.discrete_type = true;
1372 ctx.type = in.string();
1376 template <>
1377 struct action<composite_type> {
1378 template <typename Input>
1379 static void apply(const Input& in, Ctx& ctx)
1381 ctx.composite_type = true;
1382 ctx.type = in.string();
1386 template <>
1387 struct action<subtype> {
1388 template <typename Input>
1389 static void apply(const Input& in, Ctx& ctx)
1391 ctx.subtype = in.string();
1395 template <>
1396 struct action<content_transfer_encoding> {
1397 template <typename Input>
1398 static void apply(const Input& in, Ctx& ctx)
1400 header(in, ctx);
1401 // ctx.unstructured.clear();
1405 template <>
1406 struct action<id> {
1407 template <typename Input>
1408 static void apply(const Input& in, Ctx& ctx)
1410 header(in, ctx);
1414 template <>
1415 struct action<description> {
1416 template <typename Input>
1417 static void apply(const Input& in, Ctx& ctx)
1419 header(in, ctx);
1423 template <>
1424 struct action<attribute> {
1425 template <typename Input>
1426 static void apply(const Input& in, Ctx& ctx)
1428 ctx.key = in.string();
1432 template <>
1433 struct action<parameter> {
1434 template <typename Input>
1435 static void apply(const Input& in, Ctx& ctx)
1437 ctx.ct_parameters.emplace_back(ctx.key, ctx.value);
1438 ctx.key.clear();
1439 ctx.value.clear();
1443 template <>
1444 struct action<value> {
1445 template <typename Input>
1446 static void apply(const Input& in, Ctx& ctx)
1448 ctx.value = in.string();
1452 template <>
1453 struct action<body> {
1454 template <typename Input>
1455 static void apply(const Input& in, Ctx& ctx)
1457 // LOG(INFO) << "Message body:";
1458 auto const body = std::string_view(begin(in), end(in) - begin(in));
1460 ctx.dkv.eoh();
1461 ctx.dkv.body(body);
1463 if (ctx.mime_version) {
1464 // std::stringstream type;
1465 // type << "Content-Type: " << ctx.type << "/" << ctx.subtype;
1466 // for (auto const& p : ctx.ct_parameters) {
1467 // if ((type.str().length() + (3 + p.first.length() +
1468 // p.second.length()))
1469 // > 78)
1470 // type << ";\r\n\t";
1471 // else
1472 // type << "; ";
1473 // type << p.first << "=" << p.second;
1474 // }
1475 // LOG(INFO) << type.str();
1477 // memory_input<> body_in(body, "body");
1478 // if (!parse_nested<RFC5322::, RFC5322::action>(in, body_in, ctx)) {
1479 // LOG(ERROR) << "bad mime body";
1480 // }
1485 template <>
1486 struct action<message> {
1487 template <typename Input>
1488 static void apply(const Input& in, Ctx& ctx)
1490 // LOG(INFO) << "message";
1491 ctx.dkv.eom();
1493 // ctx.dkv.check();
1495 Domain from_domain;
1497 if (ctx.from_list.empty()) {
1498 // RFC-5322 says message must have a 'From:' header.
1499 LOG(ERROR) << "no (correct) RFC5322.From header";
1501 auto range = ctx.defined_hdrs.equal_range(defined_field("From"));
1502 for (auto it = range.first; it != range.second; ++it) {
1503 LOG(ERROR) << "using bogus '" << it->second << "'";
1504 // ctx.from_list.push_back(Mailbox(it->second));
1507 if (ctx.from_list.empty())
1508 return;
1511 if (ctx.from_list.size() > 1) {
1513 LOG(INFO) << ctx.from_list.size() << "multiple RFC5322.From addresses";
1514 for (auto& f : ctx.from_list) {
1515 LOG(INFO) << f;
1518 if (ctx.sender.empty()) {
1519 // Must have 'Sender:' says RFC-5322 section 3.6.2.
1520 LOG(ERROR)
1521 << "no RFC5322.Sender header with multiple RFC5322.From mailboxes";
1522 return;
1525 // find sender in from list
1526 // auto s = find(begin(ctx.from_list), end(ctx.from_list), ctx.sender);
1527 // if (s == end(ctx.from_list)) {
1528 // // can't be found, not an error
1529 // LOG(ERROR) << "No 'From:' match to 'Sender:'";
1531 // // must check all From:s
1532 // LOG(FATAL) << "write code to check all From: addresses";
1533 // }
1534 // else {
1535 // from_domain = ctx.sender;
1536 // LOG(INFO) << "using 'Sender:' domain " << ctx.sender.domain();
1537 // }
1539 else {
1541 from_domain = ctx.from_list[0].domain();
1543 // if (!ctx.sender.empty()) {
1544 // if (from_domain != ctx.sender.domain()) {
1545 // LOG(INFO) << "using 'Sender:' domain " << ctx.sender.domain()
1546 // << " in place of 'From:' domain " << from_domain;
1547 // from_domain = ctx.sender.domain();
1548 // }
1549 // }
1552 ctx.dmp.store_from_domain(from_domain.ascii().c_str());
1554 ctx.dkv.foreach_sig([&ctx](char const* domain, bool passed,
1555 char const* identity, char const* selector,
1556 char const* b) {
1557 LOG(INFO) << "DKIM check for " << domain
1558 << (passed ? " passed" : " failed");
1560 int result = passed ? DMARC_POLICY_DKIM_OUTCOME_PASS
1561 : DMARC_POLICY_DKIM_OUTCOME_FAIL;
1563 ctx.dmp.store_dkim(domain, selector, result, "I am human");
1566 ctx.dmp.query_dmarc(from_domain.ascii().c_str());
1568 // LOG(INFO) << "Message-ID: " << ctx.message_id;
1569 // LOG(INFO) << "Final DMARC advice for " << from_domain << ": "
1570 // << Advice_to_string(ctx.dmp.get_advice());
1572 if (ctx.msg_errors.size()) {
1573 for (auto e : ctx.msg_errors) {
1574 LOG(ERROR) << e;
1580 template <>
1581 struct action<obs_mbox_list> {
1582 template <typename Input>
1583 static void apply(const Input& in, Ctx& ctx)
1585 LOG(INFO) << "obsolete mailbox list: " << esc(in.string());
1589 template <>
1590 struct action<obs_addr_list> {
1591 template <typename Input>
1592 static void apply(const Input& in, Ctx& ctx)
1594 LOG(INFO) << "obsolete address list: " << esc(in.string());
1598 template <>
1599 struct action<obs_group_list> {
1600 template <typename Input>
1601 static void apply(const Input& in, Ctx& ctx)
1603 LOG(INFO) << "obsolete group list: " << esc(in.string());
1607 template <>
1608 struct action<angle_addr> {
1609 template <typename Input>
1610 static void apply(const Input& in, Ctx& ctx)
1612 // LOG(INFO) << "angle_addr: " << in.string();
1615 template <>
1616 struct action<display_name> {
1617 template <typename Input>
1618 static void apply(const Input& in, Ctx& ctx)
1620 // LOG(INFO) << "display_name: " << in.string();
1623 template <>
1624 struct action<name_addr> {
1625 template <typename Input>
1626 static void apply(const Input& in, Ctx& ctx)
1628 // LOG(INFO) << "name_addr: " << in.string();
1631 template <>
1632 struct action<name_addr_only> {
1633 template <typename Input>
1634 static void apply(const Input& in, Ctx& ctx)
1636 // LOG(INFO) << "name_addr_only: " << in.string();
1639 } // namespace RFC5322
1641 void display(RFC5322::Ctx const& ctx)
1643 // for (auto const& [name, value] : ctx.defined_hdrs) {
1644 // std::cout << name << ": " << value << '\n';
1645 // }
1646 // for (auto const& [name, value] : ctx.opt_hdrs) {
1647 // std::cout << name << ": " << value << '\n';
1648 // }
1651 void selftest()
1653 const char* name_addr_list_bad[]{
1654 "Gene Hightower . <gene@digilicious.com>",
1655 "via.Relay. <noreply@relay.firefox.com>",
1656 "[via Relay] <noreply@relay.firefox.com>",
1659 for (auto i : name_addr_list_bad) {
1660 memory_input<> in(i, i);
1661 RFC5322::Ctx ctx;
1662 if (parse<RFC5322::name_addr_only,
1663 RFC5322::action /*, tao::pegtl::tracer*/>(in, ctx)) {
1664 LOG(FATAL) << "Should not parse as name_addr_only \"" << i << "\"";
1668 const char* name_addr_list_good[]{
1669 "Gene Hightower <gene@digilicious.com>",
1670 "via Relay <noreply@relay.firefox.com>",
1671 "\"Gene Hightower <gene@digilicious.com> [via Relay]\""
1672 "<noreply@relay.firefox.com>",
1673 "\"Customer Care <care@bigcompany.com> via foo.com\" <noreply@foo.com>",
1676 for (auto i : name_addr_list_good) {
1677 memory_input<> in(i, i);
1678 RFC5322::Ctx ctx;
1679 if (!parse<RFC5322::name_addr_only,
1680 RFC5322::action /*, tao::pegtl::tracer*/>(in, ctx)) {
1681 LOG(FATAL) << "Error parsing as name_addr_only \"" << i << "\"";
1685 CHECK(RFC5322::is_defined_field("Subject"));
1686 CHECK(!RFC5322::is_defined_field("X-Subject"));
1688 const char* ip_list[]{
1689 "2607:f8b0:4001:c0b::22a",
1690 "127.0.0.1",
1693 for (auto i : ip_list) {
1694 memory_input<> in(i, i);
1695 RFC5322::Ctx ctx;
1696 if (!parse<RFC5322::ip, RFC5322::action /*, tao::pegtl::tracer*/>(in,
1697 ctx)) {
1698 LOG(ERROR) << "Error parsing as ip \"" << i << "\"";
1702 const char* rec_list[]{
1703 // github
1704 "Received: from github-smtp2a-ext-cp1-prd.iad.github.net "
1705 "(github-smtp2a-ext-cp1-prd.iad.github.net [192.30.253.16])\r\n"
1706 " by ismtpd0004p1iad1.sendgrid.net (SG) with ESMTP id "
1707 "OCAkwxSQQTiPcF-T3rLS3w\r\n"
1708 " for <gene-github@digilicious.com>; Tue, 23 May 2017 "
1709 "23:01:49.124 +0000 (UTC)\r\n",
1711 // sendgrid date is shit
1712 // "Received: by filter0810p1mdw1.sendgrid.net with SMTP id "
1713 // "filter0810p1mdw1-13879-5924BDA5-34\r\n"
1714 // " 2017-05-23 22:54:29.679063164 +0000 UTC\r\n",
1718 for (auto i : rec_list) {
1719 memory_input<> in(i, i);
1720 RFC5322::Ctx ctx;
1721 if (!parse<RFC5322::received, RFC5322::action /*, tao::pegtl::tracer*/>(
1722 in, ctx)) {
1723 LOG(ERROR) << "Error parsing as Received: \"" << i << "\"";
1727 const char* date_list[]{
1728 "Date: Tue, 30 May 2017 10:52:11 +0000 (UTC)\r\n",
1729 "Date: Mon, 29 May 2017 16:47:58 -0700\r\n",
1731 // this date is shit
1732 // "Date: Mon, 29 May 2017 19:47:08 EDT\r\n",
1735 for (auto i : date_list) {
1736 memory_input<> in(i, i);
1737 RFC5322::Ctx ctx;
1738 if (!parse<RFC5322::orig_date, RFC5322::action /*, tao::pegtl::tracer*/>(
1739 in, ctx)) {
1740 LOG(ERROR) << "Error parsing as Date: \"" << i << "\"";
1744 const char* const spf_list[]{
1745 // works
1746 "Received-SPF: pass (digilicious.com: domain of gmail.com designates "
1747 "74.125.82.46 as permitted sender) client-ip=74.125.82.46; "
1748 "envelope-from=l23456789O@gmail.com; helo=mail-wm0-f46.google.com;\r\n",
1750 // also works
1751 "Received-SPF: neutral (google.com: 2607:f8b0:4001:c0b::22a is neither "
1752 "permitted nor denied by best guess record for domain of "
1753 "1234567@riscv.org) client-ip=2607:f8b0:4001:c0b::22a;\r\n",
1756 for (auto i : spf_list) {
1757 memory_input<> in(i, i);
1758 RFC5322::Ctx ctx;
1759 if (!parse<RFC5322::received_spf, RFC5322::action /*, tao::pegtl::tracer*/>(
1760 in, ctx)) {
1761 LOG(ERROR) << "Error parsing as Received-SPF: \"" << i << "\"";
1766 int main(int argc, char* argv[])
1768 { // Need to work with either namespace.
1769 using namespace gflags;
1770 using namespace google;
1771 ParseCommandLineFlags(&argc, &argv, true);
1774 if (FLAGS_selftest) {
1775 selftest();
1776 return 0;
1779 for (auto i{1}; i < argc; ++i) {
1780 auto fn{argv[i]};
1781 auto name{fs::path(fn)};
1782 auto f{boost::iostreams::mapped_file_source(name)};
1783 auto in{memory_input<>(f.data(), f.size(), fn)};
1784 LOG(INFO) << "#### file: " << fn;
1785 try {
1786 RFC5322::Ctx ctx;
1787 // ctx.defined_hdrs.reserve(countof(RFC5322::defined_fields));
1788 if (!parse<RFC5322::message, RFC5322::action>(in, ctx)) {
1789 LOG(ERROR) << "parse returned false";
1791 display(ctx);
1793 catch (parse_error const& e) {
1794 std::cerr << e.what();
1795 return 1;
1798 return 0;