removed
[ghsmtp.git] / msg.cpp
blobb0bc79f523919d255b279fc19f4869e9759f941b
1 // Toy RFC-5322 message parser and DMARC validator.
3 #include <gflags/gflags.h>
4 namespace gflags {
5 // in case we didn't have one
8 DEFINE_bool(selftest, false, "run a self test");
10 #include <map>
11 #include <string>
12 #include <vector>
14 #include <glog/logging.h>
16 #include <fmt/format.h>
17 #include <fmt/ostream.h>
19 #include <boost/algorithm/string.hpp>
20 #include <boost/iostreams/device/mapped_file.hpp>
22 #include <iostream>
24 #include "Mailbox.hpp"
25 #include "OpenDKIM.hpp"
26 #include "OpenDMARC.hpp"
27 #include "SPF.hpp"
28 #include "esc.hpp"
29 #include "fs.hpp"
30 #include "iequal.hpp"
31 #include "osutil.hpp"
33 #include <tao/pegtl.hpp>
34 #include <tao/pegtl/contrib/abnf.hpp>
36 // #include <tao/pegtl/contrib/tracer.hpp>
38 using namespace tao::pegtl;
39 using namespace tao::pegtl::abnf;
/// Number of elements in a built-in array, available at compile time.
template <typename Elem, std::size_t Extent>
constexpr std::size_t countof(Elem const (&)[Extent]) noexcept
{
  return Extent;
}
47 namespace RFC5322 {
// Canonical spellings of the header field names this parser has
// dedicated grammar rules for.  Lookups compare case-insensitively and
// hand back the pointer stored here.
constexpr char const* defined_fields[]{

    // Trace
    "Return-Path",
    "Received",
    "Received-SPF", // trace field added by RFC 7208

    // Signature
    "DKIM-Signature", // RFC 6376

    // Originator
    "Date",
    "From",
    "Sender",
    "Reply-To",

    // Destination address
    "To",
    "Cc",
    "Bcc",

    // Identification
    "Message-ID",
    "In-Reply-To",
    "References",

    // Informational
    "Subject",
    "Comments",
    "Keywords",

    // Resent
    "Resent-Date",
    "Resent-From",
    "Resent-Sender",
    "Resent-To",
    "Resent-Cc",
    "Resent-Bcc",
    "Resent-Message-ID",

    // MIME
    "MIME-Version",

    "Content-Type",
    "Content-Transfer-Encoding",
    "Content-ID",
    "Content-Description",
};
98 bool is_defined_field(std::string_view name)
100 return std::find_if(std::begin(defined_fields), std::end(defined_fields),
101 [=](std::string_view v) { return iequal(name, v); })
102 != std::end(defined_fields);
105 char const* defined_field(std::string_view name)
107 auto df = std::find_if(std::begin(defined_fields), std::end(defined_fields),
108 [=](std::string_view v) { return iequal(name, v); });
109 if (df != std::end(defined_fields))
110 return *df;
111 return "";
/// Strict-weak ordering for std::string keys that ignores case; the
/// comparison is delegated to strcasecmp on the C strings.
struct ci_less {
  bool operator()(std::string const& lhs, std::string const& rhs) const
  {
    int const order = strcasecmp(lhs.c_str(), rhs.c_str());
    return order < 0;
  }
};
121 struct Ctx {
122 OpenDKIM::verify dkv;
124 OpenDMARC::policy dmp;
126 std::string mb_loc;
127 std::string mb_dom;
129 std::vector<::Mailbox> mb_list; // temporary accumulator
131 std::vector<::Mailbox> from_list;
133 ::Mailbox sender;
135 std::string key;
136 std::string value;
138 std::vector<std::pair<std::string, std::string>> kv_list;
140 std::map<std::string, std::string, ci_less> spf_info;
141 std::string spf_result;
143 std::unordered_multimap<char const*, std::string> defined_hdrs;
144 std::multimap<std::string, std::string, ci_less> opt_hdrs;
146 std::string unstructured;
147 std::string id;
149 std::string message_id;
151 std::string opt_name;
152 std::string opt_value;
154 std::string type;
155 std::string subtype;
157 bool mime_version{false};
158 bool discrete_type{false};
159 bool composite_type{false};
161 std::vector<std::pair<std::string, std::string>> ct_parameters;
163 std::vector<std::string> msg_errors;
166 // clang-format off
168 struct UTF8_tail : range<'\x80', '\xBF'> {};
170 struct UTF8_1 : range<0x00, 0x7F> {};
172 struct UTF8_2 : seq<range<'\xC2', '\xDF'>, UTF8_tail> {};
174 struct UTF8_3 : sor<seq<one<'\xE0'>, range<'\xA0', '\xBF'>, UTF8_tail>,
175 seq<range<'\xE1', '\xEC'>, rep<2, UTF8_tail>>,
176 seq<one<'\xED'>, range<'\x80', '\x9F'>, UTF8_tail>,
177 seq<range<'\xEE', '\xEF'>, rep<2, UTF8_tail>>> {};
179 struct UTF8_4
180 : sor<seq<one<'\xF0'>, range<'\x90', '\xBF'>, rep<2, UTF8_tail>>,
181 seq<range<'\xF1', '\xF3'>, rep<3, UTF8_tail>>,
182 seq<one<'\xF4'>, range<'\x80', '\x8F'>, rep<2, UTF8_tail>>> {};
184 // UTF8_char = UTF8_1 | UTF8_2 | UTF8_3 | UTF8_4;
186 struct UTF8_non_ascii : sor<UTF8_2, UTF8_3, UTF8_4> {};
188 struct VUCHAR : sor<VCHAR, UTF8_non_ascii> {};
190 using dot = one<'.'>;
191 using colon = one<':'>;
193 struct text : sor<ranges<1, 9, 11, 12, 14, 127>, UTF8_non_ascii> {};
195 // UTF-8 except NUL (0), LF (10) and CR (13).
196 // struct body : seq<star<seq<rep_max<998, text>, eol>>, rep_max<998, text>> {};
198 // BINARYMIME allows any byte
199 struct body : until<eof> {};
201 struct FWS : seq<opt<seq<star<WSP>, eol>>, plus<WSP>> {};
203 struct qtext : sor<one<33>, ranges<35, 91, 93, 126>, UTF8_non_ascii> {};
205 struct quoted_pair : seq<one<'\\'>, sor<VUCHAR, WSP>> {};
207 struct atext : sor<ALPHA, DIGIT,
208 one<'!', '#',
209 '$', '%',
210 '&', '\'',
211 '*', '+',
212 '-', '/',
213 '=', '?',
214 '^', '_',
215 '`', '{',
216 '|', '}',
217 '~'>,
218 UTF8_non_ascii> {};
220 // ctext is ASCII not '(' or ')' or '\\'
221 struct ctext : sor<ranges<33, 39, 42, 91, 93, 126>, UTF8_non_ascii> {};
223 // <https://tools.ietf.org/html/rfc2047>
225 // especials = "(" / ")" / "<" / ">" / "@" / "," / ";" / ":" / "
226 // <"> / "/" / "[" / "]" / "?" / "." / "="
228 // token = 1*<Any CHAR except SPACE, CTLs, and especials>
230 struct tchar47 : ranges< // NUL..' '
231 33, 33, // !
232 // 34, 34, // "
233 35, 39, // #$%&'
234 // 40, 41, // ()
235 42, 43, // *+
236 // 44, 44, // ,
237 45, 45, // -
238 // 46, 47, // ./
239 48, 57, // 0123456789
240 // 58, 64, // ;:<=>?@
241 65, 90, // A..Z
242 // 91, 91, // [
243 92, 92, // '\\'
244 // 93, 93, // ]
245 94, 126 // ^_` a..z {|}~
246 // 127,127 // DEL
247 > {};
249 struct token47 : plus<tchar47> {};
251 struct charset : token47 {};
252 struct encoding : token47 {};
254 // encoded-text = 1*<Any printable ASCII character other than "?"
255 // or SPACE>
257 struct echar : ranges< // NUL..' '
258 33, 62, // !..>
259 // 63, 63, // ?
260 64, 126 // @A..Z[\]^_` a..z {|}~
261 // 127,127 // DEL
262 > {};
264 struct encoded_text : plus<echar> {};
266 // encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
268 // leading opt<FWS> is not in RFC 2047
270 struct encoded_word_book : seq<string<'=', '?'>,
271 charset, string<'?'>,
272 encoding, string<'?'>,
273 encoded_text,
274 string<'=', '?'>
275 > {};
277 struct encoded_word : seq<opt<FWS>, encoded_word_book> {};
279 struct comment;
281 struct ccontent : sor<ctext, quoted_pair, comment, encoded_word> {};
283 // from <https://tools.ietf.org/html/rfc2047>
284 // comment = "(" *(ctext / quoted-pair / comment / encoded-word) ")"
286 struct comment
287 : seq<one<'('>, star<seq<opt<FWS>, ccontent>>, opt<FWS>, one<')'>> {};
289 struct CFWS : sor<seq<plus<seq<opt<FWS>, comment>, opt<FWS>>>, FWS> {};
291 struct qcontent : sor<qtext, quoted_pair> {};
293 // Corrected in errata ID: 3135
294 struct quoted_string
295 : seq<opt<CFWS>,
296 DQUOTE,
297 sor<seq<star<seq<opt<FWS>, qcontent>>, opt<FWS>>, FWS>,
298 DQUOTE,
299 opt<CFWS>> {};
301 // *([FWS] VCHAR) *WSP
302 struct unstructured : seq<star<seq<opt<FWS>, VUCHAR>>, star<WSP>> {};
304 struct atom : seq<opt<CFWS>, plus<atext>, opt<CFWS>> {};
306 struct dot_atom_text : list<plus<atext>, dot> {};
308 struct dot_atom : seq<opt<CFWS>, dot_atom_text, opt<CFWS>> {};
310 struct word : sor<atom, quoted_string> {};
312 // obs-phrase = word *(word / "." / CFWS)
314 struct phrase : plus<sor<encoded_word, word>> {};
316 struct dec_octet : sor<seq<string<'2','5'>, range<'0','5'>>,
317 seq<one<'2'>, range<'0','4'>, DIGIT>,
318 seq<one<'1'>, rep<2, DIGIT>>,
319 seq<range<'1', '9'>, DIGIT>,
320 DIGIT
321 > {};
322 struct ipv4_address
323 : seq<dec_octet, dot, dec_octet, dot, dec_octet, dot, dec_octet> {};
325 struct h16 : rep_min_max<1, 4, HEXDIG> {};
327 struct ls32 : sor<seq<h16, colon, h16>, ipv4_address> {};
329 struct dcolon : two<':'> {};
331 struct ipv6_address : sor<seq< rep<6, h16, colon>, ls32>,
332 seq< dcolon, rep<5, h16, colon>, ls32>,
333 seq<opt<h16 >, dcolon, rep<4, h16, colon>, ls32>,
334 seq<opt<h16, opt< colon, h16>>, dcolon, rep<3, h16, colon>, ls32>,
335 seq<opt<h16, rep_opt<2, colon, h16>>, dcolon, rep<2, h16, colon>, ls32>,
336 seq<opt<h16, rep_opt<3, colon, h16>>, dcolon, h16, colon, ls32>,
337 seq<opt<h16, rep_opt<4, colon, h16>>, dcolon, ls32>,
338 seq<opt<h16, rep_opt<5, colon, h16>>, dcolon, h16>,
339 seq<opt<h16, rep_opt<6, colon, h16>>, dcolon >> {};
341 struct ip : sor<ipv4_address, ipv6_address> {};
343 struct local_part : sor<dot_atom, quoted_string> {};
345 struct dtext : ranges<33, 90, 94, 126> {};
347 struct domain_literal : seq<opt<CFWS>,
348 one<'['>,
349 star<seq<opt<FWS>, dtext>>,
350 opt<FWS>,
351 one<']'>,
352 opt<CFWS>> {};
354 struct domain : sor<dot_atom, domain_literal> {};
356 struct addr_spec : seq<local_part, one<'@'>, domain> {};
358 struct angle_addr : seq<opt<CFWS>, one<'<'>, addr_spec, one<'>'>, opt<CFWS>> {};
360 struct path
361 : sor<angle_addr, seq<opt<CFWS>, one<'<'>, opt<CFWS>, one<'>'>, opt<CFWS>>> {};
363 struct display_name : phrase {};
365 struct name_addr : seq<opt<display_name>, angle_addr> {};
367 struct name_addr_only : seq<name_addr, eof> {};
369 struct mailbox : sor<name_addr, addr_spec> {};
371 struct group_list;
373 struct group
374 : seq<display_name, one<':'>, opt<group_list>, one<';'>, opt<CFWS>> {};
376 struct address : sor<mailbox, group> {};
378 #define OBSOLETE_SYNTAX
380 #ifdef OBSOLETE_SYNTAX
381 // *([CFWS] ",") mailbox *("," [mailbox / CFWS])
382 struct obs_mbox_list : seq<star<seq<opt<CFWS>, one<','>>>,
383 mailbox,
384 star<one<','>, opt<sor<mailbox, CFWS>>>> {};
386 struct mailbox_list : sor<list<mailbox, one<','>>, obs_mbox_list> {};
387 #else
388 struct mailbox_list : list<mailbox, one<','>> {};
389 #endif
391 #ifdef OBSOLETE_SYNTAX
392 // *([CFWS] ",") address *("," [address / CFWS])
393 struct obs_addr_list : seq<star<seq<opt<CFWS>, one<','>>>,
394 address,
395 star<one<','>, opt<sor<address, CFWS>>>> {};
397 struct address_list : sor<list<address, one<','>>, obs_addr_list> {};
398 #else
399 struct address_list : list<address, one<','>> {};
400 #endif
402 #ifdef OBSOLETE_SYNTAX
403 // 1*([CFWS] ",") [CFWS]
404 struct obs_group_list : seq<plus<seq<opt<CFWS>, one<','>>>, opt<CFWS>> {};
406 struct group_list : sor<mailbox_list, CFWS, obs_group_list> {};
407 #else
408 struct group_list : sor<mailbox_list, CFWS> {};
409 #endif
411 // 3.3. Date and Time Specification (mostly from RFC 2822)
413 struct day : seq<opt<FWS>, rep_min_max<1, 2, DIGIT>> {};
415 struct month_name : sor<TAO_PEGTL_ISTRING("Jan"),
416 TAO_PEGTL_ISTRING("Feb"),
417 TAO_PEGTL_ISTRING("Mar"),
418 TAO_PEGTL_ISTRING("Apr"),
419 TAO_PEGTL_ISTRING("May"),
420 TAO_PEGTL_ISTRING("Jun"),
421 TAO_PEGTL_ISTRING("Jul"),
422 TAO_PEGTL_ISTRING("Aug"),
423 TAO_PEGTL_ISTRING("Sep"),
424 TAO_PEGTL_ISTRING("Oct"),
425 TAO_PEGTL_ISTRING("Nov"),
426 TAO_PEGTL_ISTRING("Dec")> {};
428 struct month : seq<FWS, month_name, FWS> {};
430 struct year : rep<4, DIGIT> {};
432 struct date : seq<day, month, year> {};
434 struct day_name : sor<TAO_PEGTL_ISTRING("Mon"),
435 TAO_PEGTL_ISTRING("Tue"),
436 TAO_PEGTL_ISTRING("Wed"),
437 TAO_PEGTL_ISTRING("Thu"),
438 TAO_PEGTL_ISTRING("Fri"),
439 TAO_PEGTL_ISTRING("Sat"),
440 TAO_PEGTL_ISTRING("Sun")> {};
442 // struct obs_day_of_week : seq<opt<CFWS>, day_name, opt<CFWS>> {
443 // };
445 // struct obs_day : seq<opt<CFWS>, rep_min_max<1, 2, DIGIT>, opt<CFWS>> {
446 // };
448 // struct obs_year : seq<opt<CFWS>, rep<2, DIGIT>, opt<CFWS>> {
449 // };
451 // struct obs_hour : seq<opt<CFWS>, rep<2, DIGIT>, opt<CFWS>> {
452 // };
454 // struct obs_minute : seq<opt<CFWS>, rep<2, DIGIT>, opt<CFWS>> {
455 // };
457 // struct obs_second : seq<opt<CFWS>, rep<2, DIGIT>, opt<CFWS>> {
458 // };
460 // struct obs_day_of_week : seq<opt<CFWS>, day_name, opt<CFWS>> {
461 // }
463 struct day_of_week : seq<opt<FWS>, day_name> {};
465 struct hour : rep<2, DIGIT> {};
467 struct minute : rep<2, DIGIT> {};
469 struct second : rep<2, DIGIT> {};
471 struct millisecond : rep<3, DIGIT> {};
473 // RFC-5322 extension is optional milliseconds
474 struct time_of_day
475 : seq<hour,
476 one<':'>,
477 minute,
478 opt<seq<one<':'>, second, opt<seq<one<'.'>, millisecond>>>>> {};
480 // struct obs_zone : sor<range<65, 73>,
481 // range<75, 90>,
482 // range<97, 105>,
483 // range<107, 122>,
484 // TAO_PEGTL_ISTRING("UT"),
485 // TAO_PEGTL_ISTRING("GMT"),
486 // TAO_PEGTL_ISTRING("EST"),
487 // TAO_PEGTL_ISTRING("EDT"),
488 // TAO_PEGTL_ISTRING("CST"),
489 // TAO_PEGTL_ISTRING("CDT"),
490 // TAO_PEGTL_ISTRING("MST"),
491 // TAO_PEGTL_ISTRING("MDT"),
492 // TAO_PEGTL_ISTRING("PST"),
493 // TAO_PEGTL_ISTRING("PDT")> {
494 // };
496 struct zone : seq<sor<one<'+'>, one<'-'>>, rep<4, DIGIT>> {};
498 struct time : seq<time_of_day, FWS, zone> {};
500 struct date_time
501 : seq<opt<seq<day_of_week, one<','>>>, date, FWS, time, opt<CFWS>> {};
503 // The Origination Date Field
504 struct orig_date : seq<TAO_PEGTL_ISTRING("Date:"), date_time, eol> {};
506 // Originator Fields
507 struct from : seq<TAO_PEGTL_ISTRING("From:"), opt<FWS>, mailbox_list, opt<FWS>, eol> {};
509 struct sender : seq<TAO_PEGTL_ISTRING("Sender:"), mailbox, eol> {};
511 struct reply_to : seq<TAO_PEGTL_ISTRING("Reply-To:"), address_list, eol> {};
513 struct address_list_or_pm : sor<TAO_PEGTL_ISTRING("Postmaster"), address_list> {};
515 // Destination Address Fields
516 struct to : seq<TAO_PEGTL_ISTRING("To:"), address_list_or_pm, eol> {};
518 struct cc : seq<TAO_PEGTL_ISTRING("Cc:"), address_list, eol> {};
520 struct bcc : seq<TAO_PEGTL_ISTRING("Bcc:"), opt<sor<address_list, CFWS>>, eol> {};
522 // Identification Fields
524 struct no_fold_literal : seq<one<'['>, star<dtext>, one<']'>> {};
526 struct id_left : dot_atom_text {};
528 struct id_right : sor<dot_atom_text, no_fold_literal> {};
530 struct msg_id
531 : seq<opt<CFWS>, one<'<'>, id_left, one<'@'>, id_right, one<'>'>, opt<CFWS>> {};
533 struct message_id : seq<TAO_PEGTL_ISTRING("Message-ID:"), msg_id, eol> {};
535 struct in_reply_to : seq<TAO_PEGTL_ISTRING("In-Reply-To:"), plus<msg_id>, eol> {};
537 struct references : seq<TAO_PEGTL_ISTRING("References:"), star<msg_id>, eol> {};
539 // Informational Fields
541 struct subject : seq<TAO_PEGTL_ISTRING("Subject:"), unstructured, eol> {};
543 struct comments : seq<TAO_PEGTL_ISTRING("Comments:"), unstructured, eol> {};
545 struct keywords
546 : seq<TAO_PEGTL_ISTRING("Keywords:"), list<phrase, one<','>>, eol> {};
548 // Resent Fields
550 struct resent_date : seq<TAO_PEGTL_ISTRING("Resent-Date:"), date_time, eol> {};
552 struct resent_from : seq<TAO_PEGTL_ISTRING("Resent-From:"), mailbox_list, eol> {};
554 struct resent_sender : seq<TAO_PEGTL_ISTRING("Resent-Sender:"), mailbox, eol> {};
556 struct resent_to : seq<TAO_PEGTL_ISTRING("Resent-To:"), address_list, eol> {};
558 struct resent_cc : seq<TAO_PEGTL_ISTRING("Resent-Cc:"), address_list, eol> {};
560 struct resent_bcc
561 : seq<TAO_PEGTL_ISTRING("Resent-Bcc:"), opt<sor<address_list, CFWS>>, eol> {};
563 struct resent_msg_id
564 : seq<TAO_PEGTL_ISTRING("Resent-Message-ID:"), msg_id, eol> {};
566 // Trace Fields
568 struct return_path : seq<TAO_PEGTL_ISTRING("Return-Path:"), opt<FWS>, path, eol> {};
570 // Facebook, among others
572 struct return_path_non_standard : seq<TAO_PEGTL_ISTRING("Return-Path:"),
573 opt<CFWS>,
574 addr_spec,
575 star<WSP>,
576 eol> {};
578 struct received_token : sor<angle_addr, addr_spec, domain, word> {};
580 struct received : seq<TAO_PEGTL_ISTRING("Received:"),
581 opt<sor<plus<received_token>, CFWS>>,
582 one<';'>,
583 date_time,
584 opt<seq<WSP, comment>>,
585 eol> {};
587 struct result : sor<TAO_PEGTL_ISTRING("Pass"),
588 TAO_PEGTL_ISTRING("Fail"),
589 TAO_PEGTL_ISTRING("SoftFail"),
590 TAO_PEGTL_ISTRING("Neutral"),
591 TAO_PEGTL_ISTRING("None"),
592 TAO_PEGTL_ISTRING("TempError"),
593 TAO_PEGTL_ISTRING("PermError")> {};
595 struct spf_key : sor<TAO_PEGTL_ISTRING("client-ip"),
596 TAO_PEGTL_ISTRING("envelope-from"),
597 TAO_PEGTL_ISTRING("helo"),
598 TAO_PEGTL_ISTRING("problem"),
599 TAO_PEGTL_ISTRING("receiver"),
600 TAO_PEGTL_ISTRING("identity"),
601 TAO_PEGTL_ISTRING("mechanism")> {};
603 // This value syntax (allowing addr_spec and angle_addr) is not in
604 // accordance with RFC 7208 (or 4408) but is what is effectivly used
605 // by libspf2 1.2.10 and before.
607 struct spf_value : sor<ip, addr_spec, dot_atom, quoted_string, angle_addr> {};
609 struct spf_key_value_pair : seq<spf_key, opt<CFWS>, one<'='>, spf_value> {};
611 struct spf_key_value_list
612 : seq<spf_key_value_pair,
613 star<seq<one<';'>, opt<CFWS>, spf_key_value_pair>>,
614 opt<one<';'>>> {};
616 struct received_spf : seq<TAO_PEGTL_ISTRING("Received-SPF:"),
617 opt<CFWS>,
618 result,
619 opt<seq<FWS, comment>>,
620 opt<seq<FWS, spf_key_value_list>>,
621 eol> {};
623 struct dkim_signature
624 : seq<TAO_PEGTL_ISTRING("DKIM-Signature:"), unstructured, eol> {};
626 struct mime_version : seq<TAO_PEGTL_ISTRING("MIME-Version:"),
627 opt<CFWS>,
628 one<'1'>,
629 opt<CFWS>,
630 one<'.'>,
631 opt<CFWS>,
632 one<'0'>,
633 opt<CFWS>,
634 eol> {};
636 // CTL := <any ASCII control ; ( 0- 37, 0.- 31.)
637 // character and DEL> ; ( 177, 127.)
639 // SPACE := 32
641 // especials := "(" / ")" / "<" / ">" / "@" /
642 // "," / ";" / ":" / "\" / <">
643 // "/" / "[" / "]" / "?" / "="
645 // ! 33
647 // 33-33
649 // " 34
651 // 35-39
653 // ( 40
654 // ) 41
656 // 42-43
658 // , 44
660 // 45-46
662 // / 47
664 // 48-57
666 // : 58
667 // ; 59
668 // < 60
669 // = 61
670 // > 62
671 // ? 63
672 // @ 64
674 // 65-90
676 // [ 91
677 // \ 92
678 // ] 93
680 // 94-126
682 // token := 1*<any (US-ASCII) CHAR except CTLs, SPACE,
683 // or tspecials>
685 struct tchar : ranges<33, 33, 35, 39, 42, 43, 45, 46, 48, 57, 65, 90, 94, 126> {};
687 struct token : plus<tchar> {};
689 struct ietf_token : token {};
691 struct x_token : seq<TAO_PEGTL_ISTRING("X-"), token> {};
693 struct extension_token : sor<x_token, ietf_token> {};
695 struct discrete_type : sor<TAO_PEGTL_ISTRING("text"),
696 TAO_PEGTL_ISTRING("image"),
697 TAO_PEGTL_ISTRING("audio"),
698 TAO_PEGTL_ISTRING("video"),
699 TAO_PEGTL_ISTRING("application"),
700 extension_token> {};
702 struct composite_type : sor<TAO_PEGTL_ISTRING("message"),
703 TAO_PEGTL_ISTRING("multipart"),
704 extension_token> {};
706 struct type : sor<discrete_type, composite_type> {};
708 struct subtype : token {};
710 // value := token / quoted-string
712 // attribute := token
714 // parameter := attribute "=" value
716 struct value : sor<token, quoted_string> {};
718 struct attribute : token {};
720 struct parameter : seq<attribute, one<'='>, value> {};
722 struct content : seq<TAO_PEGTL_ISTRING("Content-Type:"),
723 opt<CFWS>,
724 seq<type, one<'/'>, subtype>,
725 star<seq<one<';'>, opt<CFWS>, parameter>>,
726 opt<one<';'>>, // not strictly RFC 2045, but common
727 eol> {};
729 // mechanism := "7bit" / "8bit" / "binary" /
730 // "quoted-printable" / "base64" /
731 // ietf-token / x-token
733 struct mechanism : sor<TAO_PEGTL_ISTRING("7bit"),
734 TAO_PEGTL_ISTRING("8bit"),
735 TAO_PEGTL_ISTRING("binary"),
736 TAO_PEGTL_ISTRING("quoted-printable"),
737 TAO_PEGTL_ISTRING("base64"),
738 ietf_token,
739 x_token> {};
741 struct content_transfer_encoding
742 : seq<TAO_PEGTL_ISTRING("Content-Transfer-Encoding:"),
743 opt<CFWS>,
744 mechanism,
745 eol> {};
747 struct id : seq<TAO_PEGTL_ISTRING("Content-ID:"), msg_id, eol> {};
749 struct description
750 : seq<TAO_PEGTL_ISTRING("Content-Description:"), star<text>, eol> {};
752 // Optional Fields
754 struct ftext : ranges<33, 57, 59, 126> {};
756 struct field_name : plus<ftext> {};
758 struct field_value : unstructured {};
760 struct optional_field : seq<field_name, one<':'>, field_value, eol> {};
762 // message header
764 struct fields : star<sor<
765 return_path,
766 return_path_non_standard,
767 received,
768 received_spf,
770 dkim_signature,
772 orig_date,
773 from,
774 sender,
775 reply_to,
779 bcc,
781 message_id,
782 in_reply_to,
783 references,
785 subject,
786 comments,
787 keywords,
789 resent_date,
790 resent_from,
791 resent_sender,
792 resent_to,
793 resent_cc,
794 resent_bcc,
795 resent_msg_id,
797 mime_version,
798 content,
799 content_transfer_encoding,
801 description,
803 optional_field
804 >> {};
806 struct message : seq<fields, opt<seq<eol, body>>, eof> {};
808 // clang-format on
810 template <typename Rule>
811 struct action : nothing<Rule> {
814 template <>
815 struct action<fields> {
816 template <typename Input>
817 static void apply(Input const& in, Ctx& ctx)
819 // LOG(INFO) << "fields";
823 template <>
824 struct action<unstructured> {
825 template <typename Input>
826 static void apply(Input const& in, Ctx& ctx)
828 ctx.unstructured = in.string();
832 template <>
833 struct action<field_name> {
834 template <typename Input>
835 static void apply(Input const& in, Ctx& ctx)
837 ctx.opt_name = in.string();
841 template <>
842 struct action<field_value> {
843 template <typename Input>
844 static void apply(Input const& in, Ctx& ctx)
846 ctx.opt_value = in.string();
850 template <typename Input>
851 static void header(Input const& in, Ctx& ctx)
853 ctx.dkv.header(std::string_view(begin(in), end(in) - begin(in)));
856 template <>
857 struct action<optional_field> {
858 template <typename Input>
859 static void apply(Input const& in, Ctx& ctx)
861 // LOG(INFO) << "optional_field";
862 if (is_defined_field(ctx.opt_name)) {
863 // So, this is a syntax error in a defined field.
864 if (ctx.opt_name == "Received") {
865 // Go easy on Received lines, they tend to be wild and woolly.
866 // LOG(INFO) << in.string();
868 else {
869 auto const err
870 = fmt::format("syntax error in: \"{}\"", esc(in.string()));
871 ctx.msg_errors.push_back(err);
872 LOG(ERROR) << err;
874 ctx.defined_hdrs.emplace(defined_field(ctx.opt_name), ctx.opt_value);
876 else {
877 ctx.opt_hdrs.emplace(ctx.opt_name, ctx.opt_value);
879 header(in, ctx);
880 ctx.unstructured.clear();
881 ctx.mb_list.clear();
885 template <>
886 struct action<local_part> {
887 template <typename Input>
888 static void apply(Input const& in, Ctx& ctx)
890 ctx.mb_loc = in.string();
891 boost::trim(ctx.mb_loc);
895 template <>
896 struct action<domain> {
897 template <typename Input>
898 static void apply(Input const& in, Ctx& ctx)
900 ctx.mb_dom = in.string();
901 // LOG(INFO) << "domain == '" << ctx.mb_dom << "'";
905 template <>
906 struct action<mailbox> {
907 static void apply0(Ctx& ctx)
909 // LOG(INFO) << "mailbox emplace_back(" << ctx.mb_loc << '@' << ctx.mb_dom
910 // << ')';
911 ctx.mb_list.emplace_back(ctx.mb_loc, Domain{ctx.mb_dom});
915 template <>
916 struct action<orig_date> {
917 template <typename Input>
918 static void apply(const Input& in, Ctx& ctx)
920 // LOG(INFO) << "Date:";
921 header(in, ctx);
925 // Originator Fields
927 template <>
928 struct action<from> {
929 template <typename Input>
930 static void apply(const Input& in, Ctx& ctx)
932 if (!ctx.from_list.empty()) {
933 fmt::memory_buffer msg;
934 fmt::format_to(std::back_inserter(msg),
935 "multiple 'From:' address headers, previous:\n");
936 for (auto const& add : ctx.from_list) {
937 fmt::format_to(std::back_inserter(msg), " {}\n", add.as_string());
939 fmt::format_to(std::back_inserter(msg), "new: {}", in.string());
940 ctx.msg_errors.push_back(fmt::to_string(msg));
943 header(in, ctx);
944 ctx.from_list = std::move(ctx.mb_list);
945 ctx.mb_list.clear();
949 template <>
950 struct action<sender> {
951 template <typename Input>
952 static void apply(const Input& in, Ctx& ctx)
954 if (!ctx.sender.empty()) {
955 auto const err
956 = fmt::format("multiple 'Sender:' headers, previous: {}, this: {}",
957 static_cast<std::string>(ctx.sender), in.string());
958 ctx.msg_errors.push_back(err);
960 header(in, ctx);
961 CHECK_EQ(ctx.mb_list.size(), 1);
962 ctx.sender = std::move(ctx.mb_list[0]);
963 ctx.mb_list.clear();
967 template <>
968 struct action<reply_to> {
969 template <typename Input>
970 static void apply(const Input& in, Ctx& ctx)
972 header(in, ctx);
973 ctx.mb_list.clear();
977 // Destination Address Fields
979 template <>
980 struct action<to> {
981 template <typename Input>
982 static void apply(const Input& in, Ctx& ctx)
984 header(in, ctx);
985 ctx.mb_list.clear();
989 template <>
990 struct action<cc> {
991 template <typename Input>
992 static void apply(const Input& in, Ctx& ctx)
994 header(in, ctx);
995 ctx.mb_list.clear();
999 template <>
1000 struct action<bcc> {
1001 template <typename Input>
1002 static void apply(const Input& in, Ctx& ctx)
1004 header(in, ctx);
1005 ctx.mb_list.clear();
1009 // Identification Fields
1011 template <>
1012 struct action<msg_id> {
1013 template <typename Input>
1014 static void apply(const Input& in, Ctx& ctx)
1016 ctx.id = in.string();
1017 boost::trim(ctx.id);
1021 template <>
1022 struct action<message_id> {
1023 template <typename Input>
1024 static void apply(const Input& in, Ctx& ctx)
1026 header(in, ctx);
1027 if (!ctx.message_id.empty()) {
1028 LOG(ERROR) << "multiple message IDs: " << ctx.message_id << " and "
1029 << ctx.id;
1031 ctx.message_id = ctx.id;
1035 template <>
1036 struct action<in_reply_to> {
1037 template <typename Input>
1038 static void apply(const Input& in, Ctx& ctx)
1040 header(in, ctx);
1044 template <>
1045 struct action<references> {
1046 template <typename Input>
1047 static void apply(const Input& in, Ctx& ctx)
1049 header(in, ctx);
1053 // Informational Fields
1055 template <>
1056 struct action<subject> {
1057 template <typename Input>
1058 static void apply(const Input& in, Ctx& ctx)
1060 header(in, ctx);
1061 ctx.unstructured.clear();
1065 template <>
1066 struct action<comments> {
1067 template <typename Input>
1068 static void apply(const Input& in, Ctx& ctx)
1070 header(in, ctx);
1071 ctx.unstructured.clear();
1075 template <>
1076 struct action<keywords> {
1077 template <typename Input>
1078 static void apply(const Input& in, Ctx& ctx)
1080 header(in, ctx);
1084 // Resent Fields
1086 template <>
1087 struct action<resent_date> {
1088 template <typename Input>
1089 static void apply(const Input& in, Ctx& ctx)
1091 header(in, ctx);
1095 template <>
1096 struct action<resent_from> {
1097 template <typename Input>
1098 static void apply(const Input& in, Ctx& ctx)
1100 header(in, ctx);
1101 ctx.mb_list.clear();
1105 template <>
1106 struct action<resent_sender> {
1107 template <typename Input>
1108 static void apply(const Input& in, Ctx& ctx)
1110 header(in, ctx);
1111 ctx.mb_list.clear();
1115 template <>
1116 struct action<resent_to> {
1117 template <typename Input>
1118 static void apply(const Input& in, Ctx& ctx)
1120 header(in, ctx);
1121 ctx.mb_list.clear();
1125 template <>
1126 struct action<resent_cc> {
1127 template <typename Input>
1128 static void apply(const Input& in, Ctx& ctx)
1130 header(in, ctx);
1131 ctx.mb_list.clear();
1135 template <>
1136 struct action<resent_bcc> {
1137 template <typename Input>
1138 static void apply(const Input& in, Ctx& ctx)
1140 header(in, ctx);
1141 ctx.mb_list.clear();
1145 template <>
1146 struct action<resent_msg_id> {
1147 template <typename Input>
1148 static void apply(const Input& in, Ctx& ctx)
1150 header(in, ctx);
1154 // Trace Fields
1156 template <>
1157 struct action<return_path> {
1158 template <typename Input>
1159 static void apply(const Input& in, Ctx& ctx)
1161 header(in, ctx);
1162 ctx.mb_list.clear();
1166 template <>
1167 struct action<return_path_non_standard> {
1168 template <typename Input>
1169 static void apply(const Input& in, Ctx& ctx)
1171 // LOG(INFO) << "Return-Path: is retarded: " << esc(in.string());
1172 header(in, ctx);
1173 ctx.mb_list.clear();
1177 template <>
1178 struct action<received> {
1179 template <typename Input>
1180 static void apply(const Input& in, Ctx& ctx)
1182 header(in, ctx);
1183 ctx.mb_list.clear();
1187 template <>
1188 struct action<result> {
1189 template <typename Input>
1190 static void apply(const Input& in, Ctx& ctx)
1192 ctx.spf_result = std::move(in.string());
1193 boost::to_lower(ctx.spf_result);
1197 template <>
1198 struct action<spf_key> {
1199 template <typename Input>
1200 static void apply(const Input& in, Ctx& ctx)
1202 ctx.key = std::move(in.string());
1206 template <>
1207 struct action<spf_value> {
1208 template <typename Input>
1209 static void apply(const Input& in, Ctx& ctx)
1211 ctx.value = std::move(in.string());
1212 boost::trim(ctx.value);
1216 template <>
1217 struct action<spf_key_value_pair> {
1218 template <typename Input>
1219 static void apply(const Input& in, Ctx& ctx)
1221 ctx.kv_list.emplace_back(ctx.key, ctx.value);
1222 ctx.key.clear();
1223 ctx.value.clear();
1227 template <>
1228 struct action<spf_key_value_list> {
1229 static void apply0(Ctx& ctx)
1231 for (auto kvp : ctx.kv_list) {
1232 ctx.spf_info[kvp.first] = kvp.second;
1237 template <>
1238 struct action<received_spf> {
1239 template <typename Input>
1240 static void apply(const Input& in, Ctx& ctx)
1242 // LOG(INFO) << "Received-SPF:";
1244 // Do a fresh check now:
1246 auto node = osutil::get_hostname();
1248 SPF::Server spf_srv(node.c_str());
1249 SPF::Request spf_req(spf_srv);
1251 spf_req.set_ip_str(ctx.spf_info["client-ip"].c_str());
1253 spf_req.set_helo_dom(ctx.spf_info["helo"].c_str());
1254 if (ctx.spf_info.find("envelope-from") != end(ctx.spf_info)) {
1255 spf_req.set_env_from(ctx.spf_info["envelope-from"].c_str());
1258 SPF::Response spf_res(spf_req);
1259 auto res = spf_res.result();
1260 CHECK_NE(res, SPF::Result::INVALID);
1262 if (ctx.spf_result != res.c_str()) {
1263 LOG(WARNING) << "SPF results changed: "
1264 << "new result is \"" << res << "\", old result is \""
1265 << ctx.spf_result << "\"";
1268 // Get result from header:
1270 int pol_spf = DMARC_POLICY_SPF_OUTCOME_PASS;
1272 // Pass is the default:
1273 // if (ctx.spf_result == "pass") {
1274 // pol_spf = DMARC_POLICY_SPF_OUTCOME_PASS;
1275 // }
1277 // if ((ctx.spf_result == "neutral") || (ctx.spf_result == "softfail")) {
1278 // // could also be a FAIL maybe...
1279 // pol_spf = DMARC_POLICY_SPF_OUTCOME_PASS;
1280 // }
1282 if (ctx.spf_result == "none") {
1283 pol_spf = DMARC_POLICY_SPF_OUTCOME_NONE;
1286 if (ctx.spf_result == "temperror") {
1287 pol_spf = DMARC_POLICY_SPF_OUTCOME_TMPFAIL;
1290 if ((ctx.spf_result == "fail") || (ctx.spf_result == "permerror")) {
1291 pol_spf = DMARC_POLICY_SPF_OUTCOME_FAIL;
1294 if (ctx.spf_info.find("client-ip") != end(ctx.spf_info)) {
1295 ctx.dmp.connect(ctx.spf_info["client-ip"].c_str());
1296 // LOG(INFO) << "SPF: ip==" << ctx.spf_info["client-ip"] << ", "
1297 // << ctx.spf_result;
1300 // Google sometimes doesn't put in anything but client-ip
1301 if (ctx.spf_info.find("envelope-from") != end(ctx.spf_info)) {
1302 auto dom = ctx.spf_info["envelope-from"];
1303 auto origin = DMARC_POLICY_SPF_ORIGIN_MAILFROM;
1305 if (dom == "<>") {
1306 dom = ctx.spf_info["helo"];
1307 origin = DMARC_POLICY_SPF_ORIGIN_HELO;
1308 LOG(INFO) << "SPF: origin HELO " << dom;
1310 else {
1311 memory_input<> addr_in(dom, "dom");
1312 if (!parse<RFC5322::addr_spec, RFC5322::action>(addr_in, ctx)) {
1313 LOG(FATAL) << "Failed to parse domain: " << dom;
1315 dom = ctx.mb_dom;
1316 origin = DMARC_POLICY_SPF_ORIGIN_MAILFROM;
1317 LOG(INFO) << "SPF: origin MAIL FROM " << dom;
1319 ctx.mb_loc.clear();
1320 ctx.mb_dom.clear();
1322 ctx.dmp.store_spf(dom.c_str(), pol_spf, origin, "are you human?");
1325 ctx.mb_list.clear();
1329 template <>
1330 struct action<dkim_signature> {
1331 template <typename Input>
1332 static void apply(const Input& in, Ctx& ctx)
1334 header(in, ctx);
1335 CHECK(ctx.dkv.sig_syntax(ctx.unstructured)) << ctx.unstructured;
1336 ctx.unstructured.clear();
1340 template <>
1341 struct action<received_token> {
1342 template <typename Input>
1343 static void apply(const Input& in, Ctx& ctx)
1348 template <>
1349 struct action<mime_version> {
1350 template <typename Input>
1351 static void apply(const Input& in, Ctx& ctx)
1353 header(in, ctx);
1354 ctx.mime_version = true;
1358 template <>
1359 struct action<content> {
1360 template <typename Input>
1361 static void apply(const Input& in, Ctx& ctx)
1363 header(in, ctx);
1364 // ctx.unstructured.clear();
1368 template <>
1369 struct action<discrete_type> {
1370 template <typename Input>
1371 static void apply(const Input& in, Ctx& ctx)
1373 ctx.discrete_type = true;
1374 ctx.type = in.string();
1378 template <>
1379 struct action<composite_type> {
1380 template <typename Input>
1381 static void apply(const Input& in, Ctx& ctx)
1383 ctx.composite_type = true;
1384 ctx.type = in.string();
1388 template <>
1389 struct action<subtype> {
1390 template <typename Input>
1391 static void apply(const Input& in, Ctx& ctx)
1393 ctx.subtype = in.string();
1397 template <>
1398 struct action<content_transfer_encoding> {
1399 template <typename Input>
1400 static void apply(const Input& in, Ctx& ctx)
1402 header(in, ctx);
1403 // ctx.unstructured.clear();
1407 template <>
1408 struct action<id> {
1409 template <typename Input>
1410 static void apply(const Input& in, Ctx& ctx)
1412 header(in, ctx);
1416 template <>
1417 struct action<description> {
1418 template <typename Input>
1419 static void apply(const Input& in, Ctx& ctx)
1421 header(in, ctx);
1425 template <>
1426 struct action<attribute> {
1427 template <typename Input>
1428 static void apply(const Input& in, Ctx& ctx)
1430 ctx.key = in.string();
1434 template <>
1435 struct action<parameter> {
1436 template <typename Input>
1437 static void apply(const Input& in, Ctx& ctx)
1439 ctx.ct_parameters.emplace_back(ctx.key, ctx.value);
1440 ctx.key.clear();
1441 ctx.value.clear();
1445 template <>
1446 struct action<value> {
1447 template <typename Input>
1448 static void apply(const Input& in, Ctx& ctx)
1450 ctx.value = in.string();
1454 template <>
1455 struct action<body> {
1456 template <typename Input>
1457 static void apply(const Input& in, Ctx& ctx)
1459 // LOG(INFO) << "Message body:";
1460 auto const body = std::string_view(begin(in), end(in) - begin(in));
1462 ctx.dkv.eoh();
1463 ctx.dkv.body(body);
1465 if (ctx.mime_version) {
1466 // std::stringstream type;
1467 // type << "Content-Type: " << ctx.type << "/" << ctx.subtype;
1468 // for (auto const& p : ctx.ct_parameters) {
1469 // if ((type.str().length() + (3 + p.first.length() +
1470 // p.second.length()))
1471 // > 78)
1472 // type << ";\r\n\t";
1473 // else
1474 // type << "; ";
1475 // type << p.first << "=" << p.second;
1476 // }
1477 // LOG(INFO) << type.str();
1479 // memory_input<> body_in(body, "body");
1480 // if (!parse_nested<RFC5322::, RFC5322::action>(in, body_in, ctx)) {
1481 // LOG(ERROR) << "bad mime body";
1482 // }
1487 template <>
1488 struct action<message> {
1489 template <typename Input>
1490 static void apply(const Input& in, Ctx& ctx)
1492 // LOG(INFO) << "message";
1493 ctx.dkv.eom();
1495 // ctx.dkv.check();
1497 Domain from_domain;
1499 if (ctx.from_list.empty()) {
1500 // RFC-5322 says message must have a 'From:' header.
1501 LOG(ERROR) << "no (correct) RFC5322.From header";
1503 auto range = ctx.defined_hdrs.equal_range(defined_field("From"));
1504 for (auto it = range.first; it != range.second; ++it) {
1505 LOG(ERROR) << "using bogus '" << it->second << "'";
1506 // ctx.from_list.push_back(Mailbox(it->second));
1509 if (ctx.from_list.empty())
1510 return;
1513 if (ctx.from_list.size() > 1) {
1515 LOG(INFO) << ctx.from_list.size() << "multiple RFC5322.From addresses";
1516 for (auto& f : ctx.from_list) {
1517 LOG(INFO) << f;
1520 if (ctx.sender.empty()) {
1521 // Must have 'Sender:' says RFC-5322 section 3.6.2.
1522 LOG(ERROR)
1523 << "no RFC5322.Sender header with multiple RFC5322.From mailboxes";
1524 return;
1527 // find sender in from list
1528 // auto s = find(begin(ctx.from_list), end(ctx.from_list), ctx.sender);
1529 // if (s == end(ctx.from_list)) {
1530 // // can't be found, not an error
1531 // LOG(ERROR) << "No 'From:' match to 'Sender:'";
1533 // // must check all From:s
1534 // LOG(FATAL) << "write code to check all From: addresses";
1535 // }
1536 // else {
1537 // from_domain = ctx.sender;
1538 // LOG(INFO) << "using 'Sender:' domain " << ctx.sender.domain();
1539 // }
1541 else {
1543 from_domain = ctx.from_list[0].domain();
1545 // if (!ctx.sender.empty()) {
1546 // if (from_domain != ctx.sender.domain()) {
1547 // LOG(INFO) << "using 'Sender:' domain " << ctx.sender.domain()
1548 // << " in place of 'From:' domain " << from_domain;
1549 // from_domain = ctx.sender.domain();
1550 // }
1551 // }
1554 ctx.dmp.store_from_domain(from_domain.ascii().c_str());
1556 ctx.dkv.foreach_sig([&ctx](char const* domain, bool passed,
1557 char const* identity, char const* selector,
1558 char const* b) {
1559 LOG(INFO) << "DKIM check for " << domain
1560 << (passed ? " passed" : " failed");
1562 int result = passed ? DMARC_POLICY_DKIM_OUTCOME_PASS
1563 : DMARC_POLICY_DKIM_OUTCOME_FAIL;
1565 ctx.dmp.store_dkim(domain, selector, result, "I am human");
1568 ctx.dmp.query_dmarc(from_domain.ascii().c_str());
1570 // LOG(INFO) << "Message-ID: " << ctx.message_id;
1571 // LOG(INFO) << "Final DMARC advice for " << from_domain << ": "
1572 // << Advice_to_string(ctx.dmp.get_advice());
1574 if (ctx.msg_errors.size()) {
1575 for (auto e : ctx.msg_errors) {
1576 LOG(ERROR) << e;
1582 template <>
1583 struct action<obs_mbox_list> {
1584 template <typename Input>
1585 static void apply(const Input& in, Ctx& ctx)
1587 LOG(INFO) << "obsolete mailbox list: " << esc(in.string());
1591 template <>
1592 struct action<obs_addr_list> {
1593 template <typename Input>
1594 static void apply(const Input& in, Ctx& ctx)
1596 LOG(INFO) << "obsolete address list: " << esc(in.string());
1600 template <>
1601 struct action<obs_group_list> {
1602 template <typename Input>
1603 static void apply(const Input& in, Ctx& ctx)
1605 LOG(INFO) << "obsolete group list: " << esc(in.string());
1609 template <>
1610 struct action<angle_addr> {
1611 template <typename Input>
1612 static void apply(const Input& in, Ctx& ctx)
1614 // LOG(INFO) << "angle_addr: " << in.string();
1617 template <>
1618 struct action<display_name> {
1619 template <typename Input>
1620 static void apply(const Input& in, Ctx& ctx)
1622 // LOG(INFO) << "display_name: " << in.string();
1625 template <>
1626 struct action<name_addr> {
1627 template <typename Input>
1628 static void apply(const Input& in, Ctx& ctx)
1630 // LOG(INFO) << "name_addr: " << in.string();
1633 template <>
1634 struct action<name_addr_only> {
1635 template <typename Input>
1636 static void apply(const Input& in, Ctx& ctx)
1638 // LOG(INFO) << "name_addr_only: " << in.string();
1641 } // namespace RFC5322
1643 void display(RFC5322::Ctx const& ctx)
1645 // for (auto const& [name, value] : ctx.defined_hdrs) {
1646 // std::cout << name << ": " << value << '\n';
1647 // }
1648 // for (auto const& [name, value] : ctx.opt_hdrs) {
1649 // std::cout << name << ": " << value << '\n';
1650 // }
1653 void selftest()
1655 const char* name_addr_list_bad[]{
1656 "Gene Hightower . <gene@digilicious.com>",
1657 "via.Relay. <noreply@relay.firefox.com>",
1658 "[via Relay] <noreply@relay.firefox.com>",
1661 for (auto i : name_addr_list_bad) {
1662 memory_input<> in(i, i);
1663 RFC5322::Ctx ctx;
1664 if (parse<RFC5322::name_addr_only,
1665 RFC5322::action /*, tao::pegtl::tracer*/>(in, ctx)) {
1666 LOG(FATAL) << "Should not parse as name_addr_only \"" << i << "\"";
1670 const char* name_addr_list_good[]{
1671 "Gene Hightower <gene@digilicious.com>",
1672 "via Relay <noreply@relay.firefox.com>",
1673 "\"Gene Hightower <gene@digilicious.com> [via Relay]\""
1674 "<noreply@relay.firefox.com>",
1675 "\"Customer Care <care@bigcompany.com> via foo.com\" <noreply@foo.com>",
1678 for (auto i : name_addr_list_good) {
1679 memory_input<> in(i, i);
1680 RFC5322::Ctx ctx;
1681 if (!parse<RFC5322::name_addr_only,
1682 RFC5322::action /*, tao::pegtl::tracer*/>(in, ctx)) {
1683 LOG(FATAL) << "Error parsing as name_addr_only \"" << i << "\"";
1687 CHECK(RFC5322::is_defined_field("Subject"));
1688 CHECK(!RFC5322::is_defined_field("X-Subject"));
1690 const char* ip_list[]{
1691 "2607:f8b0:4001:c0b::22a",
1692 "127.0.0.1",
1695 for (auto i : ip_list) {
1696 memory_input<> in(i, i);
1697 RFC5322::Ctx ctx;
1698 if (!parse<RFC5322::ip, RFC5322::action /*, tao::pegtl::tracer*/>(in,
1699 ctx)) {
1700 LOG(ERROR) << "Error parsing as ip \"" << i << "\"";
1704 const char* rec_list[]{
1705 // github
1706 "Received: from github-smtp2a-ext-cp1-prd.iad.github.net "
1707 "(github-smtp2a-ext-cp1-prd.iad.github.net [192.30.253.16])\r\n"
1708 " by ismtpd0004p1iad1.sendgrid.net (SG) with ESMTP id "
1709 "OCAkwxSQQTiPcF-T3rLS3w\r\n"
1710 " for <gene-github@digilicious.com>; Tue, 23 May 2017 "
1711 "23:01:49.124 +0000 (UTC)\r\n",
1713 // sendgrid date is shit
1714 // "Received: by filter0810p1mdw1.sendgrid.net with SMTP id "
1715 // "filter0810p1mdw1-13879-5924BDA5-34\r\n"
1716 // " 2017-05-23 22:54:29.679063164 +0000 UTC\r\n",
1720 for (auto i : rec_list) {
1721 memory_input<> in(i, i);
1722 RFC5322::Ctx ctx;
1723 if (!parse<RFC5322::received, RFC5322::action /*, tao::pegtl::tracer*/>(
1724 in, ctx)) {
1725 LOG(ERROR) << "Error parsing as Received: \"" << i << "\"";
1729 const char* date_list[]{
1730 "Date: Tue, 30 May 2017 10:52:11 +0000 (UTC)\r\n",
1731 "Date: Mon, 29 May 2017 16:47:58 -0700\r\n",
1733 // this date is shit
1734 // "Date: Mon, 29 May 2017 19:47:08 EDT\r\n",
1737 for (auto i : date_list) {
1738 memory_input<> in(i, i);
1739 RFC5322::Ctx ctx;
1740 if (!parse<RFC5322::orig_date, RFC5322::action /*, tao::pegtl::tracer*/>(
1741 in, ctx)) {
1742 LOG(ERROR) << "Error parsing as Date: \"" << i << "\"";
1746 const char* const spf_list[]{
1747 // works
1748 "Received-SPF: pass (digilicious.com: domain of gmail.com designates "
1749 "74.125.82.46 as permitted sender) client-ip=74.125.82.46; "
1750 "envelope-from=l23456789O@gmail.com; helo=mail-wm0-f46.google.com;\r\n",
1752 // also works
1753 "Received-SPF: neutral (google.com: 2607:f8b0:4001:c0b::22a is neither "
1754 "permitted nor denied by best guess record for domain of "
1755 "1234567@riscv.org) client-ip=2607:f8b0:4001:c0b::22a;\r\n",
1758 for (auto i : spf_list) {
1759 memory_input<> in(i, i);
1760 RFC5322::Ctx ctx;
1761 if (!parse<RFC5322::received_spf, RFC5322::action /*, tao::pegtl::tracer*/>(
1762 in, ctx)) {
1763 LOG(ERROR) << "Error parsing as Received-SPF: \"" << i << "\"";
1768 int main(int argc, char* argv[])
1770 { // Need to work with either namespace.
1771 using namespace gflags;
1772 using namespace google;
1773 ParseCommandLineFlags(&argc, &argv, true);
1776 if (FLAGS_selftest) {
1777 selftest();
1778 return 0;
1781 for (auto i{1}; i < argc; ++i) {
1782 auto fn{argv[i]};
1783 auto name{fs::path(fn)};
1784 auto f{boost::iostreams::mapped_file_source(name)};
1785 auto in{memory_input<>(f.data(), f.size(), fn)};
1786 LOG(INFO) << "#### file: " << fn;
1787 try {
1788 RFC5322::Ctx ctx;
1789 // ctx.defined_hdrs.reserve(countof(RFC5322::defined_fields));
1790 if (!parse<RFC5322::message, RFC5322::action>(in, ctx)) {
1791 LOG(ERROR) << "parse returned false";
1793 display(ctx);
1795 catch (parse_error const& e) {
1796 std::cerr << e.what();
1797 return 1;
1800 return 0;