more Spamhaus checking and EHLO id lookup
[ghsmtp.git] / message.cpp
blob8a355f55cba8e6d303b5d15e9c3090da54583df1
1 // What you get where:
3 // RFC5321.HELO/.EHLO domain
4 // RFC5321.MailFrom mailbox
5 // RFC5322.From mailbox-list
7 // Reply-To:
9 // MAIL FROM:<reverse-path>
10 // RCPT TO:<forward-path>
12 #include "message.hpp"
14 #include "Mailbox.hpp"
15 #include "OpenARC.hpp"
16 #include "OpenDKIM.hpp"
17 #include "OpenDMARC.hpp"
18 #include "esc.hpp"
19 #include "fs.hpp"
20 #include "iequal.hpp"
21 #include "imemstream.hpp"
23 #include <cstring>
24 #include <map>
25 #include <unordered_set>
27 #include <fmt/format.h>
28 #include <fmt/ostream.h>
30 #include <boost/algorithm/string.hpp>
31 #include <boost/iostreams/device/mapped_file.hpp>
33 #include <tao/pegtl.hpp>
34 #include <tao/pegtl/contrib/abnf.hpp>
36 using std::begin;
37 using std::end;
39 // SPF Results (the names result_to_pol() compares against, ignoring case)
40 auto constexpr Pass = "Pass";
41 auto constexpr Fail = "Fail";
42 auto constexpr SoftFail = "SoftFail";
43 auto constexpr Neutral = "Neutral";
44 auto constexpr None = "None";
45 auto constexpr TempError = "TempError";
46 auto constexpr PermError = "PermError";
48 // SPF keys (looked up in received_spf_parsed::kv_map)
49 auto constexpr client_ip = "client-ip";
50 auto constexpr envelope_from = "envelope-from";
51 auto constexpr problem = "problem";
52 auto constexpr receiver = "receiver";
53 auto constexpr identity = "identity";
54 auto constexpr mechanism = "mechanism";
55 // auto constexpr helo = "helo"; // both key and value
57 // SPF identities (values of the "identity" key); helo doubles as a key name
58 auto constexpr helo = "helo";
59 auto constexpr mailfrom = "mailfrom";
61 using namespace tao::pegtl;
62 using namespace tao::pegtl::abnf;
64 using namespace std::string_literals;
// Return an owning std::string copy of the characters viewed by v.
//
// Uses the string_view constructor instead of feeding v.begin() to the
// (pointer, length) constructor: string_view's iterator type is only a raw
// pointer on some standard library implementations.
static std::string make_string(std::string_view v)
{
  return std::string(v);
}
// Strip leading and trailing spaces and tabs from v.  The result is a view
// into the same storage; an all-blank input yields an empty view positioned
// at the end of v.
static std::string_view trim(std::string_view v)
{
  auto constexpr blanks = " \t";

  auto const first = v.find_first_not_of(blanks);
  if (first == std::string_view::npos) {
    // Nothing but blanks (or nothing at all).
    return v.substr(v.size());
  }

  auto const last = v.find_last_not_of(blanks);
  return v.substr(first, last - first + 1);
}
// View the characters spanned by [in.begin(), in.end()) without copying.
// Input is expected to expose pointer-like begin()/end().
template <typename Input>
static std::string_view make_view(Input const& in)
{
  auto const first = in.begin();
  auto const count = std::distance(first, in.end());
  return std::string_view(first, count);
}
86 namespace RFC5322 {
88 using dot = one<'.'>;
89 using colon = one<':'>;
91 // clang-format off
// UTF-8 continuation byte, 0x80..0xBF.
93 struct UTF8_tail : range<'\x80', '\xBF'> {};
// Single-byte code unit, 0x00..0x7F.
95 struct UTF8_1 : range<0x00, 0x7F> {};
// Two-byte sequence: lead byte 0xC2..0xDF and one continuation byte.
97 struct UTF8_2 : seq<range<'\xC2', '\xDF'>, UTF8_tail> {};
// Three-byte sequences; lead bytes 0xE0 and 0xED restrict the second byte.
99 struct UTF8_3 : sor<seq<one<'\xE0'>, range<'\xA0', '\xBF'>, UTF8_tail>,
100 seq<range<'\xE1', '\xEC'>, rep<2, UTF8_tail>>,
101 seq<one<'\xED'>, range<'\x80', '\x9F'>, UTF8_tail>,
102 seq<range<'\xEE', '\xEF'>, rep<2, UTF8_tail>>> {};
// Four-byte sequences; lead bytes 0xF0 and 0xF4 restrict the second byte.
104 struct UTF8_4 : sor<seq<one<'\xF0'>, range<'\x90', '\xBF'>, rep<2, UTF8_tail>>,
105 seq<range<'\xF1', '\xF3'>, rep<3, UTF8_tail>>,
106 seq<one<'\xF4'>, range<'\x80', '\x8F'>, rep<2, UTF8_tail>>> {};
// Any multi-byte UTF-8 sequence (UTF8_1 is deliberately not included).
108 struct UTF8_non_ascii : sor<UTF8_2, UTF8_3, UTF8_4> {};
// Visible character: ABNF VCHAR or a multi-byte UTF-8 sequence.
110 struct VUCHAR : sor<VCHAR, UTF8_non_ascii> {};
112 //.............................................................................
// Field-name character: printable ASCII 33..126 except ':' (58).
114 struct ftext : ranges<33, 57, 59, 126> {};
116 struct field_name : plus<ftext> {};
// Folding white space: an optional line break (which may follow blanks),
// then at least one blank.
118 struct FWS : seq<opt<seq<star<WSP>, eol>>, plus<WSP>> {};
120 // *([FWS] VCHAR) *WSP  (VUCHAR is used here, so UTF-8 is accepted too)
121 struct field_value : seq<star<seq<opt<FWS>, VUCHAR>>, star<WSP>> {};
// One header field terminated by a line ending.
123 struct field : seq<field_name, one<':'>, field_value, eol> {};
// One header field that is the entire input: ends at eof, with no eol.
125 struct raw_field : seq<field_name, one<':'>, field_value, eof> {};
127 struct fields : star<field> {};
// Everything up to the end of the input.
129 struct body : until<eof> {};
// Header fields, optionally followed by an empty line and the body.
131 struct message : seq<fields, opt<seq<eol, body>>, eof> {};
133 //.............................................................................
135 // <https://tools.ietf.org/html/rfc2047>
137 // especials = "(" / ")" / "<" / ">" / "@" / "," / ";" / ":" / "
138 // <"> / "/" / "[" / "]" / "?" / "." / "="
140 // token = 1*<Any CHAR except SPACE, CTLs, and especials>
// RFC 2047 token character; the commented-out ranges are the excluded ones.
142 struct tchar47 : ranges< // NUL..' '
143 33, 33, // !
144 // 34, 34, // "
145 35, 39, // #$%&'
146 // 40, 41, // ()
147 42, 43, // *+
148 // 44, 44, // ,
149 45, 45, // -
150 // 46, 47, // ./
151 48, 57, // 0123456789
152 // 58, 64, // :;<=>?@
153 65, 90, // A..Z
154 // 91, 91, // [
155 92, 92, // '\\'
156 // 93, 93, // ]
157 94, 126 // ^_` a..z {|}~
158 // 127,127 // DEL
159 > {};
161 struct token47 : plus<tchar47> {};
// charset and encoding share the token47 syntax but are distinct rule types.
163 struct charset : token47 {};
164 struct encoding : token47 {};
166 // encoded-text = 1*<Any printable ASCII character other than "?"
167 // or SPACE>
169 struct echar : ranges< // NUL..' '
170 33, 62, // !..>
171 // 63, 63, // ?
172 64, 126 // @A..Z[\]^_` a..z {|}~
173 // 127,127 // DEL
174 > {};
// Greedy: consumes every character up to (not including) the next '?'.
176 struct encoded_text : plus<echar> {};
178 // encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
180 // leading opt<FWS> is not in RFC 2047
182 struct encoded_word_book: seq<string<'=', '?'>,
183 charset, string<'?'>,
184 encoding, string<'?'>,
185 encoded_text,
186 string<'=', '?'>
187 > {};
// An encoded-word, optionally preceded by folding white space.
189 struct encoded_word : seq<opt<FWS>, encoded_word_book> {};
191 //.............................................................................
193 // Comments are recursive, hence the forward declaration:
194 struct comment;
// Backslash followed by a visible character or a blank.
196 struct quoted_pair : seq<one<'\\'>, sor<VUCHAR, WSP>> {};
198 // ctext is ASCII not '(' or ')' or '\\' (multi-byte UTF-8 is accepted too)
199 struct ctext : sor<ranges<33, 39, 42, 91, 93, 126>, UTF8_non_ascii> {};
// Order matters in sor<>: plain ctext is tried before encoded_word.
201 struct ccontent : sor<ctext, quoted_pair, comment, encoded_word> {};
203 // from <https://tools.ietf.org/html/rfc2047>
204 // comment = "(" *(ctext / quoted-pair / comment / encoded-word) ")"
206 struct comment : seq<one<'('>,
207 star<seq<opt<FWS>, ccontent>>,
208 opt<FWS>,
209 one<')'>
210 > {};
// Comments and/or folding white space: one or more comments, each with
// optional FWS on either side, or else plain FWS.
212 struct CFWS : sor<seq<plus<seq<opt<FWS>, comment>, opt<FWS>>>,
213 FWS> {};
// qtext: printable ASCII except '"' (34) and '\\' (92), or multi-byte UTF-8.
215 struct qtext : sor<one<33>, ranges<35, 91, 93, 126>, UTF8_non_ascii> {};
217 struct qcontent : sor<qtext, quoted_pair> {};
219 // Corrected in RFC-5322, errata ID: 3135 <https://www.rfc-editor.org/errata/eid3135>
220 struct quoted_string : seq<opt<CFWS>,
221 DQUOTE,
222 sor<seq<star<seq<opt<FWS>, qcontent>>, opt<FWS>>, FWS>,
223 DQUOTE,
224 opt<CFWS>
225 > {};
// Atom character: letters, digits, the listed punctuation, or multi-byte
// UTF-8.
227 struct atext : sor<ALPHA, DIGIT,
228 one<'!', '#',
229 '$', '%',
230 '&', '\'',
231 '*', '+',
232 '-', '/',
233 '=', '?',
234 '^', '_',
235 '`', '{',
236 '|', '}',
237 '~'>,
238 UTF8_non_ascii> {};
240 struct atom : seq<opt<CFWS>, plus<atext>, opt<CFWS>> {};
// One or more runs of atext separated by single dots.
242 struct dot_atom_text : list<plus<atext>, dot> {};
244 struct dot_atom : seq<opt<CFWS>, dot_atom_text, opt<CFWS>> {};
246 struct word : sor<atom, quoted_string> {};
// encoded_word is tried before word at each position.
248 struct phrase : plus<sor<encoded_word, word>> {};
// Decimal octet, alternatives in order: 250..255, 200..249, three digits
// starting with 0 or 1, then one or two digits.
250 struct dec_octet : sor<seq<string<'2','5'>, range<'0','5'>>,
251 seq<one<'2'>, range<'0','4'>, DIGIT>,
252 seq<range<'0', '1'>, rep<2, DIGIT>>,
253 rep_min_max<1, 2, DIGIT>> {};
255 struct ipv4_address : seq<dec_octet, dot, dec_octet, dot, dec_octet, dot, dec_octet> {};
// One to four hex digits.
257 struct h16 : rep_min_max<1, 4, HEXDIG> {};
// Last 32 bits of an IPv6 address: two h16 groups or an embedded IPv4 address.
259 struct ls32 : sor<seq<h16, colon, h16>, ipv4_address> {};
261 struct dcolon : two<':'> {};
// IPv6 address: one alternative per possible position of the "::" elision.
263 struct ipv6_address : sor<seq< rep<6, h16, colon>, ls32>,
264 seq< dcolon, rep<5, h16, colon>, ls32>,
265 seq<opt<h16 >, dcolon, rep<4, h16, colon>, ls32>,
266 seq<opt<h16, opt< colon, h16>>, dcolon, rep<3, h16, colon>, ls32>,
267 seq<opt<h16, rep_opt<2, colon, h16>>, dcolon, rep<2, h16, colon>, ls32>,
268 seq<opt<h16, rep_opt<3, colon, h16>>, dcolon, h16, colon, ls32>,
269 seq<opt<h16, rep_opt<4, colon, h16>>, dcolon, ls32>,
270 seq<opt<h16, rep_opt<5, colon, h16>>, dcolon, h16>,
271 seq<opt<h16, rep_opt<6, colon, h16>>, dcolon >> {};
273 struct ip : sor<ipv4_address, ipv6_address> {};
275 struct local_part : sor<dot_atom, quoted_string> {};
// Domain-literal character: printable ASCII except '[', '\\' and ']'.
277 struct dtext : ranges<33, 90, 94, 126> {};
279 struct domain_literal : seq<opt<CFWS>,
280 one<'['>,
281 star<seq<opt<FWS>, dtext>>,
282 opt<FWS>,
283 one<']'>,
284 opt<CFWS>> {};
286 struct domain : sor<dot_atom, domain_literal> {};
288 // This addr_spec should be exactly the same as RFC5321 Mailbox, but it's not.
290 struct addr_spec : seq<local_part, one<'@'>, domain> {};
// An addr_spec that must span the entire input.
292 struct addr_spec_only : seq<addr_spec, eof> {};
// SPF result keyword, matched case-insensitively.
294 struct result : sor<TAO_PEGTL_ISTRING("Pass"),
295 TAO_PEGTL_ISTRING("Fail"),
296 TAO_PEGTL_ISTRING("SoftFail"),
297 TAO_PEGTL_ISTRING("Neutral"),
298 TAO_PEGTL_ISTRING("None"),
299 TAO_PEGTL_ISTRING("TempError"),
300 TAO_PEGTL_ISTRING("PermError")> {};
// Key names accepted in the key=value list, matched case-insensitively.
302 struct spf_key : sor<TAO_PEGTL_ISTRING("client-ip"),
303 TAO_PEGTL_ISTRING("envelope-from"),
304 TAO_PEGTL_ISTRING("helo"),
305 TAO_PEGTL_ISTRING("problem"),
306 TAO_PEGTL_ISTRING("receiver"),
307 TAO_PEGTL_ISTRING("identity"),
308 TAO_PEGTL_ISTRING("mechanism")> {};
310 // This value syntax (allowing addr_spec) is not in accordance with RFC
311 // 7208 (or 4408) but is what is effectively used by libspf2 1.2.10 and
312 // before.
314 struct spf_value : sor<ip, addr_spec, dot_atom, quoted_string> {};
// key [CFWS] "=" value
316 struct spf_kv_pair : seq<spf_key, opt<CFWS>, one<'='>, spf_value> {};
// Pairs separated by ';', with an optional trailing ';'.
318 struct spf_kv_list : seq<spf_kv_pair,
319 star<seq<one<';'>, opt<CFWS>, spf_kv_pair>>,
320 opt<one<';'>>> {};
// Header value: result, then an optional comment, then an optional
// key=value list.
322 struct spf_header : seq<opt<CFWS>,
323 result,
324 opt<seq<FWS, comment>>,
325 opt<seq<FWS, spf_kv_list>>> {};
// An spf_header that must span the entire input.
327 struct spf_header_only : seq<spf_header, eof> {};
329 //.............................................................................
331 struct display_name : phrase {};
// "<" addr-spec ">" with optional CFWS on either side.
333 struct angle_addr : seq<opt<CFWS>, one<'<'>, addr_spec, one<'>'>, opt<CFWS>> {};
335 struct name_addr : seq<opt<display_name>, angle_addr> {};
// name_addr is tried before a bare addr_spec.
337 struct mailbox : sor<name_addr, addr_spec> {};
// Obsolete list form that tolerates leading commas and empty list elements.
339 struct obs_mbox_list : seq<star<seq<opt<CFWS>, one<','>>>,
340 mailbox,
341 star<one<','>, opt<sor<mailbox, CFWS>>>
342 > {};
// The strict comma-separated list is tried first, then the obsolete form.
344 struct mailbox_list : sor<list<mailbox, one<','>>,
345 obs_mbox_list
346 > {};
348 // struct from : seq<TAO_PEGTL_ISTRING("From:"),
349 // mailbox_list
350 // > {};
// A mailbox_list that must span the entire input.
352 struct mailbox_list_only: seq<mailbox_list, eof> {};
354 //.............................................................................
356 // <https://www.rfc-editor.org/rfc/rfc2045.html>
358 // tspecials := "(" / ")" / "<" / ">" / "@" /
359 // "," / ";" / ":" / "\" / <">
360 // "/" / "[" / "]" / "?" / "="
362 // token := 1*<any (US-ASCII) CHAR except SPACE, CTLs,
363 // or tspecials>
365 // CTL 0..31 127
366 // SPACE 32
368 // tspecials
369 // 34 "
370 // 40..41 ()
371 // 44 ,
372 // 47 /
373 // 58..64 ;:<=>?@
374 // 91..93 [\]
375 // 127 DEL
// RFC 2045 token character; the commented-out ranges are the excluded ones.
// Unlike tchar47, '.' (46) is allowed and '\\' (92) is not.
377 struct tchar45 : ranges< // NUL..' '
378 33, 33, // !
379 // 34, 34, // "
380 35, 39, // #$%&'
381 // 40, 41, // ()
382 42, 43, // *+
383 // 44, 44, // ,
384 45, 46, // -.
385 // 47, 47, // /
386 48, 57, // 0123456789
387 // 58, 64, // :;<=>?@
388 65, 90, // A..Z
389 // 91, 93, // [\]
390 94, 126 // ^_` a..z {|}~
391 // 127,127 // DEL
392 > {};
394 struct token45 : plus<tchar45> {};
396 //.............................................................................
398 // <https://tools.ietf.org/html/rfc8601#section-2.2>
400 struct value : sor<token45, quoted_string> {};
// Same syntax as value; a distinct type so ar_action can hook it.
402 struct authserv_id : value {};
404 struct authres_version : seq<plus<DIGIT>, opt<CFWS>> {};
// [CFWS] ";" [CFWS] "none"
406 struct no_result : seq<opt<CFWS>, one<';'>, opt<CFWS>, TAO_PEGTL_ISTRING("none")> {};
408 struct let_dig : sor<ALPHA, DIGIT> {};
// Letters, digits and hyphens, where any run of hyphens must be followed by
// a letter or digit.
410 struct ldh_tail : star<sor<seq<plus<one<'-'>>, let_dig>, let_dig>> {};
412 struct ldh_str : seq<let_dig, ldh_tail> {};
414 struct keyword : ldh_str {};
416 struct method_version : seq<plus<DIGIT>, opt<CFWS>> {};
418 // method = Keyword [ [CFWS] "/" [CFWS] method-version ]
420 struct method : seq<keyword, opt<opt<CFWS>, one<'/'>, opt<CFWS>, method_version>> {};
422 // methodspec = [CFWS] method [CFWS] "=" [CFWS] result
423 // ; indicates which authentication method was evaluated
424 // ; and what its output was
// NOTE: `result` here is the SPF result rule defined earlier in this file.
426 struct methodspec : seq<opt<CFWS>, method, opt<CFWS>, one<'='>, opt<CFWS>, result> {};
428 // reasonspec = "reason" [CFWS] "=" [CFWS] value
429 // ; a free-form comment on the reason the given result
430 // ; was returned
432 struct reasonspec : seq<TAO_PEGTL_ISTRING("reason"), opt<CFWS>, one<'='>, opt<CFWS>, value> {};
434 // pvalue = [CFWS] ( value / [ [ local-part ] "@" ] domain-name )
435 // [CFWS]
// The address/domain alternative is tried before the plain value.
437 struct pvalue : seq<opt<CFWS>, sor<seq<opt<seq<opt<local_part>, one<'@'>>>, domain>,
438 value>,
439 opt<CFWS>> {};
441 struct ptype : keyword {};
443 struct special_smtp_verb: sor<TAO_PEGTL_ISTRING("mailfrom"),
444 TAO_PEGTL_ISTRING("rcptto")> {};
446 struct property : sor<special_smtp_verb, keyword> {};
448 // propspec = ptype [CFWS] "." [CFWS] property [CFWS] "=" pvalue
449 // ; an indication of which properties of the message
450 // ; were evaluated by the authentication scheme being
451 // ; applied to yield the reported result
453 struct propspec : seq<ptype, opt<CFWS>, one<'.'>, opt<CFWS>, property, opt<CFWS>, one<'='>, pvalue> {};
// ";" methodspec, then an optional reasonspec and optional propspecs.
455 struct resinfo : seq<opt<CFWS>, one<';'>, methodspec, opt<seq<CFWS, reasonspec>>,
456 opt<seq<CFWS, plus<propspec>>>
457 > {};
// Either the explicit "; none" form or one or more resinfo entries.
459 struct ar_results : sor<no_result, plus<resinfo>> {};
461 struct authres_payload : seq<opt<CFWS>, authserv_id,
462 opt<seq<CFWS, authres_version>>,
463 ar_results,
464 opt<CFWS>> {};
// The complete header field, including its case-insensitive name and colon.
466 struct authres_header_field: seq<TAO_PEGTL_ISTRING("Authentication-Results:"),
467 authres_payload> {};
// An authres_header_field that must span the entire input.
469 struct authres_header_field_only: seq<authres_header_field, eof> {};
471 //.............................................................................
473 // clang-format on
// Parse actions for the Authentication-Results grammar.  Rules without a
// specialization inherit nothing<Rule>.
475 template <typename Rule>
476 struct ar_action : nothing<Rule> {
// Copy the text matched by ar_results into the caller's ar_results string.
479 template <>
480 struct ar_action<ar_results> {
481 template <typename Input>
482 static void
483 apply(Input const& in, std::string& authservid, std::string& ar_results)
485 ar_results = in.string();
// Copy the text matched by authserv_id into the caller's authservid string.
489 template <>
490 struct ar_action<authserv_id> {
491 template <typename Input>
492 static void
493 apply(Input const& in, std::string& authservid, std::string& ar_results)
495 authservid = in.string();
499 //.............................................................................
// Parse actions that fill a ::message::parsed while a message (or a single
// raw header field) is parsed.  Rules without a specialization inherit
// nothing<Rule>.
501 template <typename Rule>
502 struct msg_action : nothing<Rule> {
// Remember the most recently matched field name as a view of the input.
505 template <>
506 struct msg_action<field_name> {
507 template <typename Input>
508 static void apply(Input const& in, ::message::parsed& msg)
510 msg.field_name = make_view(in);
// Remember the most recently matched field value as a view of the input.
514 template <>
515 struct msg_action<field_value> {
516 template <typename Input>
517 static void apply(Input const& in, ::message::parsed& msg)
519 msg.field_value = make_view(in);
// A complete field matched: append a header built from the remembered
// name and value.
523 template <>
524 struct msg_action<field> {
525 template <typename Input>
526 static void apply(Input const& in, ::message::parsed& msg)
528 msg.headers.emplace_back(
529 ::message::header(msg.field_name, msg.field_value));
// Same as the field action, for the eof-terminated raw_field rule.
533 template <>
534 struct msg_action<raw_field> {
535 template <typename Input>
536 static void apply(Input const& in, ::message::parsed& msg)
538 msg.headers.emplace_back(
539 ::message::header(msg.field_name, msg.field_value));
// Record the body as a view of the input.
543 template <>
544 struct msg_action<body> {
545 template <typename Input>
546 static void apply(Input const& in, ::message::parsed& msg)
548 msg.body = make_view(in);
552 //.............................................................................
// Result of parsing the value of a Received-SPF header.  The members are
// string_views, not copies.
// NOTE(review): they presumably point into the buffer passed to parse(), so
// that buffer must stay alive while this object is used -- confirm.
554 struct received_spf_parsed {
// Parse input against spf_header_only; returns whether it matched.
555 bool parse(std::string_view input);
// The complete input given to parse().
557 std::string_view whole_thing;
559 std::string_view result;
560 std::string_view comment;
// Scratch: key and value of the pair currently being parsed.
562 std::string_view key;
563 std::string_view value;
// Pairs in input order, and the same pairs indexed by key using the ci_less
// comparator.
565 std::vector<std::pair<std::string_view, std::string_view>> kv_list;
566 std::map<std::string_view, std::string_view, ci_less> kv_map;
568 std::string as_string() const { return fmt::format("{}", whole_thing); }
// Parse actions that fill a received_spf_parsed.  Rules without a
// specialization inherit nothing<Rule>.
571 template <typename Rule>
572 struct spf_action : nothing<Rule> {
// Record the matched result keyword.
575 template <>
576 struct spf_action<result> {
577 template <typename Input>
578 static void apply(const Input& in, received_spf_parsed& spf)
580 spf.result = make_view(in);
// Record the matched comment; a later comment match overwrites this value.
584 template <>
585 struct spf_action<comment> {
586 template <typename Input>
587 static void apply(const Input& in, received_spf_parsed& spf)
589 spf.comment = make_view(in);
// Remember the key of the pair being parsed.
593 template <>
594 struct spf_action<spf_key> {
595 template <typename Input>
596 static void apply(const Input& in, received_spf_parsed& spf)
598 spf.key = make_view(in);
// Remember the (blank-trimmed) value of the pair being parsed.
602 template <>
603 struct spf_action<spf_value> {
604 template <typename Input>
605 static void apply(const Input& in, received_spf_parsed& spf)
607 // RFC5322 syntax is full of optional WS, so we trim
608 spf.value = trim(make_view(in));
// A whole pair matched: append it to kv_list and reset the scratch fields.
612 template <>
613 struct spf_action<spf_kv_pair> {
614 template <typename Input>
615 static void apply(const Input& in, received_spf_parsed& spf)
617 spf.kv_list.emplace_back(spf.key, spf.value);
618 spf.key = spf.value = "";
622 template <>
623 struct spf_action<spf_kv_list> {
624 static void apply0(received_spf_parsed& spf)
626 for (auto const& kvp : spf.kv_list) {
627 if (spf.kv_map.contains(kvp.first)) {
628 LOG(WARNING) << "dup key: " << kvp.first << "=" << kvp.second;
629 LOG(WARNING) << " and: " << kvp.first << "="
630 << spf.kv_map[kvp.first];
632 spf.kv_map[kvp.first] = kvp.second;
637 bool received_spf_parsed::parse(std::string_view input)
639 whole_thing = input;
640 auto in{memory_input<>(input.data(), input.size(), "spf_header")};
641 return tao::pegtl::parse<spf_header_only, spf_action>(in, *this);
644 //.............................................................................
646 // Parse a grammar and extract each addr_spec
// Parse actions that collect (display name, address) pairs from a mailbox
// list.  Rules without a specialization inherit nothing<Rule>.
648 template <typename Rule>
649 struct mailbox_list_action : nothing<Rule> {
// Hold the display name until the addr_spec that follows it is matched.
652 template <>
653 struct mailbox_list_action<display_name> {
654 template <typename Input>
655 static void apply(Input const& in,
656 ::message::mailbox_name_addr_list& from_parsed)
658 from_parsed.name = in.string();
// Append the pending name (possibly empty) with this address, then clear the
// name so it is not reused for the next mailbox.
662 template <>
663 struct mailbox_list_action<addr_spec> {
664 template <typename Input>
665 static void apply(Input const& in,
666 ::message::mailbox_name_addr_list& from_parsed)
668 from_parsed.name_addr_list.push_back({from_parsed.name, in.string()});
669 from_parsed.name.clear();
673 } // namespace RFC5322
675 // Map SPF result string to DMARC policy code.
677 static int result_to_pol(std::string_view result)
679 // clang-format off
680 if (iequal(result, Pass)) return DMARC_POLICY_SPF_OUTCOME_PASS;
681 if (iequal(result, Fail)) return DMARC_POLICY_SPF_OUTCOME_FAIL;
682 if (iequal(result, SoftFail)) return DMARC_POLICY_SPF_OUTCOME_TMPFAIL;
683 if (iequal(result, Neutral)) return DMARC_POLICY_SPF_OUTCOME_NONE;
684 if (iequal(result, None)) return DMARC_POLICY_SPF_OUTCOME_NONE;
685 if (iequal(result, TempError)) return DMARC_POLICY_SPF_OUTCOME_NONE;
686 if (iequal(result, PermError)) return DMARC_POLICY_SPF_OUTCOME_NONE;
687 LOG(WARNING) << "unknown SPF result: \"" << result << "\"";
688 return DMARC_POLICY_SPF_OUTCOME_NONE;
689 // clang-format on
692 static bool is_postmaster(std::string_view from)
694 return from == "<>" || istarts_with(from, "<Postmaster@");
697 static bool sender_comment(std::string_view comment, std::string_view sender)
699 auto const prefix = fmt::format("({}:", sender);
700 return istarts_with(comment, prefix);
703 static void spf_result_to_dmarc(OpenDMARC::policy& dmp,
704 RFC5322::received_spf_parsed& spf)
706 LOG(INFO) << "spf_result_to_dmarc";
708 if (spf.kv_map.contains(problem)) {
709 LOG(WARNING) << "SPF problem: " << spf.kv_map[problem];
712 auto const spf_pol = result_to_pol(spf.result);
714 if (spf_pol == DMARC_POLICY_SPF_OUTCOME_NONE) {
715 LOG(WARNING) << "Ignoring for DMARC purposes: " << spf.as_string();
716 return;
719 std::string spf_dom;
721 int spf_origin;
723 if (spf.kv_map.contains(identity)) {
724 if (iequal(spf.kv_map[identity], mailfrom)) {
725 if (spf.kv_map.contains(envelope_from)) {
726 if (Mailbox::validate(spf.kv_map[envelope_from])) {
727 Mailbox mbx(spf.kv_map[envelope_from]);
728 spf_dom = mbx.domain().ascii();
729 spf_origin = DMARC_POLICY_SPF_ORIGIN_MAILFROM;
731 auto const human_result = fmt::format(
732 "{}, explicit origin mail from, mailbox {}", spf.result, mbx);
733 LOG(INFO) << "SPF result " << human_result;
734 dmp.store_spf(spf_dom.c_str(), spf_pol, spf_origin,
735 human_result.c_str());
736 return;
738 else {
739 LOG(WARNING) << "invalid mailbox in envelope-from: "
740 << spf.kv_map[envelope_from];
743 else {
744 LOG(WARNING)
745 << "identity checked was mail from, but no envelope_from key";
748 else if (iequal(spf.kv_map[identity], helo)) {
749 if (spf.kv_map.contains(helo)) {
750 if (Domain::validate(spf.kv_map[helo])) {
751 Domain dom(spf.kv_map[helo]);
752 spf_dom = dom.ascii();
753 spf_origin = DMARC_POLICY_SPF_ORIGIN_HELO;
755 auto const human_result = fmt::format(
756 "{}, explicit origin hello, domain {}", spf.result, dom);
757 LOG(INFO) << "SPF result " << human_result;
758 dmp.store_spf(spf_dom.c_str(), spf_pol, spf_origin,
759 human_result.c_str());
760 return;
762 else {
763 LOG(WARNING) << "invalid domain in helo: " << spf.kv_map[helo];
766 else {
767 LOG(WARNING) << "identity checked was helo, but no helo key";
770 else {
771 LOG(WARNING) << "unknown identity " << spf.kv_map[identity];
774 else {
775 LOG(INFO) << "no explicit tag for which identity was checked";
778 if (spf.kv_map.contains(envelope_from)) {
779 auto const efrom = spf.kv_map[envelope_from];
781 if (is_postmaster(efrom)) {
782 if (spf.kv_map.contains(helo)) {
783 if (Domain::validate(spf.kv_map[helo])) {
784 Domain dom(spf.kv_map[helo]);
785 spf_dom = dom.ascii();
786 spf_origin = DMARC_POLICY_SPF_ORIGIN_HELO;
788 auto const human_result = fmt::format(
789 "{}, RFC5321.MailFrom is <>, implicit origin hello, domain {}",
790 spf.result, dom);
791 LOG(INFO) << "SPF result " << human_result;
792 dmp.store_spf(spf_dom.c_str(), spf_pol, spf_origin,
793 human_result.c_str());
794 return;
796 else {
797 LOG(WARNING) << "RFC5321.MailFrom is postmaster or <> but helo is "
798 "invalid domain:"
799 << spf.kv_map[helo];
802 else {
803 LOG(WARNING) << "envelope-from is <> but no helo key";
806 else if (Mailbox::validate(efrom)) {
807 // We're good to go
808 Mailbox mbx(efrom);
809 spf_dom = mbx.domain().ascii();
810 spf_origin = DMARC_POLICY_SPF_ORIGIN_MAILFROM;
812 auto const human_result =
813 fmt::format("{}, implicit RFC5321.MailFrom <{}>", spf.result, mbx);
814 LOG(INFO) << "SPF result " << human_result;
815 dmp.store_spf(spf_dom.c_str(), spf_pol, spf_origin, human_result.c_str());
816 return;
818 else {
819 LOG(WARNING) << "envelope-from invalid mailbox: " << efrom;
822 else if (spf.kv_map.contains(helo)) {
823 if (Domain::validate(spf.kv_map[helo])) {
824 Domain dom(spf.kv_map[helo]);
825 spf_dom = dom.ascii();
826 spf_origin = DMARC_POLICY_SPF_ORIGIN_HELO;
828 auto const human_result =
829 fmt::format("{}, hello domain {}", spf.result, dom);
830 LOG(INFO) << "SPF result " << human_result;
831 dmp.store_spf(spf_dom.c_str(), spf_pol, spf_origin, human_result.c_str());
832 return;
834 else {
835 LOG(WARNING) << "helo is invalid domain:" << spf.kv_map[helo];
838 else {
839 LOG(WARNING)
840 << "no explicit \"identity\" key, and no envelope-from or helo key";
844 namespace message {
846 bool authentication_results_parse(std::string_view input,
847 std::string& authservid,
848 std::string& ar_results)
850 auto in{memory_input<>(input.data(), input.size(),
851 "authentication_results_header")};
852 return tao::pegtl::parse<RFC5322::authres_header_field_only,
853 RFC5322::ar_action>(in, authservid, ar_results);
856 bool authentication(message::parsed& msg,
857 char const* sender,
858 char const* selector,
859 fs::path key_file)
861 LOG(INFO) << "add_authentication_results";
862 CHECK(!msg.headers.empty());
864 // Remove any redundant Authentication-Results headers
865 msg.headers.erase(
866 std::remove_if(msg.headers.begin(), msg.headers.end(),
867 [sender](auto const& hdr) {
868 if (hdr == Authentication_Results) {
869 std::string authservid;
870 std::string ar_results;
871 if (message::authentication_results_parse(
872 hdr.as_view(), authservid, ar_results)) {
873 return Domain::match(authservid, sender);
875 LOG(WARNING) << "failed to parse " << hdr.as_string();
877 return false;
879 msg.headers.end());
881 // Run our message through OpenDKIM verify
883 OpenDKIM::verify dkv;
884 for (auto const& header : msg.headers) {
885 auto const hv = header.as_view();
886 dkv.header(hv);
888 dkv.eoh();
890 // LOG(INFO) << "body «" << msg.body << "»";
891 dkv.body(msg.body);
893 dkv.eom();
895 OpenDMARC::policy dmp;
897 // Build up Authentication-Results header
898 fmt::memory_buffer bfr;
900 std::unordered_set<Domain> validated_doms;
902 // Grab SPF records
903 for (auto hdr : msg.headers) {
904 if (hdr == Received_SPF) {
905 RFC5322::received_spf_parsed spf_parsed;
906 if (!spf_parsed.parse(hdr.value)) {
907 LOG(WARNING) << "failed to parse SPF record: " << hdr.value;
908 continue;
911 LOG(INFO) << "SPF record parsed";
912 if (!sender_comment(spf_parsed.comment, sender)) {
913 LOG(INFO) << "comment == \"" << spf_parsed.comment << "\" not by "
914 << sender;
915 continue;
918 if (!Mailbox::validate(spf_parsed.kv_map[envelope_from])) {
919 LOG(WARNING) << "invalid mailbox: " << spf_parsed.kv_map[envelope_from];
920 continue;
923 if (!Domain::validate(spf_parsed.kv_map[helo])) {
924 LOG(WARNING) << "invalid helo domain: " << spf_parsed.kv_map[helo];
925 continue;
928 Mailbox env_from(spf_parsed.kv_map[envelope_from]);
929 Domain helo_dom(spf_parsed.kv_map[helo]);
931 if (iequal(env_from.local_part(), "Postmaster") &&
932 env_from.domain() == helo_dom) {
933 if (validated_doms.count(helo_dom) == 0) {
934 fmt::format_to(std::back_inserter(bfr), ";\r\n\tspf={}", spf_parsed.result);
935 fmt::format_to(std::back_inserter(bfr), " {}", spf_parsed.comment);
936 fmt::format_to(std::back_inserter(bfr), " smtp.helo={}", helo_dom.ascii());
937 validated_doms.emplace(helo_dom);
939 if (spf_parsed.kv_map.contains(client_ip)) {
940 std::string ip = make_string(spf_parsed.kv_map[client_ip]);
941 dmp.connect(ip.c_str());
943 spf_result_to_dmarc(dmp, spf_parsed);
946 else {
947 if (validated_doms.count(env_from.domain()) == 0) {
948 fmt::format_to(std::back_inserter(bfr), ";\r\n\tspf={}", spf_parsed.result);
949 fmt::format_to(std::back_inserter(bfr), " {}", spf_parsed.comment);
950 fmt::format_to(std::back_inserter(bfr), " smtp.mailfrom={}",
951 env_from.as_string(Mailbox::domain_encoding::ascii));
952 validated_doms.emplace(env_from.domain());
954 if (spf_parsed.kv_map.contains(client_ip)) {
955 std::string ip = make_string(spf_parsed.kv_map[client_ip]);
956 dmp.connect(ip.c_str());
958 spf_result_to_dmarc(dmp, spf_parsed);
964 LOG(INFO) << "fetching From: header";
965 // Should be only one From:
966 if (auto hdr = std::find(begin(msg.headers), end(msg.headers), From);
967 hdr != end(msg.headers)) {
968 auto const from_str = make_string(hdr->value);
970 memory_input<> from_in(from_str, "from");
971 if (!parse<RFC5322::mailbox_list_only, RFC5322::mailbox_list_action>(
972 from_in, msg.from_parsed)) {
973 LOG(WARNING) << "failed to parse From:" << from_str;
976 for (auto hdr_next = std::next(hdr); hdr_next != end(msg.headers);
977 hdr_next = std::next(hdr_next)) {
978 if (*hdr_next == From) {
979 LOG(WARNING) << "additional RFC5322.From header found: "
980 << hdr_next->as_string();
985 if (msg.from_parsed.name_addr_list.empty()) {
986 LOG(WARNING) << "No address in RFC5322.From header";
987 return false;
991 <https://tools.ietf.org/html/rfc7489#section-6.6>
992 6.6.1. Extract Author Domain
994 The case of a syntactically valid multi-valued RFC5322.From field
995 presents a particular challenge. The process in this case is to
996 apply the DMARC check using each of those domains found in the
997 RFC5322.From field as the Author Domain and apply the most strict
998 policy selected among the checks that fail.
1002 // FIXME
1003 if (msg.from_parsed.name_addr_list.size() > 1) {
1004 LOG(WARNING) << "More than one address in RFC5322.From header";
1007 auto from_addr = msg.from_parsed.name_addr_list[0].addr;
1009 boost::trim(from_addr);
1011 if (!Mailbox::validate(from_addr)) {
1012 LOG(WARNING) << "Mailbox syntax valid for RFC-5322, not for RFC-5321: \""
1013 << from_addr << "\"";
1014 // Maybe we can pick out a valid domain?
1015 return false;
1018 Mailbox from_mbx(from_addr);
1019 msg.dmarc_from = from_mbx.as_string(Mailbox::domain_encoding::ascii);
1020 msg.dmarc_from_domain = from_mbx.domain().ascii();
1022 LOG(INFO) << "dmarc_from_domain == " << msg.dmarc_from_domain;
1023 dmp.store_from_domain(msg.dmarc_from_domain.c_str());
1025 // Check each DKIM sig, inform DMARC processor, put in AR
1027 dkv.foreach_sig([&dmp, &bfr](char const* domain, bool passed,
1028 char const* identity, char const* sel,
1029 char const* b) {
1030 int const result = passed ? DMARC_POLICY_DKIM_OUTCOME_PASS
1031 : DMARC_POLICY_DKIM_OUTCOME_FAIL;
1032 auto const human_result = (passed ? "pass" : "fail");
1034 LOG(INFO) << "DKIM check for " << domain << " " << human_result;
1036 dmp.store_dkim(domain, sel, result, human_result);
1038 auto bs = std::string_view(b, strlen(b)).substr(0, 8);
1040 fmt::format_to(std::back_inserter(bfr), ";\r\n\tdkim={}", human_result);
1041 fmt::format_to(std::back_inserter(bfr), " header.i={}", identity);
1042 fmt::format_to(std::back_inserter(bfr), " header.s={}", sel);
1043 fmt::format_to(std::back_inserter(bfr), " header.b=\"{}\"", bs);
1046 // Set DMARC status in AR
1048 auto const dmarc_passed = dmp.query_dmarc(msg.dmarc_from_domain.c_str());
1050 auto const dmarc_result = (dmarc_passed ? "pass" : "fail");
1051 LOG(INFO) << "DMARC " << dmarc_result;
1053 fmt::format_to(std::back_inserter(bfr), ";\r\n\tdmarc={} header.from={}", dmarc_result,
1054 msg.dmarc_from_domain);
1056 // ARC
1058 OpenARC::verify arv;
1059 for (auto const& header : msg.headers) {
1060 arv.header(header.as_view());
1062 arv.eoh();
1063 arv.body(msg.body);
1064 arv.eom();
1066 LOG(INFO) << "ARC status == " << arv.chain_status_str();
1067 LOG(INFO) << "ARC custody == " << arv.chain_custody_str();
1069 auto const arc_status = arv.chain_status_str();
1071 fmt::format_to(std::back_inserter(bfr), ";\r\n\tarc={}", arc_status);
1073 // New AR header on the top
1075 auto const ar_results = [&bfr]() {
1076 // Ug, OpenARC adds an extra one, arc.c:3213
1077 auto s = fmt::to_string(bfr);
1078 if (s.length() && s[0] == ';')
1079 s.erase(0, 1);
1080 return s;
1081 }();
1083 msg.ar_str =
1084 fmt::format("{}: {};{}", Authentication_Results, sender, ar_results);
1086 LOG(INFO) << "new AR header «" << esc(msg.ar_str, esc_line_option::multi)
1087 << "»";
1089 CHECK(msg.parse_hdr(msg.ar_str));
1091 // Run our message through ARC::sign
1093 OpenARC::sign ars;
1095 if (iequal(arc_status, "none")) {
1096 ars.set_cv_none();
1098 else if (iequal(arc_status, "fail")) {
1099 ars.set_cv_fail();
1101 else if (iequal(arc_status, "pass")) {
1102 ars.set_cv_pass();
1104 else {
1105 ars.set_cv_unkn();
1108 for (auto const& header : msg.headers) {
1109 ars.header(header.as_view());
1111 ars.eoh();
1112 ars.body(msg.body);
1113 ars.eom();
1115 boost::iostreams::mapped_file_source priv;
1116 priv.open(key_file);
1118 if (ars.seal(sender, selector, sender, priv.data(), priv.size(),
1119 ar_results.c_str())) {
1120 msg.arc_hdrs = ars.whole_seal();
1121 for (auto const& hdr : msg.arc_hdrs) {
1122 CHECK(msg.parse_hdr(hdr));
1125 else {
1126 LOG(INFO) << "failed to generate seal";
1129 OpenARC::verify arv2;
1130 for (auto const& header : msg.headers) {
1131 arv2.header(header.as_view());
1133 arv2.eoh();
1134 arv2.body(msg.body);
1135 arv2.eom();
1137 LOG(INFO) << "check ARC status == " << arv2.chain_status_str();
1138 LOG(INFO) << "check ARC custody == " << arv2.chain_custody_str();
1140 return dmarc_passed;
1143 void print_spf_envelope_froms(char const* file, message::parsed& msg)
1145 CHECK(!msg.headers.empty());
1146 for (auto const& hdr : msg.headers) {
1147 if (hdr == Received_SPF) {
1148 RFC5322::received_spf_parsed spf_parsed;
1149 if (spf_parsed.parse(hdr.value)) {
1150 std::cout << spf_parsed.kv_map[envelope_from] << '\n';
1151 break;
1153 else {
1154 LOG(WARNING) << "failed to parse " << file << ":\n" << hdr.as_string();
1160 void remove_delivery_headers(message::parsed& msg)
1162 // Remove headers that are added by the "delivery agent"
1163 // aka (Session::added_headers_)
1164 msg.headers.erase(
1165 std::remove(msg.headers.begin(), msg.headers.end(), Return_Path),
1166 msg.headers.end());
1168 // just in case, but right now this header should not exist.
1169 msg.headers.erase(
1170 std::remove(msg.headers.begin(), msg.headers.end(), Delivered_To),
1171 msg.headers.end());
1174 void dkim_check(message::parsed& msg, char const* domain)
1176 LOG(INFO) << "dkim";
1178 CHECK(!msg.body.empty());
1180 OpenDKIM::verify dkv;
1182 // Run our message through OpenDKIM verify
1184 for (auto const& header : msg.headers) {
1185 auto const hv = header.as_view();
1186 dkv.header(hv);
1188 dkv.eoh();
1189 dkv.body(msg.body);
1190 dkv.eom();
1192 // Check each DKIM sig, inform DMARC processor, put in AR
1194 dkv.foreach_sig([](char const* domain, bool passed, char const* identity,
1195 char const* sel, char const* b) {
1196 auto const human_result = (passed ? "pass" : "fail");
1198 auto bs = std::string_view(b, strlen(b)).substr(0, 8);
1200 LOG(INFO) << "DKIM check bfor " << domain << " " << human_result;
1201 LOG(INFO) << " header.i=" << identity;
1202 LOG(INFO) << " header.s=" << sel;
1203 LOG(INFO) << " header.b=\"" << bs << "\"";
1207 //.............................................................................
1209 bool parsed::parse(std::string_view input)
1211 auto in{memory_input<>(input.data(), input.size(), "message")};
1212 return tao::pegtl::parse<RFC5322::message, RFC5322::msg_action>(in, *this);
1215 bool parsed::parse_hdr(std::string_view input)
1217 auto in{memory_input<>(input.data(), input.size(), "message")};
1218 if (tao::pegtl::parse<RFC5322::raw_field, RFC5322::msg_action>(in, *this)) {
1219 std::rotate(headers.rbegin(), headers.rbegin() + 1, headers.rend());
1220 return true;
1222 return false;
1225 std::string parsed::as_string() const
1227 fmt::memory_buffer bfr;
1229 for (auto const& h : headers)
1230 fmt::format_to(std::back_inserter(bfr), "{}\r\n", h.as_string());
1232 if (!body.empty())
1233 fmt::format_to(std::back_inserter(bfr), "\r\n{}", body);
1235 return fmt::to_string(bfr);
1238 bool parsed::write(std::ostream& os) const
1240 for (auto const& h : headers)
1241 os << h.as_string() << "\r\n";
1243 if (!body.empty())
1244 os << "\r\n" << body;
1246 return true;
1249 std::string header::as_string() const
1251 return fmt::format("{}:{}", name, value);
1254 std::string_view parsed::get_header(std::string_view name) const
1256 if (auto hdr = std::find(begin(headers), end(headers), name);
1257 hdr != end(headers)) {
1258 return trim(hdr->value);
1260 return "";
1263 void dkim_sign(message::parsed& msg,
1264 char const* sender,
1265 char const* selector,
1266 fs::path key_file)
1268 CHECK(msg.sig_str.empty());
1270 boost::iostreams::mapped_file_source priv;
1271 priv.open(key_file);
1273 auto const key_str = std::string(priv.data(), priv.size());
1275 // Run our message through DKIM::sign
1276 OpenDKIM::sign dks(key_str.c_str(), // textual data
1277 selector, sender, OpenDKIM::sign::body_type::text);
1278 for (auto const& header : msg.headers) {
1279 dks.header(header.as_view());
1281 dks.eoh();
1282 dks.body(msg.body);
1283 dks.eom();
1285 auto const sig = dks.getsighdr();
1287 msg.sig_str = fmt::format("DKIM-Signature: {}", sig);
1288 CHECK(msg.parse_hdr(msg.sig_str));
// Prepare a message for re-sending: drop the delivery-agent headers,
// optionally replace the From and Reply-To header fields, then DKIM-sign
// the result.  `sender`, `selector` and `key_file` are forwarded
// unchanged to dkim_sign.
//
// `mail_from` and `reply_to` are handed to parse_hdr, which parses a raw
// header field, so each is expected to be complete header field text; an
// empty string leaves the corresponding header untouched.
//
// NOTE(review): the numbering embedded in these lines skips two lines
// just before the MIME-Version test and two more just after the
// "body ==" comment, so the footer section may be wrapped in a block
// comment in the real file -- confirm against the repository before
// treating it as live code.
1291 void rewrite_from_to(message::parsed& msg,
1292 std::string mail_from,
1293 std::string reply_to,
1294 char const* sender,
1295 char const* selector,
1296 fs::path key_file)
1298 LOG(INFO) << "rewrite_from_to";
1300 remove_delivery_headers(msg);
// Replace every existing From header with the supplied text.  The text
// is kept in msg.from_str before parsing (presumably because the parsed
// header refers to that storage -- TODO confirm).
1302 if (!mail_from.empty()) {
1303 msg.headers.erase(std::remove(msg.headers.begin(), msg.headers.end(), From),
1304 msg.headers.end());
1306 msg.from_str = mail_from;
1307 CHECK(msg.parse_hdr(msg.from_str));
// Same treatment for Reply-To, with the text kept in msg.reply_to_str.
1310 if (!reply_to.empty()) {
1311 msg.headers.erase(
1312 std::remove(msg.headers.begin(), msg.headers.end(), Reply_To),
1313 msg.headers.end());
1315 msg.reply_to_str = reply_to;
1316 CHECK(msg.parse_hdr(msg.reply_to_str));
1319 // modify plain text body
// The footer is appended only for MIME-Version 1.0 messages whose
// Content-Type starts with "text/plain;".  The new body is built in
// msg.body_str and msg.body is then re-assigned from it.
1322 if (iequal(msg.get_header(MIME_Version), "1.0") &&
1323 istarts_with(msg.get_header(Content_Type), "text/plain;")) {
1324 LOG(INFO) << "Adding footer to message body.";
1325 msg.body_str = msg.body;
1326 msg.body_str.append("\r\n\r\n\t-- Added Footer --\r\n");
1327 msg.body = msg.body_str;
1329 else {
1330 LOG(INFO) << "Not adding footer to message body.";
1331 LOG(INFO) << "MIME-Version == " << msg.get_header(MIME_Version);
1332 LOG(INFO) << "Content-Type == " << msg.get_header(Content_Type);
1334 // LOG(INFO) << "body == " << msg.body;
// Sign last, so the signature is computed over the rewritten headers
// (and body, if it was changed above).
1337 dkim_sign(msg, sender, selector, key_file);
1340 } // namespace message