more system calls for sanitize
[ghsmtp.git] / Reply.cpp
blobb5afad7bf61c97cc60e253542f97a5bc56de37a4
1 #include "Reply.hpp"
3 #include "Hash.hpp"
4 #include "Mailbox.hpp"
5 #include "iequal.hpp"
6 #include "is_ascii.hpp"
8 #include <algorithm>
9 #include <cctype>
10 #include <iterator>
11 #include <string>
13 #include <cppcodec/base32_crockford.hpp>
15 #include <arpa/inet.h>
16 #include <time.h>
18 #include <glog/logging.h>
20 #include <fmt/format.h>
21 #include <fmt/ostream.h>
23 using std::begin;
24 using std::end;
// Number of leading hash characters kept by hash_rep().
constexpr int hash_length_min = 6; // 1 in a billion
constexpr int hash_length_max = 10;

// Candidate separator characters for the readable (non-blob) reply
// address format. enc_reply() and dec_reply() walk them in this order.
constexpr char sep_chars_array[] = {
    '_',
    '=' // Must not be allowed in domain names, must not be in base32 alphabet.
};

constexpr std::string_view sep_chars{sep_chars_array, sizeof(sep_chars_array)};

// Prefix that marks a legacy reply address (see old_dec_reply()).
constexpr std::string_view REP_PREFIX = "rep="; // legacy reply prefix
// Return a copy of `data` in which every byte has been passed through
// std::tolower().
std::string to_lower(std::string data)
{
  // std::tolower() takes and returns int. The argument goes in as unsigned
  // char (a negative char value would be undefined behaviour) and the
  // result is cast back to char explicitly, so std::transform never has to
  // narrow an int into the string implicitly.
  std::transform(data.begin(), data.end(), data.begin(), [](unsigned char c) {
    return static_cast<char>(std::tolower(c));
  });
  return data;
}
45 static std::string hash_rep(Reply::from_to const& rep, std::string_view secret)
47 Hash h;
48 h.update(secret);
49 h.update(to_lower(rep.mail_from));
50 h.update(to_lower(rep.rcpt_to_local_part));
51 return to_lower(h.final().substr(0, hash_length_min));
54 std::string enc_reply_blob(Reply::from_to const& rep, std::string_view secret)
56 auto const hash = hash_rep(rep, secret);
58 auto const pkt = fmt::format("{}{}{}{}{}", // clang-format off
59 hash, '\0',
60 rep.rcpt_to_local_part, '\0',
61 rep.mail_from); // clang-format on
63 return to_lower(cppcodec::base32_crockford::encode(pkt));
66 std::string Reply::enc_reply(Reply::from_to const& rep, std::string_view secret)
68 auto const result = Mailbox::parse(rep.mail_from);
69 if (!result) {
70 throw std::invalid_argument("invalid mailbox syntax in enc_reply");
73 // If it's "local part"@example.com or local-part@[127.0.0.1] we
74 // must fall back to the blob style.
75 if (result->local_type == Mailbox::local_types::quoted_string ||
76 result->domain_type == Mailbox::domain_types::address_literal) {
77 return enc_reply_blob(rep, secret);
80 auto const rcpt_to =
81 Mailbox::parse(fmt::format("{}@x.y", rep.rcpt_to_local_part));
82 if (!rcpt_to) {
83 throw std::invalid_argument("invalid local-part syntax in enc_reply");
85 if (rcpt_to->local_type == Mailbox::local_types::quoted_string) {
86 return enc_reply_blob(rep, secret);
89 for (auto sep_char : sep_chars) {
90 if (rep.rcpt_to_local_part.find(sep_char) == std::string_view::npos) {
91 // Must never be in the domain part, that's crazy
92 CHECK_EQ(result->domain.find(sep_char), std::string_view::npos);
93 // The sep_char *can* be in the result->local part
94 auto const hash_enc = hash_rep(rep, secret);
95 return fmt::format("{}{}at{}{}{}{}{}{}", // clang-format off
96 result->local, sep_char, /*at*/ sep_char,
97 result->domain, sep_char,
98 hash_enc, sep_char,
99 rep.rcpt_to_local_part); // clang-format on
103 return enc_reply_blob(rep, secret);
// Split `str` on `delim` and return the non-empty fields in order.
// Runs of consecutive delimiters, and delimiters at either end, produce
// no empty fields.
auto split(std::string const& str, const char delim)
{
  std::vector<std::string> fields;

  auto pos = str.find_first_not_of(delim);
  while (pos != std::string::npos) {
    // `stop` is npos for the final field; substr() clamps the length.
    auto const stop = str.find(delim, pos);
    fields.push_back(str.substr(pos, stop - pos));
    pos = str.find_first_not_of(delim, stop);
  }

  return fields;
}
120 static std::optional<Reply::from_to> dec_reply_blob(std::string_view addr,
121 std::string_view secret)
123 auto const pktv = cppcodec::base32_crockford::decode(addr);
124 auto const pkt =
125 std::string(reinterpret_cast<char const*>(pktv.data()), pktv.size());
127 auto const parts = split(pkt, '\0');
129 if (parts.size() != 3) {
130 LOG(WARNING) << "invalid blob format";
131 return {};
134 auto const hash = parts[0];
136 Reply::from_to rep;
137 rep.rcpt_to_local_part = parts[1];
138 rep.mail_from = parts[2];
140 auto const hash_computed = hash_rep(rep, secret);
142 if (!iequal(hash_computed, hash)) {
143 LOG(WARNING) << "hash check failed";
144 return {};
147 return rep;
// True when every character of `s` is a Crockford base32 symbol, in
// either case: the digits plus the letters with I, L, O and U left out.
// An empty string counts as pure.
static bool is_pure_base32(std::string_view s)
{
  static constexpr std::string_view crockford_symbols =
      "0123456789"
      "ABCDEFGHJKMNPQRSTVWXYZ"
      "abcdefghjkmnpqrstvwxyz";

  for (auto const ch : s) {
    if (crockford_symbols.find(ch) == std::string_view::npos)
      return false; // found something outside the alphabet
  }
  return true;
}
175 std::optional<Reply::from_to>
176 try_decode(std::string_view addr, std::string_view secret, char sep_char)
178 // {mail_from.local}=at={mail_from.domain}={hash}={rcpt_to_local_part}
179 // or
180 // {mail_from.local}={mail_from.domain}={hash}={rcpt_to_local_part}
182 auto const rcpt_loc_sep = addr.find_last_of(sep_char);
183 if (rcpt_loc_sep == std::string_view::npos)
184 return {};
185 auto const rcpt_loc_pos = rcpt_loc_sep + 1;
186 auto const rcpt_loc_len = addr.length() - rcpt_loc_pos;
187 auto const rcpt_loc = addr.substr(rcpt_loc_pos, rcpt_loc_len);
189 auto const hash_sep = addr.substr(0, rcpt_loc_sep).find_last_of(sep_char);
190 if (hash_sep == std::string_view::npos)
191 return {};
192 auto const hash_pos = hash_sep + 1;
193 auto const hash_len = rcpt_loc_sep - hash_pos;
194 auto const hash = addr.substr(hash_pos, hash_len);
196 // The hash part must look like a hash
197 if (!is_pure_base32(hash))
198 return {};
200 auto const mail_from_dom_sep =
201 addr.substr(0, hash_sep).find_last_of(sep_char);
202 if (mail_from_dom_sep == std::string_view::npos)
203 return {};
204 auto const mail_from_dom_pos = mail_from_dom_sep + 1;
205 auto const mail_from_dom_len = hash_sep - mail_from_dom_pos;
206 auto const mail_from_dom = addr.substr(mail_from_dom_pos, mail_from_dom_len);
208 auto mail_from_loc = addr.substr(0, mail_from_dom_sep);
210 // Check if the local part ends with _at and remove it.
211 if (iends_with(mail_from_loc, fmt::format("{}at", sep_char))) {
212 mail_from_loc = addr.substr(0, mail_from_dom_sep - 3);
215 auto const mail_from = fmt::format("{}@{}", mail_from_loc, mail_from_dom);
217 // The mail_from part must be a valid Mailbox address.
218 if (!Mailbox::validate(mail_from))
219 return {};
221 Reply::from_to rep;
222 rep.mail_from = mail_from;
223 rep.rcpt_to_local_part = rcpt_loc;
225 auto const hash_computed = hash_rep(rep, secret);
227 if (!iequal(hash_computed, hash)) {
228 LOG(WARNING) << "hash check failed";
229 return {};
232 return rep;
// Legacy format reply address with the REP= prefix. We no longer
// generate these addresses, but we continue to decode them in a
// compatible way.
241 std::optional<Reply::from_to> old_dec_reply(std::string_view addr,
242 std::string_view secret)
244 addr.remove_prefix(REP_PREFIX.length());
246 if (is_pure_base32(addr)) {
247 // if everything after REP= is base32 we have a blob
248 return dec_reply_blob(addr, secret);
251 // REP= has been removed, addr is now:
252 // {hash}={rcpt_to_local_part}={mail_from.local}={mail_from.domain}
253 // ^1st ^2nd ^last
254 // and mail_from.local can contain '=' chars
256 auto const first_sep = addr.find_first_of('=');
257 auto const last_sep = addr.find_last_of('=');
258 auto const second_sep = addr.find_first_of('=', first_sep + 1);
260 if (first_sep == last_sep || second_sep == last_sep) {
261 LOG(WARNING) << "unrecognized legacy reply format " << addr;
262 return {};
265 auto const rcpt_to_pos = first_sep + 1;
266 auto const mf_loc_pos = second_sep + 1;
267 auto const mf_dom_pos = last_sep + 1;
269 auto const rcpt_to_len = second_sep - rcpt_to_pos;
270 auto const mf_loc_len = last_sep - mf_loc_pos;
272 auto const reply_hash = addr.substr(0, first_sep);
273 auto const rcpt_to_loc = addr.substr(rcpt_to_pos, rcpt_to_len);
274 auto const mail_from_loc = addr.substr(mf_loc_pos, mf_loc_len);
275 auto const mail_from_dom = addr.substr(mf_dom_pos, std::string_view::npos);
277 Reply::from_to rep;
278 rep.rcpt_to_local_part = rcpt_to_loc;
279 rep.mail_from = fmt::format("{}@{}", mail_from_loc, mail_from_dom);
281 auto const hash_enc = hash_rep(rep, secret);
283 if (!iequal(reply_hash, hash_enc)) {
284 return {};
287 return rep;
290 std::optional<Reply::from_to> Reply::dec_reply(std::string_view addr,
291 std::string_view secret)
293 // Check for legacy format, process appropriately.
294 if (istarts_with(addr, REP_PREFIX)) {
295 return old_dec_reply(addr, secret);
298 auto const addr_mbx = Mailbox::parse(fmt::format("{}@x.y", addr));
299 if (!addr_mbx) {
300 throw std::invalid_argument("invalid address syntax in dec_reply");
303 // The blob for the address <"x"@y.z> is 26 bytes long.
304 if (is_pure_base32(addr)) {
305 // if everything is base32 we might have a blob
306 if (addr.length() > 25) {
307 return dec_reply_blob(addr, secret);
309 return {}; // normal local-part
312 for (auto sep_char : sep_chars) {
313 auto const rep = try_decode(addr, secret, sep_char);
314 if (rep)
315 return rep;
318 LOG(WARNING) << "not a reply address: " << addr;
320 return {};