receiver: better filter processing
[chiroptera.git] / chibackend / parse.d
blob31688d1ee8607cec8b0b429343a0bc1d814a4166
1 /* E-Mail Client
2 * coded by Ketmar // Invisible Vector <ketmar@ketmar.no-ip.org>
3 * Understanding is not required. Only obedience.
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, version 3 of the License ONLY.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 module chibackend.parse is aliced;
19 import chibackend.decode;
21 import iv.strex;
22 private import iv.vfs;
23 private import iv.vfs.util;
24 private import iv.vfs.io : byLine;
25 private import chibackend : chiroCLIMailPath, DynStr;
28 // ////////////////////////////////////////////////////////////////////////// //
// Loads an rc-file and splits every meaningful line into words.
//
// File name resolution:
//   * names starting with '/' are used as-is;
//   * names starting with '~' are passed through `expandTilde()`;
//   * anything else is appended to `chiroCLIMailPath`.
//
// Empty lines and lines whose first non-blank char is '#' are skipped.
// Words are separated by chars with codes <= 32. A word may be enclosed in
// double quotes; inside the quotes a backslash makes the next char literal.
// A backslash that is the very last char of the line is kept as a literal
// backslash (there is nothing left to escape).
//
// Returns one `string[]` per non-empty line.
public string[][] loadRCFile (const(char)[] fname) {
  string[][] res;
  string ff;
  if (fname.length && fname[0] == '/') {
    ff = cast(string)fname; // it is safe to cast here
  } else if (fname.length && fname[0] == '~') {
    char[] dpath = new char[fname.length+128];
    dpath = expandTilde(dpath, fname);
    ff = cast(string)dpath; // it is safe to cast here
  } else {
    char[] dpath;
    dpath.reserve(chiroCLIMailPath.length+fname.length+65);
    dpath ~= chiroCLIMailPath;
    dpath ~= fname;
    ff = cast(string)dpath; // it is safe to cast here
  }
  foreach (auto line; VFile(ff).byLine) {
    line = line.xstrip;
    if (line.length == 0 || line[0] == '#') continue;
    string[] argv;
    while (line.length) {
      // skip blanks between words
      if (line[0] <= 32) { line = line[1..$]; continue; }
      char[] word;
      word.reserve(64);
      if (line[0] == '"') {
        // quoted word: runs up to the closing quote (or to the end of the line)
        line = line[1..$];
        while (line.length) {
          char ch = line[0];
          line = line[1..$];
          if (ch == '"') break;
          // the escaped char may be missing when the line ends with a backslash;
          // do not index into the empty slice in that case
          if (ch == '\\' && line.length) { ch = line[0]; line = line[1..$]; }
          word ~= ch;
        }
      } else {
        // bare word: runs up to the next blank
        while (line.length && line[0] > 32) {
          word ~= line[0];
          line = line[1..$];
        }
      }
      argv ~= cast(string)word; // it is safe to cast here
    }
    if (argv.length) res ~= argv;
  }
  return res;
}
76 // ////////////////////////////////////////////////////////////////////////// //
// Returns the offset of the first char after the '\n' that terminates the
// line containing `pos`.
// If `pos` is out of range, or there is no '\n' at or after `pos`, the
// result is `buf.length`; thus the result is always in [0..buf.length].
public usize skipOneLine (const(char)[] buf, usize pos) pure nothrow @trusted @nogc {
  import core.stdc.string : memchr;
  if (buf.length == 0 || pos >= buf.length) return buf.length;
  auto nl = cast(const(char)*)memchr(buf.ptr+pos, '\n', buf.length-pos);
  // step over the newline itself when it was found
  return (nl !is null ? cast(usize)(nl-buf.ptr)+1 : buf.length);
}
88 // ////////////////////////////////////////////////////////////////////////// //
// Calls `dg` once for each header field found in `buf`.
// A field is its first line plus all continuation lines that follow it
// (lines starting with a char <= ' '); the slice passed to `dg` includes the
// line terminators.
// Iteration stops at the first empty line (end of headers), at the end of
// the buffer, or when `dg` returns `false`.
// Does nothing if `dg` is `null` or `buf` is empty.
public void forEachHeaderLine (const(char)[] buf, bool delegate (const(char)[] line) dg) {
  if (dg is null) return;
  if (buf.length == 0) return;
  usize lpos = 0;
  while (lpos < buf.length) {
    if (isEmptyLine(buf, lpos)) return;
    usize nlpos = skipOneLine(buf, lpos);
    // collect continuations
    // the empty line that terminates the headers also starts with a char <= ' ',
    // but it is not a continuation: stop in front of it, so the check at the top
    // of the loop can see it and end the iteration
    while (nlpos < buf.length && buf.ptr[nlpos] <= ' ' && !isEmptyLine(buf, nlpos)) nlpos = skipOneLine(buf, nlpos);
    if (!dg(buf[lpos..nlpos])) return;
    lpos = nlpos;
  }
}
106 // ////////////////////////////////////////////////////////////////////////// //
// Checks if the line starting at `pos` consists of a single dot.
// The dot may be followed by an optional '\r', and then there must be
// either '\n' or the end of the buffer.
private bool isDotLine (const(char)[] buf, usize pos) pure nothrow @trusted @nogc {
  if (pos >= buf.length) return false;
  if (buf.ptr[pos] != '.') return false;
  usize next = pos+1;
  if (next < buf.length && buf.ptr[next] == '\r') ++next;
  if (next >= buf.length) return true;
  return (buf.ptr[next] == '\n');
}
115 // ////////////////////////////////////////////////////////////////////////// //
// Checks if the line starting at `pos` is empty, i.e. it is "\n" or "\r\n".
// A position at or past the end of the buffer counts as an empty line,
// but a lone '\r' that is the last char of the buffer does not.
private bool isEmptyLine (const(char)[] buf, usize pos) pure nothrow @trusted @nogc {
  if (pos >= buf.length) return true;
  if (buf.ptr[pos] == '\r') {
    ++pos;
    if (pos >= buf.length) return false;
  }
  // `pos` is always inside the buffer here
  return (buf.ptr[pos] == '\n');
}
123 // ////////////////////////////////////////////////////////////////////////// //
// Looks for the first line that consists of a single dot (message terminator).
// Returns `buf.length` if there is no such line.
// Otherwise returns the position of that line's first char (i.e. the position
// BEFORE the final dot and newline); with `withDot` set, returns the position
// right after the dot line instead.
public usize findMessageEnd(bool withDot=false) (const(char)[] buf) pure nothrow @trusted @nogc {
  for (usize cur = 0; cur < buf.length; cur = skipOneLine(buf, cur)) {
    if (!isDotLine(buf, cur)) continue;
    static if (withDot) {
      return skipOneLine(buf, cur);
    } else {
      return cur;
    }
  }
  // for the empty buffer this is 0
  return buf.length;
}
143 // ////////////////////////////////////////////////////////////////////////// //
// Looks for the empty line that separates headers from the body.
// Returns `buf.length` if there is no such line.
// Otherwise returns the position at the beginning of the empty line.
public usize findHeadersEnd (const(char)[] buf) pure nothrow @trusted @nogc {
  for (usize cur = 0; cur < buf.length; cur = skipOneLine(buf, cur)) {
    if (isEmptyLine(buf, cur)) return cur;
  }
  // for the empty buffer this is 0
  return buf.length;
}
157 // ////////////////////////////////////////////////////////////////////////// //
// Drops the first message from the buffer.
// A message ends with a line that consists of a single dot.
// Returns everything after that line, or `null` if there is no dot line,
// or if nothing follows it.
public T cutTopMessage(T:const(char)[]) (T buf) pure nothrow @trusted @nogc {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    usize cur = 0;
    while (cur < buf.length) {
      immutable usize next = skipOneLine(buf, cur);
      if (isDotLine(buf, cur)) {
        if (next >= buf.length) return null;
        return buf[next..$];
      }
      cur = next;
    }
    return null;
  }
}
172 // ////////////////////////////////////////////////////////////////////////// //
// Looks for the header field `fldname` in the headers block `buf`.
// `fldname` is matched case-insensitively; surrounding blanks and trailing
// ':' chars in it are ignored (the name itself should not contain ':').
// `fidx` is the number of matching fields to skip first (0 takes the first one).
// The search stops at the first empty line (end of headers).
// Returns field data (with continuation lines, stripped), or `null`
// (never returns empty values: fields with empty data are skipped).
public T findHeaderField(T:const(char)[]) (T buf, const(char)[] fldname, uint fidx=0) pure nothrow @trusted @nogc {
  static if (!is(T == typeof(null))) {
    if (buf.length == 0) return null;
    fldname = fldname.xstrip;
    while (fldname.length && (fldname[$-1] == ':' || fldname[$-1] <= ' ')) fldname = fldname[0..$-1];
    if (fldname.length == 0) return null;
    usize lpos = 0;
    while (lpos < buf.length) {
      if (isEmptyLine(buf, lpos)) return null;
      usize nlpos = skipOneLine(buf, lpos);
      auto hl = buf[lpos..nlpos];
      if (!hl.startsWithCI(fldname)) { lpos = nlpos; continue; }
      // the name must be followed by ':' (blanks before it are tolerated)
      hl = hl[fldname.length..$].xstrip;
      if (hl.length == 0 || hl.ptr[0] != ':') { lpos = nlpos; continue; }
      // i found her!
      if (fidx) { --fidx; lpos = nlpos; continue; }
      // collect continuations
      // the empty line that terminates the headers also starts with a char <= ' ',
      // but it is not a continuation: stop in front of it, otherwise the field
      // could swallow the separator (and a body that starts with a blank)
      while (nlpos < buf.length && buf.ptr[nlpos] <= ' ' && !isEmptyLine(buf, nlpos)) nlpos = skipOneLine(buf, nlpos);
      hl = buf[lpos..nlpos];
      // skip field name
      while (hl.length && hl.ptr[0] != ':') hl = hl[1..$];
      if (hl.length) hl = hl[1..$]; // skip ':'
      hl = hl.xstrip;
      if (hl.length == 0) { lpos = nlpos; continue; } // skip empty fields (because why not)
      return hl;
    }
  }
  return null;
}
208 // ////////////////////////////////////////////////////////////////////////// //
// Scans `buf` from `pos` for the first `termch` that is not inside double quotes.
// Each '"' toggles the "in quotes" state (so '"' itself is never a terminator).
// Returns the position of the terminator, or `buf.length` if there is none;
// the returned position is always valid for slicing.
private usize skipWord (T:const(char)[]) (T buf, usize pos, char termch) pure nothrow @trusted @nogc {
  static if (is(T == typeof(null))) {
    return 0;
  } else {
    bool quoted = false;
    for (; pos < buf.length; ++pos) {
      immutable char ch = buf.ptr[pos];
      if (ch == '"') quoted = !quoted;
      else if (!quoted && ch == termch) return pos;
    }
    return buf.length;
  }
}
231 // ////////////////////////////////////////////////////////////////////////// //
// Strips blanks around `buf`, and then removes one pair of enclosing
// double quotes, or one pair of enclosing "<>".
// The inner text is returned as-is (it is not stripped again).
private T strUnquote (T:const(char)[]) (T buf) pure nothrow @trusted @nogc {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    buf = buf.xstrip;
    if (buf.length < 2) return buf;
    immutable char first = buf.ptr[0];
    immutable char final_ = buf[$-1];
    if ((first == '"' && final_ == '"') || (first == '<' && final_ == '>')) return buf[1..$-1];
    return buf;
  }
}
246 // ////////////////////////////////////////////////////////////////////////// //
// Returns the field value without surrounding blanks, and without
// enclosing double quotes or "<>" quotes (see `strUnquote()`).
public T getFieldValue (T:const(char)[]) (T buf) pure nothrow @trusted @nogc {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    return buf.strUnquote;
  }
}
257 // ////////////////////////////////////////////////////////////////////////// //
// Cuts the next value from `buf`, and returns it.
// The value is either "<...>", or a double-quoted string (the quotes are
// removed in both cases, and a missing closing quote means "up to the end"),
// or a run of chars up to the next blank.
// `buf` is advanced past the value and stripped.
// Returns `null` when there is nothing left in `buf`.
public T getNextFieldValue (T:const(char)[]) (ref T buf) pure nothrow @trusted @nogc {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    buf = buf.xstrip;
    if (buf.length == 0) return null;
    immutable char opener = buf.ptr[0];
    if (opener == '<' || opener == '"') {
      // quoted value: look for the matching closing char
      immutable char closer = (opener == '<' ? '>' : '"');
      usize qend = 1;
      while (qend < buf.length && buf.ptr[qend] != closer) ++qend;
      T quoted = buf[1..qend];
      // step over the closing char, if it is there
      if (qend < buf.length) ++qend;
      buf = buf[qend..$].xstrip;
      return quoted;
    }
    // bare value: runs up to the next blank
    usize wend = 1;
    while (wend < buf.length && buf.ptr[wend] > 32) ++wend;
    T bare = buf[0..wend];
    buf = buf[wend..$].xstrip;
    return bare;
  }
}
290 // ////////////////////////////////////////////////////////////////////////// //
// Cuts the next ";"-terminated word from `buf`, and returns it
// (without trailing blanks). A ";" inside double quotes does not terminate.
// Eats the terminating ";", along with any blanks and empty ";" around the
// word (this is not standard, because it can skip the first empty token).
// Returns empty slice when there are no more words.
public T getFieldParams (T:const(char)[]) (ref T buf) pure nothrow @trusted @nogc {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    // drop leading blanks and separators
    usize skip = 0;
    while (skip < buf.length && (buf.ptr[skip] <= 32 || buf.ptr[skip] == ';')) ++skip;
    buf = buf[skip..$];
    if (buf.length == 0) return null;
    // it is guaranteed that we have at least one non-space char here
    immutable usize stop = skipWord(buf, 0, ';');
    T res = buf[0..stop].xstripright;
    // drop the terminator, and everything blank after it
    skip = stop;
    while (skip < buf.length && (buf.ptr[skip] <= 32 || buf.ptr[skip] == ';')) ++skip;
    buf = buf[skip..$];
    return res;
  }
}
311 // ////////////////////////////////////////////////////////////////////////// //
// Returns name part of `getFieldParams()` result: everything before the
// first unquoted '=' (or the whole text, if there is no '=').
// Removes double quotes, or "<>" quotes.
public T getParamName (T:const(char)[]) (T buf) pure nothrow @trusted @nogc {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    // drop leading blanks
    usize lead = 0;
    while (lead < buf.length && buf.ptr[lead] <= 32) ++lead;
    buf = buf[lead..$];
    if (buf.length == 0) return null;
    immutable usize eqpos = skipWord(buf, 0, '=');
    return strUnquote(buf[0..eqpos]);
  }
}
326 // ////////////////////////////////////////////////////////////////////////// //
// Returns value part of `getFieldParams()` result: everything after the
// first unquoted '='; returns `null` if there is no '='.
// Removes double quotes, or "<>" quotes.
public T getParamValue (T:const(char)[]) (T buf) pure nothrow @trusted @nogc {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    // drop leading blanks
    usize lead = 0;
    while (lead < buf.length && buf.ptr[lead] <= 32) ++lead;
    buf = buf[lead..$];
    if (buf.length == 0) return null;
    immutable usize eqpos = skipWord(buf, 0, '=');
    if (eqpos >= buf.length) return null; // no '=' at all
    auto tail = buf[eqpos+1..$].xstrip;
    return tail.strUnquote;
  }
}
343 // ////////////////////////////////////////////////////////////////////////// //
// Looks for the multipart boundary line ("--" followed by `boundary`),
// starting from the line that contains `stpos`.
// The boundary must be immediately followed by a line end ("\n", "\r\n"),
// or by the end of the buffer; the closing boundary has additional "--"
// after `boundary`, and sets `last`.
// Returns starting position of the found boundary.
// If no boundary was found, returns `buf.length`, and sets `last`.
// The same happens for empty `boundary`, and for `stpos` outside of the buffer.
public usize findBoundary (T:const(char)[]) (T buf, usize stpos, const(char)[] boundary, out bool last) pure nothrow @trusted @nogc {
  static if (is(T == typeof(null))) {
    // nothing to search in; "not found" is `buf.length`, which is 0 for `null`
    // (`null` itself cannot be returned as `usize`)
    last = true;
    return 0;
  } else {
    if (boundary.length == 0 || stpos >= buf.length) { last = true; return buf.length; }
    last = false;
    usize pos = stpos;
    // just in case, find line beginning
    while (pos > 0 && buf.ptr[pos-1] != '\n') --pos;
    while (pos < buf.length) {
      immutable usize bpos = pos;
      pos = skipOneLine(buf, pos);
      // the line must be long enough to hold "--" and the boundary
      if (pos-bpos < boundary.length+2) continue;
      if (buf.ptr[bpos] != '-' || buf.ptr[bpos+1] != '-') continue;
      if (buf[bpos+2..bpos+2+boundary.length] != boundary) continue;
      usize epos = bpos+2+boundary.length;
      // normal boundary: nothing but the line end after it
      if (epos >= buf.length) return bpos;
      if (buf.ptr[epos] == '\n') return bpos;
      if (buf.ptr[epos] == '\r' && (epos+1 >= buf.length || buf.ptr[epos+1] == '\n')) return bpos;
      // closing boundary: "--", and then the line end
      if (buf.ptr[epos] == '-' && epos+1 < buf.length && buf.ptr[epos+1] == '-') {
        epos += 2;
        last = true;
        if (epos >= buf.length) return bpos;
        if (buf.ptr[epos] == '\n') return bpos;
        if (buf.ptr[epos] == '\r' && (epos+1 >= buf.length || buf.ptr[epos+1] == '\n')) return bpos;
        // some garbage after the closing "--", this is not a boundary
        last = false;
      }
    }
    last = true;
    return buf.length;
  }
}
381 // ////////////////////////////////////////////////////////////////////////// //
// One decoded part of a message.
public struct Content {
  // MIME type; always lowercased
  DynStr mime;
  // attachment name; `null` for normal parts
  DynStr name;
  // format
  DynStr format;
  // part contents; properly decoded
  DynStr data;
}
390 // ////////////////////////////////////////////////////////////////////////// //
391 public void parseContent (ref Content[] content, const(char)[] hdrs, const(char)[] body, bool noattaches=false) {
392 const(char)[] enc = findHeaderField(hdrs, "Content-Transfer-Encoding").getFieldValue;
393 if (enc.length == 0) enc = "8bit";
395 // parse content type
396 DynStr name;
397 DynStr boundary;
398 DynStr format = "";
399 DynStr charset;
400 DynStr mime = "text/plain";
401 auto ctype = findHeaderField(hdrs, "Content-Type");
402 if (ctype.length) {
403 // mime type
404 mime = getFieldParams(ctype).getFieldValue;
405 mime.lowerInPlace();
406 if (mime.length == 0) mime = "text/plain";
407 else if (mime == "text" || mime == "text/") mime = "text/plain";
408 // additional fields
409 while (ctype.length) {
410 auto kv = getFieldParams(ctype);
411 if (kv.length == 0) continue;
412 auto n = getParamName(kv);
413 auto v = getParamValue(kv);
414 // charset
415 if (n.strEquCI("charset")) {
416 if (charset.length != 0) continue;
417 v = v.xstrip;
418 if (v.length != 0) {
419 charset = v;
420 charset.lowerInPlace();
422 continue;
424 // format
425 if (n.strEquCI("format")) {
426 if (format.length != 0) continue;
427 v = v.xstrip;
428 if (v.length != 0) {
429 format = v;
430 format.lowerInPlace();
432 continue;
434 // name
435 if (n.strEquCI("name")) {
436 if (name.length != 0) continue;
437 v = v.sanitizeFileNameStr;
438 if (v.length != 0) name = v;
439 continue;
441 // boundary
442 if (n.strEquCI("boundary")) {
443 if (boundary.length != 0) continue;
444 if (v.length != 0) boundary = v;
445 continue;
448 if (mime == "text/richtext" || mime == "text/enriched") {
449 mime = "text/plain";
450 format = "enriched";
451 } else if (mime.startsWith("text/html") || mime.startsWith("text/xhtml")) {
452 mime = "text/html";
453 format.clear();
454 } else if (mime.startsWith("text/")) {
455 if (format.length) {
456 format ~= "; ";
457 format ~= mime[5..$];
458 } else {
459 format = mime[5..$];
461 mime = "text/plain";
464 if (charset.length == 0) charset = "us-ascii";
466 bool inline = true;
467 auto disp = findHeaderField(hdrs, "Content-Disposition");
468 while (disp.length) {
469 auto kv = getFieldParams(disp);
470 if (kv.length == 0) continue;
471 auto n = getParamName(kv);
472 auto v = getParamValue(kv);
473 if (n.strEquCI("attachment")) {
474 inline = false;
475 continue;
477 // filename?
478 if (n.strEquCI("filename")) {
479 v = v.sanitizeFileNameStr;
480 if (v.length != 0) name = v;
481 continue;
486 writeln("--------------------------------");
487 writeln("encoding: <", enc, ">");
488 writeln("name : <", name, ">");
489 writeln("boundary: <", boundary, ">");
490 writeln("format : <", format, ">");
491 writeln("charset : <", charset, ">");
492 writeln("inline : ", inline);
495 if (boundary.length == 0 || (mime != "multipart" && !mime.startsWith("multipart/"))) {
496 immutable bool istext = mime.startsWith("text/");
497 // not a multipart
498 if (noattaches && (!istext || !inline)) return; // not a text, or not an inline text, do not want
499 Content cc;
500 cc.mime = mime.idup;
501 cc.name = (inline && istext ? null : name.idup);
502 cc.format = (format.length ? format.idup : "");
503 if (istext) {
504 cc.data = decodeContent(body, enc).xstripright.recodeToUtf8(charset); // it is safe to cast here
505 } else {
506 cc.data = decodeContent(body, enc); // it is safe to cast here
510 static uint cnt = 0;
511 import std.string : format;
512 auto fo = VFile("z__%04u.bin".format(cnt++), "w");
513 fo.rawWriteExact(cc.data[]);
516 content ~= cc;
517 return;
520 // multipart, process it recursively
521 bool last;
522 for (;;) {
523 usize bpos = findBoundary(body, 0, boundary, out last);
524 if (last) break;
525 // skip it
526 bpos = skipOneLine(body, bpos);
527 // find next boundary
528 immutable usize epos = findBoundary(body, bpos, boundary, out last);
529 // get part
530 const(char)[] bpart = body[bpos..epos].xstripright;
531 if (bpart.length != 0) {
532 //{ writeln("===[", enc, "]===[", boundary, "]=== (bpos=", bpos, "; epos=", epos, ")"); writeln(bpart); writeln("------------------"); }
533 bpart = decodeContent(bpart, enc);
534 // get headers
535 usize hdrend = findHeadersEnd(bpart);
536 hdrs = bpart[0..hdrend];
537 hdrend = skipOneLine(bpart, hdrend);
538 bpart = bpart[hdrend..$];
539 parseContent(ref content, hdrs, bpart, noattaches);
541 body = body[epos..$];