sqlite: added message builder to build proper .eml messages (with possible attaches...
[chiroptera.git] / sqbase_experiment / chiroptera / parse.d
blob811cf97973135ac27d372b537b35bd68410f1a5b
1 /* E-Mail Client
2 * coded by Ketmar // Invisible Vector <ketmar@ketmar.no-ip.org>
3 * Understanding is not required. Only obedience.
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, version 3 of the License ONLY.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 module chiroptera.parse is aliced;
19 import chiroptera.decode;
21 import iv.strex;
22 private import iv.vfs;
23 private import iv.vfs.util;
24 private import iv.vfs.io : byLine;
25 private import chiroptera : chiroCLIMailPath;
28 // ////////////////////////////////////////////////////////////////////////// //
// loads rc file, and splits each meaningful line into words
//
// file name resolution:
//   * name starting with '/' is used as-is
//   * name starting with '~' is passed through `expandTilde()`
//   * anything else is appended to `chiroCLIMailPath`
//
// each line is stripped with `xstrip`; empty lines and lines starting with '#' are skipped
// words are delimited by chars with codes <= 32
// a word starting with a double quote lasts until the closing quote (or until the end
// of the line); inside such a word a backslash makes the next char literal
// a backslash that is the very last char of the line is kept as a literal backslash
//
// returns one `string[]` for each line that contains at least one word
public string[][] loadRCFile (const(char)[] fname) {
  string[][] res;
  string ff;
  if (fname.length && fname[0] == '/') {
    ff = cast(string)fname; // it is safe to cast here
  } else if (fname.length && fname[0] == '~') {
    char[] dpath = new char[fname.length+128];
    dpath = expandTilde(dpath, fname);
    ff = cast(string)dpath; // it is safe to cast here
  } else {
    char[] dpath;
    dpath.reserve(chiroCLIMailPath.length+fname.length+65);
    dpath ~= chiroCLIMailPath;
    dpath ~= fname;
    ff = cast(string)dpath; // it is safe to cast here
  }
  foreach (auto line; VFile(ff).byLine) {
    line = line.xstrip;
    if (line.length == 0 || line[0] == '#') continue;
    string[] argv;
    while (line.length) {
      // skip word delimiters
      if (line[0] <= 32) { line = line[1..$]; continue; }
      // each word gets its own buffer, so it doesn't depend on the line storage
      char[] word;
      word.reserve(64);
      if (line[0] == '"') {
        // quoted word
        line = line[1..$];
        while (line.length) {
          char ch = line[0];
          line = line[1..$];
          if (ch == '"') break;
          if (ch == '\\') {
            // trailing backslash has nothing to escape: keep it, and stop
            // (indexing the empty `line` here would be a range violation)
            if (line.length == 0) { word ~= ch; break; }
            ch = line[0];
            line = line[1..$];
          }
          word ~= ch;
        }
      } else {
        // bare word: everything up to the next delimiter
        while (line.length && line[0] > 32) {
          word ~= line[0];
          line = line[1..$];
        }
      }
      argv ~= cast(string)word; // it is safe to cast here
    }
    if (argv.length) res ~= argv;
  }
  return res;
}
76 // ////////////////////////////////////////////////////////////////////////// //
// moves past the line that contains `pos`
// result is the index right after the nearest '\n' at or after `pos`,
// or `buf.length` if there is no such '\n' (or if `pos` is out of range)
// returned position is always [0..buf.length]
public usize skipOneLine (const(char)[] buf, usize pos) pure nothrow @trusted @nogc {
  immutable usize total = buf.length;
  for (usize idx = pos; idx < total; ++idx) {
    if (buf.ptr[idx] == '\n') return idx+1;
  }
  return total;
}
88 // ////////////////////////////////////////////////////////////////////////// //
// checks if the text at `pos` is a single dot followed by the line end
// accepted line ends: LF, CR+LF, CR at the very end of the buffer, or the end of the buffer
// it is not checked that `pos` is at the line start, this is the caller's responsibility
private bool isDotLine (const(char)[] buf, usize pos) pure nothrow @trusted @nogc {
  immutable usize total = buf.length;
  if (pos >= total) return false;
  if (buf.ptr[pos] != '.') return false;
  usize next = pos+1;
  // optional CR before LF
  if (next < total && buf.ptr[next] == '\r') ++next;
  if (next >= total) return true;
  return (buf.ptr[next] == '\n');
}
97 // ////////////////////////////////////////////////////////////////////////// //
// checks if the line starting at `pos` is empty (i.e. it is LF or CR+LF)
// position at or past the end of the buffer counts as an empty line
// lone CR at the very end of the buffer is NOT an empty line
private bool isEmptyLine (const(char)[] buf, usize pos) pure nothrow @trusted @nogc {
  if (pos >= buf.length) return true;
  usize cur = pos;
  if (buf.ptr[cur] == '\r') {
    ++cur;
    if (cur >= buf.length) return false; // CR without LF
  }
  // `cur` is always inside the buffer here
  return (buf.ptr[cur] == '\n');
}
105 // ////////////////////////////////////////////////////////////////////////// //
// looks for the first line that consists of a single dot
// returns `buf.length` if there is no such line
// otherwise returns the position of that dot line (`withDot` is `false`),
// or the position right after it (`withDot` is `true`)
public usize findMessageEnd(bool withDot=false) (const(char)[] buf) pure nothrow @trusted @nogc {
  for (usize linestart = 0; linestart < buf.length; linestart = skipOneLine(buf, linestart)) {
    if (!isDotLine(buf, linestart)) continue;
    static if (withDot) {
      return skipOneLine(buf, linestart);
    } else {
      return linestart;
    }
  }
  return buf.length;
}
125 // ////////////////////////////////////////////////////////////////////////// //
// looks for the first empty line
// returns `buf.length` if there is no empty line
// otherwise returns the position at the beginning of the empty line
public usize findHeadersEnd (const(char)[] buf) pure nothrow @trusted @nogc {
  usize linestart = 0;
  while (linestart < buf.length && !isEmptyLine(buf, linestart)) {
    linestart = skipOneLine(buf, linestart);
  }
  // either we stopped at the empty line, or we ran out of data (and `linestart` is `buf.length`)
  return linestart;
}
139 // ////////////////////////////////////////////////////////////////////////// //
// drops everything up to (and including) the first dot line
// returns the rest of the buffer
// returns `null` if there is no dot line, or if nothing follows it
public T cutTopMessage(T:const(char)[]) (T buf) pure nothrow @trusted @nogc {
  static if (!is(T == typeof(null))) {
    usize cur = 0;
    while (cur < buf.length) {
      immutable bool dotline = isDotLine(buf, cur);
      cur = skipOneLine(buf, cur);
      if (dotline) {
        if (cur >= buf.length) return null;
        return buf[cur..$];
      }
    }
  }
  return null;
}
154 // ////////////////////////////////////////////////////////////////////////// //
// looks for the header field named `fldname`, and returns its data
// the name is matched with `startsWithCI()`, and it must be followed by ':'
// (blanks between the name and ':' are allowed)
// `fidx` is the number of matching fields to skip first (0 takes the first one)
// the search stops at the first empty line (i.e. at the end of the headers)
// continuation lines (lines starting with a char <= ' ') belong to the field data
// returns field data (stripped with `xstrip`), or `null` (never returns empty values)
// field name should not contain ':' (trailing colons and blanks are removed from `fldname`)
public T findHeaderField(T:const(char)[]) (T buf, const(char)[] fldname, uint fidx=0) pure nothrow @trusted @nogc {
  static if (!is(T == typeof(null))) {
    if (buf.length == 0) return null;
    fldname = fldname.xstrip;
    while (fldname.length && (fldname[$-1] == ':' || fldname[$-1] <= ' ')) fldname = fldname[0..$-1];
    if (fldname.length == 0) return null;
    usize lpos = 0;
    while (lpos < buf.length) {
      // empty line terminates headers
      if (isEmptyLine(buf, lpos)) return null;
      usize nlpos = skipOneLine(buf, lpos);
      auto hl = buf[lpos..nlpos];
      if (!hl.startsWithCI(fldname)) { lpos = nlpos; continue; }
      // the name must be followed by a colon, or it is some other field with the same prefix
      hl = hl[fldname.length..$].xstrip;
      if (hl.length == 0 || hl.ptr[0] != ':') { lpos = nlpos; continue; }
      // i found her!
      if (fidx) { --fidx; lpos = nlpos; continue; }
      // collect continuations
      // empty line starts with a char <= ' ' too, but it ends the headers:
      // without this check, indented body lines would be glued to the field data
      while (nlpos < buf.length && buf.ptr[nlpos] <= ' ' && !isEmptyLine(buf, nlpos)) nlpos = skipOneLine(buf, nlpos);
      hl = buf[lpos..nlpos];
      // skip field name
      while (hl.length && hl.ptr[0] != ':') hl = hl[1..$];
      if (hl.length) hl = hl[1..$]; // skip ':'
      hl = hl.xstrip;
      if (hl.length == 0) { lpos = nlpos; continue; } // skip empty fields (because why not)
      return hl;
    }
  }
  return null;
}
190 // ////////////////////////////////////////////////////////////////////////// //
// scans from `pos` to the first `termch` that is not inside double quotes
// returns the position of that `termch`, or `buf.length` if there is none
// double quote always toggles the "quoted" state, even if `termch` is a double quote
// returned position is always valid for slicing
private usize skipWord (T:const(char)[]) (T buf, usize pos, char termch) pure nothrow @trusted @nogc {
  static if (is(T == typeof(null))) {
    return 0;
  } else {
    bool quoted = false;
    for (usize idx = pos; idx < buf.length; ++idx) {
      immutable char ch = buf.ptr[idx];
      if (ch == '"') { quoted = !quoted; continue; }
      if (!quoted && ch == termch) return idx;
    }
    return buf.length;
  }
}
213 // ////////////////////////////////////////////////////////////////////////// //
// strips the string with `xstrip`, then removes one pair of enclosing
// double quotes, or one pair of enclosing "<>" (if there is any)
private T strUnquote (T:const(char)[]) (T buf) pure nothrow @trusted @nogc {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    buf = buf.xstrip;
    // one char cannot be both the opening and the closing quote
    if (buf.length < 2) return buf;
    immutable char first = buf.ptr[0];
    immutable char final_ = buf[$-1];
    immutable bool wrapped = ((first == '"' && final_ == '"') || (first == '<' && final_ == '>'));
    return (wrapped ? buf[1..$-1] : buf);
  }
}
228 // ////////////////////////////////////////////////////////////////////////// //
// returns the field value without blanks around it,
// and without enclosing double quotes, or "<>" quotes
public T getFieldValue (T:const(char)[]) (T buf) pure nothrow @trusted @nogc {
  static if (!is(T == typeof(null))) {
    return buf.strUnquote;
  } else {
    return null;
  }
}
239 // ////////////////////////////////////////////////////////////////////////// //
// cuts the next value from `buf`, and returns it
// the value is either `<...>`, or `"..."` (returned without the quotes; missing
// closing quote means "up to the end"), or a run of chars with codes > 32
// `buf` is advanced past the value, and stripped with `xstrip`
// returns `null` if there is nothing left in `buf`
public T getNextFieldValue (T:const(char)[]) (ref T buf) pure nothrow @trusted @nogc {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    buf = buf.xstrip;
    if (buf.length == 0) return null;
    immutable char opener = buf.ptr[0];
    usize stop = 1;
    T token;
    if (opener == '<' || opener == '"') {
      // quoted value
      immutable char closer = (opener == '<' ? '>' : '"');
      while (stop < buf.length && buf.ptr[stop] != closer) ++stop;
      token = buf[1..stop];
      if (stop < buf.length) ++stop; // eat closing quote
    } else {
      // bare value
      while (stop < buf.length && buf.ptr[stop] > 32) ++stop;
      token = buf[0..stop];
    }
    buf = buf[stop..$].xstrip;
    return token;
  }
}
272 // ////////////////////////////////////////////////////////////////////////// //
// get next word until ";" (semicolons inside double quotes don't count)
// eats ";", and all blanks and ";" that follow it
// skips empty ";" (this is not standard, because it can skip the first empty token)
// returned word has no trailing blanks
// returns `null` when there are no more words
public T getFieldParams (T:const(char)[]) (ref T buf) pure nothrow @trusted @nogc {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    // drop leading blanks and separators
    usize skip = 0;
    while (skip < buf.length && (buf.ptr[skip] <= 32 || buf.ptr[skip] == ';')) ++skip;
    buf = buf[skip..$];
    if (buf.length == 0) return null;
    immutable usize stop = skipWord(buf, 0, ';');
    // it is guaranteed that we have at least one non-space char here
    T word = buf[0..stop].xstripright;
    // drop the separator, and everything ignorable after it
    usize rest = stop;
    while (rest < buf.length && (buf.ptr[rest] <= 32 || buf.ptr[rest] == ';')) ++rest;
    buf = buf[rest..$];
    return word;
  }
}
293 // ////////////////////////////////////////////////////////////////////////// //
// returns name part of `getFieldParams()` result
// (i.e. everything before the first unquoted "=", or the whole string if there is no "=")
// removes double quotes, or "<>" quotes
// returns `null` for empty or all-blank string
public T getParamName (T:const(char)[]) (T buf) pure nothrow @trusted @nogc {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    usize first = 0;
    while (first < buf.length && buf.ptr[first] <= 32) ++first;
    if (first >= buf.length) return null;
    buf = buf[first..$];
    immutable usize eqpos = skipWord(buf, 0, '=');
    return strUnquote(buf[0..eqpos]);
  }
}
308 // ////////////////////////////////////////////////////////////////////////// //
// returns value part of `getFieldParams()` result
// (i.e. everything after the first unquoted "=")
// removes double quotes, or "<>" quotes
// returns `null` for empty or all-blank string, or if there is no "="
public T getParamValue (T:const(char)[]) (T buf) pure nothrow @trusted @nogc {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    usize first = 0;
    while (first < buf.length && buf.ptr[first] <= 32) ++first;
    if (first >= buf.length) return null;
    buf = buf[first..$];
    immutable usize eqpos = skipWord(buf, 0, '=');
    if (eqpos >= buf.length) return null; // no "=", so no value
    return strUnquote(buf[eqpos+1..$].xstrip);
  }
}
325 // ////////////////////////////////////////////////////////////////////////// //
// looks for the multipart boundary line, i.e. for the line that is
// "--" followed by `boundary`, optionally followed by "--" (closing boundary),
// and terminated with LF, CR+LF, or with the end of the buffer
// the search starts at the beginning of the line that contains `stpos`
// returns starting position of the found boundary
// if no boundary was found, returns `buf.length`,
// `last` is set to `true` for closing boundary, and when nothing was found
// (this includes empty `boundary`, and `stpos` past the end of the buffer)
public usize findBoundary (T:const(char)[]) (T buf, usize stpos, const(char)[] boundary, out bool last) pure nothrow @trusted @nogc {
  static if (is(T == typeof(null))) {
    last = true;
    return 0; // this is `buf.length` for `null` buffer (`null` cannot be returned as `usize`)
  } else {
    if (boundary.length == 0 || stpos >= buf.length) { last = true; return buf.length; }
    last = false;
    usize pos = stpos;
    // just in case, find line beginning
    while (pos > 0 && buf.ptr[pos-1] != '\n') --pos;
    while (pos < buf.length) {
      immutable usize bpos = pos;
      pos = skipOneLine(buf, pos);
      // the line should be long enough to hold "--" and the boundary (this also makes the slice below valid)
      if (pos-bpos < boundary.length+2) continue;
      if (buf.ptr[bpos] != '-' || buf.ptr[bpos+1] != '-') continue;
      if (buf[bpos+2..bpos+2+boundary.length] != boundary) continue;
      usize epos = bpos+2+boundary.length;
      // normal boundary?
      if (epos >= buf.length) return bpos;
      if (buf.ptr[epos] == '\n') return bpos;
      if (buf.ptr[epos] == '\r' && (epos+1 >= buf.length || buf.ptr[epos+1] == '\n')) return bpos;
      // closing boundary?
      if (buf.ptr[epos] == '-' && epos+1 < buf.length && buf.ptr[epos+1] == '-') {
        epos += 2;
        last = true;
        if (epos >= buf.length) return bpos;
        if (buf.ptr[epos] == '\n') return bpos;
        if (buf.ptr[epos] == '\r' && (epos+1 >= buf.length || buf.ptr[epos+1] == '\n')) return bpos;
        // some garbage after "--", this is not a boundary
        last = false;
      }
    }
    last = true;
    return buf.length;
  }
}
363 // ////////////////////////////////////////////////////////////////////////// //
// one decoded part of a message
public struct Content {
  // mime type of the part
  const(char)[] mime; // always lowercased
  // file name
  const(char)[] name; // for attachments; `null` for normal parts
  // value of the "format" parameter of the content type
  const(char)[] format;
  // part contents; empty (but not `null`) by default
  const(char)[] data = ""; // properly decoded
}
372 // ////////////////////////////////////////////////////////////////////////// //
373 public void parseContent (ref Content[] content, const(char)[] hdrs, const(char)[] body, bool noattaches=false) {
374 const(char)[] enc = findHeaderField(hdrs, "Content-Transfer-Encoding").getFieldValue;
375 if (enc.length == 0) enc = "8bit";
377 // parse content type
378 const(char)[] name;
379 const(char)[] boundary;
380 const(char)[] format = "";
381 const(char)[] charset;
382 const(char)[] mime = "text/plain";
383 auto ctype = findHeaderField(hdrs, "Content-Type");
384 if (ctype.length) {
385 // mime type
386 mime = getFieldParams(ctype).getFieldValue.toLowerStr;
387 if (mime.length == 0) mime = "text/plain";
388 else if (mime == "text" || mime == "text/") mime = "text/plain";
389 // additional fields
390 while (ctype.length) {
391 auto kv = getFieldParams(ctype);
392 if (kv.length == 0) continue;
393 auto n = getParamName(kv);
394 auto v = getParamValue(kv);
395 // charset
396 if (n.strEquCI("charset")) {
397 if (charset.length != 0) continue;
398 v = v.toLowerStr.xstrip;
399 if (v.length != 0) charset = v;
400 continue;
402 // format
403 if (n.strEquCI("format")) {
404 if (format.length != 0) continue;
405 v = v.toLowerStr.xstrip;
406 if (v.length != 0) format = v;
407 continue;
409 // name
410 if (n.strEquCI("name")) {
411 if (name.length != 0) continue;
412 v = v.sanitizeFileNameStr;
413 if (v.length != 0) name = v;
414 continue;
416 // boundary
417 if (n.strEquCI("boundary")) {
418 if (boundary.length != 0) continue;
419 if (v.length != 0) boundary = v;
420 continue;
424 if (charset.length == 0) charset = "us-ascii";
426 bool inline = true;
427 auto disp = findHeaderField(hdrs, "Content-Disposition");
428 while (disp.length) {
429 auto kv = getFieldParams(disp);
430 if (kv.length == 0) continue;
431 auto n = getParamName(kv);
432 auto v = getParamValue(kv);
433 if (n.strEquCI("attachment")) {
434 inline = false;
435 continue;
437 // filename?
438 if (n.strEquCI("filename")) {
439 v = v.sanitizeFileNameStr;
440 if (v.length != 0) name = v;
441 continue;
446 writeln("--------------------------------");
447 writeln("encoding: <", enc, ">");
448 writeln("name : <", name, ">");
449 writeln("boundary: <", boundary, ">");
450 writeln("format : <", format, ">");
451 writeln("charset : <", charset, ">");
452 writeln("inline : ", inline);
455 if (boundary.length == 0 || (mime != "multipart" && !mime.startsWith("multipart/"))) {
456 immutable bool istext = mime.startsWith("text/");
457 // not a multipart
458 if (noattaches && (!istext || !inline)) return; // not a text, or not an inline text, do not want
459 Content cc;
460 cc.mime = mime.idup;
461 cc.name = (inline && istext ? null : name.idup);
462 cc.format = (format.length ? format.idup : "");
463 if (istext) {
464 cc.data = decodeContent(body, enc).xstripright.recodeToUtf8(charset); // it is safe to cast here
465 } else {
466 cc.data = decodeContent(body, enc); // it is safe to cast here
470 static uint cnt = 0;
471 import std.string : format;
472 auto fo = VFile("z__%04u.bin".format(cnt++), "w");
473 fo.rawWriteExact(cc.data[]);
476 content ~= cc;
477 return;
480 // multipart, process it recursively
481 bool last;
482 for (;;) {
483 usize bpos = findBoundary(body, 0, boundary, out last);
484 if (last) break;
485 // skip it
486 bpos = skipOneLine(body, bpos);
487 // find next boundary
488 immutable usize epos = findBoundary(body, bpos, boundary, out last);
489 // get part
490 const(char)[] bpart = body[bpos..epos].xstripright;
491 if (bpart.length != 0) {
492 //{ writeln("===[", enc, "]===[", boundary, "]=== (bpos=", bpos, "; epos=", epos, ")"); writeln(bpart); writeln("------------------"); }
493 bpart = decodeContent(bpart, enc);
494 // get headers
495 usize hdrend = findHeadersEnd(bpart);
496 hdrs = bpart[0..hdrend];
497 hdrend = skipOneLine(bpart, hdrend);
498 bpart = bpart[hdrend..$];
499 parseContent(ref content, hdrs, bpart, noattaches);
501 body = body[epos..$];