1 // this rebuilds supplementary tables in SQLite mail database.
2 // it parses the messages, extract mime parts, and populates
3 // "headers", "content", and "attaches" tables.
4 // those tables are required for the main program to work properly,
5 // but they can be dropped and recreated at any time.
6 module zsq_rebuild
is aliced
;
8 import std
.digest
.ripemd
;
22 // ////////////////////////////////////////////////////////////////////////// //
23 uint parseMailDate (const(char)[] s
) {
25 if (s
.length
== 0) return 0;
27 return cast(uint)(parseRFC822DateTime(s
).toUTC
.toUnixTime
);
28 } catch (Exception
) {}
29 // sometimes this helps
32 return cast(uint)(parseRFC822DateTime(s
).toUTC
.toUnixTime
);
33 } catch (Exception
) {}
38 // ////////////////////////////////////////////////////////////////////////// //
39 void main (string
[] args
) {
40 // here, i don't have any threads at all
41 if (sqlite3_config(SQLITE_CONFIG_SINGLETHREAD
) != SQLITE_OK
) throw new Exception("cannot switch SQLite to multi-threaded mode");
43 string mailpath
= "~/Mail";
44 int packlevel
= ChiroDefaultPackLevel
;
45 if (args
.length
> 1) {
46 for (usize f
= 1; f
< args
.length
; ++f
) {
47 if (args
[f
].strEquCI("--nocompress")) packlevel
= 0;
48 else if (args
[f
].strEquCI("--max")) packlevel
= 666;
49 else if (args
[f
].length
== 2 && args
[f
][0] == '-' && args
[f
][1] >= '0' && args
[f
][1] <= '9') packlevel
= args
[f
][1]-'0';
50 else if (args
[f
].strEquCI("--mailpath")) { mailpath
= args
[f
+1]; ++f
; }
54 auto dbstore
= chiroOpenStorageDB(readonly
:true);
56 writeln("creating message support db (compression level ", packlevel
, ")...");
57 auto db = chiroRecreateSupportDB();
60 foreach (auto row
; dbstore
.persistentStatement(`SELECT count(uid) AS total FROM messages WHERE tags <> '';`).range
) {
61 total
= row
.total
!uint;
64 writeln("found ", total
, " messages to process.");
66 // this monstrosity selects all unique tags
67 auto stAllTags
= dbstore
.persistentStatement(`
68 WITH RECURSIVE tagtable(tag, rest) AS (
69 VALUES('', (SELECT group_concat(tags,'|') FROM (SELECT DISTINCT(tags) AS tags FROM messages WHERE tags <> ''))||'|')
72 SUBSTR(rest, 0, INSTR(rest, '|')),
73 SUBSTR(rest, INSTR(rest, '|')+1)
76 SELECT DISTINCT(tag) AS tag
82 auto stInsTagName
= db.persistentStatement("INSERT INTO tagnames (tag, hidden) VALUES(:tag,:hidden);");
84 // insert all unique tags
85 foreach (auto row
; stAllTags
.range
) {
86 auto tag
= row
.tag
!SQ3Text
;
89 .bindText(":tag", tag
, transient
:false)
90 // all non-folder tags are hidden
91 .bind(":hidden", (tag
[0] != '/' ?
1 : 0))
95 auto stInsInfo
= db.persistentStatement(`
97 ( uid, from_name, from_mail, subj, to_name, to_mail)
98 VALUES(:uid,:from_name,:from_mail,:subj,:to_name,:to_mail);
101 auto stInsMsgId
= db.persistentStatement(`
104 VALUES(:uid,:msgid,:time);
107 auto stInsMsgRefId
= db.persistentStatement(`
110 VALUES(:uid,:idx,:msgid);
113 auto stInsThreads
= db.persistentStatement(`
115 ( uid, tagid, time, nntpidx, appearance)
116 VALUES(:uid,(SELECT uid FROM tagnames WHERE tag=:tagname LIMIT 1),:time,:nntpidx, 1);
119 auto stInsContent
= db.persistentStatement(`
121 ( uid, mime, format, content)
122 VALUES(:uid,:mime,:format, ChiroPack(:content, :dopack));
125 auto stInsAttach
= db.persistentStatement(`
127 ( uid, mime, name, content)
128 VALUES(:uid,:mime,:name, ChiroPack(:content, :dopack));
132 // this unpacks twice, but meh
133 auto stAllMsgs
= dbstore
.persistentStatement(`
136 , ChiroExtractHeaders(ChiroUnpack(data)) AS headers
137 , ChiroExtractBody(ChiroUnpack(data)) AS body
144 foreach (auto row; stAllMsgs.range) {
145 //writeln("COLCOUNT: ", row.colcount_); foreach (int cn; 0..row.colcount_) writeln(" ", cn, ": ", row.colname_(cn));
146 writeln(row.addr!SQ3Text, ": ", row.opcode!SQ3Text,
147 "; p1=", row.p1!SQ3Text, "; p2=", row.p2!SQ3Text,
148 "; p3=", row.p3!SQ3Text, "; p4=", row.p4!SQ3Text, "; p5=", row.p5!SQ3Text,
149 "; comment=", row.comment!SQ3Text);
150 //writeln("id=", row.id!SQ3Text, "; parent=", row.parent!SQ3Text, "; notused=", row.notused!SQ3Text, "; detail=", row.detail!SQ3Text);
155 // this should cache unpack results
156 auto stAllMsgs
= dbstore
.persistentStatement(`
157 WITH msgunpacked(msguid, msgdata, msgtags) AS (
158 SELECT uid AS msguid, ChiroUnpack(data) AS msgdata, tags AS msgtags
165 , ChiroExtractHeaders(msgdata) AS headers
166 , ChiroExtractBody(msgdata) AS body
171 Timer ctm
= Timer(true);
173 db.execute("begin transaction;");
175 foreach (auto row
; stAllMsgs
.range
) {
176 //import std.conv : to;
177 //writeln("type=", row.text!DBFieldType, "; val=<", row.text!SQLStringc, ">");
178 auto hdrs
= row
.headers
!SQ3Text
;
179 auto body = row
.body!SQ3Text
;
180 auto tags
= row
.tags
!SQ3Text
;
181 uint uid
= row
.uid
!uint;
185 writeln("---------------- ", uid
, " ----------------");
186 writeln(row
.data
!SQ3Text
);
191 writeln("============================================");
194 // insert from/to/subj info
196 auto subj
= findHeaderField(hdrs
, "Subject").decodeSubj
.subjRemoveRe
;
197 auto from
= findHeaderField(hdrs
, "From").decodeSubj
;
198 auto to
= findHeaderField(hdrs
, "To").decodeSubj
;
201 .bind(":from_name", from
.extractName
)
202 .bind(":from_mail", from
.extractMail
)
204 .bind(":to_name", to
.extractName
)
205 .bind(":to_mail", to
.extractMail
)
212 auto inreplyfld
= findHeaderField(hdrs
, "In-Reply-To");
213 while (inreplyfld
.length
) {
214 auto id
= getNextFieldValue(inreplyfld
);
218 .bind(":idx", refidx
++)
224 inreplyfld
= findHeaderField(hdrs
, "References");
225 while (inreplyfld
.length
) {
226 auto id
= getNextFieldValue(inreplyfld
);
230 .bind(":idx", refidx
++)
237 // will be used to mark msgids
245 auto nntpidxfld
= findHeaderField(hdrs
, "NNTP-Index");
246 if (nntpidxfld
.length
) {
247 auto id
= nntpidxfld
.getFieldValue
;
249 foreach (immutable ch
; id
) {
250 if (ch
< '0' || ch
> '9') { nntpidx
= 0; break; }
251 if (nntpidx
== 0 && ch
== '0') continue;
252 immutable uint nn
= nntpidx
*10u+(ch
-'0');
253 if (nn
<= nntpidx
) nntpidx
= 0x7fffffff; else nntpidx
= nn
;
262 auto datefld
= findHeaderField(hdrs
, "Injection-Date");
263 if (datefld
.length
!= 0) {
264 auto v
= datefld
.getFieldValue
;
266 time
= parseMailDate(v
);
267 } catch (Exception
) {
268 writeln("UID=", uid
, ": FUCKED INJECTION-DATE: |", v
, "|");
269 time
= 0; // just in case
273 // obsolete NNTP date field, because why not?
274 datefld
= findHeaderField(hdrs
, "NNTP-Posting-Date");
275 if (datefld
.length
!= 0) {
276 auto v
= datefld
.getFieldValue
;
278 time
= parseMailDate(v
);
279 } catch (Exception
) {
280 writeln("UID=", uid
, ": FUCKED NNTP-POSTING-DATE: |", v
, "|");
281 time
= 0; // just in case
286 datefld
= findHeaderField(hdrs
, "Date");
287 if (datefld
.length
!= 0) {
288 auto v
= datefld
.getFieldValue
;
290 time
= parseMailDate(v
);
291 } catch (Exception
) {
292 writeln("UID=", uid
, ": FUCKED DATE: |", v
, "|");
293 time
= 0; // just in case
299 // finally, try to get time from "Received:"
300 //Received: from dns9.fly.us ([131.103.96.154]) by np5-d2.fly.us with Microsoft SMTPSVC(5.0.2195.6824); Tue, 21 Mar 2017 17:35:54 -0400
302 //writeln("!!! --- !!!");
303 uint lowesttime
= uint.max
;
304 foreach (uint fidx
; 0..uint.max
) {
305 auto recvfld
= findHeaderField(hdrs
, "Received", fidx
);
306 if (recvfld
.length
== 0) break;
307 auto lsemi
= recvfld
.lastIndexOf(';');
308 if (lsemi
>= 0) recvfld
= recvfld
[lsemi
+1..$].xstrip
;
309 if (recvfld
.length
!= 0) {
310 auto v
= recvfld
.getFieldValue
;
313 tm
= parseMailDate(v
);
314 } catch (Exception
) {
315 writeln("UID=", uid
, ": FUCKED RECV DATE: |", v
, "|");
316 tm
= 0; // just in case
318 //writeln(tm, " : ", lowesttime);
319 if (tm
&& tm
< lowesttime
) lowesttime
= tm
;
322 if (lowesttime
!= uint.max
) time
= lowesttime
;
326 // create thread record for each tag
327 while (tags
.length
) {
328 auto eep
= tags
.indexOf('|');
329 auto tagname
= (eep
>= 0 ? tags
[0..eep
] : tags
[0..$]);
330 tags
= (eep
>= 0 ? tags
[eep
+1..$] : tags
[0..0]);
331 if (tagname
.length
== 0) continue;
334 .bindText(":tagname", tagname
, transient
:false)
336 .bind(":nntpidx", nntpidx
)
343 bool hasmsgid
= false;
344 auto msgidfield
= findHeaderField(hdrs
, "Message-Id");
345 if (msgidfield
.length
) {
346 auto id
= msgidfield
.getFieldValue
;
351 .bind("time", msgtime
)
352 .bindText(":msgid", id
, transient
:false)
356 // if there is no msgid, create one
360 hash
.put(cast(const(ubyte)[])hdrs
);
361 hash
.put(cast(const(ubyte)[])body);
362 ubyte[20] digest
= hash
.finish();
364 import core
.stdc
.stdio
: snprintf
;
365 foreach (immutable idx
, ubyte b
; digest
[]) snprintf(buf
.ptr
+idx
*2, 3, "%02x", b
);
368 .bind("time", msgtime
)
369 .bindText(":msgid", buf
[0..20*2], transient
:false)
374 // insert base content and attaches
377 parseContent(ref content
, hdrs
, body);
378 foreach (const ref Content cc
; content
) {
379 if (cc
.name
.length
) {
382 .bindText(":mime", cc
.mime
)
383 .bindText(":name", cc
.name
)
384 .bindBlob(":content", cc
.data
)
385 .bind(":dopack", packlevel
)
390 .bindText(":mime", cc
.mime
)
391 .bindText(":format", cc
.format
)
392 .bindBlob(":content", cc
.data
)
393 .bind(":dopack", packlevel
)
400 writeln("==== UID: ", row
.uid
!uint, " ===");
401 auto msgidfield
= findHeaderField(hdrs
, "Message-Id");
402 if (msgidfield
.length
) {
403 auto id
= msgidfield
.getFieldValue
;
404 writeln("MSGID: {", id
, "}");
406 auto subj
= findHeaderField(hdrs
, "Subject").decodeSubj
.subjRemoveRe
;
407 auto from
= findHeaderField(hdrs
, "From").decodeSubj
;
408 auto to
= findHeaderField(hdrs
, "To").decodeSubj
;
409 writeln("FROM: |", from
.recodeToKOI8
, "| MAIL=|", from
.extractMail
.recodeToKOI8
, "| NAME=|", from
.extractName
.recodeToKOI8
, "|");
410 writeln(" TO: |", to
.recodeToKOI8
, "| MAIL=|", to
.extractMail
.recodeToKOI8
, "| NAME=|", to
.extractName
.recodeToKOI8
, "|");
411 writeln("SUBJ: |", subj
.recodeToKOI8
, "|");
415 if (count
%1024 == 1) {
416 write(" ", count
, " ", count
*100u/total
, "% ", row
.tags
!SQ3Text
, "\x1b[K\r");
417 db.execute("commit transaction;");
418 db.execute("begin transaction;");
419 //if (count > 1) break;
422 db.execute("commit transaction;");
425 writeln("time: ", ctm
.toString
, "\x1b[K");
427 writeln(count
, " messages processed.\x1b[K");
429 writeln("closing the db");