switched to `iv.strex.detectUrl()`
[chiroptera.git] / sqbase_experiment / zsq_10_create_storage.d
blob a2e27f6ed75ef67213d59a9c52cd139cdd32e102
/* E-Mail Client
 * coded by Ketmar // Invisible Vector <ketmar@ketmar.no-ip.org>
 * Understanding is not required. Only obedience.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 3 of the License ONLY.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
// This converts an old Chiroptera CHMT mailbox database to the new SQLite database.
// It only puts message data into the "messages" table; you need to rebuild
// the other tables with the separate utility.
20 module zsq_create_storage is aliced;
22 import chibackend;
24 import std.file;
26 import iv.sq3;
27 import iv.strex;
28 import iv.timer;
29 import iv.vfs;
30 import iv.vfs.io;
31 import iv.vfs.util;
34 // ////////////////////////////////////////////////////////////////////////// //
/**
 * Checks whether `pfx` is a path prefix of `dir` on a component boundary.
 *
 * Params:
 *   dir = directory path to test
 *   pfx = candidate prefix
 *
 * Returns:
 *   `true` when `dir` equals `pfx`, or starts with `pfx` immediately followed
 *   by '/'; `false` otherwise (an empty `pfx` never matches).
 */
bool checkPfx (const(char)[] dir, const(char)[] pfx) {
  immutable plen = pfx.length;
  // an empty prefix never matches, and a prefix longer than the path cannot match
  if (plen == 0 || plen > dir.length) return false;
  if (dir[0..plen] != pfx) return false;
  // either the whole path matched, or the prefix stops right before a separator
  return (dir.length == plen || dir[plen] == '/');
}
42 // ////////////////////////////////////////////////////////////////////////// //
/**
 * Imports old CHMT text mailboxes into the SQLite message store.
 *
 * Recognized command line words (after the common Chiroptera arguments
 * have been processed by `chiroParseCommonCLIArgs`):
 *   "force"  -- allow recreating the storage database
 *   "update" -- open the existing storage database instead of recreating it
 *   "empty"  -- do not import any messages
 *   "nntp"   -- do not skip mailboxes whose tag starts with "/dmars_ng/"
 *
 * Throws:
 *   Exception if neither "force" nor "update" was given, if SQLite cannot be
 *   configured, or if a mailbox file is too big.
 */
void main (string[] args) {
  ChiroTimerEnabled = true;
  chiroParseCommonCLIArgs(args);

  bool allowed = false;
  bool update = false;
  bool emptybase = false;
  bool ignorenntp = true;

  foreach (string s; args[1..$]) {
    if (s == "force") allowed = true;
    else if (s == "empty") emptybase = true;
    else if (s == "nntp") ignorenntp = false;
    else if (s == "update") update = true;
  }

  if (!allowed && !update) throw new Exception("use \"force\" to rebuild the storage");

  // but here, i don't have any threads at all
  if (sqlite3_config(SQLITE_CONFIG_SINGLETHREAD) != SQLITE_OK) throw new Exception("cannot switch SQLite to single-threaded mode");

  string[] ignores; // directories marked with ".chiignore"
  string[] files; // found ".chmt" files, relative to the mail path
  string[string] tagrepl; // directory -> replacement tag (first useful line of ".chitag")

  // directories marked with ".chihidden" are listed in this file
  auto hidfo = VFile("zhidden.rc", "w");

  writeln("scanning '", chiroCLIMailPath, "'...");
  foreach (DirEntry e; dirEntries(cast(string)chiroCLIMailPath, SpanMode.breadth, followSymlink:false)) {
    if (!e.isFile) continue;
    // files directly in the mail root are skipped
    if (e.name.dirName == chiroCLIMailPath) continue;
    if (e.name.getExtension == ".chmt") {
      //writeln(e.name, "\t", e.size);
      files ~= e.name[chiroCLIMailPath.length..$];
    } else if (e.name.baseName == ".chiignore") {
      ignores ~= e.name.dirName[chiroCLIMailPath.length..$];
    } else if (e.name.baseName == ".chihidden") {
      // NOTE(review): this slice starts one char earlier than the others; presumably
      // `chiroCLIMailPath` ends with '/' and the leading slash is wanted here -- confirm
      hidfo.writeln(e.name.dirName[chiroCLIMailPath.length-1..$]);
    } else if (e.name.baseName == ".chitag") {
      // the first non-empty, non-comment line is the replacement tag
      foreach (auto s; VFile(e.name).byLine) {
        s = s.xstrip;
        if (s.length == 0 || s[0] == '#') continue;
        tagrepl[e.name.dirName[chiroCLIMailPath.length..$]] = s.idup;
        break;
      }
    }
  }
  hidfo.close();

  // remove ignored files (single pass; the relative order of kept files is preserved)
  {
    string[] kept;
    foreach (string fname; files) {
      bool ok = true;
      foreach (string ign; ignores) if (fname.dirName.checkPfx(ign)) { ok = false; break; }
      if (ok) kept ~= fname; else writeln("ignored '", fname, "'");
    }
    files = kept;
  }

  writeln(files.length, " databases found.");

  if (update) {
    chiroOpenStorageDB();
  } else {
    writeln("creating message store db (compression level: ", ChiroCompressionLevel, ")...");
    chiroRecreateStorageDB();
  }

  writeln("opening accounts db...");
  chiroOpenConfDB(readonly:true);

  auto stInsMsg = dbStore.persistentStatement(`
    INSERT INTO messages (tags, data) VALUES(:tags, ChiroPack(:data))
  ;`);

  auto stGetAcc = dbConf.persistentStatement("SELECT name AS name FROM accounts WHERE inbox=:inbox;");

  auto stGetAccByMail = dbConf.persistentStatement(`
    SELECT name AS name, inbox AS inbox
    FROM accounts
    WHERE nntpgroup='' AND inbox<>'' AND lower(email)=lower(:email)
    LIMIT 1
  ;`);

  Timer ctm = Timer(true);

  // all inserts go into one transaction
  dbStore.execute("BEGIN TRANSACTION;");
  uint count = 0;
  bool forceUpdateProgress = false;
  foreach (immutable tgidx, string tdbname; files) {
    if (emptybase) continue;

    // the tag is the mailbox directory, unless a ".chitag" replacement exists for it
    string tag = tdbname.dirName;
    if (auto rp = tag in tagrepl) {
      auto oldtag = tag;
      tag = *rp;
      if (tag.length && tag[0] == '/') {
        while (tag.length && tag[$-1] == '/') tag = tag[0..$-1];
      }
      writeln(oldtag, " -> ", tag, "\x1b[K");
      assert(tag.length && tag != "/");
      forceUpdateProgress = true;
    } else {
      // normalize to exactly one leading slash and no trailing slashes
      while (tag.length && tag[0] == '/') tag = tag[1..$];
      while (tag.length && tag[$-1] == '/') tag = tag[0..$-1];
      tag = "/"~tag;
      // join archives: a last path component starting with "arch" is dropped
      auto aaidx = tag.lastIndexOf("/arch");
      if (aaidx > 0 && tag.indexOf('/', aaidx+1) < 0) {
        auto oldtag = tag;
        tag = tag[0..aaidx];
        while (tag.length && tag[$-1] == '/') tag = tag[0..$-1];
        writeln(oldtag, " -> ", tag, "\x1b[K");
        assert(tag.length && tag != "/");
        forceUpdateProgress = true;
      }
    }

    //if (tag != "/sent") continue;
    if (ignorenntp && tag.startsWith("/dmars_ng/")) continue;

    // the whole mailbox file is loaded into memory
    char[] alltext;
    scope(exit) delete alltext;

    auto fl = VFile(chiroCLIMailPath~tdbname);
    auto sz = fl.size;
    if (sz > 0x3fffffffU) throw new Exception("text database '"~tdbname~"' too big");
    if (sz > 0) {
      alltext = new char[cast(uint)sz];
      fl.rawReadExact(alltext[]);
    }

    // `tag` may be changed per message below, so remember the mailbox tag
    immutable origTag = tag;

    char[] text = alltext;
    while (text.length) {
      usize end = findMessageEnd!true(text); // with dot
      const(char)[] msg = text[0..end];
      text = cutTopMessage(text);

      char[] tags;
      scope(exit) delete tags;
      tags.reserve(1024);

      tag = origTag;
      if (tag.startsWith("/zz_spam")) tag = "#spam";

      // messages from "/sent" are retagged with the inbox of the account that sent them
      if (tag == "/sent") {
        string fromMail = null;
        foreach (auto xrow; dbStore.statement(`SELECT ChiroHdr_FromEmail(:text) AS email;`).bindConstText(":text", msg).range) {
          fromMail = xrow.email!string;
        }
        string found = null;
        foreach (auto srow; stGetAccByMail.bindConstText(":email", fromMail).range) {
          found = srow.inbox!string;
        }
        if (found.length) {
          //forceUpdateProgress = true;
          //writeln(fromMail, " moved to ", found, "\x1b[K");
          tag = found;
        } else {
          forceUpdateProgress = true;
          writeln(fromMail, " has no account\x1b[K");
          writeln(msg);
        }
      }

      bool seenAccount = false;
      tags ~= tag;
      // find account
      foreach (auto row; stGetAcc.bindConstText(":inbox", tag).range) {
        auto name = row.name!SQ3Text;
        if (name.length) {
          tags ~= "|account:";
          tags ~= name;
          seenAccount = true;
        }
      }

      // check for accounts: look up the "To" address first, then the "From" address
      if (!seenAccount && tag != "#spam") {
        string fromMail = null;
        string toMail = null;
        foreach (auto xrow; dbStore.statement(`SELECT ChiroHdr_FromEmail(:text) AS frommail, ChiroHdr_ToEmail(:text) AS tomail;`).bindConstText(":text", msg).range) {
          fromMail = xrow.frommail!string;
          toMail = xrow.tomail!string;
        }
        string found = null;
        foreach (auto srow; stGetAccByMail.bindConstText(":email", toMail).range) found = srow.name!string;
        if (found.length == 0) {
          foreach (auto srow; stGetAccByMail.bindConstText(":email", fromMail).range) found = srow.name!string;
        }
        if (found.length) {
          //forceUpdateProgress = true;
          //writeln(fromMail, " moved to ", found, "\x1b[K");
          tags ~= "|account:";
          tags ~= found;
        }
      }
      // done

      stInsMsg
        .bindConstText(":tags", tags)
        .bindConstBlob(":data", msg)
        .doAll();

      ++count;
      // the progress line is redrawn every 512 messages, or after any other output
      if (forceUpdateProgress || count%512 == 1) {
        forceUpdateProgress = false;
        write(" ", count, " ", tgidx*100/files.length, "% ", tags, "\x1b[K\r");
      }
    }
  }
  dbStore.execute("COMMIT TRANSACTION;");

  ctm.stop;
  writeln("time: ", ctm.toString, "\x1b[K");

  writeln(count, " messages found.\x1b[K");

  // restore journal and sync options (journal is saved into db, and sync MAY BE saved later)
  // we aren't expecting to change things much, so "DELETE" journal seems to be adequate
  dbStore.execute(`
    PRAGMA locking_mode = NORMAL;
    PRAGMA journal_mode = DELETE;
    PRAGMA synchronous = NORMAL;
  `);

  writeln("closing the db");
  dbStore.close();

  dbConf.close();

  // remove journals (required to cleanup WAL files even for R/O database)
  chiroOpenConfDB();
  dbConf.close();
}