sqlite: more schema changes, various experiments with thread building; recursive...
[chiroptera.git] / sqbase_experiment / chiro_sqbase.d
blob4f7ec5035325d354cea94f4fa89084ddedf16dfd
1 module chiro_sqbase is aliced;
2 private:
4 //version = separate_headers;
5 // do not use, for testing only!
6 // and it seems to generate bigger files, lol
7 //version = use_balz;
9 // use libdeflate instead of zlib
10 // see https://github.com/ebiggers/libdeflate
11 // around 2 times slower on level 9 than zlib, resulting size is 5MB less
12 // around 3 times slower on level 12, resulting size is 10MB less
13 // totally not worth it
14 //version = use_libdeflate;
16 // use libxpack instead of zlib
17 // see https://github.com/ebiggers/xpack
18 // it supports buffers up to 2^19 (524288) bytes (see https://github.com/ebiggers/xpack/issues/1)
19 // therefore it is useless (the resulting file is bigger than with zlib)
20 //version = use_libxpack;
22 // just for fun
23 // see https://github.com/jibsen/brieflz
24 // it has spurious slowdowns, and so is 4 times slower than zlib, with worse compression
25 //version = use_libbrieflz;
27 // apple crap; i just wanted to see how bad it is ;-)
28 // speed is comparable with zlib, compression is shittier by 60MB; crap
29 //version = use_liblzfse;
31 // just for fun
32 //version = use_lzjb;
34 // just for fun, slightly better than lzjb
35 //version = use_lz4;
37 // some compressors from wimlib
38 // see https://wimlib.net/
39 // only one can be selected!
40 // 15 times slower than zlib, much worse compression (~100MB bigger)
41 //version = use_libwim_lzms; // this supports chunks up to our maximum blob size
42 // two times faster than lzms, compression is still awful
43 //version = use_libwim_lzx; // this supports chunks up to 2MB; more-or-less useful
44 // quite fast (because it refuses to compress anything bigger than 64KB); compression is most awful
45 //version = use_libwim_xpress; // this supports chunks up to 64KB; useless
47 // oh, because why not?
48 // surprisingly good (but not as good as zlib), and lightning fast on default compression level
49 // sadly, requires external lib
50 //version = use_zstd;
52 import iv.sq3;
53 //import iv.utfutil;
54 //import iv.vfs.io;
56 version(use_libdeflate) import chiropack.libdeflate;
57 else version(use_balz) import iv.balz;
58 else version(use_libxpack) import chiropack.libxpack;
59 else version(use_libbrieflz) import chiropack.libbrieflz;
60 else version(use_liblzfse) import chiropack.liblzfse;
61 else version(use_lzjb) import chiropack.lzjb;
62 else version(use_libwim_lzms) import chiropack.libwim;
63 else version(use_libwim_lzx) import chiropack.libwim;
64 else version(use_libwim_xpress) import chiropack.libwim;
65 else version(use_lz4) import chiropack.liblz4;
66 else version(use_zstd) import chiropack.libzstd;
// public compile-time constant: 6 when built with `version = use_zstd`, 9 for any other build
// NOTE(review): presumably the pack level callers are expected to pass to `ChiroPack()`;
// the one-argument `ChiroPack()` in this file passes a literal 9 instead -- confirm intent
68 version(use_zstd) {
69 public enum ChiroDefaultPackLevel = 6;
70 } else {
71 public enum ChiroDefaultPackLevel = 9;
76 There are several added SQLite functions:
78 ChiroPack(data[, compratio])
79 ===============
81 This tries to compress the given data, and returns a compressed blob.
82 If `compratio` is negative or zero, do not compress anything.
85 ChiroUnpack(data)
86 =================
88 This decompresses the blob compressed with `ChiroPack()`. It is (usually) safe to pass
89 non-compressed data to this function.
92 ChiroNormCRLF(content)
93 ======================
95 Replaces CR/LF with LF, `\x7f` with `~`, control chars (except TAB and CR) with spaces.
96 Removes trailing blanks.
99 ChiroNormHeaders(content)
100 =========================
102 Replaces CR/LF with LF, `\x7f` with `~`, control chars (except CR) with spaces.
103 Then replaces 'space, LF' with a single space (joins multiline headers).
104 Removes trailing blanks.
107 ChiroExtractHeaders(content)
108 ============================
110 Can be used to extract headers from the message.
111 Replaces CR/LF with LF, `\x7f` with `~`, control chars (except CR) with spaces.
112 Then replaces 'space, LF' with a single space (joins multiline headers).
113 Removes trailing blanks.
116 ChiroExtractBody(content)
117 =========================
119 Can be used to extract body from the message.
120 Replaces CR/LF with LF, `\x7f` with `~`, control chars (except TAB and CR) with spaces.
121 Then replaces 'space, LF' with a single space (joins multiline headers).
122 Removes trailing blanks and final dot.
// public compile-time flag mirroring `version = separate_headers`:
// `true` when that version is set, `false` otherwise
126 version(separate_headers) {
127 public enum ChiroSeparateHeaders = true;
128 } else {
129 public enum ChiroSeparateHeaders = false;
// database file names (going by the identifiers: message storage, support/view data, options)
132 public enum StorageDBName = "message-store.db";
133 public enum SupportDBName = "message-view.db";
134 public enum OptionsDBName = "chiroptera.db";
137 // ////////////////////////////////////////////////////////////////////////// //
// pragmas shared by every connection; the RO/RW variants below are built by appending to this text
138 private enum CommonPragmas = `
139 PRAGMA case_sensitive_like = OFF;
140 PRAGMA foreign_keys = OFF;
141 PRAGMA locking_mode = NORMAL; /*EXCLUSIVE;*/
142 PRAGMA secure_delete = OFF;
143 PRAGMA threads = 3;
144 PRAGMA trusted_schema = OFF;
145 PRAGMA writable_schema = OFF;
// read-only variant: common pragmas plus in-memory temp storage
148 enum CommonPragmasRO = CommonPragmas~`
149 PRAGMA temp_store = MEMORY; /*DEFAULT*/ /*FILE*/
// read-write variant: common pragmas plus application id, encoding, and journal/sync settings
// ("DELETE" journal by default; the WAL line is commented out inside the SQL text)
152 enum CommonPragmasRW = CommonPragmas~`
153 PRAGMA application_id = 1128810834; /*CHIR*/
154 PRAGMA auto_vacuum = NONE;
155 PRAGMA encoding = "UTF-8";
156 PRAGMA temp_store = DEFAULT;
157 --PRAGMA journal_mode = WAL; /*OFF;*/
158 PRAGMA journal_mode = DELETE; /*OFF;*/
159 PRAGMA synchronous = NORMAL; /*OFF;*/
// pragma text for read-only connections
162 static immutable dbpragmasRO = CommonPragmasRO;
164 // we aren't expecting to change things much, so "DELETE" journal seems to be adequate
165 // use the smallest page size, because we don't need to perform a lot of selects here
166 static immutable dbpragmasRWStorage = "PRAGMA page_size = 512;"~CommonPragmasRW;
168 // use slightly bigger pages
169 // funny, smaller pages lead to bigger files
// the trailing pragmas come after the ones in `CommonPragmasRW`, so their journal/sync
// settings are the last ones in the text (WAL for normal use; journal and sync OFF for recreation)
170 static immutable dbpragmasRWSupport = "PRAGMA page_size = 4096;"~CommonPragmasRW~"PRAGMA journal_mode = WAL;";
171 static immutable dbpragmasRWSupportRecreate = "PRAGMA page_size = 4096;"~CommonPragmasRW~"PRAGMA journal_mode = OFF; PRAGMA synchronous = OFF;";
173 // smaller page size is ok
174 static immutable dbpragmasRWOptions = "PRAGMA page_size = 512;"~CommonPragmasRW;
177 // main storage and support databases will be in different files
178 // storing headers separately actually makes the file bigger
179 // doing CR/LF and other normalizations saves less than a megabyte on a 1GB database, so don't bother
180 // (it saves around 5MB with separate store, but the result is still bigger)
// column list spliced into the `messages` table definition in `schemaStorage`:
// two blobs (`headers`, `body`) with `separate_headers`, a single `data` blob otherwise
181 version(separate_headers) {
182 enum SchemaMsgData =
183 ` , headers BLOB
184 , body BLOB`;
185 } else {
186 enum SchemaMsgData = ` , data BLOB`;
// DDL for the `messages` table; the payload column(s) come from `SchemaMsgData`
189 static immutable string schemaStorage = `
190 -- deleted messages have empty headers and body
191 -- this is so uids will remain unique on inserting
192 -- tags are used to associate the message with various folders, and stored here for rebuild purposes
193 -- the frontend will use the separate "tags" table to select messages
194 -- deleted messages must not have any tags, and should contain no other data
195 -- (keeping the data is harmless, it simply sits there and takes space)
196 CREATE TABLE IF NOT EXISTS messages (
197 uid INTEGER PRIMARY KEY /* rowid, never zero */
198 , tags TEXT NOT NULL DEFAULT '' /* associated message tags, '|'-separated; case-sensitive, no extra whitespaces! */
199 -- article data; MUST contain the ending dot, and be properly dot-stuffed
200 -- basically, this is "what we had received, as is"
201 -- there is no need to normalize it in any way
202 -- it should be compressed with "ChiroPack()", and extracted with "ChiroUnpack()"
203 `~SchemaMsgData~`
// DDL for the `accounts` and `options` tables and their name indexes
// NOTE(review): `accounts.name` is declared UNIQUE, which already gives SQLite an implicit index,
// so the explicit `accounts_name` index looks redundant -- confirm before dropping it
// NOTE(review): the "autoincrement" SQL comment above `options` looks copy-pasted from `accounts`
// (the `options` table has no autoincrement column); it is part of the SQL text, so left as is
208 static immutable string schemaOptions = `
209 -- use "autoincrement" to allow account deletion
210 CREATE TABLE IF NOT EXISTS accounts (
211 uid INTEGER PRIMARY KEY AUTOINCREMENT /* unique, never zero */
212 , checktime INTEGER NOT NULL DEFAULT 15 /* check time, in minutes */
213 , nosendauth INTEGER NOT NULL DEFAULT 0 /* turn off authentication on sending? */
214 , debuglog INTEGER NOT NULL DEFAULT 0 /* do debug logging? */
215 , nocheck INTEGER NOT NULL DEFAULT 0 /* disable checking? */
216 , name TEXT NOT NULL UNIQUE /* account name; lowercase alphanum, '_', '-', '.' */
217 , recvserver TEXT NOT NULL /* server for receiving messages */
218 , sendserver TEXT NOT NULL /* server for sending messages */
219 , user TEXT NOT NULL /* pop3 user name */
220 , pass TEXT NOT NULL /* pop3 password, empty for no authorisation */
221 , realname TEXT NOT NULL /* user name for e-mail headers */
222 , email TEXT NOT NULL /* account e-mail address (full, name@host) */
223 , inbox TEXT NOT NULL /* inbox tag, usually "/accname/inbox", or folder for nntp */
224 , nntpgroup TEXT NOT NULL DEFAULT '' /* nntp group name for NNTP accounts; if empty, this is POP3 account */
227 CREATE INDEX IF NOT EXISTS accounts_name ON accounts(name);
229 -- use "autoincrement" to allow account deletion
230 CREATE TABLE IF NOT EXISTS options (
231 name TEXT NOT NULL
232 , value TEXT
235 CREATE INDEX IF NOT EXISTS options_name ON options(name);
// DDL for the support database tables: `tagnames`, `threads`, `info`, `msgids`, `refids`,
// `content` and `attaches` (plus the `tagname_tag` index, kept next to its table)
// NOTE(review): the SQL comment on `msgids` mentions a "type" field that the visible
// table definition does not declare -- confirm whether the comment is stale
239 enum schemaSupportTable = `
240 -- tag <-> messageid correspondence
241 -- note that one message can be tagged with more than one tag
242 -- there is always tag with "uid=0", to keep all tags alive
243 CREATE TABLE IF NOT EXISTS tagnames (
244 uid INTEGER PRIMARY KEY
245 , hidden INTEGER NOT NULL DEFAULT 0 /* deleting tags may cause 'uid' reuse, so it's better to hide them instead */
246 , threading INTEGER NOT NULL DEFAULT 1 /* enable threaded view? */
247 , tag TEXT NOT NULL UNIQUE
250 -- it is here, because we don't have a lot of tags, and inserts are slightly faster this way
251 CREATE INDEX IF NOT EXISTS tagname_tag ON tagnames(tag);
252 --CREATE INDEX IF NOT EXISTS tagname_tag_uid ON tagnames(tag, uid);
255 -- each tag has its own unique threads (so uids can be duplicated, but (uid,tagid) paris cannot
256 -- appearance is:
257 -- -1: hidden (used to hide spam in newsgroups)
258 -- 0: unread
259 -- 1: read
260 -- 2: soft-delete from filter
261 -- 3: soft-delete by user (will be purged on folder change)
262 -- 4: muted (by filter)
263 -- 5: self-highlight
264 CREATE TABLE IF NOT EXISTS threads (
265 uid INTEGER /* rowid, corresponds to "id" in "messages", never zero */
266 , tagid INTEGER /* we need separat threads for each tag */
267 , time INTEGER DEFAULT 0 /* unixtime -- creation/send/receive */
268 /* threading info */
269 , parent INTEGER DEFAULT 0 /* uid: parent message in thread, or 0 */
270 /* nntp info */
271 , nntpidx INTEGER DEFAULT 0 /* nntp index for nntp articles */
272 /* flags */
273 , appearance INTEGER DEFAULT 0 /* how the message should look */
277 CREATE TABLE IF NOT EXISTS info (
278 uid INTEGER PRIMARY KEY /* rowid, corresponds to "id" in "messages", never zero */
279 , from_name TEXT /* can be empty */
280 , from_mail TEXT /* can be empty */
281 , subj TEXT /* can be empty */
282 , to_name TEXT /* can be empty */
283 , to_mail TEXT /* can be empty */
287 -- this holds msgid
288 -- moved to separate table, because this info is used only when inserting new messages
289 -- "type" has no gaps (except the 0, which can be absent)
290 CREATE TABLE IF NOT EXISTS msgids (
291 uid INTEGER PRIMARY KEY /* rowid, corresponds to "id" in "messages", never zero */
292 , time INTEGER /* so we can select the most recent message */
293 , msgid TEXT /* message id */
297 -- this holds in-reply-to, and references
298 -- moved to separate table, because this info is used only when inserting new messages
299 CREATE TABLE IF NOT EXISTS refids (
300 uid INTEGER /* rowid, corresponds to "id" in "messages", never zero */
301 , idx INTEGER /* internal index in headers, cannot have gaps, starts from 0 */
302 , msgid TEXT /* message id */
306 CREATE TABLE IF NOT EXISTS content (
307 uid INTEGER /* owner message uid */
308 , mime TEXT /* always lowercased */
309 , format TEXT /* optional format, like 'flowed' */
310 , content TEXT /* properly decoded */
314 CREATE TABLE IF NOT EXISTS attaches (
315 uid INTEGER /* owner message uid */
316 , mime TEXT /* always lowercased */
317 , name TEXT /* attachment name */
318 , content BLOB /* properly decoded */
// index definitions for the support tables (`threads`, `msgids`, `refids`, `content`, `attaches`);
// kept apart from the table DDL and joined with it in `schemaSupport`
323 enum schemaSupportIndex = `
324 CREATE UNIQUE INDEX IF NOT EXISTS trd_by_tag_uid ON threads(tagid, uid);
325 CREATE UNIQUE INDEX IF NOT EXISTS trd_by_uid_tag ON threads(uid, tagid);
327 -- this is for views where threading is disabled
328 CREATE INDEX IF NOT EXISTS trd_by_tag_time ON threads(tagid, time);
329 --CREATE INDEX IF NOT EXISTS trd_by_tag_time_parent ON threads(tagid, time, parent);
330 --CREATE INDEX IF NOT EXISTS trd_by_tag_parent_time ON threads(tagid, parent, time);
331 --CREATE INDEX IF NOT EXISTS trd_by_tag_parent ON threads(tagid, parent);
332 CREATE INDEX IF NOT EXISTS trd_by_parent_tag ON threads(parent, tagid);
334 -- this is for fast "maximum nntp index" queries
335 CREATE INDEX IF NOT EXISTS trd_by_tag_nntpidx ON threads(tagid, nntpidx DESC);
337 -- this is for test if we have any unread articles (we don't mind the exact numbers, tho)
338 CREATE INDEX IF NOT EXISTS trd_by_appearance ON threads(appearance);
339 CREATE INDEX IF NOT EXISTS trd_by_tag_appearance ON threads(tagid, appearance);
341 -- for theadmsgview
342 CREATE INDEX IF NOT EXISTS trd_by_tag_appearance_time ON threads(tagid, appearance, time);
344 CREATE INDEX IF NOT EXISTS msgid_by_msgid_time ON msgids(msgid, time DESC);
346 CREATE INDEX IF NOT EXISTS refid_by_refids_idx ON refids(msgid, idx);
347 CREATE INDEX IF NOT EXISTS refid_by_uid_idx ON refids(uid, idx);
349 CREATE INDEX IF NOT EXISTS content_by_uid ON content(uid);
350 CREATE INDEX IF NOT EXISTS content_by_uid_mime ON content(uid, mime);
352 CREATE INDEX IF NOT EXISTS attaches_by_uid_name ON attaches(uid, name);
// complete support-database schema: table definitions followed by index definitions
355 static immutable string schemaSupport = schemaSupportTable~schemaSupportIndex;
358 // ////////////////////////////////////////////////////////////////////////// //
/// Checks whether `buf` can be stored as text.
///
/// Returns `false` as soon as a byte below 32 other than TAB (9), LF (10), CR (13) or ESC (27)
/// is seen, or when DEL (127) is seen; returns `true` otherwise (including for an empty buffer).
private bool isGoodText (const(void)[] buf) pure nothrow @safe @nogc {
  foreach (immutable ubyte octet; cast(const(ubyte)[])buf) {
    switch (octet) {
      case 9, 10, 13, 27:
        // the only control codes tolerated in text
        continue;
      case 127:
        return false;
      default:
        if (octet < 32) return false;
        break;
    }
  }
  return true;
}
372 // ////////////////////////////////////////////////////////////////////////// //
/// Detects the 5-byte codec-tag shape at the start of `buf`: an ESC byte followed by
/// four ASCII uppercase letters (the shape of tags such as "\x1bZLIB" or "\x1bRAWB").
///
/// Returns `false` for buffers shorter than 5 bytes.
private bool isBadPrefix (const(char)[] buf) pure nothrow @trusted @nogc {
  if (buf.length < 5 || buf.ptr[0] != '\x1b') return false;
  foreach (immutable idx; 1..5) {
    immutable char ch = buf.ptr[idx];
    if (ch < 'A' || ch > 'Z') return false;
  }
  return true;
}
/* variable-length unsigned integer encoding.

  the two high bits of the first byte select the encoded size:
    00: value fits into  6 bits: [0..       0x3f] (1 byte)
    01: value fits into 14 bits: [0..     0x3fff] (2 bytes)
    10: value fits into 22 bits: [0..  0x3f_ffff] (3 bytes)
    11: value fits into 30 bits: [0..0x3fff_ffff] (4 bytes)

  the number is stored as big-endian.
  nothing is written to `dest` if it has not enough room, but the required
  size is still returned.

  returns the number of bytes in the encoding, or 0 if the number is too big.
*/
private uint encodeUInt (void[] dest, uint v) nothrow @trusted @nogc {
  if (v > 0x3fff_ffffU) return 0;
  // smallest encoding able to hold the value
  immutable uint nbytes = (v > 0x3f_ffffU ? 4 : v > 0x3fffU ? 3 : v > 0x3fU ? 2 : 1);
  ubyte[] outb = cast(ubyte[])dest;
  if (outb.length >= nbytes) {
    // the size tag (nbytes-1) lives in the two topmost bits of the first emitted byte
    immutable uint tagged = v|((nbytes-1)<<(nbytes*8-2));
    foreach (immutable idx; 0..nbytes) {
      outb.ptr[idx] = cast(ubyte)(tagged>>((nbytes-1-idx)*8));
    }
  }
  return nbytes;
}
/// Returns the size in bytes (1..4) of the encoded number starting at `dest`, as announced by
/// the two high bits of its first byte; returns 0 when `dest` is empty or shorter than that size.
private uint decodeUIntLength (const(void)[] dest) pure nothrow @trusted @nogc {
  const(ubyte)[] d = cast(const(ubyte)[])dest;
  if (d.length == 0) return 0;
  // tag 0..3 in the top two bits means 1..4 bytes
  immutable uint need = (d.ptr[0]>>6)+1U;
  return (d.length >= need ? need : 0);
}
/// Decodes the big-endian, size-tagged number at the start of `dest`
/// (the two high bits of the first byte give the size, the remaining bits are the value).
///
/// Returns `uint.max` on error (empty or truncated input); this is an impossible value,
/// because a valid number has at most 30 bits.
private uint decodeUInt (const(void)[] dest) pure nothrow @trusted @nogc {
  const(ubyte)[] d = cast(const(ubyte)[])dest;
  if (d.length == 0) return uint.max;
  immutable uint count = (d.ptr[0]>>6)+1U;
  if (d.length < count) return uint.max;
  // the first byte contributes its low 6 bits, every following byte all 8
  uint res = d.ptr[0]&0x3fU;
  foreach (immutable idx; 1..count) res = (res<<8)|d.ptr[idx];
  return res;
}
473 // ////////////////////////////////////////////////////////////////////////// //
474 extern(C) {
/*
** ChiroPack(content)
** ChiroPack(content, packflag)
**
** second form accepts int flag; 0 means "don't pack"
**
** common worker for both SQL entry points.
**
** result layout for packed data: 5-byte codec tag (ESC + 4 uppercase letters),
** then the original size encoded with `encodeUInt()`, then the compressed bytes.
** packed form is used only when `packlevel` is positive, the value is longer
** than 8 bytes, and the packed result is smaller than the original.
**
** otherwise the value is returned unpacked; if it happens to start with something
** that looks like a codec tag (see `isBadPrefix()`), it is escaped with a "\x1bRAWB" tag.
** unpacked results are returned as TEXT when `isGoodText()` accepts them, and as BLOB otherwise.
**
** errors: "too big" for values over 0x3fffffff-4 bytes (or huge values that would need
** escaping), "no memory" on allocation failures.
*/
private void sq3Fn_ChiroPackCommon (sqlite3_context *ctx, sqlite3_value *val, int packlevel) nothrow @trusted {
  immutable int sz = sqlite3_value_bytes(val);
  if (sz < 0 || sz > 0x3fffffff-4) { sqlite3_result_error_toobig(ctx); return; }

  if (sz == 0) { sqlite3_result_text(ctx, "", 0, SQLITE_STATIC); return; }

  const(char)* vs = cast(const(char) *)sqlite3_value_blob(val);
  if (!vs) { sqlite3_result_error(ctx, "cannot get blob data in `ChiroPack()`", -1); return; }

  // too close to the size limit: pass it through as is, there is no room for any prefix
  if (sz >= 0x3fffffff-8) {
    if (isBadPrefix(vs[0..cast(uint)sz])) { sqlite3_result_error_toobig(ctx); return; }
    sqlite3_result_value(ctx, val);
    return;
  }

  import core.stdc.stdlib : malloc, free;
  import core.stdc.string : memcpy;

  if (packlevel > 0 && sz > 8) {
    // codec tag and encoded original size
    char[16] xsz = void;
    version(use_balz) {
      xsz[0..5] = "\x1bBALZ";
    } else version(use_libxpack) {
      xsz[0..5] = "\x1bXPAK";
    } else version(use_libbrieflz) {
      xsz[0..5] = "\x1bBRLZ";
    } else version(use_liblzfse) {
      xsz[0..5] = "\x1bLZFS";
    } else version(use_lzjb) {
      xsz[0..5] = "\x1bLZJB";
    } else version(use_libwim_lzms) {
      xsz[0..5] = "\x1bLZMS";
    } else version(use_libwim_lzx) {
      xsz[0..5] = "\x1bLZMX";
    } else version(use_libwim_xpress) {
      xsz[0..5] = "\x1bXPRS";
    } else version(use_lz4) {
      xsz[0..5] = "\x1bLZ4D";
    } else version(use_zstd) {
      xsz[0..5] = "\x1bZSTD";
    } else {
      xsz[0..5] = "\x1bZLIB";
    }
    uint xszlen = encodeUInt(xsz[5..$], cast(uint)sz);
    if (xszlen) {
      xszlen += 5;
      //xsz[xszlen++] = ':';
      // size of the compressor output area (the header is added on top of it)
      version(use_libbrieflz) {
        immutable usize bsz = blz_max_packed_size(cast(usize)sz);
      } else version(use_lzjb) {
        immutable uint bsz = cast(uint)sz+1024;
      } else version(use_lz4) {
        immutable uint bsz = cast(uint)LZ4_compressBound(sz)+1024;
      } else {
        immutable uint bsz = cast(uint)sz;
      }
      char* cbuf = cast(char*)malloc(bsz+xszlen);
      if (cbuf is null) {
        // cannot pack; an unpacked result is still fine unless it needs escaping
        if (isBadPrefix(vs[0..cast(uint)sz])) { sqlite3_result_error_nomem(ctx); return; }
      } else {
        cbuf[0..xszlen] = xsz[0..xszlen];
        // on success each branch hands `cbuf` over to SQLite (with `free` as destructor) and returns
        version(use_balz) {
          Balz bz;
          usize spos = 0;
          usize dpos = xszlen;
          try {
            bz.compress(
              // reader
              (buf) {
                if (spos >= cast(usize)sz) return 0;
                usize left = cast(usize)sz-spos;
                if (left > buf.length) left = buf.length;
                if (left) memcpy(buf.ptr, vs+spos, left);
                spos += left;
                return left;
              },
              // writer
              (buf) {
                if (dpos+buf.length >= cast(usize)sz) throw new Exception("uncompressible");
                memcpy(cbuf+dpos, buf.ptr, buf.length);
                dpos += buf.length;
              },
              // maximum compression?
              true
            );
          } catch(Exception) {
            dpos = usize.max;
          }
          if (dpos < cast(usize)sz) {
            sqlite3_result_blob(ctx, cbuf, dpos, &free);
            return;
          }
        } else version(use_libdeflate) {
          if (packlevel > 12) packlevel = 12;
          libdeflate_compressor *cpr = libdeflate_alloc_compressor(packlevel);
          if (cpr is null) { free(cbuf); sqlite3_result_error_nomem(ctx); return; }
          usize dsize = libdeflate_zlib_compress(cpr, vs, cast(usize)sz, cbuf+xszlen, bsz);
          libdeflate_free_compressor(cpr);
          if (dsize > 0 && dsize+xszlen < cast(usize)sz) {
            sqlite3_result_blob(ctx, cbuf, dsize+xszlen, &free);
            return;
          }
        } else version(use_libxpack) {
          // 2^19 (524288) bytes. This is definitely a big problem and I am planning to address it.
          // https://github.com/ebiggers/xpack/issues/1
          if (sz < 524288-64) {
            if (packlevel > 9) packlevel = 9;
            xpack_compressor *cpr = xpack_alloc_compressor(cast(usize)sz, packlevel);
            if (cpr is null) { free(cbuf); sqlite3_result_error_nomem(ctx); return; }
            usize dsize = xpack_compress(cpr, vs, cast(usize)sz, cbuf+xszlen, bsz);
            xpack_free_compressor(cpr);
            if (dsize > 0 && dsize+xszlen < cast(usize)sz) {
              sqlite3_result_blob(ctx, cbuf, dsize+xszlen, &free);
              return;
            }
          }
        } else version(use_libbrieflz) {
          if (packlevel > 10) packlevel = 10;
          immutable usize wbsize = blz_workmem_size_level(cast(usize)sz, packlevel);
          void* wbuf = cast(void*)malloc(wbsize+!wbsize);
          if (wbuf is null) { free(cbuf); sqlite3_result_error_nomem(ctx); return; }
          uint dsize = blz_pack_level(vs, cbuf+xszlen, cast(uint)sz, wbuf, packlevel);
          free(wbuf);
          if (dsize+xszlen < cast(usize)sz) {
            sqlite3_result_blob(ctx, cbuf, dsize+xszlen, &free);
            return;
          }
        } else version(use_liblzfse) {
          immutable usize wbsize = lzfse_encode_scratch_size();
          void* wbuf = cast(void*)malloc(wbsize+!wbsize);
          if (wbuf is null) { free(cbuf); sqlite3_result_error_nomem(ctx); return; }
          usize dsize = lzfse_encode_buffer(cbuf+xszlen, bsz, vs, cast(uint)sz, wbuf);
          free(wbuf);
          if (dsize > 0 && dsize+xszlen < cast(usize)sz) {
            sqlite3_result_blob(ctx, cbuf, dsize+xszlen, &free);
            return;
          }
        } else version(use_lzjb) {
          usize dsize = lzjb_compress(vs, cast(usize)sz, cbuf+xszlen, bsz);
          if (dsize == usize.max) dsize = 0;
          if (dsize > 0 && dsize+xszlen < cast(usize)sz) {
            sqlite3_result_blob(ctx, cbuf, dsize+xszlen, &free);
            return;
          }
          //{ import core.stdc.stdio : fprintf, stderr; fprintf(stderr, "LZJB FAILED!\n"); }
        } else version(use_libwim_lzms) {
          wimlib_compressor* cpr;
          uint clevel = (packlevel < 10 ? 50 : 1000);
          int rc = wimlib_create_compressor(WIMLIB_COMPRESSION_TYPE_LZMS, cast(usize)sz, clevel, &cpr);
          if (rc != 0) { free(cbuf); sqlite3_result_error_nomem(ctx); return; }
          usize dsize = wimlib_compress(vs, cast(usize)sz, cbuf+xszlen, bsz, cpr);
          wimlib_free_compressor(cpr);
          if (dsize > 0 && dsize+xszlen < cast(usize)sz) {
            sqlite3_result_blob(ctx, cbuf, dsize+xszlen, &free);
            return;
          }
        } else version(use_libwim_lzx) {
          if (sz <= WIMLIB_LZX_MAX_CHUNK) {
            wimlib_compressor* cpr;
            uint clevel = (packlevel < 10 ? 50 : 1000);
            int rc = wimlib_create_compressor(WIMLIB_COMPRESSION_TYPE_LZX, cast(usize)sz, clevel, &cpr);
            if (rc != 0) { free(cbuf); sqlite3_result_error_nomem(ctx); return; }
            usize dsize = wimlib_compress(vs, cast(usize)sz, cbuf+xszlen, bsz, cpr);
            wimlib_free_compressor(cpr);
            if (dsize > 0 && dsize+xszlen < cast(usize)sz) {
              sqlite3_result_blob(ctx, cbuf, dsize+xszlen, &free);
              return;
            }
          }
        } else version(use_libwim_xpress) {
          if (sz <= WIMLIB_XPRESS_MAX_CHUNK) {
            wimlib_compressor* cpr;
            uint clevel = (packlevel < 10 ? 50 : 1000);
            uint csz = WIMLIB_XPRESS_MIN_CHUNK;
            while (csz < WIMLIB_XPRESS_MAX_CHUNK && csz < cast(uint)sz) csz *= 2U;
            int rc = wimlib_create_compressor(WIMLIB_COMPRESSION_TYPE_XPRESS, csz, clevel, &cpr);
            if (rc != 0) { free(cbuf); sqlite3_result_error_nomem(ctx); return; }
            usize dsize = wimlib_compress(vs, cast(usize)sz, cbuf+xszlen, bsz, cpr);
            wimlib_free_compressor(cpr);
            if (dsize > 0 && dsize+xszlen < cast(usize)sz) {
              sqlite3_result_blob(ctx, cbuf, dsize+xszlen, &free);
              return;
            }
          }
        } else version(use_lz4) {
          int dsize = LZ4_compress_default(vs, cbuf+xszlen, sz, cast(int)bsz);
          if (dsize > 0 && dsize+xszlen < sz) {
            sqlite3_result_blob(ctx, cbuf, dsize+xszlen, &free);
            return;
          }
        } else version(use_zstd) {
          immutable int clev =
            packlevel <= 3 ? ZSTD_minCLevel() :
            packlevel <= 6 ? ZSTD_defaultCLevel() :
            packlevel < 10 ? 19 :
            ZSTD_maxCLevel();
          usize dsize = ZSTD_compress(cbuf+xszlen, cast(int)bsz, vs, sz, clev);
          if (!ZSTD_isError(dsize) && dsize > 0 && dsize+xszlen < sz) {
            sqlite3_result_blob(ctx, cbuf, dsize+xszlen, &free);
            return;
          }
        } else {
          import etc.c.zlib : /*compressBound,*/ compress2, Z_OK;
          //uint bsz = cast(uint)compressBound(cast(uint)sz);
          if (packlevel > 9) packlevel = 9;
          usize dsize = bsz;
          int zres = compress2(cast(ubyte *)(cbuf+xszlen), &dsize, cast(const(ubyte) *)vs, sz, packlevel);
          if (zres == Z_OK && dsize+xszlen < cast(usize)sz) {
            sqlite3_result_blob(ctx, cbuf, dsize+xszlen, &free);
            return;
          }
        }
        // packing failed or did not pay off
        free(cbuf);
      }
    }
  }

  // unpacked result
  if (isBadPrefix(vs[0..cast(uint)sz])) {
    // the data looks like it has a codec tag, so escape it with the "raw" tag.
    // the buffer holds the 5-byte tag plus the payload; it used to be allocated as `sz+4`,
    // one byte short of what is written below
    char *res = cast(char *)malloc(sz+5);
    if (res is null) { sqlite3_result_error_nomem(ctx); return; }
    res[0..5] = "\x1bRAWB";
    res[5..sz+5] = vs[0..sz];
    if (isGoodText(vs[0..cast(usize)sz])) {
      sqlite3_result_text(ctx, res, sz+5, &free);
    } else {
      sqlite3_result_blob(ctx, res, sz+5, &free);
    }
  } else {
    // return the value itself when its type already matches, otherwise a copy with the fixed type
    immutable bool wantBlob = !isGoodText(vs[0..cast(usize)sz]);
    immutable int tp = sqlite3_value_type(val);
    if ((wantBlob && tp == SQLITE_BLOB) || (!wantBlob && tp == SQLITE3_TEXT)) {
      sqlite3_result_value(ctx, val);
    } else if (wantBlob) {
      sqlite3_result_blob(ctx, vs, sz, SQLITE_TRANSIENT);
    } else {
      sqlite3_result_text(ctx, vs, sz, SQLITE_TRANSIENT);
    }
  }
}
/*
** ChiroPack(content)
**
** one-argument SQL entry point: packs `content` with pack level 9.
** NOTE(review): the level is a literal 9, not `ChiroDefaultPackLevel` -- confirm this is intended.
*/
private void sq3Fn_ChiroPack (sqlite3_context *ctx, int argc, sqlite3_value **argv) nothrow @trusted {
  // the error must name this SQL function (it used to say `ChiroUnpack()`)
  if (argc != 1) { sqlite3_result_error(ctx, "invalid number of arguments to `ChiroPack()`", -1); return; }
  sq3Fn_ChiroPackCommon(ctx, argv[0], 9);
}
/*
** ChiroPack(content, packlevel)
**
** `packlevel` == 0 means "don't pack"
** `packlevel` == 9 means "maximum compression"
**
** two-argument SQL entry point: the second argument is read as an integer pack level.
*/
private void sq3Fn_ChiroPackDPArg (sqlite3_context *ctx, int argc, sqlite3_value **argv) nothrow @trusted {
  // the error must name this SQL function (it used to say `ChiroUnpack()`)
  if (argc != 2) { sqlite3_result_error(ctx, "invalid number of arguments to `ChiroPack()`", -1); return; }
  sq3Fn_ChiroPackCommon(ctx, argv[0], sqlite3_value_int(argv[1]));
}
746 ** ChiroUnpack(content)
748 ** it is (almost) safe to pass non-packed content here
750 private void sq3Fn_ChiroUnpack (sqlite3_context *ctx, int argc, sqlite3_value **argv) {
751 //{ import core.stdc.stdio : fprintf, stderr; fprintf(stderr, "!!!000\n"); }
752 if (argc != 1) { sqlite3_result_error(ctx, "invalid number of arguments to `ChiroUnpack()`", -1); return; }
754 int sz = sqlite3_value_bytes(argv[0]);
755 if (sz < 0 || sz > 0x3fffffff-4) { sqlite3_result_error_toobig(ctx); return; }
757 if (sz == 0) { sqlite3_result_text(ctx, "", 0, SQLITE_STATIC); return; }
759 const(char)* vs = cast(const(char) *)sqlite3_value_blob(argv[0]);
760 if (!vs) { sqlite3_result_error(ctx, "cannot get blob data in `ChiroUnpack()`", -1); return; }
762 if (!isBadPrefix(vs[0..cast(uint)sz])) { sqlite3_result_value(ctx, argv[0]); return; }
763 if (vs[0..5] == "\x1bRAWB") { sqlite3_result_blob(ctx, vs+5, sz-5, SQLITE_TRANSIENT); return; }
764 if (sz < 6) { sqlite3_result_error(ctx, "invalid data in `ChiroUnpack()`", -1); return; }
766 enum {
767 Codec_ZLIB,
768 Codec_BALZ,
769 Codec_XPAK,
770 Codec_BRLZ,
771 Codec_LZFS,
772 Codec_LZJB,
773 Codec_LZMS,
774 Codec_LZMX,
775 Codec_XPRS,
776 Codec_LZ4D,
777 Codec_ZSTD,
780 int codec = Codec_ZLIB;
781 if (vs[0..5] != "\x1bZLIB") {
782 version(use_balz) {
783 if (codec == Codec_ZLIB && vs[0..5] == "\x1bBALZ") codec = Codec_BALZ;
785 version(use_libxpack) {
786 if (codec == Codec_ZLIB && vs[0..5] == "\x1bXPAK") codec = Codec_XPAK;
788 version(use_libxpack) {
789 if (codec == Codec_ZLIB && vs[0..5] == "\x1bXPAK") codec = Codec_XPAK;
791 version(use_libbrieflz) {
792 if (codec == Codec_ZLIB && vs[0..5] == "\x1bBRLZ") codec = Codec_BRLZ;
794 version(use_liblzfse) {
795 if (codec == Codec_ZLIB && vs[0..5] == "\x1bLZFS") codec = Codec_LZFS;
797 version(use_lzjb) {
798 if (codec == Codec_ZLIB && vs[0..5] == "\x1bLZJB") codec = Codec_LZJB;
800 version(use_libwim_lzms) {
801 if (codec == Codec_ZLIB && vs[0..5] == "\x1bLZMS") codec = Codec_LZMS;
803 version(use_libwim_lzx) {
804 if (codec == Codec_ZLIB && vs[0..5] == "\x1bLZMX") codec = Codec_LZMX;
806 version(use_libwim_xpress) {
807 if (codec == Codec_ZLIB && vs[0..5] == "\x1bXPRS") codec = Codec_XPRS;
809 version(use_lz4) {
810 if (codec == Codec_ZLIB && vs[0..5] == "\x1bLZ4D") codec = Codec_LZ4D;
812 version(use_zstd) {
813 if (codec == Codec_ZLIB && vs[0..5] == "\x1bZSTD") codec = Codec_ZSTD;
815 if (codec == Codec_ZLIB) { sqlite3_result_error(ctx, "invalid codec in `ChiroUnpack()`", -1); return; }
818 // skip codec id
819 // size is guaranteed to be at least 6 here
820 vs += 5;
821 sz -= 5;
823 immutable uint numsz = decodeUIntLength(vs[0..cast(uint)sz]);
824 //{ import core.stdc.stdio : printf; printf("sz=%d; numsz=%u; %02X %02X %02X %02X\n", sz, numsz, cast(uint)vs[5], cast(uint)vs[6], cast(uint)vs[7], cast(uint)vs[8]); }
825 //writeln("sq3Fn_ChiroUnpack: nsz=", sz-5);
826 if (numsz == 0 || numsz > cast(uint)sz) { sqlite3_result_error(ctx, "invalid data in `ChiroUnpack()`", -1); return; }
827 //{ import core.stdc.stdio : fprintf, stderr; fprintf(stderr, "!!!100\n"); }
828 immutable uint rsize = decodeUInt(vs[0..cast(uint)sz]);
829 if (rsize == uint.max) { sqlite3_result_error(ctx, "invalid data in `ChiroUnpack()`", -1); return; }
830 //{ import core.stdc.stdio : fprintf, stderr; fprintf(stderr, "!!!101:rsize=%u\n", rsize); }
831 if (rsize == 0) { sqlite3_result_text(ctx, "", 0, SQLITE_STATIC); return; }
832 // skip number
833 vs += numsz;
834 sz -= cast(int)numsz;
835 //{ import core.stdc.stdio : printf; printf("sz=%d; rsize=%u\n", sz, rsize, dpos); }
837 import core.stdc.stdlib : malloc, free;
838 import core.stdc.string : memcpy;
840 char* cbuf = cast(char*)malloc(rsize);
841 if (cbuf is null) { sqlite3_result_error_nomem(ctx); return; }
842 //writeln("sq3Fn_ChiroUnpack: rsize=", rsize, "; left=", sz-dpos);
844 usize dsize = rsize;
845 final switch (codec) {
846 case Codec_ZLIB:
847 version(use_libdeflate) {
848 libdeflate_decompressor *dcp = libdeflate_alloc_decompressor();
849 if (dcp is null) { free(cbuf); sqlite3_result_error_nomem(ctx); return; }
850 auto rc = libdeflate_zlib_decompress(dcp, vs, cast(usize)sz, cbuf, rsize, null);
851 if (rc != LIBDEFLATE_SUCCESS) {
852 free(cbuf);
853 sqlite3_result_error(ctx, "broken data in `ChiroUnpack()`", -1);
854 return;
856 } else {
857 import etc.c.zlib : uncompress, Z_OK;
858 int zres = uncompress(cast(ubyte *)cbuf, &dsize, cast(const(ubyte) *)vs, sz);
859 //writeln("sq3Fn_ChiroUnpack: rsize=", rsize, "; left=", sz, "; dsize=", dsize, "; zres=", zres);
860 if (zres != Z_OK || dsize != rsize) {
861 free(cbuf);
862 sqlite3_result_error(ctx, "broken data in `ChiroUnpack()`", -1);
863 return;
866 break;
867 case Codec_BALZ:
868 version(use_balz) {
869 uint spos = 0;
870 uint outpos = 0;
871 try {
872 Unbalz bz;
873 auto dc = bz.decompress(
874 // reader
875 (buf) {
876 uint left = cast(uint)sz-spos;
877 if (left > buf.length) left = cast(uint)buf.length;
878 if (left != 0) memcpy(buf.ptr, vs, left);
879 spos += left;
880 return left;
882 // writer
883 (buf) {
884 uint left = rsize-outpos;
885 if (left == 0) throw new Exception("broken data");
886 if (left > buf.length) left = cast(uint)buf.length;
887 if (left) memcpy(cbuf+outpos, buf.ptr, left);
888 outpos += left;
891 if (dc != rsize) throw new Exception("broken data");
892 } catch (Exception) {
893 outpos = uint.max;
895 if (outpos == uint.max) {
896 free(cbuf);
897 sqlite3_result_error(ctx, "broken data in `ChiroUnpack()`", -1);
898 return;
900 dsize = outpos;
901 } else {
902 free(cbuf);
903 sqlite3_result_error(ctx, "unsupported compression in `ChiroUnpack()`", -1);
904 return;
906 break;
907 case Codec_XPAK:
908 version(use_libxpack) {
909 xpack_decompressor *dcp = xpack_alloc_decompressor();
910 if (dcp is null) { free(cbuf); sqlite3_result_error_nomem(ctx); return; }
911 auto rc = xpack_decompress(dcp, vs, cast(usize)sz, cbuf, rsize, null);
912 if (rc != DECOMPRESS_SUCCESS) {
913 free(cbuf);
914 sqlite3_result_error(ctx, "broken data in `ChiroUnpack()`", -1);
915 return;
917 } else {
918 free(cbuf);
919 sqlite3_result_error(ctx, "unsupported compression in `ChiroUnpack()`", -1);
920 return;
922 break;
923 case Codec_BRLZ:
924 version(use_libbrieflz) {
925 dsize = blz_depack_safe(vs, cast(uint)sz, cbuf, rsize);
926 if (dsize != rsize) {
927 free(cbuf);
928 sqlite3_result_error(ctx, "broken data in `ChiroUnpack()`", -1);
929 return;
931 } else {
932 free(cbuf);
933 sqlite3_result_error(ctx, "unsupported compression in `ChiroUnpack()`", -1);
934 return;
936 break;
937 case Codec_LZFS:
938 version(use_liblzfse) {
939 immutable usize wbsize = lzfse_decode_scratch_size();
940 void* wbuf = cast(void*)malloc(wbsize+!wbsize);
941 if (wbuf is null) { free(cbuf); sqlite3_result_error_nomem(ctx); return; }
942 dsize = lzfse_decode_buffer(cbuf, cast(usize)rsize, vs, cast(usize)sz, wbuf);
943 free(wbuf);
944 if (dsize == 0 || dsize != rsize) {
945 free(cbuf);
946 sqlite3_result_error(ctx, "broken data in `ChiroUnpack()`", -1);
947 return;
949 } else {
950 free(cbuf);
951 sqlite3_result_error(ctx, "unsupported compression in `ChiroUnpack()`", -1);
952 return;
954 break;
955 case Codec_LZJB:
956 version(use_lzjb) {
957 dsize = lzjb_decompress(vs, cast(usize)sz, cbuf, rsize);
958 if (dsize != rsize) {
959 free(cbuf);
960 sqlite3_result_error(ctx, "broken data in `ChiroUnpack()`", -1);
961 return;
963 } else {
964 free(cbuf);
965 sqlite3_result_error(ctx, "unsupported compression in `ChiroUnpack()`", -1);
966 return;
968 break;
969 case Codec_LZMS:
970 version(use_libwim_lzms) {
971 wimlib_decompressor* dpr;
972 int rc = wimlib_create_decompressor(WIMLIB_COMPRESSION_TYPE_LZMS, rsize, &dpr);
973 if (rc != 0) { free(cbuf); sqlite3_result_error_nomem(ctx); return; }
974 rc = wimlib_decompress(vs, cast(usize)sz, cbuf, rsize, dpr);
975 wimlib_free_decompressor(dpr);
976 if (rc != 0) {
977 free(cbuf);
978 sqlite3_result_error(ctx, "broken data in `ChiroUnpack()`", -1);
979 return;
981 } else {
982 free(cbuf);
983 sqlite3_result_error(ctx, "unsupported compression in `ChiroUnpack()`", -1);
984 return;
986 break;
987 case Codec_LZMX:
988 version(use_libwim_lzx) {
989 wimlib_decompressor* dpr;
990 int rc = wimlib_create_decompressor(WIMLIB_COMPRESSION_TYPE_LZX, rsize, &dpr);
991 if (rc != 0) { free(cbuf); sqlite3_result_error_nomem(ctx); return; }
992 rc = wimlib_decompress(vs, cast(usize)sz, cbuf, rsize, dpr);
993 wimlib_free_decompressor(dpr);
994 if (rc != 0) {
995 free(cbuf);
996 sqlite3_result_error(ctx, "broken data in `ChiroUnpack()`", -1);
997 return;
999 } else {
1000 free(cbuf);
1001 sqlite3_result_error(ctx, "unsupported compression in `ChiroUnpack()`", -1);
1002 return;
1004 break;
1005 case Codec_XPRS:
1006 version(use_libwim_xpress) {
1007 wimlib_decompressor* dpr;
1008 uint csz = WIMLIB_XPRESS_MIN_CHUNK;
1009 while (csz < WIMLIB_XPRESS_MAX_CHUNK && csz < rsize) csz *= 2U;
1010 int rc = wimlib_create_decompressor(WIMLIB_COMPRESSION_TYPE_XPRESS, csz, &dpr);
1011 if (rc != 0) { free(cbuf); sqlite3_result_error_nomem(ctx); return; }
1012 rc = wimlib_decompress(vs, cast(usize)sz, cbuf, rsize, dpr);
1013 wimlib_free_decompressor(dpr);
1014 if (rc != 0) {
1015 free(cbuf);
1016 sqlite3_result_error(ctx, "broken data in `ChiroUnpack()`", -1);
1017 return;
1019 } else {
1020 free(cbuf);
1021 sqlite3_result_error(ctx, "unsupported compression in `ChiroUnpack()`", -1);
1022 return;
1024 break;
1025 case Codec_LZ4D:
1026 version(use_lz4) {
1027 dsize = LZ4_decompress_safe(vs, cbuf, sz, rsize);
1028 if (dsize != rsize) {
1029 free(cbuf);
1030 sqlite3_result_error(ctx, "broken data in `ChiroUnpack()`", -1);
1031 return;
1033 } else {
1034 free(cbuf);
1035 sqlite3_result_error(ctx, "unsupported compression in `ChiroUnpack()`", -1);
1036 return;
1038 break;
1039 case Codec_ZSTD:
1040 version(use_zstd) {
1041 dsize = ZSTD_decompress(cbuf, rsize, vs, sz);
1042 if (ZSTD_isError(dsize) || dsize != rsize) {
1043 free(cbuf);
1044 sqlite3_result_error(ctx, "broken data in `ChiroUnpack()`", -1);
1045 return;
1047 } else {
1048 free(cbuf);
1049 sqlite3_result_error(ctx, "unsupported compression in `ChiroUnpack()`", -1);
1050 return;
1052 break;
1055 if (isGoodText(cbuf[0..dsize])) {
1056 sqlite3_result_text(ctx, cbuf, cast(int)dsize, &free);
1057 } else {
1058 sqlite3_result_blob(ctx, cbuf, cast(int)dsize, &free);
/*
** ChiroNormCRLF(content)
**
** Normalizes line endings and control characters:
**   - CR followed by LF is dropped (so CR/LF becomes LF); a lone CR becomes a space;
**   - VT and FF become LF;
**   - `\x7f` becomes `~`;
**   - all other control chars (except TAB and LF) become spaces;
**   - trailing blanks (bytes <= 32) are removed.
** If nothing has to be changed, the argument is passed through (as text).
*/
private void sq3Fn_ChiroNormCRLF (sqlite3_context *ctx, int argc, sqlite3_value **argv) {
  import core.stdc.stdlib : malloc, free;

  if (argc != 1) { sqlite3_result_error(ctx, "invalid number of arguments to `ChiroNormCRLF()`", -1); return; }

  int sz = sqlite3_value_bytes(argv[0]);
  if (sz < 0 || sz > 0x3fffffff) { sqlite3_result_error_toobig(ctx); return; }
  if (sz == 0) { sqlite3_result_text(ctx, "", 0, SQLITE_STATIC); return; }

  const(char)* vs = cast(const(char) *)sqlite3_value_blob(argv[0]);
  if (vs is null) { sqlite3_result_error(ctx, "cannot get blob data in `ChiroNormCRLF()`", -1); return; }

  // cut trailing blanks; if anything was cut, the value must be rebuilt
  immutable int fullsz = sz;
  while (sz > 0 && vs[cast(uint)sz-1] <= 32) --sz;
  if (sz == 0) { sqlite3_result_text(ctx, "", 0, SQLITE_STATIC); return; }
  bool needwork = (sz != fullsz);

  immutable uint srclen = cast(uint)sz;

  // first pass: find out if there is anything to fix, and calculate the output size
  uint newsz = srclen;
  foreach (immutable idx; 0..srclen) {
    immutable char ch = vs[idx];
    if (ch == '\r') {
      needwork = true;
      // CR of a CR/LF pair produces no output
      if (idx+1 < srclen && vs[idx+1] == '\n') --newsz;
    } else if (!needwork && ((ch < 32 && ch != '\t' && ch != '\n') || ch == '\x7f')) {
      needwork = true;
    }
  }

  // nothing to fix: return the argument itself
  if (!needwork) {
    if (sqlite3_value_type(argv[0]) == SQLITE3_TEXT) {
      sqlite3_result_value(ctx, argv[0]);
    } else {
      sqlite3_result_text(ctx, vs, sz, SQLITE_TRANSIENT);
    }
    return;
  }

  assert(newsz && newsz <= srclen);

  // second pass: build the normalized copy
  char* newstr = cast(char*)malloc(newsz);
  if (newstr is null) { sqlite3_result_error_nomem(ctx); return; }
  char* dest = newstr;
  foreach (immutable idx; 0..srclen) {
    immutable char ch = vs[idx];
    switch (ch) {
      case '\r':
        // lone CR turns into a space, CR before LF vanishes
        if (idx+1 >= srclen || vs[idx+1] != '\n') *dest++ = ' ';
        break;
      case '\x7f':
        *dest++ = '~';
        break;
      case '\v': case '\f':
        *dest++ = '\n';
        break;
      case '\t': case '\n':
        *dest++ = ch;
        break;
      default:
        *dest++ = (ch < 32 ? ' ' : ch);
        break;
    }
  }
  assert(dest == newstr+newsz);

  // sqlite takes the ownership of the buffer, and will `free()` it
  sqlite3_result_text(ctx, newstr, cast(int)newsz, &free);
}
/*
** ChiroNormHeaders(content)
**
** Normalizes header text:
**   - CR followed by LF is dropped (so CR/LF becomes LF); a lone CR becomes a space;
**   - LF followed by a blank (byte <= 32) is dropped, which joins multiline headers;
**   - `\x7f` becomes `~`;
**   - all other control chars (TAB included) become spaces;
**   - trailing blanks (bytes <= 32) are removed.
** If nothing has to be changed, the argument is passed through (as text).
*/
private void sq3Fn_ChiroNormHeaders (sqlite3_context *ctx, int argc, sqlite3_value **argv) {
  import core.stdc.stdlib : malloc, free;

  if (argc != 1) { sqlite3_result_error(ctx, "invalid number of arguments to `ChiroNormHeaders()`", -1); return; }

  int sz = sqlite3_value_bytes(argv[0]);
  if (sz < 0 || sz > 0x3fffffff) { sqlite3_result_error_toobig(ctx); return; }
  if (sz == 0) { sqlite3_result_text(ctx, "", 0, SQLITE_STATIC); return; }

  const(char)* vs = cast(const(char) *)sqlite3_value_blob(argv[0]);
  if (vs is null) { sqlite3_result_error(ctx, "cannot get blob data in `ChiroNormHeaders()`", -1); return; }

  // cut trailing blanks; if anything was cut, the value must be rebuilt
  immutable int fullsz = sz;
  while (sz > 0 && vs[cast(uint)sz-1] <= 32) --sz;
  if (sz == 0) { sqlite3_result_text(ctx, "", 0, SQLITE_STATIC); return; }
  bool needwork = (sz != fullsz);

  immutable uint srclen = cast(uint)sz;

  // first pass: find out if there is anything to fix, and calculate the output size
  uint newsz = srclen;
  foreach (immutable idx; 0..srclen) {
    immutable char ch = vs[idx];
    immutable bool hasNext = (idx+1 < srclen);
    if (ch == '\r') {
      needwork = true;
      // CR of a CR/LF pair produces no output
      if (hasNext && vs[idx+1] == '\n') --newsz;
    } else if (ch == '\n') {
      // LF before a blank produces no output (continuation line)
      if (hasNext && vs[idx+1] <= 32) { needwork = true; --newsz; }
    } else if (!needwork && (ch < 32 || ch == '\x7f')) {
      needwork = true;
    }
  }

  // nothing to fix: return the argument itself
  if (!needwork) {
    if (sqlite3_value_type(argv[0]) == SQLITE3_TEXT) {
      sqlite3_result_value(ctx, argv[0]);
    } else {
      sqlite3_result_text(ctx, vs, sz, SQLITE_TRANSIENT);
    }
    return;
  }

  assert(newsz && newsz <= srclen);

  // second pass: build the normalized copy
  char* newstr = cast(char*)malloc(newsz);
  if (newstr is null) { sqlite3_result_error_nomem(ctx); return; }
  char* dest = newstr;
  foreach (immutable idx; 0..srclen) {
    immutable char ch = vs[idx];
    immutable bool hasNext = (idx+1 < srclen);
    switch (ch) {
      case '\r':
        // lone CR turns into a space, CR before LF vanishes
        if (!hasNext || vs[idx+1] != '\n') *dest++ = ' ';
        break;
      case '\n':
        // LF survives only when the next line is not a continuation
        if (!hasNext || vs[idx+1] > 32) *dest++ = '\n';
        break;
      case '\x7f':
        *dest++ = '~';
        break;
      default:
        *dest++ = (ch < 32 ? ' ' : ch);
        break;
    }
  }
  assert(dest == newstr+newsz);

  // sqlite takes the ownership of the buffer, and will `free()` it
  sqlite3_result_text(ctx, newstr, cast(int)newsz, &free);
}
// Finds the end of the header section in `vs[0..sz]`.
// The headers end at the first LF that is immediately followed by an
// empty line (either LF or CR/LF).
// Returns the position AFTER the headers (the empty line is skipped too),
// or `sz` if there is no empty line; returns 0 for `sz <= 0`.
private int sq3Supp_FindHeadersEnd (const(char)* vs, const int sz) {
  import core.stdc.string : memchr;
  if (sz <= 0) return 0;
  int pos = 0; // the search for the next LF starts here
  for (;;) {
    auto lf = cast(const(char)*)memchr(vs+pos, '\n', cast(uint)(sz-pos));
    if (lf is null) return sz;
    // index of the first char of the next line
    int next = cast(int)(lf-vs)+1;
    if (next >= sz) return sz;
    // allow CR before the LF of the empty line
    if (vs[next] == '\r') {
      if (sz-next < 2) return sz;
      ++next;
    }
    if (vs[next] == '\n') return next+1;
    // not an empty line, continue from here
    pos = next;
  }
}
/*
** ChiroExtractHeaders(content)
**
** Returns the header part of the message, i.e. everything up to the first
** empty line (see `sq3Supp_FindHeadersEnd()`), normalized:
**   - CR followed by LF is dropped (so CR/LF becomes LF); a lone CR becomes a space;
**   - LF followed by a blank (byte <= 32) is dropped, which joins multiline headers;
**   - `\x7f` becomes `~`;
**   - all other control chars (TAB included) become spaces;
**   - trailing blanks (bytes <= 32) are removed.
** The result is always text; empty input gives an empty string.
*/
private void sq3Fn_ChiroExtractHeaders (sqlite3_context *ctx, int argc, sqlite3_value **argv) {
  if (argc != 1) { sqlite3_result_error(ctx, "invalid number of arguments to `ChiroExtractHeaders()`", -1); return; }

  int sz = sqlite3_value_bytes(argv[0]);
  if (sz < 0 || sz > 0x3fffffff) { sqlite3_result_error_toobig(ctx); return; }

  if (sz == 0) { sqlite3_result_text(ctx, "", 0, SQLITE_STATIC); return; }

  const(char)* vs = cast(const(char) *)sqlite3_value_blob(argv[0]);
  if (!vs) { sqlite3_result_error(ctx, "cannot get blob data in `ChiroExtractHeaders()`", -1); return; }

  // slice headers
  sz = sq3Supp_FindHeadersEnd(vs, sz);

  // strip trailing blanks (this also removes the empty separator line)
  while (sz > 0 && vs[cast(uint)sz-1U] <= 32) --sz;
  if (sz == 0) { sqlite3_result_text(ctx, "", 0, SQLITE_STATIC); return; }

  // allocate new string (it can be smaller, but will never be bigger)
  import core.stdc.stdlib : malloc, free;
  char* newstr = cast(char*)malloc(cast(uint)sz);
  if (newstr is null) { sqlite3_result_error_nomem(ctx); return; }
  char* dest = newstr;
  foreach (immutable idx, immutable char ch; vs[0..cast(uint)sz]) {
    if (ch == 13) {
      // CR before LF is dropped, lone CR becomes a space
      if (idx+1 < cast(uint)sz && vs[idx+1] == 10) {} else *dest++ = ' ';
    } else if (ch == 10) {
      // LF before a blank is dropped (continuation line)
      if (idx+1 < cast(uint)sz && vs[idx+1] <= 32) {} else *dest++ = '\n';
    } else {
      if (ch == 127) *dest++ = '~';
      else if (ch < 32 && ch != 10) *dest++ = ' ';
      else *dest++ = ch;
    }
  }
  assert(dest <= newstr+cast(uint)sz);
  sz = cast(int)cast(usize)(dest-newstr);
  // the last source byte is non-blank and is always copied, so this should
  // not happen; but if it does, don't leak the buffer
  if (sz == 0) { free(newstr); sqlite3_result_text(ctx, "", 0, SQLITE_STATIC); return; }
  // sqlite takes the ownership of the buffer, and will `free()` it
  sqlite3_result_text(ctx, newstr, sz, &free);
}
/*
** ChiroExtractBody(content)
**
** Returns the body part of the message, i.e. everything after the first
** empty line (see `sq3Supp_FindHeadersEnd()`), normalized:
**   - the final line terminator and the final lone dot are removed;
**   - trailing blanks (bytes <= 32) are removed;
**   - CR followed by LF is dropped (so CR/LF becomes LF); a lone CR becomes a space;
**   - VT and FF become LF;
**   - `\x7f` becomes `~`;
**   - all other control chars (except TAB and LF) become spaces.
** The result is always text; a message without a body gives an empty string.
*/
private void sq3Fn_ChiroExtractBody (sqlite3_context *ctx, int argc, sqlite3_value **argv) {
  if (argc != 1) { sqlite3_result_error(ctx, "invalid number of arguments to `ChiroExtractBody()`", -1); return; }

  int sz = sqlite3_value_bytes(argv[0]);
  if (sz < 0 || sz > 0x3fffffff) { sqlite3_result_error_toobig(ctx); return; }

  if (sz == 0) { sqlite3_result_text(ctx, "", 0, SQLITE_STATIC); return; }

  const(char)* vs = cast(const(char) *)sqlite3_value_blob(argv[0]);
  if (!vs) { sqlite3_result_error(ctx, "cannot get blob data in `ChiroExtractBody()`", -1); return; }

  // slice body
  immutable int bstart = sq3Supp_FindHeadersEnd(vs, sz);
  if (bstart >= sz) { sqlite3_result_text(ctx, "", 0, SQLITE_STATIC); return; }
  vs += bstart;
  sz -= bstart;

  // strip trailing dot: first drop the final line terminator...
  if (sz >= 2 && vs[cast(uint)sz-2U] == '\r' && vs[cast(uint)sz-1U] == '\n') sz -= 2;
  else if (sz >= 1 && vs[cast(uint)sz-1U] == '\n') --sz;
  // ...then the dot itself, if it is alone on the last line
  if (sz == 1 && vs[0] == '.') sz = 0;
  else if (sz >= 2 && vs[cast(uint)sz-2U] == '\n' && vs[cast(uint)sz-1U] == '.') --sz;
  else if (sz >= 2 && vs[cast(uint)sz-2U] == '\r' && vs[cast(uint)sz-1U] == '.') --sz;

  // strip trailing blanks
  while (sz > 0 && vs[cast(uint)sz-1U] <= 32) --sz;
  if (sz == 0) { sqlite3_result_text(ctx, "", 0, SQLITE_STATIC); return; }

  // allocate new string (it can be smaller, but will never be bigger)
  import core.stdc.stdlib : malloc, free;
  char* newstr = cast(char*)malloc(cast(uint)sz);
  if (newstr is null) { sqlite3_result_error_nomem(ctx); return; }
  char* dest = newstr;
  foreach (immutable idx, immutable char ch; vs[0..cast(uint)sz]) {
    if (ch == 13) {
      // CR before LF is dropped, lone CR becomes a space
      if (idx+1 < cast(uint)sz && vs[idx+1] == 10) {} else *dest++ = ' ';
    } else {
      if (ch == 127) *dest++ = '~';
      else if (ch == 11 || ch == 12) *dest++ = '\n';
      else if (ch < 32 && ch != 9 && ch != 10) *dest++ = ' ';
      else *dest++ = ch;
    }
  }
  assert(dest <= newstr+cast(uint)sz);
  sz = cast(int)cast(usize)(dest-newstr);
  // the last source byte is non-blank and is always copied, so this should
  // not happen; but if it does, don't leak the buffer
  if (sz == 0) { free(newstr); sqlite3_result_text(ctx, "", 0, SQLITE_STATIC); return; }
  // sqlite takes the ownership of the buffer, and will `free()` it
  sqlite3_result_text(ctx, newstr, sz, &free);
}
1320 ////////////////////////////////////////////////////////////////////////////////
// Registers all `Chiro*` SQL functions in the given database.
private void registerFunctions (ref Database db) {
  // all functions are registered with the same extra flags
  enum fnflags = /*SQLITE_DIRECTONLY*/SQLITE_INNOCUOUS;

  // packer: one-argument and two-argument variants
  db.createFunction("ChiroPack", 1, &sq3Fn_ChiroPack, moreflags:fnflags);
  db.createFunction("ChiroPack", 2, &sq3Fn_ChiroPackDPArg, moreflags:fnflags);
  // unpacker
  db.createFunction("ChiroUnpack", 1, &sq3Fn_ChiroUnpack, moreflags:fnflags);

  // text normalizers
  db.createFunction("ChiroNormCRLF", 1, &sq3Fn_ChiroNormCRLF, moreflags:fnflags);
  db.createFunction("ChiroNormHeaders", 1, &sq3Fn_ChiroNormHeaders, moreflags:fnflags);

  // message splitters
  db.createFunction("ChiroExtractHeaders", 1, &sq3Fn_ChiroExtractHeaders, moreflags:fnflags);
  db.createFunction("ChiroExtractBody", 1, &sq3Fn_ChiroExtractBody, moreflags:fnflags);
}
1332 ////////////////////////////////////////////////////////////////////////////////
// Removes the old storage database file (if any), and creates a new one.
// Returns the opened database with all `Chiro*` SQL functions registered.
public Database chiroRecreateStorageDB (const(char)[] dbname=StorageDBName) {
  import std.file : remove;
  // failure to remove the old file is ignored
  try {
    remove(dbname);
  } catch (Exception) {}
  auto stdb = Database(dbname, Database.Mode.ReadWriteCreate, dbpragmasRWStorage, schemaStorage);
  registerFunctions(stdb);
  return stdb;
}
1341 ////////////////////////////////////////////////////////////////////////////////
// Removes the old support database file (if any), and creates a new one,
// using the table-only schema.
// The index schema, the r/w support pragmas, and "ANALYZE" are set to be
// executed when the database is closed.
// Returns the opened database with all `Chiro*` SQL functions registered.
public Database chiroRecreateSupportDB (const(char)[] dbname=SupportDBName) {
  import std.file : remove;
  // failure to remove the old file is ignored
  try {
    remove(dbname);
  } catch (Exception) {}
  auto supdb = Database(dbname, Database.Mode.ReadWriteCreate, dbpragmasRWSupportRecreate, schemaSupportTable);
  registerFunctions(supdb);
  supdb.setOnClose(schemaSupportIndex~dbpragmasRWSupport~"ANALYZE;");
  return supdb;
}
// Creates support database indices right now.
// The on-close script is replaced with the one without the index schema
// (only the r/w support pragmas and "ANALYZE" are left).
public void chiroCreateSupportIndiciesDB (ref Database db) {
  // set the new on-close script first, then run the index schema
  db.setOnClose(dbpragmasRWSupport~"ANALYZE;");
  db.execute(schemaSupportIndex);
}
1357 ////////////////////////////////////////////////////////////////////////////////
// Removes the old options database file (if any), and creates a new one.
// "ANALYZE" is set to be executed when the database is closed.
// Returns the opened database with all `Chiro*` SQL functions registered.
public Database chiroRecreateOptionsDB (const(char)[] dbname=OptionsDBName) {
  import std.file : remove;
  // failure to remove the old file is ignored
  try {
    remove(dbname);
  } catch (Exception) {}
  auto optdb = Database(dbname, Database.Mode.ReadWriteCreate, dbpragmasRWOptions, schemaOptions);
  registerFunctions(optdb);
  optdb.setOnClose("ANALYZE;");
  return optdb;
}
1367 ////////////////////////////////////////////////////////////////////////////////
// Opens the existing storage database, either read-only or read-write.
// For a writeable database, "PRAGMA optimize" is set to be executed on close.
// Returns the opened database with all `Chiro*` SQL functions registered.
public Database chiroOpenStorageDB (const(char)[] dbname=StorageDBName, bool readonly=false) {
  // open mode and pragmas depend on the access type
  auto mode = (readonly ? Database.Mode.ReadOnly : Database.Mode.ReadWrite);
  auto pragmas = (readonly ? dbpragmasRO : dbpragmasRWStorage);
  auto stdb = Database(dbname, mode, pragmas, schemaStorage);
  registerFunctions(stdb);
  if (!readonly) {
    stdb.setOnClose("PRAGMA optimize;");
  }
  return stdb;
}
1376 ////////////////////////////////////////////////////////////////////////////////
// Opens the existing support database, either read-only or read-write.
// For a writeable database, "PRAGMA optimize" is set to be executed on close.
// Returns the opened database with all `Chiro*` SQL functions registered.
public Database chiroOpenSupportDB (const(char)[] dbname=SupportDBName, bool readonly=false) {
  // open mode and pragmas depend on the access type
  auto mode = (readonly ? Database.Mode.ReadOnly : Database.Mode.ReadWrite);
  auto pragmas = (readonly ? dbpragmasRO : dbpragmasRWSupport);
  auto supdb = Database(dbname, mode, pragmas, schemaSupport);
  registerFunctions(supdb);
  if (!readonly) {
    supdb.setOnClose("PRAGMA optimize;");
  }
  return supdb;
}
1385 ////////////////////////////////////////////////////////////////////////////////
// Opens the existing options database, either read-only or read-write.
// For a writeable database, "PRAGMA optimize" is set to be executed on close.
// Returns the opened database with all `Chiro*` SQL functions registered.
public Database chiroOpenOptionsDB (const(char)[] dbname=OptionsDBName, bool readonly=false) {
  // open mode and pragmas depend on the access type
  auto mode = (readonly ? Database.Mode.ReadOnly : Database.Mode.ReadWrite);
  auto pragmas = (readonly ? dbpragmasRO : dbpragmasRWOptions);
  auto optdb = Database(dbname, mode, pragmas, schemaOptions);
  registerFunctions(optdb);
  if (!readonly) {
    optdb.setOnClose("PRAGMA optimize;");
  }
  return optdb;
}