egra: don't use ENTER/LEAVE (because intel sux, and they are slower than the correspo...
[iv.d.git] / arcz.d
blob0604592da59f007b1dafaed8c2801fb9322d2d89
1 /** ARZ chunked archive format processor.
3 * This module provides `std.stdio.File`-like interface to ARZ archives.
5 * Copyright: Copyright Ketmar Dark, 2016
7 * License: Boost License 1.0
8 */
9 module iv.arcz /*is aliced*/;
10 //import iv.alice;
12 // use Balz compressor if available
13 static if (__traits(compiles, { import iv.balz; })) enum arcz_has_balz = true; else enum arcz_has_balz = false;
14 static if (__traits(compiles, { import iv.zopfli; })) enum arcz_has_zopfli = true; else enum arcz_has_zopfli = false;
15 static if (__traits(compiles, { import iv.dlzma; })) enum arcz_has_dlzma = true; else enum arcz_has_dlzma = false;
16 static if (arcz_has_balz) import iv.balz;
17 static if (arcz_has_zopfli) import iv.zopfli;
18 static if (arcz_has_dlzma) import iv.dlzma;
20 // comment this to free packed chunk buffer right after using
21 // i.e. `AZFile` will allocate new block for each new chunk
22 //version = arcz_use_more_memory;
24 public import core.stdc.stdio : SEEK_SET, SEEK_CUR, SEEK_END;
26 class ArczException : Exception {
27 this (string msg, string file=__FILE__, usize line=__LINE__, Throwable next=null) pure nothrow @safe @nogc {
28 super(msg, file, line, next);
33 // ////////////////////////////////////////////////////////////////////////// //
34 /// ARZ archive accessor. Use this to open ARZ archives, and open packed files from ARZ archives.
35 public struct ArzArchive {
36 private:
37 static assert(usize.sizeof >= (void*).sizeof);
38 private import core.stdc.stdio : FILE, fopen, fclose, fread, fseek;
39 private import etc.c.zlib;
40 private import core.sync.mutex : Mutex;
42 enum Packer {
43 Zlib,
44 Balz,
45 Lzma,
48 static struct ChunkInfo {
49 uint ofs; // offset in file
50 uint pksize; // packed chunk size (same as chunk size: chunk is unpacked)
53 static struct FileInfo {
54 string name;
55 uint chunk;
56 uint chunkofs; // offset of first file byte in unpacked chunk
57 uint size; // unpacked file size
60 static struct Nfo {
61 uint rc = 1; // refcounter
62 ChunkInfo[] chunks;
63 FileInfo[string] files;
64 uint chunkSize;
65 uint lastChunkSize;
66 Packer packer;
67 FILE* afl; // archive file, we'll keep it opened
68 Mutex fileMutex;
70 @disable this (this); // no copies!
72 static void freeMe (Nfo* nfo) {
73 import core.memory : GC;
74 import core.stdc.stdlib : free;
75 if (nfo.afl !is null) fclose(nfo.afl);
76 if (nfo.fileMutex !is null) nfo.fileMutex.destroy;
77 nfo.chunks.destroy;
78 nfo.files.destroy;
79 nfo.afl = null;
80 GC.removeRange(cast(void*)nfo/*, Nfo.sizeof*/);
81 free(nfo);
82 debug(arcz_rc) { import core.stdc.stdio : printf; printf("Nfo %p freed\n", nfo); }
85 static void decRef (usize me) {
86 pragma(inline, true);
87 if (me) {
88 auto nfo = cast(Nfo*)me;
89 assert(nfo.rc);
90 if (--nfo.rc == 0) freeMe(nfo);
95 usize nfop; // hide it from GC
97 private @property inout(Nfo)* nfo () pure inout nothrow @trusted @nogc { pragma(inline, true); return cast(Nfo*)nfop; }
98 void decRef () { pragma(inline, true); Nfo.decRef(nfop); nfop = 0; }
/// Read a 32-bit little-endian unsigned integer from `fl`.
/// Throws: ArczException if `fl` is null or fewer than 4 bytes could be read.
static uint readUint (FILE* fl) {
  if (fl is null) throw new ArczException("cannot read from closed file");
  uint raw;
  immutable got = fread(&raw, 1, raw.sizeof, fl);
  if (got != raw.sizeof) throw new ArczException("file reading error");
  // the on-disk value is little-endian; swap only on big-endian hosts
  version(LittleEndian) {
    return raw;
  } else version(BigEndian) {
    import core.bitop : bswap;
    return bswap(raw);
  } else {
    static assert(0, "wtf?!");
  }
}
/// Read a single byte from `fl` and return it widened to `uint`.
/// Throws: ArczException if `fl` is null or the byte could not be read.
static uint readUbyte (FILE* fl) {
  if (fl is null) throw new ArczException("cannot read from closed file");
  ubyte[1] cell;
  immutable got = fread(cell.ptr, 1, cell.length, fl);
  if (got != cell.length) throw new ArczException("file reading error");
  return cell[0];
}
/// Fill `buf` completely with bytes read from `fl`.
/// An empty `buf` is a no-op (the file handle is not even inspected).
/// Throws: ArczException if `fl` is null or the read comes up short.
static void readBuf (FILE* fl, void[] buf) {
  if (buf.length == 0) return;
  if (fl is null) throw new ArczException("cannot read from closed file");
  immutable got = fread(buf.ptr, 1, buf.length, fl);
  if (got != buf.length) throw new ArczException("file reading error");
}
/// Allocate an array of `mem` items of type `T` on the C heap.
///
/// Params:
///   T = item type
///   clear = if true, memory comes from `calloc` (zero-filled); otherwise from `malloc`
///   mem = number of items (not bytes); must be non-zero
///
/// Struct items are always initialised with `T.init`, regardless of `clear`.
/// Calls `onOutOfMemoryError` when the allocation fails or the byte size would overflow.
/// The result must be released with the C `free` (see `xfree`).
static T* xalloc(T, bool clear=true) (uint mem) if (T.sizeof > 0) {
  import core.exception : onOutOfMemoryError;
  import core.stdc.stdlib : calloc, malloc;
  assert(mem != 0);
  // refuse requests whose byte size does not fit into size_t (matters on 32-bit targets)
  if (mem > size_t.max/T.sizeof) onOutOfMemoryError();
  static if (clear) {
    void* res = calloc(mem, T.sizeof);
  } else {
    void* res = malloc(cast(size_t)mem*T.sizeof);
  }
  if (res is null) onOutOfMemoryError();
  static if (is(T == struct)) {
    import core.stdc.string : memcpy;
    static immutable T i = T.init;
    // step in whole items: arithmetic on `void*` advances by single bytes,
    // so the item offset has to be scaled by `T.sizeof` explicitly
    auto items = cast(ubyte*)res;
    foreach (immutable idx; 0..mem) memcpy(items+cast(size_t)idx*T.sizeof, &i, T.sizeof);
  }
  debug(arcz_alloc) { import core.stdc.stdio : printf; printf("allocated %u bytes at %p\n", cast(uint)(mem*T.sizeof), res); }
  return cast(T*)res;
}
/// Release memory obtained from `xalloc`; a null pointer is silently ignored.
static void xfree(T) (T* ptr) {
  if (ptr is null) return;
  import core.stdc.stdlib : free;
  debug(arcz_alloc) { import core.stdc.stdio : printf; printf("freing at %p\n", ptr); }
  free(ptr);
}
/// Pick the smallest Balz dictionary size (in bits) whose window covers `blockSize` bytes;
/// falls back to `Balz.MaxDictBits` when even the largest window is too small.
static if (arcz_has_balz) static ubyte balzDictSize (uint blockSize) {
  for (uint bits = Balz.MinDictBits; bits <= Balz.MaxDictBits; ++bits) {
    if ((1U<<bits) >= blockSize) return cast(ubyte)bits;
  }
  return Balz.MaxDictBits;
}
/// Unpack exactly `destlen` bytes of Balz-compressed data.
///
/// Params:
///   dest = output buffer, at least `destlen` bytes
///   destlen = number of unpacked bytes expected
///   src = packed data
///   srclen = number of packed bytes available at `src`
///   blocksize = unpacked block size the data was packed with (selects the dictionary size)
///
/// Throws: ArczException if the decoder produced fewer than `destlen` bytes.
static if (arcz_has_balz) static void unpackBlockBalz (void* dest, uint destlen, const(void)* src, uint srclen, uint blocksize) {
  Unbalz bz;
  bz.reinit(balzDictSize(blocksize));
  int ipos, opos;
  auto dc = bz.decompress(
    // reader
    (buf) {
      import core.stdc.string : memcpy;
      if (ipos >= srclen) return 0;
      // the amount left is bounded by the *packed* size; bounding it by
      // `destlen` would read past the end of `src`
      uint rd = srclen-ipos;
      if (rd > buf.length) rd = cast(uint)buf.length;
      memcpy(buf.ptr, src+ipos, rd);
      ipos += rd;
      return rd;
    },
    // writer
    (buf) {
      //if (opos+buf.length > destlen) throw new ArczException("error unpacking archive");
      // silently drop anything beyond `destlen`; the caller asked for exactly that much
      uint wr = destlen-opos;
      if (wr > buf.length) wr = cast(uint)buf.length;
      if (wr > 0) {
        import core.stdc.string : memcpy;
        memcpy(dest+opos, buf.ptr, wr);
        opos += wr;
      }
    },
    // unpack length
    destlen
  );
  if (opos != destlen) throw new ArczException("error unpacking archive");
}
205 // unpack exactly `destlen` bytes
206 static if (arcz_has_dlzma) static void unpackBlockLzma (void* dest, uint destlen, const(void)* src, uint srclen, uint blocksize) {
207 if (srclen < LZMA_PROPS_SIZE) throw new ArczException("error unpacking LZMA data");
208 if (srclen == LZMA_PROPS_SIZE) {
209 if (destlen == 0) return;
210 throw new ArczException("error unpacking LZMA data");
212 const(ubyte)* srcBytes = cast(const(ubyte)*)src;
213 ubyte* destBytes = cast(ubyte*)dest;
214 CLzmaDec lzdec;
215 LzmaDec_Init(&lzdec);
216 if (LzmaDec_Allocate(&lzdec, srcBytes, LZMA_PROPS_SIZE, &lzmaDefAllocator) != SZ_OK) {
217 throw new ArczException("error allocating LZMA decoder");
219 scope(exit) LzmaDec_Free(&lzdec, &lzmaDefAllocator);
220 srcBytes += LZMA_PROPS_SIZE;
221 srclen -= LZMA_PROPS_SIZE;
223 usize dlen = destlen;
224 usize slen = srclen;
225 ELzmaStatus status;
226 immutable dres = LzmaDec_DecodeToBuf(&lzdec, destBytes, &dlen, srcBytes, &slen, LZMA_FINISH_ANY, &status);
227 if (dres != SZ_OK || dlen != destlen) throw new ArczException("error unpacking LZMA data");
230 static void unpackBlockZLib (void* dest, uint destlen, const(void)* src, uint srclen, uint blocksize) {
231 z_stream zs;
232 zs.avail_in = 0;
233 zs.avail_out = 0;
234 // initialize unpacker
235 if (inflateInit2(&zs, 15) != Z_OK) throw new ArczException("can't initialize zlib");
236 scope(exit) inflateEnd(&zs);
237 zs.next_in = cast(typeof(zs.next_in))src;
238 zs.avail_in = srclen;
239 zs.next_out = cast(typeof(zs.next_out))dest;
240 zs.avail_out = destlen;
241 while (zs.avail_out > 0) {
242 auto err = inflate(&zs, Z_SYNC_FLUSH);
243 if (err != Z_STREAM_END && err != Z_OK) throw new ArczException("error unpacking archive");
244 if (err == Z_STREAM_END) break;
246 if (zs.avail_out != 0) throw new ArczException("error unpacking archive");
/// Dispatch to the unpacker selected by `packer`.
/// Throws: ArczException if support for the requested packer was not compiled in,
/// or if the selected unpacker fails.
static void unpackBlock (void* dest, uint destlen, const(void)* src, uint srclen, uint blocksize, in Packer packer) {
  final switch (packer) {
    case Packer.Zlib:
      return unpackBlockZLib(dest, destlen, src, srclen, blocksize);
    case Packer.Balz:
      static if (arcz_has_balz) {
        return unpackBlockBalz(dest, destlen, src, srclen, blocksize);
      } else {
        throw new ArczException("no Balz support was compiled in ArcZ");
      }
    case Packer.Lzma:
      static if (arcz_has_dlzma) {
        return unpackBlockLzma(dest, destlen, src, srclen, blocksize);
      } else {
        throw new ArczException("no LZMA support was compiled in ArcZ");
      }
  }
}
271 public:
272 this (in ArzArchive arc) {
273 assert(nfop == 0);
274 nfop = arc.nfop;
275 if (nfop) ++nfo.rc;
278 this (this) {
279 pragma(inline, true);
280 if (nfop) ++nfo.rc;
283 ~this () { pragma(inline, true); close(); }
285 void opAssign (in ArzArchive arc) {
286 if (arc.nfop) {
287 auto n = cast(Nfo*)arc.nfop;
288 ++n.rc;
290 decRef();
291 nfop = arc.nfop;
294 void close () { pragma(inline, true); decRef(); }
296 @property FileInfo[string] files () nothrow @trusted @nogc { return (nfop ? nfo.files : null); }
/// Open the ARZ archive `filename` and load its index.
///
/// Any archive previously attached to this object is released first.
/// On success the archive file stays open and is owned by the shared `Nfo` record.
///
/// Throws: ArczException if the file cannot be opened, has a wrong signature or
/// packer id, or carries a malformed index.
void openArchive (const(char)[] filename) {
  debug/*(arcz)*/ import core.stdc.stdio : printf;
  FILE* fl = null;
  scope(exit) if (fl !is null) fclose(fl);
  close();
  if (filename.length == 0) throw new ArczException("cannot open unnamed archive file");
  // `fopen` needs a NUL-terminated name: short names go to the stack, long ones to the C heap
  if (filename.length < 2048) {
    import core.stdc.stdlib : alloca;
    auto tfn = (cast(char*)alloca(filename.length+1))[0..filename.length+1];
    tfn[0..filename.length] = filename[];
    tfn[filename.length] = 0;
    fl = fopen(tfn.ptr, "rb");
  } else {
    import core.stdc.stdlib : malloc, free;
    // test the raw pointer: a slice with non-zero length never compares equal to `null`
    auto tfnp = cast(char*)malloc(filename.length+1);
    if (tfnp !is null) {
      scope(exit) free(tfnp);
      tfnp[0..filename.length] = filename[];
      tfnp[filename.length] = 0;
      fl = fopen(tfnp, "rb");
    }
  }
  if (fl is null) throw new ArczException("cannot open archive file '"~filename.idup~"'");
  char[4] sign;
  Packer packer;
  readBuf(fl, sign[]);
  if (sign != "CZA2") throw new ArczException("invalid archive file '"~filename.idup~"'");
  switch (readUbyte(fl)) {
    case 0: packer = Packer.Zlib; break;
    case 1: packer = Packer.Balz; break;
    case 2: packer = Packer.Lzma; break;
    default: throw new ArczException("invalid version of archive file '"~filename.idup~"'");
  }
  uint indexofs = readUint(fl); // index offset in file
  uint pkidxsize = readUint(fl); // packed index size
  uint idxsize = readUint(fl); // unpacked index size
  if (pkidxsize == 0 || idxsize == 0 || indexofs == 0) throw new ArczException("invalid archive file '"~filename.idup~"'");
  // now read index
  ubyte* idxbuf = null;
  scope(exit) xfree(idxbuf);
  {
    auto pib = xalloc!ubyte(pkidxsize);
    scope(exit) xfree(pib);
    if (fseek(fl, indexofs, 0) < 0) throw new ArczException("seek error in archive file '"~filename.idup~"'");
    readBuf(fl, pib[0..pkidxsize]);
    idxbuf = xalloc!ubyte(idxsize);
    unpackBlock(idxbuf, idxsize, pib, pkidxsize, idxsize, packer);
  }

  // parse index and build structures
  uint idxbufpos = 0;

  ubyte getUbyte () {
    if (idxsize-idxbufpos < ubyte.sizeof) throw new ArczException("invalid index for archive file '"~filename.idup~"'");
    return idxbuf[idxbufpos++];
  }

  uint getUint () {
    if (idxsize-idxbufpos < uint.sizeof) throw new ArczException("invalid index for archive file '"~filename.idup~"'");
    version(BigEndian) {
      import core.bitop : bswap;
      uint v = *cast(uint*)(idxbuf+idxbufpos);
      idxbufpos += 4;
      return bswap(v);
    } else version(LittleEndian) {
      uint v = *cast(uint*)(idxbuf+idxbufpos);
      idxbufpos += 4;
      return v;
    } else {
      static assert(0, "wtf?!");
    }
  }

  void getBuf (void[] buf) {
    if (buf.length > 0) {
      import core.stdc.string : memcpy;
      if (idxsize-idxbufpos < buf.length) throw new ArczException("invalid index for archive file '"~filename.idup~"'");
      memcpy(buf.ptr, idxbuf+idxbufpos, buf.length);
      idxbufpos += buf.length;
    }
  }

  // allocate shared info struct
  Nfo* nfo = xalloc!Nfo(1);
  assert(nfo.rc == 1);
  debug(arcz_rc) { import core.stdc.stdio : printf; printf("Nfo %p allocated\n", nfo); }
  scope(failure) decRef();
  nfop = cast(usize)nfo;
  {
    // `Nfo` lives on the C heap but holds GC-managed data, so the GC must scan it
    import core.memory : GC;
    GC.addRange(nfo, Nfo.sizeof);
  }
  nfo.fileMutex = new Mutex();

  // read chunk info and data
  nfo.packer = packer;
  nfo.chunkSize = getUint;
  auto ccount = getUint; // chunk count
  nfo.lastChunkSize = getUint;
  debug(arcz_dirread) printf("chunk size: %u\nchunk count: %u\nlast chunk size:%u\n", nfo.chunkSize, ccount, nfo.lastChunkSize);
  if (ccount == 0 || nfo.chunkSize < 1 || nfo.lastChunkSize < 1 || nfo.lastChunkSize > nfo.chunkSize) throw new ArczException("invalid archive file '"~filename.idup~"'");
  nfo.chunks.length = ccount;
  // chunk offsets and sizes
  foreach (ref ci; nfo.chunks) {
    ci.ofs = getUint;
    ci.pksize = getUint;
  }
  // read file count and info
  auto fcount = getUint;
  if (fcount == 0) throw new ArczException("empty archive file '"~filename.idup~"'");
  debug(arcz_dirread) printf("file count: %u\n", fcount);
  foreach (immutable _; 0..fcount) {
    uint nameofs = getUint;
    uint namelen = getUint;
    if (namelen == 0) {
      // skip unnamed file
      getUint; // chunk number
      getUint; // offset in chunk
      getUint; // unpacked size
      debug(arcz_dirread) printf("skipped empty file\n");
    } else {
      // subtract instead of adding, so the check cannot wrap around in `uint`
      if (nameofs >= idxsize || namelen > idxsize-nameofs) throw new ArczException("invalid index in archive file '"~filename.idup~"'");
      FileInfo fi;
      auto nb = new char[](namelen);
      nb[0..namelen] = (cast(char*)idxbuf)[nameofs..nameofs+namelen];
      fi.name = cast(string)(nb); // it is safe here
      fi.chunk = getUint; // chunk number
      fi.chunkofs = getUint; // offset in chunk
      fi.size = getUint; // unpacked size
      debug(arcz_dirread) printf("file size: %u\nfile chunk: %u\noffset in chunk:%u; name: [%.*s]\n", fi.size, fi.chunk, fi.chunkofs, cast(uint)fi.name.length, fi.name.ptr);
      nfo.files[fi.name] = fi;
    }
  }
  // transfer archive file ownership
  nfo.afl = fl;
  fl = null;
}
/// Is there a file called `name` in the archive? Always false when no archive is open.
bool exists (const(char)[] name) {
  return (nfop != 0 && (name in nfo.files) !is null);
}
442 AZFile open (const(char)[] name) {
443 if (!nfop) throw new ArczException("can't open file from non-opened archive");
444 if (auto fi = name in nfo.files) {
445 nfo.fileMutex.lock();
446 scope(exit) nfo.fileMutex.unlock();
447 auto zl = xalloc!LowLevelPackedRO(1);
448 scope(failure) xfree(zl);
449 debug(arcz_rc) { import core.stdc.stdio : printf; printf("Zl %p allocated\n", zl); }
450 zl.setup(nfo, fi.chunk, fi.chunkofs, fi.size);
451 AZFile fl;
452 fl.zlp = cast(usize)zl;
453 return fl;
455 throw new ArczException("can't open file '"~name.idup~"' from archive");
458 private:
459 static struct LowLevelPackedRO {
460 private import etc.c.zlib;
462 uint rc = 1;
463 usize nfop; // hide it from GC
465 private @property inout(Nfo*) nfo () inout pure nothrow @trusted @nogc { pragma(inline, true); return cast(typeof(return))nfop; }
/// Drop one reference from the reader stored (as an integer) in `me`.
/// When the last reference goes away, the chunk buffers are freed, the
/// reference held on the archive `Nfo` is released, and the reader itself is freed.
/// A zero `me` is ignored.
static void decRef (usize me) {
  if (!me) return;
  auto self = cast(LowLevelPackedRO*)me;
  assert(self.rc);
  if (--self.rc != 0) return; // somebody else still uses this reader
  import core.stdc.stdlib : free;
  if (self.chunkData !is null) free(self.chunkData);
  version(arcz_use_more_memory) if (self.pkdata !is null) free(self.pkdata);
  Nfo.decRef(self.nfop);
  free(self);
  debug(arcz_rc) { import core.stdc.stdio : printf; printf("Zl %p freed\n", self); }
}
483 uint nextchunk; // next chunk to read
484 uint curcpos; // position in current chunk
485 uint curcsize; // number of valid bytes in `chunkData`
486 uint stchunk; // starting chunk
487 uint stofs; // offset in starting chunk
488 uint totalsize; // total file size
489 uint pos; // current file position
490 uint lastrdpos; // last actual read position
491 z_stream zs;
492 ubyte* chunkData; // can be null
493 version(arcz_use_more_memory) {
494 ubyte* pkdata;
495 uint pkdatasize;
498 @disable this (this);
/// Bind a freshly allocated reader to archive `anfo` and describe the file to read.
/// Takes one additional reference on `anfo`.
///
/// Params:
///   anfo = shared archive info (must not be null)
///   astchunk = index of the chunk where the file starts
///   astofs = offset of the first file byte inside that (unpacked) chunk
///   asize = unpacked file size
void setup (Nfo* anfo, uint astchunk, uint astofs, uint asize) {
  assert(anfo !is null);
  assert(rc == 1);
  ++anfo.rc;
  nfop = cast(usize)anfo;
  stchunk = astchunk;
  nextchunk = astchunk;
  stofs = astofs;
  totalsize = asize;
}
511 @property bool eof () pure nothrow @safe @nogc { pragma(inline, true); return (pos >= totalsize); }
/// Size of the unpack buffer this file needs: the full chunk size, or less
/// when the whole file fits inside a single, not completely used chunk.
uint justEnoughMemory () pure const nothrow @safe @nogc {
  pragma(inline, true);
  immutable uint csz = nfo.chunkSize;
  if (totalsize >= csz) return csz;
  immutable uint need = stofs+totalsize;
  return (need < csz ? need : csz);
}
/// Load chunk number `nextchunk` into `chunkData`, then advance `nextchunk`.
///
/// A chunk whose packed size equals the archive chunk size is stored raw and is read
/// directly; any other chunk is read and unpacked. For the file's starting chunk the
/// data is shifted so that `chunkData[0]` is the first byte of the file.
/// After the call `curcpos` is 0 and `curcsize` is the number of valid bytes.
///
/// Throws: ArczException on a read/seek error, on unpack failure, or when `nextchunk`
/// lies outside the chunk table.
void unpackNextChunk () {
  if (nfop == 0) assert(0, "wtf?!");
  //scope(failure) if (chunkData !is null) { xfree(chunkData); chunkData = null; }
  debug(arcz_unp) { import core.stdc.stdio : printf; printf("unpacking chunk %u\n", nextchunk); }
  // chunk numbers come from the archive index; don't trust them blindly
  if (nextchunk >= nfo.chunks.length) throw new ArczException("ARCZ reading error");
  // `chunkData` is always allocated with exactly this many bytes
  immutable uint jem = justEnoughMemory;
  // allocate buffer for unpacked data
  if (chunkData is null) {
    // optimize things a little: if our file fits in less then one chunk, allocate "just enough" memory
    chunkData = xalloc!(ubyte, false)(jem);
  }
  auto chunk = &nfo.chunks[nextchunk];
  if (chunk.pksize == nfo.chunkSize) {
    // unpacked chunk, just read it
    debug(arcz_unp) { import core.stdc.stdio : printf; printf(" chunk is not packed\n"); }
    // never read more than the buffer can hold: it may be smaller than a full chunk
    immutable uint rdsize = (nfo.chunkSize > jem ? jem : nfo.chunkSize);
    {
      nfo.fileMutex.lock();
      scope(exit) nfo.fileMutex.unlock();
      if (fseek(nfo.afl, chunk.ofs, 0) < 0) throw new ArczException("ARCZ reading error");
      if (fread(chunkData, 1, rdsize, nfo.afl) != rdsize) throw new ArczException("ARCZ reading error");
    }
    curcsize = rdsize;
  } else {
    // packed chunk, unpack it
    // allocate buffer for packed data
    version(arcz_use_more_memory) {
      import core.stdc.stdlib : realloc;
      if (pkdatasize < chunk.pksize) {
        import core.exception : onOutOfMemoryError;
        auto newpk = realloc(pkdata, chunk.pksize);
        if (newpk is null) onOutOfMemoryError();
        debug(arcz_alloc) { import core.stdc.stdio : printf; printf("reallocated from %u to %u bytes; %p -> %p\n", cast(uint)pkdatasize, cast(uint)chunk.pksize, pkdata, newpk); }
        pkdata = cast(ubyte*)newpk;
        pkdatasize = chunk.pksize;
      }
      alias pkd = pkdata;
    } else {
      auto pkd = xalloc!(ubyte, false)(chunk.pksize);
      scope(exit) xfree(pkd);
    }
    {
      nfo.fileMutex.lock();
      scope(exit) nfo.fileMutex.unlock();
      if (fseek(nfo.afl, chunk.ofs, 0) < 0) throw new ArczException("ARCZ reading error");
      if (fread(pkd, 1, chunk.pksize, nfo.afl) != chunk.pksize) throw new ArczException("ARCZ reading error");
    }
    uint upsize = (nextchunk == nfo.chunks.length-1 ? nfo.lastChunkSize : nfo.chunkSize); // unpacked chunk size
    immutable uint cksz = upsize;
    if (upsize > jem) upsize = jem;
    debug(arcz_unp) { import core.stdc.stdio : printf; printf(" unpacking %u bytes to %u bytes\n", chunk.pksize, upsize); }
    ArzArchive.unpackBlock(chunkData, upsize, pkd, chunk.pksize, cksz, nfo.packer);
    curcsize = upsize;
  }
  curcpos = 0;
  // fix first chunk offset if necessary
  if (nextchunk == stchunk && stofs > 0) {
    // it's easier to just memmove it
    import core.stdc.string : memmove;
    assert(stofs < curcsize);
    memmove(chunkData, chunkData+stofs, curcsize-stofs);
    curcsize -= stofs;
  }
  ++nextchunk; // advance to next chunk
}
587 void syncReadPos () {
588 if (pos >= totalsize || pos == lastrdpos) return;
589 immutable uint fcdata = nfo.chunkSize-stofs; // number of our bytes in the first chunk
590 // does our pos lie in the first chunk?
591 if (pos < fcdata) {
592 // yep, just read it
593 if (nextchunk != stchunk+1) {
594 nextchunk = stchunk;
595 unpackNextChunk(); // we'll need it anyway
596 } else {
597 // just rewind
598 curcpos = 0;
600 curcpos += pos;
601 lastrdpos = pos;
602 return;
604 // find the chunk we want
605 uint npos = pos-fcdata;
606 uint xblock = stchunk+1+npos/nfo.chunkSize;
607 uint curcstart = (xblock-(stchunk+1))*nfo.chunkSize+fcdata;
608 if (xblock != nextchunk-1) {
609 // read and unpack this chunk
610 nextchunk = xblock;
611 unpackNextChunk();
612 } else {
613 // just rewind
614 curcpos = 0;
616 assert(pos >= curcstart && pos < curcstart+nfo.chunkSize);
617 uint skip = pos-curcstart;
618 lastrdpos = pos;
619 curcpos += skip;
622 int read (void* buf, uint count) {
623 if (buf is null) return -1;
624 if (count == 0 || totalsize == 0) return 0;
625 if (totalsize >= 0 && pos >= totalsize) return 0; // EOF
626 syncReadPos();
627 assert(lastrdpos == pos);
628 if (cast(long)pos+count > totalsize) count = totalsize-pos;
629 auto res = count;
630 while (count > 0) {
631 debug(arcz_read) { import core.stdc.stdio : printf; printf("reading %u bytes; pos=%u; lastrdpos=%u; curcpos=%u; curcsize=%u\n", count, pos, lastrdpos, curcpos, curcsize); }
632 import core.stdc.string : memcpy;
633 if (curcpos >= curcsize) {
634 unpackNextChunk(); // we want next chunk!
635 debug(arcz_read) { import core.stdc.stdio : printf; printf(" *reading %u bytes; pos=%u; lastrdpos=%u; curcpos=%u; curcsize=%u\n", count, pos, lastrdpos, curcpos, curcsize); }
637 assert(curcpos < curcsize && curcsize != 0);
638 int rd = (curcsize-curcpos >= count ? count : curcsize-curcpos);
639 assert(rd > 0);
640 memcpy(buf, chunkData+curcpos, rd);
641 curcpos += rd;
642 pos += rd;
643 lastrdpos += rd;
644 buf += rd;
645 count -= rd;
647 assert(pos == lastrdpos);
648 return res;
/// Move the logical file position.
///
/// Params:
///   ofs = offset, interpreted according to `origin`
///   origin = `SEEK_SET`, `SEEK_CUR` or `SEEK_END`
///
/// Returns: the new position, or -1 for an unknown `origin` or a negative target.
/// Targets beyond the end of the file are clamped to the file size; for `SEEK_END`
/// positive offsets are treated as 0.
long lseek (long ofs, int origin) {
  //TODO: overflow checks
  long target;
  if (origin == SEEK_SET) {
    target = ofs;
  } else if (origin == SEEK_CUR) {
    target = ofs+pos;
  } else if (origin == SEEK_END) {
    long back = (ofs > 0 ? 0 : ofs);
    if (-back > totalsize) back = -cast(long)totalsize;
    target = back+totalsize;
  } else {
    return -1;
  }
  if (target < 0) return -1;
  if (target > totalsize) target = totalsize;
  pos = cast(uint)target;
  return pos;
}
673 // ////////////////////////////////////////////////////////////////////////// //
674 /// Opened file.
675 public struct AZFile {
676 private:
677 usize zlp;
679 private @property inout(ArzArchive.LowLevelPackedRO)* zl () inout pure nothrow @trusted @nogc { pragma(inline, true); return cast(typeof(return))zlp; }
680 private void decRef () { pragma(inline, true); ArzArchive.LowLevelPackedRO.decRef(zlp); zlp = 0; }
682 public:
683 this (in AZFile afl) {
684 assert(zlp == 0);
685 zlp = afl.zlp;
686 if (zlp) ++zl.rc;
689 this (this) {
690 if (zlp) ++zl.rc;
693 ~this () { close(); }
695 void opAssign (in AZFile afl) {
696 if (afl.zlp) {
697 auto n = cast(ArzArchive.LowLevelPackedRO*)afl.zlp;
698 ++n.rc;
700 decRef();
701 zlp = afl.zlp;
704 void close () { decRef(); }
706 @property bool isOpen () const pure nothrow @safe @nogc { pragma(inline, true); return (zlp != 0); }
707 @property uint size () const pure nothrow @safe @nogc { pragma(inline, true); return (zlp ? zl.totalsize : 0); }
708 @property uint tell () const pure nothrow @safe @nogc { pragma(inline, true); return (zlp ? zl.pos : 0); }
710 void seek (long ofs, int origin=SEEK_SET) {
711 if (!zlp) throw new ArczException("can't seek in closed file");
712 auto res = zl.lseek(ofs, origin);
713 if (res < 0) throw new ArczException("seek error");
716 private import std.traits : isMutable;
718 //TODO: overflow check
719 T[] rawRead(T) (T[] buf) if (isMutable!T) {
720 if (!zlp) throw new ArczException("can't read from closed file");
721 if (buf.length > 0) {
722 auto res = zl.read(buf.ptr, buf.length*T.sizeof);
723 if (res == -1 || res%T.sizeof != 0) throw new ArczException("read error");
724 return buf[0..res/T.sizeof];
725 } else {
726 return buf[0..0];
732 // ////////////////////////////////////////////////////////////////////////// //
733 /** this class can be used to create archive file.
735 * Example:
736 * --------------------
737 * import std.file, std.path, std.stdio : File;
739 * enum ArcName = "z00.arz";
740 * enum DirName = "experimental-docs";
742 * ubyte[] rdbuf;
743 * rdbuf.length = 65536;
745 * auto arcz = new ArzCreator(ArcName);
746 * long total = 0;
747 * foreach (DirEntry e; dirEntries(DirName, SpanMode.breadth)) {
748 * if (e.isFile) {
749 * assert(e.size < uint.max);
750 * //writeln(e.name);
751 * total += e.size;
752 * string fname = e.name[DirName.length+1..$];
753 * arcz.newFile(fname, cast(uint)e.size);
754 * auto fi = File(e.name);
755 * for (;;) {
756 * auto rd = fi.rawRead(rdbuf[]);
757 * if (rd.length == 0) break;
758 * arcz.rawWrite(rd[]);
762 * arcz.close();
763 * writeln(total, " bytes packed to ", getSize(ArcName), " (", arcz.chunksWritten, " chunks, ", arcz.filesWritten, " files)");
764 * --------------------
766 final class ArzCreator {
767 private import etc.c.zlib;
768 private import core.stdc.stdio : FILE, fopen, fclose, ftell, fseek, fwrite;
770 public:
771 //WARNING! don't change the order!
772 enum Compressor {
773 ZLib, // default
774 Balz,
775 BalzMax, // Balz, maximum compression
776 Zopfli, // this will fallback to zlib if no zopfli support was compiled in
777 Lzma,
780 private:
781 static struct ChunkInfo {
782 uint ofs; // offset in file
783 uint pksize; // packed chunk size
786 static struct FileInfo {
787 string name;
788 uint chunk;
789 uint chunkofs; // offset of first file byte in unpacked chunk
790 uint size; // unpacked file size
793 private:
794 ubyte[] chunkdata;
795 uint cdpos;
796 FILE* arcfl;
797 ChunkInfo[] chunks;
798 FileInfo[] files;
799 uint lastChunkSize;
800 uint statChunks, statFiles;
801 Compressor cpr = Compressor.ZLib;
803 private:
/// Write `v` to the archive file as a 32-bit little-endian integer.
/// Throws: ArczException if the archive file is not open or the write fails.
void writeUint (uint v) {
  if (arcfl is null) throw new ArczException("write error");
  // convert to the on-disk (little-endian) byte order
  version(LittleEndian) {
    immutable uint wire = v;
  } else version(BigEndian) {
    import core.bitop : bswap;
    immutable uint wire = bswap(v);
  } else {
    static assert(0, "wtf?!");
  }
  immutable done = fwrite(&wire, 1, wire.sizeof, arcfl);
  if (done != wire.sizeof) throw new ArczException("write error");
}
/// Write a single byte to the archive file.
/// Throws: ArczException if the archive file is not open or the write fails.
void writeUbyte (ubyte v) {
  if (arcfl is null) throw new ArczException("write error");
  ubyte[1] cell = v;
  immutable done = fwrite(cell.ptr, 1, cell.length, arcfl);
  if (done != cell.length) throw new ArczException("write error");
}
/// Write all of `buf` to the archive file; an empty buffer is a no-op.
/// Throws: ArczException if the archive file is not open or the write comes up short.
void writeBuf (const(void)[] buf) {
  if (buf.length == 0) return;
  if (arcfl is null) throw new ArczException("write error");
  immutable done = fwrite(buf.ptr, 1, buf.length, arcfl);
  if (done != buf.length) throw new ArczException("write error");
}
829 static if (arcz_has_balz) long writePackedBalz (const(void)[] upbuf) {
830 assert(upbuf.length > 0 && upbuf.length < int.max);
831 long res = 0;
832 Balz bz;
833 int ipos, opos;
834 bz.reinit(ArzArchive.balzDictSize(cast(uint)upbuf.length));
835 bz.compress(
836 // reader
837 (buf) {
838 import core.stdc.string : memcpy;
839 if (ipos >= upbuf.length) return 0;
840 uint rd = cast(uint)upbuf.length-ipos;
841 if (rd > buf.length) rd = cast(uint)buf.length;
842 memcpy(buf.ptr, upbuf.ptr+ipos, rd);
843 ipos += rd;
844 return rd;
846 // writer
847 (buf) {
848 res += buf.length;
849 writeBuf(buf[]);
851 // max mode
852 (cpr == Compressor.BalzMax)
854 return res;
857 static if (arcz_has_zopfli) long writePackedZopfli (const(void)[] upbuf) {
858 ubyte[] indata;
859 void* odata;
860 usize osize;
861 ZopfliOptions opts;
862 ZopfliCompress(opts, ZOPFLI_FORMAT_ZLIB, upbuf.ptr, upbuf.length, &odata, &osize);
863 writeBuf(odata[0..osize]);
864 ZopfliFree(odata);
865 return cast(long)osize;
868 static if (arcz_has_dlzma) long writePackedLzma (const(void)[] upbuf) {
869 import core.stdc.stdlib : malloc, free;
870 assert(upbuf.length > 0 && upbuf.length < int.max);
872 CLzmaEncHandle lzenc = LzmaEnc_Create(&lzmaDefAllocator);
873 if (lzenc is null) throw new ArczException("cannot allocate LZMA encoder");
874 scope(exit) LzmaEnc_Destroy(lzenc, &lzmaDefAllocator, &lzmaDefAllocator);
876 CLzmaEncProps props;
877 LzmaEncProps_Init(&props);
878 props.level = 9;
879 //props.dictSize = 1;
880 //while (props.dictSize < insize) props.dictSize <<= 1;
881 //props.dictSize = 1<<27; //128MB
882 props.dictSize = 1<<22; //4MB
883 props.reduceSize = upbuf.length;
885 if (LzmaEnc_SetProps(lzenc, &props) != SZ_OK) throw new ArczException("cannot init LZMA encoder");
886 LzmaEnc_SetDataSize(lzenc, upbuf.length);
888 usize inpos = 0;
889 usize outsize = 0;
891 ISeqInStream inStream;
892 inStream.Read = delegate SRes (ISeqInStream* p, void* buf, usize* size) nothrow {
893 import core.stdc.string : memcpy;
894 usize inleft = upbuf.length-inpos;
895 if (inleft > *size) inleft = *size;
896 if (inleft) memcpy(buf, (cast(const(ubyte)*)upbuf)+inpos, inleft);
897 inpos += inleft;
898 *size = inleft;
899 return SZ_OK;
902 ISeqOutStream outStream;
903 outStream.Write = delegate usize (ISeqOutStream* p, const(void)* buf, usize size) nothrow {
904 try {
905 writeBuf((cast(const(ubyte)*)buf)[0..size]);
906 } catch (Exception e) {
907 return 0;
909 outsize += size;
910 return size;
913 ubyte[LZMA_PROPS_SIZE+12] propsEnc;
914 uint propsEncSize = cast(uint)propsEnc.length;
916 if (LzmaEnc_WriteProperties(lzenc, propsEnc.ptr, &propsEncSize) != SZ_OK) throw new ArczException("cannot encode LZMA properties");
917 if (propsEncSize != LZMA_PROPS_SIZE) throw new ArczException("invalid LZMA properties size");
918 writeBuf(propsEnc[0..propsEncSize]);
919 outsize = propsEncSize;
921 immutable eres = LzmaEnc_Encode(lzenc, &outStream, &inStream, null, &lzmaDefAllocator, &lzmaDefAllocator);
922 if (eres != SZ_OK) throw new ArczException("error in LZMA packer");
924 return cast(long)outsize;
/// Compress `upbuf` with zlib (best compression, zlib wrapper) and write the result
/// to the archive file.
///
/// Returns: number of packed bytes written.
/// Throws: ArczException if zlib cannot be initialised, reports a compression error,
/// or the output cannot be written.
long writePackedZLib (const(void)[] upbuf) {
  assert(upbuf.length > 0 && upbuf.length < int.max);
  long res = 0;
  z_stream zs;
  ubyte[2048] obuf;
  zs.next_out = obuf.ptr;
  zs.avail_out = cast(uint)obuf.length;
  zs.next_in = null;
  zs.avail_in = 0;
  // initialize packer
  if (deflateInit2(&zs, Z_BEST_COMPRESSION, Z_DEFLATED, 15, 9, 0) != Z_OK) throw new ArczException("can't write packed data");
  scope(exit) deflateEnd(&zs);
  zs.next_in = cast(typeof(zs.next_in))upbuf.ptr;
  zs.avail_in = cast(uint)upbuf.length;

  // write whatever is sitting in `obuf` and make the whole buffer available again
  void drain () {
    immutable uint used = cast(uint)obuf.length-zs.avail_out;
    if (used > 0) {
      res += used;
      writeBuf(obuf[0..used]);
    }
    zs.next_out = obuf.ptr;
    zs.avail_out = cast(uint)obuf.length;
  }

  // feed all input
  while (zs.avail_in > 0) {
    if (zs.avail_out == 0) drain();
    auto err = deflate(&zs, Z_NO_FLUSH);
    if (err != Z_OK) throw new ArczException("zlib compression error");
  }
  // finish the stream: `Z_FINISH` must be repeated until zlib reports `Z_STREAM_END`,
  // no matter how full the output buffer happened to be when the input ran out
  drain();
  for (;;) {
    immutable err = deflate(&zs, Z_FINISH);
    if (err != Z_OK && err != Z_STREAM_END) throw new ArczException("zlib compression error");
    drain();
    if (err == Z_STREAM_END) break;
  }
  return res;
}
/// Compress `upbuf` with the compressor selected in `cpr` and write it to the archive file.
///
/// `Compressor.Zopfli` falls back to plain zlib when Zopfli support is not compiled in.
///
/// Returns: size of the packed data written.
/// Throws: ArczException if the selected compressor (Balz or LZMA) is not compiled in,
/// if compression or writing fails, or if the packed size does not fit into 32 bits.
uint writePackedBuf (const(void)[] upbuf) {
  assert(upbuf.length > 0 && upbuf.length < int.max);
  long res = 0;
  final switch (cpr) {
    case Compressor.ZLib:
      res = writePackedZLib(upbuf);
      break;
    case Compressor.Balz:
    case Compressor.BalzMax:
      static if (arcz_has_balz) {
        res = writePackedBalz(upbuf);
        break;
      } else {
        // the exception has to be thrown, not merely constructed
        throw new ArczException("no Balz support was compiled in ArcZ");
      }
    case Compressor.Zopfli:
      static if (arcz_has_zopfli) {
        res = writePackedZopfli(upbuf);
      } else {
        //new ArczException("no Zopfli support was compiled in ArcZ");
        res = writePackedZLib(upbuf);
      }
      break;
    case Compressor.Lzma:
      static if (arcz_has_dlzma) {
        res = writePackedLzma(upbuf);
        break;
      } else {
        // the exception has to be thrown, not merely constructed
        throw new ArczException("no LZMA support was compiled in ArcZ");
      }
  }
  if (res > uint.max) throw new ArczException("output archive too big");
  return cast(uint)res;
}
/**
 * Write the data collected in `chunkdata[0..cdpos]` to the archive as a new
 * chunk and register it in `chunks`.
 *
 * A completely filled chunk that does not get smaller when packed is stored
 * unpacked instead: the packed bytes are overwritten in place with the raw
 * data (and, on Posix, the file is truncated right after them).
 *
 * When nothing is buffered, only `lastChunkSize` is set to the chunk size.
 *
 * Throws: ArczException on seek/truncate failure or when the file position
 *         leaves the 32-bit range.
 */
void flushData () {
  // nothing buffered: no chunk to write
  if (cdpos == 0) {
    lastChunkSize = cast(uint)chunkdata.length;
    return;
  }

  ChunkInfo info;
  auto chunkStart = ftell(arcfl);
  if (chunkStart < 0 || chunkStart >= uint.max) throw new ArczException("output archive too big");
  info.ofs = cast(uint)chunkStart;
  info.pksize = writePackedBuf(chunkdata[0..cdpos]);

  immutable bool chunkIsFull = (cdpos == chunkdata.length);
  if (chunkIsFull && info.pksize >= chunkdata.length) {
    // packing did not help: go back and store the raw data instead
    if (fseek(arcfl, chunkStart, 0) < 0) throw new ArczException("can't seek in output file");
    writeBuf(chunkdata[0..cdpos]);
    version(Posix) {
      import core.stdc.stdio : fileno;
      import core.sys.posix.unistd : ftruncate;
      // cut off whatever is left of the longer packed data
      auto rawEnd = ftell(arcfl);
      if (rawEnd < 0 || rawEnd >= uint.max) throw new ArczException("output archive too big");
      if (ftruncate(fileno(arcfl), cast(uint)rawEnd) < 0) throw new ArczException("error truncating output file");
    }
    info.pksize = cast(uint)cdpos;
  }

  // a partially filled chunk is recorded as the (so far) last one
  if (!chunkIsFull) lastChunkSize = cast(uint)cdpos;
  cdpos = 0;
  chunks ~= info;
}
/**
 * Finish the archive: flush the pending chunk, build the index in memory,
 * append it (packed) to the file and patch the header with its location.
 *
 * After this call `statChunks` and `statFiles` hold the final counts.
 *
 * Throws: ArczException when the file position leaves the 32-bit range,
 *         when seeking back to the header fails, or when packing fails.
 */
void closeArc () {
  flushData();
  // write index
  //assert(ftell(arcfl) > 0 && ftell(arcfl) < uint.max);
  assert(chunkdata.length < uint.max);
  assert(chunks.length < uint.max);
  assert(files.length < uint.max);
  // create index in memory
  ubyte[] index;

  // append a 32-bit value, least significant byte first
  void putUint (uint v) {
    index ~= v&0xff;
    index ~= (v>>8)&0xff;
    index ~= (v>>16)&0xff;
    index ~= (v>>24)&0xff;
  }

  void putUbyte (ubyte v) {
    index ~= v;
  }

  void putBuf (const(void)[] buf) {
    assert(buf.length > 0);
    index ~= (cast(const(ubyte)[])buf)[];
  }

  // chunk size
  putUint(cast(uint)chunkdata.length);
  // chunk count
  putUint(cast(uint)chunks.length);
  // last chunk size
  putUint(lastChunkSize); // 0: last chunk is full
  // chunk offsets and sizes
  foreach (ref ci; chunks) {
    putUint(ci.ofs);
    putUint(ci.pksize);
  }
  // file count
  putUint(cast(uint)files.length);
  // names are stored right after the fixed-size file records (5 dwords each)
  uint nbofs = cast(uint)index.length+cast(uint)files.length*(5*4);
  // files
  foreach (ref fi; files) {
    assert(fi.name.length > 0 && fi.name.length <= 16384);
    // name offset (in index) and length
    putUint(nbofs);
    putUint(cast(uint)fi.name.length);
    nbofs += cast(uint)fi.name.length+1; // put zero byte there to ease C interfacing
    // chunk number
    putUint(fi.chunk);
    // offset in unpacked chunk
    putUint(fi.chunkofs);
    // unpacked size
    putUint(fi.size);
  }
  // names
  foreach (ref fi; files) {
    putBuf(fi.name[]);
    putUbyte(0); // this means nothing, it is here just for convenience (hello, C!)
  }
  assert(index.length < uint.max);

  auto cpos = ftell(arcfl);
  if (cpos < 0 || cpos > uint.max) throw new ArczException("output archive too big");
  // write packed index
  debug(arcz_writer) { import core.stdc.stdio : printf; printf("index size: %u\n", cast(uint)index.length); }
  auto pkisz = writePackedBuf(index[]);
  debug(arcz_writer) { import core.stdc.stdio : printf; printf("packed index size: %u\n", cast(uint)pkisz); }
  // write index info; it lives at offset 5 of the header
  if (fseek(arcfl, 5, 0) < 0) throw new ArczException("seek error");
  // index offset in file (range was checked above)
  writeUint(cast(uint)cpos);
  // packed index size
  writeUint(pkisz);
  // unpacked index size
  writeUint(cast(uint)index.length);
  // done
  statChunks = cast(uint)chunks.length;
  statFiles = cast(uint)files.length;
}
1115 public:
/**
 * Create a new archive file and write its (still empty) header.
 *
 * Params:
 *   fname = name of the file to create; an existing file is overwritten
 *   chunkSize = unpacked chunk size in bytes; must be in 1..32MB (exclusive)
 *   acpr = compressor to use for chunks and index
 *
 * Throws: ArczException if the requested compressor was not compiled in
 *         (Balz, LZMA) or the file cannot be created.
 */
this (const(char)[] fname, uint chunkSize=256*1024, Compressor acpr=Compressor.ZLib) {
  import std.internal.cstring;
  assert(chunkSize > 0 && chunkSize < 32*1024*1024); // arbitrary limit
  static if (!arcz_has_balz) {
    if (acpr == Compressor.Balz || acpr == Compressor.BalzMax) throw new ArczException("no Balz support was compiled in ArcZ");
  }
  static if (!arcz_has_dlzma) {
    // refuse early, before the output file is created
    if (acpr == Compressor.Lzma) throw new ArczException("no LZMA support was compiled in ArcZ");
  }
  // missing Zopfli is not an error: `writePackedBuf` falls back to zlib
  cpr = acpr;
  arcfl = fopen(fname.tempCString, "wb");
  if (arcfl is null) throw new ArczException("can't create output file '"~fname.idup~"'");
  cdpos = 0;
  chunkdata.length = chunkSize;
  scope(failure) { fclose(arcfl); arcfl = null; }
  writeBuf("CZA2"); // signature
  // compressor byte; it must always be present, because `closeArc` patches
  // the index info at file offset 5 (4 signature bytes + this byte)
  if (cpr == Compressor.Balz || cpr == Compressor.BalzMax) {
    writeUbyte(1); // compressor
  } else if (cpr == Compressor.Lzma) {
    writeUbyte(2); // compressor
  } else {
    writeUbyte(0); // compressor: zlib (also used for Zopfli)
  }
  writeUint(0); // offset to index
  writeUint(0); // packed index size
  writeUint(0); // unpacked index size
}
/// Finalise and close the archive (see `close`) when the writer is destroyed.
~this () {
  close();
}
/**
 * Finalise the archive (if it is still open), close the file and drop all
 * writer state. Calling it again on an already closed writer only resets
 * the state once more.
 */
void close () {
  if (arcfl !is null) {
    try {
      closeArc();
    } finally {
      // the file is closed even when finalising failed
      fclose(arcfl);
      arcfl = null;
    }
  }
  // forget everything that was collected for this archive
  cdpos = 0;
  lastChunkSize = 0;
  files = null;
  chunks = null;
  chunkdata = null;
}
// valid after closing
/// Number of chunks in the finished archive (recorded by `closeArc`).
@property uint chunksWritten () const pure nothrow @safe @nogc {
  pragma(inline, true);
  return statChunks;
}
/// Number of files in the finished archive (recorded by `closeArc`); valid after closing.
@property uint filesWritten () const pure nothrow @safe @nogc {
  pragma(inline, true);
  return statFiles;
}
/**
 * Register a new file entry that starts at the current write position
 * (current chunk number and offset inside the chunk buffer).
 *
 * Params:
 *   name = file name to store in the index; 1..255 bytes
 *   size = unpacked file size to record in the index
 */
void newFile (string name, uint size) {
  // `closeArc` asserts that names are not empty when it builds the index;
  // check it here too, so the offending call is the one that fails
  assert(name.length > 0 && name.length <= 255);
  FileInfo fi;
  fi.name = name;
  fi.chunk = cast(uint)chunks.length;
  fi.chunkofs = cast(uint)cdpos;
  fi.size = size;
  files ~= fi;
}
/**
 * Append the bytes of `buffer` to the archive data stream. Data is collected
 * in the chunk buffer; every time the buffer is full it is written out with
 * `flushData`.
 *
 * Params:
 *   buffer = items to write; their raw bytes are copied as is
 *
 * Throws: ArczException if the writer has no chunk buffer (not opened, or
 *         already closed), plus whatever `flushData` throws.
 */
void rawWrite(T) (const(T)[] buffer) {
  if (buffer.length == 0) return;
  // without a chunk buffer (e.g. after `close`) the copy loop below could
  // never make progress and would spin forever
  if (chunkdata.length == 0) throw new ArczException("archive is not open for writing");
  auto src = cast(const(ubyte)*)buffer.ptr;
  auto len = buffer.length*T.sizeof;
  while (len > 0) {
    // a full buffer is flushed lazily, right before more data arrives
    if (cdpos == chunkdata.length) flushData();
    if (cdpos < chunkdata.length) {
      auto wr = chunkdata.length-cdpos;
      if (wr > len) wr = len;
      chunkdata[cdpos..cdpos+wr] = src[0..wr];
      cdpos += wr;
      len -= wr;
      src += wr;
    }
  }
}
1190 // ////////////////////////////////////////////////////////////////////////// //
1191 /* arcz file format:
1192 header
1193 ======
1194 db 'CZA2' ; signature
1195 db version ; 0: zlib; 1: balz; 2: lzma
1196 dd indexofs ; offset to packed index
1197 dd pkindexsz ; size of packed index
1198 dd upindexsz ; size of unpacked index
1201 index
1202 =====
1203 dd chunksize ; unpacked chunk size in bytes
1204 dd chunkcount ; number of chunks in file
1205 dd lastchunksz ; size of last chunk (it may be incomplete); 0: last chunk is completely used (all `chunksize` bytes)
then chunk offsets and sizes follow:
1208 dd chunkofs ; from file start
  dd pkchunksz ; size of (possibly packed) chunk data; if it equals `chunksize`, this chunk is not packed
1211 then file list follows:
1212 dd filecount ; number of files in archive
1214 then file info follows:
1215 dd nameofs ; (in index)
1216 dd namelen ; length of name (can't be 0)
1217 dd firstchunk ; chunk where file starts
1218 dd firstofs ; offset in first chunk (unpacked) where file starts
1219 dd filesize ; unpacked file size
1221 then name buffer follows -- just bytes