_obsolete_dont_use/inflate.d

   1 /*
   2  * tinf  -  tiny inflate library (inflate, gzip, zlib)
   3  * version 1.00
   4  *
   5  * Copyright (c) 2003 by Joergen Ibsen / Jibz
   6  * All Rights Reserved
   7  *
   8  * http://www.ibsensoftware.com/
   9  *
  10  * This software is provided 'as-is', without any express
  11  * or implied warranty.  In no event will the authors be
  12  * held liable for any damages arising from the use of
  13  * this software.
  14  *
  15  * Permission is granted to anyone to use this software
  16  * for any purpose, including commercial applications,
  17  * and to alter it and redistribute it freely, subject to
  18  * the following restrictions:
  19  *
  20  * 1. The origin of this software must not be
  21  *    misrepresented; you must not claim that you
  22  *    wrote the original software. If you use this
  23  *    software in a product, an acknowledgment in
  24  *    the product documentation would be appreciated
  25  *    but is not required.
  26  *
  27  * 2. Altered source versions must be plainly marked
  28  *    as such, and must not be misrepresented as
  29  *    being the original software.
  30  *
  31  * 3. This notice may not be removed or altered from
  32  *    any source distribution.
  33  */
  34 /*
  35  * This D port was made by Ketmar // Invisible Vector
  36  * ketmar@ketmar.no-ip.org
  37  */
  38 module iv.inflate /*is aliced*/;
  39
  40
  41 // ////////////////////////////////////////////////////////////////////////// //
  42 import iv.alice;
  43 //import iv.exex;
  44
  45 mixin(NewExceptionClass!"InflateError");
  46 mixin(NewExceptionClass!("InflateEOF", "InflateError"));
  47
  48
  49 ////////////////////////////////////////////////////////////////////////////////
  50 /*
  51  * Adler-32 algorithm taken from the zlib source, which is
  52  * Copyright (C) 1995-1998 Jean-loup Gailly and Mark Adler
  53  */
  54 /// Adler-32 checksum computer
  55 struct Adler32 {
  56 nothrow @safe @nogc:
  57 private:
  58   enum { BASE = 65521, NMAX = 5552 }
  59   enum normS1S2 = `s1 %= BASE; s2 %= BASE;`;
  60
  61   template genByteProcessors(int count) {
  62     static if (count > 0)
  63       enum genByteProcessors = `s1 += *dta++;s2 += s1;`~genByteProcessors!(count-1);
  64     else
  65       enum genByteProcessors = "";
  66   }
  67
  68   uint s1 = 1;
  69   uint s2 = 0;
  70
  71 public:
  72   /**
  73    * Computes the Adler-32 checksum. We can do `Adler32(buf)`.
  74    *
  75    * Params:
  76    *  data = input data
  77    *
  78    * Returns:
  79    *  adler32 checksum
  80    */
  81   static uint opCall(T) (T[] data) {
  82     Adler32 a32;
  83     a32.doBuffer(data);
  84     return a32.result;
  85   }
  86
  87   /// reinitialize
  88   void reset () {
  89     pragma(inline, true);
  90     s1 = 1;
  91     s2 = 0;
  92   }
  93
  94   /// get current Adler-32 sum
  95   @property uint result () const pure { pragma(inline, true); return ((s2<<16)|s1); }
  96
  97   /// process buffer
  98   void doBuffer(T) (T[] data) @trusted {
  99     if (data.length == 0) return;
 100     usize len = data.length*data[0].sizeof; // length in bytes
 101     const(ubyte)* dta = cast(const(ubyte)*)data.ptr;
 102     foreach (immutable _; 0..len/NMAX) {
 103       foreach (immutable _; 0..NMAX/16) { mixin(genByteProcessors!(16)); }
 104       mixin(normS1S2);
 105     }
 106     len %= NMAX;
 107     if (len) {
 108       foreach (immutable _; 0..len) { mixin(genByteProcessors!(1)); }
 109       mixin(normS1S2);
 110     }
 111   }
 112
 113   /// process one byte
 114   void doByte(T) (T bt)
 115   // sorry
 116   if (is(T == char) || is(T == byte) || is(T == ubyte) ||
 117       is(T == const char) || is(T == const byte) || is(T == const ubyte) ||
 118       is(T == immutable char) || is(T == immutable byte) || is(T == immutable ubyte))
 119   {
 120     s1 += cast(ubyte)bt;
 121     s2 += s1;
 122     mixin(normS1S2);
 123   }
 124 }
 125
 126
 127 /**
 128  * Computes the Adler-32 checksum.
 129  *
 130  * Params:
 131  *  data = input data
 132  *
 133  * Returns:
 134  *  adler32 checksum
 135  */
 136 alias adler32 = Adler32;
 137
 138
 139 // -----------------------------------------------------------------------------
 140 // internal data structures
 141 //
 142 private struct Tree {
 143   ushort[16] table;  // table of code length counts
 144   ushort[288] trans; // code -> symbol translation table
 145
 146   @disable this (this); /* disable copying */
 147
 148   // given an array of code lengths, build a tree
 149   void buildTree (const(ubyte)[] lengths) @trusted {
 150     if (lengths.length < 1) throw new InflateError("invalid lengths");
 151     ushort[16] offs = void;
 152     ushort sum = 0;
 153     // clear code length count table
 154     table[] = 0;
 155     // scan symbol lengths, and sum code length counts
 156     foreach (immutable l; lengths) {
 157       if (l >= 16) throw new InflateError("invalid lengths");
 158       ++table.ptr[l];
 159     }
 160     table.ptr[0] = 0;
 161     // compute offset table for distribution sort
 162     foreach (immutable i, immutable n; table) {
 163       offs.ptr[i] = sum;
 164       sum += n;
 165     }
 166     // create code->symbol translation table (symbols sorted by code)
 167     foreach (ushort i, immutable l; lengths) {
 168       if (l) {
 169         auto n = offs.ptr[l]++;
 170         if (n >= 288) throw new InflateError("invalid lengths");
 171         trans.ptr[n] = i;
 172       }
 173     }
 174   }
 175 }
 176
 177
 178 // -----------------------------------------------------------------------------
 179 // stream of unpacked data
 180 //
 181 struct InfStream {
 182 private:
 183   bool delegate (out ubyte bt) readOneByte = null;
 184
 185   // state data
 186   uint bytesLeft = void; // bytes to copy both for compressed and for uncompressed blocks
 187   uint matchOfs = void; // match offset for inflated block
 188   const(Tree)* lt = void; // dynamic length/symbol tree
 189   const(Tree)* dt = void; // dynamic distance tree
 190   bool doingFinalBlock = false; // stop on next processBlockHeader()
 191
 192   // current state
 193   enum State {
 194     ExpectZLibHeader, // expecting ZLib header
 195     ExpectBlock, // expecting new block
 196     RawBlock, // uncompressed block
 197     CompressedBlock,
 198     EOF, // readOneByte() returns false before block header
 199     Dead, // some error occured, throw exception on any read
 200   }
 201   State state = State.ExpectZLibHeader;
 202   Mode mode = Mode.ZLib;
 203
 204   // other data
 205   ushort tag;
 206   int bitcount;
 207
 208   // trees
 209   Tree ltree; // dynamic length/symbol tree
 210   Tree dtree; // dynamic distance tree
 211
 212   // adler-32
 213   uint a32s1, a32s2;
 214   uint nmaxLeft = Adler32.NMAX;
 215   uint cura32;
 216
 217   // dictionary
 218   ubyte[65536] dict = void;
 219   uint dictEnd; // current dict free byte
 220
 221   void dictPutByte (ubyte bt) nothrow @trusted @nogc {
 222     if (dictEnd == dict.length) {
 223       import core.stdc.string : memmove;
 224       // move dict data
 225       memmove(dict.ptr, dict.ptr+dictEnd-32768, 32768);
 226       dictEnd = 32768;
 227     }
 228     dict.ptr[dictEnd++] = bt;
 229     if (mode == Mode.ZLib) {
 230       a32s1 += bt;
 231       a32s2 += a32s1;
 232       if (--nmaxLeft == 0) {
 233         nmaxLeft = Adler32.NMAX;
 234         a32s1 %= Adler32.BASE;
 235         a32s2 %= Adler32.BASE;
 236       }
 237     }
 238   }
 239
 240   uint finishAdler32 () nothrow @safe @nogc {
 241     a32s1 %= Adler32.BASE;
 242     a32s2 %= Adler32.BASE;
 243     return (a32s2<<16)|a32s1;
 244   }
 245
 246 private:
 247   void setErrorState () nothrow @safe @nogc {
 248     state = State.Dead;
 249     lt = dt = null; // just in case
 250     doingFinalBlock = false; // just in case too
 251   }
 252
 253   void error (string msg, string file=__FILE__, usize line=__LINE__) @safe {
 254     setErrorState();
 255     throw new InflateError(msg, file, line);
 256   }
 257
 258   void processZLibHeader () @trusted {
 259     ubyte cmf, flg;
 260     // 7 bytes
 261     // compression parameters
 262     if (!readOneByte(cmf)) error("out of input data");
 263     // flags
 264     if (!readOneByte(flg)) error("out of input data");
 265     // check format
 266     // check checksum
 267     if ((256*cmf+flg)%31) error("invalid zlib checksum");
 268     // check method (only deflate allowed)
 269     if ((cmf&0x0f) != 8) error("invalid compression method");
 270     // check window size
 271     if ((cmf>>4) > 7) error("invalid window size");
 272     // there must be no preset dictionary
 273     if (flg&0x20) error("preset dictionaries are not supported");
 274     // FYI: flg>>6 will give you compression level:
 275     //      0 - compressor used fastest algorithm
 276     //      1 - compressor used fast algorithm
 277     //      2 - compressor used default algorithm
 278     //      3 - compressor used maximum compression, slowest algorithm
 279     // not that you can make any sane use of that info though...
 280     // note that last 4 bytes is Adler32 checksum in big-endian format
 281     // init Adler32 counters
 282     a32s1 = 1;
 283     a32s2 = 0;
 284     nmaxLeft = Adler32.NMAX;
 285     // ok, we can go now
 286     state = State.ExpectBlock;
 287   }
 288
 289   void finishZLibData () {
 290     // read Adler32
 291     uint a32; // autoinit
 292     ubyte bt = void;
 293     foreach_reverse (immutable n; 0..4) {
 294       try {
 295         if (!readOneByte(bt)) error("out of input data");
 296       } catch (Exception e) {
 297         setErrorState();
 298         throw e;
 299       }
 300       a32 |= (cast(uint)bt)<<(n*8);
 301     }
 302     if (a32 != finishAdler32()) error("invalid checksum");
 303   }
 304
 305   // get one bit from source stream
 306   uint getBit () {
 307     if (!bitcount--) {
 308       ubyte bt = void;
 309       bool ok = void;
 310       try { ok = readOneByte(bt); } catch (Exception e) { setErrorState(); throw e; }
 311       if (!ok) error("out of input data");
 312       tag = bt;
 313       bitcount = 7;
 314     }
 315     uint res = tag&0x01;
 316     tag >>= 1;
 317     return res;
 318   }
 319
 320   // read a num bit value from a stream and add base
 321   uint readBits (ubyte num, uint base) {
 322     uint val = 0;
 323     if (num) {
 324       immutable uint limit = 1<<num;
 325       for (uint mask = 1; mask < limit; mask <<= 1) if (getBit()) val += mask;
 326     }
 327     return val+base;
 328   }
 329
 330   // given a data stream and a tree, decode a symbol
 331   uint decodeSymbol (const(Tree*) t) {
 332     int cur, sum, len; // autoinit
 333     // get more bits while code value is above sum
 334     do {
 335       ushort sl;
 336       cur = 2*cur+getBit();
 337       ++len;
 338       if (len >= 16) error("invalid symbol");
 339       sl = t.table.ptr[len];
 340       sum += sl;
 341       cur -= sl;
 342     } while (cur >= 0);
 343     sum += cur;
 344     if (sum < 0 || sum >= 288) error("invalid symbol");
 345     return t.trans.ptr[sum];
 346   }
 347
 348   // given a data stream, decode dynamic trees from it
 349   void decodeTrees () {
 350     // special ordering of code length codes
 351     static immutable ubyte[19] clcidx = [16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15];
 352     Tree codeTree;
 353     ubyte[288+32] lengths;
 354     uint hlit, hdist, hclen;
 355     uint num, length;
 356     // get 5 bits HLIT (257-286)
 357     hlit = readBits(5, 257);
 358     // get 5 bits HDIST (1-32)
 359     hdist = readBits(5, 1);
 360     if (hlit+hdist > 288+32) error("invalid tree");
 361     // get 4 bits HCLEN (4-19)
 362     hclen = readBits(4, 4);
 363     if (hclen > 19) error("invalid tree");
 364     lengths[0..19] = 0;
 365     // read code lengths for code length alphabet
 366     foreach (immutable i; 0..hclen) lengths[clcidx[i]] = cast(ubyte)readBits(3, 0); // get 3 bits code length (0-7)
 367     // build code length tree
 368     try { codeTree.buildTree(lengths[0..19]); } catch (Exception e) { setErrorState(); throw e; }
 369     // decode code lengths for the dynamic trees
 370     for (num = 0; num < hlit+hdist; ) {
 371       ubyte bt;
 372       uint sym = decodeSymbol(&codeTree);
 373       switch (sym) {
 374         case 16: // copy previous code length 3-6 times (read 2 bits)
 375           if (num == 0) error("invalid tree");
 376           bt = lengths[num-1];
 377           length = readBits(2, 3);
 378           break;
 379         case 17: // repeat code length 0 for 3-10 times (read 3 bits)
 380           bt = 0;
 381           length = readBits(3, 3);
 382           break;
 383         case 18: // repeat code length 0 for 11-138 times (read 7 bits)
 384           bt = 0;
 385           length = readBits(7, 11);
 386           break;
 387         default: // values 0-15 represent the actual code lengths
 388           if (sym >= 19) error("invalid tree symbol");
 389           length = 1;
 390           bt = cast(ubyte)sym;
 391           break;
 392       }
 393       // fill it
 394       if (num+length > 288+32) error("invalid tree");
 395       while (length-- > 0) lengths[num++] = bt;
 396     }
 397     // build dynamic trees
 398     try {
 399       ltree.buildTree(lengths[0..hlit]);
 400       dtree.buildTree(lengths[hlit..hlit+hdist]);
 401     } catch (Exception e) {
 402       setErrorState();
 403       throw e;
 404     }
 405   }
 406
 407   // return true if char was read
 408   bool processInflatedBlock (out ubyte bt) {
 409     if (bytesLeft > 0) {
 410       // copying match; all checks already done
 411       --bytesLeft;
 412       bt = dict[dictEnd-matchOfs];
 413     } else {
 414       uint sym = decodeSymbol(lt);
 415       if (sym == 256) {
 416         // end of block, fix state
 417         state = State.ExpectBlock;
 418         return false;
 419       }
 420       if (sym < 256) {
 421         // normal
 422         bt = cast(ubyte)sym;
 423       } else {
 424         // copy
 425         uint dist, length, offs;
 426         sym -= 257;
 427         // possibly get more bits from length code
 428         if (sym >= 30) error("invalid symbol");
 429         length = readBits(lengthBits[sym], lengthBase[sym]);
 430         dist = decodeSymbol(dt);
 431         if (dist >= 30) error("invalid distance");
 432         // possibly get more bits from distance code
 433         offs = readBits(distBits[dist], distBase[dist]);
 434         if (offs > dictEnd) error("invalid distance");
 435         // copy match
 436         bytesLeft = length;
 437         matchOfs = offs;
 438         return false; // no byte read yet
 439       }
 440     }
 441     return true;
 442   }
 443
 444   // return true if char was read
 445   bool processUncompressedBlock (out ubyte bt) {
 446     if (bytesLeft > 0) {
 447       // copying
 448       bool ok;
 449       try { ok = readOneByte(bt); } catch (Exception e) { setErrorState(); throw e; }
 450       if (!ok) error("out of input data");
 451       --bytesLeft;
 452       return true;
 453     }
 454     // end of block, fix state
 455     state = State.ExpectBlock;
 456     return false;
 457   }
 458
 459   ushort readU16 () {
 460     ubyte b0, b1;
 461     try {
 462       if (readOneByte(b0) && readOneByte(b1)) return cast(ushort)(b0|(b1<<8));
 463     } catch (Exception e) {
 464       setErrorState();
 465       throw e;
 466     }
 467     error("out of input data");
 468     assert(0);
 469   }
 470
 471   void processRawHeader () {
 472     ushort length = readU16();
 473     ushort invlength = readU16(); // one's complement of length
 474     // check length
 475     if (length != cast(ushort)(~invlength)) error("invalid uncompressed block length");
 476     bitcount = 0; // make sure we start next block on a byte boundary
 477     bytesLeft = length;
 478     state = State.RawBlock;
 479   }
 480
 481   void processFixedHeader () nothrow @safe @nogc {
 482     lt = &sltree;
 483     dt = &sdtree;
 484     bytesLeft = 0; // force reading of symbol (just in case)
 485     state = State.CompressedBlock;
 486   }
 487
 488   void processDynamicHeader () {
 489     // decode trees from stream
 490     decodeTrees();
 491     lt = &ltree;
 492     dt = &dtree;
 493     bytesLeft = 0; // force reading of symbol (just in case)
 494     state = State.CompressedBlock;
 495   }
 496
 497   // set state to State.EOF on correct EOF
 498   void processBlockHeader () {
 499     if (doingFinalBlock) {
 500       if (mode == Mode.ZLib) finishZLibData();
 501       doingFinalBlock = false;
 502       state = State.EOF;
 503       return;
 504     }
 505     doingFinalBlock = (getBit() != 0); // final block flag
 506     // read block type (2 bits) and fix state
 507     switch (readBits(2, 0)) {
 508       case 0: processRawHeader(); break; // uncompressed block
 509       case 1: processFixedHeader(); break; // block with fixed huffman trees
 510       case 2: processDynamicHeader(); break; // block with dynamic huffman trees
 511       default: error("invalid input block type");
 512     }
 513   }
 514
 515   // false on eof
 516   bool getOneByte (out ubyte bt) {
 517     bool gotbyte = false;
 518     do {
 519       final switch (state) {
 520         case State.ExpectZLibHeader: processZLibHeader(); break;
 521         case State.ExpectBlock: processBlockHeader(); break;
 522         case State.RawBlock: gotbyte = processUncompressedBlock(bt); break;
 523         case State.CompressedBlock: gotbyte = processInflatedBlock(bt); break;
 524         case State.EOF: return false;
 525         case State.Dead: error("dead stream"); break;
 526       }
 527     } while (!gotbyte);
 528     dictPutByte(bt);
 529     return true;
 530   }
 531
 532 public:
 533   /// stream format
 534   enum Mode { ZLib, Deflate };
 535
 536   /// disable copying
 537   @disable this (this);
 538
 539   /**
 540    * Initialize decompression stream.
 541    *
 542    * Params:
 543    *  dgb = byte reader;
 544    *        must either set bt to next byte and return true or return false on EOF;
 545    *        note that it will not be called anymoer after EOF or error;
 546    *        can be null
 547    *  mode = stream format; either headerless 'deflate' stream or 'zlib' stream
 548    *
 549    * Throws:
 550    *  InflateError on error
 551    */
 552   this (bool delegate (out ubyte bt) dgb, Mode mode=Mode.ZLib) {
 553     if (dgb is null) dgb = (out ubyte bt) => false;
 554     reinit(dgb, mode);
 555   }
 556
 557   /// Ditto.
 558   this (Mode mode) @trusted {
 559     reinit(null, mode);
 560   }
 561
 562   /// Ditto.
 563   void reinit (bool delegate (out ubyte bt) dgb=null, Mode mode=Mode.ZLib) {
 564     if (dgb !is null) readOneByte = dgb;
 565     state = State.ExpectBlock;
 566     this.mode = mode;
 567     tag = 0;
 568     bitcount = 0;
 569     dictEnd = 0;
 570     state = (mode == Mode.ZLib ? State.ExpectZLibHeader : State.ExpectBlock);
 571     doingFinalBlock = false;
 572     if (readOneByte is null) readOneByte = (out ubyte bt) => false;
 573   }
 574
 575   /**
 576    * Check stream header. Can be called after this() or reinit().
 577    *
 578    * Returns:
 579    *  nothing
 580    *
 581    * Throws:
 582    *  InflateError on error
 583    */
 584   void checkStreamHeader () {
 585     if (mode == Mode.ZLib && state == State.ExpectZLibHeader) processZLibHeader();
 586   }
 587
 588   /**
 589    * Get another byte from stream.
 590    *
 591    * Returns:
 592    *  one decompressed byte
 593    *
 594    * Throws:
 595    *  InflateEOF on EOF
 596    *  InflateError on error
 597    */
 598   ubyte getByte () {
 599     ubyte res;
 600     if (!getOneByte(res)) throw new InflateEOF("no more data");
 601     return res;
 602   }
 603
 604   /**
 605    * Read bytes from stream. Almost similar to File.rawRead().
 606    *
 607    * Params:
 608    *  buf = destination buffer
 609    *
 610    * Returns:
 611    *  destination buffer or slice of destination buffer if EOF encountered
 612    *
 613    * Throws:
 614    *  InflateError on error
 615    */
 616   T[] rawRead(T) (T[] buf) {
 617     auto len = buf.length*T.sizeof;
 618     auto dst = cast(ubyte*)buf.ptr;
 619     ubyte res = void;
 620     while (len--) {
 621       if (!getOneByte(*dst)) {
 622         // check if the last 'dest' item is fully decompressed
 623         static if (T.sizeof > 1) { if ((cast(usize)dst-buf.ptr)%T.sizeof) error("partial data"); }
 624         return buf[0..cast(usize)(dst-buf.ptr)];
 625       }
 626       ++dst;
 627     }
 628     return buf;
 629   }
 630
 631   @property bool eof () const pure nothrow @safe @nogc { pragma(inline, true); return (state == State.EOF); }
 632   @property bool invalid () const pure nothrow @safe @nogc { pragma(inline, true); return (state == State.Dead); }
 633 }
 634
 635
 636 // -----------------------------------------------------------------------------
 637 private:
 638 // private global data
 639 // build it using CTFE
 640
 641 // fixed length/symbol tree
 642 immutable Tree sltree = {
 643   Tree sl;
 644   sl.table[7] = 24;
 645   sl.table[8] = 152;
 646   sl.table[9] = 112;
 647   foreach (immutable i; 0..24) sl.trans[i] = cast(ushort)(256+i);
 648   foreach (immutable ushort i; 0..144) sl.trans[24+i] = i;
 649   foreach (immutable i; 0..8) sl.trans[24+144+i] = cast(ushort)(280+i);
 650   foreach (immutable i; 0..112) sl.trans[24+144+8+i] = cast(ushort)(144+i);
 651   return sl;
 652 }();
 653
 654 // fixed distance tree
 655 immutable Tree sdtree = {
 656   Tree sd;
 657   sd.table[5] = 32;
 658   foreach (immutable ushort i; 0..32) sd.trans[i] = i;
 659   return sd;
 660 }();
 661
 662 void fillBits (ubyte[] bits, uint delta) @safe nothrow {
 663   bits[0..delta] = 0;
 664   foreach (immutable i; 0..30-delta) bits[i+delta] = cast(ubyte)(i/delta);
 665 }
 666
 667 void fillBase (ushort[] base, immutable(ubyte[30]) bits, ushort first) @safe nothrow {
 668   ushort sum = first;
 669   foreach (immutable i; 0..30) {
 670     base[i] = sum;
 671     sum += 1<<bits[i];
 672   }
 673 }
 674
 675 // extra bits and base tables for length codes
 676 immutable ubyte[30] lengthBits = {
 677   ubyte[30] bits;
 678   fillBits(bits, 4);
 679   bits[28] = 0; // fix a special case
 680   return bits;
 681 }();
 682
 683 immutable ushort[30] lengthBase = {
 684   ubyte[30] bits;
 685   ushort[30] base;
 686   fillBits(bits, 4);
 687   fillBase(base, bits, 3);
 688   base[28] = 258; // fix a special case
 689   return base;
 690 }();
 691
 692 // extra bits and base tables for distance codes
 693 immutable ubyte[30] distBits = {
 694   ubyte[30] bits;
 695   fillBits(bits, 2);
 696   return bits;
 697 }();
 698
 699 immutable ushort[30] distBase = {
 700   enum bits = distBits;
 701   ushort[30] base;
 702   fillBase(base, bits, 1);
 703   return base;
 704 }();