code/modules/detector.js

   1 /* coded by Ketmar // Invisible Vector (psyc://ketmar.no-ip.org/~Ketmar)
   2  * Understanding is not required. Only obedience.
   3  *
   4  * This program is free software. It comes without any warranty, to
   5  * the extent permitted by applicable law. You can redistribute it
   6  * and/or modify it under the terms of the Do What The Fuck You Want
   7  * To Public License, Version 2, as published by Sam Hocevar. See
   8  * http://www.wtfpl.net/txt/copying/ for more details.
   9  */
  10 // image parsing engine
  11 let EXPORTED_SYMBOLS = [
  12   "FormatChecker"
  13 ];
  14
  15 const {utils:Cu, classes:Cc, interfaces:Ci, results:Cr} = Components;
  16
  17
  18 //////////////////////////////////////////////////////////////////////////////
  19 function getU8 (data, ofs) (ofs >= 0 && ofs < data.length ? data.charCodeAt(ofs) : Number.NaN);
  20
  21 function getU16LE (data, ofs) (ofs >= 0 && ofs < data.length-1 ? data.charCodeAt(ofs)+0x100*data.charCodeAt(ofs+1) : Number.NaN);
  22 function getU16BE (data, ofs) (ofs >= 0 && ofs < data.length-1 ? 0x100*data.charCodeAt(ofs)+data.charCodeAt(ofs+1) : Number.NaN);
  23 function getU24LE (data, ofs) (ofs >= 0 && ofs < data.length-2 ? data.charCodeAt(ofs)+0x100*data.charCodeAt(ofs+1)+0x10000*data.charCodeAt(ofs+2) : Number.NaN);
  24 function getU32LE (data, ofs) (ofs >= 0 && ofs < data.length-3 ? data.charCodeAt(ofs)+0x100*data.charCodeAt(ofs+1)+0x10000*data.charCodeAt(ofs+2)+0x1000000*data.charCodeAt(ofs+3) : Number.NaN);
  25 function getU32BE (data, ofs) (ofs >= 0 && ofs < data.length-3 ? 0x1000000*data.charCodeAt(ofs)+0x10000*data.charCodeAt(ofs+1)+0x100*data.charCodeAt(ofs+2)+data.charCodeAt(ofs+3) : Number.NaN);
  26
  27 function getI16LE (data, ofs) {
  28   let v = getU16LE(data, ofs);
  29   if (!isNaN(v) && v > 0x7fff) v -= 0x10000;
  30   return v;
  31 }
  32
  33 function getI16BE (data, ofs) {
  34   let v = getU16BE(data, ofs);
  35   if (!isNaN(v) && v > 0x7fff) v -= 0x10000;
  36   return v;
  37 }
  38
  39 function getI32LE (data, ofs) {
  40   let v = getU32LE(data, ofs);
  41   if (!isNaN(v) && v > 0x7fffffff) v -= 0x100000000;
  42   return v;
  43 }
  44
  45 function getI32BE (data, ofs) {
  46   let v = getU32BE(data, ofs);
  47   if (!isNaN(v) && v > 0x7fffffff) v -= 0x100000000;
  48   return v;
  49 }
  50
  51
  52 ////////////////////////////////////////////////////////////////////////////////
  53 const IMAGE_NEED_MORE_DATA = 0;
  54 const IMAGE_ACCEPTED       = 1; // `detect()` accepted this image
  55 const IMAGE_REJECTED       = 2; // `detect()` was wrong, this is not our format
  56 const IMAGE_INVALID        = 3; // detected, invalid
  57
  58
  59 ////////////////////////////////////////////////////////////////////////////////
/*
 * Format descriptor fields:
 *
 * string name -- format name
 * int minDetectBytes -- minimum number of bytes required to run `detect()`
 * int minHeaderBytes -- minimum number of bytes required to start parsing
 * int maxHeaderBytes -- maximum number of bytes after which parsing is given up
 * function detect (string data)
 *   will never be called with less than `minDetectBytes` bytes
 *   must be fast (do more checks in `parse()` if necessary);
 *   this is used to do "early reject"
 *   returns: IMAGE_XXX
 * function parse (string data)
 *   should not be called on non-detected images
 *   will never be called with less than `minHeaderBytes` bytes
 *   can be called with more than `maxHeaderBytes` bytes
 *   returns: IMAGE_XXX or {int width, int height}
 */
  76 let ImageFormats = [
  77   // jpeg image
  78   {
  79     name: "JPEG",
  80     minDetectBytes: 2,
  81     minHeaderBytes: 9,
  82     maxHeaderBytes: 65536,
  83
  84     detect: function (data) {
  85       if (data[0] !== '\xff' || data[1] !== '\xd8') return IMAGE_REJECTED;
  86       return IMAGE_ACCEPTED;
  87     },
  88
  89     parse: function (data) {
  90       // process chunks (segments)
  91       let pos = 2;
  92       while (pos+4 < data.length) {
  93         if (data[pos] !== '\xff') return IMAGE_INVALID;
  94         let segid = getU8(data, pos+1);
  95         if (isNaN(segid)) return IMAGE_NEED_MORE_DATA;
  96         pos += 2;
  97         let size = getU16BE(data, pos);
  98         if (isNaN(size)) return IMAGE_NEED_MORE_DATA;
  99         if (size < 2) return IMAGE_INVALID;
 100         // SOF?
 101         if (segid === 0xc0 || segid === 0xc2) {
 102           // bpp(byte), wdt(word), hgt(word)
 103           if (size < 5) return IMAGE_INVALID;
 104           if (pos+6 >= data.length) return IMAGE_NEED_MORE_DATA;
 105           return {
 106             width: getU16BE(data, pos+3),
 107             height: getU16BE(data, pos+5),
 108           };
 109         }
 110         pos += size;
 111       }
 112       return IMAGE_NEED_MORE_DATA;
 113     },
 114   },
 115   // png image
 116   {
 117     name: "PNG",
 118     minDetectBytes: 8,
 119     minHeaderBytes: 20,
 120     maxHeaderBytes: 65536,
 121
 122     detect: function (data) {
 123       if (data[0] !== '\x89' || data[1] !== 'P' || data[2] !== 'N' || data[3] !== 'G') return IMAGE_REJECTED;
 124       if (data[4] !== '\x0d' || data[5] !== '\x0a') return IMAGE_REJECTED;
 125       if (data[6] !== '\x1a' || data[7] !== '\x0a') return IMAGE_REJECTED;
 126       return IMAGE_ACCEPTED;
 127     },
 128
 129     parse: function (data) {
 130       // process chunks
 131       let pos = 8;
 132       while (pos < data.length) {
 133         let size = getU32BE(data, pos);
 134         if (isNaN(size)) return IMAGE_NEED_MORE_DATA;
 135         if (size > 65536) return IMAGE_INVALID; // invalid -- chunk too big
 136         if (pos+size+8 > data.length) return IMAGE_NEED_MORE_DATA;
 137         if (data[pos+4] === 'I' && data[pos+5] === 'H' && data[pos+6] === 'D' && data[pos+7] === 'R') {
 138           // header chunk, get size
 139           if (size < 8) return IMAGE_INVALID;
 140           return {
 141             width: getU32BE(data, pos+8),
 142             height: getU32BE(data, pos+12),
 143           };
 144         }
 145         // skip chunk
 146         pos += size+12; // size,name,data,crc
 147       }
 148       return IMAGE_NEED_MORE_DATA;
 149     },
 150   },
 151   // gif image
 152   {
 153     name: "GIF",
 154     minDetectBytes: 6,
 155     minHeaderBytes: 10,
 156     maxHeaderBytes: 10,
 157
 158     detect: function (data) {
 159       if (data[0] !== 'G' || data[1] !== 'I' || data[2] !== 'F' || data[3] !== '8' || data[5] !== 'a') return IMAGE_REJECTED;
 160       if (data[4] !== '7' && data[4] !== '9') return IMAGE_REJECTED;
 161       return IMAGE_ACCEPTED;
 162     },
 163
 164     parse: function (data) {
 165       return {
 166         width: getU16LE(data, 6),
 167         height: getU16LE(data, 8),
 168       };
 169     },
 170   },
 171   // shitdowz icon (some stupid sites still using that for favicons)
 172   {
 173     name: "ICO",
 174     minDetectBytes: 4,
 175     minHeaderBytes: 8,
 176     maxHeaderBytes: 8,
 177
 178     detect: function (data) {
 179       if (data[0] !== '\x00' || data[1] !== '\x00') return IMAGE_REJECTED;
 180       if (data[2] !== '\x01' || data[3] !== '\x00') return IMAGE_REJECTED;
 181       return IMAGE_ACCEPTED;
 182     },
 183
 184     parse: function (data) {
 185       // max 8 icons in set
 186       if (data[4] === '\x00' || data.charCodeAt(4) > 8 || data.charCodeAt(5) > 0) return IMAGE_INVALID;
 187       let res = {
 188         width: getU8(data, 6),
 189         height: getU8(data, 7),
 190       };
 191       if (res.width === 0) res.width = 256;
 192       if (res.height === 0) res.height = 256;
 193       return res;
 194     },
 195   },
 196   // shitdowz bitmap (no, really, some shitheads using that!)
 197   {
 198     name: "BMP",
 199     minDetectBytes: 2,
 200     minHeaderBytes: 26,
 201     maxHeaderBytes: 26,
 202
 203     detect: function (data) {
 204       if (data[0] !== 'B' || data[1] !== 'M') return IMAGE_REJECTED;
 205       return IMAGE_ACCEPTED;
 206     },
 207
 208     parse: function (data) {
 209       let res = {
 210         width: getI32LE(data, 18),
 211         height: getI32LE(data, 22),
 212       };
 213       if (res.width < 0) res.width = -res.width;
 214       if (res.height < 0) res.height = -res.height;
 215       return res;
 216     },
 217   },
 218   // webp (dunno why)
 219   {
 220     name: "WEBP",
 221     minDetectBytes: 16,
 222     minHeaderBytes: 30,
 223     maxHeaderBytes: 30,
 224
 225     detect: function (data) {
 226       if (data[0] !== 'R' || data[1] !== 'I' || data[2] !== 'F' || data[3] !== 'F') return IMAGE_REJECTED;
 227       if (data[8] !== 'W' || data[9] !== 'E' || data[10] !== 'B' || data[11] !== 'P') return IMAGE_REJECTED;
 228       if (data[12] !== 'V' || data[13] !== 'P' || data[14] !== '8') return IMAGE_REJECTED;
 229       if (data[15] !== ' ' && data[15] !== 'L' && data[15] !== 'X') return IMAGE_INVALID;
 230       return IMAGE_ACCEPTED;
 231     },
 232
 233     parse: function (data) {
 234       switch (data[15]) {
 235         case 'X': // extended
 236           return {
 237             width: getU24LE(data, 24)+1,
 238             height: getU24LE(data, 27)+1,
 239           };
 240         case 'L': // lossless
 241           if (data[20] !== '\x2f') return IMAGE_INVALID; // signature
 242           {
 243             let b1 = getU8(data, 21);
 244             let b2 = getU8(data, 22);
 245             let b3 = getU8(data, 23);
 246             let b4 = getU8(data, 24);
 247             return {
 248               width: (((b2&0x3f)<<8)|b1)+1,
 249               height: (((b4&0x0f)<<10)|(b3<<2)|((b2&0xc0)>>6))+1,
 250             };
 251           }
 252         case ' ': // lossy
 253           // 3 bytes: frame tag
 254           // 0x9d, 0x01, 0x2a
 255           // u16 w&3ffff, u16h&3ffff
 256           if (data[23] !== '\x9d' || data[24] !== '\x01' || data[25] !== '\x2a') return IMAGE_INVALID;
 257           return {
 258             width: getU16LE(data, 26)&0x3fff,
 259             height: getU16LE(data, 28)&0x3fff,
 260           };
 261       }
 262       return IMAGE_INVALID; // just in case
 263     },
 264   },
 265 ];
 266
 267
 268 ////////////////////////////////////////////////////////////////////////////////
 269 function FormatChecker () {
 270   this.formats = ImageFormats.slice();
 271   // prepare some internal data
 272   let maxHeaderBytes = 0;
 273   for (let fmt of this.formats) {
 274     if (!fmt) continue;
 275     if (typeof(fmt) !== "object") continue;
 276     fmt.accepted = false; // `true` means that format is accepted, but size is not determined yet
 277     if (maxHeaderBytes < fmt.maxHeaderBytes) maxHeaderBytes = fmt.maxHeaderBytes;
 278   }
 279   if (maxHeaderBytes < 1) throw new Error("WTF?!");
 280   this.maxHeaderBytes = maxHeaderBytes;
 281 }
 282
 283
 284 FormatChecker.prototype = {
 285   // `true`: no more checkers, unknown format
 286   get done () (this.formats.length === 0),
 287
 288   // check buffer
 289   // returns:
 290   //  `null` -- no more possible formats
 291   //  `true` -- need more data
 292   //  {string name, int width, int height, valid:true} -- format detected
 293   //  {string name, valid:false} -- format detected as invalid
 294   process: function (data) {
 295     if (this.formats.length < 1) return null; // no more formats
 296     // check formats
 297     for (let idx = 0; idx < this.formats.length; ++idx) {
 298       let fmt = this.formats[idx];
 299       // need to do detection?
 300       if (!fmt.accepted) {
 301         if (data.length < fmt.minDetectBytes) continue;
 302         let res = fmt.detect(data);
 303         if (res === IMAGE_NEED_MORE_DATA) continue;
 304         if (res === IMAGE_INVALID) {
 305           this.formats = [];
 306           return {name:fmt.name, valid:false};
 307         }
 308         if (res === IMAGE_REJECTED) {
 309           //conlog("'", fmt.name, "' rejected");
 310           // remove it
 311           this.formats.splice(idx, 1);
 312           --idx;
 313           continue;
 314         }
 315         if (res !== IMAGE_ACCEPTED) throw new Error("'"+fmt.name+"' detector returns invalid result");
 316         fmt.accepted = true;
 317       }
 318       // try to parse format
 319       if (data.length >= fmt.minHeaderBytes) {
 320         let res = fmt.parse(data);
 321         if (res === IMAGE_INVALID) {
 322           this.formats = [];
 323           return {name:fmt.name, valid:false};
 324         }
 325         if (res === IMAGE_REJECTED) {
 326           //conlog("'", fmt.name, "' rejected");
 327           // remove it
 328           this.formats.splice(idx, 1);
 329           --idx;
 330           continue;
 331         } else if (typeof(res) === "object") {
 332           if (!isNaN(res.width) && !isNaN(res.height)) {
 333             // format detected!
 334             this.formats = [];
 335             res.name = fmt.name;
 336             res.valid = true;
 337             return res;
 338           }
 339         } else if (res !== IMAGE_ACCEPTED && res !== IMAGE_NEED_MORE_DATA) {
 340           throw new Error("'"+fmt.name+"' parser returns invalid result");
 341         }
 342       }
 343       // reject this format if header is too big, and can't parse yet
 344       if (data.length > fmt.maxHeaderBytes) {
 345         this.formats.splice(idx, 1);
 346         --idx;
 347       }
 348     }
 349     // no suitable format found
 350     if (data.length > this.maxHeaderBytes) this.formats = []; // alas
 351     return (this.formats.length < 1 ? null : true);
 352   },
 353 };