apps/recorder/jpeg_load.c

   1 /***************************************************************************
   2 *             __________               __   ___.
   3 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
   4 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
   5 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
   6 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
   7 *                     \/            \/     \/    \/            \/
   8 * $Id$
   9 *
  10 * JPEG image viewer
  11 * (This is a real mess if it has to be coded in one single C file)
  12 *
  13 * Copyright (C) 2009 Andrew Mahone fractional decode, split IDCT - 16-point
  14 *   IDCT based on IJG jpeg-7 pre-release
  15 * File scrolling addition (C) 2005 Alexander Spyridakis
  16 * Copyright (C) 2004 Jörg Hohensohn aka [IDC]Dragon
  17 * Heavily borrowed from the IJG implementation (C) Thomas G. Lane
  18 * Small & fast downscaling IDCT (C) 2002 by Guido Vollbeding  JPEGclub.org
  19 *
  20 * This program is free software; you can redistribute it and/or
  21 * modify it under the terms of the GNU General Public License
  22 * as published by the Free Software Foundation; either version 2
  23 * of the License, or (at your option) any later version.
  24 *
  25 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  26 * KIND, either express or implied.
  27 *
  28 ****************************************************************************/
  29
  30 #include "plugin.h"
  31 #include "debug.h"
  32 #include "jpeg_load.h"
  33 /*#define JPEG_BS_DEBUG*/
  34 #define ROCKBOX_DEBUG_JPEG
  35 /* for portability of below JPEG code */
  36 #define MEMSET(p,v,c) memset(p,v,c)
  37 #define MEMCPY(d,s,c) memcpy(d,s,c)
  38 #define INLINE static inline
  39 #define ENDIAN_SWAP16(n) n /* only for poor little endian machines */
  40 #ifdef ROCKBOX_DEBUG_JPEG
  41 #define JDEBUGF DEBUGF
  42 #else
  43 #define JDEBUGF(...)
  44 #endif
  45
  46 /**************** begin JPEG code ********************/
  47
  48 #ifdef HAVE_LCD_COLOR
  49 typedef struct uint8_rgb jpeg_pix_t;
  50 #else
  51 typedef uint8_t jpeg_pix_t;
  52 #endif
  53 #define JPEG_IDCT_TRANSPOSE
  54 #define JPEG_PIX_SZ (sizeof(jpeg_pix_t))
  55 #ifdef HAVE_LCD_COLOR
  56 #define COLOR_EXTRA_IDCT_WS 64
  57 #else
  58 #define COLOR_EXTRA_IDCT_WS 0
  59 #endif
  60 #ifdef JPEG_IDCT_TRANSPOSE
  61 #define V_OUT(n) ws2[8*n]
  62 #define V_IN_ST 1
  63 #define TRANSPOSE_EXTRA_IDCT_WS 64
  64 #else
  65 #define V_OUT(n) ws[8*n]
  66 #define V_IN_ST 8
  67 #define TRANSPOSE_EXTRA_IDCT_WS 0
  68 #endif
  69 #define IDCT_WS_SIZE (64 + TRANSPOSE_EXTRA_IDCT_WS + COLOR_EXTRA_IDCT_WS)
  70
  71 /* This can't be in jpeg_load.h because plugin.h includes it, and it conflicts
  72  * with the definition in jpeg_decoder.h
  73  */
   74 struct jpeg
   75 {
          /* Decoder state for one image: input source, bit buffer, image
           * geometry, restart/row bookkeeping, per-component scaling and the
           * quantization/Huffman tables. */
   76 #ifdef JPEG_FROM_MEM
          /* presumably the in-memory JPEG stream and its length - confirm
           * against the reader code */
   77     unsigned char *data;
   78     unsigned long len;
   79 #else
          /* file input; buf_left/buf_index presumably track the fill state of
           * buf[] below - confirm against the reader code */
   80     int fd;
   81     int buf_left;
   82     int buf_index;
   83 #endif
   84     unsigned long int bitbuf;
   85     int bitbuf_bits;
   86     int marker_ind;
   87     int marker_val;
   88     unsigned char marker;
   89     int x_size, y_size; /* size of image (can be less than block boundary) */
   90     int x_phys, y_phys; /* physical size, block aligned */
   91     int x_mbl; /* x dimension of MBL */
   92     int y_mbl; /* y dimension of MBL */
   93     int blocks; /* blocks per MB */
   94     int restart_interval; /* number of MCUs between RSTm markers */
   95     int restart; /* blocks until next restart marker */
   96     int mcu_row; /* current row relative to first row of this row of MCUs */
   97     unsigned char *out_ptr; /* pointer to current row to output */
   98     int cur_row; /* current row relative to top of image */
   99     int set_rows;
  100     int store_pos[4]; /* for Y block ordering */
          /* Colour targets keep 3 DC predictors and 2 sets of scaling data;
           * greyscale targets keep one of each. */
  101 #ifdef HAVE_LCD_COLOR
  102     int last_dc_val[3];
  103     int h_scale[2]; /* horizontal scalefactor = (2**N) / 8 */
  104     int v_scale[2]; /* same as above, for vertical direction */
  105     int k_need[2]; /* per component zig-zag index of last needed coefficient */
  106     int zero_need[2]; /* per component number of coefficients to zero */
  107 #else
  108     int last_dc_val;
  109     int h_scale[1]; /* horizontal scalefactor = (2**N) / 8 */
  110     int v_scale[1]; /* same as above, for vertical direction */
  111     int k_need[1]; /* per component zig-zag index of last needed coefficient */
  112     int zero_need[1]; /* per component number of coefficients to zero */
  113 #endif
  114     jpeg_pix_t *img_buf;
  115
  116     int16_t quanttable[4][QUANT_TABLE_LENGTH];/* raw quantization tables 0-3 */
  117
  118     struct huffman_table hufftable[2]; /* Huffman tables  */
  119     struct derived_tbl dc_derived_tbls[2]; /* Huffman-LUTs */
  120     struct derived_tbl ac_derived_tbls[2];
  121
  122     struct frame_component frameheader[3]; /* Component descriptor */
  123     struct scan_component scanheader[3]; /* currently not used */
  124
  125     int mcu_membership[6]; /* info per block */
  126     int tab_membership[6];
  127     int subsample_x[3]; /* info per component */
  128     int subsample_y[3];
  129     bool resize;
  130     unsigned char buf[JPEG_READ_BUF_SIZE]; /* byte buffer embedded in the state */
  131     struct img_part part;
  132 };
  133
      /* A single file-scope decoder instance exists only in JPEG_FROM_MEM
       * builds. */
  134 #ifdef JPEG_FROM_MEM
  135 static struct jpeg jpeg;
  136 #endif
 137
 138 INLINE unsigned range_limit(int value)
 139 {
 140 #if CONFIG_CPU == SH7034
 141     unsigned tmp;
 142     asm (  /* Note: Uses knowledge that only low byte of result is used */
 143         "extu.b  %[v],%[t]   \n"
 144         "cmp/eq  %[v],%[t]   \n"  /* low byte == whole number ? */
 145         "bt      1f          \n"  /* yes: no overflow */
 146         "cmp/pz  %[v]        \n"  /* overflow: positive? */
 147         "subc    %[v],%[v]   \n"  /* %[r] now either 0 or 0xffffffff */
 148     "1:                      \n"
 149         : /* outputs */
 150         [v]"+r"(value),
 151         [t]"=&r"(tmp)
 152     );
 153     return value;
 154 #elif defined(CPU_COLDFIRE)
 155     /* Note: Uses knowledge that only the low byte of the result is used */
 156     asm (
 157         "cmp.l   #255,%[v]   \n"  /* overflow? */
 158         "bls.b   1f          \n"  /* no: return value */
 159         /* yes: set low byte to appropriate boundary */
 160         "spl.b   %[v]        \n"
 161     "1:                      \n"
 162         : /* outputs */
 163         [v]"+d"(value)
 164     );
 165     return value;
 166 #elif defined(CPU_ARM)
 167     /* Note: Uses knowledge that only the low byte of the result is used */
 168     asm (
 169         "cmp     %[v], #255          \n"  /* out of range 0..255? */
 170         "mvnhi   %[v], %[v], asr #31 \n"  /* yes: set all bits to ~(sign_bit) */
 171         : /* outputs */
 172         [v]"+r"(value)
 173     );
 174     return value;
 175 #else
 176     if ((unsigned)value <= 255)
 177         return value;
 178
 179     if (value < 0)
 180         return 0;
 181
 182     return 255;
 183 #endif
 184 }
 185
 186 INLINE unsigned scale_output(int value)
 187 {
 188 #if defined(CPU_ARM) && ARM_ARCH >= 6
 189     asm (
 190         "usat %[v], #8, %[v], asr #18\n"
 191         : [v] "+r" (value)
 192     );
 193     return value;
 194 #else
 195     return range_limit(value >> 18);
 196 #endif
 197 }
 198
 199 /* IDCT implementation */
 200
 201
  202 #define CONST_BITS 13 /* fractional bits of the FIX_* constants below */
  203 #define PASS1_BITS 2 /* extra scaling kept in the vertical-pass results */
  204
  205
  206 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
  207 * causing a lot of useless floating-point operations at run time.
  208 * To get around this we use the following pre-calculated constants.
  209 * If you change CONST_BITS you may want to add appropriate values.
  210 * (With a reasonable C compiler, you can just rely on the FIX() macro...)
  211 */
  212 #define FIX_0_298631336  2446 /* FIX(0.298631336) */
  213 #define FIX_0_390180644  3196 /* FIX(0.390180644) */
  214 #define FIX_0_541196100  4433 /* FIX(0.541196100) */
  215 #define FIX_0_765366865  6270 /* FIX(0.765366865) */
  216 #define FIX_0_899976223  7373 /* FIX(0.899976223) */
  217 #define FIX_1_175875602  9633 /* FIX(1.175875602) */
  218 #define FIX_1_501321110 12299 /* FIX(1.501321110) */
  219 #define FIX_1_847759065 15137 /* FIX(1.847759065) */
  220 #define FIX_1_961570560 16069 /* FIX(1.961570560) */
  221 #define FIX_2_053119869 16819 /* FIX(2.053119869) */
  222 #define FIX_2_562915447 20995 /* FIX(2.562915447) */
  223 #define FIX_3_072711026 25172 /* FIX(3.072711026) */
  224
  225
  226
  227 /* Multiply a long variable by a long constant to yield a long result.
  228 * For 8-bit samples with the recommended scaling, all the variable
  229 * and constant values involved are no more than 16 bits wide, so a
  230 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
  231 * For 12-bit samples, a full 32-bit multiplication will be needed.
  232 */
  233 #define MULTIPLY(var1, var2) ((var1) * (var2))
  234
      /* On SH, ColdFire and ARM newer than v4 both operands are cast to short
       * before multiplying (presumably so the compiler can pick a 16x16
       * multiply instruction); the casts discard anything above 16 bits.
       * Everywhere else MULTIPLY16 is the plain MULTIPLY. */
  235 #if defined(CPU_SH) || defined(CPU_COLDFIRE) || \
  236     (defined(CPU_ARM) && ARM_ARCH > 4)
  237 #define MULTIPLY16(var,const)  (((short) (var)) * ((short) (const)))
  238 #else
  239 #define MULTIPLY16 MULTIPLY
  240 #endif
  241
  242 /*
  243  * Macros for handling fixed-point arithmetic; these are used by many
  244  * but not all of the DCT/IDCT modules.
  245  *
  246  * All values are expected to be of type INT32.
  247  * Fractional constants are scaled left by CONST_BITS bits.
  248  * CONST_BITS is defined within each module using these macros,
  249  * and may differ from one module to the next.
  250  */
  251 #define ONE ((long)1)
  252 #define CONST_SCALE (ONE << CONST_BITS)
  253
  254 /* Convert a positive real constant to an integer scaled by CONST_SCALE.
  255  * Caution: some C compilers fail to reduce "FIX(constant)" at compile time,
  256  * thus causing a lot of useless floating-point operations at run time.
  257  */
  258 #define FIX(x) ((long) ((x) * CONST_SCALE + 0.5))
  259 #define RIGHT_SHIFT(x,shft)     ((x) >> (shft))
  260
  261 /* Descale and correctly round an int value that's scaled by N bits.
  262 * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
  263 * the fudge factor is correct for either sign of X.
  264 */
  265 #define DESCALE(x,n) (((x) + (1l << ((n)-1))) >> (n))
  266
      /* Total right shift applied by the horizontal passes that work on
       * CONST_BITS-scaled intermediates. */
  267 #define DS_OUT ((CONST_BITS)+(PASS1_BITS)+3)
  268
  269 /*
  270  * Conversion of full 0-255 range YCrCb to RGB:
  271  *   |R|   |1.000000 -0.000001  1.402000| |Y'|
  272  *   |G| = |1.000000 -0.334136 -0.714136| |Pb|
  273  *   |B|   |1.000000  1.772000  0.000000| |Pr|
  274  * Scaled (yields s15-bit output):
  275  *   |R|   |128    0  179| |Y       |
  276  *   |G| = |128  -43  -91| |Cb - 128|
  277  *   |B|   |128  227    0| |Cr - 128|
  278  *
  279  * NOTE(review): the JFIF specification lists the G/Cb coefficient as
  280  * -0.344136 (which would scale to -44); the -0.334136 above matches
  281  * GUFAC = -43 below. Confirm which value is intended before changing it.
  282  */
  279 #define YFAC            128
  280 #define RVFAC           179
  281 #define GUFAC           (-43)
  282 #define GVFAC           (-91)
  283 #define BUFAC           227
  284 #define COMPONENT_SHIFT  15
 285
 286 #ifndef CPU_ARM
 287 /* horizontal-pass 1-point IDCT */
 288 static void jpeg_idct1h(int16_t *ws, unsigned char *out, int16_t *end, int rowstep)
 289 {
 290     for (; ws < end; ws += 8)
 291     {
 292         *out = range_limit(128 + (int) DESCALE(*ws, 3 + PASS1_BITS));
 293         out += rowstep;
 294     }
 295 }
 296
 297 /* vertical-pass 2-point IDCT */
 298 static void jpeg_idct2v(int16_t *ws, int16_t *end)
 299 {
 300     for (; ws < end; ws++)
 301     {
 302         int tmp1 = ws[0*8];
 303         int tmp2 = ws[1*8];
 304         ws[0*8] = tmp1 + tmp2;
 305         ws[1*8] = tmp1 - tmp2;
 306     }
 307 }
 308
 309 /* horizontal-pass 2-point IDCT */
 310 static void jpeg_idct2h(int16_t *ws, unsigned char *out, int16_t *end, int rowstep)
 311 {
 312     for (; ws < end; ws += 8, out += rowstep)
 313     {
 314         int tmp1 = ws[0] + (ONE << (PASS1_BITS + 2))
 315                    + (128 << (PASS1_BITS + 3));
 316         int tmp2 = ws[1];
 317         out[JPEG_PIX_SZ*0] = range_limit((int) RIGHT_SHIFT(tmp1 + tmp2,
 318             PASS1_BITS + 3));
 319         out[JPEG_PIX_SZ*1] = range_limit((int) RIGHT_SHIFT(tmp1 - tmp2,
 320             PASS1_BITS + 3));
 321     }
 322 }
 323
 324 /* vertical-pass 4-point IDCT */
 325 static void jpeg_idct4v(int16_t *ws, int16_t *end)
 326 {
 327     for (; ws < end; ws++)
 328     {
 329         int tmp0, tmp2, tmp10, tmp12;
 330         int z1, z2, z3;
 331         /* Even part */
 332
 333         tmp0 = ws[8*0];
 334         tmp2 = ws[8*2];
 335
 336         tmp10 = (tmp0 + tmp2) << PASS1_BITS;
 337         tmp12 = (tmp0 - tmp2) << PASS1_BITS;
 338
 339         /* Odd part */
 340         /* Same rotation as in the even part of the 8x8 LL&M IDCT */
 341
 342         z2 = ws[8*1];
 343         z3 = ws[8*3];
 344
 345         z1 = MULTIPLY16(z2 + z3, FIX_0_541196100) +
 346             (ONE << (CONST_BITS - PASS1_BITS - 1));
 347         tmp0 = RIGHT_SHIFT(z1 + MULTIPLY16(z3, - FIX_1_847759065),
 348             CONST_BITS-PASS1_BITS);
 349         tmp2 = RIGHT_SHIFT(z1 + MULTIPLY16(z2, FIX_0_765366865),
 350             CONST_BITS-PASS1_BITS);
 351
 352         /* Final output stage */
 353         ws[8*0] = (int) (tmp10 + tmp2);
 354         ws[8*3] = (int) (tmp10 - tmp2);
 355         ws[8*1] = (int) (tmp12 + tmp0);
 356         ws[8*2] = (int) (tmp12 - tmp0);
 357     }
 358 }
 359
 360 /* horizontal-pass 4-point IDCT */
 361 static void jpeg_idct4h(int16_t *ws, unsigned char *out, int16_t *end, int rowstep)
 362 {
 363     for (; ws < end; out += rowstep, ws += 8)
 364     {
 365         int tmp0, tmp2, tmp10, tmp12;
 366         int z1, z2, z3;
 367         /* Even part */
 368
 369         tmp0 = (int) ws[0] + (ONE << (PASS1_BITS + 2))
 370                + (128 << (PASS1_BITS + 3));
 371         tmp2 = (int) ws[2];
 372
 373         tmp10 = (tmp0 + tmp2) << CONST_BITS;
 374         tmp12 = (tmp0 - tmp2) << CONST_BITS;
 375
 376         /* Odd part */
 377         /* Same rotation as in the even part of the 8x8 LL&M IDCT */
 378
 379         z2 = (int) ws[1];
 380         z3 = (int) ws[3];
 381
 382         z1 = MULTIPLY16(z2 + z3, FIX_0_541196100);
 383         tmp0 = z1 - MULTIPLY16(z3, FIX_1_847759065);
 384         tmp2 = z1 + MULTIPLY16(z2, FIX_0_765366865);
 385
 386         /* Final output stage */
 387
 388         out[JPEG_PIX_SZ*0] = range_limit((int) RIGHT_SHIFT(tmp10 + tmp2,
 389             DS_OUT));
 390         out[JPEG_PIX_SZ*3] = range_limit((int) RIGHT_SHIFT(tmp10 - tmp2,
 391             DS_OUT));
 392         out[JPEG_PIX_SZ*1] = range_limit((int) RIGHT_SHIFT(tmp12 + tmp0,
 393             DS_OUT));
 394         out[JPEG_PIX_SZ*2] = range_limit((int) RIGHT_SHIFT(tmp12 - tmp0,
 395             DS_OUT));
 396     }
 397 }
 398
  399 /* vertical-pass 8-point IDCT
       *
       * Processes groups of 8 coefficients from ws up to (not including) end.
       * With JPEG_IDCT_TRANSPOSE defined, a group is read from 8 consecutive
       * entries (V_IN_ST is 1, ws advances by 8) and its results are written
       * through V_OUT() to ws2 = ws + 64 with a stride of 8, i.e. transposed
       * into the area that follows the first 64 entries. Without it, a group
       * is read with a stride of 8 and overwritten in place.
       * Results are left scaled up by PASS1_BITS for the horizontal pass.
       */
  400 static void jpeg_idct8v(int16_t *ws, int16_t *end)
  401 {
  402     long tmp0, tmp1, tmp2, tmp3;
  403     long tmp10, tmp11, tmp12, tmp13;
  404     long z1, z2, z3, z4, z5;
          /* The loop header and its opening brace are selected by the
             preprocessor; the single closing brace below serves both. */
  405 #ifdef JPEG_IDCT_TRANSPOSE
  406     int16_t *ws2 = ws + 64;
  407     for (; ws < end; ws += 8, ws2++)
  408     {
  409 #else
  410     for (; ws < end; ws++)
  411     {
  412 #endif
  413     /* Due to quantization, we will usually find that many of the input
  414     * coefficients are zero, especially the AC terms.  We can exploit this
  415     * by short-circuiting the IDCT calculation for any column in which all
  416     * the AC terms are zero.  In that case each output is equal to the
  417     * DC coefficient (with scale factor as needed).
  418     * With typical images and quantization tables, half or more of the
  419     * column DCT calculations can be simplified this way.
  420     */
  421         if ((ws[V_IN_ST*1] | ws[V_IN_ST*2] | ws[V_IN_ST*3]
  422            | ws[V_IN_ST*4] | ws[V_IN_ST*5] | ws[V_IN_ST*6] | ws[V_IN_ST*7]) == 0)
  423         {
  424             /* AC terms all zero */
  425             int dcval = ws[V_IN_ST*0] << PASS1_BITS;
  426
  427             V_OUT(0) = V_OUT(1) = V_OUT(2) = V_OUT(3) = V_OUT(4) = V_OUT(5) =
  428                        V_OUT(6) = V_OUT(7) = dcval;
  429             continue;
  430         }
  431
  432         /* Even part: reverse the even part of the forward DCT. */
  433         /* The rotator is sqrt(2)*c(-6). */
  434
  435         z2 = ws[V_IN_ST*2];
  436         z3 = ws[V_IN_ST*6];
  437
  438         z1 = MULTIPLY16(z2 + z3, FIX_0_541196100);
  439         tmp2 = z1 + MULTIPLY16(z3, - FIX_1_847759065);
  440         tmp3 = z1 + MULTIPLY16(z2, FIX_0_765366865);
  441
              /* The rounding constant for the final shift by
                 CONST_BITS-PASS1_BITS is added to the DC term once, so it
                 reaches all eight results. */
  442         z2 = ws[V_IN_ST*0] << CONST_BITS;
  443         z2 += ONE << (CONST_BITS - PASS1_BITS - 1);
  444         z3 = ws[V_IN_ST*4] << CONST_BITS;
  445
  446         tmp0 = (z2 + z3);
  447         tmp1 = (z2 - z3);
  448
  449         tmp10 = tmp0 + tmp3;
  450         tmp13 = tmp0 - tmp3;
  451         tmp11 = tmp1 + tmp2;
  452         tmp12 = tmp1 - tmp2;
  453
  454         /* Odd part per figure 8; the matrix is unitary and hence its
  455            transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively. */
  456
              /* tmp0..tmp3 are reused here: the even-part values they held
                 are already folded into tmp10..tmp13. */
  457         tmp0 = ws[V_IN_ST*7];
  458         tmp1 = ws[V_IN_ST*5];
  459         tmp2 = ws[V_IN_ST*3];
  460         tmp3 = ws[V_IN_ST*1];
  461
  462         z1 = tmp0 + tmp3;
  463         z2 = tmp1 + tmp2;
  464         z3 = tmp0 + tmp2;
  465         z4 = tmp1 + tmp3;
  466         z5 = MULTIPLY16(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
  467
  468         tmp0 = MULTIPLY16(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
  469         tmp1 = MULTIPLY16(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
  470         tmp2 = MULTIPLY16(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
  471         tmp3 = MULTIPLY16(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
  472         z1 = MULTIPLY16(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
  473         z2 = MULTIPLY16(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
  474         z3 = MULTIPLY16(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
  475         z4 = MULTIPLY16(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
  476
  477         z3 += z5;
  478         z4 += z5;
  479
  480         tmp0 += z1 + z3;
  481         tmp1 += z2 + z4;
  482         tmp2 += z2 + z3;
  483         tmp3 += z1 + z4;
  484
  485         /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
  486
  487         V_OUT(0) = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
  488         V_OUT(7) = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
  489         V_OUT(1) = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
  490         V_OUT(6) = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
  491         V_OUT(2) = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
  492         V_OUT(5) = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
  493         V_OUT(3) = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
  494         V_OUT(4) = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
  495     }
  496 }
 497
  498 /* horizontal-pass 8-point IDCT
       *
       * For every 8-entry row of ws below end, computes eight samples and
       * stores them at out[JPEG_PIX_SZ*0..7]; out then advances by rowstep.
       * Every sample is clamped with range_limit(). The 128 level shift and
       * the rounding constant are folded into the first term before the
       * final shift by DS_OUT.
       */
  499 static void jpeg_idct8h(int16_t *ws, unsigned char *out, int16_t *end, int rowstep)
  500 {
  501     long tmp0, tmp1, tmp2, tmp3;
  502     long tmp10, tmp11, tmp12, tmp13;
  503     long z1, z2, z3, z4, z5;
  504     for (; ws < end; out += rowstep, ws += 8)
  505     {
  506         /* Rows of zeroes can be exploited in the same way as we did with
  507          * columns. However, the column calculation has created many nonzero AC
  508          * terms, so the simplification applies less often (typically 5% to 10%
  509          * of the time). On machines with very fast multiplication, it's
  510          * possible that the test takes more time than it's worth.  In that
  511          * case this section may be commented out.
  512         */
  513
              /* Defining NO_ZERO_ROW_TEST removes the shortcut below. */
  514 #ifndef NO_ZERO_ROW_TEST
  515         if ((ws[1] | ws[2] | ws[3]
  516            | ws[4] | ws[5] | ws[6] | ws[7]) == 0)
  517         {
  518             /* AC terms all zero */
  519             unsigned char dcval = range_limit(128 + (int) DESCALE((long) ws[0],
  520                 PASS1_BITS+3));
  521
  522             out[JPEG_PIX_SZ*0] = dcval;
  523             out[JPEG_PIX_SZ*1] = dcval;
  524             out[JPEG_PIX_SZ*2] = dcval;
  525             out[JPEG_PIX_SZ*3] = dcval;
  526             out[JPEG_PIX_SZ*4] = dcval;
  527             out[JPEG_PIX_SZ*5] = dcval;
  528             out[JPEG_PIX_SZ*6] = dcval;
  529             out[JPEG_PIX_SZ*7] = dcval;
  530             continue;
  531         }
  532 #endif
  533
  534         /* Even part: reverse the even part of the forward DCT. */
  535         /* The rotator is sqrt(2)*c(-6). */
  536
  537         z2 = (long) ws[2];
  538         z3 = (long) ws[6];
  539
  540         z1 = MULTIPLY16(z2 + z3, FIX_0_541196100);
  541         tmp2 = z1 + MULTIPLY16(z3, - FIX_1_847759065);
  542         tmp3 = z1 + MULTIPLY16(z2, FIX_0_765366865);
  543
              /* ONE << (PASS1_BITS + 2) is half of the PASS1_BITS + 3 part of
                 the final shift (rounding); 128 << (PASS1_BITS + 3) becomes
                 the +128 level shift once the result is shifted by DS_OUT. */
  544         z4 = (long) ws[0] + (ONE << (PASS1_BITS + 2))
  545              + (128 << (PASS1_BITS + 3));
  546         z4 <<= CONST_BITS;
  547         z5 = (long) ws[4] << CONST_BITS;
  548         tmp0 = z4 + z5;
  549         tmp1 = z4 - z5;
  550
  551         tmp10 = tmp0 + tmp3;
  552         tmp13 = tmp0 - tmp3;
  553         tmp11 = tmp1 + tmp2;
  554         tmp12 = tmp1 - tmp2;
  555
  556         /* Odd part per figure 8; the matrix is unitary and hence its
  557         * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. */
  558
              /* tmp0..tmp3 are reused here: the even-part values they held
                 are already folded into tmp10..tmp13. */
  559         tmp0 = (long) ws[7];
  560         tmp1 = (long) ws[5];
  561         tmp2 = (long) ws[3];
  562         tmp3 = (long) ws[1];
  563
  564         z1 = tmp0 + tmp3;
  565         z2 = tmp1 + tmp2;
  566         z3 = tmp0 + tmp2;
  567         z4 = tmp1 + tmp3;
  568         z5 = MULTIPLY16(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
  569
  570         tmp0 = MULTIPLY16(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
  571         tmp1 = MULTIPLY16(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
  572         tmp2 = MULTIPLY16(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
  573         tmp3 = MULTIPLY16(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
  574         z1 = MULTIPLY16(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
  575         z2 = MULTIPLY16(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
  576         z3 = MULTIPLY16(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
  577         z4 = MULTIPLY16(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
  578
  579         z3 += z5;
  580         z4 += z5;
  581
  582         tmp0 += z1 + z3;
  583         tmp1 += z2 + z4;
  584         tmp2 += z2 + z3;
  585         tmp3 += z1 + z4;
  586
  587         /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
  588
  589         out[JPEG_PIX_SZ*0] = range_limit((int) RIGHT_SHIFT(tmp10 + tmp3,
  590             DS_OUT));
  591         out[JPEG_PIX_SZ*7] = range_limit((int) RIGHT_SHIFT(tmp10 - tmp3,
  592             DS_OUT));
  593         out[JPEG_PIX_SZ*1] = range_limit((int) RIGHT_SHIFT(tmp11 + tmp2,
  594             DS_OUT));
  595         out[JPEG_PIX_SZ*6] = range_limit((int) RIGHT_SHIFT(tmp11 - tmp2,
  596             DS_OUT));
  597         out[JPEG_PIX_SZ*2] = range_limit((int) RIGHT_SHIFT(tmp12 + tmp1,
  598             DS_OUT));
  599         out[JPEG_PIX_SZ*5] = range_limit((int) RIGHT_SHIFT(tmp12 - tmp1,
  600             DS_OUT));
  601         out[JPEG_PIX_SZ*3] = range_limit((int) RIGHT_SHIFT(tmp13 + tmp0,
  602             DS_OUT));
  603         out[JPEG_PIX_SZ*4] = range_limit((int) RIGHT_SHIFT(tmp13 - tmp0,
  604             DS_OUT));
  605     }
  606 }
 607
  608 #else
      /* CPU_ARM builds do not compile the C versions of the 1/2/4/8-point
       * passes above; the same functions are only declared here and must be
       * supplied by another object (presumably an assembler implementation -
       * confirm in the build files). The signatures match the C versions. */
  609 extern void jpeg_idct1h(int16_t *ws, unsigned char *out, int16_t *end, int rowstep);
  610 extern void jpeg_idct2v(int16_t *ws, int16_t *end);
  611 extern void jpeg_idct2h(int16_t *ws, unsigned char *out, int16_t *end, int rowstep);
  612 extern void jpeg_idct4v(int16_t *ws, int16_t *end);
  613 extern void jpeg_idct4h(int16_t *ws, unsigned char *out, int16_t *end, int rowstep);
  614 extern void jpeg_idct8v(int16_t *ws, int16_t *end);
  615 extern void jpeg_idct8h(int16_t *ws, unsigned char *out, int16_t *end, int rowstep);
  616 #endif
 617
 618 #ifdef HAVE_LCD_COLOR
  619 /* vertical-pass 16-point IDCT
       *
       * Expands every group of 8 input coefficients from ws up to (not
       * including) end into 16 results written through V_OUT(0)..V_OUT(15).
       * Unlike jpeg_idct8v there is no shortcut for all-zero AC terms.
       * With JPEG_IDCT_TRANSPOSE the results go to ws2 = ws + 64 with a stride
       * of 8, so indices 8..15 reach past the 64 entries that follow the
       * input. NOTE(review): this looks like what COLOR_EXTRA_IDCT_WS
       * reserves room for - confirm against the caller.
       * Results are left scaled up by PASS1_BITS for the horizontal pass.
       */
  620 static void jpeg_idct16v(int16_t *ws, int16_t *end)
  621 {
  622     long tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
  623     long tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
  624     long z1, z2, z3, z4;
          /* The loop header and its opening brace are selected by the
             preprocessor; the single closing brace below serves both. */
  625 #ifdef JPEG_IDCT_TRANSPOSE
  626     int16_t *ws2 = ws + 64;
  627     for (; ws < end; ws += 8, ws2++)
  628     {
  629 #else
  630     for (; ws < end; ws++)
  631     {
  632 #endif
  633         /* Even part */
  634
  635         tmp0 = ws[V_IN_ST*0] << CONST_BITS;
  636         /* Add fudge factor here for final descale. */
  637         tmp0 += 1 << (CONST_BITS-PASS1_BITS-1);
  638
  639         z1 = ws[V_IN_ST*4];
  640         tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
  641         tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
  642
  643         tmp10 = tmp0 + tmp1;
  644         tmp11 = tmp0 - tmp1;
  645         tmp12 = tmp0 + tmp2;
  646         tmp13 = tmp0 - tmp2;
  647
  648         z1 = ws[V_IN_ST*2];
  649         z2 = ws[V_IN_ST*6];
  650         z3 = z1 - z2;
  651         z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
  652         z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
  653
  654         /* (c6+c2)[16] = (c3+c1)[8] */
  655         tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);
  656         /* (c6-c14)[16] = (c3-c7)[8] */
  657         tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);
  658         /* (c2-c10)[16] = (c1-c5)[8] */
  659         tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887));
  660         /* (c10-c14)[16] = (c5-c7)[8] */
  661         tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579));
  662
              /* tmp20..tmp27 hold the finished even part; tmp0..tmp3 and
                 tmp10..tmp13 are free for reuse by the odd part below. */
  663         tmp20 = tmp10 + tmp0;
  664         tmp27 = tmp10 - tmp0;
  665         tmp21 = tmp12 + tmp1;
  666         tmp26 = tmp12 - tmp1;
  667         tmp22 = tmp13 + tmp2;
  668         tmp25 = tmp13 - tmp2;
  669         tmp23 = tmp11 + tmp3;
  670         tmp24 = tmp11 - tmp3;
  671
  672         /* Odd part */
  673
  674         z1 = ws[V_IN_ST*1];
  675         z2 = ws[V_IN_ST*3];
  676         z3 = ws[V_IN_ST*5];
  677         z4 = ws[V_IN_ST*7];
  678
  679         tmp11 = z1 + z3;
  680
  681         tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
  682         tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
  683         tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
  684         tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
  685         tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
  686         tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
  687         tmp0  = tmp1 + tmp2 + tmp3 -
  688             MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
  689         tmp13 = tmp10 + tmp11 + tmp12 -
  690             MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
              /* From here on z1 and z2 are overwritten with intermediate
                 products, so the statement order matters. */
  691         z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
  692         tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
  693         tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
  694         z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
  695         tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
  696         tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
  697         z2    += z4;
  698         z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
  699         tmp1  += z1;
  700         tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
  701         z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
  702         tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
  703         tmp12 += z2;
  704         z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
  705         tmp2  += z2;
  706         tmp3  += z2;
  707         z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
  708         tmp10 += z2;
  709         tmp11 += z2;
  710
  711         /* Final output stage */
  712         V_OUT(0)  = (int) RIGHT_SHIFT(tmp20 + tmp0,  CONST_BITS-PASS1_BITS);
  713         V_OUT(15) = (int) RIGHT_SHIFT(tmp20 - tmp0,  CONST_BITS-PASS1_BITS);
  714         V_OUT(1)  = (int) RIGHT_SHIFT(tmp21 + tmp1,  CONST_BITS-PASS1_BITS);
  715         V_OUT(14) = (int) RIGHT_SHIFT(tmp21 - tmp1,  CONST_BITS-PASS1_BITS);
  716         V_OUT(2)  = (int) RIGHT_SHIFT(tmp22 + tmp2,  CONST_BITS-PASS1_BITS);
  717         V_OUT(13) = (int) RIGHT_SHIFT(tmp22 - tmp2,  CONST_BITS-PASS1_BITS);
  718         V_OUT(3)  = (int) RIGHT_SHIFT(tmp23 + tmp3,  CONST_BITS-PASS1_BITS);
  719         V_OUT(12) = (int) RIGHT_SHIFT(tmp23 - tmp3,  CONST_BITS-PASS1_BITS);
  720         V_OUT(4)  = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
  721         V_OUT(11) = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
  722         V_OUT(5)  = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
  723         V_OUT(10) = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
  724         V_OUT(6)  = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
  725         V_OUT(9)  = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
  726         V_OUT(7)  = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
  727         V_OUT(8)  = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
  728     }
  729 }
 730
 731 /* horizontal-pass 16-point IDCT */
 732 static void jpeg_idct16h(int16_t *ws, unsigned char *out, int16_t *end, int rowstep)
 733 {
 734     long tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
 735     long tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
 736     long z1, z2, z3, z4;
 737     for (; ws < end; out += rowstep, ws += 8)
 738     {
 739         /* Even part */
 740
 741         /* Add fudge factor here for final descale. */
 742         tmp0 = (long) ws[0] + (ONE << (PASS1_BITS+2))
 743                + (128 << (PASS1_BITS + 3));
 744         tmp0 <<= CONST_BITS;
 745
 746         z1 = (long) ws[4];
 747         tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
 748         tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
 749
 750         tmp10 = tmp0 + tmp1;
 751         tmp11 = tmp0 - tmp1;
 752         tmp12 = tmp0 + tmp2;
 753         tmp13 = tmp0 - tmp2;
 754
 755         z1 = (long) ws[2];
 756         z2 = (long) ws[6];
 757         z3 = z1 - z2;
 758         z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
 759         z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
 760
 761         /* (c6+c2)[16] = (c3+c1)[8] */
 762         tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);
 763         /* (c6-c14)[16] = (c3-c7)[8] */
 764         tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);
 765         /* (c2-c10)[16] = (c1-c5)[8] */
 766         tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887));
 767         /* (c10-c14)[16] = (c5-c7)[8] */
 768         tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579));
 769
 770         tmp20 = tmp10 + tmp0;
 771         tmp27 = tmp10 - tmp0;
 772         tmp21 = tmp12 + tmp1;
 773         tmp26 = tmp12 - tmp1;
 774         tmp22 = tmp13 + tmp2;
 775         tmp25 = tmp13 - tmp2;
 776         tmp23 = tmp11 + tmp3;
 777         tmp24 = tmp11 - tmp3;
 778
 779         /* Odd part */
 780
 781         z1 = (long) ws[1];
 782         z2 = (long) ws[3];
 783         z3 = (long) ws[5];
 784         z4 = (long) ws[7];
 785
 786         tmp11 = z1 + z3;
 787
 788         tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
 789         tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
 790         tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
 791         tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
 792         tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
 793         tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
 794         tmp0  = tmp1 + tmp2 + tmp3 -
 795             MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
 796         tmp13 = tmp10 + tmp11 + tmp12 -
 797             MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
 798         z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
 799         tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
 800         tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
 801         z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
 802         tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
 803         tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
 804         z2    += z4;
 805         z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
 806         tmp1  += z1;
 807         tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
 808         z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
 809         tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
 810         tmp12 += z2;
 811         z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
 812         tmp2  += z2;
 813         tmp3  += z2;
 814         z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
 815         tmp10 += z2;
 816         tmp11 += z2;
 817
 818         /* Final output stage */
 819
 820         out[JPEG_PIX_SZ*0]  = scale_output(tmp20 + tmp0);
 821         out[JPEG_PIX_SZ*15] = scale_output(tmp20 - tmp0);
 822         out[JPEG_PIX_SZ*1]  = scale_output(tmp21 + tmp1);
 823         out[JPEG_PIX_SZ*14] = scale_output(tmp21 - tmp1);
 824         out[JPEG_PIX_SZ*2]  = scale_output(tmp22 + tmp2);
 825         out[JPEG_PIX_SZ*13] = scale_output(tmp22 - tmp2);
 826         out[JPEG_PIX_SZ*3]  = scale_output(tmp23 + tmp3);
 827         out[JPEG_PIX_SZ*12] = scale_output(tmp23 - tmp3);
 828         out[JPEG_PIX_SZ*4]  = scale_output(tmp24 + tmp10);
 829         out[JPEG_PIX_SZ*11] = scale_output(tmp24 - tmp10);
 830         out[JPEG_PIX_SZ*5]  = scale_output(tmp25 + tmp11);
 831         out[JPEG_PIX_SZ*10] = scale_output(tmp25 - tmp11);
 832         out[JPEG_PIX_SZ*6]  = scale_output(tmp26 + tmp12);
 833         out[JPEG_PIX_SZ*9]  = scale_output(tmp26 - tmp12);
 834         out[JPEG_PIX_SZ*7]  = scale_output(tmp27 + tmp13);
 835         out[JPEG_PIX_SZ*8]  = scale_output(tmp27 - tmp13);
 836     }
 837 }
 838 #endif
 839
 840 struct idct_entry {
 841     int scale;
 842     void (*v_idct)(int16_t *ws, int16_t *end);
 843     void (*h_idct)(int16_t *ws, unsigned char *out, int16_t *end, int rowstep);
 844 };
 845
 846 struct idct_entry idct_tbl[] = {
 847     { PASS1_BITS, NULL, jpeg_idct1h },
 848     { PASS1_BITS, jpeg_idct2v, jpeg_idct2h },
 849     { 0, jpeg_idct4v, jpeg_idct4h },
 850     { 0, jpeg_idct8v, jpeg_idct8h },
 851 #ifdef HAVE_LCD_COLOR
 852     { 0, jpeg_idct16v, jpeg_idct16h },
 853 #endif
 854 };
 855
 856 /* JPEG decoder implementation */
 857
 858 #ifdef JPEG_FROM_MEM
 859 INLINE unsigned char *getc(struct jpeg* p_jpeg)
 860 {
 861     if (LIKELY(p_jpeg->len))
 862     {
 863         p_jpeg->len--;
 864         return p_jpeg->data++;
 865     } else
 866         return NULL;
 867 }
 868
 869 INLINE bool skip_bytes(struct jpeg* p_jpeg, int count)
 870 {
 871     if (p_jpeg->len >= (unsigned)count)
 872     {
 873         p_jpeg->len -= count;
 874         p_jpeg->data += count;
 875         return true;
 876     } else {
 877         p_jpeg->data += p_jpeg->len;
 878         p_jpeg->len = 0;
 879         return false;
 880     }
 881 }
 882
 883 INLINE void putc(struct jpeg* p_jpeg)
 884 {
 885     p_jpeg->len++;
 886     p_jpeg->data--;
 887 }
 888 #else
 889 INLINE void fill_buf(struct jpeg* p_jpeg)
 890 {
 891         p_jpeg->buf_left = read(p_jpeg->fd, p_jpeg->buf, JPEG_READ_BUF_SIZE);
 892         p_jpeg->buf_index = 0;
 893 }
 894
 895 static unsigned char *getc(struct jpeg* p_jpeg)
 896 {
 897     if (UNLIKELY(p_jpeg->buf_left < 1))
 898         fill_buf(p_jpeg);
 899     if (UNLIKELY(p_jpeg->buf_left < 1))
 900         return NULL;
 901     p_jpeg->buf_left--;
 902     return (p_jpeg->buf_index++) + p_jpeg->buf;
 903 }
 904
 905 INLINE bool skip_bytes_seek(struct jpeg* p_jpeg)
 906 {
 907     if (UNLIKELY(lseek(p_jpeg->fd, -p_jpeg->buf_left, SEEK_CUR) < 0))
 908         return false;
 909     p_jpeg->buf_left = 0;
 910     return true;
 911 }
 912
 913 static bool skip_bytes(struct jpeg* p_jpeg, int count)
 914 {
 915     p_jpeg->buf_left -= count;
 916     p_jpeg->buf_index += count;
 917     return p_jpeg->buf_left >= 0 || skip_bytes_seek(p_jpeg);
 918 }
 919
 920 static void putc(struct jpeg* p_jpeg)
 921 {
 922     p_jpeg->buf_left++;
 923     p_jpeg->buf_index--;
 924 }
 925 #endif
 926
 927 #define e_skip_bytes(jpeg, count) \
 928 do {\
 929     if (UNLIKELY(!skip_bytes((jpeg),(count)))) \
 930         return -1; \
 931 } while (0)
 932
 933 #define e_getc(jpeg, code) \
 934 ({ \
 935     unsigned char *c; \
 936     if (UNLIKELY(!(c = getc(jpeg)))) \
 937         return (code); \
 938     *c; \
 939 })
 940
 941 #define d_getc(jpeg, def) \
 942 ({ \
 943     unsigned char *cp = getc(jpeg); \
 944     unsigned char c = LIKELY(cp) ? *cp : (def); \
 945     c; \
 946 })
 947
 948 /* Preprocess the JPEG JFIF file */
 949 static int process_markers(struct jpeg* p_jpeg)
 950 {
 951     unsigned char c;
 952     int marker_size; /* variable length of marker segment */
 953     int i, j, n;
 954     int ret = 0; /* returned flags */
 955
 956     while ((c = e_getc(p_jpeg, -1)))
 957     {
 958         if (c != 0xFF) /* no marker? */
 959         {
 960             JDEBUGF("Non-marker data\n");
 961             putc(p_jpeg);
 962             break; /* exit marker processing */
 963         }
 964
 965         c = e_getc(p_jpeg, -1);
 966         JDEBUGF("marker value %X\n",c);
 967         switch (c)
 968         {
 969         case 0xFF: /* Fill byte */
 970             ret |= FILL_FF;
 971         case 0x00: /* Zero stuffed byte - entropy data */
 972             putc(p_jpeg);
 973             continue;
 974
 975         case 0xC0: /* SOF Huff  - Baseline DCT */
 976             {
 977                 JDEBUGF("SOF marker ");
 978                 ret |= SOF0;
 979                 marker_size = e_getc(p_jpeg, -1) << 8; /* Highbyte */
 980                 marker_size |= e_getc(p_jpeg, -1); /* Lowbyte */
 981                 JDEBUGF("len: %d\n", marker_size);
 982                 n = e_getc(p_jpeg, -1); /* sample precision (= 8 or 12) */
 983                 if (n != 8)
 984                 {
 985                     return(-1); /* Unsupported sample precision */
 986                 }
 987                 p_jpeg->y_size = e_getc(p_jpeg, -1) << 8; /* Highbyte */
 988                 p_jpeg->y_size |= e_getc(p_jpeg, -1); /* Lowbyte */
 989                 p_jpeg->x_size = e_getc(p_jpeg, -1) << 8; /* Highbyte */
 990                 p_jpeg->x_size |= e_getc(p_jpeg, -1); /* Lowbyte */
 991                 JDEBUGF("  dimensions: %dx%d\n", p_jpeg->x_size,
 992                     p_jpeg->y_size);
 993
 994                 n = (marker_size-2-6)/3;
 995                 if (e_getc(p_jpeg, -1) != n || (n != 1 && n != 3))
 996                 {
 997                     return(-2); /* Unsupported SOF0 component specification */
 998                 }
 999                 for (i=0; i<n; i++)
1000                 {
1001                     /* Component info */
1002                     p_jpeg->frameheader[i].ID = e_getc(p_jpeg, -1);
1003                     p_jpeg->frameheader[i].horizontal_sampling =
1004                         (c = e_getc(p_jpeg, -1)) >> 4;
1005                     p_jpeg->frameheader[i].vertical_sampling = c & 0x0F;
1006                     p_jpeg->frameheader[i].quanttable_select =
1007                         e_getc(p_jpeg, -1);
1008                     if (p_jpeg->frameheader[i].horizontal_sampling > 2
1009                      || p_jpeg->frameheader[i].vertical_sampling > 2)
1010                     return -3; /* Unsupported SOF0 subsampling */
1011                 }
1012                 p_jpeg->blocks = n;
1013             }
1014             break;
1015
1016         case 0xC1: /* SOF Huff  - Extended sequential DCT*/
1017         case 0xC2: /* SOF Huff  - Progressive DCT*/
1018         case 0xC3: /* SOF Huff  - Spatial (sequential) lossless*/
1019         case 0xC5: /* SOF Huff  - Differential sequential DCT*/
1020         case 0xC6: /* SOF Huff  - Differential progressive DCT*/
1021         case 0xC7: /* SOF Huff  - Differential spatial*/
1022         case 0xC8: /* SOF Arith - Reserved for JPEG extensions*/
1023         case 0xC9: /* SOF Arith - Extended sequential DCT*/
1024         case 0xCA: /* SOF Arith - Progressive DCT*/
1025         case 0xCB: /* SOF Arith - Spatial (sequential) lossless*/
1026         case 0xCD: /* SOF Arith - Differential sequential DCT*/
1027         case 0xCE: /* SOF Arith - Differential progressive DCT*/
1028         case 0xCF: /* SOF Arith - Differential spatial*/
1029             {
1030                 return (-4); /* other DCT model than baseline not implemented */
1031             }
1032
1033         case 0xC4: /* Define Huffman Table(s) */
1034             {
1035                 ret |= DHT;
1036                 marker_size = e_getc(p_jpeg, -1) << 8; /* Highbyte */
1037                 marker_size |= e_getc(p_jpeg, -1); /* Lowbyte */
1038                 marker_size -= 2;
1039
1040                 while (marker_size > 17) /* another table */
1041                 {
1042                     c = e_getc(p_jpeg, -1);
1043                     marker_size--;
1044                     int sum = 0;
1045                     i = c & 0x0F; /* table index */
1046                     if (i > 1)
1047                     {
1048                         return (-5); /* Huffman table index out of range */
1049                     } else {
1050                         if (c & 0xF0) /* AC table */
1051                         {
1052                             for (j=0; j<16; j++)
1053                             {
1054                                 p_jpeg->hufftable[i].huffmancodes_ac[j] =
1055                                     (c = e_getc(p_jpeg, -1));
1056                                 sum += c;
1057                                 marker_size -= 1;
1058                             }
1059                             if(16 + sum > AC_LEN)
1060                                 return -10; /* longer than allowed */
1061
1062                             for (; j < 16 + sum; j++)
1063                             {
1064                                 p_jpeg->hufftable[i].huffmancodes_ac[j] =
1065                                     e_getc(p_jpeg, -1);
1066                                 marker_size--;
1067                             }
1068                         }
1069                         else /* DC table */
1070                         {
1071                             for (j=0; j<16; j++)
1072                             {
1073                                 p_jpeg->hufftable[i].huffmancodes_dc[j] =
1074                                     (c = e_getc(p_jpeg, -1));
1075                                 sum += c;
1076                                 marker_size--;
1077                             }
1078                             if(16 + sum > DC_LEN)
1079                                 return -11; /* longer than allowed */
1080
1081                             for (; j < 16 + sum; j++)
1082                             {
1083                                 p_jpeg->hufftable[i].huffmancodes_dc[j] =
1084                                     e_getc(p_jpeg, -1);
1085                                 marker_size--;
1086                             }
1087                         }
1088                     }
1089                 } /* while */
1090                 e_skip_bytes(p_jpeg, marker_size);
1091             }
1092             break;
1093
1094         case 0xCC: /* Define Arithmetic coding conditioning(s) */
1095             return(-6); /* Arithmetic coding not supported */
1096
1097         case 0xD8: /* Start of Image */
1098             JDEBUGF("SOI\n");
1099             break;
1100         case 0xD9: /* End of Image */
1101             JDEBUGF("EOI\n");
1102             break;
1103         case 0x01: /* for temp private use arith code */
1104             JDEBUGF("private\n");
1105             break; /* skip parameterless marker */
1106
1107
1108         case 0xDA: /* Start of Scan */
1109             {
1110                 ret |= SOS;
1111                 marker_size = e_getc(p_jpeg, -1) << 8; /* Highbyte */
1112                 marker_size |= e_getc(p_jpeg, -1); /* Lowbyte */
1113                 marker_size -= 2;
1114
1115                 n = (marker_size-1-3)/2;
1116                 if (e_getc(p_jpeg, -1) != n || (n != 1 && n != 3))
1117                 {
1118                     return (-7); /* Unsupported SOS component specification */
1119                 }
1120                 marker_size--;
1121                 for (i=0; i<n; i++)
1122                 {
1123                     p_jpeg->scanheader[i].ID = e_getc(p_jpeg, -1);
1124                     p_jpeg->scanheader[i].DC_select = (c = e_getc(p_jpeg, -1))
1125                         >> 4;
1126                     p_jpeg->scanheader[i].AC_select = c & 0x0F;
1127                     marker_size -= 2;
1128                 }
1129                 /* skip spectral information */
1130                 e_skip_bytes(p_jpeg, marker_size);
1131             }
1132             break;
1133
1134         case 0xDB: /* Define quantization Table(s) */
1135             {
1136                 ret |= DQT;
1137                 marker_size = e_getc(p_jpeg, -1) << 8; /* Highbyte */
1138                 marker_size |= e_getc(p_jpeg, -1); /* Lowbyte */
1139                 marker_size -= 2;
1140
1141                 n = (marker_size)/(QUANT_TABLE_LENGTH+1); /* # of tables */
1142                 for (i=0; i<n; i++)
1143                 {
1144                     int id = e_getc(p_jpeg, -1); /* ID */
1145                     marker_size--;
1146                     if (id >= 4)
1147                     {
1148                         return (-8); /* Unsupported quantization table */
1149                     }
1150                     /* Read Quantisation table: */
1151                     for (j=0; j<QUANT_TABLE_LENGTH; j++)
1152                     {
1153                         p_jpeg->quanttable[id][j] = e_getc(p_jpeg, -1);
1154                         marker_size--;
1155                     }
1156                 }
1157                 e_skip_bytes(p_jpeg, marker_size);
1158             }
1159             break;
1160
1161         case 0xDD: /* Define Restart Interval */
1162             {
1163                 marker_size = e_getc(p_jpeg, -1) << 8; /* Highbyte */
1164                 marker_size |= e_getc(p_jpeg, -1); /* Lowbyte */
1165                 marker_size -= 4;
1166                 /* Highbyte */
1167                 p_jpeg->restart_interval = e_getc(p_jpeg, -1) << 8;
1168                 p_jpeg->restart_interval |= e_getc(p_jpeg, -1); /* Lowbyte */
1169                 e_skip_bytes(p_jpeg, marker_size); /* skip segment */
1170             }
1171             break;
1172
1173         case 0xDC: /* Define Number of Lines */
1174         case 0xDE: /* Define Hierarchical progression */
1175         case 0xDF: /* Expand Reference Component(s) */
1176         case 0xE0: /* Application Field 0*/
1177         case 0xE1: /* Application Field 1*/
1178         case 0xE2: /* Application Field 2*/
1179         case 0xE3: /* Application Field 3*/
1180         case 0xE4: /* Application Field 4*/
1181         case 0xE5: /* Application Field 5*/
1182         case 0xE6: /* Application Field 6*/
1183         case 0xE7: /* Application Field 7*/
1184         case 0xE8: /* Application Field 8*/
1185         case 0xE9: /* Application Field 9*/
1186         case 0xEA: /* Application Field 10*/
1187         case 0xEB: /* Application Field 11*/
1188         case 0xEC: /* Application Field 12*/
1189         case 0xED: /* Application Field 13*/
1190         case 0xEE: /* Application Field 14*/
1191         case 0xEF: /* Application Field 15*/
1192         case 0xFE: /* Comment */
1193             {
1194                 marker_size = e_getc(p_jpeg, -1) << 8; /* Highbyte */
1195                 marker_size |= e_getc(p_jpeg, -1); /* Lowbyte */
1196                 marker_size -= 2;
1197                 JDEBUGF("unhandled marker len %d\n", marker_size);
1198                 e_skip_bytes(p_jpeg, marker_size); /* skip segment */
1199             }
1200             break;
1201
1202         case 0xF0: /* Reserved for JPEG extensions */
1203         case 0xF1: /* Reserved for JPEG extensions */
1204         case 0xF2: /* Reserved for JPEG extensions */
1205         case 0xF3: /* Reserved for JPEG extensions */
1206         case 0xF4: /* Reserved for JPEG extensions */
1207         case 0xF5: /* Reserved for JPEG extensions */
1208         case 0xF6: /* Reserved for JPEG extensions */
1209         case 0xF7: /* Reserved for JPEG extensions */
1210         case 0xF8: /* Reserved for JPEG extensions */
1211         case 0xF9: /* Reserved for JPEG extensions */
1212         case 0xFA: /* Reserved for JPEG extensions */
1213         case 0xFB: /* Reserved for JPEG extensions */
1214         case 0xFC: /* Reserved for JPEG extensions */
1215         case 0xFD: /* Reserved for JPEG extensions */
1216         case 0x02: /* Reserved */
1217         default:
1218             return (-9); /* Unknown marker */
1219         } /* switch */
1220     } /* while */
1221
1222     return (ret); /* return flags with seen markers */
1223 }
1224
1225 static const struct huffman_table luma_table =
1226 {
1227     {
1228         0x00,0x01,0x05,0x01,0x01,0x01,0x01,0x01,0x01,0x00,0x00,0x00,0x00,0x00,
1229         0x00,0x00,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B
1230     },
1231     {
1232         0x00,0x02,0x01,0x03,0x03,0x02,0x04,0x03,0x05,0x05,0x04,0x04,0x00,0x00,
1233         0x01,0x7D,0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,
1234         0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xA1,0x08,0x23,0x42,
1235         0xB1,0xC1,0x15,0x52,0xD1,0xF0,0x24,0x33,0x62,0x72,0x82,0x09,0x0A,0x16,
1236         0x17,0x18,0x19,0x1A,0x25,0x26,0x27,0x28,0x29,0x2A,0x34,0x35,0x36,0x37,
1237         0x38,0x39,0x3A,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x53,0x54,0x55,
1238         0x56,0x57,0x58,0x59,0x5A,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x73,
1239         0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x83,0x84,0x85,0x86,0x87,0x88,0x89,
1240         0x8A,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0xA2,0xA3,0xA4,0xA5,
1241         0xA6,0xA7,0xA8,0xA9,0xAA,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,
1242         0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xD2,0xD3,0xD4,0xD5,0xD6,
1243         0xD7,0xD8,0xD9,0xDA,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,
1244         0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA
1245     }
1246 };
1247
1248 static const struct huffman_table chroma_table =
1249 {
1250     {
1251         0x00,0x03,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x00,0x00,0x00,
1252         0x00,0x00,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B
1253     },
1254     {
1255         0x00,0x02,0x01,0x02,0x04,0x04,0x03,0x04,0x07,0x05,0x04,0x04,0x00,0x01,
1256         0x02,0x77,0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,
1257         0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91,0xA1,0xB1,
1258         0xC1,0x09,0x23,0x33,0x52,0xF0,0x15,0x62,0x72,0xD1,0x0A,0x16,0x24,0x34,
1259         0xE1,0x25,0xF1,0x17,0x18,0x19,0x1A,0x26,0x27,0x28,0x29,0x2A,0x35,0x36,
1260         0x37,0x38,0x39,0x3A,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x53,0x54,
1261         0x55,0x56,0x57,0x58,0x59,0x5A,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,
1262         0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x82,0x83,0x84,0x85,0x86,0x87,
1263         0x88,0x89,0x8A,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0xA2,0xA3,
1264         0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,
1265         0xB9,0xBA,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xD2,0xD3,0xD4,
1266         0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,
1267         0xEA,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA
1268     }
1269 };
1270
1271 static void default_huff_tbl(struct jpeg* p_jpeg)
1272 {
1273
1274     MEMCPY(&p_jpeg->hufftable[0], &luma_table, sizeof(luma_table));
1275     MEMCPY(&p_jpeg->hufftable[1], &chroma_table, sizeof(chroma_table));
1276
1277     return;
1278 }
1279
1280 /* Compute the derived values for a Huffman table */
1281 static void fix_huff_tbl(int* htbl, struct derived_tbl* dtbl)
1282 {
1283     int p, i, l, si;
1284     int lookbits, ctr;
1285     char huffsize[257];
1286     unsigned int huffcode[257];
1287     unsigned int code;
1288
1289     dtbl->pub = htbl; /* fill in back link */
1290
1291     /* Figure C.1: make table of Huffman code length for each symbol */
1292     /* Note that this is in code-length order. */
1293
1294     p = 0;
1295     for (l = 1; l <= 16; l++)
1296     {    /* all possible code length */
1297         for (i = 1; i <= (int) htbl[l-1]; i++)  /* all codes per length */
1298             huffsize[p++] = (char) l;
1299     }
1300     huffsize[p] = 0;
1301
1302     /* Figure C.2: generate the codes themselves */
1303     /* Note that this is in code-length order. */
1304
1305     code = 0;
1306     si = huffsize[0];
1307     p = 0;
1308     while (huffsize[p])
1309     {
1310         while (((int) huffsize[p]) == si)
1311         {
1312             huffcode[p++] = code;
1313             code++;
1314         }
1315         code <<= 1;
1316         si++;
1317     }
1318
1319     /* Figure F.15: generate decoding tables for bit-sequential decoding */
1320
1321     p = 0;
1322     for (l = 1; l <= 16; l++)
1323     {
1324         if (htbl[l-1])
1325         {
1326             /* huffval[] index of 1st symbol of code length l */
1327             dtbl->valptr[l] = p;
1328             dtbl->mincode[l] = huffcode[p]; /* minimum code of length l */
1329             p += htbl[l-1];
1330             dtbl->maxcode[l] = huffcode[p-1]; /* maximum code of length l */
1331         }
1332         else
1333         {
1334             dtbl->maxcode[l] = -1;  /* -1 if no codes of this length */
1335         }
1336     }
1337     dtbl->maxcode[17] = 0xFFFFFL; /* ensures huff_DECODE terminates */
1338
1339     /* Compute lookahead tables to speed up decoding.
1340     * First we set all the table entries to 0, indicating "too long";
1341     * then we iterate through the Huffman codes that are short enough and
1342     * fill in all the entries that correspond to bit sequences starting
1343     * with that code.
1344     */
1345
1346     MEMSET(dtbl->look_nbits, 0, sizeof(dtbl->look_nbits));
1347
1348     p = 0;
1349     for (l = 1; l <= HUFF_LOOKAHEAD; l++)
1350     {
1351         for (i = 1; i <= (int) htbl[l-1]; i++, p++)
1352         {
1353             /* l = current code's length, p = its index in huffcode[] &
1354              * huffval[]. Generate left-justified code followed by all possible
1355              * bit sequences
1356              */
1357             lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l);
1358             for (ctr = 1 << (HUFF_LOOKAHEAD-l); ctr > 0; ctr--)
1359             {
1360                 dtbl->look_nbits[lookbits] = l;
1361                 dtbl->look_sym[lookbits] = htbl[16+p];
1362                 lookbits++;
1363             }
1364         }
1365     }
1366 }
1367
1368
/*
 * zag[i] is the natural-order position of the i'th element of zigzag order.
 *
 * When JPEG_IDCT_TRANSPOSE is defined, a transposed version of the mapping
 * is emitted in front of the regular one, so the array then holds 128
 * entries: the transposed mapping at [0, 64) and the regular one at
 * [64, 128). Without it, only the 64 regular entries exist.
 */
static const unsigned char zag[] =
{
#ifdef JPEG_IDCT_TRANSPOSE
    /* transposed mapping */
      0,   8,   1,   2,   9,  16,  24,  17,   /* zigzag  0.. 7 */
     10,   3,   4,  11,  18,  25,  32,  40,   /* zigzag  8..15 */
     33,  26,  19,  12,   5,   6,  13,  20,   /* zigzag 16..23 */
     27,  34,  41,  48,  56,  49,  42,  35,   /* zigzag 24..31 */
     28,  21,  14,   7,  15,  22,  29,  36,   /* zigzag 32..39 */
     43,  50,  57,  58,  51,  44,  37,  30,   /* zigzag 40..47 */
     23,  31,  38,  45,  52,  59,  60,  53,   /* zigzag 48..55 */
     46,  39,  47,  54,  61,  62,  55,  63,   /* zigzag 56..63 */
#endif
    /* regular mapping */
      0,   1,   8,  16,   9,   2,   3,  10,   /* zigzag  0.. 7 */
     17,  24,  32,  25,  18,  11,   4,   5,   /* zigzag  8..15 */
     12,  19,  26,  33,  40,  48,  41,  34,   /* zigzag 16..23 */
     27,  20,  13,   6,   7,  14,  21,  28,   /* zigzag 24..31 */
     35,  42,  49,  56,  57,  50,  43,  36,   /* zigzag 32..39 */
     29,  22,  15,  23,  30,  37,  44,  51,   /* zigzag 40..47 */
     58,  59,  52,  45,  38,  31,  39,  46,   /* zigzag 48..55 */
     53,  60,  61,  54,  47,  55,  62,  63,   /* zigzag 56..63 */
};
1391
/*
 * zig[i] is the zig-zag order position of the i'th element of natural
 * order, reading left-to-right then top-to-bottom. Each line below is one
 * row of the 8x8 block.
 */
static const unsigned char zig[] =
{
     0,  1,  5,  6, 14, 15, 27, 28,   /* row 0 */
     2,  4,  7, 13, 16, 26, 29, 42,   /* row 1 */
     3,  8, 12, 17, 25, 30, 41, 43,   /* row 2 */
     9, 11, 18, 24, 31, 40, 44, 53,   /* row 3 */
    10, 19, 23, 32, 39, 45, 52, 54,   /* row 4 */
    20, 22, 33, 38, 46, 51, 55, 60,   /* row 5 */
    21, 34, 37, 47, 50, 56, 59, 61,   /* row 6 */
    35, 36, 48, 49, 57, 58, 62, 63    /* row 7 */
};
1406
1407 /* Reformat some image header data so that the decoder can use it properly. */
1408 INLINE void fix_headers(struct jpeg* p_jpeg)
1409 {
1410     int i;
1411
1412     for (i=0; i<4; i++)
1413         p_jpeg->store_pos[i] = i; /* default ordering */
1414
1415     /* assignments for the decoding of blocks */
1416     if (p_jpeg->frameheader[0].horizontal_sampling == 2
1417         && p_jpeg->frameheader[0].vertical_sampling == 1)
1418     {   /* 4:2:2 */
1419         p_jpeg->blocks = 4;
1420         p_jpeg->x_mbl = (p_jpeg->x_size+15) / 16;
1421         p_jpeg->x_phys = p_jpeg->x_mbl * 16;
1422         p_jpeg->y_mbl = (p_jpeg->y_size+7) / 8;
1423         p_jpeg->y_phys = p_jpeg->y_mbl * 8;
1424         p_jpeg->mcu_membership[0] = 0; /* Y1=Y2=0, U=1, V=2 */
1425         p_jpeg->mcu_membership[1] = 0;
1426         p_jpeg->mcu_membership[2] = 1;
1427         p_jpeg->mcu_membership[3] = 2;
1428         p_jpeg->tab_membership[0] = 0; /* DC, DC, AC, AC */
1429         p_jpeg->tab_membership[1] = 0;
1430         p_jpeg->tab_membership[2] = 1;
1431         p_jpeg->tab_membership[3] = 1;
1432         p_jpeg->subsample_x[0] = 1;
1433         p_jpeg->subsample_x[1] = 2;
1434         p_jpeg->subsample_x[2] = 2;
1435         p_jpeg->subsample_y[0] = 1;
1436         p_jpeg->subsample_y[1] = 1;
1437         p_jpeg->subsample_y[2] = 1;
1438     }
1439     if (p_jpeg->frameheader[0].horizontal_sampling == 1
1440         && p_jpeg->frameheader[0].vertical_sampling == 2)
1441     {   /* 4:2:2 vertically subsampled */
1442         p_jpeg->store_pos[1] = 2; /* block positions are mirrored */
1443         p_jpeg->store_pos[2] = 1;
1444         p_jpeg->blocks = 4;
1445         p_jpeg->x_mbl = (p_jpeg->x_size+7) / 8;
1446         p_jpeg->x_phys = p_jpeg->x_mbl * 8;
1447         p_jpeg->y_mbl = (p_jpeg->y_size+15) / 16;
1448         p_jpeg->y_phys = p_jpeg->y_mbl * 16;
1449         p_jpeg->mcu_membership[0] = 0; /* Y1=Y2=0, U=1, V=2 */
1450         p_jpeg->mcu_membership[1] = 0;
1451         p_jpeg->mcu_membership[2] = 1;
1452         p_jpeg->mcu_membership[3] = 2;
1453         p_jpeg->tab_membership[0] = 0; /* DC, DC, AC, AC */
1454         p_jpeg->tab_membership[1] = 0;
1455         p_jpeg->tab_membership[2] = 1;
1456         p_jpeg->tab_membership[3] = 1;
1457         p_jpeg->subsample_x[0] = 1;
1458         p_jpeg->subsample_x[1] = 1;
1459         p_jpeg->subsample_x[2] = 1;
1460         p_jpeg->subsample_y[0] = 1;
1461         p_jpeg->subsample_y[1] = 2;
1462         p_jpeg->subsample_y[2] = 2;
1463     }
1464     else if (p_jpeg->frameheader[0].horizontal_sampling == 2
1465         && p_jpeg->frameheader[0].vertical_sampling == 2)
1466     {   /* 4:2:0 */
1467         p_jpeg->blocks = 6;
1468         p_jpeg->x_mbl = (p_jpeg->x_size+15) / 16;
1469         p_jpeg->x_phys = p_jpeg->x_mbl * 16;
1470         p_jpeg->y_mbl = (p_jpeg->y_size+15) / 16;
1471         p_jpeg->y_phys = p_jpeg->y_mbl * 16;
1472         p_jpeg->mcu_membership[0] = 0;
1473         p_jpeg->mcu_membership[1] = 0;
1474         p_jpeg->mcu_membership[2] = 0;
1475         p_jpeg->mcu_membership[3] = 0;
1476         p_jpeg->mcu_membership[4] = 1;
1477         p_jpeg->mcu_membership[5] = 2;
1478         p_jpeg->tab_membership[0] = 0;
1479         p_jpeg->tab_membership[1] = 0;
1480         p_jpeg->tab_membership[2] = 0;
1481         p_jpeg->tab_membership[3] = 0;
1482         p_jpeg->tab_membership[4] = 1;
1483         p_jpeg->tab_membership[5] = 1;
1484         p_jpeg->subsample_x[0] = 1;
1485         p_jpeg->subsample_x[1] = 2;
1486         p_jpeg->subsample_x[2] = 2;
1487         p_jpeg->subsample_y[0] = 1;
1488         p_jpeg->subsample_y[1] = 2;
1489         p_jpeg->subsample_y[2] = 2;
1490     }
1491     else if (p_jpeg->frameheader[0].horizontal_sampling == 1
1492         && p_jpeg->frameheader[0].vertical_sampling == 1)
1493     {   /* 4:4:4 */
1494         /* don't overwrite p_jpeg->blocks */
1495         p_jpeg->x_mbl = (p_jpeg->x_size+7) / 8;
1496         p_jpeg->x_phys = p_jpeg->x_mbl * 8;
1497         p_jpeg->y_mbl = (p_jpeg->y_size+7) / 8;
1498         p_jpeg->y_phys = p_jpeg->y_mbl * 8;
1499         p_jpeg->mcu_membership[0] = 0;
1500         p_jpeg->mcu_membership[1] = 1;
1501         p_jpeg->mcu_membership[2] = 2;
1502         p_jpeg->tab_membership[0] = 0;
1503         p_jpeg->tab_membership[1] = 1;
1504         p_jpeg->tab_membership[2] = 1;
1505         p_jpeg->subsample_x[0] = 1;
1506         p_jpeg->subsample_x[1] = 1;
1507         p_jpeg->subsample_x[2] = 1;
1508         p_jpeg->subsample_y[0] = 1;
1509         p_jpeg->subsample_y[1] = 1;
1510         p_jpeg->subsample_y[2] = 1;
1511     }
1512     else
1513     {
1514         /* error */
1515     }
1516
1517 }
1518
1519 INLINE void fix_huff_tables(struct jpeg *p_jpeg)
1520 {
1521     fix_huff_tbl(p_jpeg->hufftable[0].huffmancodes_dc,
1522         &p_jpeg->dc_derived_tbls[0]);
1523     fix_huff_tbl(p_jpeg->hufftable[0].huffmancodes_ac,
1524         &p_jpeg->ac_derived_tbls[0]);
1525     fix_huff_tbl(p_jpeg->hufftable[1].huffmancodes_dc,
1526         &p_jpeg->dc_derived_tbls[1]);
1527     fix_huff_tbl(p_jpeg->hufftable[1].huffmancodes_ac,
1528         &p_jpeg->ac_derived_tbls[1]);
1529 }
1530
1531 /* Because some of the IDCT routines never multiply by any constants, and
1532  * therefore do not produce shifted output, we add the shift into the
1533  * quantization table when one of these IDCT routines is used, rather than
1534  * have the IDCT shift each value it processes.
1535  */
1536 INLINE void fix_quant_tables(struct jpeg *p_jpeg)
1537 {
1538     int shift, i, j;
1539     for (i = 0; i < 2; i++)
1540     {
1541         shift = idct_tbl[p_jpeg->v_scale[i]].scale;
1542         if (shift)
1543         {
1544             for (j = 0; j < 64; j++)
1545                 p_jpeg->quanttable[i][j] <<= shift;
1546         }
1547     }
1548 }
1549
1550 /*
1551 * These functions/macros provide the in-line portion of bit fetching.
1552 * Use check_bit_buffer to ensure there are N bits in get_buffer
1553 * before using get_bits, peek_bits, or drop_bits.
1554 *  check_bit_buffer(state,n,action);
1555 *    Ensure there are N bits in get_buffer; if suspend, take action.
1556 *  val = get_bits(n);
1557 *    Fetch next N bits.
1558 *  val = peek_bits(n);
1559 *    Fetch next N bits without removing them from the buffer.
1560 *  drop_bits(n);
1561 *    Discard next N bits.
1562 * The value N should be a simple variable, not an expression, because it
1563 * is evaluated multiple times.
1564 */
1565
1566 static void fill_bit_buffer(struct jpeg* p_jpeg)
1567 {
1568     unsigned char byte, marker;
1569
1570     if (p_jpeg->marker_val)
1571         p_jpeg->marker_ind += 16;
1572     byte = d_getc(p_jpeg, 0);
1573     if (UNLIKELY(byte == 0xFF)) /* legal marker can be byte stuffing or RSTm */
1574     {   /* simplification: just skip the (one-byte) marker code */
1575         marker = d_getc(p_jpeg, 0);
1576         if ((marker & ~7) == 0xD0)
1577         {
1578             p_jpeg->marker_val = marker;
1579             p_jpeg->marker_ind = 8;
1580         }
1581     }
1582     p_jpeg->bitbuf = (p_jpeg->bitbuf << 8) | byte;
1583
1584     byte = d_getc(p_jpeg, 0);
1585     if (UNLIKELY(byte == 0xFF)) /* legal marker can be byte stuffing or RSTm */
1586     {   /* simplification: just skip the (one-byte) marker code */
1587         marker = d_getc(p_jpeg, 0);
1588         if ((marker & ~7) == 0xD0)
1589         {
1590             p_jpeg->marker_val = marker;
1591             p_jpeg->marker_ind = 0;
1592         }
1593     }
1594     p_jpeg->bitbuf = (p_jpeg->bitbuf << 8) | byte;
1595     p_jpeg->bitbuf_bits += 16;
1596 #ifdef JPEG_BS_DEBUG
1597     DEBUGF("read in: %04X\n", p_jpeg->bitbuf & 0xFFFF);
1598 #endif
1599 }
1600
1601 INLINE void check_bit_buffer(struct jpeg *p_jpeg, int nbits)
1602 {
1603     if (nbits > p_jpeg->bitbuf_bits)
1604         fill_bit_buffer(p_jpeg);
1605 }
1606
1607 INLINE int get_bits(struct jpeg *p_jpeg, int nbits)
1608 {
1609 #ifdef JPEG_BS_DEBUG
1610     if (nbits > p_jpeg->bitbuf_bits)
1611         DEBUGF("bitbuffer underrun\n");
1612     int mask = BIT_N(p_jpeg->bitbuf_bits - 1);
1613     int i;
1614     DEBUGF("get %d bits: ", nbits);
1615     for (i = 0; i < nbits; i++)
1616         DEBUGF("%d",!!(p_jpeg->bitbuf & (mask >>= 1)));
1617     DEBUGF("\n");
1618 #endif
1619     return ((int) (p_jpeg->bitbuf >> (p_jpeg->bitbuf_bits -= nbits))) &
1620         (BIT_N(nbits)-1);
1621 }
1622
1623 INLINE int peek_bits(struct jpeg *p_jpeg, int nbits)
1624 {
1625 #ifdef JPEG_BS_DEBUG
1626     int mask = BIT_N(p_jpeg->bitbuf_bits - 1);
1627     int i;
1628     DEBUGF("peek %d bits: ", nbits);
1629     for (i = 0; i < nbits; i++)
1630         DEBUGF("%d",!!(p_jpeg->bitbuf & (mask >>= 1)));
1631     DEBUGF("\n");
1632 #endif
1633     return ((int) (p_jpeg->bitbuf >> (p_jpeg->bitbuf_bits - nbits))) &
1634         (BIT_N(nbits)-1);
1635 }
1636
1637 INLINE void drop_bits(struct jpeg *p_jpeg, int nbits)
1638 {
1639 #ifdef JPEG_BS_DEBUG
1640     int mask = BIT_N(p_jpeg->bitbuf_bits - 1);
1641     int i;
1642     DEBUGF("drop %d bits: ", nbits);
1643     for (i = 0; i < nbits; i++)
1644         DEBUGF("%d",!!(p_jpeg->bitbuf & (mask >>= 1)));
1645     DEBUGF("\n");
1646 #endif
1647     p_jpeg->bitbuf_bits -= nbits;
1648 }
1649
1650 /* re-synchronize to entropy data (skip restart marker) */
1651 static void search_restart(struct jpeg *p_jpeg)
1652 {
1653     if (p_jpeg->marker_val)
1654     {
1655         p_jpeg->marker_val = 0;
1656         p_jpeg->bitbuf_bits = p_jpeg->marker_ind;
1657         p_jpeg->marker_ind = 0;
1658         return;
1659     }
1660     unsigned char byte;
1661     p_jpeg->bitbuf_bits = 0;
1662     while ((byte = d_getc(p_jpeg, 0xFF)))
1663     {
1664         if (byte == 0xff)
1665         {
1666             byte = d_getc(p_jpeg, 0xD0);
1667             if ((byte & ~7) == 0xD0)
1668             {
1669                 return;
1670             }
1671             else
1672                 putc(p_jpeg);
1673         }
1674     }
1675 }
1676
/* Figure F.12: extend sign bit.
 *
 * HUFF_EXTEND(x, s) converts the s-bit magnitude field x (0 <= x < 2**s)
 * that follows a Huffman symbol into the signed coefficient value:
 * values below 2**(s-1) are negative and map to x - 2**s + 1, all others are
 * returned unchanged.  s == 0 (no extra bits, x == 0) yields 0.
 *
 * Neither variant shifts a negative value or shifts by a negative count, so
 * the s == 0 case is well defined.
 */
#if CONFIG_CPU == SH7034
/* SH1 lacks a variable-shift instruction */
#define HUFF_EXTEND(x,s)  ((x) < extend_test[s] ? (x) + extend_offset[s] : (x))

static const int extend_test[16] =   /* entry n is 2**(n-1) */
{
    0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
    0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000
};

static const int extend_offset[16] = /* entry n is 1 - 2**n */
{
    0, 1 - (1<<1), 1 - (1<<2), 1 - (1<<3), 1 - (1<<4),
    1 - (1<<5), 1 - (1<<6), 1 - (1<<7), 1 - (1<<8),
    1 - (1<<9), 1 - (1<<10), 1 - (1<<11), 1 - (1<<12),
    1 - (1<<13), 1 - (1<<14), 1 - (1<<15)
};
#else
/* This saves some code and data size, benchmarks about the same on RAM */
#define HUFF_EXTEND(x,s) \
({ \
    int x__ = (x); \
    int s__ = (s); \
    /* (1 << s) >> 1 is 2**(s-1) for s > 0 and 0 for s == 0 */ \
    x__ < ((1 << s__) >> 1) ? x__ + 1 - (1 << s__) : x__; \
})
#endif
1704
/* Decode a single DC value.
 *
 *   p_jpeg: decoder state (bit buffer)
 *   tbl:    pointer to the derived Huffman table to decode with
 *   s:      lvalue receiving the decoded symbol, i.e. the number of
 *           magnitude bits that follow
 *   r:      lvalue receiving those s raw magnitude bits
 *
 * The next HUFF_LOOKAHEAD bits index the look_nbits/look_sym tables; a
 * non-zero look_nbits entry gives the code length directly.  Otherwise the
 * code is grown bit by bit against maxcode[].  A code longer than 16 bits is
 * treated as an error and decoded as s = 0, r = 0.
 */
#define huff_decode_dc(p_jpeg, tbl, s, r) \
{ \
    int hd_look, hd_len; \
\
    check_bit_buffer((p_jpeg), HUFF_LOOKAHEAD); \
    hd_look = peek_bits((p_jpeg), HUFF_LOOKAHEAD); \
    hd_len = (tbl)->look_nbits[hd_look]; \
    if (hd_len == 0) \
    {   /* slow path: code is longer than the lookahead */ \
        long hd_code; \
        hd_len = HUFF_LOOKAHEAD + 1; \
        check_bit_buffer((p_jpeg), hd_len); \
        hd_code = get_bits((p_jpeg), hd_len); \
        for (; hd_code > (tbl)->maxcode[hd_len]; hd_len++) \
        { \
            hd_code <<= 1; \
            check_bit_buffer((p_jpeg), 1); \
            hd_code |= get_bits((p_jpeg), 1); \
        } \
        if (hd_len <= 16) \
        { \
            s = (tbl)->pub[16 + (tbl)->valptr[hd_len] + \
                ((int) (hd_code - (tbl)->mincode[hd_len]))]; \
            check_bit_buffer((p_jpeg), s); \
            r = get_bits((p_jpeg), s); \
        } else { \
            /* error in Huffman: fake a zero, this is most safe */ \
            r = 0; s = 0; \
        } \
    } else { \
        /* fast path: symbol found through the lookahead table */ \
        drop_bits((p_jpeg), hd_len); \
        s = (tbl)->look_sym[hd_look]; \
        check_bit_buffer((p_jpeg), s); \
        r = get_bits((p_jpeg), s); \
    } \
}
1742
/* Decode a single AC Huffman symbol into the lvalue s.
 *
 * Same table walk as huff_decode_dc, but only the symbol is produced; the
 * caller splits it into run length and size and fetches or drops the
 * magnitude bits itself.  A code longer than 16 bits is treated as an error
 * and decoded as s = 0.
 */
#define huff_decode_ac(p_jpeg, tbl, s) \
{ \
    int hd_look, hd_len; \
\
    check_bit_buffer((p_jpeg), HUFF_LOOKAHEAD); \
    hd_look = peek_bits((p_jpeg), HUFF_LOOKAHEAD); \
    hd_len = (tbl)->look_nbits[hd_look]; \
    if (hd_len == 0) \
    {   /* slow path: code is longer than the lookahead */ \
        long hd_code; \
        hd_len = HUFF_LOOKAHEAD + 1; \
        check_bit_buffer((p_jpeg), hd_len); \
        hd_code = get_bits((p_jpeg), hd_len); \
        for (; hd_code > (tbl)->maxcode[hd_len]; hd_len++) \
        { \
            hd_code <<= 1; \
            check_bit_buffer((p_jpeg), 1); \
            hd_code |= get_bits((p_jpeg), 1); \
        } \
        if (hd_len <= 16) \
        { \
            s = (tbl)->pub[16 + (tbl)->valptr[hd_len] + \
                ((int) (hd_code - (tbl)->mincode[hd_len]))]; \
        } else { \
            /* error in Huffman: fake a zero, this is most safe */ \
            s = 0; \
        } \
    } else { \
        /* fast path: symbol found through the lookahead table */ \
        drop_bits((p_jpeg), hd_len); \
        s = (tbl)->look_sym[hd_look]; \
    } \
}
1775
1776 static struct img_part *store_row_jpeg(void *jpeg_args)
1777 {
1778     struct jpeg *p_jpeg = (struct jpeg*) jpeg_args;
1779 #ifdef HAVE_LCD_COLOR
1780     int mcu_hscale = p_jpeg->h_scale[1];
1781     int mcu_vscale = p_jpeg->v_scale[1];
1782 #else
1783     int mcu_hscale = (p_jpeg->h_scale[0] +
1784         p_jpeg->frameheader[0].horizontal_sampling - 1);
1785     int mcu_vscale = (p_jpeg->v_scale[0] +
1786         p_jpeg->frameheader[0].vertical_sampling - 1);
1787 #endif
1788     unsigned int width = p_jpeg->x_mbl << mcu_hscale;
1789     unsigned int b_width = width * JPEG_PIX_SZ;
1790     int height = BIT_N(mcu_vscale);
1791     int x;
1792     if (!p_jpeg->mcu_row) /* Need to decode a new row of MCUs */
1793     {
1794         p_jpeg->out_ptr = (unsigned char *)p_jpeg->img_buf;
1795         int store_offs[4];
1796 #ifdef HAVE_LCD_COLOR
1797         unsigned mcu_width = BIT_N(mcu_hscale);
1798 #endif
1799         int mcu_offset = JPEG_PIX_SZ << mcu_hscale;
1800         unsigned char *out = p_jpeg->out_ptr;
1801         store_offs[p_jpeg->store_pos[0]] = 0;
1802         store_offs[p_jpeg->store_pos[1]] = JPEG_PIX_SZ << p_jpeg->h_scale[0];
1803         store_offs[p_jpeg->store_pos[2]] = b_width << p_jpeg->v_scale[0];
1804         store_offs[p_jpeg->store_pos[3]] = store_offs[1] + store_offs[2];
1805         /* decoded DCT coefficients */
1806         int16_t block[IDCT_WS_SIZE] __attribute__((aligned(8)));
1807         for (x = 0; x < p_jpeg->x_mbl; x++)
1808         {
1809             int blkn;
1810             for (blkn = 0; blkn < p_jpeg->blocks; blkn++)
1811             {
1812                 int ci = p_jpeg->mcu_membership[blkn]; /* component index */
1813                 int ti = p_jpeg->tab_membership[blkn]; /* table index */
1814 #ifdef JPEG_IDCT_TRANSPOSE
1815                 bool transpose = p_jpeg->v_scale[!!ci] > 2;
1816 #endif
1817                 int k = 1; /* coefficient index */
1818                 int s, r; /* huffman values */
1819                 struct derived_tbl* dctbl = &p_jpeg->dc_derived_tbls[ti];
1820                 struct derived_tbl* actbl = &p_jpeg->ac_derived_tbls[ti];
1821
1822                 /* Section F.2.2.1: decode the DC coefficient difference */
1823                 huff_decode_dc(p_jpeg, dctbl, s, r);
1824
1825 #ifndef HAVE_LCD_COLOR
1826                 if (!ci)
1827 #endif
1828                 {
1829                     s = HUFF_EXTEND(r, s);
1830 #ifdef HAVE_LCD_COLOR
1831                     p_jpeg->last_dc_val[ci] += s;
1832                     /* output it (assumes zag[0] = 0) */
1833                     block[0] = MULTIPLY16(p_jpeg->last_dc_val[ci],
1834                         p_jpeg->quanttable[!!ci][0]);
1835 #else
1836                     p_jpeg->last_dc_val += s;
1837                     /* output it (assumes zag[0] = 0) */
1838                     block[0] = MULTIPLY16(p_jpeg->last_dc_val,
1839                         p_jpeg->quanttable[0][0]);
1840 #endif
1841                     /* coefficient buffer must be cleared */
1842                     MEMSET(block+1, 0, p_jpeg->zero_need[!!ci] * sizeof(int));
1843                     /* Section F.2.2.2: decode the AC coefficients */
1844                     while(true)
1845                     {
1846                         huff_decode_ac(p_jpeg, actbl, s);
1847                         r = s >> 4;
1848                         s &= 15;
1849                         k += r;
1850                         if (s)
1851                         {
1852                             check_bit_buffer(p_jpeg, s);
1853                             if (k >= p_jpeg->k_need[!!ci])
1854                                 goto skip_rest;
1855                             r = get_bits(p_jpeg, s);
1856                             r = HUFF_EXTEND(r, s);
1857                             r = MULTIPLY16(r, p_jpeg->quanttable[!!ci][k]);
1858 #ifdef JPEG_IDCT_TRANSPOSE
1859                             block[zag[transpose ? k : k + 64]] = r ;
1860 #else
1861                             block[zag[k]] = r ;
1862 #endif
1863                         }
1864                         else
1865                         {
1866                             if (r != 15)
1867                                 goto block_end;
1868                         }
1869                         if ((++k) & 64)
1870                             goto block_end;
1871                     }  /* for k */
1872                 }
1873                 for (; k < 64; k++)
1874                 {
1875                     huff_decode_ac(p_jpeg, actbl, s);
1876                     r = s >> 4;
1877                     s &= 15;
1878
1879                     if (s)
1880                     {
1881                         k += r;
1882                         check_bit_buffer(p_jpeg, s);
1883 skip_rest:
1884                         drop_bits(p_jpeg, s);
1885                     }
1886                     else
1887                     {
1888                         if (r != 15)
1889                             break;
1890                         k += r;
1891                     }
1892                 }  /* for k */
1893 block_end:
1894 #ifndef HAVE_LCD_COLOR
1895                 if (!ci)
1896 #endif
1897                 {
1898                     int idct_cols = BIT_N(MIN(p_jpeg->h_scale[!!ci], 3));
1899                     int idct_rows = BIT_N(p_jpeg->v_scale[!!ci]);
1900                     unsigned char *b_out = out + (ci ? ci : store_offs[blkn]);
1901                     if (idct_tbl[p_jpeg->v_scale[!!ci]].v_idct)
1902 #ifdef JPEG_IDCT_TRANSPOSE
1903                         idct_tbl[p_jpeg->v_scale[!!ci]].v_idct(block,
1904                             transpose ? block + 8 * idct_cols
1905                                       : block + idct_cols);
1906                     uint16_t * h_block = transpose ? block + 64 : block;
1907                     idct_tbl[p_jpeg->h_scale[!!ci]].h_idct(h_block, b_out,
1908                         h_block + idct_rows * 8, b_width);
1909 #else
1910                         idct_tbl[p_jpeg->v_scale[!!ci]].v_idct(block,
1911                             block + idct_cols);
1912                     idct_tbl[p_jpeg->h_scale[!!ci]].h_idct(block, b_out,
1913                         block + idct_rows * 8, b_width);
1914 #endif
1915                 }
1916             } /* for blkn */
1917             /* don't starve other threads while an MCU row decodes */
1918             yield();
1919 #ifdef HAVE_LCD_COLOR
1920             unsigned int xp;
1921             int yp;
1922             unsigned char *row = out;
1923             if (p_jpeg->blocks == 1)
1924             {
1925                 for (yp = 0; yp < height; yp++, row += b_width)
1926                 {
1927                     unsigned char *px = row;
1928                     for (xp = 0; xp < mcu_width; xp++, px += JPEG_PIX_SZ)
1929                     {
1930                         px[1] = px[2] = px[0];
1931                     }
1932                 }
1933             }
1934 #endif
1935             out += mcu_offset;
1936             if (p_jpeg->restart_interval && --p_jpeg->restart == 0)
1937             {   /* if a restart marker is due: */
1938                 p_jpeg->restart = p_jpeg->restart_interval; /* count again */
1939                 search_restart(p_jpeg); /* align the bitstream */
1940 #ifdef HAVE_LCD_COLOR
1941                 p_jpeg->last_dc_val[0] = p_jpeg->last_dc_val[1] =
1942                                  p_jpeg->last_dc_val[2] = 0; /* reset decoder */
1943 #else
1944                 p_jpeg->last_dc_val = 0;
1945 #endif
1946             }
1947         }
1948     } /* if !p_jpeg->mcu_row */
1949     p_jpeg->mcu_row = (p_jpeg->mcu_row + 1) & (height - 1);
1950     p_jpeg->part.len = width;
1951     p_jpeg->part.buf = (jpeg_pix_t *)p_jpeg->out_ptr;
1952     p_jpeg->out_ptr += b_width;
1953     return &(p_jpeg->part);
1954 }
1955
1956 /******************************************************************************
1957  * read_jpeg_file()
1958  *
1959  * Reads a JPEG file and puts the data in rockbox format in *bitmap.
1960  *
1961  *****************************************************************************/
1962 #ifndef JPEG_FROM_MEM
1963 int read_jpeg_file(const char* filename,
1964                    struct bitmap *bm,
1965                    int maxsize,
1966                    int format,
1967                    const struct custom_format *cformat)
1968 {
1969     int fd, ret;
1970     fd = open(filename, O_RDONLY);
1971     JDEBUGF("read_jpeg_file: filename: %s buffer len: %d cformat: %p\n",
1972         filename, maxsize, cformat);
1973     /* Exit if file opening failed */
1974     if (fd < 0) {
1975         DEBUGF("read_jpeg_file: can't open '%s', rc: %d\n", filename, fd);
1976         return fd * 10 - 1;
1977     }
1978
1979     ret = read_jpeg_fd(fd, bm, maxsize, format, cformat);
1980     close(fd);
1981     return ret;
1982 }
1983 #endif
1984
/* Pick the IDCT scale exponent for one dimension.
 *
 * Returns the smallest scale in 0..3 for which (in_size << scale) is at
 * least out_size * 8, or 3 if none is.  The caller uses the result as the
 * log2 of the IDCT output size, i.e. the decoded size is
 * (in_size << scale) >> 3.
 */
static int calc_scale(int in_size, int out_size)
{
    const int target = out_size << 3;
    int scale = 0;

    while (scale < 3 && target > in_size)
    {
        in_size <<= 1;
        scale++;
    }
    return scale;
}
1998
1999 #ifdef JPEG_FROM_MEM
2000 int get_jpeg_dim_mem(unsigned char *data, unsigned long len,
2001                      struct dim *size)
2002 {
2003     struct jpeg *p_jpeg = &jpeg;
2004     memset(p_jpeg, 0, sizeof(struct jpeg));
2005     p_jpeg->data = data;
2006     p_jpeg->len = len;
2007     int status = process_markers(p_jpeg);
2008     if (status < 0)
2009         return status;
2010     if ((status & (DQT | SOF0)) != (DQT | SOF0))
2011         return -(status * 16);
2012     size->width = p_jpeg->x_size;
2013     size->height = p_jpeg->y_size;
2014     return 0;
2015 }
2016
/* Decode a JPEG image into bm.
 *
 * Built as decode_jpeg_mem(data, len, ...) when JPEG_FROM_MEM is defined and
 * as read_jpeg_fd(fd, ...) otherwise.
 *
 * bm:      target bitmap; bm->data is the output buffer.  With
 *          FORMAT_RESIZE, bm->width / bm->height are the requested size on
 *          entry; on return they hold the size actually produced.
 * maxsize: size in bytes of the buffer at bm->data.  The space behind the
 *          finished bitmap is used as working memory (decoder state when
 *          reading from a file, one MCU row of decoded pixels, and whatever
 *          resize_on_load() is given).
 * format:  FORMAT_RESIZE, FORMAT_DITHER and FORMAT_KEEP_ASPECT are tested.
 * cformat: optional custom output format supplying get_size() and
 *          output_row_8(); may be NULL.
 *
 * Returns the bitmap size in bytes on success, the negative result of
 * process_markers(), -(status * 16) when the DQT or SOF0 marker is missing,
 * -1 when the buffer is too small, or 0 when resize_on_load() returns zero.
 */
int decode_jpeg_mem(unsigned char *data, unsigned long len,
#else
int read_jpeg_fd(int fd,
#endif
                 struct bitmap *bm,
                 int maxsize,
                 int format,
                 const struct custom_format *cformat)
{
    bool resize = false, dither = false;
    struct rowset rset;
    struct dim src_dim;
    int status;
    int bm_size;
#ifdef JPEG_FROM_MEM
    struct jpeg *p_jpeg = &jpeg;
#else
    /* the decoder state temporarily lives at the start of the output buffer
     * until the final bitmap size is known (it is moved further down) */
    struct jpeg *p_jpeg = (struct jpeg*)bm->data;
    int tmp_size = maxsize;
    ALIGN_BUFFER(p_jpeg, tmp_size, sizeof(int));
    /* not enough memory for our struct jpeg */
    if ((size_t)tmp_size < sizeof(struct jpeg))
        return -1;
#endif
    memset(p_jpeg, 0, sizeof(struct jpeg));
#ifdef JPEG_FROM_MEM
    p_jpeg->data = data;
    p_jpeg->len = len;
#else
    p_jpeg->fd = fd;
#endif
    status = process_markers(p_jpeg);
#ifndef JPEG_FROM_MEM
    JDEBUGF("position in file: %d buffer fill: %d\n",
        (int)lseek(p_jpeg->fd, 0, SEEK_CUR), p_jpeg->buf_left);
#endif
    if (status < 0)
        return status;
    /* both a quantization table and a baseline frame header are required */
    if ((status & (DQT | SOF0)) != (DQT | SOF0))
        return -(status * 16);
    if (!(status & DHT)) /* if no Huffman table present: */
        default_huff_tbl(p_jpeg); /* use default */
    fix_headers(p_jpeg); /* derive Huffman and other lookup-tables */
    src_dim.width = p_jpeg->x_size;
    src_dim.height = p_jpeg->y_size;
    if (format & FORMAT_RESIZE)
        resize = true;
    if (format & FORMAT_DITHER)
        dither = true;
    /* settle the output dimensions */
    if (resize) {
        struct dim resize_dim = {
            .width = bm->width,
            .height = bm->height,
        };
        if (format & FORMAT_KEEP_ASPECT)
            recalc_dimension(&resize_dim, &src_dim);
        bm->width = resize_dim.width;
        bm->height = resize_dim.height;
    } else {
        bm->width = p_jpeg->x_size;
        bm->height = p_jpeg->y_size;
    }
    /* choose the luma IDCT size (1, 2, 4 or 8 pixels per block side) */
    p_jpeg->h_scale[0] = calc_scale(p_jpeg->x_size, bm->width);
    p_jpeg->v_scale[0] = calc_scale(p_jpeg->y_size, bm->height);
    JDEBUGF("luma IDCT size: %dx%d\n", BIT_N(p_jpeg->h_scale[0]),
        BIT_N(p_jpeg->v_scale[0]));
    /* if the scaled IDCT already yields the requested size, no further
     * resizing is needed */
    if ((p_jpeg->x_size << p_jpeg->h_scale[0]) >> 3 == bm->width &&
        (p_jpeg->y_size << p_jpeg->v_scale[0]) >> 3 == bm->height)
        resize = false;
#ifdef HAVE_LCD_COLOR
    /* chroma scale: luma scale plus the luma sampling factor minus one */
    p_jpeg->h_scale[1] = p_jpeg->h_scale[0] +
        p_jpeg->frameheader[0].horizontal_sampling - 1;
    p_jpeg->v_scale[1] = p_jpeg->v_scale[0] +
        p_jpeg->frameheader[0].vertical_sampling - 1;
    JDEBUGF("chroma IDCT size: %dx%d\n", BIT_N(p_jpeg->h_scale[1]),
        BIT_N(p_jpeg->v_scale[1]));
#endif
    JDEBUGF("scaling from %dx%d -> %dx%d\n",
        (p_jpeg->x_size << p_jpeg->h_scale[0]) >> 3,
        (p_jpeg->y_size << p_jpeg->v_scale[0]) >> 3,
        bm->width, bm->height);
    fix_quant_tables(p_jpeg);
    /* highest column / row index of the coefficients the luma IDCT uses */
    int decode_w = BIT_N(p_jpeg->h_scale[0]) - 1;
    int decode_h = BIT_N(p_jpeg->v_scale[0]) - 1;
    /* from here on src_dim is the size delivered by the scaled IDCT */
    src_dim.width = (p_jpeg->x_size << p_jpeg->h_scale[0]) >> 3;
    src_dim.height = (p_jpeg->y_size << p_jpeg->v_scale[0]) >> 3;
    /* zero_need: number of coefficients after block[0] that must be cleared
     * per block; k_need: zig[] entry of the last needed coefficient, used by
     * store_row_jpeg() as the bound from which coefficients are skipped */
#ifdef JPEG_IDCT_TRANSPOSE
    if (p_jpeg->v_scale[0] > 2)
        p_jpeg->zero_need[0] = (decode_w << 3) + decode_h;
    else
#endif
        p_jpeg->zero_need[0] = (decode_h << 3) + decode_w;
    p_jpeg->k_need[0] = zig[(decode_h << 3) + decode_w];
    JDEBUGF("need luma components to %d\n", p_jpeg->k_need[0]);
#ifdef HAVE_LCD_COLOR
    /* same for chroma, with the IDCT size capped at 8 */
    decode_w = BIT_N(MIN(p_jpeg->h_scale[1],3)) - 1;
    decode_h = BIT_N(MIN(p_jpeg->v_scale[1],3)) - 1;
    if (p_jpeg->v_scale[1] > 2)
        p_jpeg->zero_need[1] = (decode_w << 3) + decode_h;
    else
        p_jpeg->zero_need[1] = (decode_h << 3) + decode_w;
    p_jpeg->k_need[1] = zig[(decode_h << 3) + decode_w];
    JDEBUGF("need chroma components to %d\n", p_jpeg->k_need[1]);
#endif
    /* size of the finished bitmap at the start of the buffer */
    if (cformat)
        bm_size = cformat->get_size(bm);
    else
        bm_size = BM_SIZE(bm->width,bm->height,FORMAT_NATIVE,false);
    if (bm_size > maxsize)
        return -1;
    /* everything behind the bitmap is working memory */
    char *buf_start = (char *)bm->data + bm_size;
    char *buf_end = (char *)bm->data + maxsize;
    maxsize = buf_end - buf_start;
#ifndef JPEG_FROM_MEM
    /* move the decoder state out of the bitmap area; memmove because the
     * old and new locations may overlap */
    ALIGN_BUFFER(buf_start, maxsize, sizeof(uint32_t));
    if (maxsize < (int)sizeof(struct jpeg))
        return -1;
    memmove(buf_start, p_jpeg, sizeof(struct jpeg));
    p_jpeg = (struct jpeg *)buf_start;
    buf_start += sizeof(struct jpeg);
    maxsize = buf_end - buf_start;
#endif
    fix_huff_tables(p_jpeg);
    /* buffer for one decoded row of MCUs */
#ifdef HAVE_LCD_COLOR
    int decode_buf_size = (p_jpeg->x_mbl << p_jpeg->h_scale[1])
        << p_jpeg->v_scale[1];
#else
    int decode_buf_size = (p_jpeg->x_mbl << p_jpeg->h_scale[0])
        << p_jpeg->v_scale[0];
    decode_buf_size <<= p_jpeg->frameheader[0].horizontal_sampling +
        p_jpeg->frameheader[0].vertical_sampling - 2;
#endif
    decode_buf_size *= JPEG_PIX_SZ;
    JDEBUGF("decode buffer size: %d\n", decode_buf_size);
    p_jpeg->img_buf = (jpeg_pix_t *)buf_start;
    if (buf_end - buf_start < decode_buf_size)
        return -1;
    buf_start += decode_buf_size;
    maxsize = buf_end - buf_start;
    memset(p_jpeg->img_buf, 0, decode_buf_size);
    p_jpeg->mcu_row = 0;
    p_jpeg->restart = p_jpeg->restart_interval;
    /* output all rows, top to bottom */
    rset.rowstart = 0;
    rset.rowstop = bm->height;
    rset.rowstep = 1;
    p_jpeg->resize = resize;
    if (resize)
    {
        /* the scaler pulls rows from store_row_jpeg() and gets the rest of
         * the buffer as working memory */
        if (resize_on_load(bm, dither, &src_dim, &rset, buf_start, maxsize,
            cformat, IF_PIX_FMT(p_jpeg->blocks == 1 ? 0 : 1,) store_row_jpeg,
            p_jpeg))
            return bm_size;
    } else {
        /* no resizing: feed each decoded row straight to the output routine */
        int row;
        struct scaler_context ctx = {
            .bm = bm,
            .dither = dither,
        };
#if LCD_DEPTH > 1
        void (*output_row_8)(uint32_t, void*, struct scaler_context*) =
            output_row_8_native;
#elif defined(PLUGIN)
        void (*output_row_8)(uint32_t, void*, struct scaler_context*) = NULL;
#endif
#if LCD_DEPTH > 1 || defined(PLUGIN)
        if (cformat)
            output_row_8 = cformat->output_row_8;
#endif
        struct img_part *part;
        for (row = 0; row < bm->height; row++)
        {
            part = store_row_jpeg(p_jpeg);
#ifdef HAVE_LCD_COLOR
            if (p_jpeg->blocks > 1)
            {
                /* multi-block MCUs are decoded as Y/U/V stored in the
                 * blue/green/red fields; convert the row to RGB in place */
                struct uint8_rgb *qp = part->buf;
                struct uint8_rgb *end = qp + bm->width;
                uint8_t y, u, v;
                unsigned r, g, b;
                for (; qp < end; qp++)
                {
                    y = qp->blue;
                    u = qp->green;
                    v = qp->red;
                    yuv_to_rgb(y, u, v, &r, &g, &b);
                    qp->red = r;
                    qp->blue = b;
                    qp->green = g;
                }
            }
#endif
            output_row_8(row, part->buf, &ctx);
        }
        return bm_size;
    }
    /* resize_on_load() reported failure */
    return 0;
}
2214
2215 /**************** end JPEG code ********************/