compiler/libjpeg/main/jcdctmgr.c

   1 /*
   2     $Id$
   3 */
   4
   5 /*
   6  * jcdctmgr.c
   7  *
   8  * Copyright (C) 1994-1998, Thomas G. Lane.
   9  * This file is part of the Independent JPEG Group's software.
  10  * For conditions of distribution and use, see the accompanying README file.
  11  *
  12  * This file contains the forward-DCT management logic.
  13  * This code selects a particular DCT implementation to be used,
  14  * and it performs related housekeeping chores including coefficient
  15  * quantization.
  16  */
  17
  18 #define JPEG_INTERNALS
  19 #include "jinclude.h"
  20 #include "jpeglib.h"
  21 #include "jlossy.h"             /* Private declarations for lossy codec */
  22 #include "jdct.h"               /* Private declarations for DCT subsystem */
  23
  24
  25 /* Private subobject for this module */
  26
  27 typedef struct {
  28   /* Pointer to the DCT routine actually in use */
  29   forward_DCT_method_ptr do_dct;
  30
  31   /* The actual post-DCT divisors --- not identical to the quant table
  32    * entries, because of scaling (especially for an unnormalized DCT).
  33    * Each table is given in normal array order.
  34    */
  35   DCTELEM * divisors[NUM_QUANT_TBLS];
  36
  37 #ifdef DCT_FLOAT_SUPPORTED
  38   /* Same as above for the floating-point case. */
  39   float_DCT_method_ptr do_float_dct;
  40   FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
  41 #endif
  42 } fdct_controller;
  43
  44 typedef fdct_controller * fdct_ptr;
  45
  46
  47 /*
  48  * Initialize for a processing pass.
  49  * Verify that all referenced Q-tables are present, and set up
  50  * the divisor table for each one.
  51  * In the current implementation, DCT of all components is done during
  52  * the first pass, even if only some components will be output in the
  53  * first scan.  Hence all components should be examined here.
  54  */
  55
  56 METHODDEF(void)
  57 start_pass_fdctmgr (j_compress_ptr cinfo)
  58 {
  59   j_lossy_c_ptr lossyc = (j_lossy_c_ptr) cinfo->codec;
  60   fdct_ptr fdct = (fdct_ptr) lossyc->fdct_private;
  61   int ci, qtblno, i;
  62   jpeg_component_info *compptr;
  63   JQUANT_TBL * qtbl;
  64   DCTELEM * dtbl;
  65
  66   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
  67        ci++, compptr++) {
  68     qtblno = compptr->quant_tbl_no;
  69     /* Make sure specified quantization table is present */
  70     if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
  71         cinfo->quant_tbl_ptrs[qtblno] == NULL)
  72       ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
  73     qtbl = cinfo->quant_tbl_ptrs[qtblno];
  74     /* Compute divisors for this quant table */
  75     /* We may do this more than once for same table, but it's not a big deal */
  76     switch (cinfo->dct_method) {
  77 #ifdef DCT_ISLOW_SUPPORTED
  78     case JDCT_ISLOW:
  79       /* For LL&M IDCT method, divisors are equal to raw quantization
  80        * coefficients multiplied by 8 (to counteract scaling).
  81        */
  82       if (fdct->divisors[qtblno] == NULL) {
  83         fdct->divisors[qtblno] = (DCTELEM *)
  84           (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
  85                                       DCTSIZE2 * SIZEOF(DCTELEM));
  86       }
  87       dtbl = fdct->divisors[qtblno];
  88       for (i = 0; i < DCTSIZE2; i++) {
  89         dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;
  90       }
  91       break;
  92 #endif
  93 #ifdef DCT_IFAST_SUPPORTED
  94     case JDCT_IFAST:
  95       {
  96         /* For AA&N IDCT method, divisors are equal to quantization
  97          * coefficients scaled by scalefactor[row]*scalefactor[col], where
  98          *   scalefactor[0] = 1
  99          *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
 100          * We apply a further scale factor of 8.
 101          */
 102 #define CONST_BITS 14
 103         static const INT16 aanscales[DCTSIZE2] = {
 104           /* precomputed values scaled up by 14 bits */
 105           16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
 106           22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
 107           21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
 108           19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
 109           16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
 110           12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
 111            8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
 112            4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
 113         };
 114         SHIFT_TEMPS
 115
 116         if (fdct->divisors[qtblno] == NULL) {
 117           fdct->divisors[qtblno] = (DCTELEM *)
 118             (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 119                                         DCTSIZE2 * SIZEOF(DCTELEM));
 120         }
 121         dtbl = fdct->divisors[qtblno];
 122         for (i = 0; i < DCTSIZE2; i++) {
 123           dtbl[i] = (DCTELEM)
 124             DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
 125                                   (INT32) aanscales[i]),
 126                     CONST_BITS-3);
 127         }
 128       }
 129       break;
 130 #endif
 131 #ifdef DCT_FLOAT_SUPPORTED
 132     case JDCT_FLOAT:
 133       {
 134         /* For float AA&N IDCT method, divisors are equal to quantization
 135          * coefficients scaled by scalefactor[row]*scalefactor[col], where
 136          *   scalefactor[0] = 1
 137          *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
 138          * We apply a further scale factor of 8.
 139          * What's actually stored is 1/divisor so that the inner loop can
 140          * use a multiplication rather than a division.
 141          */
 142         FAST_FLOAT * fdtbl;
 143         int row, col;
 144         static const double aanscalefactor[DCTSIZE] = {
 145           1.0, 1.387039845, 1.306562965, 1.175875602,
 146           1.0, 0.785694958, 0.541196100, 0.275899379
 147         };
 148
 149         if (fdct->float_divisors[qtblno] == NULL) {
 150           fdct->float_divisors[qtblno] = (FAST_FLOAT *)
 151             (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 152                                         DCTSIZE2 * SIZEOF(FAST_FLOAT));
 153         }
 154         fdtbl = fdct->float_divisors[qtblno];
 155         i = 0;
 156         for (row = 0; row < DCTSIZE; row++) {
 157           for (col = 0; col < DCTSIZE; col++) {
 158             fdtbl[i] = (FAST_FLOAT)
 159               (1.0 / (((double) qtbl->quantval[i] *
 160                        aanscalefactor[row] * aanscalefactor[col] * 8.0)));
 161             i++;
 162           }
 163         }
 164       }
 165       break;
 166 #endif
 167     default:
 168       ERREXIT(cinfo, JERR_NOT_COMPILED);
 169       break;
 170     }
 171   }
 172 }
 173
 174
 175 /*
 176  * Perform forward DCT on one or more blocks of a component.
 177  *
 178  * The input samples are taken from the sample_data[] array starting at
 179  * position start_row/start_col, and moving to the right for any additional
 180  * blocks. The quantized coefficients are returned in coef_blocks[].
 181  */
 182
 183 METHODDEF(void)
 184 forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
 185              JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
 186              JDIMENSION start_row, JDIMENSION start_col,
 187              JDIMENSION num_blocks)
 188 /* This version is used for integer DCT implementations. */
 189 {
 190   /* This routine is heavily used, so it's worth coding it tightly. */
 191   j_lossy_c_ptr lossyc = (j_lossy_c_ptr) cinfo->codec;
 192   fdct_ptr fdct = (fdct_ptr) lossyc->fdct_private;
 193   forward_DCT_method_ptr do_dct = fdct->do_dct;
 194   DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
 195   DCTELEM workspace[DCTSIZE2];  /* work area for FDCT subroutine */
 196   JDIMENSION bi;
 197
 198   sample_data += start_row;     /* fold in the vertical offset once */
 199
 200   for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
 201     /* Load data into workspace, applying unsigned->signed conversion */
 202     { register DCTELEM *workspaceptr;
 203       register JSAMPROW elemptr;
 204       register int elemr;
 205
 206       workspaceptr = workspace;
 207       for (elemr = 0; elemr < DCTSIZE; elemr++) {
 208         elemptr = sample_data[elemr] + start_col;
 209 #if DCTSIZE == 8                /* unroll the inner loop */
 210         *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
 211         *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
 212         *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
 213         *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
 214         *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
 215         *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
 216         *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
 217         *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
 218 #else
 219         { register int elemc;
 220           for (elemc = DCTSIZE; elemc > 0; elemc--) {
 221             *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
 222           }
 223         }
 224 #endif
 225       }
 226     }
 227
 228     /* Perform the DCT */
 229     (*do_dct) (workspace);
 230
 231     /* Quantize/descale the coefficients, and store into coef_blocks[] */
 232     { register DCTELEM temp, qval;
 233       register int i;
 234       register JCOEFPTR output_ptr = coef_blocks[bi];
 235
 236       for (i = 0; i < DCTSIZE2; i++) {
 237         qval = divisors[i];
 238         temp = workspace[i];
 239         /* Divide the coefficient value by qval, ensuring proper rounding.
 240          * Since C does not specify the direction of rounding for negative
 241          * quotients, we have to force the dividend positive for portability.
 242          *
 243          * In most files, at least half of the output values will be zero
 244          * (at default quantization settings, more like three-quarters...)
 245          * so we should ensure that this case is fast.  On many machines,
 246          * a comparison is enough cheaper than a divide to make a special test
 247          * a win.  Since both inputs will be nonnegative, we need only test
 248          * for a < b to discover whether a/b is 0.
 249          * If your machine's division is fast enough, define FAST_DIVIDE.
 250          */
 251 #ifdef FAST_DIVIDE
 252 #define DIVIDE_BY(a,b)  a /= b
 253 #else
 254 #define DIVIDE_BY(a,b)  if (a >= b) a /= b; else a = 0
 255 #endif
 256         if (temp < 0) {
 257           temp = -temp;
 258           temp += qval>>1;      /* for rounding */
 259           DIVIDE_BY(temp, qval);
 260           temp = -temp;
 261         } else {
 262           temp += qval>>1;      /* for rounding */
 263           DIVIDE_BY(temp, qval);
 264         }
 265         output_ptr[i] = (JCOEF) temp;
 266       }
 267     }
 268   }
 269 }
 270
 271
 272 #ifdef DCT_FLOAT_SUPPORTED
 273
 274 METHODDEF(void)
 275 forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
 276                    JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
 277                    JDIMENSION start_row, JDIMENSION start_col,
 278                    JDIMENSION num_blocks)
 279 /* This version is used for floating-point DCT implementations. */
 280 {
 281   /* This routine is heavily used, so it's worth coding it tightly. */
 282   j_lossy_c_ptr lossyc = (j_lossy_c_ptr) cinfo->codec;
 283   fdct_ptr fdct = (fdct_ptr) lossyc->fdct_private;
 284   float_DCT_method_ptr do_dct = fdct->do_float_dct;
 285   FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
 286   FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
 287   JDIMENSION bi;
 288
 289   sample_data += start_row;     /* fold in the vertical offset once */
 290
 291   for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
 292     /* Load data into workspace, applying unsigned->signed conversion */
 293     { register FAST_FLOAT *workspaceptr;
 294       register JSAMPROW elemptr;
 295       register int elemr;
 296
 297       workspaceptr = workspace;
 298       for (elemr = 0; elemr < DCTSIZE; elemr++) {
 299         elemptr = sample_data[elemr] + start_col;
 300 #if DCTSIZE == 8                /* unroll the inner loop */
 301         *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
 302         *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
 303         *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
 304         *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
 305         *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
 306         *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
 307         *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
 308         *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
 309 #else
 310         { register int elemc;
 311           for (elemc = DCTSIZE; elemc > 0; elemc--) {
 312             *workspaceptr++ = (FAST_FLOAT)
 313               (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
 314           }
 315         }
 316 #endif
 317       }
 318     }
 319
 320     /* Perform the DCT */
 321     (*do_dct) (workspace);
 322
 323     /* Quantize/descale the coefficients, and store into coef_blocks[] */
 324     { register FAST_FLOAT temp;
 325       register int i;
 326       register JCOEFPTR output_ptr = coef_blocks[bi];
 327
 328       for (i = 0; i < DCTSIZE2; i++) {
 329         /* Apply the quantization and scaling factor */
 330         temp = workspace[i] * divisors[i];
 331         /* Round to nearest integer.
 332          * Since C does not specify the direction of rounding for negative
 333          * quotients, we have to force the dividend positive for portability.
 334          * The maximum coefficient size is +-16K (for 12-bit data), so this
 335          * code should work for either 16-bit or 32-bit ints.
 336          */
 337         output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
 338       }
 339     }
 340   }
 341 }
 342
 343 #endif /* DCT_FLOAT_SUPPORTED */
 344
 345
 346 /*
 347  * Initialize FDCT manager.
 348  */
 349
 350 JGLOBAL(void)
 351 jinit_forward_dct (j_compress_ptr cinfo)
 352 {
 353   j_lossy_c_ptr lossyc = (j_lossy_c_ptr) cinfo->codec;
 354   fdct_ptr fdct;
 355   int i;
 356
 357   fdct = (fdct_ptr)
 358     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 359                                 SIZEOF(fdct_controller));
 360   lossyc->fdct_private = (struct jpeg_forward_dct *) fdct;
 361   lossyc->fdct_start_pass = start_pass_fdctmgr;
 362
 363   switch (cinfo->dct_method) {
 364 #ifdef DCT_ISLOW_SUPPORTED
 365   case JDCT_ISLOW:
 366     lossyc->fdct_forward_DCT = forward_DCT;
 367     fdct->do_dct = jpeg_fdct_islow;
 368     break;
 369 #endif
 370 #ifdef DCT_IFAST_SUPPORTED
 371   case JDCT_IFAST:
 372     lossyc->fdct_forward_DCT = forward_DCT;
 373     fdct->do_dct = jpeg_fdct_ifast;
 374     break;
 375 #endif
 376 #ifdef DCT_FLOAT_SUPPORTED
 377   case JDCT_FLOAT:
 378     lossyc->fdct_forward_DCT = forward_DCT_float;
 379     fdct->do_float_dct = jpeg_fdct_float;
 380     break;
 381 #endif
 382   default:
 383     ERREXIT(cinfo, JERR_NOT_COMPILED);
 384     break;
 385   }
 386
 387   /* Mark divisor tables unallocated */
 388   for (i = 0; i < NUM_QUANT_TBLS; i++) {
 389     fdct->divisors[i] = NULL;
 390 #ifdef DCT_FLOAT_SUPPORTED
 391     fdct->float_divisors[i] = NULL;
 392 #endif
 393   }
 394 }