4 * Copyright (C) 1994-1996, Thomas G. Lane.
5 * Modified 2003-2013 by Guido Vollbeding.
6 * This file is part of the Independent JPEG Group's software.
7 * For conditions of distribution and use, see the accompanying README file.
9 * This file contains the forward-DCT management logic.
10 * This code selects a particular DCT implementation to be used,
11 * and it performs related housekeeping chores including coefficient
15 #define JPEG_INTERNALS
18 #include "jdct.h" /* Private declarations for DCT subsystem */
21 /* Private subobject for this module */
24 struct jpeg_forward_dct pub
; /* public fields */
26 /* Pointer to the DCT routine actually in use */
27 forward_DCT_method_ptr do_dct
[MAX_COMPONENTS
];
29 #ifdef DCT_FLOAT_SUPPORTED
30 /* Same as above for the floating-point case. */
31 float_DCT_method_ptr do_float_dct
[MAX_COMPONENTS
];
35 typedef my_fdct_controller
* my_fdct_ptr
;
38 /* The allocated post-DCT divisor tables -- big enough for any
39 * supported variant and not identical to the quant table entries,
40 * because of scaling (especially for an unnormalized DCT) --
41 * are pointed to by dct_table in the per-component comp_info
42 * structures. Each table is given in normal array order.
46 DCTELEM int_array
[DCTSIZE2
];
47 #ifdef DCT_FLOAT_SUPPORTED
48 FAST_FLOAT float_array
[DCTSIZE2
];
53 /* The current scaled-DCT routines require ISLOW-style divisor tables,
54 * so be sure to compile that code if either ISLOW or SCALING is requested.
56 #ifdef DCT_ISLOW_SUPPORTED
57 #define PROVIDE_ISLOW_TABLES
59 #ifdef DCT_SCALING_SUPPORTED
60 #define PROVIDE_ISLOW_TABLES
66 * Perform forward DCT on one or more blocks of a component.
68 * The input samples are taken from the sample_data[] array starting at
69 * position start_row/start_col, and moving to the right for any additional
70 * blocks. The quantized coefficients are returned in coef_blocks[].
74 forward_DCT (j_compress_ptr cinfo
, jpeg_component_info
* compptr
,
75 JSAMPARRAY sample_data
, JBLOCKROW coef_blocks
,
76 JDIMENSION start_row
, JDIMENSION start_col
,
77 JDIMENSION num_blocks
)
78 /* This version is used for integer DCT implementations. */
80 /* This routine is heavily used, so it's worth coding it tightly. */
81 my_fdct_ptr fdct
= (my_fdct_ptr
) cinfo
->fdct
;
82 forward_DCT_method_ptr do_dct
= fdct
->do_dct
[compptr
->component_index
];
83 DCTELEM
* divisors
= (DCTELEM
*) compptr
->dct_table
;
84 DCTELEM workspace
[DCTSIZE2
]; /* work area for FDCT subroutine */
87 sample_data
+= start_row
; /* fold in the vertical offset once */
89 for (bi
= 0; bi
< num_blocks
; bi
++, start_col
+= compptr
->DCT_h_scaled_size
) {
91 (*do_dct
) (workspace
, sample_data
, start_col
);
93 /* Quantize/descale the coefficients, and store into coef_blocks[] */
94 { register DCTELEM temp
, qval
;
96 register JCOEFPTR output_ptr
= coef_blocks
[bi
];
98 for (i
= 0; i
< DCTSIZE2
; i
++) {
101 /* Divide the coefficient value by qval, ensuring proper rounding.
102 * Since C does not specify the direction of rounding for negative
103 * quotients, we have to force the dividend positive for portability.
105 * In most files, at least half of the output values will be zero
106 * (at default quantization settings, more like three-quarters...)
107 * so we should ensure that this case is fast. On many machines,
108 * a comparison is enough cheaper than a divide to make a special test
109 * a win. Since both inputs will be nonnegative, we need only test
110 * for a < b to discover whether a/b is 0.
111 * If your machine's division is fast enough, define FAST_DIVIDE.
114 #define DIVIDE_BY(a,b) a /= b
116 #define DIVIDE_BY(a,b) if (a >= b) a /= b; else a = 0
120 temp
+= qval
>>1; /* for rounding */
121 DIVIDE_BY(temp
, qval
);
124 temp
+= qval
>>1; /* for rounding */
125 DIVIDE_BY(temp
, qval
);
127 output_ptr
[i
] = (JCOEF
) temp
;
134 #ifdef DCT_FLOAT_SUPPORTED
137 forward_DCT_float (j_compress_ptr cinfo
, jpeg_component_info
* compptr
,
138 JSAMPARRAY sample_data
, JBLOCKROW coef_blocks
,
139 JDIMENSION start_row
, JDIMENSION start_col
,
140 JDIMENSION num_blocks
)
141 /* This version is used for floating-point DCT implementations. */
143 /* This routine is heavily used, so it's worth coding it tightly. */
144 my_fdct_ptr fdct
= (my_fdct_ptr
) cinfo
->fdct
;
145 float_DCT_method_ptr do_dct
= fdct
->do_float_dct
[compptr
->component_index
];
146 FAST_FLOAT
* divisors
= (FAST_FLOAT
*) compptr
->dct_table
;
147 FAST_FLOAT workspace
[DCTSIZE2
]; /* work area for FDCT subroutine */
150 sample_data
+= start_row
; /* fold in the vertical offset once */
152 for (bi
= 0; bi
< num_blocks
; bi
++, start_col
+= compptr
->DCT_h_scaled_size
) {
153 /* Perform the DCT */
154 (*do_dct
) (workspace
, sample_data
, start_col
);
156 /* Quantize/descale the coefficients, and store into coef_blocks[] */
157 { register FAST_FLOAT temp
;
159 register JCOEFPTR output_ptr
= coef_blocks
[bi
];
161 for (i
= 0; i
< DCTSIZE2
; i
++) {
162 /* Apply the quantization and scaling factor */
163 temp
= workspace
[i
] * divisors
[i
];
164 /* Round to nearest integer.
165 * Since C does not specify the direction of rounding for negative
166 * quotients, we have to force the dividend positive for portability.
167 * The maximum coefficient size is +-16K (for 12-bit data), so this
168 * code should work for either 16-bit or 32-bit ints.
170 output_ptr
[i
] = (JCOEF
) ((int) (temp
+ (FAST_FLOAT
) 16384.5) - 16384);
176 #endif /* DCT_FLOAT_SUPPORTED */
180 * Initialize for a processing pass.
181 * Verify that all referenced Q-tables are present, and set up
182 * the divisor table for each one.
183 * In the current implementation, DCT of all components is done during
184 * the first pass, even if only some components will be output in the
185 * first scan. Hence all components should be examined here.
189 start_pass_fdctmgr (j_compress_ptr cinfo
)
191 my_fdct_ptr fdct
= (my_fdct_ptr
) cinfo
->fdct
;
193 jpeg_component_info
*compptr
;
198 for (ci
= 0, compptr
= cinfo
->comp_info
; ci
< cinfo
->num_components
;
200 /* Select the proper DCT routine for this component's scaling */
201 switch ((compptr
->DCT_h_scaled_size
<< 8) + compptr
->DCT_v_scaled_size
) {
202 #ifdef DCT_SCALING_SUPPORTED
204 fdct
->do_dct
[ci
] = jpeg_fdct_1x1
;
205 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
208 fdct
->do_dct
[ci
] = jpeg_fdct_2x2
;
209 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
212 fdct
->do_dct
[ci
] = jpeg_fdct_3x3
;
213 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
216 fdct
->do_dct
[ci
] = jpeg_fdct_4x4
;
217 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
220 fdct
->do_dct
[ci
] = jpeg_fdct_5x5
;
221 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
224 fdct
->do_dct
[ci
] = jpeg_fdct_6x6
;
225 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
228 fdct
->do_dct
[ci
] = jpeg_fdct_7x7
;
229 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
232 fdct
->do_dct
[ci
] = jpeg_fdct_9x9
;
233 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
235 case ((10 << 8) + 10):
236 fdct
->do_dct
[ci
] = jpeg_fdct_10x10
;
237 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
239 case ((11 << 8) + 11):
240 fdct
->do_dct
[ci
] = jpeg_fdct_11x11
;
241 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
243 case ((12 << 8) + 12):
244 fdct
->do_dct
[ci
] = jpeg_fdct_12x12
;
245 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
247 case ((13 << 8) + 13):
248 fdct
->do_dct
[ci
] = jpeg_fdct_13x13
;
249 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
251 case ((14 << 8) + 14):
252 fdct
->do_dct
[ci
] = jpeg_fdct_14x14
;
253 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
255 case ((15 << 8) + 15):
256 fdct
->do_dct
[ci
] = jpeg_fdct_15x15
;
257 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
259 case ((16 << 8) + 16):
260 fdct
->do_dct
[ci
] = jpeg_fdct_16x16
;
261 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
263 case ((16 << 8) + 8):
264 fdct
->do_dct
[ci
] = jpeg_fdct_16x8
;
265 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
267 case ((14 << 8) + 7):
268 fdct
->do_dct
[ci
] = jpeg_fdct_14x7
;
269 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
271 case ((12 << 8) + 6):
272 fdct
->do_dct
[ci
] = jpeg_fdct_12x6
;
273 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
275 case ((10 << 8) + 5):
276 fdct
->do_dct
[ci
] = jpeg_fdct_10x5
;
277 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
280 fdct
->do_dct
[ci
] = jpeg_fdct_8x4
;
281 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
284 fdct
->do_dct
[ci
] = jpeg_fdct_6x3
;
285 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
288 fdct
->do_dct
[ci
] = jpeg_fdct_4x2
;
289 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
292 fdct
->do_dct
[ci
] = jpeg_fdct_2x1
;
293 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
295 case ((8 << 8) + 16):
296 fdct
->do_dct
[ci
] = jpeg_fdct_8x16
;
297 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
299 case ((7 << 8) + 14):
300 fdct
->do_dct
[ci
] = jpeg_fdct_7x14
;
301 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
303 case ((6 << 8) + 12):
304 fdct
->do_dct
[ci
] = jpeg_fdct_6x12
;
305 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
307 case ((5 << 8) + 10):
308 fdct
->do_dct
[ci
] = jpeg_fdct_5x10
;
309 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
312 fdct
->do_dct
[ci
] = jpeg_fdct_4x8
;
313 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
316 fdct
->do_dct
[ci
] = jpeg_fdct_3x6
;
317 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
320 fdct
->do_dct
[ci
] = jpeg_fdct_2x4
;
321 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
324 fdct
->do_dct
[ci
] = jpeg_fdct_1x2
;
325 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
328 case ((DCTSIZE
<< 8) + DCTSIZE
):
329 switch (cinfo
->dct_method
) {
330 #ifdef DCT_ISLOW_SUPPORTED
332 fdct
->do_dct
[ci
] = jpeg_fdct_islow
;
336 #ifdef DCT_IFAST_SUPPORTED
338 fdct
->do_dct
[ci
] = jpeg_fdct_ifast
;
342 #ifdef DCT_FLOAT_SUPPORTED
344 fdct
->do_float_dct
[ci
] = jpeg_fdct_float
;
349 ERREXIT(cinfo
, JERR_NOT_COMPILED
);
354 ERREXIT2(cinfo
, JERR_BAD_DCTSIZE
,
355 compptr
->DCT_h_scaled_size
, compptr
->DCT_v_scaled_size
);
358 qtblno
= compptr
->quant_tbl_no
;
359 /* Make sure specified quantization table is present */
360 if (qtblno
< 0 || qtblno
>= NUM_QUANT_TBLS
||
361 cinfo
->quant_tbl_ptrs
[qtblno
] == NULL
)
362 ERREXIT1(cinfo
, JERR_NO_QUANT_TABLE
, qtblno
);
363 qtbl
= cinfo
->quant_tbl_ptrs
[qtblno
];
364 /* Create divisor table from quant table */
366 #ifdef PROVIDE_ISLOW_TABLES
368 /* For LL&M IDCT method, divisors are equal to raw quantization
369 * coefficients multiplied by 8 (to counteract scaling).
371 dtbl
= (DCTELEM
*) compptr
->dct_table
;
372 for (i
= 0; i
< DCTSIZE2
; i
++) {
374 ((DCTELEM
) qtbl
->quantval
[i
]) << (compptr
->component_needed
? 4 : 3);
376 fdct
->pub
.forward_DCT
[ci
] = forward_DCT
;
379 #ifdef DCT_IFAST_SUPPORTED
382 /* For AA&N IDCT method, divisors are equal to quantization
383 * coefficients scaled by scalefactor[row]*scalefactor[col], where
385 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
386 * We apply a further scale factor of 8.
388 #define CONST_BITS 14
389 static const INT16 aanscales
[DCTSIZE2
] = {
390 /* precomputed values scaled up by 14 bits */
391 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
392 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
393 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
394 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
395 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
396 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
397 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
398 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
402 dtbl
= (DCTELEM
*) compptr
->dct_table
;
403 for (i
= 0; i
< DCTSIZE2
; i
++) {
405 DESCALE(MULTIPLY16V16((INT32
) qtbl
->quantval
[i
],
406 (INT32
) aanscales
[i
]),
407 compptr
->component_needed
? CONST_BITS
-4 : CONST_BITS
-3);
410 fdct
->pub
.forward_DCT
[ci
] = forward_DCT
;
413 #ifdef DCT_FLOAT_SUPPORTED
416 /* For float AA&N IDCT method, divisors are equal to quantization
417 * coefficients scaled by scalefactor[row]*scalefactor[col], where
419 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
420 * We apply a further scale factor of 8.
421 * What's actually stored is 1/divisor so that the inner loop can
422 * use a multiplication rather than a division.
424 FAST_FLOAT
* fdtbl
= (FAST_FLOAT
*) compptr
->dct_table
;
426 static const double aanscalefactor
[DCTSIZE
] = {
427 1.0, 1.387039845, 1.306562965, 1.175875602,
428 1.0, 0.785694958, 0.541196100, 0.275899379
432 for (row
= 0; row
< DCTSIZE
; row
++) {
433 for (col
= 0; col
< DCTSIZE
; col
++) {
434 fdtbl
[i
] = (FAST_FLOAT
)
435 (1.0 / ((double) qtbl
->quantval
[i
] *
436 aanscalefactor
[row
] * aanscalefactor
[col
] *
437 (compptr
->component_needed
? 16.0 : 8.0)));
442 fdct
->pub
.forward_DCT
[ci
] = forward_DCT_float
;
446 ERREXIT(cinfo
, JERR_NOT_COMPILED
);
454 * Initialize FDCT manager.
458 jinit_forward_dct (j_compress_ptr cinfo
)
462 jpeg_component_info
*compptr
;
465 (*cinfo
->mem
->alloc_small
) ((j_common_ptr
) cinfo
, JPOOL_IMAGE
,
466 SIZEOF(my_fdct_controller
));
467 cinfo
->fdct
= &fdct
->pub
;
468 fdct
->pub
.start_pass
= start_pass_fdctmgr
;
470 for (ci
= 0, compptr
= cinfo
->comp_info
; ci
< cinfo
->num_components
;
472 /* Allocate a divisor table for each component */
474 (*cinfo
->mem
->alloc_small
) ((j_common_ptr
) cinfo
, JPOOL_IMAGE
,
475 SIZEOF(divisor_table
));