update experimental gcc 6 patch to gcc 6.1.0 release
[AROS.git] / workbench / libs / jpeg / jcdctmgr.c
blob0bbdbb685d1b735cfcb06c93130e6e8e09034dab
1 /*
2 * jcdctmgr.c
4 * Copyright (C) 1994-1996, Thomas G. Lane.
5 * This file is part of the Independent JPEG Group's software.
6 * For conditions of distribution and use, see the accompanying README file.
8 * This file contains the forward-DCT management logic.
9 * This code selects a particular DCT implementation to be used,
10 * and it performs related housekeeping chores including coefficient
11 * quantization.
14 #define JPEG_INTERNALS
15 #include "jinclude.h"
16 #include "jpeglib.h"
17 #include "jdct.h" /* Private declarations for DCT subsystem */
20 /* Private subobject for this module */
22 typedef struct {
23 struct jpeg_forward_dct pub; /* public fields */
25 /* Pointer to the DCT routine actually in use */
26 forward_DCT_method_ptr do_dct[MAX_COMPONENTS];
28 /* The actual post-DCT divisors --- not identical to the quant table
29 * entries, because of scaling (especially for an unnormalized DCT).
30 * Each table is given in normal array order.
32 DCTELEM * divisors[NUM_QUANT_TBLS];
34 #ifdef DCT_FLOAT_SUPPORTED
35 /* Same as above for the floating-point case. */
36 float_DCT_method_ptr do_float_dct[MAX_COMPONENTS];
37 FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
38 #endif
39 } my_fdct_controller;
41 typedef my_fdct_controller * my_fdct_ptr;
44 /* The current scaled-DCT routines require ISLOW-style divisor tables,
45 * so be sure to compile that code if either ISLOW or SCALING is requested.
47 #ifdef DCT_ISLOW_SUPPORTED
48 #define PROVIDE_ISLOW_TABLES
49 #else
50 #ifdef DCT_SCALING_SUPPORTED
51 #define PROVIDE_ISLOW_TABLES
52 #endif
53 #endif
57 * Perform forward DCT on one or more blocks of a component.
59 * The input samples are taken from the sample_data[] array starting at
60 * position start_row/start_col, and moving to the right for any additional
61 * blocks. The quantized coefficients are returned in coef_blocks[].
64 METHODDEF(void)
65 forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
66 JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
67 JDIMENSION start_row, JDIMENSION start_col,
68 JDIMENSION num_blocks)
69 /* This version is used for integer DCT implementations. */
71 /* This routine is heavily used, so it's worth coding it tightly. */
72 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
73 forward_DCT_method_ptr do_dct = fdct->do_dct[compptr->component_index];
74 DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
75 DCTELEM workspace[DCTSIZE2]; /* work area for FDCT subroutine */
76 JDIMENSION bi;
78 sample_data += start_row; /* fold in the vertical offset once */
80 for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
81 /* Perform the DCT */
82 (*do_dct) (workspace, sample_data, start_col);
84 /* Quantize/descale the coefficients, and store into coef_blocks[] */
85 { register DCTELEM temp, qval;
86 register int i;
87 register JCOEFPTR output_ptr = coef_blocks[bi];
89 for (i = 0; i < DCTSIZE2; i++) {
90 qval = divisors[i];
91 temp = workspace[i];
92 /* Divide the coefficient value by qval, ensuring proper rounding.
93 * Since C does not specify the direction of rounding for negative
94 * quotients, we have to force the dividend positive for portability.
96 * In most files, at least half of the output values will be zero
97 * (at default quantization settings, more like three-quarters...)
98 * so we should ensure that this case is fast. On many machines,
99 * a comparison is enough cheaper than a divide to make a special test
100 * a win. Since both inputs will be nonnegative, we need only test
101 * for a < b to discover whether a/b is 0.
102 * If your machine's division is fast enough, define FAST_DIVIDE.
104 #ifdef FAST_DIVIDE
105 #define DIVIDE_BY(a,b) a /= b
106 #else
107 #define DIVIDE_BY(a,b) if (a >= b) a /= b; else a = 0
108 #endif
109 if (temp < 0) {
110 temp = -temp;
111 temp += qval>>1; /* for rounding */
112 DIVIDE_BY(temp, qval);
113 temp = -temp;
114 } else {
115 temp += qval>>1; /* for rounding */
116 DIVIDE_BY(temp, qval);
118 output_ptr[i] = (JCOEF) temp;
125 #ifdef DCT_FLOAT_SUPPORTED
127 METHODDEF(void)
128 forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
129 JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
130 JDIMENSION start_row, JDIMENSION start_col,
131 JDIMENSION num_blocks)
132 /* This version is used for floating-point DCT implementations. */
134 /* This routine is heavily used, so it's worth coding it tightly. */
135 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
136 float_DCT_method_ptr do_dct = fdct->do_float_dct[compptr->component_index];
137 FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
138 FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
139 JDIMENSION bi;
141 sample_data += start_row; /* fold in the vertical offset once */
143 for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
144 /* Perform the DCT */
145 (*do_dct) (workspace, sample_data, start_col);
147 /* Quantize/descale the coefficients, and store into coef_blocks[] */
148 { register FAST_FLOAT temp;
149 register int i;
150 register JCOEFPTR output_ptr = coef_blocks[bi];
152 for (i = 0; i < DCTSIZE2; i++) {
153 /* Apply the quantization and scaling factor */
154 temp = workspace[i] * divisors[i];
155 /* Round to nearest integer.
156 * Since C does not specify the direction of rounding for negative
157 * quotients, we have to force the dividend positive for portability.
158 * The maximum coefficient size is +-16K (for 12-bit data), so this
159 * code should work for either 16-bit or 32-bit ints.
161 output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
167 #endif /* DCT_FLOAT_SUPPORTED */
171 * Initialize for a processing pass.
172 * Verify that all referenced Q-tables are present, and set up
173 * the divisor table for each one.
174 * In the current implementation, DCT of all components is done during
175 * the first pass, even if only some components will be output in the
176 * first scan. Hence all components should be examined here.
179 METHODDEF(void)
180 start_pass_fdctmgr (j_compress_ptr cinfo)
182 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
183 int ci, qtblno, i;
184 jpeg_component_info *compptr;
185 int method = 0;
186 JQUANT_TBL * qtbl;
187 DCTELEM * dtbl;
189 for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
190 ci++, compptr++) {
191 /* Select the proper DCT routine for this component's scaling */
192 switch ((compptr->DCT_h_scaled_size << 8) + compptr->DCT_v_scaled_size) {
193 #ifdef DCT_SCALING_SUPPORTED
194 case ((1 << 8) + 1):
195 fdct->do_dct[ci] = jpeg_fdct_1x1;
196 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
197 break;
198 case ((2 << 8) + 2):
199 fdct->do_dct[ci] = jpeg_fdct_2x2;
200 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
201 break;
202 case ((3 << 8) + 3):
203 fdct->do_dct[ci] = jpeg_fdct_3x3;
204 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
205 break;
206 case ((4 << 8) + 4):
207 fdct->do_dct[ci] = jpeg_fdct_4x4;
208 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
209 break;
210 case ((5 << 8) + 5):
211 fdct->do_dct[ci] = jpeg_fdct_5x5;
212 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
213 break;
214 case ((6 << 8) + 6):
215 fdct->do_dct[ci] = jpeg_fdct_6x6;
216 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
217 break;
218 case ((7 << 8) + 7):
219 fdct->do_dct[ci] = jpeg_fdct_7x7;
220 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
221 break;
222 case ((9 << 8) + 9):
223 fdct->do_dct[ci] = jpeg_fdct_9x9;
224 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
225 break;
226 case ((10 << 8) + 10):
227 fdct->do_dct[ci] = jpeg_fdct_10x10;
228 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
229 break;
230 case ((11 << 8) + 11):
231 fdct->do_dct[ci] = jpeg_fdct_11x11;
232 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
233 break;
234 case ((12 << 8) + 12):
235 fdct->do_dct[ci] = jpeg_fdct_12x12;
236 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
237 break;
238 case ((13 << 8) + 13):
239 fdct->do_dct[ci] = jpeg_fdct_13x13;
240 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
241 break;
242 case ((14 << 8) + 14):
243 fdct->do_dct[ci] = jpeg_fdct_14x14;
244 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
245 break;
246 case ((15 << 8) + 15):
247 fdct->do_dct[ci] = jpeg_fdct_15x15;
248 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
249 break;
250 case ((16 << 8) + 16):
251 fdct->do_dct[ci] = jpeg_fdct_16x16;
252 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
253 break;
254 case ((16 << 8) + 8):
255 fdct->do_dct[ci] = jpeg_fdct_16x8;
256 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
257 break;
258 case ((14 << 8) + 7):
259 fdct->do_dct[ci] = jpeg_fdct_14x7;
260 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
261 break;
262 case ((12 << 8) + 6):
263 fdct->do_dct[ci] = jpeg_fdct_12x6;
264 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
265 break;
266 case ((10 << 8) + 5):
267 fdct->do_dct[ci] = jpeg_fdct_10x5;
268 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
269 break;
270 case ((8 << 8) + 4):
271 fdct->do_dct[ci] = jpeg_fdct_8x4;
272 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
273 break;
274 case ((6 << 8) + 3):
275 fdct->do_dct[ci] = jpeg_fdct_6x3;
276 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
277 break;
278 case ((4 << 8) + 2):
279 fdct->do_dct[ci] = jpeg_fdct_4x2;
280 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
281 break;
282 case ((2 << 8) + 1):
283 fdct->do_dct[ci] = jpeg_fdct_2x1;
284 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
285 break;
286 case ((8 << 8) + 16):
287 fdct->do_dct[ci] = jpeg_fdct_8x16;
288 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
289 break;
290 case ((7 << 8) + 14):
291 fdct->do_dct[ci] = jpeg_fdct_7x14;
292 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
293 break;
294 case ((6 << 8) + 12):
295 fdct->do_dct[ci] = jpeg_fdct_6x12;
296 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
297 break;
298 case ((5 << 8) + 10):
299 fdct->do_dct[ci] = jpeg_fdct_5x10;
300 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
301 break;
302 case ((4 << 8) + 8):
303 fdct->do_dct[ci] = jpeg_fdct_4x8;
304 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
305 break;
306 case ((3 << 8) + 6):
307 fdct->do_dct[ci] = jpeg_fdct_3x6;
308 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
309 break;
310 case ((2 << 8) + 4):
311 fdct->do_dct[ci] = jpeg_fdct_2x4;
312 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
313 break;
314 case ((1 << 8) + 2):
315 fdct->do_dct[ci] = jpeg_fdct_1x2;
316 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
317 break;
318 #endif
319 case ((DCTSIZE << 8) + DCTSIZE):
320 switch (cinfo->dct_method) {
321 #ifdef DCT_ISLOW_SUPPORTED
322 case JDCT_ISLOW:
323 fdct->do_dct[ci] = jpeg_fdct_islow;
324 method = JDCT_ISLOW;
325 break;
326 #endif
327 #ifdef DCT_IFAST_SUPPORTED
328 case JDCT_IFAST:
329 fdct->do_dct[ci] = jpeg_fdct_ifast;
330 method = JDCT_IFAST;
331 break;
332 #endif
333 #ifdef DCT_FLOAT_SUPPORTED
334 case JDCT_FLOAT:
335 fdct->do_float_dct[ci] = jpeg_fdct_float;
336 method = JDCT_FLOAT;
337 break;
338 #endif
339 default:
340 ERREXIT(cinfo, JERR_NOT_COMPILED);
341 break;
343 break;
344 default:
345 ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
346 compptr->DCT_h_scaled_size, compptr->DCT_v_scaled_size);
347 break;
349 qtblno = compptr->quant_tbl_no;
350 /* Make sure specified quantization table is present */
351 if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
352 cinfo->quant_tbl_ptrs[qtblno] == NULL)
353 ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
354 qtbl = cinfo->quant_tbl_ptrs[qtblno];
355 /* Compute divisors for this quant table */
356 /* We may do this more than once for same table, but it's not a big deal */
357 switch (method) {
358 #ifdef PROVIDE_ISLOW_TABLES
359 case JDCT_ISLOW:
360 /* For LL&M IDCT method, divisors are equal to raw quantization
361 * coefficients multiplied by 8 (to counteract scaling).
363 if (fdct->divisors[qtblno] == NULL) {
364 fdct->divisors[qtblno] = (DCTELEM *)
365 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
366 DCTSIZE2 * SIZEOF(DCTELEM));
368 dtbl = fdct->divisors[qtblno];
369 for (i = 0; i < DCTSIZE2; i++) {
370 dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;
372 fdct->pub.forward_DCT[ci] = forward_DCT;
373 break;
374 #endif
375 #ifdef DCT_IFAST_SUPPORTED
376 case JDCT_IFAST:
378 /* For AA&N IDCT method, divisors are equal to quantization
379 * coefficients scaled by scalefactor[row]*scalefactor[col], where
380 * scalefactor[0] = 1
381 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
382 * We apply a further scale factor of 8.
384 #define CONST_BITS 14
385 static const INT16 aanscales[DCTSIZE2] = {
386 /* precomputed values scaled up by 14 bits */
387 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
388 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
389 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
390 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
391 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
392 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
393 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
394 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
396 SHIFT_TEMPS
398 if (fdct->divisors[qtblno] == NULL) {
399 fdct->divisors[qtblno] = (DCTELEM *)
400 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
401 DCTSIZE2 * SIZEOF(DCTELEM));
403 dtbl = fdct->divisors[qtblno];
404 for (i = 0; i < DCTSIZE2; i++) {
405 dtbl[i] = (DCTELEM)
406 DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
407 (INT32) aanscales[i]),
408 CONST_BITS-3);
411 fdct->pub.forward_DCT[ci] = forward_DCT;
412 break;
413 #endif
414 #ifdef DCT_FLOAT_SUPPORTED
415 case JDCT_FLOAT:
417 /* For float AA&N IDCT method, divisors are equal to quantization
418 * coefficients scaled by scalefactor[row]*scalefactor[col], where
419 * scalefactor[0] = 1
420 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
421 * We apply a further scale factor of 8.
422 * What's actually stored is 1/divisor so that the inner loop can
423 * use a multiplication rather than a division.
425 FAST_FLOAT * fdtbl;
426 int row, col;
427 static const double aanscalefactor[DCTSIZE] = {
428 1.0, 1.387039845, 1.306562965, 1.175875602,
429 1.0, 0.785694958, 0.541196100, 0.275899379
432 if (fdct->float_divisors[qtblno] == NULL) {
433 fdct->float_divisors[qtblno] = (FAST_FLOAT *)
434 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
435 DCTSIZE2 * SIZEOF(FAST_FLOAT));
437 fdtbl = fdct->float_divisors[qtblno];
438 i = 0;
439 for (row = 0; row < DCTSIZE; row++) {
440 for (col = 0; col < DCTSIZE; col++) {
441 fdtbl[i] = (FAST_FLOAT)
442 (1.0 / (((double) qtbl->quantval[i] *
443 aanscalefactor[row] * aanscalefactor[col] * 8.0)));
444 i++;
448 fdct->pub.forward_DCT[ci] = forward_DCT_float;
449 break;
450 #endif
451 default:
452 ERREXIT(cinfo, JERR_NOT_COMPILED);
453 break;
460 * Initialize FDCT manager.
463 GLOBAL(void)
464 jinit_forward_dct (j_compress_ptr cinfo)
466 my_fdct_ptr fdct;
467 int i;
469 fdct = (my_fdct_ptr)
470 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
471 SIZEOF(my_fdct_controller));
472 cinfo->fdct = (struct jpeg_forward_dct *) fdct;
473 fdct->pub.start_pass = start_pass_fdctmgr;
475 /* Mark divisor tables unallocated */
476 for (i = 0; i < NUM_QUANT_TBLS; i++) {
477 fdct->divisors[i] = NULL;
478 #ifdef DCT_FLOAT_SUPPORTED
479 fdct->float_divisors[i] = NULL;
480 #endif