1 /********************************************************************
3 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
8 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
9 * by the Xiph.Org Foundation http://www.xiph.org/ *
11 ********************************************************************
13 function: C implementation of the Theora iDCT
16 ********************************************************************/
19 #include "codec_internal.h"
21 #include "quant_lookup.h"
23 #define IdctAdjustBeforeShift 8
24 /* cos(n*pi/16) or sin((8-n)*pi/16) */
33 /* compute the 16 bit signed 1D inverse DCT - spec version */
/* Visible part of the 16-bit 1-D inverse DCT.  y aliases InputData and x
 * aliases OutputData.  Every product below is scaled back down with >> 16.
 * The declaration of t[] and any stores through x are not visible in this
 * excerpt, so the notes below cover only the statements that are shown. */
35 static void idct_short__c ( ogg_int16_t * InputData, ogg_int16_t * OutputData ) {
37 ogg_int16_t *y = InputData;
38 ogg_int16_t *x = OutputData;
42 t[0] = (xC4S4 * t[0]) >> 16;
46 t[1] = (xC4S4 * t[1]) >> 16;
/* NOTE(review): t[2] is read on the right-hand side, but no earlier write to
 * t[2] is visible in this excerpt; the paired term reads y[6], so y[2] may
 * have been intended -- confirm. */
48 t[2] = ((xC6S2 * t[2]) >> 16) - ((xC2S6 * y[6]) >> 16);
/* NOTE(review): this reads t[2] after the statement above has replaced it. */
49 t[3] = ((xC2S6 * t[2]) >> 16) + ((xC6S2 * y[6]) >> 16);
/* NOTE(review): as far as this excerpt shows, t[1] still holds the
 * xC4S4-scaled value from above; the paired term reads y[7], so check
 * whether y[1] was intended here and in the t[7] line below. */
50 t[4] = ((xC7S1 * t[1]) >> 16) - ((xC1S7 * y[7]) >> 16);
/* NOTE(review): same pattern as t[2]/t[3] -- t[5] is read with no visible
 * earlier write, and the t[6] line reads t[5] after it has been replaced. */
51 t[5] = ((xC3S5 * t[5]) >> 16) - ((xC5S3 * y[3]) >> 16);
52 t[6] = ((xC5S3 * t[5]) >> 16) + ((xC3S5 * y[3]) >> 16);
53 t[7] = ((xC1S7 * t[1]) >> 16) + ((xC7S1 * y[7]) >> 16);
/* t[5] is negated before the xC4S4 scaling; t[6] is scaled as-is. */
58 t[5] = (xC4S4 * (-t[5])) >> 16;
64 t[6] = (xC4S4 * t[6]) >> 16;
/* Dequantise coefficients into DCT_block.
 *   dequant_coeffs  16-bit multipliers, indexed by i.
 *   quantized_list  16-bit quantised values, indexed by i.
 *   DCT_block       32-bit destination, indexed through dezigzag_index[i].
 * The loop that drives i is not visible in this excerpt. */
114 static void dequant_slow( ogg_int16_t
* dequant_coeffs
,
115 ogg_int16_t
* quantized_list
,
116 ogg_int32_t
* DCT_block
) {
/* Product of the i-th quantised value and the i-th multiplier, stored at the
 * position that dezigzag_index maps i to. */
119 DCT_block
[dezigzag_index
[i
]] = quantized_list
[i
] * dequant_coeffs
[i
];
/* Inverse DCT of one block.
 *   InputData    quantised coefficients, handed to dequant_slow().
 *   QuantMatrix  dequantisation multipliers, handed to dequant_slow().
 *   OutputData   destination, written through op with a stride of 8.
 * The work is done in a 64-entry 32-bit scratch buffer: dequantise into it,
 * run a pass over 8 rows in place (ip += 8 per row), rewind ip, then run a
 * pass over 8 columns (ip++ per column, elements 8 apart) whose results go
 * to op.  The declarations of loop, t1 and t2 and the arithmetic that
 * combines t1/t2 into _A.._Hd are not visible in this excerpt. */
124 void IDctSlow__c( Q_LIST_ENTRY
* InputData
,
125 ogg_int16_t
*QuantMatrix
,
126 ogg_int16_t
* OutputData
) {
127 ogg_int32_t IntermediateData
[64];
128 ogg_int32_t
* ip
= IntermediateData
;
129 ogg_int16_t
* op
= OutputData
;
131 ogg_int32_t _A
, _B
, _C
, _D
, _Ad
, _Bd
, _Cd
, _Dd
, _E
, _F
, _G
, _H
;
132 ogg_int32_t _Ed
, _Gd
, _Add
, _Bdd
, _Fd
, _Hd
;
137 dequant_slow( QuantMatrix
, InputData
, IntermediateData
);
139 /* Inverse DCT on the rows now */
140 for ( loop
= 0; loop
< 8; loop
++){
141 /* Check for non-zero values: the OR of the eight row entries is
 * non-zero exactly when at least one of them is. */
142 if ( ip
[0] | ip
[1] | ip
[2] | ip
[3] | ip
[4] | ip
[5] | ip
[6] | ip
[7] ) {
/* Each t1/t2 pair below multiplies two row entries by a pair of the xC*S*
 * constants; the statements that combine them are not visible here. */
143 t1
= (xC1S7
* ip
[1]);
144 t2
= (xC7S1
* ip
[7]);
149 t1
= (xC7S1
* ip
[1]);
150 t2
= (xC1S7
* ip
[7]);
155 t1
= (xC3S5
* ip
[3]);
156 t2
= (xC5S3
* ip
[5]);
161 t1
= (xC3S5
* ip
[5]);
162 t2
= (xC5S3
* ip
[3]);
/* The differences and sums fed to xC4S4 are narrowed to ogg_int16_t before
 * the multiply. */
167 t1
= (xC4S4
* (ogg_int16_t
)(_A
- _C
));
171 t1
= (xC4S4
* (ogg_int16_t
)(_B
- _D
));
179 t1
= (xC4S4
* (ogg_int16_t
)(ip
[0] + ip
[4]));
183 t1
= (xC4S4
* (ogg_int16_t
)(ip
[0] - ip
[4]));
187 t1
= (xC2S6
* ip
[2]);
188 t2
= (xC6S2
* ip
[6]);
193 t1
= (xC6S2
* ip
[2]);
194 t2
= (xC2S6
* ip
[6]);
209 /* Final sequence of operations over-write original inputs.  The row
 * results are narrowed to ogg_int16_t and stored back into the same row of
 * the scratch buffer; the ">> 0" applies no scaling at this stage. */
210 ip
[0] = (ogg_int16_t
)((_Gd
+ _Cd
) >> 0);
211 ip
[7] = (ogg_int16_t
)((_Gd
- _Cd
) >> 0);
213 ip
[1] = (ogg_int16_t
)((_Add
+ _Hd
) >> 0);
214 ip
[2] = (ogg_int16_t
)((_Add
- _Hd
) >> 0);
216 ip
[3] = (ogg_int16_t
)((_Ed
+ _Dd
) >> 0);
217 ip
[4] = (ogg_int16_t
)((_Ed
- _Dd
) >> 0);
219 ip
[5] = (ogg_int16_t
)((_Fd
+ _Bdd
) >> 0);
220 ip
[6] = (ogg_int16_t
)((_Fd
- _Bdd
) >> 0);
224 ip
+= 8; /* next row */
/* Rewind to the start of the scratch buffer for the column pass. */
227 ip
= IntermediateData
;
229 for ( loop
= 0; loop
< 8; loop
++){
230 /* Check for non-zero values (bitwise or faster than ||) */
231 if ( ip
[0 * 8] | ip
[1 * 8] | ip
[2 * 8] | ip
[3 * 8] |
232 ip
[4 * 8] | ip
[5 * 8] | ip
[6 * 8] | ip
[7 * 8] ) {
/* Same multiplications as in the row pass, with the entries taken 8 apart
 * (one per row of the current column). */
234 t1
= (xC1S7
* ip
[1*8]);
235 t2
= (xC7S1
* ip
[7*8]);
240 t1
= (xC7S1
* ip
[1*8]);
241 t2
= (xC1S7
* ip
[7*8]);
246 t1
= (xC3S5
* ip
[3*8]);
247 t2
= (xC5S3
* ip
[5*8]);
252 t1
= (xC3S5
* ip
[5*8]);
253 t2
= (xC5S3
* ip
[3*8]);
258 t1
= (xC4S4
* (ogg_int16_t
)(_A
- _C
));
262 t1
= (xC4S4
* (ogg_int16_t
)(_B
- _D
));
270 t1
= (xC4S4
* (ogg_int16_t
)(ip
[0*8] + ip
[4*8]));
274 t1
= (xC4S4
* (ogg_int16_t
)(ip
[0*8] - ip
[4*8]));
278 t1
= (xC2S6
* ip
[2*8]);
279 t2
= (xC6S2
* ip
[6*8]);
284 t1
= (xC6S2
* ip
[2*8]);
285 t2
= (xC2S6
* ip
[6*8]);
/* Bias the four terms that appear in every output sum/difference by
 * IdctAdjustBeforeShift (8, half of 1 << 4) so that the ">> 4" below rounds
 * instead of truncating. */
299 _Gd
+= IdctAdjustBeforeShift
;
300 _Add
+= IdctAdjustBeforeShift
;
301 _Ed
+= IdctAdjustBeforeShift
;
302 _Fd
+= IdctAdjustBeforeShift
;
304 /* Final sequence of operations: the column results are scaled down by
 * >> 4, narrowed to ogg_int16_t and written to the output through op
 * (stride 8). */
305 op
[0*8] = (ogg_int16_t
)((_Gd
+ _Cd
) >> 4);
306 op
[7*8] = (ogg_int16_t
)((_Gd
- _Cd
) >> 4);
308 op
[1*8] = (ogg_int16_t
)((_Add
+ _Hd
) >> 4);
309 op
[2*8] = (ogg_int16_t
)((_Add
- _Hd
) >> 4);
311 op
[3*8] = (ogg_int16_t
)((_Ed
+ _Dd
) >> 4);
312 op
[4*8] = (ogg_int16_t
)((_Ed
- _Dd
) >> 4);
314 op
[5*8] = (ogg_int16_t
)((_Fd
+ _Bdd
) >> 4);
315 op
[6*8] = (ogg_int16_t
)((_Fd
- _Bdd
) >> 4);
327 ip
++; /* next column */
332 /************************
341 *************************/
/* Clear the start of DCT_block, then dequantise coefficients into it.
 *   dequant_coeffs  16-bit multipliers, indexed by i.
 *   quantized_list  16-bit quantised values, indexed by i.
 *   DCT_block       32-bit destination, indexed through dezigzag_index[i].
 * The loop that drives i (and therefore how many coefficients are handled)
 * is not visible in this excerpt. */
343 static void dequant_slow10( ogg_int16_t
* dequant_coeffs
,
344 ogg_int16_t
* quantized_list
,
345 ogg_int32_t
* DCT_block
){
/* The count is in bytes: 128 bytes, not the whole buffer.  Assuming
 * ogg_int32_t is 4 bytes wide, that is the first 32 entries (rows 0-3 of an
 * 8-wide block).  The visible code in IDct10__c only touches rows 0-3 of
 * the 64-entry buffer it passes in.
 * NOTE(review): confirm nothing relies on entries 32..63 being zero, and
 * that <string.h> is reachable through the project headers for memset. */
347 memset(DCT_block
,0, 128);
/* Product of the i-th quantised value and the i-th multiplier, stored at the
 * position that dezigzag_index maps i to. */
349 DCT_block
[dezigzag_index
[i
]] = quantized_list
[i
] * dequant_coeffs
[i
];
/* Reduced inverse DCT of one block.
 *   InputData    quantised coefficients, handed to dequant_slow10().
 *   QuantMatrix  dequantisation multipliers, handed to dequant_slow10().
 *   OutputData   destination, written through op with a stride of 8.
 * Compared with the full version, the row pass visits only 4 rows and reads
 * only entries 0..3 of each (while still writing all 8 entries of the row);
 * the column pass visits all 8 columns but reads only rows 0..3 of each.
 * Presumably this is for blocks whose non-zero coefficients all fall in that
 * corner -- the name suggests the first 10; confirm against the caller.
 * The declarations of loop, t1 and t2 and the arithmetic that combines
 * t1/t2 into _A.._Hd are not visible in this excerpt. */
353 void IDct10__c( Q_LIST_ENTRY
* InputData
,
354 ogg_int16_t
*QuantMatrix
,
355 ogg_int16_t
* OutputData
){
356 ogg_int32_t IntermediateData
[64];
357 ogg_int32_t
* ip
= IntermediateData
;
358 ogg_int16_t
* op
= OutputData
;
360 ogg_int32_t _A
, _B
, _C
, _D
, _Ad
, _Bd
, _Cd
, _Dd
, _E
, _F
, _G
, _H
;
361 ogg_int32_t _Ed
, _Gd
, _Add
, _Bdd
, _Fd
, _Hd
;
366 dequant_slow10( QuantMatrix
, InputData
, IntermediateData
);
368 /* Inverse DCT on the rows now (only the first four rows) */
369 for ( loop
= 0; loop
< 4; loop
++){
370 /* Check for non-zero values among the first four entries of the row */
371 if ( ip
[0] | ip
[1] | ip
[2] | ip
[3] ){
/* Only ip[0]..ip[3] feed the multiplications in this pass. */
372 t1
= (xC1S7
* ip
[1]);
376 t1
= (xC7S1
* ip
[1]);
380 t1
= (xC3S5
* ip
[3]);
384 t2
= (xC5S3
* ip
[3]);
/* The differences fed to xC4S4 are narrowed to ogg_int16_t first. */
389 t1
= (xC4S4
* (ogg_int16_t
)(_A
- _C
));
393 t1
= (xC4S4
* (ogg_int16_t
)(_B
- _D
));
/* ip[0] is multiplied directly here, without an ogg_int16_t cast. */
401 t1
= (xC4S4
* ip
[0] );
407 t1
= (xC2S6
* ip
[2]);
411 t1
= (xC6S2
* ip
[2]);
425 /* Final sequence of operations over-write original inputs.  All eight
 * entries of the row are written, narrowed to ogg_int16_t; the ">> 0"
 * applies no scaling at this stage. */
426 ip
[0] = (ogg_int16_t
)((_Gd
+ _Cd
) >> 0);
427 ip
[7] = (ogg_int16_t
)((_Gd
- _Cd
) >> 0);
429 ip
[1] = (ogg_int16_t
)((_Add
+ _Hd
) >> 0);
430 ip
[2] = (ogg_int16_t
)((_Add
- _Hd
) >> 0);
432 ip
[3] = (ogg_int16_t
)((_Ed
+ _Dd
) >> 0);
433 ip
[4] = (ogg_int16_t
)((_Ed
- _Dd
) >> 0);
435 ip
[5] = (ogg_int16_t
)((_Fd
+ _Bdd
) >> 0);
436 ip
[6] = (ogg_int16_t
)((_Fd
- _Bdd
) >> 0);
440 ip
+= 8; /* next row */
/* Rewind to the start of the scratch buffer for the column pass. */
443 ip
= IntermediateData
;
445 for ( loop
= 0; loop
< 8; loop
++) {
446 /* Check for non-zero values (bitwise or faster than ||); only rows
 * 0..3 of the column are examined. */
447 if ( ip
[0 * 8] | ip
[1 * 8] | ip
[2 * 8] | ip
[3 * 8] ) {
/* Only rows 0..3 of the column feed the multiplications in this pass. */
449 t1
= (xC1S7
* ip
[1*8]);
453 t1
= (xC7S1
* ip
[1*8]);
457 t1
= (xC3S5
* ip
[3*8]);
461 t2
= (xC5S3
* ip
[3*8]);
466 t1
= (xC4S4
* (ogg_int16_t
)(_A
- _C
));
470 t1
= (xC4S4
* (ogg_int16_t
)(_B
- _D
));
478 t1
= (xC4S4
* ip
[0*8]);
483 t1
= (xC2S6
* ip
[2*8]);
487 t1
= (xC6S2
* ip
[2*8]);
/* Bias the four terms that appear in every output sum/difference by
 * IdctAdjustBeforeShift (8, half of 1 << 4) so that the ">> 4" below rounds
 * instead of truncating. */
501 _Gd
+= IdctAdjustBeforeShift
;
502 _Add
+= IdctAdjustBeforeShift
;
503 _Ed
+= IdctAdjustBeforeShift
;
504 _Fd
+= IdctAdjustBeforeShift
;
506 /* Final sequence of operations: the column results are scaled down by
 * >> 4, narrowed to ogg_int16_t and written to the output through op
 * (stride 8). */
507 op
[0*8] = (ogg_int16_t
)((_Gd
+ _Cd
) >> 4);
508 op
[7*8] = (ogg_int16_t
)((_Gd
- _Cd
) >> 4);
510 op
[1*8] = (ogg_int16_t
)((_Add
+ _Hd
) >> 4);
511 op
[2*8] = (ogg_int16_t
)((_Add
- _Hd
) >> 4);
513 op
[3*8] = (ogg_int16_t
)((_Ed
+ _Dd
) >> 4);
514 op
[4*8] = (ogg_int16_t
)((_Ed
- _Dd
) >> 4);
516 op
[5*8] = (ogg_int16_t
)((_Fd
+ _Bdd
) >> 4);
517 op
[6*8] = (ogg_int16_t
)((_Fd
- _Bdd
) >> 4);
529 ip
++; /* next column */
534 /***************************
543 **************************/
/* Single-coefficient case: only InputData[0] and QuantMatrix[0] are read.
 * One value is computed from them and copied into all 64 entries of
 * OutputData.  The declarations of OutD and loop are not visible in this
 * excerpt. */
545 void IDct1( Q_LIST_ENTRY
* InputData
,
546 ogg_int16_t
*QuantMatrix
,
547 ogg_int16_t
* OutputData
){
/* (InputData[0] * QuantMatrix[0] + 15) is cast to ogg_int32_t, shifted right
 * by 5, and the result is narrowed to ogg_int16_t. */
552 OutD
=(ogg_int16_t
) ((ogg_int32_t
)(InputData
[0]*QuantMatrix
[0]+15)>>5);
/* Fill the whole 64-entry output block with that one value. */
554 for(loop
=0;loop
<64;loop
++)
555 OutputData
[loop
]=OutD
;
559 void dsp_idct_init (DspFunctions
*funcs
, ogg_uint32_t cpu_flags
)
561 funcs
->IDctSlow
= IDctSlow__c
;
562 funcs
->IDct10
= IDct10__c
;
563 funcs
->IDct3
= IDct10__c
;
565 if (cpu_flags
& CPU_X86_MMX
) {
566 dsp_mmx_idct_init(funcs
);