/********************************************************************
 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003                *
 * by the Xiph.Org Foundation http://www.xiph.org/                  *
 ********************************************************************
 ********************************************************************/
18 #include "codec_internal.h"
20 static ogg_int32_t xC1S7
= 64277;
21 static ogg_int32_t xC2S6
= 60547;
22 static ogg_int32_t xC3S5
= 54491;
23 static ogg_int32_t xC4S4
= 46341;
24 static ogg_int32_t xC5S3
= 36410;
25 static ogg_int32_t xC6S2
= 25080;
26 static ogg_int32_t xC7S1
= 12785;
/*
 * SIGNBITDUPPED(X) evaluates to -1 (every bit set) when X is negative and
 * to 0 otherwise.  It is written as a comparison rather than as a mask and
 * shift of bit 31 so that the result does not depend on the width of X, on
 * how an out-of-range value converts to a signed type, or on how a
 * negative value is shifted right.
 *
 * DOROUND(X) adds 0xffff to negative values and leaves non-negative values
 * unchanged.  Applied just before an arithmetic ">> 16", it makes that
 * shift truncate toward zero for both signs.
 *
 * Both macros evaluate X more than once; pass only side-effect-free
 * expressions.
 */
#define SIGNBITDUPPED(X) ((X) < 0 ? -1 : 0)
#define DOROUND(X) ( (SIGNBITDUPPED(X) & (0xffff)) + (X) )
31 static void fdct_short__c ( ogg_int16_t
* InputData
, ogg_int16_t
* OutputData
){
34 ogg_int32_t is07
, is12
, is34
, is56
;
35 ogg_int32_t is0734
, is1256
;
36 ogg_int32_t id07
, id12
, id34
, id56
;
38 ogg_int32_t irot_input_x
, irot_input_y
;
39 ogg_int32_t icommon_product1
; /* Re-used product (c4s4 * (s12 - s56)). */
40 ogg_int32_t icommon_product2
; /* Re-used product (c4s4 * (d12 + d56)). */
42 ogg_int32_t temp1
, temp2
; /* intermediate variable for computation */
44 ogg_int32_t InterData
[64];
45 ogg_int32_t
*ip
= InterData
;
46 ogg_int16_t
* op
= OutputData
;
47 for (loop
= 0; loop
< 8; loop
++){
48 /* Pre calculate some common sums and differences. */
49 is07
= InputData
[0] + InputData
[7];
50 is12
= InputData
[1] + InputData
[2];
51 is34
= InputData
[3] + InputData
[4];
52 is56
= InputData
[5] + InputData
[6];
54 id07
= InputData
[0] - InputData
[7];
55 id12
= InputData
[1] - InputData
[2];
56 id34
= InputData
[3] - InputData
[4];
57 id56
= InputData
[5] - InputData
[6];
62 /* Pre-Calculate some common product terms. */
63 icommon_product1
= xC4S4
*(is12
- is56
);
64 icommon_product1
= DOROUND(icommon_product1
);
65 icommon_product1
>>=16;
67 icommon_product2
= xC4S4
*(id12
+ id56
);
68 icommon_product2
= DOROUND(icommon_product2
);
69 icommon_product2
>>=16;
72 ip
[0] = (xC4S4
*(is0734
+ is1256
));
73 ip
[0] = DOROUND(ip
[0]);
76 ip
[4] = (xC4S4
*(is0734
- is1256
));
77 ip
[4] = DOROUND(ip
[4]);
80 /* Define inputs to rotation for outputs 2 and 6 */
81 irot_input_x
= id12
- id56
;
82 irot_input_y
= is07
- is34
;
84 /* Apply rotation for outputs 2 and 6. */
85 temp1
=xC6S2
*irot_input_x
;
88 temp2
=xC2S6
*irot_input_y
;
91 ip
[2] = temp1
+ temp2
;
93 temp1
=xC6S2
*irot_input_y
;
96 temp2
=xC2S6
*irot_input_x
;
99 ip
[6] = temp1
-temp2
;
101 /* Define inputs to rotation for outputs 1 and 7 */
102 irot_input_x
= icommon_product1
+ id07
;
103 irot_input_y
= -( id34
+ icommon_product2
);
105 /* Apply rotation for outputs 1 and 7. */
107 temp1
=xC1S7
*irot_input_x
;
108 temp1
=DOROUND(temp1
);
110 temp2
=xC7S1
*irot_input_y
;
111 temp2
=DOROUND(temp2
);
113 ip
[1] = temp1
- temp2
;
115 temp1
=xC7S1
*irot_input_x
;
116 temp1
=DOROUND(temp1
);
118 temp2
=xC1S7
*irot_input_y
;
119 temp2
=DOROUND(temp2
);
121 ip
[7] = temp1
+ temp2
;
123 /* Define inputs to rotation for outputs 3 and 5 */
124 irot_input_x
= id07
- icommon_product1
;
125 irot_input_y
= id34
- icommon_product2
;
127 /* Apply rotation for outputs 3 and 5. */
128 temp1
=xC3S5
*irot_input_x
;
129 temp1
=DOROUND(temp1
);
131 temp2
=xC5S3
*irot_input_y
;
132 temp2
=DOROUND(temp2
);
134 ip
[3] = temp1
- temp2
;
136 temp1
=xC5S3
*irot_input_x
;
137 temp1
=DOROUND(temp1
);
139 temp2
=xC3S5
*irot_input_y
;
140 temp2
=DOROUND(temp2
);
142 ip
[5] = temp1
+ temp2
;
144 /* Increment data pointer for next row. */
146 ip
+= 8; /* advance pointer to next row */
151 /* Performed DCT on rows, now transform the columns */
153 for (loop
= 0; loop
< 8; loop
++){
154 /* Pre calculate some common sums and differences. */
155 is07
= ip
[0 * 8] + ip
[7 * 8];
156 is12
= ip
[1 * 8] + ip
[2 * 8];
157 is34
= ip
[3 * 8] + ip
[4 * 8];
158 is56
= ip
[5 * 8] + ip
[6 * 8];
160 id07
= ip
[0 * 8] - ip
[7 * 8];
161 id12
= ip
[1 * 8] - ip
[2 * 8];
162 id34
= ip
[3 * 8] - ip
[4 * 8];
163 id56
= ip
[5 * 8] - ip
[6 * 8];
165 is0734
= is07
+ is34
;
166 is1256
= is12
+ is56
;
168 /* Pre-Calculate some common product terms. */
169 icommon_product1
= xC4S4
*(is12
- is56
) ;
170 icommon_product2
= xC4S4
*(id12
+ id56
) ;
171 icommon_product1
= DOROUND(icommon_product1
);
172 icommon_product2
= DOROUND(icommon_product2
);
173 icommon_product1
>>=16;
174 icommon_product2
>>=16;
177 temp1
= xC4S4
*(is0734
+ is1256
) ;
178 temp2
= xC4S4
*(is0734
- is1256
) ;
179 temp1
= DOROUND(temp1
);
180 temp2
= DOROUND(temp2
);
183 op
[0*8] = (ogg_int16_t
) temp1
;
184 op
[4*8] = (ogg_int16_t
) temp2
;
186 /* Define inputs to rotation for outputs 2 and 6 */
187 irot_input_x
= id12
- id56
;
188 irot_input_y
= is07
- is34
;
190 /* Apply rotation for outputs 2 and 6. */
191 temp1
=xC6S2
*irot_input_x
;
192 temp1
=DOROUND(temp1
);
194 temp2
=xC2S6
*irot_input_y
;
195 temp2
=DOROUND(temp2
);
197 op
[2*8] = (ogg_int16_t
) (temp1
+ temp2
);
199 temp1
=xC6S2
*irot_input_y
;
200 temp1
=DOROUND(temp1
);
202 temp2
=xC2S6
*irot_input_x
;
203 temp2
=DOROUND(temp2
);
205 op
[6*8] = (ogg_int16_t
) (temp1
-temp2
) ;
207 /* Define inputs to rotation for outputs 1 and 7 */
208 irot_input_x
= icommon_product1
+ id07
;
209 irot_input_y
= -( id34
+ icommon_product2
);
211 /* Apply rotation for outputs 1 and 7. */
212 temp1
=xC1S7
*irot_input_x
;
213 temp1
=DOROUND(temp1
);
215 temp2
=xC7S1
*irot_input_y
;
216 temp2
=DOROUND(temp2
);
218 op
[1*8] = (ogg_int16_t
) (temp1
- temp2
);
220 temp1
=xC7S1
*irot_input_x
;
221 temp1
=DOROUND(temp1
);
223 temp2
=xC1S7
*irot_input_y
;
224 temp2
=DOROUND(temp2
);
226 op
[7*8] = (ogg_int16_t
) (temp1
+ temp2
);
228 /* Define inputs to rotation for outputs 3 and 5 */
229 irot_input_x
= id07
- icommon_product1
;
230 irot_input_y
= id34
- icommon_product2
;
232 /* Apply rotation for outputs 3 and 5. */
233 temp1
=xC3S5
*irot_input_x
;
234 temp1
=DOROUND(temp1
);
236 temp2
=xC5S3
*irot_input_y
;
237 temp2
=DOROUND(temp2
);
239 op
[3*8] = (ogg_int16_t
) (temp1
- temp2
) ;
241 temp1
=xC5S3
*irot_input_x
;
242 temp1
=DOROUND(temp1
);
244 temp2
=xC3S5
*irot_input_y
;
245 temp2
=DOROUND(temp2
);
247 op
[5*8] = (ogg_int16_t
) (temp1
+ temp2
);
249 /* Increment data pointer for next column. */
255 void dsp_dct_init (DspFunctions
*funcs
, ogg_uint32_t cpu_flags
)
257 funcs
->fdct_short
= fdct_short__c
;
258 dsp_dct_decode_init(funcs
, cpu_flags
);
259 dsp_idct_init(funcs
, cpu_flags
);
261 if (cpu_flags
& CPU_X86_MMX
) {
262 dsp_mmx_fdct_init(funcs
);