1 /********************************************************************
3 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
8 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
9 * by the Xiph.Org Foundation http://www.xiph.org/ *
11 ********************************************************************
16 ********************************************************************/
#include <stdlib.h>
#include "codec_internal.h"
/* Average of two sample values, computed in int; the division truncates. */
#define DSP_OP_AVG(a,b) ((((int)(a)) + ((int)(b)))/2)
/* Signed difference a - b, computed in int. */
#define DSP_OP_DIFF(a,b) (((int)(a)) - ((int)(b)))
/* Absolute difference |a - b|, computed in int (uses abs() from <stdlib.h>). */
#define DSP_OP_ABS_DIFF(a,b) abs((((int)(a)) - ((int)(b))))
25 static void sub8x8__c (unsigned char *FiltPtr
, unsigned char *ReconPtr
,
26 ogg_int16_t
*DctInputPtr
, ogg_uint32_t PixelsPerLine
,
27 ogg_uint32_t ReconPixelsPerLine
) {
30 /* For each block row */
32 DctInputPtr
[0] = (ogg_int16_t
) DSP_OP_DIFF (FiltPtr
[0], ReconPtr
[0]);
33 DctInputPtr
[1] = (ogg_int16_t
) DSP_OP_DIFF (FiltPtr
[1], ReconPtr
[1]);
34 DctInputPtr
[2] = (ogg_int16_t
) DSP_OP_DIFF (FiltPtr
[2], ReconPtr
[2]);
35 DctInputPtr
[3] = (ogg_int16_t
) DSP_OP_DIFF (FiltPtr
[3], ReconPtr
[3]);
36 DctInputPtr
[4] = (ogg_int16_t
) DSP_OP_DIFF (FiltPtr
[4], ReconPtr
[4]);
37 DctInputPtr
[5] = (ogg_int16_t
) DSP_OP_DIFF (FiltPtr
[5], ReconPtr
[5]);
38 DctInputPtr
[6] = (ogg_int16_t
) DSP_OP_DIFF (FiltPtr
[6], ReconPtr
[6]);
39 DctInputPtr
[7] = (ogg_int16_t
) DSP_OP_DIFF (FiltPtr
[7], ReconPtr
[7]);
42 FiltPtr
+= PixelsPerLine
;
43 ReconPtr
+= ReconPixelsPerLine
;
48 static void sub8x8_128__c (unsigned char *FiltPtr
, ogg_int16_t
*DctInputPtr
,
49 ogg_uint32_t PixelsPerLine
) {
51 /* For each block row */
53 /* INTRA mode so code raw image data */
54 /* We convert the data to 8 bit signed (by subtracting 128) as
55 this reduces the internal precision requirments in the DCT
57 DctInputPtr
[0] = (ogg_int16_t
) DSP_OP_DIFF (FiltPtr
[0], 128);
58 DctInputPtr
[1] = (ogg_int16_t
) DSP_OP_DIFF (FiltPtr
[1], 128);
59 DctInputPtr
[2] = (ogg_int16_t
) DSP_OP_DIFF (FiltPtr
[2], 128);
60 DctInputPtr
[3] = (ogg_int16_t
) DSP_OP_DIFF (FiltPtr
[3], 128);
61 DctInputPtr
[4] = (ogg_int16_t
) DSP_OP_DIFF (FiltPtr
[4], 128);
62 DctInputPtr
[5] = (ogg_int16_t
) DSP_OP_DIFF (FiltPtr
[5], 128);
63 DctInputPtr
[6] = (ogg_int16_t
) DSP_OP_DIFF (FiltPtr
[6], 128);
64 DctInputPtr
[7] = (ogg_int16_t
) DSP_OP_DIFF (FiltPtr
[7], 128);
67 FiltPtr
+= PixelsPerLine
;
72 static void sub8x8avg2__c (unsigned char *FiltPtr
, unsigned char *ReconPtr1
,
73 unsigned char *ReconPtr2
, ogg_int16_t
*DctInputPtr
,
74 ogg_uint32_t PixelsPerLine
,
75 ogg_uint32_t ReconPixelsPerLine
)
79 /* For each block row */
81 DctInputPtr
[0] = (ogg_int16_t
) DSP_OP_DIFF (FiltPtr
[0], DSP_OP_AVG (ReconPtr1
[0], ReconPtr2
[0]));
82 DctInputPtr
[1] = (ogg_int16_t
) DSP_OP_DIFF (FiltPtr
[1], DSP_OP_AVG (ReconPtr1
[1], ReconPtr2
[1]));
83 DctInputPtr
[2] = (ogg_int16_t
) DSP_OP_DIFF (FiltPtr
[2], DSP_OP_AVG (ReconPtr1
[2], ReconPtr2
[2]));
84 DctInputPtr
[3] = (ogg_int16_t
) DSP_OP_DIFF (FiltPtr
[3], DSP_OP_AVG (ReconPtr1
[3], ReconPtr2
[3]));
85 DctInputPtr
[4] = (ogg_int16_t
) DSP_OP_DIFF (FiltPtr
[4], DSP_OP_AVG (ReconPtr1
[4], ReconPtr2
[4]));
86 DctInputPtr
[5] = (ogg_int16_t
) DSP_OP_DIFF (FiltPtr
[5], DSP_OP_AVG (ReconPtr1
[5], ReconPtr2
[5]));
87 DctInputPtr
[6] = (ogg_int16_t
) DSP_OP_DIFF (FiltPtr
[6], DSP_OP_AVG (ReconPtr1
[6], ReconPtr2
[6]));
88 DctInputPtr
[7] = (ogg_int16_t
) DSP_OP_DIFF (FiltPtr
[7], DSP_OP_AVG (ReconPtr1
[7], ReconPtr2
[7]));
91 FiltPtr
+= PixelsPerLine
;
92 ReconPtr1
+= ReconPixelsPerLine
;
93 ReconPtr2
+= ReconPixelsPerLine
;
98 static ogg_uint32_t
row_sad8__c (unsigned char *Src1
, unsigned char *Src2
)
100 ogg_uint32_t SadValue
;
101 ogg_uint32_t SadValue1
;
103 SadValue
= DSP_OP_ABS_DIFF (Src1
[0], Src2
[0]) +
104 DSP_OP_ABS_DIFF (Src1
[1], Src2
[1]) +
105 DSP_OP_ABS_DIFF (Src1
[2], Src2
[2]) +
106 DSP_OP_ABS_DIFF (Src1
[3], Src2
[3]);
108 SadValue1
= DSP_OP_ABS_DIFF (Src1
[4], Src2
[4]) +
109 DSP_OP_ABS_DIFF (Src1
[5], Src2
[5]) +
110 DSP_OP_ABS_DIFF (Src1
[6], Src2
[6]) +
111 DSP_OP_ABS_DIFF (Src1
[7], Src2
[7]);
113 SadValue
= ( SadValue
> SadValue1
) ? SadValue
: SadValue1
;
118 static ogg_uint32_t
col_sad8x8__c (unsigned char *Src1
, unsigned char *Src2
,
121 ogg_uint32_t SadValue
[8] = {0,0,0,0,0,0,0,0};
122 ogg_uint32_t SadValue2
[8] = {0,0,0,0,0,0,0,0};
123 ogg_uint32_t MaxSad
= 0;
126 for ( i
= 0; i
< 4; i
++ ){
127 SadValue
[0] += abs(Src1
[0] - Src2
[0]);
128 SadValue
[1] += abs(Src1
[1] - Src2
[1]);
129 SadValue
[2] += abs(Src1
[2] - Src2
[2]);
130 SadValue
[3] += abs(Src1
[3] - Src2
[3]);
131 SadValue
[4] += abs(Src1
[4] - Src2
[4]);
132 SadValue
[5] += abs(Src1
[5] - Src2
[5]);
133 SadValue
[6] += abs(Src1
[6] - Src2
[6]);
134 SadValue
[7] += abs(Src1
[7] - Src2
[7]);
140 for ( i
= 0; i
< 4; i
++ ){
141 SadValue2
[0] += abs(Src1
[0] - Src2
[0]);
142 SadValue2
[1] += abs(Src1
[1] - Src2
[1]);
143 SadValue2
[2] += abs(Src1
[2] - Src2
[2]);
144 SadValue2
[3] += abs(Src1
[3] - Src2
[3]);
145 SadValue2
[4] += abs(Src1
[4] - Src2
[4]);
146 SadValue2
[5] += abs(Src1
[5] - Src2
[5]);
147 SadValue2
[6] += abs(Src1
[6] - Src2
[6]);
148 SadValue2
[7] += abs(Src1
[7] - Src2
[7]);
154 for ( i
= 0; i
< 8; i
++ ){
155 if ( SadValue
[i
] > MaxSad
)
156 MaxSad
= SadValue
[i
];
157 if ( SadValue2
[i
] > MaxSad
)
158 MaxSad
= SadValue2
[i
];
164 static ogg_uint32_t
sad8x8__c (unsigned char *ptr1
, ogg_uint32_t stride1
,
165 unsigned char *ptr2
, ogg_uint32_t stride2
)
168 ogg_uint32_t sad
= 0;
171 sad
+= DSP_OP_ABS_DIFF(ptr1
[0], ptr2
[0]);
172 sad
+= DSP_OP_ABS_DIFF(ptr1
[1], ptr2
[1]);
173 sad
+= DSP_OP_ABS_DIFF(ptr1
[2], ptr2
[2]);
174 sad
+= DSP_OP_ABS_DIFF(ptr1
[3], ptr2
[3]);
175 sad
+= DSP_OP_ABS_DIFF(ptr1
[4], ptr2
[4]);
176 sad
+= DSP_OP_ABS_DIFF(ptr1
[5], ptr2
[5]);
177 sad
+= DSP_OP_ABS_DIFF(ptr1
[6], ptr2
[6]);
178 sad
+= DSP_OP_ABS_DIFF(ptr1
[7], ptr2
[7]);
180 /* Step to next row of block. */
188 static ogg_uint32_t
sad8x8_thres__c (unsigned char *ptr1
, ogg_uint32_t stride1
,
189 unsigned char *ptr2
, ogg_uint32_t stride2
,
193 ogg_uint32_t sad
= 0;
196 sad
+= DSP_OP_ABS_DIFF(ptr1
[0], ptr2
[0]);
197 sad
+= DSP_OP_ABS_DIFF(ptr1
[1], ptr2
[1]);
198 sad
+= DSP_OP_ABS_DIFF(ptr1
[2], ptr2
[2]);
199 sad
+= DSP_OP_ABS_DIFF(ptr1
[3], ptr2
[3]);
200 sad
+= DSP_OP_ABS_DIFF(ptr1
[4], ptr2
[4]);
201 sad
+= DSP_OP_ABS_DIFF(ptr1
[5], ptr2
[5]);
202 sad
+= DSP_OP_ABS_DIFF(ptr1
[6], ptr2
[6]);
203 sad
+= DSP_OP_ABS_DIFF(ptr1
[7], ptr2
[7]);
208 /* Step to next row of block. */
216 static ogg_uint32_t
sad8x8_xy2_thres__c (unsigned char *SrcData
, ogg_uint32_t SrcStride
,
217 unsigned char *RefDataPtr1
,
218 unsigned char *RefDataPtr2
, ogg_uint32_t RefStride
,
222 ogg_uint32_t sad
= 0;
225 sad
+= DSP_OP_ABS_DIFF(SrcData
[0], DSP_OP_AVG (RefDataPtr1
[0], RefDataPtr2
[0]));
226 sad
+= DSP_OP_ABS_DIFF(SrcData
[1], DSP_OP_AVG (RefDataPtr1
[1], RefDataPtr2
[1]));
227 sad
+= DSP_OP_ABS_DIFF(SrcData
[2], DSP_OP_AVG (RefDataPtr1
[2], RefDataPtr2
[2]));
228 sad
+= DSP_OP_ABS_DIFF(SrcData
[3], DSP_OP_AVG (RefDataPtr1
[3], RefDataPtr2
[3]));
229 sad
+= DSP_OP_ABS_DIFF(SrcData
[4], DSP_OP_AVG (RefDataPtr1
[4], RefDataPtr2
[4]));
230 sad
+= DSP_OP_ABS_DIFF(SrcData
[5], DSP_OP_AVG (RefDataPtr1
[5], RefDataPtr2
[5]));
231 sad
+= DSP_OP_ABS_DIFF(SrcData
[6], DSP_OP_AVG (RefDataPtr1
[6], RefDataPtr2
[6]));
232 sad
+= DSP_OP_ABS_DIFF(SrcData
[7], DSP_OP_AVG (RefDataPtr1
[7], RefDataPtr2
[7]));
237 /* Step to next row of block. */
238 SrcData
+= SrcStride
;
239 RefDataPtr1
+= RefStride
;
240 RefDataPtr2
+= RefStride
;
246 static ogg_uint32_t
intra8x8_err__c (unsigned char *DataPtr
, ogg_uint32_t Stride
)
250 ogg_uint32_t XXSum
=0;
253 /* Examine alternate pixel locations. */
255 XXSum
+= DataPtr
[0]*DataPtr
[0];
257 XXSum
+= DataPtr
[1]*DataPtr
[1];
259 XXSum
+= DataPtr
[2]*DataPtr
[2];
261 XXSum
+= DataPtr
[3]*DataPtr
[3];
263 XXSum
+= DataPtr
[4]*DataPtr
[4];
265 XXSum
+= DataPtr
[5]*DataPtr
[5];
267 XXSum
+= DataPtr
[6]*DataPtr
[6];
269 XXSum
+= DataPtr
[7]*DataPtr
[7];
271 /* Step to next row of block. */
275 /* Compute population variance as mis-match metric. */
276 return (( (XXSum
<<6) - XSum
*XSum
) );
279 static ogg_uint32_t
inter8x8_err__c (unsigned char *SrcData
, ogg_uint32_t SrcStride
,
280 unsigned char *RefDataPtr
, ogg_uint32_t RefStride
)
284 ogg_uint32_t XXSum
=0;
288 DiffVal
= DSP_OP_DIFF (SrcData
[0], RefDataPtr
[0]);
290 XXSum
+= DiffVal
*DiffVal
;
292 DiffVal
= DSP_OP_DIFF (SrcData
[1], RefDataPtr
[1]);
294 XXSum
+= DiffVal
*DiffVal
;
296 DiffVal
= DSP_OP_DIFF (SrcData
[2], RefDataPtr
[2]);
298 XXSum
+= DiffVal
*DiffVal
;
300 DiffVal
= DSP_OP_DIFF (SrcData
[3], RefDataPtr
[3]);
302 XXSum
+= DiffVal
*DiffVal
;
304 DiffVal
= DSP_OP_DIFF (SrcData
[4], RefDataPtr
[4]);
306 XXSum
+= DiffVal
*DiffVal
;
308 DiffVal
= DSP_OP_DIFF (SrcData
[5], RefDataPtr
[5]);
310 XXSum
+= DiffVal
*DiffVal
;
312 DiffVal
= DSP_OP_DIFF (SrcData
[6], RefDataPtr
[6]);
314 XXSum
+= DiffVal
*DiffVal
;
316 DiffVal
= DSP_OP_DIFF (SrcData
[7], RefDataPtr
[7]);
318 XXSum
+= DiffVal
*DiffVal
;
320 /* Step to next row of block. */
321 SrcData
+= SrcStride
;
322 RefDataPtr
+= RefStride
;
325 /* Compute and return population variance as mis-match metric. */
326 return (( (XXSum
<<6) - XSum
*XSum
));
329 static ogg_uint32_t
inter8x8_err_xy2__c (unsigned char *SrcData
, ogg_uint32_t SrcStride
,
330 unsigned char *RefDataPtr1
,
331 unsigned char *RefDataPtr2
, ogg_uint32_t RefStride
)
335 ogg_uint32_t XXSum
=0;
339 DiffVal
= DSP_OP_DIFF(SrcData
[0], DSP_OP_AVG (RefDataPtr1
[0], RefDataPtr2
[0]));
341 XXSum
+= DiffVal
*DiffVal
;
343 DiffVal
= DSP_OP_DIFF(SrcData
[1], DSP_OP_AVG (RefDataPtr1
[1], RefDataPtr2
[1]));
345 XXSum
+= DiffVal
*DiffVal
;
347 DiffVal
= DSP_OP_DIFF(SrcData
[2], DSP_OP_AVG (RefDataPtr1
[2], RefDataPtr2
[2]));
349 XXSum
+= DiffVal
*DiffVal
;
351 DiffVal
= DSP_OP_DIFF(SrcData
[3], DSP_OP_AVG (RefDataPtr1
[3], RefDataPtr2
[3]));
353 XXSum
+= DiffVal
*DiffVal
;
355 DiffVal
= DSP_OP_DIFF(SrcData
[4], DSP_OP_AVG (RefDataPtr1
[4], RefDataPtr2
[4]));
357 XXSum
+= DiffVal
*DiffVal
;
359 DiffVal
= DSP_OP_DIFF(SrcData
[5], DSP_OP_AVG (RefDataPtr1
[5], RefDataPtr2
[5]));
361 XXSum
+= DiffVal
*DiffVal
;
363 DiffVal
= DSP_OP_DIFF(SrcData
[6], DSP_OP_AVG (RefDataPtr1
[6], RefDataPtr2
[6]));
365 XXSum
+= DiffVal
*DiffVal
;
367 DiffVal
= DSP_OP_DIFF(SrcData
[7], DSP_OP_AVG (RefDataPtr1
[7], RefDataPtr2
[7]));
369 XXSum
+= DiffVal
*DiffVal
;
371 /* Step to next row of block. */
372 SrcData
+= SrcStride
;
373 RefDataPtr1
+= RefStride
;
374 RefDataPtr2
+= RefStride
;
377 /* Compute and return population variance as mis-match metric. */
378 return (( (XXSum
<<6) - XSum
*XSum
));
/* Does nothing; dsp_init() installs it as the save_fpu/restore_fpu hook. */
static void nop (void) { /* NOP */ }
383 void dsp_init(DspFunctions
*funcs
)
385 /* TH_DEBUG("setting dsp functions to C defaults.\n"); */
386 funcs
->save_fpu
= nop
;
387 funcs
->restore_fpu
= nop
;
388 funcs
->sub8x8
= sub8x8__c
;
389 funcs
->sub8x8_128
= sub8x8_128__c
;
390 funcs
->sub8x8avg2
= sub8x8avg2__c
;
391 funcs
->row_sad8
= row_sad8__c
;
392 funcs
->col_sad8x8
= col_sad8x8__c
;
393 funcs
->sad8x8
= sad8x8__c
;
394 funcs
->sad8x8_thres
= sad8x8_thres__c
;
395 funcs
->sad8x8_xy2_thres
= sad8x8_xy2_thres__c
;
396 funcs
->intra8x8_err
= intra8x8_err__c
;
397 funcs
->inter8x8_err
= inter8x8_err__c
;
398 funcs
->inter8x8_err_xy2
= inter8x8_err_xy2__c
;
401 void dsp_static_init(DspFunctions
*funcs
)
403 ogg_uint32_t cpuflags
;
405 cpuflags
= cpu_init ();
408 dsp_recon_init (funcs
, cpuflags
);
409 dsp_dct_init (funcs
, cpuflags
);
411 if (cpuflags
& CPU_X86_MMX
) {
415 /* This is implemented for win32 yet */
416 if (cpuflags
& CPU_X86_MMXEXT
) {
417 dsp_mmxext_init(funcs
);