Add Russian translation provided by Валерий Крувялис <valkru@mail.ru>
[xiph-mirror.git] / theora-old / lib / dsp.c
blob89126e9f0987277bae6607c3875264d86acc205f
1 /********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
7 * *
8 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
9 * by the Xiph.Org Foundation http://www.xiph.org/ *
10 * *
11 ********************************************************************
13 function:
14 last mod: $Id$
16 ********************************************************************/
18 #include <stdlib.h>
19 #include "codec_internal.h"
/* Pixel arithmetic helpers.  Both operands are converted to int before
   any arithmetic is performed. */

/* Mean of two values; the integer division truncates. */
#define DSP_OP_AVG(a,b) (((int)(a) + (int)(b)) / 2)
/* Signed difference a - b. */
#define DSP_OP_DIFF(a,b) ((int)(a) - (int)(b))
/* Absolute value of the signed difference a - b. */
#define DSP_OP_ABS_DIFF(a,b) abs(DSP_OP_DIFF(a,b))
25 static void sub8x8__c (unsigned char *FiltPtr, unsigned char *ReconPtr,
26 ogg_int16_t *DctInputPtr, ogg_uint32_t PixelsPerLine,
27 ogg_uint32_t ReconPixelsPerLine) {
28 int i;
30 /* For each block row */
31 for (i=8; i; i--) {
32 DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], ReconPtr[0]);
33 DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], ReconPtr[1]);
34 DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], ReconPtr[2]);
35 DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], ReconPtr[3]);
36 DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], ReconPtr[4]);
37 DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], ReconPtr[5]);
38 DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], ReconPtr[6]);
39 DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], ReconPtr[7]);
41 /* Start next row */
42 FiltPtr += PixelsPerLine;
43 ReconPtr += ReconPixelsPerLine;
44 DctInputPtr += 8;
48 static void sub8x8_128__c (unsigned char *FiltPtr, ogg_int16_t *DctInputPtr,
49 ogg_uint32_t PixelsPerLine) {
50 int i;
51 /* For each block row */
52 for (i=8; i; i--) {
53 /* INTRA mode so code raw image data */
54 /* We convert the data to 8 bit signed (by subtracting 128) as
55 this reduces the internal precision requirments in the DCT
56 transform. */
57 DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], 128);
58 DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], 128);
59 DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], 128);
60 DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], 128);
61 DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], 128);
62 DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], 128);
63 DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], 128);
64 DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], 128);
66 /* Start next row */
67 FiltPtr += PixelsPerLine;
68 DctInputPtr += 8;
72 static void sub8x8avg2__c (unsigned char *FiltPtr, unsigned char *ReconPtr1,
73 unsigned char *ReconPtr2, ogg_int16_t *DctInputPtr,
74 ogg_uint32_t PixelsPerLine,
75 ogg_uint32_t ReconPixelsPerLine)
77 int i;
79 /* For each block row */
80 for (i=8; i; i--) {
81 DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], DSP_OP_AVG (ReconPtr1[0], ReconPtr2[0]));
82 DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], DSP_OP_AVG (ReconPtr1[1], ReconPtr2[1]));
83 DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], DSP_OP_AVG (ReconPtr1[2], ReconPtr2[2]));
84 DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], DSP_OP_AVG (ReconPtr1[3], ReconPtr2[3]));
85 DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], DSP_OP_AVG (ReconPtr1[4], ReconPtr2[4]));
86 DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], DSP_OP_AVG (ReconPtr1[5], ReconPtr2[5]));
87 DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], DSP_OP_AVG (ReconPtr1[6], ReconPtr2[6]));
88 DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], DSP_OP_AVG (ReconPtr1[7], ReconPtr2[7]));
90 /* Start next row */
91 FiltPtr += PixelsPerLine;
92 ReconPtr1 += ReconPixelsPerLine;
93 ReconPtr2 += ReconPixelsPerLine;
94 DctInputPtr += 8;
98 static ogg_uint32_t row_sad8__c (unsigned char *Src1, unsigned char *Src2)
100 ogg_uint32_t SadValue;
101 ogg_uint32_t SadValue1;
103 SadValue = DSP_OP_ABS_DIFF (Src1[0], Src2[0]) +
104 DSP_OP_ABS_DIFF (Src1[1], Src2[1]) +
105 DSP_OP_ABS_DIFF (Src1[2], Src2[2]) +
106 DSP_OP_ABS_DIFF (Src1[3], Src2[3]);
108 SadValue1 = DSP_OP_ABS_DIFF (Src1[4], Src2[4]) +
109 DSP_OP_ABS_DIFF (Src1[5], Src2[5]) +
110 DSP_OP_ABS_DIFF (Src1[6], Src2[6]) +
111 DSP_OP_ABS_DIFF (Src1[7], Src2[7]);
113 SadValue = ( SadValue > SadValue1 ) ? SadValue : SadValue1;
115 return SadValue;
118 static ogg_uint32_t col_sad8x8__c (unsigned char *Src1, unsigned char *Src2,
119 ogg_uint32_t stride)
121 ogg_uint32_t SadValue[8] = {0,0,0,0,0,0,0,0};
122 ogg_uint32_t SadValue2[8] = {0,0,0,0,0,0,0,0};
123 ogg_uint32_t MaxSad = 0;
124 ogg_uint32_t i;
126 for ( i = 0; i < 4; i++ ){
127 SadValue[0] += abs(Src1[0] - Src2[0]);
128 SadValue[1] += abs(Src1[1] - Src2[1]);
129 SadValue[2] += abs(Src1[2] - Src2[2]);
130 SadValue[3] += abs(Src1[3] - Src2[3]);
131 SadValue[4] += abs(Src1[4] - Src2[4]);
132 SadValue[5] += abs(Src1[5] - Src2[5]);
133 SadValue[6] += abs(Src1[6] - Src2[6]);
134 SadValue[7] += abs(Src1[7] - Src2[7]);
136 Src1 += stride;
137 Src2 += stride;
140 for ( i = 0; i < 4; i++ ){
141 SadValue2[0] += abs(Src1[0] - Src2[0]);
142 SadValue2[1] += abs(Src1[1] - Src2[1]);
143 SadValue2[2] += abs(Src1[2] - Src2[2]);
144 SadValue2[3] += abs(Src1[3] - Src2[3]);
145 SadValue2[4] += abs(Src1[4] - Src2[4]);
146 SadValue2[5] += abs(Src1[5] - Src2[5]);
147 SadValue2[6] += abs(Src1[6] - Src2[6]);
148 SadValue2[7] += abs(Src1[7] - Src2[7]);
150 Src1 += stride;
151 Src2 += stride;
154 for ( i = 0; i < 8; i++ ){
155 if ( SadValue[i] > MaxSad )
156 MaxSad = SadValue[i];
157 if ( SadValue2[i] > MaxSad )
158 MaxSad = SadValue2[i];
161 return MaxSad;
164 static ogg_uint32_t sad8x8__c (unsigned char *ptr1, ogg_uint32_t stride1,
165 unsigned char *ptr2, ogg_uint32_t stride2)
167 ogg_uint32_t i;
168 ogg_uint32_t sad = 0;
170 for (i=8; i; i--) {
171 sad += DSP_OP_ABS_DIFF(ptr1[0], ptr2[0]);
172 sad += DSP_OP_ABS_DIFF(ptr1[1], ptr2[1]);
173 sad += DSP_OP_ABS_DIFF(ptr1[2], ptr2[2]);
174 sad += DSP_OP_ABS_DIFF(ptr1[3], ptr2[3]);
175 sad += DSP_OP_ABS_DIFF(ptr1[4], ptr2[4]);
176 sad += DSP_OP_ABS_DIFF(ptr1[5], ptr2[5]);
177 sad += DSP_OP_ABS_DIFF(ptr1[6], ptr2[6]);
178 sad += DSP_OP_ABS_DIFF(ptr1[7], ptr2[7]);
180 /* Step to next row of block. */
181 ptr1 += stride1;
182 ptr2 += stride2;
185 return sad;
188 static ogg_uint32_t sad8x8_thres__c (unsigned char *ptr1, ogg_uint32_t stride1,
189 unsigned char *ptr2, ogg_uint32_t stride2,
190 ogg_uint32_t thres)
192 ogg_uint32_t i;
193 ogg_uint32_t sad = 0;
195 for (i=8; i; i--) {
196 sad += DSP_OP_ABS_DIFF(ptr1[0], ptr2[0]);
197 sad += DSP_OP_ABS_DIFF(ptr1[1], ptr2[1]);
198 sad += DSP_OP_ABS_DIFF(ptr1[2], ptr2[2]);
199 sad += DSP_OP_ABS_DIFF(ptr1[3], ptr2[3]);
200 sad += DSP_OP_ABS_DIFF(ptr1[4], ptr2[4]);
201 sad += DSP_OP_ABS_DIFF(ptr1[5], ptr2[5]);
202 sad += DSP_OP_ABS_DIFF(ptr1[6], ptr2[6]);
203 sad += DSP_OP_ABS_DIFF(ptr1[7], ptr2[7]);
205 if (sad > thres )
206 break;
208 /* Step to next row of block. */
209 ptr1 += stride1;
210 ptr2 += stride2;
213 return sad;
216 static ogg_uint32_t sad8x8_xy2_thres__c (unsigned char *SrcData, ogg_uint32_t SrcStride,
217 unsigned char *RefDataPtr1,
218 unsigned char *RefDataPtr2, ogg_uint32_t RefStride,
219 ogg_uint32_t thres)
221 ogg_uint32_t i;
222 ogg_uint32_t sad = 0;
224 for (i=8; i; i--) {
225 sad += DSP_OP_ABS_DIFF(SrcData[0], DSP_OP_AVG (RefDataPtr1[0], RefDataPtr2[0]));
226 sad += DSP_OP_ABS_DIFF(SrcData[1], DSP_OP_AVG (RefDataPtr1[1], RefDataPtr2[1]));
227 sad += DSP_OP_ABS_DIFF(SrcData[2], DSP_OP_AVG (RefDataPtr1[2], RefDataPtr2[2]));
228 sad += DSP_OP_ABS_DIFF(SrcData[3], DSP_OP_AVG (RefDataPtr1[3], RefDataPtr2[3]));
229 sad += DSP_OP_ABS_DIFF(SrcData[4], DSP_OP_AVG (RefDataPtr1[4], RefDataPtr2[4]));
230 sad += DSP_OP_ABS_DIFF(SrcData[5], DSP_OP_AVG (RefDataPtr1[5], RefDataPtr2[5]));
231 sad += DSP_OP_ABS_DIFF(SrcData[6], DSP_OP_AVG (RefDataPtr1[6], RefDataPtr2[6]));
232 sad += DSP_OP_ABS_DIFF(SrcData[7], DSP_OP_AVG (RefDataPtr1[7], RefDataPtr2[7]));
234 if ( sad > thres )
235 break;
237 /* Step to next row of block. */
238 SrcData += SrcStride;
239 RefDataPtr1 += RefStride;
240 RefDataPtr2 += RefStride;
243 return sad;
246 static ogg_uint32_t intra8x8_err__c (unsigned char *DataPtr, ogg_uint32_t Stride)
248 ogg_uint32_t i;
249 ogg_uint32_t XSum=0;
250 ogg_uint32_t XXSum=0;
252 for (i=8; i; i--) {
253 /* Examine alternate pixel locations. */
254 XSum += DataPtr[0];
255 XXSum += DataPtr[0]*DataPtr[0];
256 XSum += DataPtr[1];
257 XXSum += DataPtr[1]*DataPtr[1];
258 XSum += DataPtr[2];
259 XXSum += DataPtr[2]*DataPtr[2];
260 XSum += DataPtr[3];
261 XXSum += DataPtr[3]*DataPtr[3];
262 XSum += DataPtr[4];
263 XXSum += DataPtr[4]*DataPtr[4];
264 XSum += DataPtr[5];
265 XXSum += DataPtr[5]*DataPtr[5];
266 XSum += DataPtr[6];
267 XXSum += DataPtr[6]*DataPtr[6];
268 XSum += DataPtr[7];
269 XXSum += DataPtr[7]*DataPtr[7];
271 /* Step to next row of block. */
272 DataPtr += Stride;
275 /* Compute population variance as mis-match metric. */
276 return (( (XXSum<<6) - XSum*XSum ) );
279 static ogg_uint32_t inter8x8_err__c (unsigned char *SrcData, ogg_uint32_t SrcStride,
280 unsigned char *RefDataPtr, ogg_uint32_t RefStride)
282 ogg_uint32_t i;
283 ogg_uint32_t XSum=0;
284 ogg_uint32_t XXSum=0;
285 ogg_int32_t DiffVal;
287 for (i=8; i; i--) {
288 DiffVal = DSP_OP_DIFF (SrcData[0], RefDataPtr[0]);
289 XSum += DiffVal;
290 XXSum += DiffVal*DiffVal;
292 DiffVal = DSP_OP_DIFF (SrcData[1], RefDataPtr[1]);
293 XSum += DiffVal;
294 XXSum += DiffVal*DiffVal;
296 DiffVal = DSP_OP_DIFF (SrcData[2], RefDataPtr[2]);
297 XSum += DiffVal;
298 XXSum += DiffVal*DiffVal;
300 DiffVal = DSP_OP_DIFF (SrcData[3], RefDataPtr[3]);
301 XSum += DiffVal;
302 XXSum += DiffVal*DiffVal;
304 DiffVal = DSP_OP_DIFF (SrcData[4], RefDataPtr[4]);
305 XSum += DiffVal;
306 XXSum += DiffVal*DiffVal;
308 DiffVal = DSP_OP_DIFF (SrcData[5], RefDataPtr[5]);
309 XSum += DiffVal;
310 XXSum += DiffVal*DiffVal;
312 DiffVal = DSP_OP_DIFF (SrcData[6], RefDataPtr[6]);
313 XSum += DiffVal;
314 XXSum += DiffVal*DiffVal;
316 DiffVal = DSP_OP_DIFF (SrcData[7], RefDataPtr[7]);
317 XSum += DiffVal;
318 XXSum += DiffVal*DiffVal;
320 /* Step to next row of block. */
321 SrcData += SrcStride;
322 RefDataPtr += RefStride;
325 /* Compute and return population variance as mis-match metric. */
326 return (( (XXSum<<6) - XSum*XSum ));
329 static ogg_uint32_t inter8x8_err_xy2__c (unsigned char *SrcData, ogg_uint32_t SrcStride,
330 unsigned char *RefDataPtr1,
331 unsigned char *RefDataPtr2, ogg_uint32_t RefStride)
333 ogg_uint32_t i;
334 ogg_uint32_t XSum=0;
335 ogg_uint32_t XXSum=0;
336 ogg_int32_t DiffVal;
338 for (i=8; i; i--) {
339 DiffVal = DSP_OP_DIFF(SrcData[0], DSP_OP_AVG (RefDataPtr1[0], RefDataPtr2[0]));
340 XSum += DiffVal;
341 XXSum += DiffVal*DiffVal;
343 DiffVal = DSP_OP_DIFF(SrcData[1], DSP_OP_AVG (RefDataPtr1[1], RefDataPtr2[1]));
344 XSum += DiffVal;
345 XXSum += DiffVal*DiffVal;
347 DiffVal = DSP_OP_DIFF(SrcData[2], DSP_OP_AVG (RefDataPtr1[2], RefDataPtr2[2]));
348 XSum += DiffVal;
349 XXSum += DiffVal*DiffVal;
351 DiffVal = DSP_OP_DIFF(SrcData[3], DSP_OP_AVG (RefDataPtr1[3], RefDataPtr2[3]));
352 XSum += DiffVal;
353 XXSum += DiffVal*DiffVal;
355 DiffVal = DSP_OP_DIFF(SrcData[4], DSP_OP_AVG (RefDataPtr1[4], RefDataPtr2[4]));
356 XSum += DiffVal;
357 XXSum += DiffVal*DiffVal;
359 DiffVal = DSP_OP_DIFF(SrcData[5], DSP_OP_AVG (RefDataPtr1[5], RefDataPtr2[5]));
360 XSum += DiffVal;
361 XXSum += DiffVal*DiffVal;
363 DiffVal = DSP_OP_DIFF(SrcData[6], DSP_OP_AVG (RefDataPtr1[6], RefDataPtr2[6]));
364 XSum += DiffVal;
365 XXSum += DiffVal*DiffVal;
367 DiffVal = DSP_OP_DIFF(SrcData[7], DSP_OP_AVG (RefDataPtr1[7], RefDataPtr2[7]));
368 XSum += DiffVal;
369 XXSum += DiffVal*DiffVal;
371 /* Step to next row of block. */
372 SrcData += SrcStride;
373 RefDataPtr1 += RefStride;
374 RefDataPtr2 += RefStride;
377 /* Compute and return population variance as mis-match metric. */
378 return (( (XXSum<<6) - XSum*XSum ));
/* Does nothing; takes no arguments and returns no value. */
static void nop (void)
{
}
383 void dsp_init(DspFunctions *funcs)
385 /* TH_DEBUG("setting dsp functions to C defaults.\n"); */
386 funcs->save_fpu = nop;
387 funcs->restore_fpu = nop;
388 funcs->sub8x8 = sub8x8__c;
389 funcs->sub8x8_128 = sub8x8_128__c;
390 funcs->sub8x8avg2 = sub8x8avg2__c;
391 funcs->row_sad8 = row_sad8__c;
392 funcs->col_sad8x8 = col_sad8x8__c;
393 funcs->sad8x8 = sad8x8__c;
394 funcs->sad8x8_thres = sad8x8_thres__c;
395 funcs->sad8x8_xy2_thres = sad8x8_xy2_thres__c;
396 funcs->intra8x8_err = intra8x8_err__c;
397 funcs->inter8x8_err = inter8x8_err__c;
398 funcs->inter8x8_err_xy2 = inter8x8_err_xy2__c;
401 void dsp_static_init(DspFunctions *funcs)
403 ogg_uint32_t cpuflags;
405 cpuflags = cpu_init ();
406 dsp_init (funcs);
408 dsp_recon_init (funcs, cpuflags);
409 dsp_dct_init (funcs, cpuflags);
410 #if defined(USE_ASM)
411 if (cpuflags & CPU_X86_MMX) {
412 dsp_mmx_init(funcs);
414 # ifndef WIN32
415 /* This is implemented for win32 yet */
416 if (cpuflags & CPU_X86_MMXEXT) {
417 dsp_mmxext_init(funcs);
419 # endif
420 #endif