Add a rounding parameter to ff_acelp_lp_synthesis_filter()
[FFMpeg-mirror/DVCPRO-HD.git] / libavcodec / dct-test.c
blobf3f9408d984f0f55d3bcb1e0ca621649dc6f4f5d
1 /*
2 * (c) 2001 Fabrice Bellard
3 * 2007 Marc Hoffman <marc.hoffman@analog.com>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 /**
23 * @file dct-test.c
24 * DCT test. (c) 2001 Fabrice Bellard.
25 * Started from sample code by Juan J. Sierralta P.
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <string.h>
31 #include <sys/time.h>
32 #include <unistd.h>
33 #include <math.h>
35 #include "libavutil/common.h"
37 #include "simple_idct.h"
38 #include "faandct.h"
39 #include "faanidct.h"
40 #include "i386/idct_xvid.h"
42 #undef printf
43 #undef random
45 void *fast_memcpy(void *a, const void *b, size_t c){return memcpy(a,b,c);};
47 /* reference fdct/idct */
48 extern void fdct(DCTELEM *block);
49 extern void idct(DCTELEM *block);
50 extern void init_fdct();
52 extern void ff_mmx_idct(DCTELEM *data);
53 extern void ff_mmxext_idct(DCTELEM *data);
55 extern void odivx_idct_c (short *block);
57 // BFIN
58 extern void ff_bfin_idct (DCTELEM *block) ;
59 extern void ff_bfin_fdct (DCTELEM *block) ;
61 // ALTIVEC
62 extern void fdct_altivec (DCTELEM *block);
63 //extern void idct_altivec (DCTELEM *block);?? no routine
66 struct algo {
67 const char *name;
68 enum { FDCT, IDCT } is_idct;
69 void (* func) (DCTELEM *block);
70 void (* ref) (DCTELEM *block);
71 enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, SSE2_PERM } format;
72 int mm_support;
75 #ifndef FAAN_POSTSCALE
76 #define FAAN_SCALE SCALE_PERM
77 #else
78 #define FAAN_SCALE NO_PERM
79 #endif
81 static int cpu_flags;
83 struct algo algos[] = {
84 {"REF-DBL", 0, fdct, fdct, NO_PERM},
85 {"FAAN", 0, ff_faandct, fdct, FAAN_SCALE},
86 {"FAANI", 1, ff_faanidct, idct, NO_PERM},
87 {"IJG-AAN-INT", 0, fdct_ifast, fdct, SCALE_PERM},
88 {"IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, NO_PERM},
89 {"REF-DBL", 1, idct, idct, NO_PERM},
90 {"INT", 1, j_rev_dct, idct, MMX_PERM},
91 {"SIMPLE-C", 1, ff_simple_idct, idct, NO_PERM},
93 #ifdef HAVE_MMX
94 {"MMX", 0, ff_fdct_mmx, fdct, NO_PERM, MM_MMX},
95 #ifdef HAVE_MMX2
96 {"MMX2", 0, ff_fdct_mmx2, fdct, NO_PERM, MM_MMXEXT},
97 #endif
99 #ifdef CONFIG_GPL
100 {"LIBMPEG2-MMX", 1, ff_mmx_idct, idct, MMX_PERM, MM_MMX},
101 {"LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, MMX_PERM, MM_MMXEXT},
102 #endif
103 {"SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM, MM_MMX},
104 {"XVID-MMX", 1, ff_idct_xvid_mmx, idct, NO_PERM, MM_MMX},
105 {"XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, NO_PERM, MM_MMXEXT},
106 {"XVID-SSE2", 1, ff_idct_xvid_sse2, idct, SSE2_PERM, MM_SSE2},
107 #endif
109 #ifdef HAVE_ALTIVEC
110 {"altivecfdct", 0, fdct_altivec, fdct, NO_PERM, MM_ALTIVEC},
111 #endif
113 #ifdef ARCH_BFIN
114 {"BFINfdct", 0, ff_bfin_fdct, fdct, NO_PERM},
115 {"BFINidct", 1, ff_bfin_idct, idct, NO_PERM},
116 #endif
118 { 0 }
121 #define AANSCALE_BITS 12
122 static const unsigned short aanscales[64] = {
123 /* precomputed values scaled up by 14 bits */
124 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
125 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
126 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
127 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
128 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
129 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
130 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
131 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
134 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
136 int64_t gettime(void)
138 struct timeval tv;
139 gettimeofday(&tv,NULL);
140 return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
143 #define NB_ITS 20000
144 #define NB_ITS_SPEED 50000
146 static short idct_mmx_perm[64];
148 static short idct_simple_mmx_perm[64]={
149 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
150 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
151 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
152 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
153 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
154 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
155 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
156 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
159 static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
161 void idct_mmx_init(void)
163 int i;
165 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
166 for (i = 0; i < 64; i++) {
167 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
168 // idct_simple_mmx_perm[i] = simple_block_permute_op(i);
172 static DCTELEM block[64] __attribute__ ((aligned (16)));
173 static DCTELEM block1[64] __attribute__ ((aligned (8)));
174 static DCTELEM block_org[64] __attribute__ ((aligned (8)));
176 static inline void mmx_emms(void)
178 #ifdef HAVE_MMX
179 if (cpu_flags & MM_MMX)
180 asm volatile ("emms\n\t");
181 #endif
184 void dct_error(const char *name, int is_idct,
185 void (*fdct_func)(DCTELEM *block),
186 void (*fdct_ref)(DCTELEM *block), int form, int test)
188 int it, i, scale;
189 int err_inf, v;
190 int64_t err2, ti, ti1, it1;
191 int64_t sysErr[64], sysErrMax=0;
192 int maxout=0;
193 int blockSumErrMax=0, blockSumErr;
195 srandom(0);
197 err_inf = 0;
198 err2 = 0;
199 for(i=0; i<64; i++) sysErr[i]=0;
200 for(it=0;it<NB_ITS;it++) {
201 for(i=0;i<64;i++)
202 block1[i] = 0;
203 switch(test){
204 case 0:
205 for(i=0;i<64;i++)
206 block1[i] = (random() % 512) -256;
207 if (is_idct){
208 fdct(block1);
210 for(i=0;i<64;i++)
211 block1[i]>>=3;
213 break;
214 case 1:{
215 int num= (random()%10)+1;
216 for(i=0;i<num;i++)
217 block1[random()%64] = (random() % 512) -256;
218 }break;
219 case 2:
220 block1[0]= (random()%4096)-2048;
221 block1[63]= (block1[0]&1)^1;
222 break;
225 #if 0 // simulate mismatch control
226 { int sum=0;
227 for(i=0;i<64;i++)
228 sum+=block1[i];
230 if((sum&1)==0) block1[63]^=1;
232 #endif
234 for(i=0; i<64; i++)
235 block_org[i]= block1[i];
237 if (form == MMX_PERM) {
238 for(i=0;i<64;i++)
239 block[idct_mmx_perm[i]] = block1[i];
240 } else if (form == MMX_SIMPLE_PERM) {
241 for(i=0;i<64;i++)
242 block[idct_simple_mmx_perm[i]] = block1[i];
244 } else if (form == SSE2_PERM) {
245 for(i=0; i<64; i++)
246 block[(i&0x38) | idct_sse2_row_perm[i&7]] = block1[i];
247 } else {
248 for(i=0; i<64; i++)
249 block[i]= block1[i];
251 #if 0 // simulate mismatch control for tested IDCT but not the ref
252 { int sum=0;
253 for(i=0;i<64;i++)
254 sum+=block[i];
256 if((sum&1)==0) block[63]^=1;
258 #endif
260 fdct_func(block);
261 mmx_emms();
263 if (form == SCALE_PERM) {
264 for(i=0; i<64; i++) {
265 scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
266 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
270 fdct_ref(block1);
272 blockSumErr=0;
273 for(i=0;i<64;i++) {
274 v = abs(block[i] - block1[i]);
275 if (v > err_inf)
276 err_inf = v;
277 err2 += v * v;
278 sysErr[i] += block[i] - block1[i];
279 blockSumErr += v;
280 if( abs(block[i])>maxout) maxout=abs(block[i]);
282 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
283 #if 0 // print different matrix pairs
284 if(blockSumErr){
285 printf("\n");
286 for(i=0; i<64; i++){
287 if((i&7)==0) printf("\n");
288 printf("%4d ", block_org[i]);
290 for(i=0; i<64; i++){
291 if((i&7)==0) printf("\n");
292 printf("%4d ", block[i] - block1[i]);
295 #endif
297 for(i=0; i<64; i++) sysErrMax= FFMAX(sysErrMax, FFABS(sysErr[i]));
299 #if 1 // dump systematic errors
300 for(i=0; i<64; i++){
301 if(i%8==0) printf("\n");
302 printf("%5d ", (int)sysErr[i]);
304 printf("\n");
305 #endif
307 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
308 is_idct ? "IDCT" : "DCT",
309 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
310 #if 1 //Speed test
311 /* speed test */
312 for(i=0;i<64;i++)
313 block1[i] = 0;
314 switch(test){
315 case 0:
316 for(i=0;i<64;i++)
317 block1[i] = (random() % 512) -256;
318 if (is_idct){
319 fdct(block1);
321 for(i=0;i<64;i++)
322 block1[i]>>=3;
324 break;
325 case 1:{
326 case 2:
327 block1[0] = (random() % 512) -256;
328 block1[1] = (random() % 512) -256;
329 block1[2] = (random() % 512) -256;
330 block1[3] = (random() % 512) -256;
331 }break;
334 if (form == MMX_PERM) {
335 for(i=0;i<64;i++)
336 block[idct_mmx_perm[i]] = block1[i];
337 } else if(form == MMX_SIMPLE_PERM) {
338 for(i=0;i<64;i++)
339 block[idct_simple_mmx_perm[i]] = block1[i];
340 } else {
341 for(i=0; i<64; i++)
342 block[i]= block1[i];
345 ti = gettime();
346 it1 = 0;
347 do {
348 for(it=0;it<NB_ITS_SPEED;it++) {
349 for(i=0; i<64; i++)
350 block[i]= block1[i];
351 // memcpy(block, block1, sizeof(DCTELEM) * 64);
352 // do not memcpy especially not fastmemcpy because it does movntq !!!
353 fdct_func(block);
355 it1 += NB_ITS_SPEED;
356 ti1 = gettime() - ti;
357 } while (ti1 < 1000000);
358 mmx_emms();
360 printf("%s %s: %0.1f kdct/s\n",
361 is_idct ? "IDCT" : "DCT",
362 name, (double)it1 * 1000.0 / (double)ti1);
363 #endif
366 static uint8_t img_dest[64] __attribute__ ((aligned (8)));
367 static uint8_t img_dest1[64] __attribute__ ((aligned (8)));
369 void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
371 static int init;
372 static double c8[8][8];
373 static double c4[4][4];
374 double block1[64], block2[64], block3[64];
375 double s, sum, v;
376 int i, j, k;
378 if (!init) {
379 init = 1;
381 for(i=0;i<8;i++) {
382 sum = 0;
383 for(j=0;j<8;j++) {
384 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
385 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
386 sum += c8[i][j] * c8[i][j];
390 for(i=0;i<4;i++) {
391 sum = 0;
392 for(j=0;j<4;j++) {
393 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
394 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
395 sum += c4[i][j] * c4[i][j];
400 /* butterfly */
401 s = 0.5 * sqrt(2.0);
402 for(i=0;i<4;i++) {
403 for(j=0;j<8;j++) {
404 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
405 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
409 /* idct8 on lines */
410 for(i=0;i<8;i++) {
411 for(j=0;j<8;j++) {
412 sum = 0;
413 for(k=0;k<8;k++)
414 sum += c8[k][j] * block1[8*i+k];
415 block2[8*i+j] = sum;
419 /* idct4 */
420 for(i=0;i<8;i++) {
421 for(j=0;j<4;j++) {
422 /* top */
423 sum = 0;
424 for(k=0;k<4;k++)
425 sum += c4[k][j] * block2[8*(2*k)+i];
426 block3[8*(2*j)+i] = sum;
428 /* bottom */
429 sum = 0;
430 for(k=0;k<4;k++)
431 sum += c4[k][j] * block2[8*(2*k+1)+i];
432 block3[8*(2*j+1)+i] = sum;
436 /* clamp and store the result */
437 for(i=0;i<8;i++) {
438 for(j=0;j<8;j++) {
439 v = block3[8*i+j];
440 if (v < 0)
441 v = 0;
442 else if (v > 255)
443 v = 255;
444 dest[i * linesize + j] = (int)rint(v);
449 void idct248_error(const char *name,
450 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
452 int it, i, it1, ti, ti1, err_max, v;
454 srandom(0);
456 /* just one test to see if code is correct (precision is less
457 important here) */
458 err_max = 0;
459 for(it=0;it<NB_ITS;it++) {
461 /* XXX: use forward transform to generate values */
462 for(i=0;i<64;i++)
463 block1[i] = (random() % 256) - 128;
464 block1[0] += 1024;
466 for(i=0; i<64; i++)
467 block[i]= block1[i];
468 idct248_ref(img_dest1, 8, block);
470 for(i=0; i<64; i++)
471 block[i]= block1[i];
472 idct248_put(img_dest, 8, block);
474 for(i=0;i<64;i++) {
475 v = abs((int)img_dest[i] - (int)img_dest1[i]);
476 if (v == 255)
477 printf("%d %d\n", img_dest[i], img_dest1[i]);
478 if (v > err_max)
479 err_max = v;
481 #if 0
482 printf("ref=\n");
483 for(i=0;i<8;i++) {
484 int j;
485 for(j=0;j<8;j++) {
486 printf(" %3d", img_dest1[i*8+j]);
488 printf("\n");
491 printf("out=\n");
492 for(i=0;i<8;i++) {
493 int j;
494 for(j=0;j<8;j++) {
495 printf(" %3d", img_dest[i*8+j]);
497 printf("\n");
499 #endif
501 printf("%s %s: err_inf=%d\n",
502 1 ? "IDCT248" : "DCT248",
503 name, err_max);
505 ti = gettime();
506 it1 = 0;
507 do {
508 for(it=0;it<NB_ITS_SPEED;it++) {
509 for(i=0; i<64; i++)
510 block[i]= block1[i];
511 // memcpy(block, block1, sizeof(DCTELEM) * 64);
512 // do not memcpy especially not fastmemcpy because it does movntq !!!
513 idct248_put(img_dest, 8, block);
515 it1 += NB_ITS_SPEED;
516 ti1 = gettime() - ti;
517 } while (ti1 < 1000000);
518 mmx_emms();
520 printf("%s %s: %0.1f kdct/s\n",
521 1 ? "IDCT248" : "DCT248",
522 name, (double)it1 * 1000.0 / (double)ti1);
525 void help(void)
527 printf("dct-test [-i] [<test-number>]\n"
528 "test-number 0 -> test with random matrixes\n"
529 " 1 -> test with random sparse matrixes\n"
530 " 2 -> do 3. test from mpeg4 std\n"
531 "-i test IDCT implementations\n"
532 "-4 test IDCT248 implementations\n");
535 int main(int argc, char **argv)
537 int test_idct = 0, test_248_dct = 0;
538 int c,i;
539 int test=1;
540 cpu_flags = mm_support();
542 init_fdct();
543 idct_mmx_init();
545 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
546 for(i=0;i<MAX_NEG_CROP;i++) {
547 cropTbl[i] = 0;
548 cropTbl[i + MAX_NEG_CROP + 256] = 255;
551 for(;;) {
552 c = getopt(argc, argv, "ih4");
553 if (c == -1)
554 break;
555 switch(c) {
556 case 'i':
557 test_idct = 1;
558 break;
559 case '4':
560 test_248_dct = 1;
561 break;
562 default :
563 case 'h':
564 help();
565 return 0;
569 if(optind <argc) test= atoi(argv[optind]);
571 printf("ffmpeg DCT/IDCT test\n");
573 if (test_248_dct) {
574 idct248_error("SIMPLE-C", ff_simple_idct248_put);
575 } else {
576 for (i=0;algos[i].name;i++)
577 if (algos[i].is_idct == test_idct && !(~cpu_flags & algos[i].mm_support)) {
578 dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);
581 return 0;