Rename dec2() function
[FFMpeg-mirror/DVCPRO-HD.git] / libavcodec / dct-test.c
blobf3202cff14304c4609a228829919bac4de5fe978
1 /*
2 * (c) 2001 Fabrice Bellard
3 * 2007 Marc Hoffman <marc.hoffman@analog.com>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 /**
23 * @file dct-test.c
24 * DCT test. (c) 2001 Fabrice Bellard.
25 * Started from sample code by Juan J. Sierralta P.
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <string.h>
31 #include <sys/time.h>
32 #include <unistd.h>
33 #include <math.h>
35 #include "dsputil.h"
37 #include "simple_idct.h"
38 #include "faandct.h"
39 #include "faanidct.h"
40 #include "i386/idct_xvid.h"
42 #ifndef MAX
43 #define MAX(a, b) (((a) > (b)) ? (a) : (b))
44 #endif
46 #undef printf
47 #undef random
49 void *fast_memcpy(void *a, const void *b, size_t c){return memcpy(a,b,c);};
51 /* reference fdct/idct */
52 extern void fdct(DCTELEM *block);
53 extern void idct(DCTELEM *block);
54 extern void init_fdct();
56 extern void ff_mmx_idct(DCTELEM *data);
57 extern void ff_mmxext_idct(DCTELEM *data);
59 extern void odivx_idct_c (short *block);
61 // BFIN
62 extern void ff_bfin_idct (DCTELEM *block) ;
63 extern void ff_bfin_fdct (DCTELEM *block) ;
65 // ALTIVEC
66 extern void fdct_altivec (DCTELEM *block);
67 //extern void idct_altivec (DCTELEM *block);?? no routine
70 struct algo {
71 char *name;
72 enum { FDCT, IDCT } is_idct;
73 void (* func) (DCTELEM *block);
74 void (* ref) (DCTELEM *block);
75 enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, SSE2_PERM } format;
76 int mm_support;
79 #ifndef FAAN_POSTSCALE
80 #define FAAN_SCALE SCALE_PERM
81 #else
82 #define FAAN_SCALE NO_PERM
83 #endif
85 struct algo algos[] = {
86 {"REF-DBL", 0, fdct, fdct, NO_PERM},
87 {"FAAN", 0, ff_faandct, fdct, FAAN_SCALE},
88 {"FAANI", 1, ff_faanidct, idct, NO_PERM},
89 {"IJG-AAN-INT", 0, fdct_ifast, fdct, SCALE_PERM},
90 {"IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, NO_PERM},
91 {"REF-DBL", 1, idct, idct, NO_PERM},
92 {"INT", 1, j_rev_dct, idct, MMX_PERM},
93 {"SIMPLE-C", 1, ff_simple_idct, idct, NO_PERM},
95 #ifdef HAVE_MMX
96 {"MMX", 0, ff_fdct_mmx, fdct, NO_PERM, MM_MMX},
97 #ifdef HAVE_MMX2
98 {"MMX2", 0, ff_fdct_mmx2, fdct, NO_PERM, MM_MMXEXT},
99 #endif
101 #ifdef CONFIG_GPL
102 {"LIBMPEG2-MMX", 1, ff_mmx_idct, idct, MMX_PERM, MM_MMX},
103 {"LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, MMX_PERM, MM_MMXEXT},
104 #endif
105 {"SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM, MM_MMX},
106 {"XVID-MMX", 1, ff_idct_xvid_mmx, idct, NO_PERM, MM_MMX},
107 {"XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, NO_PERM, MM_MMXEXT},
108 {"XVID-SSE2", 1, ff_idct_xvid_sse2, idct, SSE2_PERM, MM_SSE2},
109 #endif
111 #ifdef HAVE_ALTIVEC
112 {"altivecfdct", 0, fdct_altivec, fdct, NO_PERM, MM_ALTIVEC},
113 #endif
115 #ifdef ARCH_BFIN
116 {"BFINfdct", 0, ff_bfin_fdct, fdct, NO_PERM},
117 {"BFINidct", 1, ff_bfin_idct, idct, NO_PERM},
118 #endif
120 { 0 }
123 #define AANSCALE_BITS 12
124 static const unsigned short aanscales[64] = {
125 /* precomputed values scaled up by 14 bits */
126 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
127 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
128 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
129 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
130 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
131 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
132 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
133 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
136 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
138 int64_t gettime(void)
140 struct timeval tv;
141 gettimeofday(&tv,NULL);
142 return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
145 #define NB_ITS 20000
146 #define NB_ITS_SPEED 50000
148 static short idct_mmx_perm[64];
150 static short idct_simple_mmx_perm[64]={
151 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
152 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
153 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
154 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
155 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
156 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
157 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
158 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
161 static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
163 void idct_mmx_init(void)
165 int i;
167 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
168 for (i = 0; i < 64; i++) {
169 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
170 // idct_simple_mmx_perm[i] = simple_block_permute_op(i);
174 static DCTELEM block[64] __attribute__ ((aligned (16)));
175 static DCTELEM block1[64] __attribute__ ((aligned (8)));
176 static DCTELEM block_org[64] __attribute__ ((aligned (8)));
178 void dct_error(const char *name, int is_idct,
179 void (*fdct_func)(DCTELEM *block),
180 void (*fdct_ref)(DCTELEM *block), int form, int test)
182 int it, i, scale;
183 int err_inf, v;
184 int64_t err2, ti, ti1, it1;
185 int64_t sysErr[64], sysErrMax=0;
186 int maxout=0;
187 int blockSumErrMax=0, blockSumErr;
189 srandom(0);
191 err_inf = 0;
192 err2 = 0;
193 for(i=0; i<64; i++) sysErr[i]=0;
194 for(it=0;it<NB_ITS;it++) {
195 for(i=0;i<64;i++)
196 block1[i] = 0;
197 switch(test){
198 case 0:
199 for(i=0;i<64;i++)
200 block1[i] = (random() % 512) -256;
201 if (is_idct){
202 fdct(block1);
204 for(i=0;i<64;i++)
205 block1[i]>>=3;
207 break;
208 case 1:{
209 int num= (random()%10)+1;
210 for(i=0;i<num;i++)
211 block1[random()%64] = (random() % 512) -256;
212 }break;
213 case 2:
214 block1[0]= (random()%4096)-2048;
215 block1[63]= (block1[0]&1)^1;
216 break;
219 #if 0 // simulate mismatch control
220 { int sum=0;
221 for(i=0;i<64;i++)
222 sum+=block1[i];
224 if((sum&1)==0) block1[63]^=1;
226 #endif
228 for(i=0; i<64; i++)
229 block_org[i]= block1[i];
231 if (form == MMX_PERM) {
232 for(i=0;i<64;i++)
233 block[idct_mmx_perm[i]] = block1[i];
234 } else if (form == MMX_SIMPLE_PERM) {
235 for(i=0;i<64;i++)
236 block[idct_simple_mmx_perm[i]] = block1[i];
238 } else if (form == SSE2_PERM) {
239 for(i=0; i<64; i++)
240 block[(i&0x38) | idct_sse2_row_perm[i&7]] = block1[i];
241 } else {
242 for(i=0; i<64; i++)
243 block[i]= block1[i];
245 #if 0 // simulate mismatch control for tested IDCT but not the ref
246 { int sum=0;
247 for(i=0;i<64;i++)
248 sum+=block[i];
250 if((sum&1)==0) block[63]^=1;
252 #endif
254 fdct_func(block);
255 emms_c(); /* for ff_mmx_idct */
257 if (form == SCALE_PERM) {
258 for(i=0; i<64; i++) {
259 scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
260 block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
264 fdct_ref(block1);
266 blockSumErr=0;
267 for(i=0;i<64;i++) {
268 v = abs(block[i] - block1[i]);
269 if (v > err_inf)
270 err_inf = v;
271 err2 += v * v;
272 sysErr[i] += block[i] - block1[i];
273 blockSumErr += v;
274 if( abs(block[i])>maxout) maxout=abs(block[i]);
276 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
277 #if 0 // print different matrix pairs
278 if(blockSumErr){
279 printf("\n");
280 for(i=0; i<64; i++){
281 if((i&7)==0) printf("\n");
282 printf("%4d ", block_org[i]);
284 for(i=0; i<64; i++){
285 if((i&7)==0) printf("\n");
286 printf("%4d ", block[i] - block1[i]);
289 #endif
291 for(i=0; i<64; i++) sysErrMax= MAX(sysErrMax, FFABS(sysErr[i]));
293 #if 1 // dump systematic errors
294 for(i=0; i<64; i++){
295 if(i%8==0) printf("\n");
296 printf("%5d ", (int)sysErr[i]);
298 printf("\n");
299 #endif
301 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
302 is_idct ? "IDCT" : "DCT",
303 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
304 #if 1 //Speed test
305 /* speed test */
306 for(i=0;i<64;i++)
307 block1[i] = 0;
308 switch(test){
309 case 0:
310 for(i=0;i<64;i++)
311 block1[i] = (random() % 512) -256;
312 if (is_idct){
313 fdct(block1);
315 for(i=0;i<64;i++)
316 block1[i]>>=3;
318 break;
319 case 1:{
320 case 2:
321 block1[0] = (random() % 512) -256;
322 block1[1] = (random() % 512) -256;
323 block1[2] = (random() % 512) -256;
324 block1[3] = (random() % 512) -256;
325 }break;
328 if (form == MMX_PERM) {
329 for(i=0;i<64;i++)
330 block[idct_mmx_perm[i]] = block1[i];
331 } else if(form == MMX_SIMPLE_PERM) {
332 for(i=0;i<64;i++)
333 block[idct_simple_mmx_perm[i]] = block1[i];
334 } else {
335 for(i=0; i<64; i++)
336 block[i]= block1[i];
339 ti = gettime();
340 it1 = 0;
341 do {
342 for(it=0;it<NB_ITS_SPEED;it++) {
343 for(i=0; i<64; i++)
344 block[i]= block1[i];
345 // memcpy(block, block1, sizeof(DCTELEM) * 64);
346 // do not memcpy especially not fastmemcpy because it does movntq !!!
347 fdct_func(block);
349 it1 += NB_ITS_SPEED;
350 ti1 = gettime() - ti;
351 } while (ti1 < 1000000);
352 emms_c();
354 printf("%s %s: %0.1f kdct/s\n",
355 is_idct ? "IDCT" : "DCT",
356 name, (double)it1 * 1000.0 / (double)ti1);
357 #endif
360 static uint8_t img_dest[64] __attribute__ ((aligned (8)));
361 static uint8_t img_dest1[64] __attribute__ ((aligned (8)));
363 void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
365 static int init;
366 static double c8[8][8];
367 static double c4[4][4];
368 double block1[64], block2[64], block3[64];
369 double s, sum, v;
370 int i, j, k;
372 if (!init) {
373 init = 1;
375 for(i=0;i<8;i++) {
376 sum = 0;
377 for(j=0;j<8;j++) {
378 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
379 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
380 sum += c8[i][j] * c8[i][j];
384 for(i=0;i<4;i++) {
385 sum = 0;
386 for(j=0;j<4;j++) {
387 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
388 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
389 sum += c4[i][j] * c4[i][j];
394 /* butterfly */
395 s = 0.5 * sqrt(2.0);
396 for(i=0;i<4;i++) {
397 for(j=0;j<8;j++) {
398 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
399 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
403 /* idct8 on lines */
404 for(i=0;i<8;i++) {
405 for(j=0;j<8;j++) {
406 sum = 0;
407 for(k=0;k<8;k++)
408 sum += c8[k][j] * block1[8*i+k];
409 block2[8*i+j] = sum;
413 /* idct4 */
414 for(i=0;i<8;i++) {
415 for(j=0;j<4;j++) {
416 /* top */
417 sum = 0;
418 for(k=0;k<4;k++)
419 sum += c4[k][j] * block2[8*(2*k)+i];
420 block3[8*(2*j)+i] = sum;
422 /* bottom */
423 sum = 0;
424 for(k=0;k<4;k++)
425 sum += c4[k][j] * block2[8*(2*k+1)+i];
426 block3[8*(2*j+1)+i] = sum;
430 /* clamp and store the result */
431 for(i=0;i<8;i++) {
432 for(j=0;j<8;j++) {
433 v = block3[8*i+j];
434 if (v < 0)
435 v = 0;
436 else if (v > 255)
437 v = 255;
438 dest[i * linesize + j] = (int)rint(v);
443 void idct248_error(const char *name,
444 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
446 int it, i, it1, ti, ti1, err_max, v;
448 srandom(0);
450 /* just one test to see if code is correct (precision is less
451 important here) */
452 err_max = 0;
453 for(it=0;it<NB_ITS;it++) {
455 /* XXX: use forward transform to generate values */
456 for(i=0;i<64;i++)
457 block1[i] = (random() % 256) - 128;
458 block1[0] += 1024;
460 for(i=0; i<64; i++)
461 block[i]= block1[i];
462 idct248_ref(img_dest1, 8, block);
464 for(i=0; i<64; i++)
465 block[i]= block1[i];
466 idct248_put(img_dest, 8, block);
468 for(i=0;i<64;i++) {
469 v = abs((int)img_dest[i] - (int)img_dest1[i]);
470 if (v == 255)
471 printf("%d %d\n", img_dest[i], img_dest1[i]);
472 if (v > err_max)
473 err_max = v;
475 #if 0
476 printf("ref=\n");
477 for(i=0;i<8;i++) {
478 int j;
479 for(j=0;j<8;j++) {
480 printf(" %3d", img_dest1[i*8+j]);
482 printf("\n");
485 printf("out=\n");
486 for(i=0;i<8;i++) {
487 int j;
488 for(j=0;j<8;j++) {
489 printf(" %3d", img_dest[i*8+j]);
491 printf("\n");
493 #endif
495 printf("%s %s: err_inf=%d\n",
496 1 ? "IDCT248" : "DCT248",
497 name, err_max);
499 ti = gettime();
500 it1 = 0;
501 do {
502 for(it=0;it<NB_ITS_SPEED;it++) {
503 for(i=0; i<64; i++)
504 block[i]= block1[i];
505 // memcpy(block, block1, sizeof(DCTELEM) * 64);
506 // do not memcpy especially not fastmemcpy because it does movntq !!!
507 idct248_put(img_dest, 8, block);
509 it1 += NB_ITS_SPEED;
510 ti1 = gettime() - ti;
511 } while (ti1 < 1000000);
512 emms_c();
514 printf("%s %s: %0.1f kdct/s\n",
515 1 ? "IDCT248" : "DCT248",
516 name, (double)it1 * 1000.0 / (double)ti1);
519 void help(void)
521 printf("dct-test [-i] [<test-number>]\n"
522 "test-number 0 -> test with random matrixes\n"
523 " 1 -> test with random sparse matrixes\n"
524 " 2 -> do 3. test from mpeg4 std\n"
525 "-i test IDCT implementations\n"
526 "-4 test IDCT248 implementations\n");
529 int main(int argc, char **argv)
531 int test_idct = 0, test_248_dct = 0;
532 int c,i;
533 int test=1;
535 init_fdct();
536 idct_mmx_init();
537 mm_flags = mm_support();
539 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
540 for(i=0;i<MAX_NEG_CROP;i++) {
541 cropTbl[i] = 0;
542 cropTbl[i + MAX_NEG_CROP + 256] = 255;
545 for(;;) {
546 c = getopt(argc, argv, "ih4");
547 if (c == -1)
548 break;
549 switch(c) {
550 case 'i':
551 test_idct = 1;
552 break;
553 case '4':
554 test_248_dct = 1;
555 break;
556 default :
557 case 'h':
558 help();
559 return 0;
563 if(optind <argc) test= atoi(argv[optind]);
565 printf("ffmpeg DCT/IDCT test\n");
567 if (test_248_dct) {
568 idct248_error("SIMPLE-C", ff_simple_idct248_put);
569 } else {
570 for (i=0;algos[i].name;i++)
571 if (algos[i].is_idct == test_idct && !(~mm_flags & algos[i].mm_support)) {
572 dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);
575 return 0;