2 * (c) 2001 Fabrice Bellard
3 * 2007 Marc Hoffman <marc.hoffman@analog.com>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * @file libavcodec/dct-test.c
24 * DCT test (c) 2001 Fabrice Bellard
25 * Started from sample code by Juan J. Sierralta P.
35 #include "libavutil/common.h"
37 #include "simple_idct.h"
38 #include "aandcttab.h"
41 #include "x86/idct_xvid.h"
46 void *fast_memcpy(void *a
, const void *b
, size_t c
){return memcpy(a
,b
,c
);};
48 /* reference fdct/idct */
49 void fdct(DCTELEM
*block
);
50 void idct(DCTELEM
*block
);
53 void ff_mmx_idct(DCTELEM
*data
);
54 void ff_mmxext_idct(DCTELEM
*data
);
56 void odivx_idct_c(short *block
);
59 void ff_bfin_idct(DCTELEM
*block
);
60 void ff_bfin_fdct(DCTELEM
*block
);
63 void fdct_altivec(DCTELEM
*block
);
64 //void idct_altivec(DCTELEM *block);?? no routine
67 void j_rev_dct_ARM(DCTELEM
*data
);
68 void simple_idct_ARM(DCTELEM
*data
);
69 void simple_idct_armv5te(DCTELEM
*data
);
70 void ff_simple_idct_armv6(DCTELEM
*data
);
71 void ff_simple_idct_neon(DCTELEM
*data
);
73 void ff_simple_idct_axp(DCTELEM
*data
);
77 enum { FDCT
, IDCT
} is_idct
;
78 void (* func
) (DCTELEM
*block
);
79 void (* ref
) (DCTELEM
*block
);
80 enum formattag
{ NO_PERM
,MMX_PERM
, MMX_SIMPLE_PERM
, SCALE_PERM
, SSE2_PERM
, PARTTRANS_PERM
} format
;
84 #ifndef FAAN_POSTSCALE
85 #define FAAN_SCALE SCALE_PERM
87 #define FAAN_SCALE NO_PERM
92 struct algo algos
[] = {
93 {"REF-DBL", 0, fdct
, fdct
, NO_PERM
},
94 {"FAAN", 0, ff_faandct
, fdct
, FAAN_SCALE
},
95 {"FAANI", 1, ff_faanidct
, idct
, NO_PERM
},
96 {"IJG-AAN-INT", 0, fdct_ifast
, fdct
, SCALE_PERM
},
97 {"IJG-LLM-INT", 0, ff_jpeg_fdct_islow
, fdct
, NO_PERM
},
98 {"REF-DBL", 1, idct
, idct
, NO_PERM
},
99 {"INT", 1, j_rev_dct
, idct
, MMX_PERM
},
100 {"SIMPLE-C", 1, ff_simple_idct
, idct
, NO_PERM
},
103 {"MMX", 0, ff_fdct_mmx
, fdct
, NO_PERM
, FF_MM_MMX
},
105 {"MMX2", 0, ff_fdct_mmx2
, fdct
, NO_PERM
, FF_MM_MMXEXT
},
106 {"SSE2", 0, ff_fdct_sse2
, fdct
, NO_PERM
, FF_MM_SSE2
},
110 {"LIBMPEG2-MMX", 1, ff_mmx_idct
, idct
, MMX_PERM
, FF_MM_MMX
},
111 {"LIBMPEG2-MMXEXT", 1, ff_mmxext_idct
, idct
, MMX_PERM
, FF_MM_MMXEXT
},
113 {"SIMPLE-MMX", 1, ff_simple_idct_mmx
, idct
, MMX_SIMPLE_PERM
, FF_MM_MMX
},
114 {"XVID-MMX", 1, ff_idct_xvid_mmx
, idct
, NO_PERM
, FF_MM_MMX
},
115 {"XVID-MMX2", 1, ff_idct_xvid_mmx2
, idct
, NO_PERM
, FF_MM_MMXEXT
},
116 {"XVID-SSE2", 1, ff_idct_xvid_sse2
, idct
, SSE2_PERM
, FF_MM_SSE2
},
120 {"altivecfdct", 0, fdct_altivec
, fdct
, NO_PERM
, FF_MM_ALTIVEC
},
124 {"BFINfdct", 0, ff_bfin_fdct
, fdct
, NO_PERM
},
125 {"BFINidct", 1, ff_bfin_idct
, idct
, NO_PERM
},
129 {"SIMPLE-ARM", 1, simple_idct_ARM
, idct
, NO_PERM
},
130 {"INT-ARM", 1, j_rev_dct_ARM
, idct
, MMX_PERM
},
132 {"SIMPLE-ARMV5TE", 1, simple_idct_armv5te
, idct
, NO_PERM
},
135 {"SIMPLE-ARMV6", 1, ff_simple_idct_armv6
, idct
, MMX_PERM
},
138 {"SIMPLE-NEON", 1, ff_simple_idct_neon
, idct
, PARTTRANS_PERM
},
140 #endif /* ARCH_ARM */
143 {"SIMPLE-ALPHA", 1, ff_simple_idct_axp
, idct
, NO_PERM
},
149 #define AANSCALE_BITS 12
151 uint8_t cropTbl
[256 + 2 * MAX_NEG_CROP
];
153 int64_t gettime(void)
156 gettimeofday(&tv
,NULL
);
157 return (int64_t)tv
.tv_sec
* 1000000 + tv
.tv_usec
;
161 #define NB_ITS_SPEED 50000
163 static short idct_mmx_perm
[64];
165 static short idct_simple_mmx_perm
[64]={
166 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
167 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
168 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
169 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
170 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
171 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
172 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
173 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
176 static const uint8_t idct_sse2_row_perm
[8] = {0, 4, 1, 5, 2, 6, 3, 7};
178 void idct_mmx_init(void)
182 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
183 for (i
= 0; i
< 64; i
++) {
184 idct_mmx_perm
[i
] = (i
& 0x38) | ((i
& 6) >> 1) | ((i
& 1) << 2);
185 // idct_simple_mmx_perm[i] = simple_block_permute_op(i);
189 static DCTELEM block
[64] __attribute__ ((aligned (16)));
190 static DCTELEM block1
[64] __attribute__ ((aligned (8)));
191 static DCTELEM block_org
[64] __attribute__ ((aligned (8)));
193 static inline void mmx_emms(void)
196 if (cpu_flags
& FF_MM_MMX
)
197 __asm__
volatile ("emms\n\t");
201 void dct_error(const char *name
, int is_idct
,
202 void (*fdct_func
)(DCTELEM
*block
),
203 void (*fdct_ref
)(DCTELEM
*block
), int form
, int test
)
207 int64_t err2
, ti
, ti1
, it1
;
208 int64_t sysErr
[64], sysErrMax
=0;
210 int blockSumErrMax
=0, blockSumErr
;
216 for(i
=0; i
<64; i
++) sysErr
[i
]=0;
217 for(it
=0;it
<NB_ITS
;it
++) {
223 block1
[i
] = (random() % 512) -256;
232 int num
= (random()%10)+1;
234 block1
[random()%64] = (random() % 512) -256;
237 block1
[0]= (random()%4096)-2048;
238 block1
[63]= (block1
[0]&1)^1;
242 #if 0 // simulate mismatch control
247 if((sum
&1)==0) block1
[63]^=1;
252 block_org
[i
]= block1
[i
];
254 if (form
== MMX_PERM
) {
256 block
[idct_mmx_perm
[i
]] = block1
[i
];
257 } else if (form
== MMX_SIMPLE_PERM
) {
259 block
[idct_simple_mmx_perm
[i
]] = block1
[i
];
261 } else if (form
== SSE2_PERM
) {
263 block
[(i
&0x38) | idct_sse2_row_perm
[i
&7]] = block1
[i
];
264 } else if (form
== PARTTRANS_PERM
) {
266 block
[(i
&0x24) | ((i
&3)<<3) | ((i
>>3)&3)] = block1
[i
];
271 #if 0 // simulate mismatch control for tested IDCT but not the ref
276 if((sum
&1)==0) block
[63]^=1;
283 if (form
== SCALE_PERM
) {
284 for(i
=0; i
<64; i
++) {
285 scale
= 8*(1 << (AANSCALE_BITS
+ 11)) / ff_aanscales
[i
];
286 block
[i
] = (block
[i
] * scale
/*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS
;
294 v
= abs(block
[i
] - block1
[i
]);
298 sysErr
[i
] += block
[i
] - block1
[i
];
300 if( abs(block
[i
])>maxout
) maxout
=abs(block
[i
]);
302 if(blockSumErrMax
< blockSumErr
) blockSumErrMax
= blockSumErr
;
303 #if 0 // print different matrix pairs
307 if((i
&7)==0) printf("\n");
308 printf("%4d ", block_org
[i
]);
311 if((i
&7)==0) printf("\n");
312 printf("%4d ", block
[i
] - block1
[i
]);
317 for(i
=0; i
<64; i
++) sysErrMax
= FFMAX(sysErrMax
, FFABS(sysErr
[i
]));
319 #if 1 // dump systematic errors
321 if(i
%8==0) printf("\n");
322 printf("%5d ", (int)sysErr
[i
]);
327 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
328 is_idct
? "IDCT" : "DCT",
329 name
, err_inf
, (double)err2
/ NB_ITS
/ 64.0, (double)sysErrMax
/ NB_ITS
, maxout
, blockSumErrMax
);
337 block1
[i
] = (random() % 512) -256;
347 block1
[0] = (random() % 512) -256;
348 block1
[1] = (random() % 512) -256;
349 block1
[2] = (random() % 512) -256;
350 block1
[3] = (random() % 512) -256;
354 if (form
== MMX_PERM
) {
356 block
[idct_mmx_perm
[i
]] = block1
[i
];
357 } else if(form
== MMX_SIMPLE_PERM
) {
359 block
[idct_simple_mmx_perm
[i
]] = block1
[i
];
368 for(it
=0;it
<NB_ITS_SPEED
;it
++) {
371 // memcpy(block, block1, sizeof(DCTELEM) * 64);
372 // do not memcpy especially not fastmemcpy because it does movntq !!!
376 ti1
= gettime() - ti
;
377 } while (ti1
< 1000000);
380 printf("%s %s: %0.1f kdct/s\n",
381 is_idct
? "IDCT" : "DCT",
382 name
, (double)it1
* 1000.0 / (double)ti1
);
386 static uint8_t img_dest
[64] __attribute__ ((aligned (8)));
387 static uint8_t img_dest1
[64] __attribute__ ((aligned (8)));
389 void idct248_ref(uint8_t *dest
, int linesize
, int16_t *block
)
392 static double c8
[8][8];
393 static double c4
[4][4];
394 double block1
[64], block2
[64], block3
[64];
404 s
= (i
==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
405 c8
[i
][j
] = s
* cos(M_PI
* i
* (j
+ 0.5) / 8.0);
406 sum
+= c8
[i
][j
] * c8
[i
][j
];
413 s
= (i
==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
414 c4
[i
][j
] = s
* cos(M_PI
* i
* (j
+ 0.5) / 4.0);
415 sum
+= c4
[i
][j
] * c4
[i
][j
];
424 block1
[8*(2*i
)+j
] = (block
[8*(2*i
)+j
] + block
[8*(2*i
+1)+j
]) * s
;
425 block1
[8*(2*i
+1)+j
] = (block
[8*(2*i
)+j
] - block
[8*(2*i
+1)+j
]) * s
;
434 sum
+= c8
[k
][j
] * block1
[8*i
+k
];
445 sum
+= c4
[k
][j
] * block2
[8*(2*k
)+i
];
446 block3
[8*(2*j
)+i
] = sum
;
451 sum
+= c4
[k
][j
] * block2
[8*(2*k
+1)+i
];
452 block3
[8*(2*j
+1)+i
] = sum
;
456 /* clamp and store the result */
464 dest
[i
* linesize
+ j
] = (int)rint(v
);
469 void idct248_error(const char *name
,
470 void (*idct248_put
)(uint8_t *dest
, int line_size
, int16_t *block
))
472 int it
, i
, it1
, ti
, ti1
, err_max
, v
;
476 /* just one test to see if code is correct (precision is less
479 for(it
=0;it
<NB_ITS
;it
++) {
481 /* XXX: use forward transform to generate values */
483 block1
[i
] = (random() % 256) - 128;
488 idct248_ref(img_dest1
, 8, block
);
492 idct248_put(img_dest
, 8, block
);
495 v
= abs((int)img_dest
[i
] - (int)img_dest1
[i
]);
497 printf("%d %d\n", img_dest
[i
], img_dest1
[i
]);
506 printf(" %3d", img_dest1
[i
*8+j
]);
515 printf(" %3d", img_dest
[i
*8+j
]);
521 printf("%s %s: err_inf=%d\n",
522 1 ? "IDCT248" : "DCT248",
528 for(it
=0;it
<NB_ITS_SPEED
;it
++) {
531 // memcpy(block, block1, sizeof(DCTELEM) * 64);
532 // do not memcpy especially not fastmemcpy because it does movntq !!!
533 idct248_put(img_dest
, 8, block
);
536 ti1
= gettime() - ti
;
537 } while (ti1
< 1000000);
540 printf("%s %s: %0.1f kdct/s\n",
541 1 ? "IDCT248" : "DCT248",
542 name
, (double)it1
* 1000.0 / (double)ti1
);
547 printf("dct-test [-i] [<test-number>]\n"
548 "test-number 0 -> test with random matrixes\n"
549 " 1 -> test with random sparse matrixes\n"
550 " 2 -> do 3. test from mpeg4 std\n"
551 "-i test IDCT implementations\n"
552 "-4 test IDCT248 implementations\n");
555 int main(int argc
, char **argv
)
557 int test_idct
= 0, test_248_dct
= 0;
560 cpu_flags
= mm_support();
565 for(i
=0;i
<256;i
++) cropTbl
[i
+ MAX_NEG_CROP
] = i
;
566 for(i
=0;i
<MAX_NEG_CROP
;i
++) {
568 cropTbl
[i
+ MAX_NEG_CROP
+ 256] = 255;
572 c
= getopt(argc
, argv
, "ih4");
589 if(optind
<argc
) test
= atoi(argv
[optind
]);
591 printf("ffmpeg DCT/IDCT test\n");
594 idct248_error("SIMPLE-C", ff_simple_idct248_put
);
596 for (i
=0;algos
[i
].name
;i
++)
597 if (algos
[i
].is_idct
== test_idct
&& !(~cpu_flags
& algos
[i
].mm_support
)) {
598 dct_error (algos
[i
].name
, algos
[i
].is_idct
, algos
[i
].func
, algos
[i
].ref
, algos
[i
].format
, test
);