2 * (c) 2001 Fabrice Bellard
3 * 2007 Marc Hoffman <marc.hoffman@analog.com>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * DCT test. (c) 2001 Fabrice Bellard.
25 * Started from sample code by Juan J. Sierralta P.
37 #include "simple_idct.h"
40 #include "i386/idct_xvid.h"
43 #define MAX(a, b) (((a) > (b)) ? (a) : (b))
49 void *fast_memcpy(void *a
, const void *b
, size_t c
){return memcpy(a
,b
,c
);};
51 /* reference fdct/idct */
52 extern void fdct(DCTELEM
*block
);
53 extern void idct(DCTELEM
*block
);
54 extern void init_fdct();
56 extern void ff_mmx_idct(DCTELEM
*data
);
57 extern void ff_mmxext_idct(DCTELEM
*data
);
59 extern void odivx_idct_c (short *block
);
62 extern void ff_bfin_idct (DCTELEM
*block
) ;
63 extern void ff_bfin_fdct (DCTELEM
*block
) ;
66 extern void fdct_altivec (DCTELEM
*block
);
67 //extern void idct_altivec (DCTELEM *block);?? no routine
72 enum { FDCT
, IDCT
} is_idct
;
73 void (* func
) (DCTELEM
*block
);
74 void (* ref
) (DCTELEM
*block
);
75 enum formattag
{ NO_PERM
,MMX_PERM
, MMX_SIMPLE_PERM
, SCALE_PERM
, SSE2_PERM
} format
;
79 #ifndef FAAN_POSTSCALE
80 #define FAAN_SCALE SCALE_PERM
82 #define FAAN_SCALE NO_PERM
85 struct algo algos
[] = {
86 {"REF-DBL", 0, fdct
, fdct
, NO_PERM
},
87 {"FAAN", 0, ff_faandct
, fdct
, FAAN_SCALE
},
88 {"FAANI", 1, ff_faanidct
, idct
, NO_PERM
},
89 {"IJG-AAN-INT", 0, fdct_ifast
, fdct
, SCALE_PERM
},
90 {"IJG-LLM-INT", 0, ff_jpeg_fdct_islow
, fdct
, NO_PERM
},
91 {"REF-DBL", 1, idct
, idct
, NO_PERM
},
92 {"INT", 1, j_rev_dct
, idct
, MMX_PERM
},
93 {"SIMPLE-C", 1, ff_simple_idct
, idct
, NO_PERM
},
96 {"MMX", 0, ff_fdct_mmx
, fdct
, NO_PERM
, MM_MMX
},
98 {"MMX2", 0, ff_fdct_mmx2
, fdct
, NO_PERM
, MM_MMXEXT
},
102 {"LIBMPEG2-MMX", 1, ff_mmx_idct
, idct
, MMX_PERM
, MM_MMX
},
103 {"LIBMPEG2-MMXEXT", 1, ff_mmxext_idct
, idct
, MMX_PERM
, MM_MMXEXT
},
105 {"SIMPLE-MMX", 1, ff_simple_idct_mmx
, idct
, MMX_SIMPLE_PERM
, MM_MMX
},
106 {"XVID-MMX", 1, ff_idct_xvid_mmx
, idct
, NO_PERM
, MM_MMX
},
107 {"XVID-MMX2", 1, ff_idct_xvid_mmx2
, idct
, NO_PERM
, MM_MMXEXT
},
108 {"XVID-SSE2", 1, ff_idct_xvid_sse2
, idct
, SSE2_PERM
, MM_SSE2
},
112 {"altivecfdct", 0, fdct_altivec
, fdct
, NO_PERM
, MM_ALTIVEC
},
116 {"BFINfdct", 0, ff_bfin_fdct
, fdct
, NO_PERM
},
117 {"BFINidct", 1, ff_bfin_idct
, idct
, NO_PERM
},
123 #define AANSCALE_BITS 12
124 static const unsigned short aanscales
[64] = {
125 /* precomputed values scaled up by 14 bits */
126 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
127 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
128 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
129 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
130 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
131 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
132 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
133 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
136 uint8_t cropTbl
[256 + 2 * MAX_NEG_CROP
];
138 int64_t gettime(void)
141 gettimeofday(&tv
,NULL
);
142 return (int64_t)tv
.tv_sec
* 1000000 + tv
.tv_usec
;
146 #define NB_ITS_SPEED 50000
148 static short idct_mmx_perm
[64];
150 static short idct_simple_mmx_perm
[64]={
151 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
152 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
153 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
154 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
155 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
156 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
157 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
158 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
161 static const uint8_t idct_sse2_row_perm
[8] = {0, 4, 1, 5, 2, 6, 3, 7};
163 void idct_mmx_init(void)
167 /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
168 for (i
= 0; i
< 64; i
++) {
169 idct_mmx_perm
[i
] = (i
& 0x38) | ((i
& 6) >> 1) | ((i
& 1) << 2);
170 // idct_simple_mmx_perm[i] = simple_block_permute_op(i);
174 static DCTELEM block
[64] __attribute__ ((aligned (16)));
175 static DCTELEM block1
[64] __attribute__ ((aligned (8)));
176 static DCTELEM block_org
[64] __attribute__ ((aligned (8)));
178 void dct_error(const char *name
, int is_idct
,
179 void (*fdct_func
)(DCTELEM
*block
),
180 void (*fdct_ref
)(DCTELEM
*block
), int form
, int test
)
184 int64_t err2
, ti
, ti1
, it1
;
185 int64_t sysErr
[64], sysErrMax
=0;
187 int blockSumErrMax
=0, blockSumErr
;
193 for(i
=0; i
<64; i
++) sysErr
[i
]=0;
194 for(it
=0;it
<NB_ITS
;it
++) {
200 block1
[i
] = (random() % 512) -256;
209 int num
= (random()%10)+1;
211 block1
[random()%64] = (random() % 512) -256;
214 block1
[0]= (random()%4096)-2048;
215 block1
[63]= (block1
[0]&1)^1;
219 #if 0 // simulate mismatch control
224 if((sum
&1)==0) block1
[63]^=1;
229 block_org
[i
]= block1
[i
];
231 if (form
== MMX_PERM
) {
233 block
[idct_mmx_perm
[i
]] = block1
[i
];
234 } else if (form
== MMX_SIMPLE_PERM
) {
236 block
[idct_simple_mmx_perm
[i
]] = block1
[i
];
238 } else if (form
== SSE2_PERM
) {
240 block
[(i
&0x38) | idct_sse2_row_perm
[i
&7]] = block1
[i
];
245 #if 0 // simulate mismatch control for tested IDCT but not the ref
250 if((sum
&1)==0) block
[63]^=1;
255 emms_c(); /* for ff_mmx_idct */
257 if (form
== SCALE_PERM
) {
258 for(i
=0; i
<64; i
++) {
259 scale
= 8*(1 << (AANSCALE_BITS
+ 11)) / aanscales
[i
];
260 block
[i
] = (block
[i
] * scale
/*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS
;
268 v
= abs(block
[i
] - block1
[i
]);
272 sysErr
[i
] += block
[i
] - block1
[i
];
274 if( abs(block
[i
])>maxout
) maxout
=abs(block
[i
]);
276 if(blockSumErrMax
< blockSumErr
) blockSumErrMax
= blockSumErr
;
277 #if 0 // print different matrix pairs
281 if((i
&7)==0) printf("\n");
282 printf("%4d ", block_org
[i
]);
285 if((i
&7)==0) printf("\n");
286 printf("%4d ", block
[i
] - block1
[i
]);
291 for(i
=0; i
<64; i
++) sysErrMax
= MAX(sysErrMax
, FFABS(sysErr
[i
]));
293 #if 1 // dump systematic errors
295 if(i
%8==0) printf("\n");
296 printf("%5d ", (int)sysErr
[i
]);
301 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
302 is_idct
? "IDCT" : "DCT",
303 name
, err_inf
, (double)err2
/ NB_ITS
/ 64.0, (double)sysErrMax
/ NB_ITS
, maxout
, blockSumErrMax
);
311 block1
[i
] = (random() % 512) -256;
321 block1
[0] = (random() % 512) -256;
322 block1
[1] = (random() % 512) -256;
323 block1
[2] = (random() % 512) -256;
324 block1
[3] = (random() % 512) -256;
328 if (form
== MMX_PERM
) {
330 block
[idct_mmx_perm
[i
]] = block1
[i
];
331 } else if(form
== MMX_SIMPLE_PERM
) {
333 block
[idct_simple_mmx_perm
[i
]] = block1
[i
];
342 for(it
=0;it
<NB_ITS_SPEED
;it
++) {
345 // memcpy(block, block1, sizeof(DCTELEM) * 64);
346 // do not memcpy especially not fastmemcpy because it does movntq !!!
350 ti1
= gettime() - ti
;
351 } while (ti1
< 1000000);
354 printf("%s %s: %0.1f kdct/s\n",
355 is_idct
? "IDCT" : "DCT",
356 name
, (double)it1
* 1000.0 / (double)ti1
);
360 static uint8_t img_dest
[64] __attribute__ ((aligned (8)));
361 static uint8_t img_dest1
[64] __attribute__ ((aligned (8)));
363 void idct248_ref(uint8_t *dest
, int linesize
, int16_t *block
)
366 static double c8
[8][8];
367 static double c4
[4][4];
368 double block1
[64], block2
[64], block3
[64];
378 s
= (i
==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
379 c8
[i
][j
] = s
* cos(M_PI
* i
* (j
+ 0.5) / 8.0);
380 sum
+= c8
[i
][j
] * c8
[i
][j
];
387 s
= (i
==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
388 c4
[i
][j
] = s
* cos(M_PI
* i
* (j
+ 0.5) / 4.0);
389 sum
+= c4
[i
][j
] * c4
[i
][j
];
398 block1
[8*(2*i
)+j
] = (block
[8*(2*i
)+j
] + block
[8*(2*i
+1)+j
]) * s
;
399 block1
[8*(2*i
+1)+j
] = (block
[8*(2*i
)+j
] - block
[8*(2*i
+1)+j
]) * s
;
408 sum
+= c8
[k
][j
] * block1
[8*i
+k
];
419 sum
+= c4
[k
][j
] * block2
[8*(2*k
)+i
];
420 block3
[8*(2*j
)+i
] = sum
;
425 sum
+= c4
[k
][j
] * block2
[8*(2*k
+1)+i
];
426 block3
[8*(2*j
+1)+i
] = sum
;
430 /* clamp and store the result */
438 dest
[i
* linesize
+ j
] = (int)rint(v
);
443 void idct248_error(const char *name
,
444 void (*idct248_put
)(uint8_t *dest
, int line_size
, int16_t *block
))
446 int it
, i
, it1
, ti
, ti1
, err_max
, v
;
450 /* just one test to see if code is correct (precision is less
453 for(it
=0;it
<NB_ITS
;it
++) {
455 /* XXX: use forward transform to generate values */
457 block1
[i
] = (random() % 256) - 128;
462 idct248_ref(img_dest1
, 8, block
);
466 idct248_put(img_dest
, 8, block
);
469 v
= abs((int)img_dest
[i
] - (int)img_dest1
[i
]);
471 printf("%d %d\n", img_dest
[i
], img_dest1
[i
]);
480 printf(" %3d", img_dest1
[i
*8+j
]);
489 printf(" %3d", img_dest
[i
*8+j
]);
495 printf("%s %s: err_inf=%d\n",
496 1 ? "IDCT248" : "DCT248",
502 for(it
=0;it
<NB_ITS_SPEED
;it
++) {
505 // memcpy(block, block1, sizeof(DCTELEM) * 64);
506 // do not memcpy especially not fastmemcpy because it does movntq !!!
507 idct248_put(img_dest
, 8, block
);
510 ti1
= gettime() - ti
;
511 } while (ti1
< 1000000);
514 printf("%s %s: %0.1f kdct/s\n",
515 1 ? "IDCT248" : "DCT248",
516 name
, (double)it1
* 1000.0 / (double)ti1
);
521 printf("dct-test [-i] [<test-number>]\n"
522 "test-number 0 -> test with random matrixes\n"
523 " 1 -> test with random sparse matrixes\n"
524 " 2 -> do 3. test from mpeg4 std\n"
525 "-i test IDCT implementations\n"
526 "-4 test IDCT248 implementations\n");
529 int main(int argc
, char **argv
)
531 int test_idct
= 0, test_248_dct
= 0;
537 mm_flags
= mm_support();
539 for(i
=0;i
<256;i
++) cropTbl
[i
+ MAX_NEG_CROP
] = i
;
540 for(i
=0;i
<MAX_NEG_CROP
;i
++) {
542 cropTbl
[i
+ MAX_NEG_CROP
+ 256] = 255;
546 c
= getopt(argc
, argv
, "ih4");
563 if(optind
<argc
) test
= atoi(argv
[optind
]);
565 printf("ffmpeg DCT/IDCT test\n");
568 idct248_error("SIMPLE-C", ff_simple_idct248_put
);
570 for (i
=0;algos
[i
].name
;i
++)
571 if (algos
[i
].is_idct
== test_idct
&& !(~mm_flags
& algos
[i
].mm_support
)) {
572 dct_error (algos
[i
].name
, algos
[i
].is_idct
, algos
[i
].func
, algos
[i
].ref
, algos
[i
].format
, test
);