/*
 * MMX optimized DSP utils
 * Copyright (c) 2000, 2001 Fabrice Bellard.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
 */
22 #include "../dsputil.h"
24 int mm_flags
; /* multimedia extension flags */
25 /* FIXME use them in static form */
26 int pix_abs16x16_mmx(UINT8
*blk1
, UINT8
*blk2
, int lx
);
27 int pix_abs16x16_x2_mmx(UINT8
*blk1
, UINT8
*blk2
, int lx
);
28 int pix_abs16x16_y2_mmx(UINT8
*blk1
, UINT8
*blk2
, int lx
);
29 int pix_abs16x16_xy2_mmx(UINT8
*blk1
, UINT8
*blk2
, int lx
);
31 int pix_abs16x16_mmx2(UINT8
*blk1
, UINT8
*blk2
, int lx
);
32 int pix_abs16x16_x2_mmx2(UINT8
*blk1
, UINT8
*blk2
, int lx
);
33 int pix_abs16x16_y2_mmx2(UINT8
*blk1
, UINT8
*blk2
, int lx
);
34 int pix_abs16x16_xy2_mmx2(UINT8
*blk1
, UINT8
*blk2
, int lx
);
36 int pix_abs8x8_mmx(UINT8
*blk1
, UINT8
*blk2
, int lx
);
37 int pix_abs8x8_x2_mmx(UINT8
*blk1
, UINT8
*blk2
, int lx
);
38 int pix_abs8x8_y2_mmx(UINT8
*blk1
, UINT8
*blk2
, int lx
);
39 int pix_abs8x8_xy2_mmx(UINT8
*blk1
, UINT8
*blk2
, int lx
);
41 int pix_abs8x8_mmx2(UINT8
*blk1
, UINT8
*blk2
, int lx
);
42 int pix_abs8x8_x2_mmx2(UINT8
*blk1
, UINT8
*blk2
, int lx
);
43 int pix_abs8x8_y2_mmx2(UINT8
*blk1
, UINT8
*blk2
, int lx
);
44 int pix_abs8x8_xy2_mmx2(UINT8
*blk1
, UINT8
*blk2
, int lx
);
/* pixel operations */
/* 8-byte aligned MMX constants: bytes of 1, words of 1, words of 2. */
static const uint64_t mm_bone __attribute__ ((aligned(8))) = 0x0101010101010101ULL;
static const uint64_t mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
static const uint64_t mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002ULL;
#define JUMPALIGN() __asm __volatile (".balign 8"::)
#define MOVQ_ZERO(regd)  __asm __volatile ("pxor %%" #regd ", %%" #regd ::)

/* Load 0x0001000100010001 into regd without a memory reference. */
#define MOVQ_WONE(regd) \
    __asm __volatile ( \
    "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
    "psrlw $15, %%" #regd ::)

/* Load 0xfefefefefefefefe into regd (all-ones doubled, bytes wrap). */
#define MOVQ_BFE(regd) \
    __asm __volatile ( \
    "pcmpeqd %%" #regd ", %%" #regd " \n\t"\
    "paddb %%" #regd ", %%" #regd " \n\t" ::)

/* NOTE(review): the listing dropped the preprocessor guard that selected
   between the two MOVQ_BONE/MOVQ_WTWO variants; #ifndef PIC restored
   here — confirm against the upstream file. */
#ifndef PIC
#define MOVQ_BONE(regd)  __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_bone))
#define MOVQ_WTWO(regd)  __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_wtwo))
#else
// for shared library it's better to use this way for accessing constants
#define MOVQ_BONE(regd) \
    __asm __volatile ( \
    "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
    "psrlw $15, %%" #regd " \n\t" \
    "packuswb %%" #regd ", %%" #regd " \n\t" ::)

#define MOVQ_WTWO(regd) \
    __asm __volatile ( \
    "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
    "psrlw $15, %%" #regd " \n\t" \
    "psllw $1, %%" #regd " \n\t"::)
#endif
// using regr as temporary and for the output result
// first argument is unmodifed and second is trashed
// regfe is supposed to contain 0xfefefefefefefefe
#define PAVGB_MMX_NO_RND(rega, regb, regr, regfe) \
    "movq " #rega ", " #regr " \n\t"\
    "pand " #regb ", " #regr " \n\t"\
    "pxor " #rega ", " #regb " \n\t"\
    "pand " #regfe "," #regb " \n\t"\
    "psrlq $1, " #regb " \n\t"\
    "paddb " #regb ", " #regr " \n\t"

#define PAVGB_MMX(rega, regb, regr, regfe) \
    "movq " #rega ", " #regr " \n\t"\
    "por  " #regb ", " #regr " \n\t"\
    "pxor " #rega ", " #regb " \n\t"\
    "pand " #regfe "," #regb " \n\t"\
    "psrlq $1, " #regb " \n\t"\
    "psubb " #regb ", " #regr " \n\t"
// paired (two-register) byte averages
// mm6 is supposed to contain 0xfefefefefefefefe
#define PAVGBP_MMX_NO_RND(rega, regb, regr,  regc, regd, regp) \
    "movq " #rega ", " #regr "  \n\t"\
    "movq " #regc ", " #regp "  \n\t"\
    "pand " #regb ", " #regr "  \n\t"\
    "pand " #regd ", " #regp "  \n\t"\
    "pxor " #rega ", " #regb "  \n\t"\
    "pxor " #regc ", " #regd "  \n\t"\
    "pand %%mm6, " #regb "      \n\t"\
    "pand %%mm6, " #regd "      \n\t"\
    "psrlq $1, " #regb "        \n\t"\
    "psrlq $1, " #regd "        \n\t"\
    "paddb " #regb ", " #regr " \n\t"\
    "paddb " #regd ", " #regp " \n\t"

#define PAVGBP_MMX(rega, regb, regr, regc, regd, regp) \
    "movq " #rega ", " #regr "  \n\t"\
    "movq " #regc ", " #regp "  \n\t"\
    "por  " #regb ", " #regr "  \n\t"\
    "por  " #regd ", " #regp "  \n\t"\
    "pxor " #rega ", " #regb "  \n\t"\
    "pxor " #regc ", " #regd "  \n\t"\
    "pand %%mm6, " #regb "      \n\t"\
    "pand %%mm6, " #regd "      \n\t"\
    "psrlq $1, " #regd "        \n\t"\
    "psrlq $1, " #regb "        \n\t"\
    "psubb " #regb ", " #regr " \n\t"\
    "psubb " #regd ", " #regp " \n\t"
132 /***********************************/
133 /* MMX no rounding */
134 #define DEF(x, y) x ## _no_rnd_ ## y ##_mmx
135 #define SET_RND MOVQ_WONE
136 #define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX_NO_RND(a, b, c, d, e, f)
137 #define PAVGB(a, b, c, e) PAVGB_MMX_NO_RND(a, b, c, e)
139 #include "dsputil_mmx_rnd.h"
145 /***********************************/
148 #define DEF(x, y) x ## _ ## y ##_mmx
149 #define SET_RND MOVQ_WTWO
150 #define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX(a, b, c, d, e, f)
151 #define PAVGB(a, b, c, e) PAVGB_MMX(a, b, c, e)
153 #include "dsputil_mmx_rnd.h"
160 /***********************************/
163 #define DEF(x) x ## _3dnow
164 /* for Athlons PAVGUSB is prefered */
165 #define PAVGB "pavgusb"
167 #include "dsputil_mmx_avg.h"
172 /***********************************/
175 #define DEF(x) x ## _mmx2
177 /* Introduced only in MMX2 set */
178 #define PAVGB "pavgb"
180 #include "dsputil_mmx_avg.h"
185 /***********************************/
188 static void get_pixels_mmx(DCTELEM
*block
, const UINT8
*pixels
, int line_size
)
191 "movl $-128, %%eax \n\t"
192 "pxor %%mm7, %%mm7 \n\t"
195 "movq (%0), %%mm0 \n\t"
196 "movq (%0, %2), %%mm2 \n\t"
197 "movq %%mm0, %%mm1 \n\t"
198 "movq %%mm2, %%mm3 \n\t"
199 "punpcklbw %%mm7, %%mm0 \n\t"
200 "punpckhbw %%mm7, %%mm1 \n\t"
201 "punpcklbw %%mm7, %%mm2 \n\t"
202 "punpckhbw %%mm7, %%mm3 \n\t"
203 "movq %%mm0, (%1, %%eax)\n\t"
204 "movq %%mm1, 8(%1, %%eax)\n\t"
205 "movq %%mm2, 16(%1, %%eax)\n\t"
206 "movq %%mm3, 24(%1, %%eax)\n\t"
208 "addl $32, %%eax \n\t"
211 : "r" (block
+64), "r" (line_size
), "r" (line_size
*2)
216 static void diff_pixels_mmx(DCTELEM
*block
, const UINT8
*s1
, const UINT8
*s2
, int stride
)
219 "pxor %%mm7, %%mm7 \n\t"
220 "movl $-128, %%eax \n\t"
223 "movq (%0), %%mm0 \n\t"
224 "movq (%1), %%mm2 \n\t"
225 "movq %%mm0, %%mm1 \n\t"
226 "movq %%mm2, %%mm3 \n\t"
227 "punpcklbw %%mm7, %%mm0 \n\t"
228 "punpckhbw %%mm7, %%mm1 \n\t"
229 "punpcklbw %%mm7, %%mm2 \n\t"
230 "punpckhbw %%mm7, %%mm3 \n\t"
231 "psubw %%mm2, %%mm0 \n\t"
232 "psubw %%mm3, %%mm1 \n\t"
233 "movq %%mm0, (%2, %%eax)\n\t"
234 "movq %%mm1, 8(%2, %%eax)\n\t"
237 "addl $16, %%eax \n\t"
239 : "+r" (s1
), "+r" (s2
)
240 : "r" (block
+64), "r" (stride
)
245 void put_pixels_clamped_mmx(const DCTELEM
*block
, UINT8
*pixels
, int line_size
)
250 /* read the pixels */
256 "movq 8%3, %%mm1\n\t"
257 "movq 16%3, %%mm2\n\t"
258 "movq 24%3, %%mm3\n\t"
259 "movq 32%3, %%mm4\n\t"
260 "movq 40%3, %%mm5\n\t"
261 "movq 48%3, %%mm6\n\t"
262 "movq 56%3, %%mm7\n\t"
263 "packuswb %%mm1, %%mm0\n\t"
264 "packuswb %%mm3, %%mm2\n\t"
265 "packuswb %%mm5, %%mm4\n\t"
266 "packuswb %%mm7, %%mm6\n\t"
267 "movq %%mm0, (%0)\n\t"
268 "movq %%mm2, (%0, %1)\n\t"
269 "movq %%mm4, (%0, %1, 2)\n\t"
270 "movq %%mm6, (%0, %2)\n\t"
271 ::"r" (pix
), "r" (line_size
), "r" (line_size
*3), "m"(*p
)
276 // if here would be an exact copy of the code above
277 // compiler would generate some very strange code
280 "movq (%3), %%mm0\n\t"
281 "movq 8(%3), %%mm1\n\t"
282 "movq 16(%3), %%mm2\n\t"
283 "movq 24(%3), %%mm3\n\t"
284 "movq 32(%3), %%mm4\n\t"
285 "movq 40(%3), %%mm5\n\t"
286 "movq 48(%3), %%mm6\n\t"
287 "movq 56(%3), %%mm7\n\t"
288 "packuswb %%mm1, %%mm0\n\t"
289 "packuswb %%mm3, %%mm2\n\t"
290 "packuswb %%mm5, %%mm4\n\t"
291 "packuswb %%mm7, %%mm6\n\t"
292 "movq %%mm0, (%0)\n\t"
293 "movq %%mm2, (%0, %1)\n\t"
294 "movq %%mm4, (%0, %1, 2)\n\t"
295 "movq %%mm6, (%0, %2)\n\t"
296 ::"r" (pix
), "r" (line_size
), "r" (line_size
*3), "r"(p
)
300 void add_pixels_clamped_mmx(const DCTELEM
*block
, UINT8
*pixels
, int line_size
)
306 /* read the pixels */
313 "movq (%2), %%mm0\n\t"
314 "movq 8(%2), %%mm1\n\t"
315 "movq 16(%2), %%mm2\n\t"
316 "movq 24(%2), %%mm3\n\t"
319 "movq %%mm4, %%mm5\n\t"
320 "punpcklbw %%mm7, %%mm4\n\t"
321 "punpckhbw %%mm7, %%mm5\n\t"
322 "paddsw %%mm4, %%mm0\n\t"
323 "paddsw %%mm5, %%mm1\n\t"
324 "movq %%mm6, %%mm5\n\t"
325 "punpcklbw %%mm7, %%mm6\n\t"
326 "punpckhbw %%mm7, %%mm5\n\t"
327 "paddsw %%mm6, %%mm2\n\t"
328 "paddsw %%mm5, %%mm3\n\t"
329 "packuswb %%mm1, %%mm0\n\t"
330 "packuswb %%mm3, %%mm2\n\t"
333 :"+m"(*pix
), "+m"(*(pix
+line_size
))
341 static void put_pixels8_mmx(UINT8
*block
, const UINT8
*pixels
, int line_size
, int h
)
344 "lea (%3, %3), %%eax \n\t"
347 "movq (%1), %%mm0 \n\t"
348 "movq (%1, %3), %%mm1 \n\t"
349 "movq %%mm0, (%2) \n\t"
350 "movq %%mm1, (%2, %3) \n\t"
351 "addl %%eax, %1 \n\t"
352 "addl %%eax, %2 \n\t"
353 "movq (%1), %%mm0 \n\t"
354 "movq (%1, %3), %%mm1 \n\t"
355 "movq %%mm0, (%2) \n\t"
356 "movq %%mm1, (%2, %3) \n\t"
357 "addl %%eax, %1 \n\t"
358 "addl %%eax, %2 \n\t"
361 : "+g"(h
), "+r" (pixels
), "+r" (block
)
367 static void put_pixels16_mmx(UINT8
*block
, const UINT8
*pixels
, int line_size
, int h
)
370 "lea (%3, %3), %%eax \n\t"
373 "movq (%1), %%mm0 \n\t"
374 "movq 8(%1), %%mm4 \n\t"
375 "movq (%1, %3), %%mm1 \n\t"
376 "movq 8(%1, %3), %%mm5 \n\t"
377 "movq %%mm0, (%2) \n\t"
378 "movq %%mm4, 8(%2) \n\t"
379 "movq %%mm1, (%2, %3) \n\t"
380 "movq %%mm5, 8(%2, %3) \n\t"
381 "addl %%eax, %1 \n\t"
382 "addl %%eax, %2 \n\t"
383 "movq (%1), %%mm0 \n\t"
384 "movq 8(%1), %%mm4 \n\t"
385 "movq (%1, %3), %%mm1 \n\t"
386 "movq 8(%1, %3), %%mm5 \n\t"
387 "movq %%mm0, (%2) \n\t"
388 "movq %%mm4, 8(%2) \n\t"
389 "movq %%mm1, (%2, %3) \n\t"
390 "movq %%mm5, 8(%2, %3) \n\t"
391 "addl %%eax, %1 \n\t"
392 "addl %%eax, %2 \n\t"
395 : "+g"(h
), "+r" (pixels
), "+r" (block
)
401 static void clear_blocks_mmx(DCTELEM
*blocks
)
404 "pxor %%mm7, %%mm7 \n\t"
405 "movl $-128*6, %%eax \n\t"
407 "movq %%mm7, (%0, %%eax) \n\t"
408 "movq %%mm7, 8(%0, %%eax) \n\t"
409 "movq %%mm7, 16(%0, %%eax) \n\t"
410 "movq %%mm7, 24(%0, %%eax) \n\t"
411 "addl $32, %%eax \n\t"
413 : : "r" (((int)blocks
)+128*6)
418 static int pix_sum16_mmx(UINT8
* pix
, int line_size
){
421 int index
= -line_size
*h
;
424 "pxor %%mm7, %%mm7 \n\t"
425 "pxor %%mm6, %%mm6 \n\t"
427 "movq (%2, %1), %%mm0 \n\t"
428 "movq (%2, %1), %%mm1 \n\t"
429 "movq 8(%2, %1), %%mm2 \n\t"
430 "movq 8(%2, %1), %%mm3 \n\t"
431 "punpcklbw %%mm7, %%mm0 \n\t"
432 "punpckhbw %%mm7, %%mm1 \n\t"
433 "punpcklbw %%mm7, %%mm2 \n\t"
434 "punpckhbw %%mm7, %%mm3 \n\t"
435 "paddw %%mm0, %%mm1 \n\t"
436 "paddw %%mm2, %%mm3 \n\t"
437 "paddw %%mm1, %%mm3 \n\t"
438 "paddw %%mm3, %%mm6 \n\t"
441 "movq %%mm6, %%mm5 \n\t"
442 "psrlq $32, %%mm6 \n\t"
443 "paddw %%mm5, %%mm6 \n\t"
444 "movq %%mm6, %%mm5 \n\t"
445 "psrlq $16, %%mm6 \n\t"
446 "paddw %%mm5, %%mm6 \n\t"
447 "movd %%mm6, %0 \n\t"
448 "andl $0xFFFF, %0 \n\t"
449 : "=&r" (sum
), "+r" (index
)
450 : "r" (pix
- index
), "r" (line_size
)
456 static void add_bytes_mmx(uint8_t *dst
, uint8_t *src
, int w
){
460 "movq (%1, %0), %%mm0 \n\t"
461 "movq (%2, %0), %%mm1 \n\t"
462 "paddb %%mm0, %%mm1 \n\t"
463 "movq %%mm1, (%2, %0) \n\t"
464 "movq 8(%1, %0), %%mm0 \n\t"
465 "movq 8(%2, %0), %%mm1 \n\t"
466 "paddb %%mm0, %%mm1 \n\t"
467 "movq %%mm1, 8(%2, %0) \n\t"
472 : "r"(src
), "r"(dst
), "r"(w
-15)
475 dst
[i
+0] += src
[i
+0];
478 static void diff_bytes_mmx(uint8_t *dst
, uint8_t *src1
, uint8_t *src2
, int w
){
482 "movq (%2, %0), %%mm0 \n\t"
483 "movq (%1, %0), %%mm1 \n\t"
484 "psubb %%mm0, %%mm1 \n\t"
485 "movq %%mm1, (%3, %0) \n\t"
486 "movq 8(%2, %0), %%mm0 \n\t"
487 "movq 8(%1, %0), %%mm1 \n\t"
488 "psubb %%mm0, %%mm1 \n\t"
489 "movq %%mm1, 8(%3, %0) \n\t"
494 : "r"(src1
), "r"(src2
), "r"(dst
), "r"(w
-15)
497 dst
[i
+0] = src1
[i
+0]-src2
[i
+0];
502 static void just_return() { return; }
505 void dsputil_init_mmx(DSPContext
* c
, unsigned mask
)
507 mm_flags
= mm_support();
509 fprintf(stderr
, "libavcodec: CPU flags:");
510 if (mm_flags
& MM_MMX
)
511 fprintf(stderr
, " mmx");
512 if (mm_flags
& MM_MMXEXT
)
513 fprintf(stderr
, " mmxext");
514 if (mm_flags
& MM_3DNOW
)
515 fprintf(stderr
, " 3dnow");
516 if (mm_flags
& MM_SSE
)
517 fprintf(stderr
, " sse");
518 if (mm_flags
& MM_SSE2
)
519 fprintf(stderr
, " sse2");
520 fprintf(stderr
, "\n");
523 if (mm_flags
& MM_MMX
) {
524 c
->get_pixels
= get_pixels_mmx
;
525 c
->diff_pixels
= diff_pixels_mmx
;
526 c
->put_pixels_clamped
= put_pixels_clamped_mmx
;
527 c
->add_pixels_clamped
= add_pixels_clamped_mmx
;
528 c
->clear_blocks
= clear_blocks_mmx
;
529 c
->pix_sum
= pix_sum16_mmx
;
531 c
->pix_abs16x16
= pix_abs16x16_mmx
;
532 c
->pix_abs16x16_x2
= pix_abs16x16_x2_mmx
;
533 c
->pix_abs16x16_y2
= pix_abs16x16_y2_mmx
;
534 c
->pix_abs16x16_xy2
= pix_abs16x16_xy2_mmx
;
535 c
->pix_abs8x8
= pix_abs8x8_mmx
;
536 c
->pix_abs8x8_x2
= pix_abs8x8_x2_mmx
;
537 c
->pix_abs8x8_y2
= pix_abs8x8_y2_mmx
;
538 c
->pix_abs8x8_xy2
= pix_abs8x8_xy2_mmx
;
540 c
->put_pixels_tab
[0][0] = put_pixels16_mmx
;
541 c
->put_pixels_tab
[0][1] = put_pixels16_x2_mmx
;
542 c
->put_pixels_tab
[0][2] = put_pixels16_y2_mmx
;
543 c
->put_pixels_tab
[0][3] = put_pixels16_xy2_mmx
;
545 c
->put_no_rnd_pixels_tab
[0][0] = put_pixels16_mmx
;
546 c
->put_no_rnd_pixels_tab
[0][1] = put_no_rnd_pixels16_x2_mmx
;
547 c
->put_no_rnd_pixels_tab
[0][2] = put_no_rnd_pixels16_y2_mmx
;
548 c
->put_no_rnd_pixels_tab
[0][3] = put_no_rnd_pixels16_xy2_mmx
;
550 c
->avg_pixels_tab
[0][0] = avg_pixels16_mmx
;
551 c
->avg_pixels_tab
[0][1] = avg_pixels16_x2_mmx
;
552 c
->avg_pixels_tab
[0][2] = avg_pixels16_y2_mmx
;
553 c
->avg_pixels_tab
[0][3] = avg_pixels16_xy2_mmx
;
555 c
->avg_no_rnd_pixels_tab
[0][0] = avg_no_rnd_pixels16_mmx
;
556 c
->avg_no_rnd_pixels_tab
[0][1] = avg_no_rnd_pixels16_x2_mmx
;
557 c
->avg_no_rnd_pixels_tab
[0][2] = avg_no_rnd_pixels16_y2_mmx
;
558 c
->avg_no_rnd_pixels_tab
[0][3] = avg_no_rnd_pixels16_xy2_mmx
;
560 c
->put_pixels_tab
[1][0] = put_pixels8_mmx
;
561 c
->put_pixels_tab
[1][1] = put_pixels8_x2_mmx
;
562 c
->put_pixels_tab
[1][2] = put_pixels8_y2_mmx
;
563 c
->put_pixels_tab
[1][3] = put_pixels8_xy2_mmx
;
565 c
->put_no_rnd_pixels_tab
[1][0] = put_pixels8_mmx
;
566 c
->put_no_rnd_pixels_tab
[1][1] = put_no_rnd_pixels8_x2_mmx
;
567 c
->put_no_rnd_pixels_tab
[1][2] = put_no_rnd_pixels8_y2_mmx
;
568 c
->put_no_rnd_pixels_tab
[1][3] = put_no_rnd_pixels8_xy2_mmx
;
570 c
->avg_pixels_tab
[1][0] = avg_pixels8_mmx
;
571 c
->avg_pixels_tab
[1][1] = avg_pixels8_x2_mmx
;
572 c
->avg_pixels_tab
[1][2] = avg_pixels8_y2_mmx
;
573 c
->avg_pixels_tab
[1][3] = avg_pixels8_xy2_mmx
;
575 c
->avg_no_rnd_pixels_tab
[1][0] = avg_no_rnd_pixels8_mmx
;
576 c
->avg_no_rnd_pixels_tab
[1][1] = avg_no_rnd_pixels8_x2_mmx
;
577 c
->avg_no_rnd_pixels_tab
[1][2] = avg_no_rnd_pixels8_y2_mmx
;
578 c
->avg_no_rnd_pixels_tab
[1][3] = avg_no_rnd_pixels8_xy2_mmx
;
580 c
->add_bytes
= add_bytes_mmx
;
581 c
->diff_bytes
= diff_bytes_mmx
;
583 if (mm_flags
& MM_MMXEXT
) {
584 c
->pix_abs16x16
= pix_abs16x16_mmx2
;
585 c
->pix_abs16x16_x2
= pix_abs16x16_x2_mmx2
;
586 c
->pix_abs16x16_y2
= pix_abs16x16_y2_mmx2
;
587 c
->pix_abs16x16_xy2
= pix_abs16x16_xy2_mmx2
;
589 c
->pix_abs8x8
= pix_abs8x8_mmx2
;
590 c
->pix_abs8x8_x2
= pix_abs8x8_x2_mmx2
;
591 c
->pix_abs8x8_y2
= pix_abs8x8_y2_mmx2
;
592 c
->pix_abs8x8_xy2
= pix_abs8x8_xy2_mmx2
;
594 c
->put_pixels_tab
[0][1] = put_pixels16_x2_mmx2
;
595 c
->put_pixels_tab
[0][2] = put_pixels16_y2_mmx2
;
596 c
->put_no_rnd_pixels_tab
[0][1] = put_no_rnd_pixels16_x2_mmx2
;
597 c
->put_no_rnd_pixels_tab
[0][2] = put_no_rnd_pixels16_y2_mmx2
;
599 c
->avg_pixels_tab
[0][0] = avg_pixels16_mmx2
;
600 c
->avg_pixels_tab
[0][1] = avg_pixels16_x2_mmx2
;
601 c
->avg_pixels_tab
[0][2] = avg_pixels16_y2_mmx2
;
602 c
->avg_pixels_tab
[0][3] = avg_pixels16_xy2_mmx2
;
604 c
->put_pixels_tab
[1][1] = put_pixels8_x2_mmx2
;
605 c
->put_pixels_tab
[1][2] = put_pixels8_y2_mmx2
;
606 c
->put_no_rnd_pixels_tab
[1][1] = put_no_rnd_pixels8_x2_mmx2
;
607 c
->put_no_rnd_pixels_tab
[1][2] = put_no_rnd_pixels8_y2_mmx2
;
609 c
->avg_pixels_tab
[1][0] = avg_pixels8_mmx2
;
610 c
->avg_pixels_tab
[1][1] = avg_pixels8_x2_mmx2
;
611 c
->avg_pixels_tab
[1][2] = avg_pixels8_y2_mmx2
;
612 c
->avg_pixels_tab
[1][3] = avg_pixels8_xy2_mmx2
;
613 } else if (mm_flags
& MM_3DNOW
) {
614 c
->put_pixels_tab
[0][1] = put_pixels16_x2_3dnow
;
615 c
->put_pixels_tab
[0][2] = put_pixels16_y2_3dnow
;
616 c
->put_no_rnd_pixels_tab
[0][1] = put_no_rnd_pixels16_x2_3dnow
;
617 c
->put_no_rnd_pixels_tab
[0][2] = put_no_rnd_pixels16_y2_3dnow
;
619 c
->avg_pixels_tab
[0][0] = avg_pixels16_3dnow
;
620 c
->avg_pixels_tab
[0][1] = avg_pixels16_x2_3dnow
;
621 c
->avg_pixels_tab
[0][2] = avg_pixels16_y2_3dnow
;
622 c
->avg_pixels_tab
[0][3] = avg_pixels16_xy2_3dnow
;
624 c
->put_pixels_tab
[1][1] = put_pixels8_x2_3dnow
;
625 c
->put_pixels_tab
[1][2] = put_pixels8_y2_3dnow
;
626 c
->put_no_rnd_pixels_tab
[1][1] = put_no_rnd_pixels8_x2_3dnow
;
627 c
->put_no_rnd_pixels_tab
[1][2] = put_no_rnd_pixels8_y2_3dnow
;
629 c
->avg_pixels_tab
[1][0] = avg_pixels8_3dnow
;
630 c
->avg_pixels_tab
[1][1] = avg_pixels8_x2_3dnow
;
631 c
->avg_pixels_tab
[1][2] = avg_pixels8_y2_3dnow
;
632 c
->avg_pixels_tab
[1][3] = avg_pixels8_xy2_3dnow
;
638 get_pixels
= just_return
;
639 put_pixels_clamped
= just_return
;
640 add_pixels_clamped
= just_return
;
642 pix_abs16x16
= just_return
;
643 pix_abs16x16_x2
= just_return
;
644 pix_abs16x16_y2
= just_return
;
645 pix_abs16x16_xy2
= just_return
;
647 put_pixels_tab
[0] = just_return
;
648 put_pixels_tab
[1] = just_return
;
649 put_pixels_tab
[2] = just_return
;
650 put_pixels_tab
[3] = just_return
;
652 put_no_rnd_pixels_tab
[0] = just_return
;
653 put_no_rnd_pixels_tab
[1] = just_return
;
654 put_no_rnd_pixels_tab
[2] = just_return
;
655 put_no_rnd_pixels_tab
[3] = just_return
;
657 avg_pixels_tab
[0] = just_return
;
658 avg_pixels_tab
[1] = just_return
;
659 avg_pixels_tab
[2] = just_return
;
660 avg_pixels_tab
[3] = just_return
;
662 avg_no_rnd_pixels_tab
[0] = just_return
;
663 avg_no_rnd_pixels_tab
[1] = just_return
;
664 avg_no_rnd_pixels_tab
[2] = just_return
;
665 avg_no_rnd_pixels_tab
[3] = just_return
;
667 //av_fdct = just_return;
668 //ff_idct = just_return;
672 /* remove any non bit exact operation (testing purpose). NOTE that
673 this function should be kept as small as possible because it is
674 always difficult to test automatically non bit exact cases. */
675 void dsputil_set_bit_exact_mmx(DSPContext
* c
, unsigned mask
)
677 if (mm_flags
& MM_MMX
) {
679 c
->put_no_rnd_pixels_tab
[0][1] = put_no_rnd_pixels16_x2_mmx
;
680 c
->put_no_rnd_pixels_tab
[0][2] = put_no_rnd_pixels16_y2_mmx
;
681 c
->avg_pixels_tab
[0][3] = avg_pixels16_xy2_mmx
;
682 c
->put_no_rnd_pixels_tab
[1][1] = put_no_rnd_pixels8_x2_mmx
;
683 c
->put_no_rnd_pixels_tab
[1][2] = put_no_rnd_pixels8_y2_mmx
;
684 c
->avg_pixels_tab
[1][3] = avg_pixels8_xy2_mmx
;
686 if (mm_flags
& MM_MMXEXT
) {
687 c
->pix_abs16x16_x2
= pix_abs16x16_x2_mmx
;
688 c
->pix_abs16x16_y2
= pix_abs16x16_y2_mmx
;
689 c
->pix_abs16x16_xy2
= pix_abs16x16_xy2_mmx
;
690 c
->pix_abs8x8_x2
= pix_abs8x8_x2_mmx
;
691 c
->pix_abs8x8_y2
= pix_abs8x8_y2_mmx
;
692 c
->pix_abs8x8_xy2
= pix_abs8x8_xy2_mmx
;