aarch64: Add assembly support for -fsanitize=hwaddress tagged globals.
[libav.git] / libavcodec / x86 / qpeldsp_init.c
blobe280c82f3de74749df4ee1682753c4238f5be924
1 /*
2 * quarterpel DSP functions
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include <stddef.h>
22 #include <stdint.h>
24 #include "config.h"
25 #include "libavutil/attributes.h"
26 #include "libavutil/cpu.h"
27 #include "libavutil/x86/cpu.h"
28 #include "libavcodec/pixels.h"
29 #include "libavcodec/qpeldsp.h"
30 #include "fpel.h"
/*
 * Two-source ("l2") block primitives, 8 and 16 pixels wide.
 * Only the prototypes appear in this file; the bodies are provided elsewhere
 * (presumably external MMXEXT assembly -- confirm against the x86 .asm files).
 *
 * dst / dstStride   : output block and its stride argument
 * src1 / src1Stride : first input and its stride argument
 * src2              : second input; there is NO stride argument for it. Every
 *                     caller in this file passes a scratch buffer whose rows
 *                     were written with a stride equal to the block width
 *                     (8 or 16).
 * h                 : callers in this file pass 8, 9, 16 or 17.
 *
 * NOTE(review): judging by the names only, put_ stores the combination of the
 * two sources, avg_ additionally blends with the existing dst contents and
 * no_rnd_ selects a non-rounding variant -- verify in the implementation.
 */
32 void ff_put_pixels8_l2_mmxext(uint8_t *dst,
33 const uint8_t *src1, const uint8_t *src2,
34 int dstStride, int src1Stride, int h);
35 void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst,
36 const uint8_t *src1, const uint8_t *src2,
37 int dstStride, int src1Stride, int h);
38 void ff_avg_pixels8_l2_mmxext(uint8_t *dst,
39 const uint8_t *src1, const uint8_t *src2,
40 int dstStride, int src1Stride, int h);
41 void ff_put_pixels16_l2_mmxext(uint8_t *dst,
42 const uint8_t *src1, const uint8_t *src2,
43 int dstStride, int src1Stride, int h);
44 void ff_avg_pixels16_l2_mmxext(uint8_t *dst,
45 const uint8_t *src1, const uint8_t *src2,
46 int dstStride, int src1Stride, int h);
47 void ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst,
48 const uint8_t *src1, const uint8_t *src2,
49 int dstStride, int src1Stride, int h);
/*
 * Horizontal lowpass primitives for 16- and 8-pixel-wide blocks, in put_,
 * avg_ and put_no_rnd_ flavours. Prototypes only; the bodies are provided
 * elsewhere (presumably external MMXEXT assembly -- confirm).
 *
 * dst / dstStride : output and its stride argument
 * src / srcStride : input and its stride argument
 * h               : callers in this file pass the block height (8 / 16) when
 *                   the result is used directly, and one more (9 / 17) when
 *                   the result is subsequently fed to a vertical lowpass.
 *
 * NOTE(review): presumably the MPEG-4 quarter-pel half-sample filter applied
 * along rows; the filter taps are not visible from this file.
 */
50 void ff_put_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
51 int dstStride, int srcStride, int h);
52 void ff_avg_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
53 int dstStride, int srcStride, int h);
54 void ff_put_no_rnd_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst,
55 const uint8_t *src,
56 int dstStride, int srcStride,
57 int h);
58 void ff_put_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
59 int dstStride, int srcStride, int h);
60 void ff_avg_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
61 int dstStride, int srcStride, int h);
62 void ff_put_no_rnd_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst,
63 const uint8_t *src,
64 int dstStride, int srcStride,
65 int h);
/*
 * Vertical lowpass primitives for 16- and 8-pixel-wide blocks, in put_,
 * avg_ and put_no_rnd_ flavours. Prototypes only; the bodies are provided
 * elsewhere (presumably external MMXEXT assembly -- confirm).
 *
 * Unlike the horizontal variants these take no height argument.
 *
 * NOTE(review): callers in this file that feed a scratch buffer into these
 * functions fill 9 (8-wide) or 17 (16-wide) rows first, which suggests the
 * filter reads at least one row beyond the block height -- confirm against
 * the implementation.
 */
66 void ff_put_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
67 int dstStride, int srcStride);
68 void ff_avg_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
69 int dstStride, int srcStride);
70 void ff_put_no_rnd_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst,
71 const uint8_t *src,
72 int dstStride, int srcStride);
73 void ff_put_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
74 int dstStride, int srcStride);
75 void ff_avg_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
76 int dstStride, int srcStride);
77 void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst,
78 const uint8_t *src,
79 int dstStride, int srcStride);
/*
 * Alias the "no_rnd" plain-copy names onto the regular put functions.
 * QPEL_OP builds the mc00 callee as ff_ ## OPNAME ## pixelsN_ ## MMX, so the
 * put_no_rnd_ instantiation needs ff_put_no_rnd_pixels{8,16}_mmxext to
 * resolve to something.
 * NOTE(review): presumably safe because a plain copy involves no averaging
 * and therefore no rounding -- confirm.
 */
80 #define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmxext
81 #define ff_put_no_rnd_pixels8_mmxext ff_put_pixels8_mmxext
83 #if HAVE_X86ASM
/*
 * Provide the 16-wide avg/put pixel functions in terms of the 8-wide ones.
 * CALL_2X_PIXELS is not defined in this file (presumably it comes from
 * libavcodec/pixels.h). NOTE(review): it presumably emits a function named by
 * the first argument that calls the second argument twice, the second time
 * offset by the third argument (8) -- confirm against the macro definition.
 */
85 CALL_2X_PIXELS(ff_avg_pixels16_mmxext, ff_avg_pixels8_mmxext, 8)
86 CALL_2X_PIXELS(ff_put_pixels16_mmxext, ff_put_pixels8_mmxext, 8)
/*
 * QPEL_OP(OPNAME, RND, MMX)
 *
 * Expands to 32 static functions named
 *     OPNAME qpel8_mcXY_MMX  and  OPNAME qpel16_mcXY_MMX   (X, Y in 0..3)
 * each taking (uint8_t *dst, const uint8_t *src, ptrdiff_t stride).
 *
 * Macro parameters:
 *   OPNAME - name prefix (put_, avg_, put_no_rnd_); it selects the primitive
 *            used for the step that writes into dst.
 *   RND    - infix pasted after "ff_put" for every intermediate step that
 *            writes into a local scratch buffer ("_" or "_no_rnd_").
 *   MMX    - CPU suffix appended to all generated and called names.
 *
 * Recipe per position, as written below (N is 8 or 16):
 *   mc00        plain ff_<OPNAME>pixelsN call.
 *   mc20, mc02  one horizontal / vertical lowpass straight into dst.
 *   mc10, mc30  horizontal lowpass into a scratch buffer, then l2 of
 *               src (mc10) or src + 1 (mc30) with that buffer into dst.
 *   mc01, mc03  vertical lowpass into a scratch buffer, then l2 of
 *               src (mc01) or src + stride (mc03) with that buffer into dst.
 *   mc11, mc31, mc13, mc33
 *               horizontal lowpass of N + 1 rows into halfH; l2 of
 *               src (mc11/mc13) or src + 1 (mc31/mc33) with halfH, written
 *               back into halfH; vertical lowpass halfH -> halfHV; final l2
 *               of halfH (mc11/mc31) or halfH + N, i.e. its second row
 *               (mc13/mc33), with halfHV into dst.
 *   mc21, mc23  as above but without the in-place l2 on halfH.
 *   mc12, mc32  horizontal lowpass of N + 1 rows into halfH; l2 with
 *               src (mc12) or src + 1 (mc32) back into halfH; vertical
 *               lowpass of halfH straight into dst.
 *   mc22        horizontal lowpass of N + 1 rows into halfH, then vertical
 *               lowpass of halfH into dst.
 *
 * Scratch buffers are declared as uint64_t arrays and accessed through
 * uint8_t pointers (NOTE(review): presumably to obtain 8-byte alignment for
 * the callee -- confirm). Rows in the scratch buffers are N bytes apart.
 *   8-wide : half[8 + 9]           halfHV = first 64 bytes (8 rows),
 *                                  halfH  = byte offset 64 (9 rows).
 *   16-wide: half[16 * 2 + 17 * 2] halfHV = first 256 bytes (16 rows),
 *                                  halfH  = byte offset 256 (17 rows).
 * The 8-wide mc12/mc32 declare half[8 + 9] although only halfH is used.
 *
 * The l2 and lowpass prototypes declared in this file take int strides, so
 * the ptrdiff_t stride is implicitly converted to int at those calls.
 */
88 #define QPEL_OP(OPNAME, RND, MMX) \
89 static void OPNAME ## qpel8_mc00_ ## MMX(uint8_t *dst, \
90 const uint8_t *src, \
91 ptrdiff_t stride) \
92 { \
93 ff_ ## OPNAME ## pixels8_ ## MMX(dst, src, stride, 8); \
94 } \
96 static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, \
97 const uint8_t *src, \
98 ptrdiff_t stride) \
99 { \
100 uint64_t temp[8]; \
101 uint8_t *const half = (uint8_t *) temp; \
102 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
103 stride, 8); \
104 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \
105 stride, stride, 8); \
108 static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, \
109 const uint8_t *src, \
110 ptrdiff_t stride) \
112 ff_ ## OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, \
113 stride, 8); \
116 static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, \
117 const uint8_t *src, \
118 ptrdiff_t stride) \
120 uint64_t temp[8]; \
121 uint8_t *const half = (uint8_t *) temp; \
122 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
123 stride, 8); \
124 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + 1, half, stride, \
125 stride, 8); \
128 static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, \
129 const uint8_t *src, \
130 ptrdiff_t stride) \
132 uint64_t temp[8]; \
133 uint8_t *const half = (uint8_t *) temp; \
134 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
135 8, stride); \
136 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \
137 stride, stride, 8); \
140 static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, \
141 const uint8_t *src, \
142 ptrdiff_t stride) \
144 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, \
145 stride, stride); \
148 static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, \
149 const uint8_t *src, \
150 ptrdiff_t stride) \
152 uint64_t temp[8]; \
153 uint8_t *const half = (uint8_t *) temp; \
154 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
155 8, stride); \
156 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + stride, half, stride,\
157 stride, 8); \
160 static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, \
161 const uint8_t *src, \
162 ptrdiff_t stride) \
164 uint64_t half[8 + 9]; \
165 uint8_t *const halfH = (uint8_t *) half + 64; \
166 uint8_t *const halfHV = (uint8_t *) half; \
167 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
168 stride, 9); \
169 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \
170 stride, 9); \
171 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
172 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
173 stride, 8, 8); \
176 static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, \
177 const uint8_t *src, \
178 ptrdiff_t stride) \
180 uint64_t half[8 + 9]; \
181 uint8_t *const halfH = (uint8_t *) half + 64; \
182 uint8_t *const halfHV = (uint8_t *) half; \
183 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
184 stride, 9); \
185 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
186 stride, 9); \
187 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
188 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
189 stride, 8, 8); \
192 static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, \
193 const uint8_t *src, \
194 ptrdiff_t stride) \
196 uint64_t half[8 + 9]; \
197 uint8_t *const halfH = (uint8_t *) half + 64; \
198 uint8_t *const halfHV = (uint8_t *) half; \
199 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
200 stride, 9); \
201 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \
202 stride, 9); \
203 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
204 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
205 stride, 8, 8); \
208 static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, \
209 const uint8_t *src, \
210 ptrdiff_t stride) \
212 uint64_t half[8 + 9]; \
213 uint8_t *const halfH = (uint8_t *) half + 64; \
214 uint8_t *const halfHV = (uint8_t *) half; \
215 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
216 stride, 9); \
217 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
218 stride, 9); \
219 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
220 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
221 stride, 8, 8); \
224 static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, \
225 const uint8_t *src, \
226 ptrdiff_t stride) \
228 uint64_t half[8 + 9]; \
229 uint8_t *const halfH = (uint8_t *) half + 64; \
230 uint8_t *const halfHV = (uint8_t *) half; \
231 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
232 stride, 9); \
233 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
234 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
235 stride, 8, 8); \
238 static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, \
239 const uint8_t *src, \
240 ptrdiff_t stride) \
242 uint64_t half[8 + 9]; \
243 uint8_t *const halfH = (uint8_t *) half + 64; \
244 uint8_t *const halfHV = (uint8_t *) half; \
245 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
246 stride, 9); \
247 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
248 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
249 stride, 8, 8); \
252 static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, \
253 const uint8_t *src, \
254 ptrdiff_t stride) \
256 uint64_t half[8 + 9]; \
257 uint8_t *const halfH = (uint8_t *) half; \
258 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
259 stride, 9); \
260 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, \
261 8, stride, 9); \
262 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
263 stride, 8); \
266 static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, \
267 const uint8_t *src, \
268 ptrdiff_t stride) \
270 uint64_t half[8 + 9]; \
271 uint8_t *const halfH = (uint8_t *) half; \
272 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
273 stride, 9); \
274 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
275 stride, 9); \
276 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
277 stride, 8); \
280 static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, \
281 const uint8_t *src, \
282 ptrdiff_t stride) \
284 uint64_t half[9]; \
285 uint8_t *const halfH = (uint8_t *) half; \
286 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
287 stride, 9); \
288 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
289 stride, 8); \
292 static void OPNAME ## qpel16_mc00_ ## MMX(uint8_t *dst, \
293 const uint8_t *src, \
294 ptrdiff_t stride) \
296 ff_ ## OPNAME ## pixels16_ ## MMX(dst, src, stride, 16); \
299 static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, \
300 const uint8_t *src, \
301 ptrdiff_t stride) \
303 uint64_t temp[32]; \
304 uint8_t *const half = (uint8_t *) temp; \
305 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
306 stride, 16); \
307 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \
308 stride, 16); \
311 static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, \
312 const uint8_t *src, \
313 ptrdiff_t stride) \
315 ff_ ## OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, \
316 stride, stride, 16);\
319 static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, \
320 const uint8_t *src, \
321 ptrdiff_t stride) \
323 uint64_t temp[32]; \
324 uint8_t *const half = (uint8_t*) temp; \
325 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
326 stride, 16); \
327 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src + 1, half, \
328 stride, stride, 16); \
331 static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, \
332 const uint8_t *src, \
333 ptrdiff_t stride) \
335 uint64_t temp[32]; \
336 uint8_t *const half = (uint8_t *) temp; \
337 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
338 stride); \
339 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \
340 stride, 16); \
343 static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, \
344 const uint8_t *src, \
345 ptrdiff_t stride) \
347 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, \
348 stride, stride); \
351 static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, \
352 const uint8_t *src, \
353 ptrdiff_t stride) \
355 uint64_t temp[32]; \
356 uint8_t *const half = (uint8_t *) temp; \
357 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
358 stride); \
359 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half, \
360 stride, stride, 16); \
363 static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, \
364 const uint8_t *src, \
365 ptrdiff_t stride) \
367 uint64_t half[16 * 2 + 17 * 2]; \
368 uint8_t *const halfH = (uint8_t *) half + 256; \
369 uint8_t *const halfHV = (uint8_t *) half; \
370 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
371 stride, 17); \
372 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
373 stride, 17); \
374 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
375 16, 16); \
376 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
377 stride, 16, 16); \
380 static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, \
381 const uint8_t *src, \
382 ptrdiff_t stride) \
384 uint64_t half[16 * 2 + 17 * 2]; \
385 uint8_t *const halfH = (uint8_t *) half + 256; \
386 uint8_t *const halfHV = (uint8_t *) half; \
387 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
388 stride, 17); \
389 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
390 stride, 17); \
391 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
392 16, 16); \
393 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
394 stride, 16, 16); \
397 static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, \
398 const uint8_t *src, \
399 ptrdiff_t stride) \
401 uint64_t half[16 * 2 + 17 * 2]; \
402 uint8_t *const halfH = (uint8_t *) half + 256; \
403 uint8_t *const halfHV = (uint8_t *) half; \
404 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
405 stride, 17); \
406 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
407 stride, 17); \
408 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
409 16, 16); \
410 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
411 stride, 16, 16); \
414 static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, \
415 const uint8_t *src, \
416 ptrdiff_t stride) \
418 uint64_t half[16 * 2 + 17 * 2]; \
419 uint8_t *const halfH = (uint8_t *) half + 256; \
420 uint8_t *const halfHV = (uint8_t *) half; \
421 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
422 stride, 17); \
423 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
424 stride, 17); \
425 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
426 16, 16); \
427 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
428 stride, 16, 16); \
431 static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, \
432 const uint8_t *src, \
433 ptrdiff_t stride) \
435 uint64_t half[16 * 2 + 17 * 2]; \
436 uint8_t *const halfH = (uint8_t *) half + 256; \
437 uint8_t *const halfHV = (uint8_t *) half; \
438 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
439 stride, 17); \
440 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
441 16, 16); \
442 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
443 stride, 16, 16); \
446 static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, \
447 const uint8_t *src, \
448 ptrdiff_t stride) \
450 uint64_t half[16 * 2 + 17 * 2]; \
451 uint8_t *const halfH = (uint8_t *) half + 256; \
452 uint8_t *const halfHV = (uint8_t *) half; \
453 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
454 stride, 17); \
455 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
456 16, 16); \
457 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
458 stride, 16, 16); \
461 static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, \
462 const uint8_t *src, \
463 ptrdiff_t stride) \
465 uint64_t half[17 * 2]; \
466 uint8_t *const halfH = (uint8_t *) half; \
467 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
468 stride, 17); \
469 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
470 stride, 17); \
471 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
472 stride, 16); \
475 static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, \
476 const uint8_t *src, \
477 ptrdiff_t stride) \
479 uint64_t half[17 * 2]; \
480 uint8_t *const halfH = (uint8_t *) half; \
481 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
482 stride, 17); \
483 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
484 stride, 17); \
485 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
486 stride, 16); \
489 static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, \
490 const uint8_t *src, \
491 ptrdiff_t stride) \
493 uint64_t half[17 * 2]; \
494 uint8_t *const halfH = (uint8_t *) half; \
495 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
496 stride, 17); \
497 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
498 stride, 16); \
/*
 * Instantiate the three function families for the mmxext suffix.
 * The second argument is pasted after "ff_put" for the intermediate steps:
 * "_" yields ff_put_mpeg4_... / ff_put_pixels..._l2_..., while "_no_rnd_"
 * yields the ff_put_no_rnd_... helpers. The avg_ family therefore uses the
 * same intermediate helpers as put_ and differs only in the step that
 * writes dst.
 */
501 QPEL_OP(put_, _, mmxext)
502 QPEL_OP(avg_, _, mmxext)
503 QPEL_OP(put_no_rnd_, _no_rnd_, mmxext)
505 #endif /* HAVE_X86ASM */
/*
 * SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX)
 *
 * Fill the 16 entries of c->PFX_pixels_tab[IDX] with the functions named
 * PREFIX PFX SIZE _mcXY_ CPU (e.g. PFX = avg_qpel, SIZE = 16, CPU = mmxext
 * gives avg_qpel16_mc00_mmxext, ...). A variable named c must be in scope at
 * the point of use.
 *
 * Table layout: entry index = X + 4 * Y for function mcXY, i.e. the first
 * digit (X) varies fastest.
 *
 * Wrapped in do { } while (0) so an invocation followed by ';' behaves as a
 * single statement.
 */
507 #define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \
508 do { \
509 c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
510 c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
511 c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
512 c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
513 c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
514 c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
515 c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
516 c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
517 c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
518 c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
519 c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
520 c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
521 c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
522 c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
523 c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
524 c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \
525 } while (0)
527 av_cold void ff_qpeldsp_init_x86(QpelDSPContext *c)
529 int cpu_flags = av_get_cpu_flags();
531 if (X86_MMXEXT(cpu_flags)) {
532 #if HAVE_MMXEXT_EXTERNAL
533 SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, );
534 SET_QPEL_FUNCS(avg_qpel, 1, 8, mmxext, );
536 SET_QPEL_FUNCS(put_qpel, 0, 16, mmxext, );
537 SET_QPEL_FUNCS(put_qpel, 1, 8, mmxext, );
538 SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, );
539 SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, );
540 #endif /* HAVE_MMXEXT_EXTERNAL */