/*
 * quarterpel DSP functions
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include <stddef.h>
#include <stdint.h>

#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/pixels.h"
#include "libavcodec/qpeldsp.h"
/*
 * Two-source block primitives, 8 and 16 pixels wide.
 *
 * These are declarations only; the definitions are not in this file
 * (presumably x86 assembly -- confirm against the build system).
 *
 *   dst        destination block
 *   src1       first source, walked with src1Stride
 *   src2       second source; there is no stride argument for it, and every
 *              caller in this file passes a scratch buffer whose rows are
 *              exactly 8 (resp. 16) bytes apart
 *   dstStride  distance in bytes between destination rows
 *   src1Stride distance in bytes between src1 rows
 *   h          number of rows to process
 *
 * NOTE(review): the put/avg/put_no_rnd prefixes presumably select store vs.
 * average-with-destination vs. non-rounding store -- confirm in the asm.
 */
void ff_put_pixels8_l2_mmxext(uint8_t *dst,
                              const uint8_t *src1, const uint8_t *src2,
                              int dstStride, int src1Stride, int h);
void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst,
                                     const uint8_t *src1, const uint8_t *src2,
                                     int dstStride, int src1Stride, int h);
void ff_avg_pixels8_l2_mmxext(uint8_t *dst,
                              const uint8_t *src1, const uint8_t *src2,
                              int dstStride, int src1Stride, int h);
void ff_put_pixels16_l2_mmxext(uint8_t *dst,
                               const uint8_t *src1, const uint8_t *src2,
                               int dstStride, int src1Stride, int h);
void ff_avg_pixels16_l2_mmxext(uint8_t *dst,
                               const uint8_t *src1, const uint8_t *src2,
                               int dstStride, int src1Stride, int h);
void ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst,
                                      const uint8_t *src1, const uint8_t *src2,
                                      int dstStride, int src1Stride, int h);
/*
 * Horizontal low-pass filter primitives for 16- and 8-pixel-wide blocks.
 *
 * Declarations only; the definitions are not in this file (presumably x86
 * assembly -- confirm against the build system).
 *
 *   dst        destination block
 *   src        source block
 *   dstStride  distance in bytes between destination rows
 *   srcStride  distance in bytes between source rows
 *   h          number of rows to filter (callers in this file pass 8/9 for
 *              the 8-wide variants and 16/17 for the 16-wide variants)
 *
 * All three flavours (put, avg, put_no_rnd) share one parameter list; the
 * wrappers generated by QPEL_OP call them interchangeably through token
 * pasting, so the signatures must stay identical.
 */
void ff_put_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                          int dstStride, int srcStride, int h);
void ff_avg_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                          int dstStride, int srcStride, int h);
void ff_put_no_rnd_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst,
                                                 const uint8_t *src,
                                                 int dstStride, int srcStride,
                                                 int h);
void ff_put_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                         int dstStride, int srcStride, int h);
void ff_avg_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                         int dstStride, int srcStride, int h);
void ff_put_no_rnd_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst,
                                                const uint8_t *src,
                                                int dstStride, int srcStride,
                                                int h);
/*
 * Vertical low-pass filter primitives for 16- and 8-pixel-wide blocks.
 *
 * Declarations only; the definitions are not in this file (presumably x86
 * assembly -- confirm against the build system).
 *
 *   dst        destination block
 *   src        source block
 *   dstStride  distance in bytes between destination rows
 *   srcStride  distance in bytes between source rows
 *
 * Unlike the horizontal filters there is no row-count argument.
 * NOTE(review): the block height is presumably fixed by the function name
 * (16 or 8 output rows) -- confirm in the implementation.
 *
 * All three flavours (put, avg, put_no_rnd) share one parameter list; the
 * wrappers generated by QPEL_OP call them interchangeably through token
 * pasting, so the signatures must stay identical.
 */
void ff_put_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                          int dstStride, int srcStride);
void ff_avg_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                          int dstStride, int srcStride);
void ff_put_no_rnd_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst,
                                                 const uint8_t *src,
                                                 int dstStride, int srcStride);
void ff_put_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                         int dstStride, int srcStride);
void ff_avg_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
                                         int dstStride, int srcStride);
void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst,
                                                const uint8_t *src,
                                                int dstStride, int srcStride);
80 #define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmxext
81 #define ff_put_no_rnd_pixels8_mmxext ff_put_pixels8_mmxext
85 CALL_2X_PIXELS(ff_avg_pixels16_mmxext
, ff_avg_pixels8_mmxext
, 8)
86 CALL_2X_PIXELS(ff_put_pixels16_mmxext
, ff_put_pixels8_mmxext
, 8)
/*
 * QPEL_OP(OPNAME, RND, MMX) -- emit the 32 static wrappers
 *     OPNAME qpel8_mcXY_ MMX  and  OPNAME qpel16_mcXY_ MMX   (X, Y in 0..3)
 * each with the signature
 *     void f(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
 *
 *   OPNAME  prefix of the final store operation ("put_", "avg_",
 *           "put_no_rnd_"); pasted into ff_ OPNAME pixelsN_..., and
 *           ff_ OPNAME mpeg4_qpelN_{h,v}_lowpass_...
 *   RND     infix for the intermediate passes, which always use a "put"
 *           primitive: ff_put RND ...   ("_" or "_no_rnd_")
 *   MMX     instruction-set suffix of the primitives (e.g. mmxext)
 *
 * Naming: a non-zero X means the horizontal low-pass primitive is involved,
 * a non-zero Y means the vertical one is; mc00 is a plain block operation.
 * NOTE(review): X and Y are presumably the horizontal / vertical
 * quarter-sample offsets -- confirm against the qpel_mc_func table layout.
 *
 * Scratch buffers are declared as uint64_t arrays and accessed as bytes:
 *   8-wide : rows are 8 bytes, so N uint64_t hold N rows
 *            half[8 + 9]: halfHV = 8 rows at byte 0, halfH = 9 rows at byte 64
 *   16-wide: rows are 16 bytes, so 2 * N uint64_t hold N rows
 *            half[16 * 2 + 17 * 2]: halfHV = 16 rows at byte 0,
 *                                   halfH  = 17 rows at byte 256
 * The horizontal pass that feeds a vertical pass is run for one extra row
 * (9 resp. 17); the mcX3 variants then blend with halfH advanced by one row.
 */
#define QPEL_OP(OPNAME, RND, MMX) \
static void OPNAME ## qpel8_mc00_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## pixels8_ ## MMX(dst, src, stride, 8); \
} \
\
static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t temp[8]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
                                                   stride, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \
                                        stride, stride, 8); \
} \
\
static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, \
                                                   stride, 8); \
} \
\
static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t temp[8]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
                                                   stride, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + 1, half, stride, \
                                        stride, 8); \
} \
\
static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t temp[8]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
                                                   8, stride); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \
                                        stride, stride, 8); \
} \
\
static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, \
                                                   stride, stride); \
} \
\
static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t temp[8]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
                                                   8, stride); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + stride, half, stride, \
                                        stride, 8); \
} \
\
static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH = (uint8_t *) half + 64; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \
                                        stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
                                        stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH = (uint8_t *) half + 64; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
                                        stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
                                        stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH = (uint8_t *) half + 64; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \
                                        stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8); \
    /* halfH + 8: start one 8-byte row further down */ \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
                                        stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH = (uint8_t *) half + 64; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
                                        stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
                                        stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH = (uint8_t *) half + 64; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
                                        stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH = (uint8_t *) half + 64; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8); \
    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
                                        stride, 8, 8); \
} \
\
static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, \
                                        8, stride, 9); \
    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
                                                   stride, 8); \
} \
\
static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[8 + 9]; \
    uint8_t *const halfH = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
                                        stride, 9); \
    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
                                                   stride, 8); \
} \
\
static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, \
                                         const uint8_t *src, \
                                         ptrdiff_t stride) \
{ \
    uint64_t half[9]; \
    uint8_t *const halfH = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
                                                   stride, 9); \
    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
                                                   stride, 8); \
} \
\
static void OPNAME ## qpel16_mc00_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## pixels16_ ## MMX(dst, src, stride, 16); \
} \
\
static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t temp[32]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
                                                    stride, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \
                                         stride, 16); \
} \
\
static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, \
                                                    stride, stride, 16); \
} \
\
static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t temp[32]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
                                                    stride, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src + 1, half, \
                                         stride, stride, 16); \
} \
\
static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t temp[32]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
                                                    stride); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \
                                         stride, 16); \
} \
\
static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, \
                                                    stride, stride); \
} \
\
static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t temp[32]; \
    uint8_t *const half = (uint8_t *) temp; \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
                                                    stride); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half, \
                                         stride, stride, 16); \
} \
\
static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half + 256; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
                                         stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
                                                    16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
                                         stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half + 256; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
                                         stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
                                                    16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
                                         stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half + 256; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
                                         stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
                                                    16, 16); \
    /* halfH + 16: start one 16-byte row further down */ \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
                                         stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half + 256; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
                                         stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
                                                    16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
                                         stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half + 256; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
                                                    16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
                                         stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[16 * 2 + 17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half + 256; \
    uint8_t *const halfHV = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
                                                    16, 16); \
    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
                                         stride, 16, 16); \
} \
\
static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
                                         stride, 17); \
    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
                                                    stride, 16); \
} \
\
static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
                                         stride, 17); \
    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
                                                    stride, 16); \
} \
\
static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, \
                                          const uint8_t *src, \
                                          ptrdiff_t stride) \
{ \
    uint64_t half[17 * 2]; \
    uint8_t *const halfH = (uint8_t *) half; \
    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
                                                    stride, 17); \
    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
                                                    stride, 16); \
}
501 QPEL_OP(put_
, _
, mmxext
)
502 QPEL_OP(avg_
, _
, mmxext
)
503 QPEL_OP(put_no_rnd_
, _no_rnd_
, mmxext
)
505 #endif /* HAVE_X86ASM */
/*
 * SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) -- fill one 16-entry row of a
 * function table reached through a pointer named `c` in the calling scope.
 *
 *   PFX     table prefix and function-name stem: writes c->PFX_pixels_tab
 *   IDX     row of the table to fill
 *   SIZE    block size pasted into the function name (e.g. 16 or 8)
 *   CPU     instruction-set suffix of the function name (e.g. mmxext)
 *   PREFIX  optional leading part of the function name; may be empty
 *
 * Entry k receives PREFIX PFX SIZE _mcXY_ CPU with k = X + 4 * Y.
 * Wrapped in do { } while (0) so that "SET_QPEL_FUNCS(...);" is a single
 * statement wherever it is used.
 */
#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \
do { \
    c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
    c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \
} while (0)
527 av_cold
void ff_qpeldsp_init_x86(QpelDSPContext
*c
)
529 int cpu_flags
= av_get_cpu_flags();
531 if (X86_MMXEXT(cpu_flags
)) {
532 #if HAVE_MMXEXT_EXTERNAL
533 SET_QPEL_FUNCS(avg_qpel
, 0, 16, mmxext
, );
534 SET_QPEL_FUNCS(avg_qpel
, 1, 8, mmxext
, );
536 SET_QPEL_FUNCS(put_qpel
, 0, 16, mmxext
, );
537 SET_QPEL_FUNCS(put_qpel
, 1, 8, mmxext
, );
538 SET_QPEL_FUNCS(put_no_rnd_qpel
, 0, 16, mmxext
, );
539 SET_QPEL_FUNCS(put_no_rnd_qpel
, 1, 8, mmxext
, );
540 #endif /* HAVE_MMXEXT_EXTERNAL */