/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
12 #include "vpx_ports/config.h"
13 #include "vpx_ports/mem.h"
/*
 * Filter coefficient tables defined outside this translation unit.
 * In this file vp8_six_tap_mmx is indexed by xoffset / yoffset to obtain the
 * tap pointer handed to the MMX and SSE2 filter routines.
 */
extern const short vp8_six_tap_mmx[8][6 * 8];
extern const short vp8_bilinear_filters_mmx[8][2 * 8];
/*
 * First-pass (h6) MMX filter routine, defined outside this translation unit.
 * Reads unsigned char source pixels and writes unsigned short intermediates.
 */
extern void vp8_filter_block1d_h6_mmx(
    unsigned char *src_ptr,
    unsigned short *output_ptr,
    unsigned int src_pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    const short *vp8_filter);
/*
 * Second-pass (v6) MMX filter routine, defined outside this translation unit.
 * Reads unsigned short intermediates and writes unsigned char output.
 *
 * NOTE(review): the third parameter's line was missing from the extracted
 * listing. Every call in this file passes dst_pitch in that position (eight
 * arguments in total), so it is restored here as an int pitch -- confirm the
 * exact name and type against the upstream declaration.
 */
extern void vp8_filter_block1dc_v6_mmx(
    unsigned short *src_ptr,
    unsigned char *output_ptr,
    int output_pitch,
    unsigned int pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    const short *vp8_filter);
/*
 * First-pass (h6) SSE2 filter routine used by the 8-wide predictors in this
 * file; defined outside this translation unit.
 */
extern void vp8_filter_block1d8_h6_sse2(
    unsigned char *src_ptr,
    unsigned short *output_ptr,
    unsigned int src_pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    const short *vp8_filter);
/*
 * First-pass (h6) SSE2 filter routine used by the 16x16 predictor in this
 * file; defined outside this translation unit.
 */
extern void vp8_filter_block1d16_h6_sse2(
    unsigned char *src_ptr,
    unsigned short *output_ptr,
    unsigned int src_pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    const short *vp8_filter);
/*
 * Second-pass (v6) SSE2 filter routine used by the 8-wide predictors in this
 * file; defined outside this translation unit.
 *
 * NOTE(review): the third parameter's line was missing from the extracted
 * listing. Call sites pass dst_pitch there (eight arguments in total), so it
 * is restored as an int pitch -- confirm name and type upstream.
 */
extern void vp8_filter_block1d8_v6_sse2(
    unsigned short *src_ptr,
    unsigned char *output_ptr,
    int dst_pitch,
    unsigned int pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    const short *vp8_filter);
/*
 * Second-pass (v6) SSE2 filter routine used by the 16x16 predictor in this
 * file; defined outside this translation unit.
 *
 * NOTE(review): the third parameter's line was missing from the extracted
 * listing. Call sites pass dst_pitch there (eight arguments in total), so it
 * is restored as an int pitch -- confirm name and type upstream.
 */
extern void vp8_filter_block1d16_v6_sse2(
    unsigned short *src_ptr,
    unsigned char *output_ptr,
    int dst_pitch,
    unsigned int pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    const short *vp8_filter);
/*
 * SSE2 routine, defined outside this translation unit, that fills an
 * unsigned short buffer from unsigned char source pixels. In this file it
 * stands in for the first-pass filter when only the second pass is needed.
 */
extern void vp8_unpack_block1d16_h6_sse2(
    unsigned char *src_ptr,
    unsigned short *output_ptr,
    unsigned int src_pixels_per_line,
    unsigned int output_height,
    unsigned int output_width);
/*
 * Single-pass (first pass only) SSE2 filter routine for 8-wide blocks;
 * defined outside this translation unit. Writes unsigned char output
 * directly.
 *
 * NOTE(review): the fourth parameter's line was missing from the extracted
 * listing. Call sites pass dst_pitch there (six arguments in total), so it
 * is restored as an int pitch -- confirm name and type upstream.
 */
extern void vp8_filter_block1d8_h6_only_sse2(
    unsigned char *src_ptr,
    unsigned int src_pixels_per_line,
    unsigned char *output_ptr,
    int dst_pitch,
    unsigned int output_height,
    const short *vp8_filter);
/*
 * Single-pass (first pass only) SSE2 filter routine for 16-wide blocks;
 * defined outside this translation unit. Writes unsigned char output
 * directly.
 *
 * NOTE(review): the fourth parameter's line was missing from the extracted
 * listing. Call sites pass dst_pitch there (six arguments in total), so it
 * is restored as an int pitch -- confirm name and type upstream.
 */
extern void vp8_filter_block1d16_h6_only_sse2(
    unsigned char *src_ptr,
    unsigned int src_pixels_per_line,
    unsigned char *output_ptr,
    int dst_pitch,
    unsigned int output_height,
    const short *vp8_filter);
/*
 * Single-pass (second pass only) SSE2 filter routine for 8-wide blocks;
 * defined outside this translation unit. Reads and writes unsigned char
 * pixels directly.
 *
 * NOTE(review): the fourth parameter's line was missing from the extracted
 * listing. Call sites pass dst_pitch there (six arguments in total), so it
 * is restored as an int pitch -- confirm name and type upstream.
 */
extern void vp8_filter_block1d8_v6_only_sse2(
    unsigned char *src_ptr,
    unsigned int src_pixels_per_line,
    unsigned char *output_ptr,
    int dst_pitch,
    unsigned int output_height,
    const short *vp8_filter);
/*
 * 8x8 bilinear predictor, declared through the project's
 * prototype_subpixel_predict macro (defined elsewhere). This file calls it
 * with (src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pitch).
 */
extern prototype_subpixel_predict(vp8_bilinear_predict8x8_mmx);
121 void vp8_sixtap_predict4x4_mmx
123 unsigned char *src_ptr
,
124 int src_pixels_per_line
,
127 unsigned char *dst_ptr
,
131 DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2
, 16*16); /* Temp data bufffer used in filtering */
132 const short *HFilter
, *VFilter
;
133 HFilter
= vp8_six_tap_mmx
[xoffset
];
134 vp8_filter_block1d_h6_mmx(src_ptr
- (2 * src_pixels_per_line
), FData2
, src_pixels_per_line
, 1, 9, 8, HFilter
);
135 VFilter
= vp8_six_tap_mmx
[yoffset
];
136 vp8_filter_block1dc_v6_mmx(FData2
+ 8, dst_ptr
, dst_pitch
, 8, 4 , 4, 4, VFilter
);
141 void vp8_sixtap_predict16x16_mmx
143 unsigned char *src_ptr
,
144 int src_pixels_per_line
,
147 unsigned char *dst_ptr
,
152 DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2
, 24*24); /* Temp data bufffer used in filtering */
154 const short *HFilter
, *VFilter
;
157 HFilter
= vp8_six_tap_mmx
[xoffset
];
159 vp8_filter_block1d_h6_mmx(src_ptr
- (2 * src_pixels_per_line
), FData2
, src_pixels_per_line
, 1, 21, 32, HFilter
);
160 vp8_filter_block1d_h6_mmx(src_ptr
- (2 * src_pixels_per_line
) + 4, FData2
+ 4, src_pixels_per_line
, 1, 21, 32, HFilter
);
161 vp8_filter_block1d_h6_mmx(src_ptr
- (2 * src_pixels_per_line
) + 8, FData2
+ 8, src_pixels_per_line
, 1, 21, 32, HFilter
);
162 vp8_filter_block1d_h6_mmx(src_ptr
- (2 * src_pixels_per_line
) + 12, FData2
+ 12, src_pixels_per_line
, 1, 21, 32, HFilter
);
164 VFilter
= vp8_six_tap_mmx
[yoffset
];
165 vp8_filter_block1dc_v6_mmx(FData2
+ 32, dst_ptr
, dst_pitch
, 32, 16 , 16, 16, VFilter
);
166 vp8_filter_block1dc_v6_mmx(FData2
+ 36, dst_ptr
+ 4, dst_pitch
, 32, 16 , 16, 16, VFilter
);
167 vp8_filter_block1dc_v6_mmx(FData2
+ 40, dst_ptr
+ 8, dst_pitch
, 32, 16 , 16, 16, VFilter
);
168 vp8_filter_block1dc_v6_mmx(FData2
+ 44, dst_ptr
+ 12, dst_pitch
, 32, 16 , 16, 16, VFilter
);
173 void vp8_sixtap_predict8x8_mmx
175 unsigned char *src_ptr
,
176 int src_pixels_per_line
,
179 unsigned char *dst_ptr
,
184 DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2
, 256); /* Temp data bufffer used in filtering */
186 const short *HFilter
, *VFilter
;
188 HFilter
= vp8_six_tap_mmx
[xoffset
];
189 vp8_filter_block1d_h6_mmx(src_ptr
- (2 * src_pixels_per_line
), FData2
, src_pixels_per_line
, 1, 13, 16, HFilter
);
190 vp8_filter_block1d_h6_mmx(src_ptr
- (2 * src_pixels_per_line
) + 4, FData2
+ 4, src_pixels_per_line
, 1, 13, 16, HFilter
);
192 VFilter
= vp8_six_tap_mmx
[yoffset
];
193 vp8_filter_block1dc_v6_mmx(FData2
+ 16, dst_ptr
, dst_pitch
, 16, 8 , 8, 8, VFilter
);
194 vp8_filter_block1dc_v6_mmx(FData2
+ 20, dst_ptr
+ 4, dst_pitch
, 16, 8 , 8, 8, VFilter
);
199 void vp8_sixtap_predict8x4_mmx
201 unsigned char *src_ptr
,
202 int src_pixels_per_line
,
205 unsigned char *dst_ptr
,
210 DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2
, 256); /* Temp data bufffer used in filtering */
212 const short *HFilter
, *VFilter
;
214 HFilter
= vp8_six_tap_mmx
[xoffset
];
215 vp8_filter_block1d_h6_mmx(src_ptr
- (2 * src_pixels_per_line
), FData2
, src_pixels_per_line
, 1, 9, 16, HFilter
);
216 vp8_filter_block1d_h6_mmx(src_ptr
- (2 * src_pixels_per_line
) + 4, FData2
+ 4, src_pixels_per_line
, 1, 9, 16, HFilter
);
218 VFilter
= vp8_six_tap_mmx
[yoffset
];
219 vp8_filter_block1dc_v6_mmx(FData2
+ 16, dst_ptr
, dst_pitch
, 16, 8 , 4, 8, VFilter
);
220 vp8_filter_block1dc_v6_mmx(FData2
+ 20, dst_ptr
+ 4, dst_pitch
, 16, 8 , 4, 8, VFilter
);
/*
 * 16x16 bilinear prediction, composed from four calls to
 * vp8_bilinear_predict8x8_mmx: top-left, top-right (+8 columns),
 * bottom-left (+8 rows) and bottom-right (+8 rows, +8 columns). Source rows
 * advance by src_pixels_per_line and destination rows by dst_pitch.
 *
 * NOTE(review): the xoffset, yoffset and dst_pitch parameter lines and the
 * surrounding parentheses/braces were missing from the extracted listing.
 * They are restored as int, in the order implied by the gaps and by the
 * body's use of them -- confirm against the upstream file.
 */
void vp8_bilinear_predict16x16_mmx(
    unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    unsigned char *dst_ptr,
    int dst_pitch)
{
    vp8_bilinear_predict8x8_mmx(src_ptr,
                                src_pixels_per_line, xoffset, yoffset,
                                dst_ptr, dst_pitch);
    vp8_bilinear_predict8x8_mmx(src_ptr + 8,
                                src_pixels_per_line, xoffset, yoffset,
                                dst_ptr + 8, dst_pitch);
    vp8_bilinear_predict8x8_mmx(src_ptr + 8 * src_pixels_per_line,
                                src_pixels_per_line, xoffset, yoffset,
                                dst_ptr + dst_pitch * 8, dst_pitch);
    vp8_bilinear_predict8x8_mmx(src_ptr + 8 * src_pixels_per_line + 8,
                                src_pixels_per_line, xoffset, yoffset,
                                dst_ptr + dst_pitch * 8 + 8, dst_pitch);
}
245 void vp8_sixtap_predict16x16_sse2
247 unsigned char *src_ptr
,
248 int src_pixels_per_line
,
251 unsigned char *dst_ptr
,
256 DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2
, 24*24); /* Temp data bufffer used in filtering */
258 const short *HFilter
, *VFilter
;
264 HFilter
= vp8_six_tap_mmx
[xoffset
];
265 vp8_filter_block1d16_h6_sse2(src_ptr
- (2 * src_pixels_per_line
), FData2
, src_pixels_per_line
, 1, 21, 32, HFilter
);
266 VFilter
= vp8_six_tap_mmx
[yoffset
];
267 vp8_filter_block1d16_v6_sse2(FData2
+ 32, dst_ptr
, dst_pitch
, 32, 16 , 16, dst_pitch
, VFilter
);
271 /* First-pass only */
272 HFilter
= vp8_six_tap_mmx
[xoffset
];
273 vp8_filter_block1d16_h6_only_sse2(src_ptr
, src_pixels_per_line
, dst_ptr
, dst_pitch
, 16, HFilter
);
278 /* Second-pass only */
279 VFilter
= vp8_six_tap_mmx
[yoffset
];
280 vp8_unpack_block1d16_h6_sse2(src_ptr
- (2 * src_pixels_per_line
), FData2
, src_pixels_per_line
, 21, 32);
281 vp8_filter_block1d16_v6_sse2(FData2
+ 32, dst_ptr
, dst_pitch
, 32, 16 , 16, dst_pitch
, VFilter
);
286 void vp8_sixtap_predict8x8_sse2
288 unsigned char *src_ptr
,
289 int src_pixels_per_line
,
292 unsigned char *dst_ptr
,
296 DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2
, 256); /* Temp data bufffer used in filtering */
297 const short *HFilter
, *VFilter
;
303 HFilter
= vp8_six_tap_mmx
[xoffset
];
304 vp8_filter_block1d8_h6_sse2(src_ptr
- (2 * src_pixels_per_line
), FData2
, src_pixels_per_line
, 1, 13, 16, HFilter
);
305 VFilter
= vp8_six_tap_mmx
[yoffset
];
306 vp8_filter_block1d8_v6_sse2(FData2
+ 16, dst_ptr
, dst_pitch
, 16, 8 , 8, dst_pitch
, VFilter
);
310 /* First-pass only */
311 HFilter
= vp8_six_tap_mmx
[xoffset
];
312 vp8_filter_block1d8_h6_only_sse2(src_ptr
, src_pixels_per_line
, dst_ptr
, dst_pitch
, 8, HFilter
);
317 /* Second-pass only */
318 VFilter
= vp8_six_tap_mmx
[yoffset
];
319 vp8_filter_block1d8_v6_only_sse2(src_ptr
- (2 * src_pixels_per_line
), src_pixels_per_line
, dst_ptr
, dst_pitch
, 8, VFilter
);
324 void vp8_sixtap_predict8x4_sse2
326 unsigned char *src_ptr
,
327 int src_pixels_per_line
,
330 unsigned char *dst_ptr
,
334 DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2
, 256); /* Temp data bufffer used in filtering */
335 const short *HFilter
, *VFilter
;
341 HFilter
= vp8_six_tap_mmx
[xoffset
];
342 vp8_filter_block1d8_h6_sse2(src_ptr
- (2 * src_pixels_per_line
), FData2
, src_pixels_per_line
, 1, 9, 16, HFilter
);
343 VFilter
= vp8_six_tap_mmx
[yoffset
];
344 vp8_filter_block1d8_v6_sse2(FData2
+ 16, dst_ptr
, dst_pitch
, 16, 8 , 4, dst_pitch
, VFilter
);
348 /* First-pass only */
349 HFilter
= vp8_six_tap_mmx
[xoffset
];
350 vp8_filter_block1d8_h6_only_sse2(src_ptr
, src_pixels_per_line
, dst_ptr
, dst_pitch
, 4, HFilter
);
355 /* Second-pass only */
356 VFilter
= vp8_six_tap_mmx
[yoffset
];
357 vp8_filter_block1d8_v6_only_sse2(src_ptr
- (2 * src_pixels_per_line
), src_pixels_per_line
, dst_ptr
, dst_pitch
, 4, VFilter
);
/*
 * First-pass (h6) SSSE3 filter routine for 8-wide blocks; defined outside
 * this translation unit. Takes a filter index rather than a tap pointer;
 * callers in this file pass xoffset.
 */
extern void vp8_filter_block1d8_h6_ssse3(
    unsigned char *src_ptr,
    unsigned int src_pixels_per_line,
    unsigned char *output_ptr,
    unsigned int output_pitch,
    unsigned int output_height,
    unsigned int vp8_filter_index);
/*
 * First-pass (h6) SSSE3 filter routine for 16-wide blocks; defined outside
 * this translation unit. Takes a filter index rather than a tap pointer;
 * callers in this file pass xoffset.
 */
extern void vp8_filter_block1d16_h6_ssse3(
    unsigned char *src_ptr,
    unsigned int src_pixels_per_line,
    unsigned char *output_ptr,
    unsigned int output_pitch,
    unsigned int output_height,
    unsigned int vp8_filter_index);
/*
 * Second-pass (v6) SSSE3 filter routine for 16-wide blocks; defined outside
 * this translation unit. Takes a filter index rather than a tap pointer;
 * callers in this file pass yoffset.
 */
extern void vp8_filter_block1d16_v6_ssse3(
    unsigned char *src_ptr,
    unsigned int src_pitch,
    unsigned char *output_ptr,
    unsigned int out_pitch,
    unsigned int output_height,
    unsigned int vp8_filter_index);
/*
 * Second-pass (v6) SSSE3 filter routine for 8-wide blocks; defined outside
 * this translation unit. Takes a filter index rather than a tap pointer;
 * callers in this file pass yoffset.
 */
extern void vp8_filter_block1d8_v6_ssse3(
    unsigned char *src_ptr,
    unsigned int src_pitch,
    unsigned char *output_ptr,
    unsigned int out_pitch,
    unsigned int output_height,
    unsigned int vp8_filter_index);
/*
 * First-pass (h6) SSSE3 filter routine for 4-wide blocks; defined outside
 * this translation unit. Takes a filter index rather than a tap pointer;
 * callers in this file pass xoffset.
 */
extern void vp8_filter_block1d4_h6_ssse3(
    unsigned char *src_ptr,
    unsigned int src_pixels_per_line,
    unsigned char *output_ptr,
    unsigned int output_pitch,
    unsigned int output_height,
    unsigned int vp8_filter_index);
/*
 * Second-pass (v6) SSSE3 filter routine for 4-wide blocks; defined outside
 * this translation unit. Takes a filter index rather than a tap pointer;
 * callers in this file pass yoffset.
 */
extern void vp8_filter_block1d4_v6_ssse3(
    unsigned char *src_ptr,
    unsigned int src_pitch,
    unsigned char *output_ptr,
    unsigned int out_pitch,
    unsigned int output_height,
    unsigned int vp8_filter_index);
425 void vp8_sixtap_predict16x16_ssse3
427 unsigned char *src_ptr
,
428 int src_pixels_per_line
,
431 unsigned char *dst_ptr
,
436 DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2
, 24*24);
442 vp8_filter_block1d16_h6_ssse3(src_ptr
- (2 * src_pixels_per_line
), src_pixels_per_line
, FData2
, 16, 21, xoffset
);
443 vp8_filter_block1d16_v6_ssse3(FData2
, 16, dst_ptr
, dst_pitch
, 16, yoffset
);
447 /* First-pass only */
448 vp8_filter_block1d16_h6_ssse3(src_ptr
, src_pixels_per_line
, dst_ptr
, dst_pitch
, 16, xoffset
);
453 /* Second-pass only */
454 vp8_filter_block1d16_v6_ssse3(src_ptr
- (2 * src_pixels_per_line
) , src_pixels_per_line
, dst_ptr
, dst_pitch
, 16, yoffset
);
458 void vp8_sixtap_predict8x8_ssse3
460 unsigned char *src_ptr
,
461 int src_pixels_per_line
,
464 unsigned char *dst_ptr
,
468 DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2
, 256);
474 vp8_filter_block1d8_h6_ssse3(src_ptr
- (2 * src_pixels_per_line
), src_pixels_per_line
, FData2
, 8, 13, xoffset
);
475 vp8_filter_block1d8_v6_ssse3(FData2
, 8, dst_ptr
, dst_pitch
, 8, yoffset
);
479 vp8_filter_block1d8_h6_ssse3(src_ptr
, src_pixels_per_line
, dst_ptr
, dst_pitch
, 8, xoffset
);
484 /* Second-pass only */
485 vp8_filter_block1d8_v6_ssse3(src_ptr
- (2 * src_pixels_per_line
), src_pixels_per_line
, dst_ptr
, dst_pitch
, 8, yoffset
);
490 void vp8_sixtap_predict8x4_ssse3
492 unsigned char *src_ptr
,
493 int src_pixels_per_line
,
496 unsigned char *dst_ptr
,
500 DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2
, 256);
506 vp8_filter_block1d8_h6_ssse3(src_ptr
- (2 * src_pixels_per_line
), src_pixels_per_line
, FData2
, 8, 9, xoffset
);
507 vp8_filter_block1d8_v6_ssse3(FData2
, 8, dst_ptr
, dst_pitch
, 4, yoffset
);
511 /* First-pass only */
512 vp8_filter_block1d8_h6_ssse3(src_ptr
, src_pixels_per_line
, dst_ptr
, dst_pitch
, 4, xoffset
);
517 /* Second-pass only */
518 vp8_filter_block1d8_v6_ssse3(src_ptr
- (2 * src_pixels_per_line
), src_pixels_per_line
, dst_ptr
, dst_pitch
, 4, yoffset
);
522 void vp8_sixtap_predict4x4_ssse3
524 unsigned char *src_ptr
,
525 int src_pixels_per_line
,
528 unsigned char *dst_ptr
,
532 DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2
, 4*9);
538 vp8_filter_block1d4_h6_ssse3(src_ptr
- (2 * src_pixels_per_line
), src_pixels_per_line
, FData2
, 4, 9, xoffset
);
539 vp8_filter_block1d4_v6_ssse3(FData2
, 4, dst_ptr
, dst_pitch
, 4, yoffset
);
543 vp8_filter_block1d4_h6_ssse3(src_ptr
, src_pixels_per_line
, dst_ptr
, dst_pitch
, 4, xoffset
);
548 vp8_filter_block1d4_v6_ssse3(src_ptr
- (2 * src_pixels_per_line
), src_pixels_per_line
, dst_ptr
, dst_pitch
, 4, yoffset
);