2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
12 #include "vp8/encoder/variance.h"
13 #include "vp8/common/pragmas.h"
14 #include "vpx_ports/mem.h"
// Filter routines declared here and defined outside this file. None of the
// four is called in the visible portion of this listing.
// NOTE(review): the h6/v6 suffixes presumably denote horizontal/vertical
// 6-tap passes -- confirm against the implementation.
//
// filter_block1d_h6_mmx: takes a const unsigned char source pointer and an
// unsigned short output pointer, followed by four unsigned int size/step
// arguments and a short filter pointer.
16 extern void filter_block1d_h6_mmx(const unsigned char *src_ptr
, unsigned short *output_ptr
, unsigned int src_pixels_per_line
, unsigned int pixel_step
, unsigned int output_height
, unsigned int output_width
, short *vp7_filter
);
// filter_block1d_v6_mmx: takes a const short source pointer and an unsigned
// char output pointer; the remaining arguments mirror the h6 declaration.
17 extern void filter_block1d_v6_mmx(const short *src_ptr
, unsigned char *output_ptr
, unsigned int pixels_per_line
, unsigned int pixel_step
, unsigned int output_height
, unsigned int output_width
, short *vp7_filter
);
// filter_block1d8_h6_sse2: same parameter list as filter_block1d_h6_mmx.
18 extern void filter_block1d8_h6_sse2(const unsigned char *src_ptr
, unsigned short *output_ptr
, unsigned int src_pixels_per_line
, unsigned int pixel_step
, unsigned int output_height
, unsigned int output_width
, short *vp7_filter
);
// filter_block1d8_v6_sse2: same parameter list as filter_block1d_v6_mmx.
19 extern void filter_block1d8_v6_sse2(const short *src_ptr
, unsigned char *output_ptr
, unsigned int pixels_per_line
, unsigned int pixel_step
, unsigned int output_height
, unsigned int output_width
, short *vp7_filter
);
// vp8_filter_block2d_bil4x4_var_mmx: extern routine called by
// vp8_sub_pixel_variance4x4_wmt, which passes two rows of
// vp8_vp7_bilinear_filters_mmx (indexed by xoffset and yoffset) after the
// pointer/stride arguments. Note that the caller's src_ptr is passed in the
// position named ref_ptr here, and its dst_ptr in the position named src_ptr.
// NOTE(review): the embedded line numbers skip (21, 23-26, 30), so the
// parentheses and the parameters between src_pixels_per_line and sumsquared
// are missing from this listing.
21 extern void vp8_filter_block2d_bil4x4_var_mmx
23 const unsigned char *ref_ptr
,
24 int ref_pixels_per_line
,
25 const unsigned char *src_ptr
,
26 int src_pixels_per_line
,
30 unsigned int *sumsquared
// vp8_get4x4var_mmx: extern routine called by vp8_variance4x4_wmt with
// (src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg).
// NOTE(review): only the two pointer parameters are visible here; the stride
// and output parameters are missing from this listing.
33 extern unsigned int vp8_get4x4var_mmx
35 const unsigned char *src_ptr
,
37 const unsigned char *ref_ptr
,
// vp8_get_mb_ss_sse2: returns unsigned int. Its parameter list is not visible
// in this listing and it is not called in the visible portion of the file.
43 unsigned int vp8_get_mb_ss_sse2
// vp8_get16x16var_sse2: called by vp8_variance16x16_wmt and vp8_mse16x16_wmt
// with (src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0).
// NOTE(review): only the two pointer parameters are visible here.
47 unsigned int vp8_get16x16var_sse2
49 const unsigned char *src_ptr
,
51 const unsigned char *ref_ptr
,
// vp8_get16x16pred_error_sse2: not called in the visible portion of the file.
// NOTE(review): only the two pointer parameters are visible here.
56 unsigned int vp8_get16x16pred_error_sse2
58 const unsigned char *src_ptr
,
60 const unsigned char *ref_ptr
,
// vp8_get8x8var_sse2: called by vp8_variance8x8_wmt, vp8_variance16x8_wmt and
// vp8_variance8x16_wmt; each call passes two output addresses as the last
// two arguments.
// NOTE(review): only the two pointer parameters are visible here.
63 unsigned int vp8_get8x8var_sse2
65 const unsigned char *src_ptr
,
67 const unsigned char *ref_ptr
,
// vp8_filter_block2d_bil_var_sse2: routine used by the vp8_sub_pixel_variance*
// functions when none of their three xoffset/yoffset special cases matches.
// Callers pass (src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line,
// 8 or 16, ...), so the caller's src_ptr lands in the position named ref_ptr.
// NOTE(review): the embedded line numbers jump from 77 to 82; the parameters
// between src_pixels_per_line and sumsquared are missing from this listing.
72 void vp8_filter_block2d_bil_var_sse2
74 const unsigned char *ref_ptr
,
75 int ref_pixels_per_line
,
76 const unsigned char *src_ptr
,
77 int src_pixels_per_line
,
82 unsigned int *sumsquared
// Six routines used by the sub-pixel and half-pixel variance wrappers below.
// Every visible call passes (src_ptr, src_pixels_per_line, dst_ptr,
// dst_pixels_per_line, 8 or 16, ...).
// NOTE(review): in each declaration the embedded line numbers skip, so the
// parentheses and the parameters between src_pixels_per_line and sumsquared
// are missing from this listing. The fifth argument (8 or 16) is presumably
// a block height -- confirm against the implementation.
//
// Selected when xoffset == 4 && yoffset == 4 (8x8 and 8x16 wrappers).
84 void vp8_half_horiz_vert_variance8x_h_sse2
86 const unsigned char *ref_ptr
,
87 int ref_pixels_per_line
,
88 const unsigned char *src_ptr
,
89 int src_pixels_per_line
,
92 unsigned int *sumsquared
// Selected when xoffset == 4 && yoffset == 4 (16x16 and 16x8 wrappers) and
// called directly by vp8_variance_halfpixvar16x16_hv_wmt.
94 void vp8_half_horiz_vert_variance16x_h_sse2
96 const unsigned char *ref_ptr
,
97 int ref_pixels_per_line
,
98 const unsigned char *src_ptr
,
99 int src_pixels_per_line
,
102 unsigned int *sumsquared
// Selected when xoffset == 4 && yoffset == 0 (8x8 and 8x16 wrappers).
104 void vp8_half_horiz_variance8x_h_sse2
106 const unsigned char *ref_ptr
,
107 int ref_pixels_per_line
,
108 const unsigned char *src_ptr
,
109 int src_pixels_per_line
,
112 unsigned int *sumsquared
// Selected when xoffset == 4 && yoffset == 0 (16x16 and 16x8 wrappers) and
// called directly by vp8_variance_halfpixvar16x16_h_wmt.
114 void vp8_half_horiz_variance16x_h_sse2
116 const unsigned char *ref_ptr
,
117 int ref_pixels_per_line
,
118 const unsigned char *src_ptr
,
119 int src_pixels_per_line
,
122 unsigned int *sumsquared
// Selected when xoffset == 0 && yoffset == 4 (8x8 and 8x16 wrappers).
124 void vp8_half_vert_variance8x_h_sse2
126 const unsigned char *ref_ptr
,
127 int ref_pixels_per_line
,
128 const unsigned char *src_ptr
,
129 int src_pixels_per_line
,
132 unsigned int *sumsquared
// Selected when xoffset == 0 && yoffset == 4 (16x16 and 16x8 wrappers) and
// called directly by vp8_variance_halfpixvar16x16_v_wmt.
134 void vp8_half_vert_variance16x_h_sse2
136 const unsigned char *ref_ptr
,
137 int ref_pixels_per_line
,
138 const unsigned char *src_ptr
,
139 int src_pixels_per_line
,
142 unsigned int *sumsquared
// Extern [8][8] table of short values, declared through DECLARE_ALIGNED with
// a first argument of 16 (presumably the byte alignment -- confirm against
// the macro definition). vp8_sub_pixel_variance4x4_wmt indexes its rows with
// xoffset and yoffset.
145 DECLARE_ALIGNED(16, extern short, vp8_vp7_bilinear_filters_mmx
[8][8]);
// vp8_variance4x4_wmt: calls vp8_get4x4var_mmx on the src/ref pair, receiving
// its results through &var and &avg, and returns var - ((avg * avg) >> 4).
// The shift by 4 divides avg * avg by 16 (presumably the 4x4 pixel count).
// NOTE(review): this listing is missing lines (the embedded numbers jump from
// 150 to 156). source_stride and recon_stride are used below but their
// declarations, and those of var and avg, are not visible here.
147 unsigned int vp8_variance4x4_wmt(
148 const unsigned char *src_ptr
,
150 const unsigned char *ref_ptr
,
156 vp8_get4x4var_mmx(src_ptr
, source_stride
, ref_ptr
, recon_stride
, &var
, &avg
) ;
157 return (var
- ((avg
* avg
) >> 4));
// vp8_variance8x8_wmt: calls vp8_get8x8var_sse2 on the src/ref pair, receiving
// its results through &var and &avg, and returns var - ((avg * avg) >> 6).
// The shift by 6 divides avg * avg by 64 (presumably the 8x8 pixel count).
// NOTE(review): this listing is missing lines (the embedded numbers jump from
// 167 to 173). The stride parameters and the declarations of var and avg are
// not visible here.
163 unsigned int vp8_variance8x8_wmt
165 const unsigned char *src_ptr
,
167 const unsigned char *ref_ptr
,
173 vp8_get8x8var_sse2(src_ptr
, source_stride
, ref_ptr
, recon_stride
, &var
, &avg
) ;
175 return (var
- ((avg
* avg
) >> 6));
// vp8_variance16x16_wmt: calls vp8_get16x16var_sse2 on the src/ref pair,
// receiving its results through &sse0 and &sum0, and returns
// sse0 - ((sum0 * sum0) >> 8). The shift by 8 divides sum0 * sum0 by 256
// (presumably the 16x16 pixel count).
// NOTE(review): this listing is missing lines (the embedded numbers jump from
// 184 to 192). The stride parameters and the declarations of sse0 and sum0
// are not visible here.
// NOTE(review): if sum0 is a 32-bit signed int, sum0 * sum0 exceeds INT_MAX
// once |sum0| > 46340, which is undefined behavior -- confirm the type, or
// perform the multiplication in unsigned arithmetic.
180 unsigned int vp8_variance16x16_wmt
182 const unsigned char *src_ptr
,
184 const unsigned char *ref_ptr
,
192 vp8_get16x16var_sse2(src_ptr
, source_stride
, ref_ptr
, recon_stride
, &sse0
, &sum0
) ;
194 return (sse0
- ((sum0
* sum0
) >> 8));
// vp8_mse16x16_wmt: calls vp8_get16x16var_sse2 on the src/ref pair, receiving
// its results through &sse0 and &sum0.
// NOTE(review): this listing ends at the call (embedded line 206). The stride
// parameters, the local declarations and the return statement are not
// visible here, so what is returned cannot be stated from this view.
196 unsigned int vp8_mse16x16_wmt(
197 const unsigned char *src_ptr
,
199 const unsigned char *ref_ptr
,
206 vp8_get16x16var_sse2(src_ptr
, source_stride
, ref_ptr
, recon_stride
, &sse0
, &sum0
) ;
// vp8_variance16x8_wmt: calls vp8_get8x8var_sse2 twice -- once at
// src_ptr/ref_ptr and once at src_ptr + 8 / ref_ptr + 8 with the same
// strides -- collecting (sse0, sum0) and (sse1, sum1). It returns
// var - ((avg * avg) >> 7); the shift by 7 divides avg * avg by 128.
// NOTE(review): the lines that derive var and avg from the two partial
// results (embedded lines 226-229) are missing from this listing, as are the
// declarations of sum0, sum1 and avg.
213 unsigned int vp8_variance16x8_wmt
215 const unsigned char *src_ptr
,
217 const unsigned char *ref_ptr
,
221 unsigned int sse0
, sse1
, var
;
// First call: pointers as passed in.
224 vp8_get8x8var_sse2(src_ptr
, source_stride
, ref_ptr
, recon_stride
, &sse0
, &sum0
) ;
// Second call: both pointers advanced by 8 elements within the same row.
225 vp8_get8x8var_sse2(src_ptr
+ 8, source_stride
, ref_ptr
+ 8, recon_stride
, &sse1
, &sum1
);
230 return (var
- ((avg
* avg
) >> 7));
// vp8_variance8x16_wmt: calls vp8_get8x8var_sse2 twice -- once at
// src_ptr/ref_ptr and once with each pointer advanced by 8 times its own
// stride -- collecting (sse0, sum0) and (sse1, sum1). It returns
// var - ((avg * avg) >> 7); the shift by 7 divides avg * avg by 128.
// NOTE(review): the lines that derive var and avg from the two partial
// results (embedded lines 247-250) are missing from this listing, as are the
// declarations of sum0, sum1 and avg.
234 unsigned int vp8_variance8x16_wmt
236 const unsigned char *src_ptr
,
238 const unsigned char *ref_ptr
,
242 unsigned int sse0
, sse1
, var
;
// First call: pointers as passed in.
245 vp8_get8x8var_sse2(src_ptr
, source_stride
, ref_ptr
, recon_stride
, &sse0
, &sum0
) ;
// Second call: each pointer moved forward by 8 rows of its own stride.
246 vp8_get8x8var_sse2(src_ptr
+ 8 * source_stride
, source_stride
, ref_ptr
+ 8 * recon_stride
, recon_stride
, &sse1
, &sum1
) ;
251 return (var
- ((avg
* avg
) >> 7));
// vp8_sub_pixel_variance4x4_wmt: calls vp8_filter_block2d_bil4x4_var_mmx with
// the src/dst pointers and strides plus two rows of
// vp8_vp7_bilinear_filters_mmx selected by xoffset and yoffset, then returns
// xxsum - ((xsum * xsum) >> 4). The shift by 4 divides xsum * xsum by 16.
// NOTE(review): this listing is missing lines. The xoffset/yoffset
// parameters, the declarations of xsum and xxsum, and the trailing arguments
// of the call (embedded lines 272-274) are not visible here.
255 unsigned int vp8_sub_pixel_variance4x4_wmt
257 const unsigned char *src_ptr
,
258 int src_pixels_per_line
,
261 const unsigned char *dst_ptr
,
262 int dst_pixels_per_line
,
268 vp8_filter_block2d_bil4x4_var_mmx(
269 src_ptr
, src_pixels_per_line
,
270 dst_ptr
, dst_pixels_per_line
,
271 vp8_vp7_bilinear_filters_mmx
[xoffset
], vp8_vp7_bilinear_filters_mmx
[yoffset
],
275 return (xxsum
- ((xsum
* xsum
) >> 4));
// vp8_sub_pixel_variance8x8_wmt: picks one of four helper routines based on
// xoffset and yoffset, passing 8 as the fifth argument in every case, and
// returns xxsum - ((xsum * xsum) >> 6). The shift by 6 divides by 64.
//   xoffset == 4, yoffset == 0 -> vp8_half_horiz_variance8x_h_sse2
//   xoffset == 0, yoffset == 4 -> vp8_half_vert_variance8x_h_sse2
//   xoffset == 4, yoffset == 4 -> vp8_half_horiz_vert_variance8x_h_sse2
//   remaining call             -> vp8_filter_block2d_bil_var_sse2
// NOTE(review): this listing is missing lines. The xoffset/yoffset
// parameters, the declarations of xsum and xxsum, the trailing arguments of
// each call, and the keyword introducing the last call (presumably a final
// else) are not visible here.
279 unsigned int vp8_sub_pixel_variance8x8_wmt
281 const unsigned char *src_ptr
,
282 int src_pixels_per_line
,
285 const unsigned char *dst_ptr
,
286 int dst_pixels_per_line
,
293 if (xoffset
== 4 && yoffset
== 0)
295 vp8_half_horiz_variance8x_h_sse2(
296 src_ptr
, src_pixels_per_line
,
297 dst_ptr
, dst_pixels_per_line
, 8,
300 else if (xoffset
== 0 && yoffset
== 4)
302 vp8_half_vert_variance8x_h_sse2(
303 src_ptr
, src_pixels_per_line
,
304 dst_ptr
, dst_pixels_per_line
, 8,
307 else if (xoffset
== 4 && yoffset
== 4)
309 vp8_half_horiz_vert_variance8x_h_sse2(
310 src_ptr
, src_pixels_per_line
,
311 dst_ptr
, dst_pixels_per_line
, 8,
316 vp8_filter_block2d_bil_var_sse2(
317 src_ptr
, src_pixels_per_line
,
318 dst_ptr
, dst_pixels_per_line
, 8,
324 return (xxsum
- ((xsum
* xsum
) >> 6));
// vp8_sub_pixel_variance16x16_wmt: picks a helper based on xoffset and
// yoffset, passing 16 as the fifth argument in every case, and returns
// xxsum0 - ((xsum0 * xsum0) >> 8). The shift by 8 divides by 256.
//   xoffset == 4, yoffset == 0 -> vp8_half_horiz_variance16x_h_sse2
//   xoffset == 0, yoffset == 4 -> vp8_half_vert_variance16x_h_sse2
//   xoffset == 4, yoffset == 4 -> vp8_half_horiz_vert_variance16x_h_sse2
//   remaining calls            -> vp8_filter_block2d_bil_var_sse2, twice:
//                                 at src_ptr/dst_ptr and at +8 on both
// NOTE(review): this listing is missing lines. The xoffset/yoffset
// parameters, the declarations of xsum0/xsum1, the trailing arguments of each
// call, and the code that merges the two partial results (embedded lines
// 377-384) are not visible here.
// NOTE(review): if xsum0 is a 32-bit signed int, xsum0 * xsum0 exceeds
// INT_MAX once |xsum0| > 46340 -- confirm the type.
327 unsigned int vp8_sub_pixel_variance16x16_wmt
329 const unsigned char *src_ptr
,
330 int src_pixels_per_line
,
333 const unsigned char *dst_ptr
,
334 int dst_pixels_per_line
,
339 unsigned int xxsum0
, xxsum1
;
342 // note we could avoid these if statements if the calling function
343 // just called the appropriate functions inside.
344 if (xoffset
== 4 && yoffset
== 0)
346 vp8_half_horiz_variance16x_h_sse2(
347 src_ptr
, src_pixels_per_line
,
348 dst_ptr
, dst_pixels_per_line
, 16,
351 else if (xoffset
== 0 && yoffset
== 4)
353 vp8_half_vert_variance16x_h_sse2(
354 src_ptr
, src_pixels_per_line
,
355 dst_ptr
, dst_pixels_per_line
, 16,
358 else if (xoffset
== 4 && yoffset
== 4)
360 vp8_half_horiz_vert_variance16x_h_sse2(
361 src_ptr
, src_pixels_per_line
,
362 dst_ptr
, dst_pixels_per_line
, 16,
// First of two calls: pointers as passed in.
367 vp8_filter_block2d_bil_var_sse2(
368 src_ptr
, src_pixels_per_line
,
369 dst_ptr
, dst_pixels_per_line
, 16,
// Second call: both pointers advanced by 8 elements within the same row.
374 vp8_filter_block2d_bil_var_sse2(
375 src_ptr
+ 8, src_pixels_per_line
,
376 dst_ptr
+ 8, dst_pixels_per_line
, 16,
385 return (xxsum0
- ((xsum0
* xsum0
) >> 8));
// vp8_sub_pixel_mse16x16_wmt: forwards src_ptr, src_pixels_per_line, xoffset,
// yoffset, dst_ptr, dst_pixels_per_line and sse unchanged to
// vp8_sub_pixel_variance16x16_wmt. The value returned by that call is not
// captured in the visible statement.
// NOTE(review): this listing ends at the call (embedded line 398). The
// xoffset/yoffset/sse parameter declarations and the return statement are
// not visible here, so what is returned cannot be stated from this view.
388 unsigned int vp8_sub_pixel_mse16x16_wmt(
389 const unsigned char *src_ptr
,
390 int src_pixels_per_line
,
393 const unsigned char *dst_ptr
,
394 int dst_pixels_per_line
,
398 vp8_sub_pixel_variance16x16_wmt(src_ptr
, src_pixels_per_line
, xoffset
, yoffset
, dst_ptr
, dst_pixels_per_line
, sse
);
// vp8_sub_pixel_variance16x8_wmt: picks a helper based on xoffset and
// yoffset, passing 8 as the fifth argument in every case, and returns
// xxsum0 - ((xsum0 * xsum0) >> 7). The shift by 7 divides by 128.
//   xoffset == 4, yoffset == 0 -> vp8_half_horiz_variance16x_h_sse2
//   xoffset == 0, yoffset == 4 -> vp8_half_vert_variance16x_h_sse2
//   xoffset == 4, yoffset == 4 -> vp8_half_horiz_vert_variance16x_h_sse2
//   remaining calls            -> vp8_filter_block2d_bil_var_sse2, twice:
//                                 at src_ptr/dst_ptr and at +8 on both
// NOTE(review): this listing is missing lines. The xoffset/yoffset
// parameters, the declarations of xsum0/xsum1, the trailing arguments of each
// call, and the code that merges the two partial results (embedded lines
// 449-455) are not visible here.
402 unsigned int vp8_sub_pixel_variance16x8_wmt
404 const unsigned char *src_ptr
,
405 int src_pixels_per_line
,
408 const unsigned char *dst_ptr
,
409 int dst_pixels_per_line
,
415 unsigned int xxsum0
, xxsum1
;
417 if (xoffset
== 4 && yoffset
== 0)
419 vp8_half_horiz_variance16x_h_sse2(
420 src_ptr
, src_pixels_per_line
,
421 dst_ptr
, dst_pixels_per_line
, 8,
424 else if (xoffset
== 0 && yoffset
== 4)
426 vp8_half_vert_variance16x_h_sse2(
427 src_ptr
, src_pixels_per_line
,
428 dst_ptr
, dst_pixels_per_line
, 8,
431 else if (xoffset
== 4 && yoffset
== 4)
433 vp8_half_horiz_vert_variance16x_h_sse2(
434 src_ptr
, src_pixels_per_line
,
435 dst_ptr
, dst_pixels_per_line
, 8,
// First of two calls: pointers as passed in.
440 vp8_filter_block2d_bil_var_sse2(
441 src_ptr
, src_pixels_per_line
,
442 dst_ptr
, dst_pixels_per_line
, 8,
// Second call: both pointers advanced by 8 elements within the same row.
446 vp8_filter_block2d_bil_var_sse2(
447 src_ptr
+ 8, src_pixels_per_line
,
448 dst_ptr
+ 8, dst_pixels_per_line
, 8,
456 return (xxsum0
- ((xsum0
* xsum0
) >> 7));
// vp8_sub_pixel_variance8x16_wmt: picks one of four helper routines based on
// xoffset and yoffset, passing 16 as the fifth argument in every case, and
// returns xxsum - ((xsum * xsum) >> 7). The shift by 7 divides by 128.
//   xoffset == 4, yoffset == 0 -> vp8_half_horiz_variance8x_h_sse2
//   xoffset == 0, yoffset == 4 -> vp8_half_vert_variance8x_h_sse2
//   xoffset == 4, yoffset == 4 -> vp8_half_horiz_vert_variance8x_h_sse2
//   remaining call             -> vp8_filter_block2d_bil_var_sse2
// NOTE(review): this listing is missing lines. The xoffset/yoffset
// parameters, the declarations of xsum and xxsum, the trailing arguments of
// each call, and the keyword introducing the last call (presumably a final
// else) are not visible here.
459 unsigned int vp8_sub_pixel_variance8x16_wmt
461 const unsigned char *src_ptr
,
462 int src_pixels_per_line
,
465 const unsigned char *dst_ptr
,
466 int dst_pixels_per_line
,
473 if (xoffset
== 4 && yoffset
== 0)
475 vp8_half_horiz_variance8x_h_sse2(
476 src_ptr
, src_pixels_per_line
,
477 dst_ptr
, dst_pixels_per_line
, 16,
480 else if (xoffset
== 0 && yoffset
== 4)
482 vp8_half_vert_variance8x_h_sse2(
483 src_ptr
, src_pixels_per_line
,
484 dst_ptr
, dst_pixels_per_line
, 16,
487 else if (xoffset
== 4 && yoffset
== 4)
489 vp8_half_horiz_vert_variance8x_h_sse2(
490 src_ptr
, src_pixels_per_line
,
491 dst_ptr
, dst_pixels_per_line
, 16,
496 vp8_filter_block2d_bil_var_sse2(
497 src_ptr
, src_pixels_per_line
,
498 dst_ptr
, dst_pixels_per_line
, 16,
504 return (xxsum
- ((xsum
* xsum
) >> 7));
// vp8_variance_halfpixvar16x16_h_wmt: calls vp8_half_horiz_variance16x_h_sse2
// with the src/dst pointers and strides and 16 as the fifth argument, then
// returns xxsum0 - ((xsum0 * xsum0) >> 8). The shift by 8 divides by 256.
// NOTE(review): this listing is missing lines. The declarations of xsum0 and
// xxsum0 and the trailing arguments of the call (embedded lines 521-523) are
// not visible here.
// NOTE(review): if xsum0 is a 32-bit signed int, xsum0 * xsum0 exceeds
// INT_MAX once |xsum0| > 46340 -- confirm the type.
508 unsigned int vp8_variance_halfpixvar16x16_h_wmt(
509 const unsigned char *src_ptr
,
510 int src_pixels_per_line
,
511 const unsigned char *dst_ptr
,
512 int dst_pixels_per_line
,
518 vp8_half_horiz_variance16x_h_sse2(
519 src_ptr
, src_pixels_per_line
,
520 dst_ptr
, dst_pixels_per_line
, 16,
524 return (xxsum0
- ((xsum0
* xsum0
) >> 8));
// vp8_variance_halfpixvar16x16_v_wmt: calls vp8_half_vert_variance16x_h_sse2
// with the src/dst pointers and strides and 16 as the fifth argument, then
// returns xxsum0 - ((xsum0 * xsum0) >> 8). The shift by 8 divides by 256.
// NOTE(review): this listing is missing lines. The declarations of xsum0 and
// xxsum0 and the trailing arguments of the call (embedded lines 540-542) are
// not visible here.
// NOTE(review): if xsum0 is a 32-bit signed int, xsum0 * xsum0 exceeds
// INT_MAX once |xsum0| > 46340 -- confirm the type.
528 unsigned int vp8_variance_halfpixvar16x16_v_wmt(
529 const unsigned char *src_ptr
,
530 int src_pixels_per_line
,
531 const unsigned char *dst_ptr
,
532 int dst_pixels_per_line
,
537 vp8_half_vert_variance16x_h_sse2(
538 src_ptr
, src_pixels_per_line
,
539 dst_ptr
, dst_pixels_per_line
, 16,
543 return (xxsum0
- ((xsum0
* xsum0
) >> 8));
// vp8_variance_halfpixvar16x16_hv_wmt: calls
// vp8_half_horiz_vert_variance16x_h_sse2 with the src/dst pointers and
// strides and 16 as the fifth argument, then returns
// xxsum0 - ((xsum0 * xsum0) >> 8). The shift by 8 divides by 256.
// NOTE(review): this listing is missing lines. The declarations of xsum0 and
// xxsum0 and the trailing arguments of the call (embedded lines 560-562) are
// not visible here.
// NOTE(review): if xsum0 is a 32-bit signed int, xsum0 * xsum0 exceeds
// INT_MAX once |xsum0| > 46340 -- confirm the type.
547 unsigned int vp8_variance_halfpixvar16x16_hv_wmt(
548 const unsigned char *src_ptr
,
549 int src_pixels_per_line
,
550 const unsigned char *dst_ptr
,
551 int dst_pixels_per_line
,
557 vp8_half_horiz_vert_variance16x_h_sse2(
558 src_ptr
, src_pixels_per_line
,
559 dst_ptr
, dst_pixels_per_line
, 16,
563 return (xxsum0
- ((xsum0
* xsum0
) >> 8));