/* vp8/encoder/x86/variance_mmx.c */
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "variance.h"
#include "pragmas.h"
#include "vpx_ports/mem.h"

extern void filter_block1d_h6_mmx
(
    const unsigned char *src_ptr,
    unsigned short *output_ptr,
    unsigned int src_pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    short *vp7_filter
);
extern void filter_block1d_v6_mmx
(
    const short *src_ptr,
    unsigned char *output_ptr,
    unsigned int pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    short *vp7_filter
);

extern unsigned int vp8_get_mb_ss_mmx(short *src_ptr);
extern unsigned int vp8_get8x8var_mmx
(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *SSE,
    int *Sum
);
extern unsigned int vp8_get4x4var_mmx
(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *SSE,
    int *Sum
);
extern unsigned int vp8_get4x4sse_cs_mmx
(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride
);
extern void vp8_filter_block2d_bil4x4_var_mmx
(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    const short *HFilter,
    const short *VFilter,
    int *sum,
    unsigned int *sumsquared
);
extern void vp8_filter_block2d_bil_var_mmx
(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    unsigned int Height,
    const short *HFilter,
    const short *VFilter,
    int *sum,
    unsigned int *sumsquared
);
extern unsigned int vp8_get16x16pred_error_mmx
(
    unsigned char *src_ptr,
    int src_stride,
    unsigned char *ref_ptr,
    int ref_stride
);
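
/*
 * All of the variance routines in this file follow the same pattern: an
 * assembly helper (vp8_get8x8var_mmx / vp8_get4x4var_mmx) accumulates the
 * sum of pixel differences and the sum of squared differences for one
 * block, and the caller computes variance as SSE - sum^2 / N, with the
 * divide by the pixel count N expressed as a right shift. The scalar
 * sketch below is illustrative only and not part of the original file;
 * the helper name is hypothetical.
 */
#if 0
static unsigned int scalar_block_var(const unsigned char *src, int src_stride,
                                     const unsigned char *ref, int ref_stride,
                                     int w, int h, unsigned int *sse)
{
    int r, c, sum = 0;
    unsigned int sq = 0;

    for (r = 0; r < h; r++)
        for (c = 0; c < w; c++)
        {
            /* signed difference of one pixel pair */
            int d = src[r * src_stride + c] - ref[r * ref_stride + c];
            sum += d;
            sq += (unsigned int)(d * d);
        }

    *sse = sq;
    /* the MMX callers below replace this divide with a shift,
     * e.g. / 64 becomes >> 6 for an 8x8 block */
    return sq - (unsigned int)(((long long)sum * sum) / (w * h));
}
#endif
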
void vp8_test_get_mb_ss(void)
{
    short zz[] =
    {
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -2, -2, -2, -2, 2, 2, 2, 2, -2, -2, -2, -2, 2, 2, 2, 2,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -3, -3, -3, -3, 3, 3, 3, 3, -3, -3, -3, -3, 3, 3, 3, 3,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4,
        -4, -4, -4, -4, 4, 4, 4, 4, -4, -4, -4, -4, 4, 4, 4, 4
    };
    int s = 0, x = vp8_get_mb_ss_mmx(zz);
    int y;

    for (y = 0; y < 256; y++)
        s += (zz[y] * zz[y]);

    x += 0;
}
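
/*
 * The function above is a development aid rather than a unit test proper:
 * it feeds a fixed 256-entry pattern to vp8_get_mb_ss_mmx() and computes
 * the same sum of squares in scalar code, but the two results (x and s)
 * exist only to be inspected in a debugger; nothing compares them.
 */
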
unsigned int vp8_get16x16var_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *SSE,
    unsigned int *SUM
)
{
    unsigned int sse0, sse1, sse2, sse3, var;
    int sum0, sum1, sum2, sum3, avg;

    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);

    var = sse0 + sse1 + sse2 + sse3;
    avg = sum0 + sum1 + sum2 + sum3;

    *SSE = var;
    *SUM = avg;
    return (var - ((avg * avg) >> 8));
}
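
/*
 * Note on the shifts used throughout this file: the subtraction term is
 * sum^2 / N with N the block's pixel count, so 16x16 blocks use >> 8
 * (256 pixels), 16x8 and 8x16 use >> 7 (128), 8x8 uses >> 6 (64), and
 * 4x4 uses >> 4 (16). Above, the 16x16 result is assembled from the four
 * 8x8 quadrants of the macroblock. (For extreme inputs avg * avg can
 * overflow a 32-bit int; later libvpx versions widen this arithmetic.)
 */
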
unsigned int vp8_variance4x4_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int var;
    int avg;

    vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg);
    *sse = var;
    return (var - ((avg * avg) >> 4));
}

unsigned int vp8_variance8x8_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int var;
    int avg;

    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg);
    *sse = var;

    return (var - ((avg * avg) >> 6));
}

unsigned int vp8_mse16x16_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sse0, sse1, sse2, sse3, var;
    int sum0, sum1, sum2, sum3;

    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);

    var = sse0 + sse1 + sse2 + sse3;
    *sse = var;
    return var;
}
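
/*
 * For MSE the block mean is not subtracted, so the raw SSE is returned
 * directly; the per-quadrant sums above are computed only because the
 * shared 8x8 helper always produces them.
 */
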
unsigned int vp8_variance16x16_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sse0, sse1, sse2, sse3, var;
    int sum0, sum1, sum2, sum3, avg;

    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);

    var = sse0 + sse1 + sse2 + sse3;
    avg = sum0 + sum1 + sum2 + sum3;
    *sse = var;
    return (var - ((avg * avg) >> 8));
}

unsigned int vp8_variance16x8_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sse0, sse1, var;
    int sum0, sum1, avg;

    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);

    var = sse0 + sse1;
    avg = sum0 + sum1;
    *sse = var;
    return (var - ((avg * avg) >> 7));
}

unsigned int vp8_variance8x16_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sse0, sse1, var;
    int sum0, sum1, avg;

    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1);

    var = sse0 + sse1;
    avg = sum0 + sum1;
    *sse = var;

    return (var - ((avg * avg) >> 7));
}

///////////////////////////////////////////////////////////////////////////
// the mmx function that does the bilinear filtering and var calculation //
// in one pass                                                           //
///////////////////////////////////////////////////////////////////////////
DECLARE_ALIGNED(16, const short, vp8_vp7_bilinear_filters_mmx[8][8]) =
{
    { 128, 128, 128, 128,   0,   0,   0,   0 },
    { 112, 112, 112, 112,  16,  16,  16,  16 },
    {  96,  96,  96,  96,  32,  32,  32,  32 },
    {  80,  80,  80,  80,  48,  48,  48,  48 },
    {  64,  64,  64,  64,  64,  64,  64,  64 },
    {  48,  48,  48,  48,  80,  80,  80,  80 },
    {  32,  32,  32,  32,  96,  96,  96,  96 },
    {  16,  16,  16,  16, 112, 112, 112, 112 }
};
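
/*
 * Each row of the table above holds one bilinear filter pair for a given
 * 1/8-pel offset: four copies of the first-tap weight followed by four
 * copies of the second-tap weight (four copies so one 64-bit MMX register
 * can apply a tap to four pixels at once). The two taps always sum to 128,
 * and a filtered pixel is the rounded weighted average of two neighbours;
 * e.g. xoffset == 2 selects { 96, 32 }, two-eighths of the way toward the
 * next pixel. A scalar equivalent of one tap pair (hypothetical helper,
 * illustrative only, not the MMX code):
 */
#if 0
static unsigned char bilinear_pixel(unsigned char a, unsigned char b,
                                    const short *filter)
{
    /* filter[0] is the first tap, filter[4] the second; +64 rounds the
     * 7-bit fixed-point result before the shift */
    return (unsigned char)((a * filter[0] + b * filter[4] + 64) >> 7);
}
#endif
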
unsigned int vp8_sub_pixel_variance4x4_mmx
(
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse)
{
    int xsum;
    unsigned int xxsum;
    vp8_filter_block2d_bil4x4_var_mmx(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum, &xxsum
    );
    *sse = xxsum;
    return (xxsum - ((xsum * xsum) >> 4));
}

unsigned int vp8_sub_pixel_variance8x8_mmx
(
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    int xsum;
    unsigned int xxsum;
    vp8_filter_block2d_bil_var_mmx(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 8,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum, &xxsum
    );
    *sse = xxsum;
    return (xxsum - ((xsum * xsum) >> 6));
}

unsigned int vp8_sub_pixel_variance16x16_mmx
(
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    int xsum0, xsum1;
    unsigned int xxsum0, xxsum1;

    vp8_filter_block2d_bil_var_mmx(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 16,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum0, &xxsum0
    );

    vp8_filter_block2d_bil_var_mmx(
        src_ptr + 8, src_pixels_per_line,
        dst_ptr + 8, dst_pixels_per_line, 16,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum1, &xxsum1
    );

    xsum0 += xsum1;
    xxsum0 += xxsum1;

    *sse = xxsum0;
    return (xxsum0 - ((xsum0 * xsum0) >> 8));
}
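
/*
 * 16-pixel-wide blocks are filtered as two 8-wide column passes because
 * the MMX kernel handles 8 bytes (one 64-bit register) per row; the
 * partial sums from the two passes are then merged before the variance
 * is formed.
 */
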
unsigned int vp8_sub_pixel_mse16x16_mmx(
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    vp8_sub_pixel_variance16x16_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
    return *sse;
}
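
/*
 * As with vp8_mse16x16_mmx above, MSE needs no mean correction, so this
 * simply reuses the sub-pixel variance kernel and returns the SSE it
 * stored through *sse.
 */
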
unsigned int vp8_sub_pixel_variance16x8_mmx
(
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    int xsum0, xsum1;
    unsigned int xxsum0, xxsum1;

    vp8_filter_block2d_bil_var_mmx(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 8,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum0, &xxsum0
    );

    vp8_filter_block2d_bil_var_mmx(
        src_ptr + 8, src_pixels_per_line,
        dst_ptr + 8, dst_pixels_per_line, 8,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum1, &xxsum1
    );

    xsum0 += xsum1;
    xxsum0 += xxsum1;

    *sse = xxsum0;
    return (xxsum0 - ((xsum0 * xsum0) >> 7));
}

unsigned int vp8_sub_pixel_variance8x16_mmx
(
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    int xsum;
    unsigned int xxsum;
    vp8_filter_block2d_bil_var_mmx(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 16,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum, &xxsum
    );
    *sse = xxsum;
    return (xxsum - ((xsum * xsum) >> 7));
}
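
/*
 * The vp8_i_* variants below appear to be field-based ("interlaced")
 * versions of the functions above: instead of stepping 8 full rows down
 * for the second half of the block, they offset by half the stride
 * (source_stride >> 1, recon_stride >> 1). The exact field layout they
 * assume is not documented in this file.
 */
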
unsigned int vp8_i_variance16x16_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sse0, sse1, sse2, sse3, var;
    int sum0, sum1, sum2, sum3, avg;

    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
    vp8_get8x8var_mmx(src_ptr + (source_stride >> 1), source_stride, ref_ptr + (recon_stride >> 1), recon_stride, &sse2, &sum2);
    vp8_get8x8var_mmx(src_ptr + (source_stride >> 1) + 8, source_stride, ref_ptr + (recon_stride >> 1) + 8, recon_stride, &sse3, &sum3);

    var = sse0 + sse1 + sse2 + sse3;
    avg = sum0 + sum1 + sum2 + sum3;
    *sse = var;
    return (var - ((avg * avg) >> 8));
}

unsigned int vp8_i_variance8x16_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sse0, sse1, var;
    int sum0, sum1, avg;

    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + (source_stride >> 1), source_stride, ref_ptr + (recon_stride >> 1), recon_stride, &sse1, &sum1);

    var = sse0 + sse1;
    avg = sum0 + sum1;

    *sse = var;
    return (var - ((avg * avg) >> 7));
}

unsigned int vp8_i_sub_pixel_variance16x16_mmx
(
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    int xsum0, xsum1;
    unsigned int xxsum0, xxsum1;
    int f2soffset = (src_pixels_per_line >> 1);
    int f2doffset = (dst_pixels_per_line >> 1);

    vp8_filter_block2d_bil_var_mmx(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 8,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum0, &xxsum0
    );

    vp8_filter_block2d_bil_var_mmx(
        src_ptr + 8, src_pixels_per_line,
        dst_ptr + 8, dst_pixels_per_line, 8,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum1, &xxsum1
    );

    xsum0 += xsum1;
    xxsum0 += xxsum1;

    vp8_filter_block2d_bil_var_mmx(
        src_ptr + f2soffset, src_pixels_per_line,
        dst_ptr + f2doffset, dst_pixels_per_line, 8,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum1, &xxsum1
    );

    xsum0 += xsum1;
    xxsum0 += xxsum1;

    vp8_filter_block2d_bil_var_mmx(
        src_ptr + f2soffset + 8, src_pixels_per_line,
        dst_ptr + f2doffset + 8, dst_pixels_per_line, 8,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum1, &xxsum1
    );

    xsum0 += xsum1;
    xxsum0 += xxsum1;
    *sse = xxsum0;
    return (xxsum0 - ((xsum0 * xsum0) >> 8));
}

unsigned int vp8_i_sub_pixel_variance8x16_mmx
(
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    int xsum0, xsum1;
    unsigned int xxsum0, xxsum1;
    int f2soffset = (src_pixels_per_line >> 1);
    int f2doffset = (dst_pixels_per_line >> 1);

    vp8_filter_block2d_bil_var_mmx(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 8,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum0, &xxsum0
    );

    vp8_filter_block2d_bil_var_mmx(
        src_ptr + f2soffset, src_pixels_per_line,
        dst_ptr + f2doffset, dst_pixels_per_line, 8,
        vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
        &xsum1, &xxsum1
    );

    xsum0 += xsum1;
    xxsum0 += xxsum1;
    *sse = xxsum0;
    return (xxsum0 - ((xsum0 * xsum0) >> 7));
}

unsigned int vp8_variance_halfpixvar16x16_h_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 0,
                                           ref_ptr, recon_stride, sse);
}

unsigned int vp8_variance_halfpixvar16x16_v_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 0, 4,
                                           ref_ptr, recon_stride, sse);
}

unsigned int vp8_variance_halfpixvar16x16_hv_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 4,
                                           ref_ptr, recon_stride, sse);
}
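
/*
 * The half-pixel variants above pass offset 4 to the sub-pixel code:
 * offsets are in eighths of a pixel, so 4/8 selects the { 64, 64 } filter
 * row, i.e. a plain average of the two neighbouring pixels, applied
 * horizontally (4, 0), vertically (0, 4), or in both directions (4, 4).
 */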