Merge "respect alignment in arm asm files"
[libvpx.git] / vp8 / encoder / variance_c.c
blobc7b9c22093646e35b11fd461bea30041b8ba5916
1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
12 #include "variance.h"
13 #include "vp8/common/filter.h"
/*
 * Returns the sum of the squares of the first 256 values at src_ptr.
 *
 * src_ptr : pointer to at least 256 readable 16-bit values.
 *
 * Each product is formed in int and accumulated into an unsigned int, so the
 * running total wraps modulo UINT_MAX + 1 rather than overflowing.
 */
unsigned int vp8_get_mb_ss_c
(
    const short *src_ptr
)
{
    unsigned int total = 0;
    unsigned int k;

    for (k = 0; k < 256; k++)
    {
        const int value = src_ptr[k];

        total += value * value;
    }

    return total;
}
/*
 * Accumulates difference statistics between a w x h block of source pixels
 * and a w x h block of reference pixels.
 *
 * src_ptr / source_stride : first source pixel and distance between rows.
 * ref_ptr / recon_stride  : first reference pixel and distance between rows.
 * w, h                    : block width and height in pixels.
 * sse (out)               : sum of squared differences (src - ref)^2.
 * sum (out)               : signed sum of differences (src - ref).
 *
 * Both outputs are reset to zero before accumulation, so a block with
 * w <= 0 or h <= 0 yields *sse == 0 and *sum == 0.
 */
static void variance(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    int  w,
    int  h,
    unsigned int *sse,
    int *sum)
{
    int row, col;

    *sum = 0;
    *sse = 0;

    for (row = 0; row < h; row++)
    {
        for (col = 0; col < w; col++)
        {
            const int delta = src_ptr[col] - ref_ptr[col];

            *sum += delta;
            *sse += delta * delta;
        }

        /* Step both blocks down one row. */
        src_ptr += source_stride;
        ref_ptr += recon_stride;
    }
}


/*
 * Variance of a 16x16 block of src_ptr against ref_ptr.
 *
 * Stores the sum of squared differences in *sse and returns
 * sse - (sum * sum) / 256, where sum is the signed sum of differences and
 * 256 is the number of pixels in the block.
 */
unsigned int vp8_variance16x16_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int var;
    int avg;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
    *sse = var;

    /*
     * |avg| can reach 255 * 256 = 65280, and 65280^2 = 4261478400 is larger
     * than INT_MAX, so squaring it as a signed int overflows (undefined
     * behaviour). The square does fit in 32 unsigned bits, and squaring the
     * unsigned conversion of a negative avg gives the same value modulo 2^32,
     * so do the multiply in unsigned arithmetic.
     */
    return (var - (((unsigned int)avg * avg) >> 8));
}
/*
 * Variance of an 8-wide, 16-high block of src_ptr against ref_ptr.
 *
 * Stores the sum of squared differences in *sse and returns
 * sse - (sum * sum) / 128, where sum is the signed sum of differences.
 */
unsigned int vp8_variance8x16_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sq_err;
    int diff_sum;
    int mean_term;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &sq_err, &diff_sum);
    *sse = sq_err;

    /* 8 * 16 = 128 pixels, hence the shift by 7. |diff_sum| <= 255 * 128,
     * so the square stays within int range. */
    mean_term = (diff_sum * diff_sum) >> 7;

    return sq_err - mean_term;
}
/*
 * Variance of a 16-wide, 8-high block of src_ptr against ref_ptr.
 *
 * Stores the sum of squared differences in *sse and returns
 * sse - (sum * sum) / 128, where sum is the signed sum of differences.
 */
unsigned int vp8_variance16x8_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sq_err;
    int diff_sum;
    int mean_term;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &sq_err, &diff_sum);
    *sse = sq_err;

    /* 16 * 8 = 128 pixels, hence the shift by 7. |diff_sum| <= 255 * 128,
     * so the square stays within int range. */
    mean_term = (diff_sum * diff_sum) >> 7;

    return sq_err - mean_term;
}
/*
 * Variance of an 8x8 block of src_ptr against ref_ptr.
 *
 * Stores the sum of squared differences in *sse and returns
 * sse - (sum * sum) / 64, where sum is the signed sum of differences.
 */
unsigned int vp8_variance8x8_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sq_err;
    int diff_sum;
    int mean_term;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &sq_err, &diff_sum);
    *sse = sq_err;

    /* 8 * 8 = 64 pixels, hence the shift by 6. */
    mean_term = (diff_sum * diff_sum) >> 6;

    return sq_err - mean_term;
}
/*
 * Variance of a 4x4 block of src_ptr against ref_ptr.
 *
 * Stores the sum of squared differences in *sse and returns
 * sse - (sum * sum) / 16, where sum is the signed sum of differences.
 */
unsigned int vp8_variance4x4_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sq_err;
    int diff_sum;
    int mean_term;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &sq_err, &diff_sum);
    *sse = sq_err;

    /* 4 * 4 = 16 pixels, hence the shift by 4. */
    mean_term = (diff_sum * diff_sum) >> 4;

    return sq_err - mean_term;
}
/*
 * Sum of squared differences over a 16x16 block of src_ptr against ref_ptr.
 *
 * The same value is stored in *sse and returned; unlike the variance
 * functions, no mean term is subtracted.
 */
unsigned int vp8_mse16x16_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int sq_err;
    int diff_sum;   /* filled in by variance() but not needed here */

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &sq_err, &diff_sum);

    return (*sse = sq_err);
}
163 /****************************************************************************
165 * ROUTINE : filter_block2d_bil_first_pass
167 * INPUTS : UINT8 *src_ptr : Pointer to source block.
168 * UINT32 src_pixels_per_line : Stride of input block.
169 * UINT32 pixel_step : Offset between filter input samples (see notes).
170 * UINT32 output_height : Input block height.
171 * UINT32 output_width : Input block width.
172 * INT32 *vp8_filter : Array of 2 bi-linear filter taps.
174 * OUTPUTS : INT32 *output_ptr : Pointer to filtered block.
176 * RETURNS : void
178 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
179 * either horizontal or vertical direction to produce the
180 * filtered output block. Used to implement first-pass
181 * of 2-D separable filter.
183 * SPECIAL NOTES : Produces INT32 output to retain precision for next pass.
184 * Two filter taps should sum to VP8_FILTER_WEIGHT.
185 * pixel_step defines whether the filter is applied
186 * horizontally (pixel_step=1) or vertically (pixel_step=stride).
187 * It defines the offset required to move from one input
188 * to the next.
190 ****************************************************************************/
191 static void var_filter_block2d_bil_first_pass
193 const unsigned char *src_ptr,
194 unsigned short *output_ptr,
195 unsigned int src_pixels_per_line,
196 int pixel_step,
197 unsigned int output_height,
198 unsigned int output_width,
199 const short *vp8_filter
202 unsigned int i, j;
204 for (i = 0; i < output_height; i++)
206 for (j = 0; j < output_width; j++)
208 // Apply bilinear filter
209 output_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
210 ((int)src_ptr[pixel_step] * vp8_filter[1]) +
211 (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
212 src_ptr++;
215 // Next row...
216 src_ptr += src_pixels_per_line - output_width;
217 output_ptr += output_width;
221 /****************************************************************************
223 * ROUTINE : filter_block2d_bil_second_pass
225 * INPUTS : INT32 *src_ptr : Pointer to source block.
226 * UINT32 src_pixels_per_line : Stride of input block.
227 * UINT32 pixel_step : Offset between filter input samples (see notes).
228 * UINT32 output_height : Input block height.
229 * UINT32 output_width : Input block width.
230 * INT32 *vp8_filter : Array of 2 bi-linear filter taps.
232 * OUTPUTS : UINT16 *output_ptr : Pointer to filtered block.
234 * RETURNS : void
236 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
237 * either horizontal or vertical direction to produce the
238 * filtered output block. Used to implement second-pass
239 * of 2-D separable filter.
241 * SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass.
242 * Two filter taps should sum to VP8_FILTER_WEIGHT.
243 * pixel_step defines whether the filter is applied
244 * horizontally (pixel_step=1) or vertically (pixel_step=stride).
245 * It defines the offset required to move from one input
246 * to the next.
248 ****************************************************************************/
249 static void var_filter_block2d_bil_second_pass
251 const unsigned short *src_ptr,
252 unsigned char *output_ptr,
253 unsigned int src_pixels_per_line,
254 unsigned int pixel_step,
255 unsigned int output_height,
256 unsigned int output_width,
257 const short *vp8_filter
260 unsigned int i, j;
261 int Temp;
263 for (i = 0; i < output_height; i++)
265 for (j = 0; j < output_width; j++)
267 // Apply filter
268 Temp = ((int)src_ptr[0] * vp8_filter[0]) +
269 ((int)src_ptr[pixel_step] * vp8_filter[1]) +
270 (VP8_FILTER_WEIGHT / 2);
271 output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
272 src_ptr++;
275 // Next row...
276 src_ptr += src_pixels_per_line - output_width;
277 output_ptr += output_width;
282 unsigned int vp8_sub_pixel_variance4x4_c
284 const unsigned char *src_ptr,
285 int src_pixels_per_line,
286 int xoffset,
287 int yoffset,
288 const unsigned char *dst_ptr,
289 int dst_pixels_per_line,
290 unsigned int *sse
293 unsigned char temp2[20*16];
294 const short *HFilter, *VFilter;
295 unsigned short FData3[5*4]; // Temp data bufffer used in filtering
297 HFilter = vp8_bilinear_filters[xoffset];
298 VFilter = vp8_bilinear_filters[yoffset];
300 // First filter 1d Horizontal
301 var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 5, 4, HFilter);
303 // Now filter Verticaly
304 var_filter_block2d_bil_second_pass(FData3, temp2, 4, 4, 4, 4, VFilter);
306 return vp8_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
310 unsigned int vp8_sub_pixel_variance8x8_c
312 const unsigned char *src_ptr,
313 int src_pixels_per_line,
314 int xoffset,
315 int yoffset,
316 const unsigned char *dst_ptr,
317 int dst_pixels_per_line,
318 unsigned int *sse
321 unsigned short FData3[9*8]; // Temp data bufffer used in filtering
322 unsigned char temp2[20*16];
323 const short *HFilter, *VFilter;
325 HFilter = vp8_bilinear_filters[xoffset];
326 VFilter = vp8_bilinear_filters[yoffset];
328 var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 8, HFilter);
329 var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter);
331 return vp8_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
334 unsigned int vp8_sub_pixel_variance16x16_c
336 const unsigned char *src_ptr,
337 int src_pixels_per_line,
338 int xoffset,
339 int yoffset,
340 const unsigned char *dst_ptr,
341 int dst_pixels_per_line,
342 unsigned int *sse
345 unsigned short FData3[17*16]; // Temp data bufffer used in filtering
346 unsigned char temp2[20*16];
347 const short *HFilter, *VFilter;
349 HFilter = vp8_bilinear_filters[xoffset];
350 VFilter = vp8_bilinear_filters[yoffset];
352 var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 16, HFilter);
353 var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter);
355 return vp8_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
/*
 * 16x16 sub-pixel variance at the fixed offsets xoffset = 4, yoffset = 0
 * (going by the function name, the horizontal half-pixel position).
 * Forwards to vp8_sub_pixel_variance16x16_c(), which fills in *sse.
 */
unsigned int vp8_variance_halfpixvar16x16_h_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    const int xoffset = 4;
    const int yoffset = 0;

    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, xoffset, yoffset,
                                         ref_ptr, recon_stride, sse);
}
/*
 * 16x16 sub-pixel variance at the fixed offsets xoffset = 0, yoffset = 4
 * (going by the function name, the vertical half-pixel position).
 * Forwards to vp8_sub_pixel_variance16x16_c(), which fills in *sse.
 */
unsigned int vp8_variance_halfpixvar16x16_v_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    const int xoffset = 0;
    const int yoffset = 4;

    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, xoffset, yoffset,
                                         ref_ptr, recon_stride, sse);
}
/*
 * 16x16 sub-pixel variance at the fixed offsets xoffset = 4, yoffset = 4
 * (going by the function name, the half-pixel position in both directions).
 * Forwards to vp8_sub_pixel_variance16x16_c(), which fills in *sse.
 */
unsigned int vp8_variance_halfpixvar16x16_hv_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    const int xoffset = 4;
    const int yoffset = 4;

    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, xoffset, yoffset,
                                         ref_ptr, recon_stride, sse);
}
/*
 * Sub-pixel sum of squared differences for a 16x16 block.
 *
 * Runs vp8_sub_pixel_variance16x16_c() with the same arguments, ignores the
 * variance it returns, and returns the value it stored in *sse.
 */
unsigned int vp8_sub_pixel_mse16x16_c
(
    const unsigned char *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    /* Called only for its *sse side output. */
    (void)vp8_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line,
                                        xoffset, yoffset,
                                        dst_ptr, dst_pixels_per_line, sse);

    return *sse;
}
410 unsigned int vp8_sub_pixel_variance16x8_c
412 const unsigned char *src_ptr,
413 int src_pixels_per_line,
414 int xoffset,
415 int yoffset,
416 const unsigned char *dst_ptr,
417 int dst_pixels_per_line,
418 unsigned int *sse
421 unsigned short FData3[16*9]; // Temp data bufffer used in filtering
422 unsigned char temp2[20*16];
423 const short *HFilter, *VFilter;
425 HFilter = vp8_bilinear_filters[xoffset];
426 VFilter = vp8_bilinear_filters[yoffset];
428 var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 16, HFilter);
429 var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter);
431 return vp8_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
434 unsigned int vp8_sub_pixel_variance8x16_c
436 const unsigned char *src_ptr,
437 int src_pixels_per_line,
438 int xoffset,
439 int yoffset,
440 const unsigned char *dst_ptr,
441 int dst_pixels_per_line,
442 unsigned int *sse
445 unsigned short FData3[9*16]; // Temp data bufffer used in filtering
446 unsigned char temp2[20*16];
447 const short *HFilter, *VFilter;
450 HFilter = vp8_bilinear_filters[xoffset];
451 VFilter = vp8_bilinear_filters[yoffset];
454 var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 8, HFilter);
455 var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter);
457 return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);