Define RDCOST only once
[libvpx.git] / vp8 / encoder / variance_c.c
blobede07c8db3bf877aa6a8cc8b9359b70c371d9647
1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
12 #include "variance.h"
13 #include "vp8/common/filter.h"
/*
 * Returns the sum of the squares of the 256 values starting at src_ptr.
 * The caller must supply at least 256 readable elements.
 */
unsigned int vp8_get_mb_ss_c
(
    const short *src_ptr
)
{
    unsigned int i = 0, sum = 0;

    do
    {
        sum += (src_ptr[i] * src_ptr[i]);
        i++;
    }
    while (i < 256);

    return sum;
}
/*
 * Accumulates, over a w x h block, the sum of the per-pixel differences
 * (src - ref) and the sum of their squares.
 *
 * src_ptr / ref_ptr point at the top-left pixel of each block;
 * source_stride / recon_stride are added to the respective pointer after
 * every row. The totals are written to *sum and *sse, which are both zero
 * when w or h is not positive.
 */
static void variance(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    int  w,
    int  h,
    unsigned int *sse,
    int *sum)
{
    int i, j;
    int diff;

    *sum = 0;
    *sse = 0;

    for (i = 0; i < h; i++)
    {
        for (j = 0; j < w; j++)
        {
            diff = src_ptr[j] - ref_ptr[j];
            *sum += diff;
            *sse += diff * diff;
        }

        src_ptr += source_stride;
        ref_ptr += recon_stride;
    }
}
/*
 * Computes the 8x8 difference statistics between src_ptr and ref_ptr.
 *
 * Stores the sum of squared differences in *SSE and the sum of differences
 * in *Sum, and returns *SSE - (*Sum * *Sum) / 64 (64 = pixels in the block).
 */
unsigned int
vp8_get8x8var_c
(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *SSE,
    int *Sum
)
{
    variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, SSE, Sum);
    return (*SSE - (((*Sum) * (*Sum)) >> 6));
}
/*
 * Computes the 16x16 difference statistics between src_ptr and ref_ptr.
 *
 * Stores the sum of squared differences in *SSE and the sum of differences
 * in *Sum, and returns *SSE - (*Sum * *Sum) / 256 (256 = pixels in the
 * block).
 */
unsigned int
vp8_get16x16var_c
(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *SSE,
    int *Sum
)
{
    variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, SSE, Sum);

    /* |*Sum| can reach 256 * 255 = 65280, whose square exceeds INT_MAX.
     * Multiply as unsigned so the product is well-defined; 65280^2 still
     * fits in 32 unsigned bits. */
    return (*SSE - (((unsigned int)(*Sum) * (*Sum)) >> 8));
}
/*
 * Variance of the 16x16 difference block between src_ptr and ref_ptr.
 *
 * Writes the sum of squared differences to *sse and returns
 * sse - (sum * sum) / 256, where sum is the sum of differences.
 */
unsigned int vp8_variance16x16_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int var;
    int avg;


    variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
    *sse = var;

    /* |avg| can reach 256 * 255 = 65280, whose square exceeds INT_MAX.
     * Multiply as unsigned so the product is well-defined; 65280^2 still
     * fits in 32 unsigned bits. */
    return (var - (((unsigned int)avg * avg) >> 8));
}
/*
 * Variance of the 8-wide, 16-high difference block between src_ptr and
 * ref_ptr.
 *
 * Writes the sum of squared differences to *sse and returns
 * sse - (sum * sum) / 128, where sum is the sum of differences.
 */
unsigned int vp8_variance8x16_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int var;
    int avg;


    variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
    *sse = var;
    return (var - ((avg * avg) >> 7));
}
/*
 * Variance of the 16-wide, 8-high difference block between src_ptr and
 * ref_ptr.
 *
 * Writes the sum of squared differences to *sse and returns
 * sse - (sum * sum) / 128, where sum is the sum of differences.
 */
unsigned int vp8_variance16x8_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int var;
    int avg;


    variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
    *sse = var;
    return (var - ((avg * avg) >> 7));
}
/*
 * Variance of the 8x8 difference block between src_ptr and ref_ptr.
 *
 * Writes the sum of squared differences to *sse and returns
 * sse - (sum * sum) / 64, where sum is the sum of differences.
 */
unsigned int vp8_variance8x8_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int var;
    int avg;


    variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
    *sse = var;
    return (var - ((avg * avg) >> 6));
}
/*
 * Variance of the 4x4 difference block between src_ptr and ref_ptr.
 *
 * Writes the sum of squared differences to *sse and returns
 * sse - (sum * sum) / 16, where sum is the sum of differences.
 */
unsigned int vp8_variance4x4_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int var;
    int avg;


    variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg);
    *sse = var;
    return (var - ((avg * avg) >> 4));
}
/*
 * Sum of squared differences over the 16x16 block between src_ptr and
 * ref_ptr. The value is both stored in *sse and returned; the sum of
 * differences computed alongside it is discarded.
 */
unsigned int vp8_mse16x16_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    unsigned int var;
    int avg;

    variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
    *sse = var;
    return var;
}
197 /****************************************************************************
199 * ROUTINE : filter_block2d_bil_first_pass
201 * INPUTS : UINT8 *src_ptr : Pointer to source block.
202 * UINT32 src_pixels_per_line : Stride of input block.
203 * UINT32 pixel_step : Offset between filter input samples (see notes).
204 * UINT32 output_height : Input block height.
205 * UINT32 output_width : Input block width.
206 * INT32 *vp8_filter : Array of 2 bi-linear filter taps.
208 * OUTPUTS : INT32 *output_ptr : Pointer to filtered block.
210 * RETURNS : void
212 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
213 * either horizontal or vertical direction to produce the
214 * filtered output block. Used to implement first-pass
215 * of 2-D separable filter.
217 * SPECIAL NOTES : Produces INT32 output to retain precision for next pass.
218 * Two filter taps should sum to VP8_FILTER_WEIGHT.
219 * pixel_step defines whether the filter is applied
220 * horizontally (pixel_step=1) or vertically (pixel_step=stride).
221 * It defines the offset required to move from one input
222 * to the next.
224 ****************************************************************************/
225 static void var_filter_block2d_bil_first_pass
227 const unsigned char *src_ptr,
228 unsigned short *output_ptr,
229 unsigned int src_pixels_per_line,
230 int pixel_step,
231 unsigned int output_height,
232 unsigned int output_width,
233 const short *vp8_filter
236 unsigned int i, j;
238 for (i = 0; i < output_height; i++)
240 for (j = 0; j < output_width; j++)
242 // Apply bilinear filter
243 output_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
244 ((int)src_ptr[pixel_step] * vp8_filter[1]) +
245 (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
246 src_ptr++;
249 // Next row...
250 src_ptr += src_pixels_per_line - output_width;
251 output_ptr += output_width;
255 /****************************************************************************
257 * ROUTINE : filter_block2d_bil_second_pass
259 * INPUTS : INT32 *src_ptr : Pointer to source block.
260 * UINT32 src_pixels_per_line : Stride of input block.
261 * UINT32 pixel_step : Offset between filter input samples (see notes).
262 * UINT32 output_height : Input block height.
263 * UINT32 output_width : Input block width.
264 * INT32 *vp8_filter : Array of 2 bi-linear filter taps.
266 * OUTPUTS : UINT16 *output_ptr : Pointer to filtered block.
268 * RETURNS : void
270 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
271 * either horizontal or vertical direction to produce the
272 * filtered output block. Used to implement second-pass
273 * of 2-D separable filter.
275 * SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass.
276 * Two filter taps should sum to VP8_FILTER_WEIGHT.
277 * pixel_step defines whether the filter is applied
278 * horizontally (pixel_step=1) or vertically (pixel_step=stride).
279 * It defines the offset required to move from one input
280 * to the next.
282 ****************************************************************************/
283 static void var_filter_block2d_bil_second_pass
285 const unsigned short *src_ptr,
286 unsigned char *output_ptr,
287 unsigned int src_pixels_per_line,
288 unsigned int pixel_step,
289 unsigned int output_height,
290 unsigned int output_width,
291 const short *vp8_filter
294 unsigned int i, j;
295 int Temp;
297 for (i = 0; i < output_height; i++)
299 for (j = 0; j < output_width; j++)
301 // Apply filter
302 Temp = ((int)src_ptr[0] * vp8_filter[0]) +
303 ((int)src_ptr[pixel_step] * vp8_filter[1]) +
304 (VP8_FILTER_WEIGHT / 2);
305 output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
306 src_ptr++;
309 // Next row...
310 src_ptr += src_pixels_per_line - output_width;
311 output_ptr += output_width;
316 unsigned int vp8_sub_pixel_variance4x4_c
318 const unsigned char *src_ptr,
319 int src_pixels_per_line,
320 int xoffset,
321 int yoffset,
322 const unsigned char *dst_ptr,
323 int dst_pixels_per_line,
324 unsigned int *sse
327 unsigned char temp2[20*16];
328 const short *HFilter, *VFilter;
329 unsigned short FData3[5*4]; // Temp data bufffer used in filtering
331 HFilter = vp8_bilinear_filters[xoffset];
332 VFilter = vp8_bilinear_filters[yoffset];
334 // First filter 1d Horizontal
335 var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 5, 4, HFilter);
337 // Now filter Verticaly
338 var_filter_block2d_bil_second_pass(FData3, temp2, 4, 4, 4, 4, VFilter);
340 return vp8_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
344 unsigned int vp8_sub_pixel_variance8x8_c
346 const unsigned char *src_ptr,
347 int src_pixels_per_line,
348 int xoffset,
349 int yoffset,
350 const unsigned char *dst_ptr,
351 int dst_pixels_per_line,
352 unsigned int *sse
355 unsigned short FData3[9*8]; // Temp data bufffer used in filtering
356 unsigned char temp2[20*16];
357 const short *HFilter, *VFilter;
359 HFilter = vp8_bilinear_filters[xoffset];
360 VFilter = vp8_bilinear_filters[yoffset];
362 var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 8, HFilter);
363 var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter);
365 return vp8_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
368 unsigned int vp8_sub_pixel_variance16x16_c
370 const unsigned char *src_ptr,
371 int src_pixels_per_line,
372 int xoffset,
373 int yoffset,
374 const unsigned char *dst_ptr,
375 int dst_pixels_per_line,
376 unsigned int *sse
379 unsigned short FData3[17*16]; // Temp data bufffer used in filtering
380 unsigned char temp2[20*16];
381 const short *HFilter, *VFilter;
383 HFilter = vp8_bilinear_filters[xoffset];
384 VFilter = vp8_bilinear_filters[yoffset];
386 var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 16, HFilter);
387 var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter);
389 return vp8_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
/*
 * 16x16 sub-pixel variance with bilinear filter index 4 applied
 * horizontally and index 0 vertically. Forwards to
 * vp8_sub_pixel_variance16x16_c; *sse receives the sum of squared
 * differences.
 */
unsigned int vp8_variance_halfpixvar16x16_h_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 4, 0,
                                         ref_ptr, recon_stride, sse);
}
/*
 * 16x16 sub-pixel variance with bilinear filter index 0 applied
 * horizontally and index 4 vertically. Forwards to
 * vp8_sub_pixel_variance16x16_c; *sse receives the sum of squared
 * differences.
 */
unsigned int vp8_variance_halfpixvar16x16_v_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 4,
                                         ref_ptr, recon_stride, sse);
}
/*
 * 16x16 sub-pixel variance with bilinear filter index 4 applied both
 * horizontally and vertically. Forwards to vp8_sub_pixel_variance16x16_c;
 * *sse receives the sum of squared differences.
 */
unsigned int vp8_variance_halfpixvar16x16_hv_c(
    const unsigned char *src_ptr,
    int  source_stride,
    const unsigned char *ref_ptr,
    int  recon_stride,
    unsigned int *sse)
{
    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 4, 4,
                                         ref_ptr, recon_stride, sse);
}
/*
 * Sum of squared differences between a bilinearly filtered 16x16 block of
 * src_ptr and the 16x16 block at dst_ptr.
 *
 * Runs vp8_sub_pixel_variance16x16_c for its *sse side effect, discards the
 * variance it returns, and returns *sse.
 */
unsigned int vp8_sub_pixel_mse16x16_c
(
    const unsigned char  *src_ptr,
    int  src_pixels_per_line,
    int  xoffset,
    int  yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    vp8_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
    return *sse;
}
444 unsigned int vp8_sub_pixel_variance16x8_c
446 const unsigned char *src_ptr,
447 int src_pixels_per_line,
448 int xoffset,
449 int yoffset,
450 const unsigned char *dst_ptr,
451 int dst_pixels_per_line,
452 unsigned int *sse
455 unsigned short FData3[16*9]; // Temp data bufffer used in filtering
456 unsigned char temp2[20*16];
457 const short *HFilter, *VFilter;
459 HFilter = vp8_bilinear_filters[xoffset];
460 VFilter = vp8_bilinear_filters[yoffset];
462 var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 16, HFilter);
463 var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter);
465 return vp8_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
468 unsigned int vp8_sub_pixel_variance8x16_c
470 const unsigned char *src_ptr,
471 int src_pixels_per_line,
472 int xoffset,
473 int yoffset,
474 const unsigned char *dst_ptr,
475 int dst_pixels_per_line,
476 unsigned int *sse
479 unsigned short FData3[9*16]; // Temp data bufffer used in filtering
480 unsigned char temp2[20*16];
481 const short *HFilter, *VFilter;
484 HFilter = vp8_bilinear_filters[xoffset];
485 VFilter = vp8_bilinear_filters[yoffset];
488 var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 8, HFilter);
489 var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter);
491 return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);