vp8/encoder/variance_c.c

   1 /*
   2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11
  12 #include "variance.h"
  13
  14 const int vp8_six_tap[8][6] =
  15 {
  16     { 0,  0,  128,    0,   0,  0 },         // note that 1/8 pel positions are just as per alpha -0.5 bicubic
  17     { 0, -6,  123,   12,  -1,  0 },
  18     { 2, -11, 108,   36,  -8,  1 },         // New 1/4 pel 6 tap filter
  19     { 0, -9,   93,   50,  -6,  0 },
  20     { 3, -16,  77,   77, -16,  3 },         // New 1/2 pel 6 tap filter
  21     { 0, -6,   50,   93,  -9,  0 },
  22     { 1, -8,   36,  108, -11,  2 },         // New 1/4 pel 6 tap filter
  23     { 0, -1,   12,  123,  -6,  0 }
  24 };
  25
  26
  27 const int VP8_FILTER_WEIGHT = 128;
  28 const int VP8_FILTER_SHIFT  =   7;
  29 const int vp8_bilinear_taps[8][2] =
  30 {
  31     { 128,   0 },
  32     { 112,  16 },
  33     {  96,  32 },
  34     {  80,  48 },
  35     {  64,  64 },
  36     {  48,  80 },
  37     {  32,  96 },
  38     {  16, 112 }
  39 };
  40
  41 unsigned int vp8_get_mb_ss_c
  42 (
  43     const short *src_ptr
  44 )
  45 {
  46     unsigned int i = 0, sum = 0;
  47
  48     do
  49     {
  50         sum += (src_ptr[i] * src_ptr[i]);
  51         i++;
  52     }
  53     while (i < 256);
  54
  55     return sum;
  56 }
  57
  58
  59 void  vp8_variance(
  60     const unsigned char *src_ptr,
  61     int  source_stride,
  62     const unsigned char *ref_ptr,
  63     int  recon_stride,
  64     int  w,
  65     int  h,
  66     unsigned int *sse,
  67     int *sum)
  68 {
  69     int i, j;
  70     int diff;
  71
  72     *sum = 0;
  73     *sse = 0;
  74
  75     for (i = 0; i < h; i++)
  76     {
  77         for (j = 0; j < w; j++)
  78         {
  79             diff = src_ptr[j] - ref_ptr[j];
  80             *sum += diff;
  81             *sse += diff * diff;
  82         }
  83
  84         src_ptr += source_stride;
  85         ref_ptr += recon_stride;
  86     }
  87 }
  88
  89 unsigned int
  90 vp8_get8x8var_c
  91 (
  92     const unsigned char *src_ptr,
  93     int  source_stride,
  94     const unsigned char *ref_ptr,
  95     int  recon_stride,
  96     unsigned int *SSE,
  97     int *Sum
  98 )
  99 {
 100
 101     vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, SSE, Sum);
 102     return (*SSE - (((*Sum) * (*Sum)) >> 6));
 103 }
 104
 105 unsigned int
 106 vp8_get16x16var_c
 107 (
 108     const unsigned char *src_ptr,
 109     int  source_stride,
 110     const unsigned char *ref_ptr,
 111     int  recon_stride,
 112     unsigned int *SSE,
 113     int *Sum
 114 )
 115 {
 116
 117     vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, SSE, Sum);
 118     return (*SSE - (((*Sum) * (*Sum)) >> 8));
 119
 120 }
 121
 122
 123
 124 unsigned int vp8_variance16x16_c(
 125     const unsigned char *src_ptr,
 126     int  source_stride,
 127     const unsigned char *ref_ptr,
 128     int  recon_stride,
 129     unsigned int *sse)
 130 {
 131     unsigned int var;
 132     int avg;
 133
 134
 135     vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
 136     *sse = var;
 137     return (var - ((avg * avg) >> 8));
 138 }
 139
 140 unsigned int vp8_variance8x16_c(
 141     const unsigned char *src_ptr,
 142     int  source_stride,
 143     const unsigned char *ref_ptr,
 144     int  recon_stride,
 145     unsigned int *sse)
 146 {
 147     unsigned int var;
 148     int avg;
 149
 150
 151     vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
 152     *sse = var;
 153     return (var - ((avg * avg) >> 7));
 154 }
 155
 156 unsigned int vp8_variance16x8_c(
 157     const unsigned char *src_ptr,
 158     int  source_stride,
 159     const unsigned char *ref_ptr,
 160     int  recon_stride,
 161     unsigned int *sse)
 162 {
 163     unsigned int var;
 164     int avg;
 165
 166
 167     vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
 168     *sse = var;
 169     return (var - ((avg * avg) >> 7));
 170 }
 171
 172
 173 unsigned int vp8_variance8x8_c(
 174     const unsigned char *src_ptr,
 175     int  source_stride,
 176     const unsigned char *ref_ptr,
 177     int  recon_stride,
 178     unsigned int *sse)
 179 {
 180     unsigned int var;
 181     int avg;
 182
 183
 184     vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
 185     *sse = var;
 186     return (var - ((avg * avg) >> 6));
 187 }
 188
 189 unsigned int vp8_variance4x4_c(
 190     const unsigned char *src_ptr,
 191     int  source_stride,
 192     const unsigned char *ref_ptr,
 193     int  recon_stride,
 194     unsigned int *sse)
 195 {
 196     unsigned int var;
 197     int avg;
 198
 199
 200     vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg);
 201     *sse = var;
 202     return (var - ((avg * avg) >> 4));
 203 }
 204
 205
 206 unsigned int vp8_mse16x16_c(
 207     const unsigned char *src_ptr,
 208     int  source_stride,
 209     const unsigned char *ref_ptr,
 210     int  recon_stride,
 211     unsigned int *sse)
 212 {
 213     unsigned int var;
 214     int avg;
 215
 216     vp8_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
 217     *sse = var;
 218     return var;
 219 }
 220
 221
 222 /****************************************************************************
 223  *
 224  *  ROUTINE       : filter_block2d_bil_first_pass
 225  *
 226  *  INPUTS        : UINT8  *src_ptr          : Pointer to source block.
 227  *                  UINT32 src_pixels_per_line : Stride of input block.
 228  *                  UINT32 pixel_step        : Offset between filter input samples (see notes).
 229  *                  UINT32 output_height     : Input block height.
 230  *                  UINT32 output_width      : Input block width.
 231  *                  INT32  *vp8_filter          : Array of 2 bi-linear filter taps.
 232  *
 233  *  OUTPUTS       : INT32 *output_ptr        : Pointer to filtered block.
 234  *
 235  *  RETURNS       : void
 236  *
 237  *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
 238  *                  either horizontal or vertical direction to produce the
 239  *                  filtered output block. Used to implement first-pass
 240  *                  of 2-D separable filter.
 241  *
 242  *  SPECIAL NOTES : Produces INT32 output to retain precision for next pass.
 243  *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
 244  *                  pixel_step defines whether the filter is applied
 245  *                  horizontally (pixel_step=1) or vertically (pixel_step=stride).
 246  *                  It defines the offset required to move from one input
 247  *                  to the next.
 248  *
 249  ****************************************************************************/
 250 void vp8e_filter_block2d_bil_first_pass
 251 (
 252     const unsigned char *src_ptr,
 253     unsigned short *output_ptr,
 254     unsigned int src_pixels_per_line,
 255     int pixel_step,
 256     unsigned int output_height,
 257     unsigned int output_width,
 258     const int *vp8_filter
 259 )
 260 {
 261     unsigned int i, j;
 262
 263     for (i = 0; i < output_height; i++)
 264     {
 265         for (j = 0; j < output_width; j++)
 266         {
 267             // Apply bilinear filter
 268             output_ptr[j] = (((int)src_ptr[0]          * vp8_filter[0]) +
 269                              ((int)src_ptr[pixel_step] * vp8_filter[1]) +
 270                              (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
 271             src_ptr++;
 272         }
 273
 274         // Next row...
 275         src_ptr    += src_pixels_per_line - output_width;
 276         output_ptr += output_width;
 277     }
 278 }
 279
 280 /****************************************************************************
 281  *
 282  *  ROUTINE       : filter_block2d_bil_second_pass
 283  *
 284  *  INPUTS        : INT32  *src_ptr          : Pointer to source block.
 285  *                  UINT32 src_pixels_per_line : Stride of input block.
 286  *                  UINT32 pixel_step        : Offset between filter input samples (see notes).
 287  *                  UINT32 output_height     : Input block height.
 288  *                  UINT32 output_width      : Input block width.
 289  *                  INT32  *vp8_filter          : Array of 2 bi-linear filter taps.
 290  *
 291  *  OUTPUTS       : UINT16 *output_ptr       : Pointer to filtered block.
 292  *
 293  *  RETURNS       : void
 294  *
 295  *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
 296  *                  either horizontal or vertical direction to produce the
 297  *                  filtered output block. Used to implement second-pass
 298  *                  of 2-D separable filter.
 299  *
 300  *  SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass.
 301  *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
 302  *                  pixel_step defines whether the filter is applied
 303  *                  horizontally (pixel_step=1) or vertically (pixel_step=stride).
 304  *                  It defines the offset required to move from one input
 305  *                  to the next.
 306  *
 307  ****************************************************************************/
 308 void vp8e_filter_block2d_bil_second_pass
 309 (
 310     const unsigned short *src_ptr,
 311     unsigned char  *output_ptr,
 312     unsigned int  src_pixels_per_line,
 313     unsigned int  pixel_step,
 314     unsigned int  output_height,
 315     unsigned int  output_width,
 316     const int *vp8_filter
 317 )
 318 {
 319     unsigned int  i, j;
 320     int  Temp;
 321
 322     for (i = 0; i < output_height; i++)
 323     {
 324         for (j = 0; j < output_width; j++)
 325         {
 326             // Apply filter
 327             Temp = ((int)src_ptr[0]         * vp8_filter[0]) +
 328                    ((int)src_ptr[pixel_step] * vp8_filter[1]) +
 329                    (VP8_FILTER_WEIGHT / 2);
 330             output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
 331             src_ptr++;
 332         }
 333
 334         // Next row...
 335         src_ptr    += src_pixels_per_line - output_width;
 336         output_ptr += output_width;
 337     }
 338 }
 339
 340
 341 /****************************************************************************
 342  *
 343  *  ROUTINE       : filter_block2d_bil
 344  *
 345  *  INPUTS        : UINT8  *src_ptr          : Pointer to source block.
 346  *                  UINT32 src_pixels_per_line : Stride of input block.
 347  *                  INT32  *HFilter         : Array of 2 horizontal filter taps.
 348  *                  INT32  *VFilter         : Array of 2 vertical filter taps.
 349  *
 350  *  OUTPUTS       : UINT16 *output_ptr       : Pointer to filtered block.
 351  *
 352  *  RETURNS       : void
 353  *
 354  *  FUNCTION      : 2-D filters an 8x8 input block by applying a 2-tap
 355  *                  bi-linear filter horizontally followed by a 2-tap
 356  *                  bi-linear filter vertically on the result.
 357  *
 358  *  SPECIAL NOTES : The intermediate horizontally filtered block must produce
 359  *                  1 more point than the input block in each column. This
 360  *                  is to ensure that the 2-tap filter has one extra data-point
 361  *                  at the top of each column so filter taps do not extend
 362  *                  beyond data. Thus the output of the first stage filter
 363  *                  is an 8x9 (hx_v) block.
 364  *
 365  ****************************************************************************/
 366 void vp8e_filter_block2d_bil
 367 (
 368     const unsigned char  *src_ptr,
 369     unsigned char *output_ptr,
 370     unsigned int src_pixels_per_line,
 371     int  *HFilter,
 372     int  *VFilter
 373 )
 374 {
 375
 376     unsigned short FData[20*16];    // Temp data bufffer used in filtering
 377
 378     // First filter 1-D horizontally...
 379     vp8e_filter_block2d_bil_first_pass(src_ptr, FData, src_pixels_per_line, 1, 9, 8, HFilter);
 380
 381     // then 1-D vertically...
 382     vp8e_filter_block2d_bil_second_pass(FData, output_ptr, 8, 8, 8, 8, VFilter);
 383 }
 384
 385
 386
 387 unsigned int vp8_sub_pixel_variance4x4_c
 388 (
 389     const unsigned char  *src_ptr,
 390     int  src_pixels_per_line,
 391     int  xoffset,
 392     int  yoffset,
 393     const unsigned char *dst_ptr,
 394     int dst_pixels_per_line,
 395     unsigned int *sse
 396 )
 397 {
 398     unsigned char  temp2[20*16];
 399     const int *HFilter, *VFilter;
 400     unsigned short FData3[5*4]; // Temp data bufffer used in filtering
 401
 402     HFilter = vp8_bilinear_taps[xoffset];
 403     VFilter = vp8_bilinear_taps[yoffset];
 404
 405     // First filter 1d Horizontal
 406     vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 5, 4, HFilter);
 407
 408     // Now filter Verticaly
 409     vp8e_filter_block2d_bil_second_pass(FData3, temp2, 4,  4,  4,  4, VFilter);
 410
 411     return vp8_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
 412 }
 413
 414
 415 unsigned int vp8_sub_pixel_variance8x8_c
 416 (
 417     const unsigned char  *src_ptr,
 418     int  src_pixels_per_line,
 419     int  xoffset,
 420     int  yoffset,
 421     const unsigned char *dst_ptr,
 422     int dst_pixels_per_line,
 423     unsigned int *sse
 424 )
 425 {
 426     unsigned short FData3[9*8]; // Temp data bufffer used in filtering
 427     unsigned char  temp2[20*16];
 428     const int *HFilter, *VFilter;
 429
 430     HFilter = vp8_bilinear_taps[xoffset];
 431     VFilter = vp8_bilinear_taps[yoffset];
 432
 433     vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 8, HFilter);
 434     vp8e_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter);
 435
 436     return vp8_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
 437 }
 438
 439 unsigned int vp8_sub_pixel_variance16x16_c
 440 (
 441     const unsigned char  *src_ptr,
 442     int  src_pixels_per_line,
 443     int  xoffset,
 444     int  yoffset,
 445     const unsigned char *dst_ptr,
 446     int dst_pixels_per_line,
 447     unsigned int *sse
 448 )
 449 {
 450     unsigned short FData3[17*16];   // Temp data bufffer used in filtering
 451     unsigned char  temp2[20*16];
 452     const int *HFilter, *VFilter;
 453
 454     HFilter = vp8_bilinear_taps[xoffset];
 455     VFilter = vp8_bilinear_taps[yoffset];
 456
 457     vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 16, HFilter);
 458     vp8e_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter);
 459
 460     return vp8_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
 461 }
 462
 463
 464 unsigned int vp8_variance_halfpixvar16x16_h_c(
 465     const unsigned char *src_ptr,
 466     int  source_stride,
 467     const unsigned char *ref_ptr,
 468     int  recon_stride,
 469     unsigned int *sse)
 470 {
 471     return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 4, 0,
 472                                          ref_ptr, recon_stride, sse);
 473 }
 474
 475
 476 unsigned int vp8_variance_halfpixvar16x16_v_c(
 477     const unsigned char *src_ptr,
 478     int  source_stride,
 479     const unsigned char *ref_ptr,
 480     int  recon_stride,
 481     unsigned int *sse)
 482 {
 483     return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 4,
 484                                          ref_ptr, recon_stride, sse);
 485 }
 486
 487
 488 unsigned int vp8_variance_halfpixvar16x16_hv_c(
 489     const unsigned char *src_ptr,
 490     int  source_stride,
 491     const unsigned char *ref_ptr,
 492     int  recon_stride,
 493     unsigned int *sse)
 494 {
 495     return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 4, 4,
 496                                          ref_ptr, recon_stride, sse);
 497 }
 498
 499
 500 unsigned int vp8_sub_pixel_mse16x16_c
 501 (
 502     const unsigned char  *src_ptr,
 503     int  src_pixels_per_line,
 504     int  xoffset,
 505     int  yoffset,
 506     const unsigned char *dst_ptr,
 507     int dst_pixels_per_line,
 508     unsigned int *sse
 509 )
 510 {
 511     vp8_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
 512     return *sse;
 513 }
 514
 515 unsigned int vp8_sub_pixel_variance16x8_c
 516 (
 517     const unsigned char  *src_ptr,
 518     int  src_pixels_per_line,
 519     int  xoffset,
 520     int  yoffset,
 521     const unsigned char *dst_ptr,
 522     int dst_pixels_per_line,
 523     unsigned int *sse
 524 )
 525 {
 526     unsigned short FData3[16*9];    // Temp data bufffer used in filtering
 527     unsigned char  temp2[20*16];
 528     const int *HFilter, *VFilter;
 529
 530     HFilter = vp8_bilinear_taps[xoffset];
 531     VFilter = vp8_bilinear_taps[yoffset];
 532
 533     vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 16, HFilter);
 534     vp8e_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter);
 535
 536     return vp8_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
 537 }
 538
 539 unsigned int vp8_sub_pixel_variance8x16_c
 540 (
 541     const unsigned char  *src_ptr,
 542     int  src_pixels_per_line,
 543     int  xoffset,
 544     int  yoffset,
 545     const unsigned char *dst_ptr,
 546     int dst_pixels_per_line,
 547     unsigned int *sse
 548 )
 549 {
 550     unsigned short FData3[9*16];    // Temp data bufffer used in filtering
 551     unsigned char  temp2[20*16];
 552     const int *HFilter, *VFilter;
 553
 554
 555     HFilter = vp8_bilinear_taps[xoffset];
 556     VFilter = vp8_bilinear_taps[yoffset];
 557
 558
 559     vp8e_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 8, HFilter);
 560     vp8e_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter);
 561
 562     return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
 563 }