vp8/encoder/temporal_filter.c

   1 /*
   2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11
  12 #include "vp8/common/onyxc_int.h"
  13 #include "onyx_int.h"
  14 #include "vp8/common/systemdependent.h"
  15 #include "quantize.h"
  16 #include "vp8/common/alloccommon.h"
  17 #include "mcomp.h"
  18 #include "firstpass.h"
  19 #include "psnr.h"
  20 #include "vpx_scale/vpxscale.h"
  21 #include "vp8/common/extend.h"
  22 #include "ratectrl.h"
  23 #include "vp8/common/quant_common.h"
  24 #include "segmentation.h"
  25 #include "vp8/common/g_common.h"
  26 #include "vpx_scale/yv12extend.h"
  27 #include "vp8/common/postproc.h"
  28 #include "vpx_mem/vpx_mem.h"
  29 #include "vp8/common/swapyv12buffer.h"
  30 #include "vp8/common/threading.h"
  31 #include "vpx_ports/vpx_timer.h"
  32
  33 #include <math.h>
  34 #include <limits.h>
  35
  36 #define ALT_REF_MC_ENABLED 1    // dis/enable MC in AltRef filtering
  37 #define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering
  38
  39 #if VP8_TEMPORAL_ALT_REF
  40
  41 static void vp8_temporal_filter_predictors_mb_c
  42 (
  43     MACROBLOCKD *x,
  44     unsigned char *y_mb_ptr,
  45     unsigned char *u_mb_ptr,
  46     unsigned char *v_mb_ptr,
  47     int stride,
  48     int mv_row,
  49     int mv_col,
  50     unsigned char *pred
  51 )
  52 {
  53     int offset;
  54     unsigned char *yptr, *uptr, *vptr;
  55
  56     // Y
  57     yptr = y_mb_ptr + (mv_row >> 3) * stride + (mv_col >> 3);
  58
  59     if ((mv_row | mv_col) & 7)
  60     {
  61         x->subpixel_predict16x16(yptr, stride,
  62                                     mv_col & 7, mv_row & 7, &pred[0], 16);
  63     }
  64     else
  65     {
  66         RECON_INVOKE(&x->rtcd->recon, copy16x16)(yptr, stride, &pred[0], 16);
  67     }
  68
  69     // U & V
  70     mv_row >>= 1;
  71     mv_col >>= 1;
  72     stride = (stride + 1) >> 1;
  73     offset = (mv_row >> 3) * stride + (mv_col >> 3);
  74     uptr = u_mb_ptr + offset;
  75     vptr = v_mb_ptr + offset;
  76
  77     if ((mv_row | mv_col) & 7)
  78     {
  79         x->subpixel_predict8x8(uptr, stride,
  80                             mv_col & 7, mv_row & 7, &pred[256], 8);
  81         x->subpixel_predict8x8(vptr, stride,
  82                             mv_col & 7, mv_row & 7, &pred[320], 8);
  83     }
  84     else
  85     {
  86         RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, stride, &pred[256], 8);
  87         RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, stride, &pred[320], 8);
  88     }
  89 }
  90 void vp8_temporal_filter_apply_c
  91 (
  92     unsigned char *frame1,
  93     unsigned int stride,
  94     unsigned char *frame2,
  95     unsigned int block_size,
  96     int strength,
  97     int filter_weight,
  98     unsigned int *accumulator,
  99     unsigned short *count
 100 )
 101 {
 102     int i, j, k;
 103     int modifier;
 104     int byte = 0;
 105
 106     for (i = 0,k = 0; i < block_size; i++)
 107     {
 108         for (j = 0; j < block_size; j++, k++)
 109         {
 110
 111             int src_byte = frame1[byte];
 112             int pixel_value = *frame2++;
 113
 114             modifier   = src_byte - pixel_value;
 115             // This is an integer approximation of:
 116             // float coeff = (3.0 * modifer * modifier) / pow(2, strength);
 117             // modifier =  (int)roundf(coeff > 16 ? 0 : 16-coeff);
 118             modifier  *= modifier;
 119             modifier  *= 3;
 120             modifier  += 1 << (strength - 1);
 121             modifier >>= strength;
 122
 123             if (modifier > 16)
 124                 modifier = 16;
 125
 126             modifier = 16 - modifier;
 127             modifier *= filter_weight;
 128
 129             count[k] += modifier;
 130             accumulator[k] += modifier * pixel_value;
 131
 132             byte++;
 133         }
 134
 135         byte += stride - block_size;
 136     }
 137 }
 138
 139 #if ALT_REF_MC_ENABLED
 140 static int dummy_cost[2*mv_max+1];
 141
 142 static int vp8_temporal_filter_find_matching_mb_c
 143 (
 144     VP8_COMP *cpi,
 145     YV12_BUFFER_CONFIG *arf_frame,
 146     YV12_BUFFER_CONFIG *frame_ptr,
 147     int mb_offset,
 148     int error_thresh
 149 )
 150 {
 151     MACROBLOCK *x = &cpi->mb;
 152     int thissme;
 153     int step_param;
 154     int further_steps;
 155     int n = 0;
 156     int sadpb = x->sadperbit16;
 157     int bestsme = INT_MAX;
 158     int num00 = 0;
 159
 160     BLOCK *b = &x->block[0];
 161     BLOCKD *d = &x->e_mbd.block[0];
 162     MV best_ref_mv1 = {0,0};
 163
 164     int *mvcost[2]    = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] };
 165     int *mvsadcost[2] = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] };
 166
 167     // Save input state
 168     unsigned char **base_src = b->base_src;
 169     int src = b->src;
 170     int src_stride = b->src_stride;
 171     unsigned char **base_pre = d->base_pre;
 172     int pre = d->pre;
 173     int pre_stride = d->pre_stride;
 174
 175     // Setup frame pointers
 176     b->base_src = &arf_frame->y_buffer;
 177     b->src_stride = arf_frame->y_stride;
 178     b->src = mb_offset;
 179
 180     d->base_pre = &frame_ptr->y_buffer;
 181     d->pre_stride = frame_ptr->y_stride;
 182     d->pre = mb_offset;
 183
 184     // Further step/diamond searches as necessary
 185     if (cpi->Speed < 8)
 186     {
 187         step_param = cpi->sf.first_step +
 188                     ((cpi->Speed > 5) ? 1 : 0);
 189         further_steps =
 190             (cpi->sf.max_step_search_steps - 1)-step_param;
 191     }
 192     else
 193     {
 194         step_param = cpi->sf.first_step + 2;
 195         further_steps = 0;
 196     }
 197
 198     /*cpi->sf.search_method == HEX*/
 199     // TODO Check that the 16x16 vf & sdf are selected here
 200     bestsme = vp8_hex_search(x, b, d,
 201         &best_ref_mv1, &d->bmi.mv.as_mv,
 202         step_param,
 203         sadpb/*x->errorperbit*/,
 204         &num00, &cpi->fn_ptr[BLOCK_16X16],
 205         mvsadcost, mvcost, &best_ref_mv1);
 206
 207 #if ALT_REF_SUBPEL_ENABLED
 208     // Try sub-pixel MC?
 209     //if (bestsme > error_thresh && bestsme < INT_MAX)
 210     {
 211         bestsme = cpi->find_fractional_mv_step(x, b, d,
 212                     &d->bmi.mv.as_mv, &best_ref_mv1,
 213                     x->errorperbit, &cpi->fn_ptr[BLOCK_16X16],
 214                     mvcost);
 215     }
 216 #endif
 217
 218     // Save input state
 219     b->base_src = base_src;
 220     b->src = src;
 221     b->src_stride = src_stride;
 222     d->base_pre = base_pre;
 223     d->pre = pre;
 224     d->pre_stride = pre_stride;
 225
 226     return bestsme;
 227 }
 228 #endif
 229
 230 static void vp8_temporal_filter_iterate_c
 231 (
 232     VP8_COMP *cpi,
 233     int frame_count,
 234     int alt_ref_index,
 235     int strength
 236 )
 237 {
 238     int byte;
 239     int frame;
 240     int mb_col, mb_row;
 241     unsigned int filter_weight;
 242     int mb_cols = cpi->common.mb_cols;
 243     int mb_rows = cpi->common.mb_rows;
 244     int MBs  = cpi->common.MBs;
 245     int mb_y_offset = 0;
 246     int mb_uv_offset = 0;
 247     DECLARE_ALIGNED_ARRAY(16, unsigned int, accumulator, 16*16 + 8*8 + 8*8);
 248     DECLARE_ALIGNED_ARRAY(16, unsigned short, count, 16*16 + 8*8 + 8*8);
 249     MACROBLOCKD *mbd = &cpi->mb.e_mbd;
 250     YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index];
 251     unsigned char *dst1, *dst2;
 252     DECLARE_ALIGNED_ARRAY(16, unsigned char,  predictor, 16*16 + 8*8 + 8*8);
 253
 254     // Save input state
 255     unsigned char *y_buffer = mbd->pre.y_buffer;
 256     unsigned char *u_buffer = mbd->pre.u_buffer;
 257     unsigned char *v_buffer = mbd->pre.v_buffer;
 258
 259     for (mb_row = 0; mb_row < mb_rows; mb_row++)
 260     {
 261 #if ALT_REF_MC_ENABLED
 262         // Reduced search extent by 3 for 6-tap filter & smaller UMV border
 263         cpi->mb.mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 19));
 264         cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16)
 265                                 + (VP8BORDERINPIXELS - 19);
 266 #endif
 267
 268         for (mb_col = 0; mb_col < mb_cols; mb_col++)
 269         {
 270             int i, j, k, w;
 271             int weight_cap;
 272             int stride;
 273
 274             vpx_memset(accumulator, 0, 384*sizeof(unsigned int));
 275             vpx_memset(count, 0, 384*sizeof(unsigned short));
 276
 277 #if ALT_REF_MC_ENABLED
 278             // Reduced search extent by 3 for 6-tap filter & smaller UMV border
 279             cpi->mb.mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 19));
 280             cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16)
 281                                     + (VP8BORDERINPIXELS - 19);
 282 #endif
 283
 284             for (frame = 0; frame < frame_count; frame++)
 285             {
 286                 int err = 0;
 287
 288                 if (cpi->frames[frame] == NULL)
 289                     continue;
 290
 291                 mbd->block[0].bmi.mv.as_mv.row = 0;
 292                 mbd->block[0].bmi.mv.as_mv.col = 0;
 293
 294 #if ALT_REF_MC_ENABLED
 295 #define THRESH_LOW   10000
 296 #define THRESH_HIGH  20000
 297
 298                 // Find best match in this frame by MC
 299                 err = vp8_temporal_filter_find_matching_mb_c
 300                       (cpi,
 301                        cpi->frames[alt_ref_index],
 302                        cpi->frames[frame],
 303                        mb_y_offset,
 304                        THRESH_LOW);
 305
 306 #endif
 307                 // Assign higher weight to matching MB if it's error
 308                 // score is lower. If not applying MC default behavior
 309                 // is to weight all MBs equal.
 310                 filter_weight = err<THRESH_LOW
 311                                   ? 2 : err<THRESH_HIGH ? 1 : 0;
 312
 313                 if (filter_weight != 0)
 314                 {
 315                     // Construct the predictors
 316                     vp8_temporal_filter_predictors_mb_c
 317                         (mbd,
 318                          cpi->frames[frame]->y_buffer + mb_y_offset,
 319                          cpi->frames[frame]->u_buffer + mb_uv_offset,
 320                          cpi->frames[frame]->v_buffer + mb_uv_offset,
 321                          cpi->frames[frame]->y_stride,
 322                          mbd->block[0].bmi.mv.as_mv.row,
 323                          mbd->block[0].bmi.mv.as_mv.col,
 324                          predictor);
 325
 326                     // Apply the filter (YUV)
 327                     TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply)
 328                         (f->y_buffer + mb_y_offset,
 329                          f->y_stride,
 330                          predictor,
 331                          16,
 332                          strength,
 333                          filter_weight,
 334                          accumulator,
 335                          count);
 336
 337                     TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply)
 338                         (f->u_buffer + mb_uv_offset,
 339                          f->uv_stride,
 340                          predictor + 256,
 341                          8,
 342                          strength,
 343                          filter_weight,
 344                          accumulator + 256,
 345                          count + 256);
 346
 347                     TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply)
 348                         (f->v_buffer + mb_uv_offset,
 349                          f->uv_stride,
 350                          predictor + 320,
 351                          8,
 352                          strength,
 353                          filter_weight,
 354                          accumulator + 320,
 355                          count + 320);
 356                 }
 357             }
 358
 359             // Normalize filter output to produce AltRef frame
 360             dst1 = cpi->alt_ref_buffer.source_buffer.y_buffer;
 361             stride = cpi->alt_ref_buffer.source_buffer.y_stride;
 362             byte = mb_y_offset;
 363             for (i = 0,k = 0; i < 16; i++)
 364             {
 365                 for (j = 0; j < 16; j++, k++)
 366                 {
 367                     unsigned int pval = accumulator[k] + (count[k] >> 1);
 368                     pval *= cpi->fixed_divide[count[k]];
 369                     pval >>= 19;
 370
 371                     dst1[byte] = (unsigned char)pval;
 372
 373                     // move to next pixel
 374                     byte++;
 375                 }
 376
 377                 byte += stride - 16;
 378             }
 379
 380             dst1 = cpi->alt_ref_buffer.source_buffer.u_buffer;
 381             dst2 = cpi->alt_ref_buffer.source_buffer.v_buffer;
 382             stride = cpi->alt_ref_buffer.source_buffer.uv_stride;
 383             byte = mb_uv_offset;
 384             for (i = 0,k = 256; i < 8; i++)
 385             {
 386                 for (j = 0; j < 8; j++, k++)
 387                 {
 388                     int m=k+64;
 389
 390                     // U
 391                     unsigned int pval = accumulator[k] + (count[k] >> 1);
 392                     pval *= cpi->fixed_divide[count[k]];
 393                     pval >>= 19;
 394                     dst1[byte] = (unsigned char)pval;
 395
 396                     // V
 397                     pval = accumulator[m] + (count[m] >> 1);
 398                     pval *= cpi->fixed_divide[count[m]];
 399                     pval >>= 19;
 400                     dst2[byte] = (unsigned char)pval;
 401
 402                     // move to next pixel
 403                     byte++;
 404                 }
 405
 406                 byte += stride - 8;
 407             }
 408
 409             mb_y_offset += 16;
 410             mb_uv_offset += 8;
 411         }
 412
 413         mb_y_offset += 16*(f->y_stride-mb_cols);
 414         mb_uv_offset += 8*(f->uv_stride-mb_cols);
 415     }
 416
 417     // Restore input state
 418     mbd->pre.y_buffer = y_buffer;
 419     mbd->pre.u_buffer = u_buffer;
 420     mbd->pre.v_buffer = v_buffer;
 421 }
 422
 423 void vp8_temporal_filter_prepare_c
 424 (
 425     VP8_COMP *cpi
 426 )
 427 {
 428     int frame = 0;
 429
 430     int num_frames_backward = 0;
 431     int num_frames_forward = 0;
 432     int frames_to_blur_backward = 0;
 433     int frames_to_blur_forward = 0;
 434     int frames_to_blur = 0;
 435     int start_frame = 0;
 436     unsigned int filtered = 0;
 437
 438     int strength = cpi->oxcf.arnr_strength;
 439
 440     int blur_type = cpi->oxcf.arnr_type;
 441
 442     int max_frames = cpi->active_arnr_frames;
 443
 444     num_frames_backward = cpi->last_alt_ref_sei - cpi->source_encode_index;
 445
 446     if (num_frames_backward < 0)
 447         num_frames_backward += cpi->oxcf.lag_in_frames;
 448
 449     num_frames_forward = cpi->oxcf.lag_in_frames - (num_frames_backward + 1);
 450
 451     switch (blur_type)
 452     {
 453     case 1:
 454         /////////////////////////////////////////
 455         // Backward Blur
 456
 457         frames_to_blur_backward = num_frames_backward;
 458
 459         if (frames_to_blur_backward >= max_frames)
 460             frames_to_blur_backward = max_frames - 1;
 461
 462         frames_to_blur = frames_to_blur_backward + 1;
 463         break;
 464
 465     case 2:
 466         /////////////////////////////////////////
 467         // Forward Blur
 468
 469         frames_to_blur_forward = num_frames_forward;
 470
 471         if (frames_to_blur_forward >= max_frames)
 472             frames_to_blur_forward = max_frames - 1;
 473
 474         frames_to_blur = frames_to_blur_forward + 1;
 475         break;
 476
 477     case 3:
 478     default:
 479         /////////////////////////////////////////
 480         // Center Blur
 481         frames_to_blur_forward = num_frames_forward;
 482         frames_to_blur_backward = num_frames_backward;
 483
 484         if (frames_to_blur_forward > frames_to_blur_backward)
 485             frames_to_blur_forward = frames_to_blur_backward;
 486
 487         if (frames_to_blur_backward > frames_to_blur_forward)
 488             frames_to_blur_backward = frames_to_blur_forward;
 489
 490         // When max_frames is even we have 1 more frame backward than forward
 491         if (frames_to_blur_forward > (max_frames - 1) / 2)
 492             frames_to_blur_forward = ((max_frames - 1) / 2);
 493
 494         if (frames_to_blur_backward > (max_frames / 2))
 495             frames_to_blur_backward = (max_frames / 2);
 496
 497         frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1;
 498         break;
 499     }
 500
 501     start_frame = (cpi->last_alt_ref_sei
 502                     + frames_to_blur_forward) % cpi->oxcf.lag_in_frames;
 503
 504 #ifdef DEBUGFWG
 505     // DEBUG FWG
 506     printf("max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d start:%d"
 507            , max_frames
 508            , num_frames_backward
 509            , num_frames_forward
 510            , frames_to_blur
 511            , frames_to_blur_backward
 512            , frames_to_blur_forward
 513            , cpi->source_encode_index
 514            , cpi->last_alt_ref_sei
 515            , start_frame);
 516 #endif
 517
 518     // Setup frame pointers, NULL indicates frame not included in filter
 519     vpx_memset(cpi->frames, 0, max_frames*sizeof(YV12_BUFFER_CONFIG *));
 520     for (frame = 0; frame < frames_to_blur; frame++)
 521     {
 522         int which_buffer =  start_frame - frame;
 523
 524         if (which_buffer < 0)
 525             which_buffer += cpi->oxcf.lag_in_frames;
 526
 527         cpi->frames[frames_to_blur-1-frame]
 528                 = &cpi->src_buffer[which_buffer].source_buffer;
 529     }
 530
 531     vp8_temporal_filter_iterate_c (
 532         cpi,
 533         frames_to_blur,
 534         frames_to_blur_backward,
 535         strength );
 536 }
 537 #endif