/*
 * libvpx: vp8/encoder/temporal_filter.c
 * (extracted from gitweb; merge: "Skip computation of distortion in
 *  vp8_pick_inter_mode if active_map is used"
 *  blob b9ade1c6cfa4f7e6a46f0fc79f68a63393e23d76)
 */
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include "vp8/common/onyxc_int.h"
#include "onyx_int.h"
#include "vp8/common/systemdependent.h"
#include "quantize.h"
#include "vp8/common/alloccommon.h"
#include "mcomp.h"
#include "firstpass.h"
#include "psnr.h"
#include "vpx_scale/vpxscale.h"
#include "vp8/common/extend.h"
#include "ratectrl.h"
#include "vp8/common/quant_common.h"
#include "segmentation.h"
#include "vp8/common/g_common.h"
#include "vpx_scale/yv12extend.h"
#include "vpx_mem/vpx_mem.h"
#include "vp8/common/swapyv12buffer.h"
#include "vp8/common/threading.h"
#include "vpx_ports/vpx_timer.h"

#include <math.h>
#include <limits.h>
#define ALT_REF_MC_ENABLED 1     // dis/enable MC in AltRef filtering
#define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering

#if VP8_TEMPORAL_ALT_REF
40 static void vp8_temporal_filter_predictors_mb_c
42 MACROBLOCKD *x,
43 unsigned char *y_mb_ptr,
44 unsigned char *u_mb_ptr,
45 unsigned char *v_mb_ptr,
46 int stride,
47 int mv_row,
48 int mv_col,
49 unsigned char *pred
52 int offset;
53 unsigned char *yptr, *uptr, *vptr;
55 // Y
56 yptr = y_mb_ptr + (mv_row >> 3) * stride + (mv_col >> 3);
58 if ((mv_row | mv_col) & 7)
60 x->subpixel_predict16x16(yptr, stride,
61 mv_col & 7, mv_row & 7, &pred[0], 16);
63 else
65 RECON_INVOKE(&x->rtcd->recon, copy16x16)(yptr, stride, &pred[0], 16);
68 // U & V
69 mv_row >>= 1;
70 mv_col >>= 1;
71 stride = (stride + 1) >> 1;
72 offset = (mv_row >> 3) * stride + (mv_col >> 3);
73 uptr = u_mb_ptr + offset;
74 vptr = v_mb_ptr + offset;
76 if ((mv_row | mv_col) & 7)
78 x->subpixel_predict8x8(uptr, stride,
79 mv_col & 7, mv_row & 7, &pred[256], 8);
80 x->subpixel_predict8x8(vptr, stride,
81 mv_col & 7, mv_row & 7, &pred[320], 8);
83 else
85 RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, stride, &pred[256], 8);
86 RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, stride, &pred[320], 8);
89 void vp8_temporal_filter_apply_c
91 unsigned char *frame1,
92 unsigned int stride,
93 unsigned char *frame2,
94 unsigned int block_size,
95 int strength,
96 int filter_weight,
97 unsigned int *accumulator,
98 unsigned short *count
101 int i, j, k;
102 int modifier;
103 int byte = 0;
105 for (i = 0,k = 0; i < block_size; i++)
107 for (j = 0; j < block_size; j++, k++)
110 int src_byte = frame1[byte];
111 int pixel_value = *frame2++;
113 modifier = src_byte - pixel_value;
114 // This is an integer approximation of:
115 // float coeff = (3.0 * modifer * modifier) / pow(2, strength);
116 // modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff);
117 modifier *= modifier;
118 modifier *= 3;
119 modifier += 1 << (strength - 1);
120 modifier >>= strength;
122 if (modifier > 16)
123 modifier = 16;
125 modifier = 16 - modifier;
126 modifier *= filter_weight;
128 count[k] += modifier;
129 accumulator[k] += modifier * pixel_value;
131 byte++;
134 byte += stride - block_size;
138 #if ALT_REF_MC_ENABLED
139 static int dummy_cost[2*mv_max+1];
141 static int vp8_temporal_filter_find_matching_mb_c
143 VP8_COMP *cpi,
144 YV12_BUFFER_CONFIG *arf_frame,
145 YV12_BUFFER_CONFIG *frame_ptr,
146 int mb_offset,
147 int error_thresh
150 MACROBLOCK *x = &cpi->mb;
151 int step_param;
152 int further_steps;
153 int sadpb = x->sadperbit16;
154 int bestsme = INT_MAX;
156 BLOCK *b = &x->block[0];
157 BLOCKD *d = &x->e_mbd.block[0];
158 int_mv best_ref_mv1;
159 int_mv best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
161 int *mvcost[2] = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] };
162 int *mvsadcost[2] = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] };
164 // Save input state
165 unsigned char **base_src = b->base_src;
166 int src = b->src;
167 int src_stride = b->src_stride;
168 unsigned char **base_pre = d->base_pre;
169 int pre = d->pre;
170 int pre_stride = d->pre_stride;
172 best_ref_mv1.as_int = 0;
173 best_ref_mv1_full.as_mv.col = best_ref_mv1.as_mv.col >>3;
174 best_ref_mv1_full.as_mv.row = best_ref_mv1.as_mv.row >>3;
176 // Setup frame pointers
177 b->base_src = &arf_frame->y_buffer;
178 b->src_stride = arf_frame->y_stride;
179 b->src = mb_offset;
181 d->base_pre = &frame_ptr->y_buffer;
182 d->pre_stride = frame_ptr->y_stride;
183 d->pre = mb_offset;
185 // Further step/diamond searches as necessary
186 if (cpi->Speed < 8)
188 step_param = cpi->sf.first_step +
189 ((cpi->Speed > 5) ? 1 : 0);
190 further_steps =
191 (cpi->sf.max_step_search_steps - 1)-step_param;
193 else
195 step_param = cpi->sf.first_step + 2;
196 further_steps = 0;
199 /*cpi->sf.search_method == HEX*/
200 // TODO Check that the 16x16 vf & sdf are selected here
201 bestsme = vp8_hex_search(x, b, d,
202 &best_ref_mv1_full, &d->bmi.mv,
203 step_param,
204 sadpb,
205 &cpi->fn_ptr[BLOCK_16X16],
206 mvsadcost, mvcost, &best_ref_mv1);
208 #if ALT_REF_SUBPEL_ENABLED
209 // Try sub-pixel MC?
210 //if (bestsme > error_thresh && bestsme < INT_MAX)
212 int distortion;
213 unsigned int sse;
214 bestsme = cpi->find_fractional_mv_step(x, b, d,
215 &d->bmi.mv, &best_ref_mv1,
216 x->errorperbit, &cpi->fn_ptr[BLOCK_16X16],
217 mvcost, &distortion, &sse);
219 #endif
221 // Save input state
222 b->base_src = base_src;
223 b->src = src;
224 b->src_stride = src_stride;
225 d->base_pre = base_pre;
226 d->pre = pre;
227 d->pre_stride = pre_stride;
229 return bestsme;
231 #endif
233 static void vp8_temporal_filter_iterate_c
235 VP8_COMP *cpi,
236 int frame_count,
237 int alt_ref_index,
238 int strength
241 int byte;
242 int frame;
243 int mb_col, mb_row;
244 unsigned int filter_weight;
245 int mb_cols = cpi->common.mb_cols;
246 int mb_rows = cpi->common.mb_rows;
247 int mb_y_offset = 0;
248 int mb_uv_offset = 0;
249 DECLARE_ALIGNED_ARRAY(16, unsigned int, accumulator, 16*16 + 8*8 + 8*8);
250 DECLARE_ALIGNED_ARRAY(16, unsigned short, count, 16*16 + 8*8 + 8*8);
251 MACROBLOCKD *mbd = &cpi->mb.e_mbd;
252 YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index];
253 unsigned char *dst1, *dst2;
254 DECLARE_ALIGNED_ARRAY(16, unsigned char, predictor, 16*16 + 8*8 + 8*8);
256 // Save input state
257 unsigned char *y_buffer = mbd->pre.y_buffer;
258 unsigned char *u_buffer = mbd->pre.u_buffer;
259 unsigned char *v_buffer = mbd->pre.v_buffer;
261 for (mb_row = 0; mb_row < mb_rows; mb_row++)
263 #if ALT_REF_MC_ENABLED
264 // Source frames are extended to 16 pixels. This is different than
265 // L/A/G reference frames that have a border of 32 (VP8BORDERINPIXELS)
266 // A 6 tap filter is used for motion search. This requires 2 pixels
267 // before and 3 pixels after. So the largest Y mv on a border would
268 // then be 16 - 3. The UV blocks are half the size of the Y and
269 // therefore only extended by 8. The largest mv that a UV block
270 // can support is 8 - 3. A UV mv is half of a Y mv.
271 // (16 - 3) >> 1 == 6 which is greater than 8 - 3.
272 // To keep the mv in play for both Y and UV planes the max that it
273 // can be on a border is therefore 16 - 5.
274 cpi->mb.mv_row_min = -((mb_row * 16) + (16 - 5));
275 cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16)
276 + (16 - 5);
277 #endif
279 for (mb_col = 0; mb_col < mb_cols; mb_col++)
281 int i, j, k;
282 int stride;
284 vpx_memset(accumulator, 0, 384*sizeof(unsigned int));
285 vpx_memset(count, 0, 384*sizeof(unsigned short));
287 #if ALT_REF_MC_ENABLED
288 cpi->mb.mv_col_min = -((mb_col * 16) + (16 - 5));
289 cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16)
290 + (16 - 5);
291 #endif
293 for (frame = 0; frame < frame_count; frame++)
295 int err = 0;
297 if (cpi->frames[frame] == NULL)
298 continue;
300 mbd->block[0].bmi.mv.as_mv.row = 0;
301 mbd->block[0].bmi.mv.as_mv.col = 0;
303 #if ALT_REF_MC_ENABLED
304 #define THRESH_LOW 10000
305 #define THRESH_HIGH 20000
307 // Find best match in this frame by MC
308 err = vp8_temporal_filter_find_matching_mb_c
309 (cpi,
310 cpi->frames[alt_ref_index],
311 cpi->frames[frame],
312 mb_y_offset,
313 THRESH_LOW);
315 #endif
316 // Assign higher weight to matching MB if it's error
317 // score is lower. If not applying MC default behavior
318 // is to weight all MBs equal.
319 filter_weight = err<THRESH_LOW
320 ? 2 : err<THRESH_HIGH ? 1 : 0;
322 if (filter_weight != 0)
324 // Construct the predictors
325 vp8_temporal_filter_predictors_mb_c
326 (mbd,
327 cpi->frames[frame]->y_buffer + mb_y_offset,
328 cpi->frames[frame]->u_buffer + mb_uv_offset,
329 cpi->frames[frame]->v_buffer + mb_uv_offset,
330 cpi->frames[frame]->y_stride,
331 mbd->block[0].bmi.mv.as_mv.row,
332 mbd->block[0].bmi.mv.as_mv.col,
333 predictor);
335 // Apply the filter (YUV)
336 TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply)
337 (f->y_buffer + mb_y_offset,
338 f->y_stride,
339 predictor,
341 strength,
342 filter_weight,
343 accumulator,
344 count);
346 TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply)
347 (f->u_buffer + mb_uv_offset,
348 f->uv_stride,
349 predictor + 256,
351 strength,
352 filter_weight,
353 accumulator + 256,
354 count + 256);
356 TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply)
357 (f->v_buffer + mb_uv_offset,
358 f->uv_stride,
359 predictor + 320,
361 strength,
362 filter_weight,
363 accumulator + 320,
364 count + 320);
368 // Normalize filter output to produce AltRef frame
369 dst1 = cpi->alt_ref_buffer.y_buffer;
370 stride = cpi->alt_ref_buffer.y_stride;
371 byte = mb_y_offset;
372 for (i = 0,k = 0; i < 16; i++)
374 for (j = 0; j < 16; j++, k++)
376 unsigned int pval = accumulator[k] + (count[k] >> 1);
377 pval *= cpi->fixed_divide[count[k]];
378 pval >>= 19;
380 dst1[byte] = (unsigned char)pval;
382 // move to next pixel
383 byte++;
386 byte += stride - 16;
389 dst1 = cpi->alt_ref_buffer.u_buffer;
390 dst2 = cpi->alt_ref_buffer.v_buffer;
391 stride = cpi->alt_ref_buffer.uv_stride;
392 byte = mb_uv_offset;
393 for (i = 0,k = 256; i < 8; i++)
395 for (j = 0; j < 8; j++, k++)
397 int m=k+64;
399 // U
400 unsigned int pval = accumulator[k] + (count[k] >> 1);
401 pval *= cpi->fixed_divide[count[k]];
402 pval >>= 19;
403 dst1[byte] = (unsigned char)pval;
405 // V
406 pval = accumulator[m] + (count[m] >> 1);
407 pval *= cpi->fixed_divide[count[m]];
408 pval >>= 19;
409 dst2[byte] = (unsigned char)pval;
411 // move to next pixel
412 byte++;
415 byte += stride - 8;
418 mb_y_offset += 16;
419 mb_uv_offset += 8;
422 mb_y_offset += 16*(f->y_stride-mb_cols);
423 mb_uv_offset += 8*(f->uv_stride-mb_cols);
426 // Restore input state
427 mbd->pre.y_buffer = y_buffer;
428 mbd->pre.u_buffer = u_buffer;
429 mbd->pre.v_buffer = v_buffer;
432 void vp8_temporal_filter_prepare_c
434 VP8_COMP *cpi,
435 int distance
438 int frame = 0;
440 int num_frames_backward = 0;
441 int num_frames_forward = 0;
442 int frames_to_blur_backward = 0;
443 int frames_to_blur_forward = 0;
444 int frames_to_blur = 0;
445 int start_frame = 0;
447 int strength = cpi->oxcf.arnr_strength;
449 int blur_type = cpi->oxcf.arnr_type;
451 int max_frames = cpi->active_arnr_frames;
453 num_frames_backward = distance;
454 num_frames_forward = vp8_lookahead_depth(cpi->lookahead)
455 - (num_frames_backward + 1);
457 switch (blur_type)
459 case 1:
460 /////////////////////////////////////////
461 // Backward Blur
463 frames_to_blur_backward = num_frames_backward;
465 if (frames_to_blur_backward >= max_frames)
466 frames_to_blur_backward = max_frames - 1;
468 frames_to_blur = frames_to_blur_backward + 1;
469 break;
471 case 2:
472 /////////////////////////////////////////
473 // Forward Blur
475 frames_to_blur_forward = num_frames_forward;
477 if (frames_to_blur_forward >= max_frames)
478 frames_to_blur_forward = max_frames - 1;
480 frames_to_blur = frames_to_blur_forward + 1;
481 break;
483 case 3:
484 default:
485 /////////////////////////////////////////
486 // Center Blur
487 frames_to_blur_forward = num_frames_forward;
488 frames_to_blur_backward = num_frames_backward;
490 if (frames_to_blur_forward > frames_to_blur_backward)
491 frames_to_blur_forward = frames_to_blur_backward;
493 if (frames_to_blur_backward > frames_to_blur_forward)
494 frames_to_blur_backward = frames_to_blur_forward;
496 // When max_frames is even we have 1 more frame backward than forward
497 if (frames_to_blur_forward > (max_frames - 1) / 2)
498 frames_to_blur_forward = ((max_frames - 1) / 2);
500 if (frames_to_blur_backward > (max_frames / 2))
501 frames_to_blur_backward = (max_frames / 2);
503 frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1;
504 break;
507 start_frame = distance + frames_to_blur_forward;
509 #ifdef DEBUGFWG
510 // DEBUG FWG
511 printf("max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d start:%d"
512 , max_frames
513 , num_frames_backward
514 , num_frames_forward
515 , frames_to_blur
516 , frames_to_blur_backward
517 , frames_to_blur_forward
518 , cpi->source_encode_index
519 , cpi->last_alt_ref_sei
520 , start_frame);
521 #endif
523 // Setup frame pointers, NULL indicates frame not included in filter
524 vpx_memset(cpi->frames, 0, max_frames*sizeof(YV12_BUFFER_CONFIG *));
525 for (frame = 0; frame < frames_to_blur; frame++)
527 int which_buffer = start_frame - frame;
528 struct lookahead_entry* buf = vp8_lookahead_peek(cpi->lookahead,
529 which_buffer);
530 cpi->frames[frames_to_blur-1-frame] = &buf->img;
533 vp8_temporal_filter_iterate_c (
534 cpi,
535 frames_to_blur,
536 frames_to_blur_backward,
537 strength );
#endif