vp8/encoder/temporal_filter.c
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "onyxc_int.h"
#include "onyx_int.h"
#include "systemdependent.h"
#include "quantize.h"
#include "alloccommon.h"
#include "mcomp.h"
#include "firstpass.h"
#include "psnr.h"
#include "vpx_scale/vpxscale.h"
#include "extend.h"
#include "ratectrl.h"
#include "quant_common.h"
#include "segmentation.h"
#include "g_common.h"
#include "vpx_scale/yv12extend.h"
#include "postproc.h"
#include "vpx_mem/vpx_mem.h"
#include "swapyv12buffer.h"
#include "threading.h"
#include "vpx_ports/vpx_timer.h"
#include "vpxerrors.h"

#include <math.h>
#include <limits.h>

#define ALT_REF_MC_ENABLED 1         // dis/enable MC in AltRef filtering
#define ALT_REF_SUBPEL_ENABLED 1     // dis/enable subpel in MC AltRef filtering

#if VP8_TEMPORAL_ALT_REF
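
// Builds the motion compensated predictor for one macroblock: a 16x16 luma
// block at pred[0] and two 8x8 chroma blocks at pred[256] and pred[320].
// The low three bits of the motion vector select the sub-pel filter phase
// (whole-pel offsets come from mv >> 3); the MV and stride are halved for
// the chroma planes because U and V are subsampled by two.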
static void vp8_temporal_filter_predictors_mb_c
(
    MACROBLOCKD *x,
    unsigned char *y_mb_ptr,
    unsigned char *u_mb_ptr,
    unsigned char *v_mb_ptr,
    int stride,
    int mv_row,
    int mv_col,
    unsigned char *pred
)
{
    int offset;
    unsigned char *yptr, *uptr, *vptr;

    // Y
    yptr = y_mb_ptr + (mv_row >> 3) * stride + (mv_col >> 3);

    if ((mv_row | mv_col) & 7)
    {
        x->subpixel_predict16x16(yptr, stride,
                                 mv_col & 7, mv_row & 7, &pred[0], 16);
    }
    else
    {
        RECON_INVOKE(&x->rtcd->recon, copy16x16)(yptr, stride, &pred[0], 16);
    }

    // U & V
    mv_row >>= 1;
    mv_col >>= 1;
    stride >>= 1;
    offset = (mv_row >> 3) * stride + (mv_col >> 3);
    uptr = u_mb_ptr + offset;
    vptr = v_mb_ptr + offset;

    if ((mv_row | mv_col) & 7)
    {
        x->subpixel_predict8x8(uptr, stride,
                               mv_col & 7, mv_row & 7, &pred[256], 8);
        x->subpixel_predict8x8(vptr, stride,
                               mv_col & 7, mv_row & 7, &pred[320], 8);
    }
    else
    {
        RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, stride, &pred[256], 8);
        RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, stride, &pred[320], 8);
    }
}
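
// Accumulates one predictor block into the running sums that later form the
// filtered AltRef macroblock.  Each pixel contributes with a weight of
// (16 - min(16, round(3 * diff^2 / 2^strength))) * filter_weight, where diff
// is the difference between the original and predicted pixel.  For example,
// with strength == 6 a difference of 8 gives (3*64 + 32) >> 6 = 3, i.e. a
// per-pixel weight of 13 * filter_weight.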
void vp8_temporal_filter_apply_c
(
    unsigned char *frame1,
    unsigned int stride,
    unsigned char *frame2,
    unsigned int block_size,
    int strength,
    int filter_weight,
    unsigned int *accumulator,
    unsigned short *count
)
{
    int i, j, k;
    int modifier;
    int byte = 0;

    for (i = 0, k = 0; i < block_size; i++)
    {
        for (j = 0; j < block_size; j++, k++)
        {
            int src_byte = frame1[byte];
            int pixel_value = *frame2++;

            modifier   = src_byte - pixel_value;
            // This is an integer approximation of:
            // float coeff = (3.0 * modifier * modifier) / pow(2, strength);
            // modifier = (int)roundf(coeff > 16 ? 0 : 16 - coeff);
            modifier  *= modifier;
            modifier  *= 3;
            modifier  += 1 << (strength - 1);
            modifier >>= strength;

            if (modifier > 16)
                modifier = 16;

            modifier = 16 - modifier;
            modifier *= filter_weight;

            count[k] += modifier;
            accumulator[k] += modifier * pixel_value;

            byte++;
        }

        byte += stride - block_size;
    }
}

#if ALT_REF_MC_ENABLED
static int dummy_cost[2*mv_max+1];
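
// Runs a motion search for one macroblock of a candidate frame against the
// co-located macroblock of the ALT_REF frame and returns the best error
// found.  The MV cost tables point at dummy_cost (all zero), so the search
// is driven purely by the block error with no motion vector rate term.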
static int vp8_temporal_filter_find_matching_mb_c
(
    VP8_COMP *cpi,
    YV12_BUFFER_CONFIG *arf_frame,
    YV12_BUFFER_CONFIG *frame_ptr,
    int mb_offset,
    int error_thresh
)
{
    MACROBLOCK *x = &cpi->mb;
    int thissme;
    int step_param;
    int further_steps;
    int n = 0;
    int sadpb = x->sadperbit16;
    int bestsme = INT_MAX;
    int num00 = 0;

    BLOCK *b = &x->block[0];
    BLOCKD *d = &x->e_mbd.block[0];
    MV best_ref_mv1 = {0,0};

    int *mvcost[2]    = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] };
    int *mvsadcost[2] = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] };

    // Save input state
    unsigned char **base_src = b->base_src;
    int src = b->src;
    int src_stride = b->src_stride;
    unsigned char **base_pre = d->base_pre;
    int pre = d->pre;
    int pre_stride = d->pre_stride;

    // Setup frame pointers
    b->base_src = &arf_frame->y_buffer;
    b->src_stride = arf_frame->y_stride;
    b->src = mb_offset;

    d->base_pre = &frame_ptr->y_buffer;
    d->pre_stride = frame_ptr->y_stride;
    d->pre = mb_offset;

    // Further step/diamond searches as necessary
    if (cpi->Speed < 8)
    {
        step_param = cpi->sf.first_step +
                     ((cpi->Speed > 5) ? 1 : 0);
        further_steps =
            (cpi->sf.max_step_search_steps - 1) - step_param;
    }
    else
    {
        step_param = cpi->sf.first_step + 2;
        further_steps = 0;
    }

    if (1/*cpi->sf.search_method == HEX*/)
    {
        // TODO Check that the 16x16 vf & sdf are selected here
        bestsme = vp8_hex_search(x, b, d,
            &best_ref_mv1, &d->bmi.mv.as_mv,
            step_param,
            sadpb/*x->errorperbit*/,
            &num00, &cpi->fn_ptr[BLOCK_16X16],
            mvsadcost, mvcost);
    }
    else
    {
        int mv_x, mv_y;

        bestsme = cpi->diamond_search_sad(x, b, d,
            &best_ref_mv1, &d->bmi.mv.as_mv,
            step_param,
            sadpb / 2/*x->errorperbit*/,
            &num00, &cpi->fn_ptr[BLOCK_16X16],
            mvsadcost, mvcost, &best_ref_mv1); //sadpb < 9

        // Remember the best MV found so far; the refinement loop below falls
        // back to it when a further search does not improve on bestsme.
        mv_y = d->bmi.mv.as_mv.row;
        mv_x = d->bmi.mv.as_mv.col;

        // Further step/diamond searches as necessary
        n = 0;
        //further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;

        n = num00;
        num00 = 0;

        while (n < further_steps)
        {
            n++;

            if (num00)
                num00--;
            else
            {
                thissme = cpi->diamond_search_sad(x, b, d,
                    &best_ref_mv1, &d->bmi.mv.as_mv,
                    step_param + n,
                    sadpb / 4/*x->errorperbit*/,
                    &num00, &cpi->fn_ptr[BLOCK_16X16],
                    mvsadcost, mvcost, &best_ref_mv1); //sadpb = 9

                if (thissme < bestsme)
                {
                    bestsme = thissme;
                    mv_y = d->bmi.mv.as_mv.row;
                    mv_x = d->bmi.mv.as_mv.col;
                }
                else
                {
                    d->bmi.mv.as_mv.row = mv_y;
                    d->bmi.mv.as_mv.col = mv_x;
                }
            }
        }
    }

#if ALT_REF_SUBPEL_ENABLED
    // Try sub-pixel MC?
    //if (bestsme > error_thresh && bestsme < INT_MAX)
    {
        bestsme = cpi->find_fractional_mv_step(x, b, d,
                      &d->bmi.mv.as_mv, &best_ref_mv1,
                      x->errorperbit, &cpi->fn_ptr[BLOCK_16X16],
                      mvcost);
    }
#endif

    // Restore input state
    b->base_src = base_src;
    b->src = src;
    b->src_stride = src_stride;
    d->base_pre = base_pre;
    d->pre = pre;
    d->pre_stride = pre_stride;

    return bestsme;
}
#endif
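
// For each macroblock of the frame at alt_ref_index, motion search the other
// frames in cpi->frames[], build their motion compensated predictors,
// accumulate them with the apply filter above (via TEMPORAL_INVOKE), and
// write the normalized result into the AltRef source buffer.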
static void vp8_temporal_filter_iterate_c
(
    VP8_COMP *cpi,
    int frame_count,
    int alt_ref_index,
    int strength
)
{
    int byte;
    int frame;
    int mb_col, mb_row;
    unsigned int filter_weight[MAX_LAG_BUFFERS];
    unsigned char *mm_ptr = cpi->fp_motion_map;
    int mb_cols = cpi->common.mb_cols;
    int mb_rows = cpi->common.mb_rows;
    int MBs = cpi->common.MBs;
    int mb_y_offset = 0;
    int mb_uv_offset = 0;
    DECLARE_ALIGNED_ARRAY(16, unsigned int, accumulator, 16*16 + 8*8 + 8*8);
    DECLARE_ALIGNED_ARRAY(16, unsigned short, count, 16*16 + 8*8 + 8*8);
    MACROBLOCKD *mbd = &cpi->mb.e_mbd;
    YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index];
    unsigned char *dst1, *dst2;
    DECLARE_ALIGNED_ARRAY(16, unsigned char, predictor, 16*16 + 8*8 + 8*8);

    // Save input state
    unsigned char *y_buffer = mbd->pre.y_buffer;
    unsigned char *u_buffer = mbd->pre.u_buffer;
    unsigned char *v_buffer = mbd->pre.v_buffer;

    if (!cpi->use_weighted_temporal_filter)
    {
        // Temporal filtering is unweighted
        for (frame = 0; frame < frame_count; frame++)
            filter_weight[frame] = 1;
    }

    for (mb_row = 0; mb_row < mb_rows; mb_row++)
    {
#if ALT_REF_MC_ENABLED
        // Reduced search extent by 3 for 6-tap filter & smaller UMV border
        cpi->mb.mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 19));
        cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16)
                             + (VP8BORDERINPIXELS - 19);
#endif

        for (mb_col = 0; mb_col < mb_cols; mb_col++)
        {
            int i, j, k, w;
            int weight_cap;
            int stride;

            vpx_memset(accumulator, 0, 384*sizeof(unsigned int));
            vpx_memset(count, 0, 384*sizeof(unsigned short));

#if ALT_REF_MC_ENABLED
            // Reduced search extent by 3 for 6-tap filter & smaller UMV border
            cpi->mb.mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 19));
            cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16)
                                 + (VP8BORDERINPIXELS - 19);
#endif

            // Read & process macroblock weights from motion map
            if (cpi->use_weighted_temporal_filter)
            {
                weight_cap = 2;

                for (frame = alt_ref_index - 1; frame >= 0; frame--)
                {
                    w = *(mm_ptr + (frame+1)*MBs);
                    filter_weight[frame] = w < weight_cap ? w : weight_cap;
                    weight_cap = w;
                }

                filter_weight[alt_ref_index] = 2;

                weight_cap = 2;

                for (frame = alt_ref_index + 1; frame < frame_count; frame++)
                {
                    w = *(mm_ptr + frame*MBs);
                    filter_weight[frame] = w < weight_cap ? w : weight_cap;
                    weight_cap = w;
                }
            }

            for (frame = 0; frame < frame_count; frame++)
            {
                int err;

                if (cpi->frames[frame] == NULL)
                    continue;

                mbd->block[0].bmi.mv.as_mv.row = 0;
                mbd->block[0].bmi.mv.as_mv.col = 0;

#if ALT_REF_MC_ENABLED
                //if (filter_weight[frame] == 0)
                {
#define THRESH_LOW   10000
#define THRESH_HIGH  20000

                    // Correlation has been lost, try MC
                    err = vp8_temporal_filter_find_matching_mb_c
                          (cpi,
                           cpi->frames[alt_ref_index],
                           cpi->frames[frame],
                           mb_y_offset,
                           THRESH_LOW);

                    if (filter_weight[frame] < 2)
                    {
                        // Set weight depending on error
                        filter_weight[frame] = err < THRESH_LOW
                                               ? 2 : err < THRESH_HIGH ? 1 : 0;
                    }
                }
#endif
                if (filter_weight[frame] != 0)
                {
                    // Construct the predictors
                    vp8_temporal_filter_predictors_mb_c
                        (mbd,
                         cpi->frames[frame]->y_buffer + mb_y_offset,
                         cpi->frames[frame]->u_buffer + mb_uv_offset,
                         cpi->frames[frame]->v_buffer + mb_uv_offset,
                         cpi->frames[frame]->y_stride,
                         mbd->block[0].bmi.mv.as_mv.row,
                         mbd->block[0].bmi.mv.as_mv.col,
                         predictor);

                    // Apply the filter (YUV)
                    TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply)
                        (f->y_buffer + mb_y_offset,
                         f->y_stride,
                         predictor,
                         16,
                         strength,
                         filter_weight[frame],
                         accumulator,
                         count);

                    TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply)
                        (f->u_buffer + mb_uv_offset,
                         f->uv_stride,
                         predictor + 256,
                         8,
                         strength,
                         filter_weight[frame],
                         accumulator + 256,
                         count + 256);

                    TEMPORAL_INVOKE(&cpi->rtcd.temporal, apply)
                        (f->v_buffer + mb_uv_offset,
                         f->uv_stride,
                         predictor + 320,
                         8,
                         strength,
                         filter_weight[frame],
                         accumulator + 320,
                         count + 320);
                }
            }
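
            // accumulator[]/count[] now hold, for each pixel, a weighted sum
            // of predictions and the total weight.  The loops below turn
            // these into 8-bit output: assuming cpi->fixed_divide[n] is set
            // up elsewhere in the encoder as (1 << 19) / n, the multiply and
            // >> 19 amount to a rounded division by count (adding count >> 1
            // first rounds to nearest).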
            // Normalize filter output to produce AltRef frame
            dst1 = cpi->alt_ref_buffer.source_buffer.y_buffer;
            stride = cpi->alt_ref_buffer.source_buffer.y_stride;
            byte = mb_y_offset;
            for (i = 0, k = 0; i < 16; i++)
            {
                for (j = 0; j < 16; j++, k++)
                {
                    unsigned int pval = accumulator[k] + (count[k] >> 1);
                    pval *= cpi->fixed_divide[count[k]];
                    pval >>= 19;

                    dst1[byte] = (unsigned char)pval;

                    // move to next pixel
                    byte++;
                }

                byte += stride - 16;
            }

            dst1 = cpi->alt_ref_buffer.source_buffer.u_buffer;
            dst2 = cpi->alt_ref_buffer.source_buffer.v_buffer;
            stride = cpi->alt_ref_buffer.source_buffer.uv_stride;
            byte = mb_uv_offset;
            for (i = 0, k = 256; i < 8; i++)
            {
                for (j = 0; j < 8; j++, k++)
                {
                    int m = k + 64;

                    // U
                    unsigned int pval = accumulator[k] + (count[k] >> 1);
                    pval *= cpi->fixed_divide[count[k]];
                    pval >>= 19;
                    dst1[byte] = (unsigned char)pval;

                    // V
                    pval = accumulator[m] + (count[m] >> 1);
                    pval *= cpi->fixed_divide[count[m]];
                    pval >>= 19;
                    dst2[byte] = (unsigned char)pval;

                    // move to next pixel
                    byte++;
                }

                byte += stride - 8;
            }

            mm_ptr++;
            mb_y_offset += 16;
            mb_uv_offset += 8;
        }

        mb_y_offset += 16 * (f->y_stride - mb_cols);
        mb_uv_offset += 8 * (f->uv_stride - mb_cols);
    }

    // Restore input state
    mbd->pre.y_buffer = y_buffer;
    mbd->pre.u_buffer = u_buffer;
    mbd->pre.v_buffer = v_buffer;
}
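
// Decide how many of the lagged source frames on either side of the AltRef
// position take part in the filter, gather pointers to them in cpi->frames[],
// and run the filtering pass above.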
void vp8_temporal_filter_prepare_c
(
    VP8_COMP *cpi
)
{
    int frame = 0;

    int num_frames_backward = 0;
    int num_frames_forward = 0;
    int frames_to_blur_backward = 0;
    int frames_to_blur_forward = 0;
    int frames_to_blur = 0;
    int start_frame = 0;
    unsigned int filtered = 0;

    int strength = cpi->oxcf.arnr_strength;

    int blur_type = cpi->oxcf.arnr_type;

    int max_frames = cpi->active_arnr_frames;

    num_frames_backward = cpi->last_alt_ref_sei - cpi->source_encode_index;

    if (num_frames_backward < 0)
        num_frames_backward += cpi->oxcf.lag_in_frames;

    num_frames_forward = cpi->oxcf.lag_in_frames - (num_frames_backward + 1);
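
    // Select the blur window according to the ARNR filter type: 1 uses only
    // frames before the AltRef position, 2 only frames after it, and 3 (the
    // default) a window centred on the AltRef frame.  max_frames caps the
    // number of frames that enter the filter.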
    switch (blur_type)
    {
    case 1:
        /////////////////////////////////////////
        // Backward Blur

        frames_to_blur_backward = num_frames_backward;

        if (frames_to_blur_backward >= max_frames)
            frames_to_blur_backward = max_frames - 1;

        frames_to_blur = frames_to_blur_backward + 1;
        break;

    case 2:
        /////////////////////////////////////////
        // Forward Blur

        frames_to_blur_forward = num_frames_forward;

        if (frames_to_blur_forward >= max_frames)
            frames_to_blur_forward = max_frames - 1;

        frames_to_blur = frames_to_blur_forward + 1;
        break;

    case 3:
    default:
        /////////////////////////////////////////
        // Center Blur
        frames_to_blur_forward = num_frames_forward;
        frames_to_blur_backward = num_frames_backward;

        if (frames_to_blur_forward > frames_to_blur_backward)
            frames_to_blur_forward = frames_to_blur_backward;

        if (frames_to_blur_backward > frames_to_blur_forward)
            frames_to_blur_backward = frames_to_blur_forward;

        // When max_frames is even we have 1 more frame backward than forward
        if (frames_to_blur_forward > (max_frames - 1) / 2)
            frames_to_blur_forward = ((max_frames - 1) / 2);

        if (frames_to_blur_backward > (max_frames / 2))
            frames_to_blur_backward = (max_frames / 2);

        frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1;
        break;
    }

    start_frame = (cpi->last_alt_ref_sei
                   + frames_to_blur_forward) % cpi->oxcf.lag_in_frames;

#ifdef DEBUGFWG
    // DEBUG FWG
    printf("max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d start:%d"
           , max_frames
           , num_frames_backward
           , num_frames_forward
           , frames_to_blur
           , frames_to_blur_backward
           , frames_to_blur_forward
           , cpi->source_encode_index
           , cpi->last_alt_ref_sei
           , start_frame);
#endif

    // Setup frame pointers, NULL indicates frame not included in filter
    vpx_memset(cpi->frames, 0, max_frames*sizeof(YV12_BUFFER_CONFIG *));
    for (frame = 0; frame < frames_to_blur; frame++)
    {
        int which_buffer = start_frame - frame;

        // The lagged source buffer is circular; wrap negative indices around.
        if (which_buffer < 0)
            which_buffer += cpi->oxcf.lag_in_frames;

        cpi->frames[frames_to_blur - 1 - frame]
                = &cpi->src_buffer[which_buffer].source_buffer;
    }

    vp8_temporal_filter_iterate_c (
        cpi,
        frames_to_blur,
        frames_to_blur_backward,
        strength );
}
#endif