2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
12 #include "onyxc_int.h"
14 #include "systemdependent.h"
16 #include "alloccommon.h"
18 #include "firstpass.h"
20 #include "vpx_scale/vpxscale.h"
23 #include "quant_common.h"
24 #include "segmentation.h"
26 #include "vpx_scale/yv12extend.h"
28 #include "vpx_mem/vpx_mem.h"
29 #include "swapyv12buffer.h"
30 #include "threading.h"
31 #include "vpx_ports/vpx_timer.h"
32 #include "vpxerrors.h"
37 #define ALT_REF_MC_ENABLED 1 // dis/enable MC in AltRef filtering
38 #define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering
40 #if VP8_TEMPORAL_ALT_REF
// Builds the motion-compensated prediction for one macroblock used by the
// temporal (ARNR) filter: a 16x16 luma predictor into pred[0..255] and two
// 8x8 chroma predictors into pred[256..] (U) and pred[320..] (V).
// NOTE(review): this view elides several original lines; the full parameter
// list (x, stride, mv_row, mv_col, pred) is not visible here -- verify
// against the complete file.
42 static void vp8_temporal_filter_predictors_mb_c
45 unsigned char *y_mb_ptr
,
46 unsigned char *u_mb_ptr
,
47 unsigned char *v_mb_ptr
,
55 unsigned char *yptr
, *uptr
, *vptr
;
// Y plane: the motion vector appears to be in 1/8-pel units, so >> 3
// yields the full-pel offset into the source plane.
58 yptr
= y_mb_ptr
+ (mv_row
>> 3) * stride
+ (mv_col
>> 3);
// Any non-zero sub-pel bits (low 3 bits of either MV component) require
// the sub-pixel interpolation path; otherwise a plain copy suffices.
60 if ((mv_row
| mv_col
) & 7)
62 x
->subpixel_predict16x16(yptr
, stride
,
63 mv_col
& 7, mv_row
& 7, &pred
[0], 16);
// Full-pel MV: straight 16x16 copy via the RTCD dispatch table.
67 RECON_INVOKE(&x
->rtcd
->recon
, copy16x16
)(yptr
, stride
, &pred
[0], 16);
// Chroma planes are half resolution: halve the stride (rounding up) and
// reuse the same 1/8-pel MV; the sub-pel bits remain valid for 8x8.
73 stride
= (stride
+ 1) >> 1;
74 offset
= (mv_row
>> 3) * stride
+ (mv_col
>> 3);
75 uptr
= u_mb_ptr
+ offset
;
76 vptr
= v_mb_ptr
+ offset
;
78 if ((mv_row
| mv_col
) & 7)
80 x
->subpixel_predict8x8(uptr
, stride
,
81 mv_col
& 7, mv_row
& 7, &pred
[256], 8);
82 x
->subpixel_predict8x8(vptr
, stride
,
83 mv_col
& 7, mv_row
& 7, &pred
[320], 8);
// Full-pel chroma copies into the U (pred[256]) and V (pred[320]) slots.
87 RECON_INVOKE(&x
->rtcd
->recon
, copy8x8
)(uptr
, stride
, &pred
[256], 8);
88 RECON_INVOKE(&x
->rtcd
->recon
, copy8x8
)(vptr
, stride
, &pred
[320], 8);
// Accumulates one frame's contribution into the temporal-filter working
// buffers: for each pixel in a block_size x block_size block, a weight
// ("modifier") is derived from the difference between the filtered source
// (frame1) and the predictor (frame2), then count[k] += weight and
// accumulator[k] += weight * pixel.  The accumulator/count pair is later
// normalized to form the alt-ref frame.
// NOTE(review): declarations of i, j, k, byte, modifier and the stride /
// strength / filter_weight parameters are elided in this view -- confirm
// against the full file.
91 void vp8_temporal_filter_apply_c
93 unsigned char *frame1
,
95 unsigned char *frame2
,
96 unsigned int block_size
,
99 unsigned int *accumulator
,
100 unsigned short *count
107 for (i
= 0,k
= 0; i
< block_size
; i
++)
109 for (j
= 0; j
< block_size
; j
++, k
++)
112 int src_byte
= frame1
[byte
];
113 int pixel_value
= *frame2
++;
// Weight is a decreasing function of the squared pixel difference.
115 modifier
= src_byte
- pixel_value
;
116 // This is an integer approximation of:
117 // float coeff = (3.0 * modifer * modifier) / pow(2, strength);
118 // modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff);
119 modifier
*= modifier
;
// Round to nearest when shifting right by `strength`.
121 modifier
+= 1 << (strength
- 1);
122 modifier
>>= strength
;
// Invert: small differences get weight near 16, large ones near 0,
// then scale by this frame's per-macroblock filter weight.
127 modifier
= 16 - modifier
;
128 modifier
*= filter_weight
;
130 count
[k
] += modifier
;
131 accumulator
[k
] += modifier
* pixel_value
;
// Advance `byte` to the start of the next row in frame1.
136 byte
+= stride
- block_size
;
140 #if ALT_REF_MC_ENABLED
// Zero-cost table shared by the MV search below: centering both mvcost and
// mvsadcost at &dummy_cost[mv_max+1] makes every motion-vector cost zero,
// so the search minimizes pure SAD/error with no rate term.
141 static int dummy_cost
[2*mv_max
+1];
// Motion search helper for the temporal filter: finds the best MV from the
// alt-ref source macroblock (arf_frame) into frame_ptr, writing the result
// into d->bmi.mv and returning the best error (smaller = better match).
// Temporarily repoints the MB's source/pre buffers and restores them at the
// end.  NOTE(review): several lines (mb offsets, step_param setup, braces)
// are elided in this view -- verify against the full file.
143 static int vp8_temporal_filter_find_matching_mb_c
146 YV12_BUFFER_CONFIG
*arf_frame
,
147 YV12_BUFFER_CONFIG
*frame_ptr
,
152 MACROBLOCK
*x
= &cpi
->mb
;
157 int sadpb
= x
->sadperbit16
;
158 int bestsme
= INT_MAX
;
161 BLOCK
*b
= &x
->block
[0];
162 BLOCKD
*d
= &x
->e_mbd
.block
[0];
163 MV best_ref_mv1
= {0,0};
// All-zero MV cost tables (see dummy_cost above).
165 int *mvcost
[2] = { &dummy_cost
[mv_max
+1], &dummy_cost
[mv_max
+1] };
166 int *mvsadcost
[2] = { &dummy_cost
[mv_max
+1], &dummy_cost
[mv_max
+1] };
// Save the MB's original source/prediction buffer state for restoration.
169 unsigned char **base_src
= b
->base_src
;
171 int src_stride
= b
->src_stride
;
172 unsigned char **base_pre
= d
->base_pre
;
174 int pre_stride
= d
->pre_stride
;
176 // Setup frame pointers
177 b
->base_src
= &arf_frame
->y_buffer
;
178 b
->src_stride
= arf_frame
->y_stride
;
181 d
->base_pre
= &frame_ptr
->y_buffer
;
182 d
->pre_stride
= frame_ptr
->y_stride
;
185 // Further step/diamond searches as necessary
// Initial search step derived from speed settings; faster speeds start
// with a coarser step.
188 step_param
= cpi
->sf
.first_step
+
189 ((cpi
->Speed
> 5) ? 1 : 0);
191 (cpi
->sf
.max_step_search_steps
- 1)-step_param
;
195 step_param
= cpi
->sf
.first_step
+ 2;
// Search method is hard-wired to HEX here (condition is constant 1).
199 if (1/*cpi->sf.search_method == HEX*/)
201 // TODO Check that the 16x16 vf & sdf are selected here
202 bestsme
= vp8_hex_search(x
, b
, d
,
203 &best_ref_mv1
, &d
->bmi
.mv
.as_mv
,
205 sadpb
/*x->errorperbit*/,
206 &num00
, &cpi
->fn_ptr
[BLOCK_16X16
],
207 mvsadcost
, mvcost
, &best_ref_mv1
);
// Alternate (dead) path: initial diamond search at half sadpb.
213 bestsme
= cpi
->diamond_search_sad(x
, b
, d
,
214 &best_ref_mv1
, &d
->bmi
.mv
.as_mv
,
216 sadpb
/ 2/*x->errorperbit*/,
217 &num00
, &cpi
->fn_ptr
[BLOCK_16X16
],
218 mvsadcost
, mvcost
, &best_ref_mv1
); //sadpb < 9
220 // Further step/diamond searches as necessary
222 //further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
// Refinement loop: repeat diamond searches, keeping the best MV found.
227 while (n
< further_steps
)
235 thissme
= cpi
->diamond_search_sad(x
, b
, d
,
236 &best_ref_mv1
, &d
->bmi
.mv
.as_mv
,
238 sadpb
/ 4/*x->errorperbit*/,
239 &num00
, &cpi
->fn_ptr
[BLOCK_16X16
],
240 mvsadcost
, mvcost
, &best_ref_mv1
); //sadpb = 9
// Keep the new MV only if it improved on the best error so far;
// otherwise restore the previously saved MV (elided lines presumably
// handle the save/restore pairing -- confirm in full file).
242 if (thissme
< bestsme
)
245 mv_y
= d
->bmi
.mv
.as_mv
.row
;
246 mv_x
= d
->bmi
.mv
.as_mv
.col
;
250 d
->bmi
.mv
.as_mv
.row
= mv_y
;
251 d
->bmi
.mv
.as_mv
.col
= mv_x
;
257 #if ALT_REF_SUBPEL_ENABLED
259 //if (bestsme > error_thresh && bestsme < INT_MAX)
// Sub-pel refinement of the full-pel winner.
261 bestsme
= cpi
->find_fractional_mv_step(x
, b
, d
,
262 &d
->bmi
.mv
.as_mv
, &best_ref_mv1
,
263 x
->errorperbit
, &cpi
->fn_ptr
[BLOCK_16X16
],
// Restore the MB's original source/prediction buffer state.
269 b
->base_src
= base_src
;
271 b
->src_stride
= src_stride
;
272 d
->base_pre
= base_pre
;
274 d
->pre_stride
= pre_stride
;
// Core of the ARNR filter: for every macroblock, motion-compensate each
// candidate frame toward the alt-ref source, accumulate weighted pixel
// sums via the `apply` kernel, then normalize accumulator/count into the
// alt-ref output buffer.  NOTE(review): many argument lists and loop
// braces are elided in this extraction view -- verify against the full
// file before relying on exact control flow.
280 static void vp8_temporal_filter_iterate_c
// Per-frame filter weights (0 = skip, up to 2 = strongest).
291 unsigned int filter_weight
[MAX_LAG_BUFFERS
];
292 unsigned char *mm_ptr
= cpi
->fp_motion_map
;
293 int mb_cols
= cpi
->common
.mb_cols
;
294 int mb_rows
= cpi
->common
.mb_rows
;
295 int MBs
= cpi
->common
.MBs
;
297 int mb_uv_offset
= 0;
// Working buffers: 16x16 luma + two 8x8 chroma = 384 entries each.
298 DECLARE_ALIGNED_ARRAY(16, unsigned int, accumulator
, 16*16 + 8*8 + 8*8);
299 DECLARE_ALIGNED_ARRAY(16, unsigned short, count
, 16*16 + 8*8 + 8*8);
300 MACROBLOCKD
*mbd
= &cpi
->mb
.e_mbd
;
// f is the frame being filtered into the alt-ref (the anchor frame).
301 YV12_BUFFER_CONFIG
*f
= cpi
->frames
[alt_ref_index
];
302 unsigned char *dst1
, *dst2
;
303 DECLARE_ALIGNED_ARRAY(16, unsigned char, predictor
, 16*16 + 8*8 + 8*8);
// Save prediction-buffer pointers; restored at function end.
306 unsigned char *y_buffer
= mbd
->pre
.y_buffer
;
307 unsigned char *u_buffer
= mbd
->pre
.u_buffer
;
308 unsigned char *v_buffer
= mbd
->pre
.v_buffer
;
310 if (!cpi
->use_weighted_temporal_filter
)
312 // Temporal filtering is unweighted
313 for (frame
= 0; frame
< frame_count
; frame
++)
314 filter_weight
[frame
] = 1;
317 for (mb_row
= 0; mb_row
< mb_rows
; mb_row
++)
319 #if ALT_REF_MC_ENABLED
320 // Reduced search extent by 3 for 6-tap filter & smaller UMV border
321 cpi
->mb
.mv_row_min
= -((mb_row
* 16) + (VP8BORDERINPIXELS
- 19));
322 cpi
->mb
.mv_row_max
= ((cpi
->common
.mb_rows
- 1 - mb_row
) * 16)
323 + (VP8BORDERINPIXELS
- 19);
326 for (mb_col
= 0; mb_col
< mb_cols
; mb_col
++)
// Reset per-MB accumulation state (384 = 16*16 + 8*8 + 8*8).
332 vpx_memset(accumulator
, 0, 384*sizeof(unsigned int));
333 vpx_memset(count
, 0, 384*sizeof(unsigned short));
335 #if ALT_REF_MC_ENABLED
336 // Reduced search extent by 3 for 6-tap filter & smaller UMV border
337 cpi
->mb
.mv_col_min
= -((mb_col
* 16) + (VP8BORDERINPIXELS
- 19));
338 cpi
->mb
.mv_col_max
= ((cpi
->common
.mb_cols
- 1 - mb_col
) * 16)
339 + (VP8BORDERINPIXELS
- 19);
342 // Read & process macroblock weights from motion map
343 if (cpi
->use_weighted_temporal_filter
)
// Frames before the alt-ref: weight from the first-pass motion map,
// capped at weight_cap.
347 for (frame
= alt_ref_index
-1; frame
>= 0; frame
--)
349 w
= *(mm_ptr
+ (frame
+1)*MBs
);
350 filter_weight
[frame
] = w
< weight_cap
? w
: weight_cap
;
// The alt-ref source frame itself always gets the maximum weight.
354 filter_weight
[alt_ref_index
] = 2;
// Frames after the alt-ref: same motion-map-derived weighting.
358 for (frame
= alt_ref_index
+1; frame
< frame_count
; frame
++)
360 w
= *(mm_ptr
+ frame
*MBs
);
361 filter_weight
[frame
] = w
< weight_cap
? w
: weight_cap
;
367 for (frame
= 0; frame
< frame_count
; frame
++)
371 if (cpi
->frames
[frame
] == NULL
)
// Start each frame's search from the zero MV.
374 mbd
->block
[0].bmi
.mv
.as_mv
.row
= 0;
375 mbd
->block
[0].bmi
.mv
.as_mv
.col
= 0;
377 #if ALT_REF_MC_ENABLED
378 //if (filter_weight[frame] == 0)
380 #define THRESH_LOW 10000
381 #define THRESH_HIGH 20000
383 // Correlation has been lost try MC
384 err
= vp8_temporal_filter_find_matching_mb_c
386 cpi
->frames
[alt_ref_index
],
// Upgrade/downgrade the weight based on the MC error: good match
// (< THRESH_LOW) -> 2, moderate -> 1, poor -> 0 (skip frame).
391 if (filter_weight
[frame
] < 2)
393 // Set weight depending on error
394 filter_weight
[frame
] = err
<THRESH_LOW
395 ? 2 : err
<THRESH_HIGH
? 1 : 0;
399 if (filter_weight
[frame
] != 0)
401 // Construct the predictors
402 vp8_temporal_filter_predictors_mb_c
404 cpi
->frames
[frame
]->y_buffer
+ mb_y_offset
,
405 cpi
->frames
[frame
]->u_buffer
+ mb_uv_offset
,
406 cpi
->frames
[frame
]->v_buffer
+ mb_uv_offset
,
407 cpi
->frames
[frame
]->y_stride
,
408 mbd
->block
[0].bmi
.mv
.as_mv
.row
,
409 mbd
->block
[0].bmi
.mv
.as_mv
.col
,
412 // Apply the filter (YUV)
413 TEMPORAL_INVOKE(&cpi
->rtcd
.temporal
, apply
)
414 (f
->y_buffer
+ mb_y_offset
,
419 filter_weight
[frame
],
423 TEMPORAL_INVOKE(&cpi
->rtcd
.temporal
, apply
)
424 (f
->u_buffer
+ mb_uv_offset
,
429 filter_weight
[frame
],
433 TEMPORAL_INVOKE(&cpi
->rtcd
.temporal
, apply
)
434 (f
->v_buffer
+ mb_uv_offset
,
439 filter_weight
[frame
],
445 // Normalize filter output to produce AltRef frame
446 dst1
= cpi
->alt_ref_buffer
.source_buffer
.y_buffer
;
447 stride
= cpi
->alt_ref_buffer
.source_buffer
.y_stride
;
449 for (i
= 0,k
= 0; i
< 16; i
++)
451 for (j
= 0; j
< 16; j
++, k
++)
// Rounded division: pval = (acc + count/2) / count, implemented
// with the precomputed fixed_divide reciprocal table (the final
// shift is on an elided line -- confirm in full file).
453 unsigned int pval
= accumulator
[k
] + (count
[k
] >> 1);
454 pval
*= cpi
->fixed_divide
[count
[k
]];
457 dst1
[byte
] = (unsigned char)pval
;
459 // move to next pixel
// Chroma normalization: U into dst1, V into dst2; V accumulator
// index m presumably = k + 64 (set on an elided line).
466 dst1
= cpi
->alt_ref_buffer
.source_buffer
.u_buffer
;
467 dst2
= cpi
->alt_ref_buffer
.source_buffer
.v_buffer
;
468 stride
= cpi
->alt_ref_buffer
.source_buffer
.uv_stride
;
470 for (i
= 0,k
= 256; i
< 8; i
++)
472 for (j
= 0; j
< 8; j
++, k
++)
477 unsigned int pval
= accumulator
[k
] + (count
[k
] >> 1);
478 pval
*= cpi
->fixed_divide
[count
[k
]];
480 dst1
[byte
] = (unsigned char)pval
;
483 pval
= accumulator
[m
] + (count
[m
] >> 1);
484 pval
*= cpi
->fixed_divide
[count
[m
]];
486 dst2
[byte
] = (unsigned char)pval
;
488 // move to next pixel
// End of MB row: skip the remaining stride past the macroblock columns.
500 mb_y_offset
+= 16*(f
->y_stride
-mb_cols
);
501 mb_uv_offset
+= 8*(f
->uv_stride
-mb_cols
);
504 // Restore input state
505 mbd
->pre
.y_buffer
= y_buffer
;
506 mbd
->pre
.u_buffer
= u_buffer
;
507 mbd
->pre
.v_buffer
= v_buffer
;
// Entry point for ARNR filtering: decides how many frames to blend before
// and after the alt-ref source (according to oxcf.arnr_type: backward-only,
// forward-only, or centered), gathers the frame pointers from the lag
// buffer, and invokes vp8_temporal_filter_iterate_c.
// NOTE(review): the blur_type switch/case lines and the tail of the final
// call are elided in (or extend beyond) this view -- verify against the
// full file.
510 void vp8_temporal_filter_prepare_c
517 int num_frames_backward
= 0;
518 int num_frames_forward
= 0;
519 int frames_to_blur_backward
= 0;
520 int frames_to_blur_forward
= 0;
521 int frames_to_blur
= 0;
523 unsigned int filtered
= 0;
525 int strength
= cpi
->oxcf
.arnr_strength
;
527 int blur_type
= cpi
->oxcf
.arnr_type
;
529 int max_frames
= cpi
->active_arnr_frames
;
// Distance (in lag-buffer slots) from the current encode position back to
// the alt-ref source; wrap around the circular lag buffer if negative.
531 num_frames_backward
= cpi
->last_alt_ref_sei
- cpi
->source_encode_index
;
533 if (num_frames_backward
< 0)
534 num_frames_backward
+= cpi
->oxcf
.lag_in_frames
;
536 num_frames_forward
= cpi
->oxcf
.lag_in_frames
- (num_frames_backward
+ 1);
// Case: blur backward only (cap at max_frames including the source).
541 /////////////////////////////////////////
544 frames_to_blur_backward
= num_frames_backward
;
546 if (frames_to_blur_backward
>= max_frames
)
547 frames_to_blur_backward
= max_frames
- 1;
549 frames_to_blur
= frames_to_blur_backward
+ 1;
// Case: blur forward only (same capping scheme).
553 /////////////////////////////////////////
556 frames_to_blur_forward
= num_frames_forward
;
558 if (frames_to_blur_forward
>= max_frames
)
559 frames_to_blur_forward
= max_frames
- 1;
561 frames_to_blur
= frames_to_blur_forward
+ 1;
// Case: centered blur -- balance the two sides, then cap each half.
566 /////////////////////////////////////////
568 frames_to_blur_forward
= num_frames_forward
;
569 frames_to_blur_backward
= num_frames_backward
;
571 if (frames_to_blur_forward
> frames_to_blur_backward
)
572 frames_to_blur_forward
= frames_to_blur_backward
;
574 if (frames_to_blur_backward
> frames_to_blur_forward
)
575 frames_to_blur_backward
= frames_to_blur_forward
;
577 // When max_frames is even we have 1 more frame backward than forward
578 if (frames_to_blur_forward
> (max_frames
- 1) / 2)
579 frames_to_blur_forward
= ((max_frames
- 1) / 2);
581 if (frames_to_blur_backward
> (max_frames
/ 2))
582 frames_to_blur_backward
= (max_frames
/ 2);
584 frames_to_blur
= frames_to_blur_backward
+ frames_to_blur_forward
+ 1;
// Index (in the circular lag buffer) of the newest frame in the blur set.
588 start_frame
= (cpi
->last_alt_ref_sei
589 + frames_to_blur_forward
) % cpi
->oxcf
.lag_in_frames
;
// Debug trace (presumably compiled conditionally -- guard is elided here).
593 printf("max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d start:%d"
595 , num_frames_backward
598 , frames_to_blur_backward
599 , frames_to_blur_forward
600 , cpi
->source_encode_index
601 , cpi
->last_alt_ref_sei
605 // Setup frame pointers, NULL indicates frame not included in filter
606 vpx_memset(cpi
->frames
, 0, max_frames
*sizeof(YV12_BUFFER_CONFIG
*));
// Fill cpi->frames oldest-first: frames[0] is the oldest, walking the
// circular source buffer backward from start_frame.
607 for (frame
= 0; frame
< frames_to_blur
; frame
++)
609 int which_buffer
= start_frame
- frame
;
611 if (which_buffer
< 0)
612 which_buffer
+= cpi
->oxcf
.lag_in_frames
;
614 cpi
->frames
[frames_to_blur
-1-frame
]
615 = &cpi
->src_buffer
[which_buffer
].source_buffer
;
618 vp8_temporal_filter_iterate_c (
621 frames_to_blur_backward
,