2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
12 #include "vp8/common/onyxc_int.h"
14 #include "vp8/common/systemdependent.h"
16 #include "vp8/common/alloccommon.h"
18 #include "firstpass.h"
20 #include "vpx_scale/vpxscale.h"
21 #include "vp8/common/extend.h"
23 #include "vp8/common/quant_common.h"
24 #include "segmentation.h"
25 #include "vp8/common/g_common.h"
26 #include "vpx_scale/yv12extend.h"
27 #include "vpx_mem/vpx_mem.h"
28 #include "vp8/common/swapyv12buffer.h"
29 #include "vp8/common/threading.h"
30 #include "vpx_ports/vpx_timer.h"
35 #define ALT_REF_MC_ENABLED 1 // dis/enable MC in AltRef filtering
36 #define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering
38 #if VP8_TEMPORAL_ALT_REF
// Builds the 16x16 Y and 8x8 U/V motion-compensated predictors for one
// macroblock into pred[] (Y at [0], U at [256], V at [320]).
// NOTE(review): this span is a lossy extraction — several original source
// lines (remaining parameters, braces, whole statements) are missing.
// Verify against the upstream file before editing.
40 static void vp8_temporal_filter_predictors_mb_c
43 unsigned char *y_mb_ptr
,
44 unsigned char *u_mb_ptr
,
45 unsigned char *v_mb_ptr
,
53 unsigned char *yptr
, *uptr
, *vptr
;
// Y plane: mv_row/mv_col are in 1/8-pel units; >>3 gives the full-pel offset.
56 yptr
= y_mb_ptr
+ (mv_row
>> 3) * stride
+ (mv_col
>> 3);
// Any sub-pel remainder in either component selects the sub-pixel filter path.
58 if ((mv_row
| mv_col
) & 7)
60 x
->subpixel_predict16x16(yptr
, stride
,
61 mv_col
& 7, mv_row
& 7, &pred
[0], 16);
// Full-pel motion: plain 16x16 copy into the predictor buffer.
65 RECON_INVOKE(&x
->rtcd
->recon
, copy16x16
)(yptr
, stride
, &pred
[0], 16);
// Chroma: halve the stride (rounding up) for the half-resolution U/V planes.
71 stride
= (stride
+ 1) >> 1;
72 offset
= (mv_row
>> 3) * stride
+ (mv_col
>> 3);
73 uptr
= u_mb_ptr
+ offset
;
74 vptr
= v_mb_ptr
+ offset
;
// NOTE(review): the chroma test reuses the luma mv's sub-pel bits here;
// presumably a chroma-scaled mv is computed on a missing line — confirm.
76 if ((mv_row
| mv_col
) & 7)
78 x
->subpixel_predict8x8(uptr
, stride
,
79 mv_col
& 7, mv_row
& 7, &pred
[256], 8);
80 x
->subpixel_predict8x8(vptr
, stride
,
81 mv_col
& 7, mv_row
& 7, &pred
[320], 8);
// Full-pel chroma motion: straight 8x8 copies for U and V.
85 RECON_INVOKE(&x
->rtcd
->recon
, copy8x8
)(uptr
, stride
, &pred
[256], 8);
86 RECON_INVOKE(&x
->rtcd
->recon
, copy8x8
)(vptr
, stride
, &pred
[320], 8);
// Accumulates one block_size x block_size predictor block (frame2) into the
// running weighted sums: count[k] gets the per-pixel weight, accumulator[k]
// gets weight * pixel. The weight falls off with the squared difference
// between source (frame1) and predictor pixels, scaled down by 'strength'.
// NOTE(review): lossy extraction — some statements (e.g. the x3 scaling and
// the clamp implied by the comment below) appear to be on missing lines.
89 void vp8_temporal_filter_apply_c
91 unsigned char *frame1
,
93 unsigned char *frame2
,
94 unsigned int block_size
,
97 unsigned int *accumulator
,
105 for (i
= 0,k
= 0; i
< block_size
; i
++)
107 for (j
= 0; j
< block_size
; j
++, k
++)
110 int src_byte
= frame1
[byte
];
111 int pixel_value
= *frame2
++;
// Difference between the source pixel and the candidate predictor pixel.
113 modifier
= src_byte
- pixel_value
;
114 // This is an integer approximation of:
115 // float coeff = (3.0 * modifier * modifier) / pow(2, strength);
116 // modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff);
117 modifier
*= modifier
;
// Round to nearest before the arithmetic shift by 'strength'.
119 modifier
+= 1 << (strength
- 1);
120 modifier
>>= strength
;
// Invert: small differences -> large weight (max 16), large -> small.
125 modifier
= 16 - modifier
;
126 modifier
*= filter_weight
;
128 count
[k
] += modifier
;
129 accumulator
[k
] += modifier
* pixel_value
;
// Step the flat source index to the start of the next row.
134 byte
+= stride
- block_size
;
138 #if ALT_REF_MC_ENABLED
// All-zero mv cost table sized for the full signed mv range [-mv_max, mv_max];
// the motion search below indexes it from its midpoint so rate cost is ignored.
139 static int dummy_cost
[2*mv_max
+1];
// Motion-searches frame_ptr for the best match to the macroblock in
// arf_frame, writing the resulting mv into d->bmi.mv and returning the best
// error score (bestsme). Uses zero-cost mv tables so pure distortion decides.
// NOTE(review): lossy extraction — parameter list, braces, and several
// statements are missing from this span; verify against upstream.
141 static int vp8_temporal_filter_find_matching_mb_c
144 YV12_BUFFER_CONFIG
*arf_frame
,
145 YV12_BUFFER_CONFIG
*frame_ptr
,
150 MACROBLOCK
*x
= &cpi
->mb
;
153 int sadpb
= x
->sadperbit16
;
154 int bestsme
= INT_MAX
;
156 BLOCK
*b
= &x
->block
[0];
157 BLOCKD
*d
= &x
->e_mbd
.block
[0];
159 int_mv best_ref_mv1_full
; /* full-pixel value of best_ref_mv1 */
// Point both cost tables at the midpoint of the all-zero array so any mv
// offset indexes a zero cost: the search is driven purely by distortion.
161 int *mvcost
[2] = { &dummy_cost
[mv_max
+1], &dummy_cost
[mv_max
+1] };
162 int *mvsadcost
[2] = { &dummy_cost
[mv_max
+1], &dummy_cost
[mv_max
+1] };
// Save the src/pre pointers and strides so they can be restored on exit.
165 unsigned char **base_src
= b
->base_src
;
167 int src_stride
= b
->src_stride
;
168 unsigned char **base_pre
= d
->base_pre
;
170 int pre_stride
= d
->pre_stride
;
// Zero reference mv; derive its full-pel form (1/8-pel units >> 3).
172 best_ref_mv1
.as_int
= 0;
173 best_ref_mv1_full
.as_mv
.col
= best_ref_mv1
.as_mv
.col
>>3;
174 best_ref_mv1_full
.as_mv
.row
= best_ref_mv1
.as_mv
.row
>>3;
176 // Setup frame pointers
177 b
->base_src
= &arf_frame
->y_buffer
;
178 b
->src_stride
= arf_frame
->y_stride
;
181 d
->base_pre
= &frame_ptr
->y_buffer
;
182 d
->pre_stride
= frame_ptr
->y_stride
;
185 // Further step/diamond searches as necessary
// Slightly coarser initial step at higher speed settings.
188 step_param
= cpi
->sf
.first_step
+
189 ((cpi
->Speed
> 5) ? 1 : 0);
191 (cpi
->sf
.max_step_search_steps
- 1)-step_param
;
195 step_param
= cpi
->sf
.first_step
+ 2;
199 /*cpi->sf.search_method == HEX*/
200 // TODO Check that the 16x16 vf & sdf are selected here
201 bestsme
= vp8_hex_search(x
, b
, d
,
202 &best_ref_mv1_full
, &d
->bmi
.mv
,
205 &cpi
->fn_ptr
[BLOCK_16X16
],
206 mvsadcost
, mvcost
, &best_ref_mv1
);
208 #if ALT_REF_SUBPEL_ENABLED
210 //if (bestsme > error_thresh && bestsme < INT_MAX)
// Refine the full-pel result to sub-pel precision.
214 bestsme
= cpi
->find_fractional_mv_step(x
, b
, d
,
215 &d
->bmi
.mv
, &best_ref_mv1
,
216 x
->errorperbit
, &cpi
->fn_ptr
[BLOCK_16X16
],
217 mvcost
, &distortion
, &sse
);
// Restore the caller's src/pre pointers and strides saved above.
222 b
->base_src
= base_src
;
224 b
->src_stride
= src_stride
;
225 d
->base_pre
= base_pre
;
227 d
->pre_stride
= pre_stride
;
// Core temporal-filter loop: for every macroblock, motion-matches each
// candidate frame against the alt-ref anchor, builds predictors, accumulates
// weighted YUV sums, then normalizes the sums into cpi->alt_ref_buffer.
// NOTE(review): lossy extraction — parameter list, braces, and many
// statements are missing from this span; verify against upstream.
233 static void vp8_temporal_filter_iterate_c
244 unsigned int filter_weight
;
245 int mb_cols
= cpi
->common
.mb_cols
;
246 int mb_rows
= cpi
->common
.mb_rows
;
248 int mb_uv_offset
= 0;
// 384 entries: 16x16 luma + two 8x8 chroma accumulators per macroblock.
249 DECLARE_ALIGNED_ARRAY(16, unsigned int, accumulator
, 16*16 + 8*8 + 8*8);
250 DECLARE_ALIGNED_ARRAY(16, unsigned short, count
, 16*16 + 8*8 + 8*8);
251 MACROBLOCKD
*mbd
= &cpi
->mb
.e_mbd
;
// f is the alt-ref anchor frame the filter output is centered on.
252 YV12_BUFFER_CONFIG
*f
= cpi
->frames
[alt_ref_index
];
253 unsigned char *dst1
, *dst2
;
254 DECLARE_ALIGNED_ARRAY(16, unsigned char, predictor
, 16*16 + 8*8 + 8*8);
// Save the prediction-buffer pointers; restored at the end of the function.
257 unsigned char *y_buffer
= mbd
->pre
.y_buffer
;
258 unsigned char *u_buffer
= mbd
->pre
.u_buffer
;
259 unsigned char *v_buffer
= mbd
->pre
.v_buffer
;
261 for (mb_row
= 0; mb_row
< mb_rows
; mb_row
++)
263 #if ALT_REF_MC_ENABLED
264 // Source frames are extended to 16 pixels. This is different than
265 // L/A/G reference frames that have a border of 32 (VP8BORDERINPIXELS)
266 // A 6 tap filter is used for motion search. This requires 2 pixels
267 // before and 3 pixels after. So the largest Y mv on a border would
268 // then be 16 - 3. The UV blocks are half the size of the Y and
269 // therefore only extended by 8. The largest mv that a UV block
270 // can support is 8 - 3. A UV mv is half of a Y mv.
271 // (16 - 3) >> 1 == 6 which is greater than 8 - 3.
272 // To keep the mv in play for both Y and UV planes the max that it
273 // can be on a border is therefore 16 - 5.
274 cpi
->mb
.mv_row_min
= -((mb_row
* 16) + (16 - 5));
275 cpi
->mb
.mv_row_max
= ((cpi
->common
.mb_rows
- 1 - mb_row
) * 16)
279 for (mb_col
= 0; mb_col
< mb_cols
; mb_col
++)
// Reset the per-macroblock weighted sums (384 = 16*16 + 8*8 + 8*8).
284 vpx_memset(accumulator
, 0, 384*sizeof(unsigned int));
285 vpx_memset(count
, 0, 384*sizeof(unsigned short));
287 #if ALT_REF_MC_ENABLED
// Per-column mv clamps, same 16 - 5 border reasoning as for rows above.
288 cpi
->mb
.mv_col_min
= -((mb_col
* 16) + (16 - 5));
289 cpi
->mb
.mv_col_max
= ((cpi
->common
.mb_cols
- 1 - mb_col
) * 16)
293 for (frame
= 0; frame
< frame_count
; frame
++)
// NULL slots mark frames excluded from the filter; presumably skipped
// via a continue on a missing line — confirm against upstream.
297 if (cpi
->frames
[frame
] == NULL
)
// Default to a zero mv before (optionally) motion searching.
300 mbd
->block
[0].bmi
.mv
.as_mv
.row
= 0;
301 mbd
->block
[0].bmi
.mv
.as_mv
.col
= 0;
303 #if ALT_REF_MC_ENABLED
304 #define THRESH_LOW 10000
305 #define THRESH_HIGH 20000
307 // Find best match in this frame by MC
308 err
= vp8_temporal_filter_find_matching_mb_c
310 cpi
->frames
[alt_ref_index
],
316 // Assign higher weight to matching MB if it's error
317 // score is lower. If not applying MC default behavior
318 // is to weight all MBs equal.
319 filter_weight
= err
<THRESH_LOW
320 ? 2 : err
<THRESH_HIGH
? 1 : 0;
// Weight 0 means this frame's MB is too poor a match to contribute.
322 if (filter_weight
!= 0)
324 // Construct the predictors
325 vp8_temporal_filter_predictors_mb_c
327 cpi
->frames
[frame
]->y_buffer
+ mb_y_offset
,
328 cpi
->frames
[frame
]->u_buffer
+ mb_uv_offset
,
329 cpi
->frames
[frame
]->v_buffer
+ mb_uv_offset
,
330 cpi
->frames
[frame
]->y_stride
,
331 mbd
->block
[0].bmi
.mv
.as_mv
.row
,
332 mbd
->block
[0].bmi
.mv
.as_mv
.col
,
335 // Apply the filter (YUV)
336 TEMPORAL_INVOKE(&cpi
->rtcd
.temporal
, apply
)
337 (f
->y_buffer
+ mb_y_offset
,
346 TEMPORAL_INVOKE(&cpi
->rtcd
.temporal
, apply
)
347 (f
->u_buffer
+ mb_uv_offset
,
356 TEMPORAL_INVOKE(&cpi
->rtcd
.temporal
, apply
)
357 (f
->v_buffer
+ mb_uv_offset
,
368 // Normalize filter output to produce AltRef frame
369 dst1
= cpi
->alt_ref_buffer
.y_buffer
;
370 stride
= cpi
->alt_ref_buffer
.y_stride
;
// Luma: divide each weighted sum by its total weight via the
// fixed_divide reciprocal table; count>>1 provides rounding.
372 for (i
= 0,k
= 0; i
< 16; i
++)
374 for (j
= 0; j
< 16; j
++, k
++)
376 unsigned int pval
= accumulator
[k
] + (count
[k
] >> 1);
377 pval
*= cpi
->fixed_divide
[count
[k
]];
380 dst1
[byte
] = (unsigned char)pval
;
382 // move to next pixel
// Chroma: U (dst1) and V (dst2) normalized in one pass; k starts at
// 256 (U accumulator base) and m presumably indexes the V block at
// k + 64 — set on a missing line, confirm against upstream.
389 dst1
= cpi
->alt_ref_buffer
.u_buffer
;
390 dst2
= cpi
->alt_ref_buffer
.v_buffer
;
391 stride
= cpi
->alt_ref_buffer
.uv_stride
;
393 for (i
= 0,k
= 256; i
< 8; i
++)
395 for (j
= 0; j
< 8; j
++, k
++)
400 unsigned int pval
= accumulator
[k
] + (count
[k
] >> 1);
401 pval
*= cpi
->fixed_divide
[count
[k
]];
403 dst1
[byte
] = (unsigned char)pval
;
406 pval
= accumulator
[m
] + (count
[m
] >> 1);
407 pval
*= cpi
->fixed_divide
[count
[m
]];
409 dst2
[byte
] = (unsigned char)pval
;
411 // move to next pixel
// Advance per-row plane offsets past the row just processed.
422 mb_y_offset
+= 16*(f
->y_stride
-mb_cols
);
423 mb_uv_offset
+= 8*(f
->uv_stride
-mb_cols
);
426 // Restore input state
427 mbd
->pre
.y_buffer
= y_buffer
;
428 mbd
->pre
.u_buffer
= u_buffer
;
429 mbd
->pre
.v_buffer
= v_buffer
;
// Decides how many frames before/after the alt-ref anchor to blur (per the
// configured ARNR blur_type), collects them from the lookahead into
// cpi->frames[], and invokes the filter iterator.
// NOTE(review): lossy extraction — the blur_type switch scaffolding, several
// statements, and the tail of this function fall outside/beyond this span.
432 void vp8_temporal_filter_prepare_c
440 int num_frames_backward
= 0;
441 int num_frames_forward
= 0;
442 int frames_to_blur_backward
= 0;
443 int frames_to_blur_forward
= 0;
444 int frames_to_blur
= 0;
// Filter strength and blur mode come from the encoder ARNR configuration.
447 int strength
= cpi
->oxcf
.arnr_strength
;
449 int blur_type
= cpi
->oxcf
.arnr_type
;
451 int max_frames
= cpi
->active_arnr_frames
;
// 'distance' frames precede the anchor; the rest of the lookahead (minus
// the anchor itself) lies ahead of it.
453 num_frames_backward
= distance
;
454 num_frames_forward
= vp8_lookahead_depth(cpi
->lookahead
)
455 - (num_frames_backward
+ 1);
460 /////////////////////////////////////////
// Backward-only blur: clamp to max_frames (anchor counts as the +1).
463 frames_to_blur_backward
= num_frames_backward
;
465 if (frames_to_blur_backward
>= max_frames
)
466 frames_to_blur_backward
= max_frames
- 1;
468 frames_to_blur
= frames_to_blur_backward
+ 1;
472 /////////////////////////////////////////
// Forward-only blur: symmetric clamp on the forward side.
475 frames_to_blur_forward
= num_frames_forward
;
477 if (frames_to_blur_forward
>= max_frames
)
478 frames_to_blur_forward
= max_frames
- 1;
480 frames_to_blur
= frames_to_blur_forward
+ 1;
485 /////////////////////////////////////////
// Centered blur: balance both sides to the smaller of the two counts.
487 frames_to_blur_forward
= num_frames_forward
;
488 frames_to_blur_backward
= num_frames_backward
;
490 if (frames_to_blur_forward
> frames_to_blur_backward
)
491 frames_to_blur_forward
= frames_to_blur_backward
;
493 if (frames_to_blur_backward
> frames_to_blur_forward
)
494 frames_to_blur_backward
= frames_to_blur_forward
;
496 // When max_frames is even we have 1 more frame backward than forward
497 if (frames_to_blur_forward
> (max_frames
- 1) / 2)
498 frames_to_blur_forward
= ((max_frames
- 1) / 2);
500 if (frames_to_blur_backward
> (max_frames
/ 2))
501 frames_to_blur_backward
= (max_frames
/ 2);
503 frames_to_blur
= frames_to_blur_backward
+ frames_to_blur_forward
+ 1;
// Lookahead index of the furthest-forward frame included in the filter.
507 start_frame
= distance
+ frames_to_blur_forward
;
// Debug trace of the chosen frame counts (format args partially missing
// from this extraction).
511 printf("max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d start:%d"
513 , num_frames_backward
516 , frames_to_blur_backward
517 , frames_to_blur_forward
518 , cpi
->source_encode_index
519 , cpi
->last_alt_ref_sei
523 // Setup frame pointers, NULL indicates frame not included in filter
524 vpx_memset(cpi
->frames
, 0, max_frames
*sizeof(YV12_BUFFER_CONFIG
*));
// Fill cpi->frames[] oldest-first by walking the lookahead backward from
// start_frame.
525 for (frame
= 0; frame
< frames_to_blur
; frame
++)
527 int which_buffer
= start_frame
- frame
;
528 struct lookahead_entry
* buf
= vp8_lookahead_peek(cpi
->lookahead
,
530 cpi
->frames
[frames_to_blur
-1-frame
] = &buf
->img
;
533 vp8_temporal_filter_iterate_c (
536 frames_to_blur_backward
,