/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#if !defined(WIN32) && CONFIG_OS_SUPPORT == 1
# include <unistd.h>
#endif
#include "onyxd_int.h"
#include "vpx_mem/vpx_mem.h"
#include "vp8/common/threading.h"

#include "vp8/common/loopfilter.h"
#include "vp8/common/extend.h"
#include "vpx_ports/vpx_timer.h"
#include "detokenize.h"
#include "vp8/common/reconinter.h"
#include "reconintra_mt.h"

extern void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd);
extern void clamp_mvs(MACROBLOCKD *xd);
extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);

#if CONFIG_RUNTIME_CPU_DETECT
#define RTCD_VTABLE(x) (&(pbi)->common.rtcd.x)
#else
#define RTCD_VTABLE(x) NULL
#endif

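/* Copy the decoding state each worker thread needs (prediction function
 * pointers, mode info pointers, segmentation and loop-filter deltas, dequant
 * pointers) from the main MACROBLOCKD into each per-thread MB_ROW_DEC, and
 * reset the per-row progress counters used for inter-thread synchronization.
 */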
static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
{
    VP8_COMMON *const pc = &pbi->common;
    int i, j;

    for (i = 0; i < count; i++)
    {
        MACROBLOCKD *mbd = &mbrd[i].mbd;
#if CONFIG_RUNTIME_CPU_DETECT
        mbd->rtcd = xd->rtcd;
#endif
        mbd->subpixel_predict      = xd->subpixel_predict;
        mbd->subpixel_predict8x4   = xd->subpixel_predict8x4;
        mbd->subpixel_predict8x8   = xd->subpixel_predict8x8;
        mbd->subpixel_predict16x16 = xd->subpixel_predict16x16;

        mbd->mode_info_context = pc->mi + pc->mode_info_stride * (i + 1);
        mbd->mode_info_stride  = pc->mode_info_stride;

        mbd->frame_type = pc->frame_type;
        mbd->frames_since_golden = pc->frames_since_golden;
        mbd->frames_till_alt_ref_frame = pc->frames_till_alt_ref_frame;

        mbd->pre = pc->yv12_fb[pc->lst_fb_idx];
        mbd->dst = pc->yv12_fb[pc->new_fb_idx];

        vp8_setup_block_dptrs(mbd);
        vp8_build_block_doffsets(mbd);
        mbd->segmentation_enabled = xd->segmentation_enabled;
        mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
        vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));

        /*signed char ref_lf_deltas[MAX_REF_LF_DELTAS];*/
        vpx_memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
        /*signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];*/
        vpx_memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
        /*unsigned char mode_ref_lf_delta_enabled;
        unsigned char mode_ref_lf_delta_update;*/
        mbd->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled;
        mbd->mode_ref_lf_delta_update  = xd->mode_ref_lf_delta_update;

        mbd->current_bc = &pbi->bc2;

        for (j = 0; j < 25; j++)
        {
            mbd->block[j].dequant = xd->block[j].dequant;
        }
    }

    for (i = 0; i < pc->mb_rows; i++)
        pbi->mt_current_mb_col[i] = -1;
}

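/* Decode one macroblock: detokenize the coefficients (unless the MB is
 * skipped), build the intra or inter prediction, then dequantize and
 * inverse-transform the residual into the destination frame buffer.
 */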
static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col)
{
    int eobtotal = 0;
    int i, do_clamp = xd->mode_info_context->mbmi.need_to_clamp_mvs;
    VP8_COMMON *pc = &pbi->common;

    if (xd->mode_info_context->mbmi.mb_skip_coeff)
    {
        vp8_reset_mb_tokens_context(xd);
    }
    else
    {
        eobtotal = vp8_decode_mb_tokens(pbi, xd);
    }

    /* Perform temporary clamping of the MV to be used for prediction */
    if (do_clamp)
    {
        clamp_mvs(xd);
    }

    xd->mode_info_context->mbmi.dc_diff = 1;

    if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV && eobtotal == 0)
    {
        xd->mode_info_context->mbmi.dc_diff = 0;

        /*mt_skip_recon_mb(pbi, xd, mb_row, mb_col);*/
        if (xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
        {
            vp8mt_build_intra_predictors_mbuv_s(pbi, xd, mb_row, mb_col);
            vp8mt_build_intra_predictors_mby_s(pbi, xd, mb_row, mb_col);
        }
        else
        {
            vp8_build_inter_predictors_mb_s(xd);
        }
        return;
    }

    if (xd->segmentation_enabled)
        mb_init_dequantizer(pbi, xd);

    /* do prediction */
    if (xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
    {
        vp8mt_build_intra_predictors_mbuv(pbi, xd, mb_row, mb_col);

        if (xd->mode_info_context->mbmi.mode != B_PRED)
        {
            vp8mt_build_intra_predictors_mby(pbi, xd, mb_row, mb_col);
        }
        else
        {
            vp8mt_intra_prediction_down_copy(pbi, xd, mb_row, mb_col);
        }
    }
    else
    {
        vp8_build_inter_predictors_mb(xd);
    }

    /* dequantization and idct */
    if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
    {
        BLOCKD *b = &xd->block[24];
        DEQUANT_INVOKE(&pbi->dequant, block)(b);

        /* do 2nd order transform on the dc block */
        if (xd->eobs[24] > 1)
        {
            IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff);
            ((int *)b->qcoeff)[0] = 0;
            ((int *)b->qcoeff)[1] = 0;
            ((int *)b->qcoeff)[2] = 0;
            ((int *)b->qcoeff)[3] = 0;
            ((int *)b->qcoeff)[4] = 0;
            ((int *)b->qcoeff)[5] = 0;
            ((int *)b->qcoeff)[6] = 0;
            ((int *)b->qcoeff)[7] = 0;
        }
        else
        {
            IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], b->diff);
            ((int *)b->qcoeff)[0] = 0;
        }

        DEQUANT_INVOKE(&pbi->dequant, dc_idct_add_y_block)
                       (xd->qcoeff, xd->block[0].dequant,
                        xd->predictor, xd->dst.y_buffer,
                        xd->dst.y_stride, xd->eobs, xd->block[24].diff);
    }
    else if ((xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) && xd->mode_info_context->mbmi.mode == B_PRED)
    {
        for (i = 0; i < 16; i++)
        {
            BLOCKD *b = &xd->block[i];
            vp8mt_predict_intra4x4(pbi, xd, b->bmi.mode, b->predictor, mb_row, mb_col, i);

            if (xd->eobs[i] > 1)
            {
                DEQUANT_INVOKE(&pbi->dequant, idct_add)
                    (b->qcoeff, b->dequant, b->predictor,
                     *(b->base_dst) + b->dst, 16, b->dst_stride);
            }
            else
            {
                IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
                    (b->qcoeff[0] * b->dequant[0], b->predictor,
                     *(b->base_dst) + b->dst, 16, b->dst_stride);
                ((int *)b->qcoeff)[0] = 0;
            }
        }
    }
    else
    {
        DEQUANT_INVOKE(&pbi->dequant, idct_add_y_block)
                       (xd->qcoeff, xd->block[0].dequant,
                        xd->predictor, xd->dst.y_buffer,
                        xd->dst.y_stride, xd->eobs);
    }

    DEQUANT_INVOKE(&pbi->dequant, idct_add_uv_block)
                   (xd->qcoeff + 16 * 16, xd->block[16].dequant,
                    xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer,
                    xd->dst.uv_stride, xd->eobs + 16);
}

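/* Worker thread entry point. Each thread decodes macroblock rows
 * ithread+1, ithread+1+(decoding_thread_count+1), ... and, at every
 * sync_range boundary, waits until the thread working on the row above is
 * far enough ahead that the above-row context it needs is available.
 */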
static THREAD_FUNCTION thread_decoding_proc(void *p_data)
{
    int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
    VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
    MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
    ENTROPY_CONTEXT_PLANES mb_row_left_context;

    while (1)
    {
        if (pbi->b_multithreaded_rd == 0)
            break;

        /*if(WaitForSingleObject(pbi->h_event_start_decoding[ithread], INFINITE) == WAIT_OBJECT_0)*/
        if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0)
        {
            if (pbi->b_multithreaded_rd == 0)
                break;
            else
            {
                VP8_COMMON *pc = &pbi->common;
                MACROBLOCKD *xd = &mbrd->mbd;

                int mb_row;
                int num_part = 1 << pbi->common.multi_token_partition;
                volatile int *last_row_current_mb_col;
                int nsync = pbi->sync_range;

                for (mb_row = ithread + 1; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
                {
                    int i;
                    int recon_yoffset, recon_uvoffset;
                    int mb_col;
                    int ref_fb_idx = pc->lst_fb_idx;
                    int dst_fb_idx = pc->new_fb_idx;
                    int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
                    int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;

                    int filter_level;
                    loop_filter_info *lfi = pc->lf_info;
                    int alt_flt_enabled = xd->segmentation_enabled;
                    int Segment;

                    pbi->mb_row_di[ithread].mb_row = mb_row;
                    pbi->mb_row_di[ithread].mbd.current_bc = &pbi->mbc[mb_row % num_part];

                    last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row - 1];

                    recon_yoffset = mb_row * recon_y_stride * 16;
                    recon_uvoffset = mb_row * recon_uv_stride * 8;

                    /* reset above block coeffs */
                    xd->above_context = pc->above_context;
                    xd->left_context = &mb_row_left_context;
                    vpx_memset(&mb_row_left_context, 0, sizeof(mb_row_left_context));
                    xd->up_available = (mb_row != 0);

                    xd->mb_to_top_edge = -((mb_row * 16)) << 3;
                    xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;

                    for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
                    {
                        if ((mb_col & (nsync - 1)) == 0)
                        {
                            /* Busy-wait until the thread decoding the row
                             * above is at least nsync macroblocks ahead.
                             */
                            while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
                            {
                                thread_sleep(0);
                            }
                        }

                        if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
                        {
                            for (i = 0; i < 16; i++)
                            {
                                BLOCKD *d = &xd->block[i];
                                vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
                            }
                        }

                        /* Distance of Mb to the various image edges.
                         * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units.
                         */
                        xd->mb_to_left_edge = -((mb_col * 16) << 3);
                        xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;

                        xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
                        xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
                        xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;

                        xd->left_available = (mb_col != 0);

                        /* Select the appropriate reference frame for this MB */
                        if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
                            ref_fb_idx = pc->lst_fb_idx;
                        else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
                            ref_fb_idx = pc->gld_fb_idx;
                        else
                            ref_fb_idx = pc->alt_fb_idx;

                        xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
                        xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
                        xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;

                        vp8_build_uvmvs(xd, pc->full_pixel);
                        decode_macroblock(pbi, xd, mb_row, mb_col);

                        if (pbi->common.filter_level)
                        {
                            if (mb_row != pc->mb_rows - 1)
                            {
                                /* Save decoded MB last row data for next-row decoding */
                                vpx_memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col * 16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
                                vpx_memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col * 8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
                                vpx_memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col * 8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
                            }

                            /* save left_col for next MB decoding */
                            if (mb_col != pc->mb_cols - 1)
                            {
                                MODE_INFO *next = xd->mode_info_context + 1;

                                if (xd->frame_type == KEY_FRAME || next->mbmi.ref_frame == INTRA_FRAME)
                                {
                                    for (i = 0; i < 16; i++)
                                        pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer[i * recon_y_stride + 15];

                                    for (i = 0; i < 8; i++)
                                    {
                                        pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer[i * recon_uv_stride + 7];
                                        pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer[i * recon_uv_stride + 7];
                                    }
                                }
                            }

                            /* update loopfilter info */
                            Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
                            filter_level = pbi->mt_baseline_filter_level[Segment];
                            /* Distance of Mb to the various image edges.
                             * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units.
                             * Apply any context driven MB level adjustment.
                             */
                            filter_level = vp8_adjust_mb_lf_value(xd, filter_level);

                            /* loopfilter on this macroblock. */
                            if (filter_level)
                            {
                                if (mb_col > 0)
                                    pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);

                                if (xd->mode_info_context->mbmi.dc_diff > 0)
                                    pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);

                                /* don't apply across umv border */
                                if (mb_row > 0)
                                    pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);

                                if (xd->mode_info_context->mbmi.dc_diff > 0)
                                    pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
                            }
                        }

                        recon_yoffset += 16;
                        recon_uvoffset += 8;

                        ++xd->mode_info_context;  /* next mb */

                        xd->above_context++;

                        /*pbi->mb_row_di[ithread].current_mb_col = mb_col;*/
                        pbi->mt_current_mb_col[mb_row] = mb_col;
                    }

                    /* adjust to the next row of mbs */
                    if (pbi->common.filter_level)
                    {
                        if (mb_row != pc->mb_rows - 1)
                        {
                            int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS;
                            int lastuv = (pc->yv12_fb[ref_fb_idx].y_width >> 1) + (VP8BORDERINPIXELS >> 1);

                            for (i = 0; i < 4; i++)
                            {
                                pbi->mt_yabove_row[mb_row + 1][lasty + i] = pbi->mt_yabove_row[mb_row + 1][lasty - 1];
                                pbi->mt_uabove_row[mb_row + 1][lastuv + i] = pbi->mt_uabove_row[mb_row + 1][lastuv - 1];
                                pbi->mt_vabove_row[mb_row + 1][lastuv + i] = pbi->mt_vabove_row[mb_row + 1][lastuv - 1];
                            }
                        }
                    }
                    else
                        vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);

                    ++xd->mode_info_context;  /* skip prediction column */

                    /* since we have multithread */
                    xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
                }
            }
        }

        /* add this to each frame */
        if ((mbrd->mb_row == pbi->common.mb_rows - 1) || ((mbrd->mb_row == pbi->common.mb_rows - 2) && (pbi->common.mb_rows % (pbi->decoding_thread_count + 1)) == 1))
        {
            /*SetEvent(pbi->h_event_end_decoding);*/
            sem_post(&pbi->h_event_end_decoding);
        }
    }

    return 0;
}

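/* Create the decoding worker threads and their start semaphores. Threading
 * is enabled only when more than one core is available; the usable core
 * count is capped at 16, giving core_count - 1 workers plus the main thread.
 */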
void vp8_decoder_create_threads(VP8D_COMP *pbi)
{
    int core_count = 0;
    int ithread;

    pbi->b_multithreaded_rd = 0;
    pbi->allocated_decoding_thread_count = 0;
    core_count = (pbi->max_threads > 16) ? 16 : pbi->max_threads;

    if (core_count > 1)
    {
        pbi->b_multithreaded_rd = 1;
        pbi->decoding_thread_count = core_count - 1;

        CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count));
        CHECK_MEM_ERROR(pbi->h_event_start_decoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
        CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count));
        vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count);
        CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count));

        for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++)
        {
            sem_init(&pbi->h_event_start_decoding[ithread], 0, 0);

            pbi->de_thread_data[ithread].ithread = ithread;
            pbi->de_thread_data[ithread].ptr1 = (void *)pbi;
            pbi->de_thread_data[ithread].ptr2 = (void *)&pbi->mb_row_di[ithread];

            pthread_create(&pbi->h_decoding_thread[ithread], 0, thread_decoding_proc, (&pbi->de_thread_data[ithread]));
        }

        sem_init(&pbi->h_event_end_decoding, 0, 0);

        pbi->allocated_decoding_thread_count = pbi->decoding_thread_count;
    }
}

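/* Free the per-row synchronization counters and the above-row/left-column
 * pixel context buffers. mb_rows is the row count the buffers were
 * allocated with, which may differ from the current frame's row count.
 */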
void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows)
{
    VP8_COMMON *const pc = &pbi->common;
    int i;

    if (pbi->b_multithreaded_rd)
    {
        vpx_free(pbi->mt_current_mb_col);
        pbi->mt_current_mb_col = NULL;

        /* Free above_row buffers. */
        if (pbi->mt_yabove_row)
        {
            for (i = 0; i < mb_rows; i++)
            {
                vpx_free(pbi->mt_yabove_row[i]);
                pbi->mt_yabove_row[i] = NULL;
            }
            vpx_free(pbi->mt_yabove_row);
            pbi->mt_yabove_row = NULL;
        }

        if (pbi->mt_uabove_row)
        {
            for (i = 0; i < mb_rows; i++)
            {
                vpx_free(pbi->mt_uabove_row[i]);
                pbi->mt_uabove_row[i] = NULL;
            }
            vpx_free(pbi->mt_uabove_row);
            pbi->mt_uabove_row = NULL;
        }

        if (pbi->mt_vabove_row)
        {
            for (i = 0; i < mb_rows; i++)
            {
                vpx_free(pbi->mt_vabove_row[i]);
                pbi->mt_vabove_row[i] = NULL;
            }
            vpx_free(pbi->mt_vabove_row);
            pbi->mt_vabove_row = NULL;
        }

        /* Free left_col buffers. */
        if (pbi->mt_yleft_col)
        {
            for (i = 0; i < mb_rows; i++)
            {
                vpx_free(pbi->mt_yleft_col[i]);
                pbi->mt_yleft_col[i] = NULL;
            }
            vpx_free(pbi->mt_yleft_col);
            pbi->mt_yleft_col = NULL;
        }

        if (pbi->mt_uleft_col)
        {
            for (i = 0; i < mb_rows; i++)
            {
                vpx_free(pbi->mt_uleft_col[i]);
                pbi->mt_uleft_col[i] = NULL;
            }
            vpx_free(pbi->mt_uleft_col);
            pbi->mt_uleft_col = NULL;
        }

        if (pbi->mt_vleft_col)
        {
            for (i = 0; i < mb_rows; i++)
            {
                vpx_free(pbi->mt_vleft_col[i]);
                pbi->mt_vleft_col[i] = NULL;
            }
            vpx_free(pbi->mt_vleft_col);
            pbi->mt_vleft_col = NULL;
        }
    }
}

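/* Allocate per-row synchronization counters and above-row/left-column pixel
 * context buffers for the (16-aligned) frame width, freeing any buffers
 * from the previous size first. sync_range grows with width so wider
 * frames synchronize between rows less often.
 */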
void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
{
    VP8_COMMON *const pc = &pbi->common;
    int i;
    int uv_width;

    if (pbi->b_multithreaded_rd)
    {
        vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows);

        /* our internal buffers are always multiples of 16 */
        if ((width & 0xf) != 0)
            width += 16 - (width & 0xf);

        if (width < 640) pbi->sync_range = 1;
        else if (width <= 1280) pbi->sync_range = 8;
        else if (width <= 2560) pbi->sync_range = 16;
        else pbi->sync_range = 32;

        uv_width = width >> 1;

        /* Allocate an int for each mb row. */
        CHECK_MEM_ERROR(pbi->mt_current_mb_col, vpx_malloc(sizeof(int) * pc->mb_rows));

        /* Allocate memory for above_row buffers. */
        CHECK_MEM_ERROR(pbi->mt_yabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_calloc(sizeof(unsigned char) * (width + (VP8BORDERINPIXELS << 1)), 1));

        CHECK_MEM_ERROR(pbi->mt_uabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));

        CHECK_MEM_ERROR(pbi->mt_vabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));

        /* Allocate memory for left_col buffers. */
        CHECK_MEM_ERROR(pbi->mt_yleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1));

        CHECK_MEM_ERROR(pbi->mt_uleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_uleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));

        CHECK_MEM_ERROR(pbi->mt_vleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
    }
}

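/* Shut down the worker threads: clear the run flag, post each start
 * semaphore so every thread can observe the flag and exit, then join the
 * threads and release the semaphores and per-thread data.
 */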
void vp8_decoder_remove_threads(VP8D_COMP *pbi)
{
    /* shutdown MB Decoding thread; */
    if (pbi->b_multithreaded_rd)
    {
        int i;

        pbi->b_multithreaded_rd = 0;

        /* allow all threads to exit */
        for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
        {
            sem_post(&pbi->h_event_start_decoding[i]);
            pthread_join(pbi->h_decoding_thread[i], NULL);
        }

        for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
        {
            sem_destroy(&pbi->h_event_start_decoding[i]);
        }

        sem_destroy(&pbi->h_event_end_decoding);

        vpx_free(pbi->h_decoding_thread);
        pbi->h_decoding_thread = NULL;

        vpx_free(pbi->h_event_start_decoding);
        pbi->h_event_start_decoding = NULL;

        vpx_free(pbi->mb_row_di);
        pbi->mb_row_di = NULL;

        vpx_free(pbi->de_thread_data);
        pbi->de_thread_data = NULL;
    }
}

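/* Compute the baseline loop-filter level for each segment (absolute or
 * delta, clamped to [0, MAX_LOOP_FILTER]) and (re)initialize the loop
 * filter tables when the filter type or sharpness level has changed.
 */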
static void lpf_init(VP8D_COMP *pbi, int default_filt_lvl)
{
    VP8_COMMON *cm = &pbi->common;
    MACROBLOCKD *mbd = &pbi->mb;
    /*YV12_BUFFER_CONFIG *post = &cm->new_frame;*/ /*frame_to_show;*/
    loop_filter_info *lfi = cm->lf_info;
    FRAME_TYPE frame_type = cm->frame_type;

    /*int mb_row;
    int mb_col;
    int baseline_filter_level[MAX_MB_SEGMENTS];*/
    int alt_flt_enabled = mbd->segmentation_enabled;

    int i;
    /*unsigned char *y_ptr, *u_ptr, *v_ptr;*/

    /* Note the baseline filter values for each segment */
    if (alt_flt_enabled)
    {
        for (i = 0; i < MAX_MB_SEGMENTS; i++)
        {
            /* Abs value */
            if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
                pbi->mt_baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
            /* Delta Value */
            else
            {
                pbi->mt_baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
                pbi->mt_baseline_filter_level[i] = (pbi->mt_baseline_filter_level[i] >= 0) ? ((pbi->mt_baseline_filter_level[i] <= MAX_LOOP_FILTER) ? pbi->mt_baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
            }
        }
    }
    else
    {
        for (i = 0; i < MAX_MB_SEGMENTS; i++)
            pbi->mt_baseline_filter_level[i] = default_filt_lvl;
    }

    /* Initialize the loop filter for this frame. */
    if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
        vp8_init_loop_filter(cm);
    else if (frame_type != cm->last_frame_type)
        vp8_frame_init_loop_filter(lfi, frame_type);
}

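/* Main-thread driver for multithreaded decoding: prime the above-row and
 * left-column context buffers, start the workers, decode rows 0,
 * decoding_thread_count+1, ... on the main thread using the same row-sync
 * protocol, and finally wait on the end-of-frame semaphore.
 */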
void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd)
{
    int mb_row;
    VP8_COMMON *pc = &pbi->common;

    int num_part = 1 << pbi->common.multi_token_partition;
    int i;
    volatile int *last_row_current_mb_col = NULL;
    int nsync = pbi->sync_range;

    int filter_level;
    loop_filter_info *lfi = pc->lf_info;
    int alt_flt_enabled = xd->segmentation_enabled;
    int Segment;

    if (pbi->common.filter_level)
    {
        /* Set above_row buffer to 127 for decoding first MB row */
        vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS - 1, 127, pc->yv12_fb[pc->lst_fb_idx].y_width + 5);
        vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS >> 1) - 1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width >> 1) + 5);
        vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS >> 1) - 1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width >> 1) + 5);

        for (i = 1; i < pc->mb_rows; i++)
        {
            vpx_memset(pbi->mt_yabove_row[i] + VP8BORDERINPIXELS - 1, (unsigned char)129, 1);
            vpx_memset(pbi->mt_uabove_row[i] + (VP8BORDERINPIXELS >> 1) - 1, (unsigned char)129, 1);
            vpx_memset(pbi->mt_vabove_row[i] + (VP8BORDERINPIXELS >> 1) - 1, (unsigned char)129, 1);
        }

        /* Set left_col to 129 initially */
        for (i = 0; i < pc->mb_rows; i++)
        {
            vpx_memset(pbi->mt_yleft_col[i], (unsigned char)129, 16);
            vpx_memset(pbi->mt_uleft_col[i], (unsigned char)129, 8);
            vpx_memset(pbi->mt_vleft_col[i], (unsigned char)129, 8);
        }

        lpf_init(pbi, pc->filter_level);
    }

    setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);

    for (i = 0; i < pbi->decoding_thread_count; i++)
        sem_post(&pbi->h_event_start_decoding[i]);

    for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
    {
        xd->current_bc = &pbi->mbc[mb_row % num_part];

        /* vp8_decode_mb_row(pbi, pc, mb_row, xd); */
        {
            int recon_yoffset, recon_uvoffset;
            int mb_col;
            int ref_fb_idx = pc->lst_fb_idx;
            int dst_fb_idx = pc->new_fb_idx;
            int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
            int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;

            /* volatile int *last_row_current_mb_col = NULL; */
            if (mb_row > 0)
                last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row - 1];

            vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
            recon_yoffset = mb_row * recon_y_stride * 16;
            recon_uvoffset = mb_row * recon_uv_stride * 8;

            /* reset above block coeffs */
            xd->above_context = pc->above_context;
            xd->up_available = (mb_row != 0);

            xd->mb_to_top_edge = -((mb_row * 16)) << 3;
            xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;

            for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
            {
                if (mb_row > 0 && (mb_col & (nsync - 1)) == 0)
                {
                    /* Busy-wait until the thread decoding the row above is
                     * at least nsync macroblocks ahead.
                     */
                    while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
                    {
                        thread_sleep(0);
                    }
                }

                if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
                {
                    for (i = 0; i < 16; i++)
                    {
                        BLOCKD *d = &xd->block[i];
                        vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
                    }
                }

                /* Distance of Mb to the various image edges.
                 * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units.
                 */
                xd->mb_to_left_edge = -((mb_col * 16) << 3);
                xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;

                xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
                xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
                xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;

                xd->left_available = (mb_col != 0);

                /* Select the appropriate reference frame for this MB */
                if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
                    ref_fb_idx = pc->lst_fb_idx;
                else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
                    ref_fb_idx = pc->gld_fb_idx;
                else
                    ref_fb_idx = pc->alt_fb_idx;

                xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
                xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
                xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;

                if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME)
                {
                    /* propagate errors from reference frames */
                    xd->corrupted |= pc->yv12_fb[ref_fb_idx].corrupted;
                }

                vp8_build_uvmvs(xd, pc->full_pixel);
                decode_macroblock(pbi, xd, mb_row, mb_col);

                /* check if the boolean decoder has suffered an error */
                xd->corrupted |= vp8dx_bool_error(xd->current_bc);

                if (pbi->common.filter_level)
                {
                    /* Save decoded MB last row data for next-row decoding */
                    if (mb_row != pc->mb_rows - 1)
                    {
                        vpx_memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col * 16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
                        vpx_memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col * 8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
                        vpx_memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col * 8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
                    }

                    /* save left_col for next MB decoding */
                    if (mb_col != pc->mb_cols - 1)
                    {
                        MODE_INFO *next = xd->mode_info_context + 1;

                        if (xd->frame_type == KEY_FRAME || next->mbmi.ref_frame == INTRA_FRAME)
                        {
                            for (i = 0; i < 16; i++)
                                pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer[i * recon_y_stride + 15];

                            for (i = 0; i < 8; i++)
                            {
                                pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer[i * recon_uv_stride + 7];
                                pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer[i * recon_uv_stride + 7];
                            }
                        }
                    }

                    /* update loopfilter info */
                    Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
                    filter_level = pbi->mt_baseline_filter_level[Segment];
                    /* Distance of Mb to the various image edges.
                     * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units.
                     * Apply any context driven MB level adjustment.
                     */
                    filter_level = vp8_adjust_mb_lf_value(xd, filter_level);

                    /* loopfilter on this macroblock. */
                    if (filter_level)
                    {
                        if (mb_col > 0)
                            pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);

                        if (xd->mode_info_context->mbmi.dc_diff > 0)
                            pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);

                        /* don't apply across umv border */
                        if (mb_row > 0)
                            pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);

                        if (xd->mode_info_context->mbmi.dc_diff > 0)
                            pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
                    }
                }

                recon_yoffset += 16;
                recon_uvoffset += 8;

                ++xd->mode_info_context;  /* next mb */

                xd->above_context++;

                pbi->mt_current_mb_col[mb_row] = mb_col;
            }

            /* adjust to the next row of mbs */
            if (pbi->common.filter_level)
            {
                if (mb_row != pc->mb_rows - 1)
                {
                    int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS;
                    int lastuv = (pc->yv12_fb[ref_fb_idx].y_width >> 1) + (VP8BORDERINPIXELS >> 1);

                    for (i = 0; i < 4; i++)
                    {
                        pbi->mt_yabove_row[mb_row + 1][lasty + i] = pbi->mt_yabove_row[mb_row + 1][lasty - 1];
                        pbi->mt_uabove_row[mb_row + 1][lastuv + i] = pbi->mt_uabove_row[mb_row + 1][lastuv - 1];
                        pbi->mt_vabove_row[mb_row + 1][lastuv + i] = pbi->mt_vabove_row[mb_row + 1][lastuv - 1];
                    }
                }
            }
            else
                vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);

            ++xd->mode_info_context;  /* skip prediction column */
        }

        xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
    }

    sem_wait(&pbi->h_event_end_decoding);  /* add back for each frame */
}