/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
#include <mach/mach_init.h>

#include "onyxd_int.h"
#include "vpx_mem/vpx_mem.h"
#include "threading.h"

#include "loopfilter.h"

#include "vpx_ports/vpx_timer.h"
#include "detokenize.h"
#include "reconinter.h"
#include "reconintra_mt.h"
extern void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd);
extern void clamp_mvs(MACROBLOCKD *xd);
extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);
#if CONFIG_RUNTIME_CPU_DETECT
#define RTCD_VTABLE(x) (&(pbi)->common.rtcd.x)
#else
#define RTCD_VTABLE(x) NULL
#endif
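
/* Copy decoder state from the main thread's MACROBLOCKD (xd) into each
 * per-thread MB_ROW_DEC copy in mbrd, so that worker threads can decode
 * macroblock rows independently. mt_current_mb_col[] is reset to -1 for
 * every row, meaning "no macroblock decoded yet in this row".
 */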
void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
{
#if CONFIG_MULTITHREAD
    VP8_COMMON *const pc = &pbi->common;
    int i, j;

    for (i = 0; i < count; i++)
    {
        MACROBLOCKD *mbd = &mbrd[i].mbd;
#if CONFIG_RUNTIME_CPU_DETECT
        mbd->rtcd = xd->rtcd;
#endif
        mbd->subpixel_predict      = xd->subpixel_predict;
        mbd->subpixel_predict8x4   = xd->subpixel_predict8x4;
        mbd->subpixel_predict8x8   = xd->subpixel_predict8x8;
        mbd->subpixel_predict16x16 = xd->subpixel_predict16x16;

        mbd->mode_info_context = pc->mi + pc->mode_info_stride * (i + 1);
        mbd->mode_info_stride  = pc->mode_info_stride;

        mbd->frame_type = pc->frame_type;
        mbd->frames_since_golden = pc->frames_since_golden;
        mbd->frames_till_alt_ref_frame = pc->frames_till_alt_ref_frame;

        mbd->pre = pc->yv12_fb[pc->lst_fb_idx];
        mbd->dst = pc->yv12_fb[pc->new_fb_idx];

        vp8_setup_block_dptrs(mbd);
        vp8_build_block_doffsets(mbd);
        mbd->segmentation_enabled = xd->segmentation_enabled;
        mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
        vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));

        /*signed char ref_lf_deltas[MAX_REF_LF_DELTAS];*/
        vpx_memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
        /*signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];*/
        vpx_memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
        /*unsigned char mode_ref_lf_delta_enabled;
        unsigned char mode_ref_lf_delta_update;*/
        mbd->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled;
        mbd->mode_ref_lf_delta_update  = xd->mode_ref_lf_delta_update;

        mbd->current_bc = &pbi->bc2;

        for (j = 0; j < 25; j++)
        {
            mbd->block[j].dequant = xd->block[j].dequant;
        }
    }

    for (i = 0; i < pc->mb_rows; i++)
        pbi->mt_current_mb_col[i] = -1;
#else
    (void) pbi;
    (void) xd;
    (void) mbrd;
    (void) count;
#endif
}
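
/* Decode one macroblock: read residual tokens (unless the MB is
 * skipped), build the intra or inter prediction, then dequantize,
 * inverse-transform and reconstruct. When the MB carries no residual
 * (eobtotal == 0 and not B_PRED/SPLITMV), the "_s" predictor variants
 * write straight into the destination frame and the transform stage is
 * bypassed.
 */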
void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col)
{
#if CONFIG_MULTITHREAD
    int eobtotal = 0;
    int i, do_clamp = xd->mode_info_context->mbmi.need_to_clamp_mvs;
    VP8_COMMON *pc = &pbi->common;

    if (xd->mode_info_context->mbmi.mb_skip_coeff)
    {
        vp8_reset_mb_tokens_context(xd);
    }
    else
    {
        eobtotal = vp8_decode_mb_tokens(pbi, xd);
    }

    /* Perform temporary clamping of the MV to be used for prediction */
    if (do_clamp)
    {
        clamp_mvs(xd);
    }

    xd->mode_info_context->mbmi.dc_diff = 1;

    if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV && eobtotal == 0)
    {
        xd->mode_info_context->mbmi.dc_diff = 0;

        /*mt_skip_recon_mb(pbi, xd, mb_row, mb_col);*/
        if (xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
        {
            vp8mt_build_intra_predictors_mbuv_s(pbi, xd, mb_row, mb_col);
            vp8mt_build_intra_predictors_mby_s(pbi, xd, mb_row, mb_col);
        }
        else
        {
            vp8_build_inter_predictors_mb_s(xd);
        }

        return;
    }

    if (xd->segmentation_enabled)
        mb_init_dequantizer(pbi, xd);

    /* do prediction */
    if (xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
    {
        vp8mt_build_intra_predictors_mbuv(pbi, xd, mb_row, mb_col);

        if (xd->mode_info_context->mbmi.mode != B_PRED)
        {
            vp8mt_build_intra_predictors_mby(pbi, xd, mb_row, mb_col);
        }
        else
        {
            vp8mt_intra_prediction_down_copy(pbi, xd, mb_row, mb_col);
        }
    }
    else
    {
        vp8_build_inter_predictors_mb(xd);
    }

    /* dequantization and idct */
    if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
    {
        BLOCKD *b = &xd->block[24];
        DEQUANT_INVOKE(&pbi->dequant, block)(b);

        /* do 2nd order transform on the dc block */
        if (xd->eobs[24] > 1)
        {
            IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff);
            ((int *)b->qcoeff)[0] = 0;
            ((int *)b->qcoeff)[1] = 0;
            ((int *)b->qcoeff)[2] = 0;
            ((int *)b->qcoeff)[3] = 0;
            ((int *)b->qcoeff)[4] = 0;
            ((int *)b->qcoeff)[5] = 0;
            ((int *)b->qcoeff)[6] = 0;
            ((int *)b->qcoeff)[7] = 0;
        }
        else
        {
            IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], b->diff);
            ((int *)b->qcoeff)[0] = 0;
        }

        DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block)
                        (xd->qcoeff, xd->block[0].dequant,
                         xd->predictor, xd->dst.y_buffer,
                         xd->dst.y_stride, xd->eobs, xd->block[24].diff);
    }
    else if ((xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) && xd->mode_info_context->mbmi.mode == B_PRED)
    {
        for (i = 0; i < 16; i++)
        {
            BLOCKD *b = &xd->block[i];
            vp8mt_predict_intra4x4(pbi, xd, b->bmi.mode, b->predictor, mb_row, mb_col, i);

            if (xd->eobs[i] > 1)
            {
                DEQUANT_INVOKE(&pbi->dequant, idct_add)
                    (b->qcoeff, b->dequant, b->predictor,
                     *(b->base_dst) + b->dst, 16, b->dst_stride);
            }
            else
            {
                IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
                    (b->qcoeff[0] * b->dequant[0], b->predictor,
                     *(b->base_dst) + b->dst, 16, b->dst_stride);
                ((int *)b->qcoeff)[0] = 0;
            }
        }
    }
    else
    {
        DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block)
                        (xd->qcoeff, xd->block[0].dequant,
                         xd->predictor, xd->dst.y_buffer,
                         xd->dst.y_stride, xd->eobs);
    }

    DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
                    (xd->qcoeff+16*16, xd->block[16].dequant,
                     xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer,
                     xd->dst.uv_stride, xd->eobs+16);
#else
    (void) pbi;
    (void) xd;
    (void) mb_row;
    (void) mb_col;
#endif
}
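
/* Worker thread entry point. Each worker waits on its start semaphore,
 * then decodes every (decoding_thread_count + 1)-th macroblock row
 * starting at row ithread + 1; the main thread in vp8mt_decode_mb_rows()
 * takes the rows in between. The worker that finishes the last row of
 * the frame posts h_event_end_decoding.
 */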
THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
{
#if CONFIG_MULTITHREAD
    int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
    VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
    MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
    ENTROPY_CONTEXT_PLANES mb_row_left_context;

    while (1)
    {
        if (pbi->b_multithreaded_rd == 0)
            break;

        /*if(WaitForSingleObject(pbi->h_event_start_decoding[ithread], INFINITE) == WAIT_OBJECT_0)*/
        if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0)
        {
            if (pbi->b_multithreaded_rd == 0)
                break;
            else
            {
                VP8_COMMON *pc = &pbi->common;
                MACROBLOCKD *xd = &mbrd->mbd;

                int mb_row;
                int num_part = 1 << pbi->common.multi_token_partition;
                volatile int *last_row_current_mb_col;
                int nsync = pbi->sync_range;
                for (mb_row = ithread+1; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
                {
                    int i;
                    int recon_yoffset, recon_uvoffset;
                    int mb_col;
                    int ref_fb_idx = pc->lst_fb_idx;
                    int dst_fb_idx = pc->new_fb_idx;
                    int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
                    int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;

                    int filter_level;
                    loop_filter_info *lfi = pc->lf_info;
                    int alt_flt_enabled = xd->segmentation_enabled;
                    int Segment;

                    pbi->mb_row_di[ithread].mb_row = mb_row;
                    pbi->mb_row_di[ithread].mbd.current_bc = &pbi->mbc[mb_row%num_part];

                    last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row-1];

                    recon_yoffset = mb_row * recon_y_stride * 16;
                    recon_uvoffset = mb_row * recon_uv_stride * 8;
                    /* reset above block coeffs */

                    xd->above_context = pc->above_context;
                    xd->left_context = &mb_row_left_context;
                    vpx_memset(&mb_row_left_context, 0, sizeof(mb_row_left_context));
                    xd->up_available = (mb_row != 0);

                    xd->mb_to_top_edge = -((mb_row * 16)) << 3;
                    xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
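
                    /* Sync with the thread decoding the row above: before
                     * decoding macroblock mb_col, wait until that thread has
                     * advanced at least nsync macroblocks past it, so the
                     * above-row pixels and contexts this row reads are final.
                     * Progress is published through the volatile
                     * mt_current_mb_col[] entries; a value of pc->mb_cols - 1
                     * marks a finished row.
                     */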
                    for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
                    {
                        if ((mb_col & (nsync-1)) == 0)
                        {
                            while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
                            {
                                x86_pause_hint();
                                thread_sleep(0);
                            }
                        }

                        if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
                        {
                            for (i = 0; i < 16; i++)
                            {
                                BLOCKD *d = &xd->block[i];
                                vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
                            }
                        }

                        if(pbi->common.filter_level)
                        {
                            /*update loopfilter info*/
                            Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
                            filter_level = pbi->mt_baseline_filter_level[Segment];
                            /* Distance of Mb to the various image edges.
                             * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
                             * Apply any context driven MB level adjustment
                             */
                            vp8_adjust_mb_lf_value(xd, &filter_level);
                        }

                        /* Distance of Mb to the various image edges.
                         * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
                         */
                        xd->mb_to_left_edge = -((mb_col * 16) << 3);
                        xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;

                        xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
                        xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
                        xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;

                        xd->left_available = (mb_col != 0);

                        /* Select the appropriate reference frame for this MB */
                        if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
                            ref_fb_idx = pc->lst_fb_idx;
                        else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
                            ref_fb_idx = pc->gld_fb_idx;
                        else
                            ref_fb_idx = pc->alt_fb_idx;

                        xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
                        xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
                        xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;

                        vp8_build_uvmvs(xd, pc->full_pixel);
                        vp8mt_decode_macroblock(pbi, xd, mb_row, mb_col);

                        if (pbi->common.filter_level)
                        {
                            if( mb_row != pc->mb_rows-1 )
                            {
                                /* Save decoded MB last row data for next-row decoding */
                                vpx_memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
                                vpx_memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
                                vpx_memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
                            }

                            /* save left_col for next MB decoding */
                            if(mb_col != pc->mb_cols-1)
                            {
                                MODE_INFO *next = xd->mode_info_context +1;

                                if (xd->frame_type == KEY_FRAME || next->mbmi.ref_frame == INTRA_FRAME)
                                {
                                    for (i = 0; i < 16; i++)
                                        pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer[i * recon_y_stride + 15];
                                    for (i = 0; i < 8; i++)
                                    {
                                        pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer[i * recon_uv_stride + 7];
                                        pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer[i * recon_uv_stride + 7];
                                    }
                                }
                            }

                            /* loopfilter on this macroblock. */
                            if (filter_level)
                            {
                                if (mb_col > 0)
                                    pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);

                                if (xd->mode_info_context->mbmi.dc_diff > 0)
                                    pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);

                                /* don't apply across umv border */
                                if (mb_row > 0)
                                    pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);

                                if (xd->mode_info_context->mbmi.dc_diff > 0)
                                    pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
                            }
                        }

                        recon_yoffset += 16;
                        recon_uvoffset += 8;

                        ++xd->mode_info_context;  /* next mb */

                        xd->above_context++;

                        /*pbi->mb_row_di[ithread].current_mb_col = mb_col;*/
                        pbi->mt_current_mb_col[mb_row] = mb_col;
                    }
                    /* adjust to the next row of mbs */
                    if (pbi->common.filter_level)
                    {
                        if(mb_row != pc->mb_rows-1)
                        {
                            int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS;
                            int lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1);

                            for (i = 0; i < 4; i++)
                            {
                                pbi->mt_yabove_row[mb_row+1][lasty + i] = pbi->mt_yabove_row[mb_row+1][lasty-1];
                                pbi->mt_uabove_row[mb_row+1][lastuv + i] = pbi->mt_uabove_row[mb_row+1][lastuv-1];
                                pbi->mt_vabove_row[mb_row+1][lastuv + i] = pbi->mt_vabove_row[mb_row+1][lastuv-1];
                            }
                        }
                    }
                    else
                        vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);

                    ++xd->mode_info_context;  /* skip prediction column */

                    /* since we have multithread */
                    xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
                }
            }
        }

        /* add this to each frame */
        if ((mbrd->mb_row == pbi->common.mb_rows-1) || ((mbrd->mb_row == pbi->common.mb_rows-2) && (pbi->common.mb_rows % (pbi->decoding_thread_count+1))==1))
        {
            /*SetEvent(pbi->h_event_end_decoding);*/
            sem_post(&pbi->h_event_end_decoding);
        }
    }

    return 0;
#else
    (void) p_data;
    return 0;
#endif
}
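
/* Create the worker threads and their per-thread row decoders. The
 * thread count is derived from max_threads, capped at 16; one "thread"
 * is the calling thread itself, so core_count - 1 workers are spawned.
 */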
void vp8_decoder_create_threads(VP8D_COMP *pbi)
{
#if CONFIG_MULTITHREAD
    int core_count = 0;
    int ithread;

    pbi->b_multithreaded_rd = 0;
    pbi->allocated_decoding_thread_count = 0;
    core_count = (pbi->max_threads > 16) ? 16 : pbi->max_threads;

    if (core_count > 1)
    {
        pbi->b_multithreaded_rd = 1;
        pbi->decoding_thread_count = core_count - 1;

        CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count));
        CHECK_MEM_ERROR(pbi->h_event_start_decoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
        CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count));
        vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count);
        CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count));

        for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++)
        {
            sem_init(&pbi->h_event_start_decoding[ithread], 0, 0);

            pbi->de_thread_data[ithread].ithread = ithread;
            pbi->de_thread_data[ithread].ptr1 = (void *)pbi;
            pbi->de_thread_data[ithread].ptr2 = (void *) &pbi->mb_row_di[ithread];

            pthread_create(&pbi->h_decoding_thread[ithread], 0, vp8_thread_decoding_proc, (&pbi->de_thread_data[ithread]));
        }

        sem_init(&pbi->h_event_end_decoding, 0, 0);

        pbi->allocated_decoding_thread_count = pbi->decoding_thread_count;
    }
#else
    (void) pbi;
#endif
}
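
/* Free the per-row synchronization and border-pixel buffers allocated
 * by vp8mt_alloc_temp_buffers(). mb_rows must be the row count the
 * buffers were allocated with, which can differ from the current
 * frame's row count, presumably when the frame size has changed.
 */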
void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows)
{
#if CONFIG_MULTITHREAD
    VP8_COMMON *const pc = &pbi->common;
    int i;

    if (pbi->b_multithreaded_rd)
    {
        if (pbi->mt_current_mb_col)
        {
            vpx_free(pbi->mt_current_mb_col);
            pbi->mt_current_mb_col = NULL;
        }

        /* Free above_row buffers. */
        if (pbi->mt_yabove_row)
        {
            for (i=0; i< mb_rows; i++)
            {
                if (pbi->mt_yabove_row[i])
                {
                    vpx_free(pbi->mt_yabove_row[i]);
                    pbi->mt_yabove_row[i] = NULL;
                }
            }
            vpx_free(pbi->mt_yabove_row);
            pbi->mt_yabove_row = NULL;
        }

        if (pbi->mt_uabove_row)
        {
            for (i=0; i< mb_rows; i++)
            {
                if (pbi->mt_uabove_row[i])
                {
                    vpx_free(pbi->mt_uabove_row[i]);
                    pbi->mt_uabove_row[i] = NULL;
                }
            }
            vpx_free(pbi->mt_uabove_row);
            pbi->mt_uabove_row = NULL;
        }

        if (pbi->mt_vabove_row)
        {
            for (i=0; i< mb_rows; i++)
            {
                if (pbi->mt_vabove_row[i])
                {
                    vpx_free(pbi->mt_vabove_row[i]);
                    pbi->mt_vabove_row[i] = NULL;
                }
            }
            vpx_free(pbi->mt_vabove_row);
            pbi->mt_vabove_row = NULL;
        }

        /* Free left_col buffers. */
        if (pbi->mt_yleft_col)
        {
            for (i=0; i< mb_rows; i++)
            {
                if (pbi->mt_yleft_col[i])
                {
                    vpx_free(pbi->mt_yleft_col[i]);
                    pbi->mt_yleft_col[i] = NULL;
                }
            }
            vpx_free(pbi->mt_yleft_col);
            pbi->mt_yleft_col = NULL;
        }

        if (pbi->mt_uleft_col)
        {
            for (i=0; i< mb_rows; i++)
            {
                if (pbi->mt_uleft_col[i])
                {
                    vpx_free(pbi->mt_uleft_col[i]);
                    pbi->mt_uleft_col[i] = NULL;
                }
            }
            vpx_free(pbi->mt_uleft_col);
            pbi->mt_uleft_col = NULL;
        }

        if (pbi->mt_vleft_col)
        {
            for (i=0; i< mb_rows; i++)
            {
                if (pbi->mt_vleft_col[i])
                {
                    vpx_free(pbi->mt_vleft_col[i]);
                    pbi->mt_vleft_col[i] = NULL;
                }
            }
            vpx_free(pbi->mt_vleft_col);
            pbi->mt_vleft_col = NULL;
        }
    }
#else
    (void) pbi;
    (void) mb_rows;
#endif
}
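
/* (Re)allocate the per-row buffers used to share reconstructed border
 * pixels between threads. sync_range sets how far (in macroblocks) the
 * row above must stay ahead before the row below re-checks progress;
 * wider frames use a coarser range, presumably to reduce polling
 * overhead. sync_range must be a power of two, since progress checks
 * use (mb_col & (nsync-1)) == 0.
 */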
void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
{
#if CONFIG_MULTITHREAD
    VP8_COMMON *const pc = &pbi->common;
    int i;
    int uv_width;

    if (pbi->b_multithreaded_rd)
    {
        vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows);

        /* our internal buffers are always multiples of 16 */
        if ((width & 0xf) != 0)
            width += 16 - (width & 0xf);

        if (width < 640) pbi->sync_range = 1;
        else if (width <= 1280) pbi->sync_range = 8;
        else if (width <= 2560) pbi->sync_range = 16;
        else pbi->sync_range = 32;

        uv_width = width >> 1;

        /* Allocate an int for each mb row. */
        CHECK_MEM_ERROR(pbi->mt_current_mb_col, vpx_malloc(sizeof(int) * pc->mb_rows));

        /* Allocate memory for above_row buffers. */
        CHECK_MEM_ERROR(pbi->mt_yabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
        for (i=0; i< pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_calloc(sizeof(unsigned char) * (width + (VP8BORDERINPIXELS<<1)), 1));

        CHECK_MEM_ERROR(pbi->mt_uabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
        for (i=0; i< pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));

        CHECK_MEM_ERROR(pbi->mt_vabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
        for (i=0; i< pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));

        /* Allocate memory for left_col buffers. */
        CHECK_MEM_ERROR(pbi->mt_yleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
        for (i=0; i< pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1));

        CHECK_MEM_ERROR(pbi->mt_uleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
        for (i=0; i< pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_uleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));

        CHECK_MEM_ERROR(pbi->mt_vleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
        for (i=0; i< pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
    }
#else
    (void) pbi;
    (void) width;
    (void) prev_mb_rows;
#endif
}
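
/* Shut down the decoding threads: clear b_multithreaded_rd so workers
 * fall out of their loop, post each start semaphore to unblock them,
 * join them, then destroy the semaphores and free thread bookkeeping.
 */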
void vp8_decoder_remove_threads(VP8D_COMP *pbi)
{
#if CONFIG_MULTITHREAD
    int i;

    /* shutdown MB Decoding thread; */
    if (pbi->b_multithreaded_rd)
    {
        pbi->b_multithreaded_rd = 0;

        /* allow all threads to exit */
        for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
        {
            sem_post(&pbi->h_event_start_decoding[i]);
            pthread_join(pbi->h_decoding_thread[i], NULL);
        }

        for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
        {
            sem_destroy(&pbi->h_event_start_decoding[i]);
        }

        sem_destroy(&pbi->h_event_end_decoding);

        if (pbi->h_decoding_thread)
        {
            vpx_free(pbi->h_decoding_thread);
            pbi->h_decoding_thread = NULL;
        }

        if (pbi->h_event_start_decoding)
        {
            vpx_free(pbi->h_event_start_decoding);
            pbi->h_event_start_decoding = NULL;
        }

        if (pbi->mb_row_di)
        {
            vpx_free(pbi->mb_row_di);
            pbi->mb_row_di = NULL;
        }

        if (pbi->de_thread_data)
        {
            vpx_free(pbi->de_thread_data);
            pbi->de_thread_data = NULL;
        }
    }
#else
    (void) pbi;
#endif
}
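
/* Compute the baseline loop-filter strength for each segment (absolute
 * or delta-coded, clamped to [0, MAX_LOOP_FILTER]) and re-initialize
 * the loop filter when the filter type or sharpness has changed since
 * the last frame.
 */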
void vp8mt_lpf_init( VP8D_COMP *pbi, int default_filt_lvl)
{
#if CONFIG_MULTITHREAD
    VP8_COMMON *cm = &pbi->common;
    MACROBLOCKD *mbd = &pbi->mb;
    /*YV12_BUFFER_CONFIG *post = &cm->new_frame;*/ /*frame_to_show;*/
    loop_filter_info *lfi = cm->lf_info;
    FRAME_TYPE frame_type = cm->frame_type;

    /*int mb_row;
    int mb_col;
    int baseline_filter_level[MAX_MB_SEGMENTS];*/

    int alt_flt_enabled = mbd->segmentation_enabled;
    int i;
    /*unsigned char *y_ptr, *u_ptr, *v_ptr;*/

    /* Note the baseline filter values for each segment */
    if (alt_flt_enabled)
    {
        for (i = 0; i < MAX_MB_SEGMENTS; i++)
        {
            /* Abs value */
            if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
                pbi->mt_baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
            /* Delta Value */
            else
            {
                pbi->mt_baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
                pbi->mt_baseline_filter_level[i] = (pbi->mt_baseline_filter_level[i] >= 0) ? ((pbi->mt_baseline_filter_level[i] <= MAX_LOOP_FILTER) ? pbi->mt_baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  /* Clamp to valid range */
            }
        }
    }
    else
    {
        for (i = 0; i < MAX_MB_SEGMENTS; i++)
            pbi->mt_baseline_filter_level[i] = default_filt_lvl;
    }

    /* Initialize the loop filter for this frame. */
    if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
        vp8_init_loop_filter(cm);
    else if (frame_type != cm->last_frame_type)
        vp8_frame_init_loop_filter(lfi, frame_type);
#else
    (void) pbi;
    (void) default_filt_lvl;
#endif
}
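
/* Frame-level multithreaded decode, run on the main thread. Border
 * buffers are preset to 127 (above row) and 129 (left column), the
 * values VP8 prediction assumes outside the frame; workers are then
 * kicked off and the main thread decodes its own share of rows before
 * blocking on h_event_end_decoding.
 */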
void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
{
#if CONFIG_MULTITHREAD
    int mb_row;
    VP8_COMMON *pc = &pbi->common;

    int num_part = 1 << pbi->common.multi_token_partition;
    int i;
    volatile int *last_row_current_mb_col = NULL;
    int nsync = pbi->sync_range;

    int filter_level;
    loop_filter_info *lfi = pc->lf_info;
    int alt_flt_enabled = xd->segmentation_enabled;
    int Segment;

    if(pbi->common.filter_level)
    {
        /* Set above_row buffer to 127 for decoding first MB row */
        vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, pc->yv12_fb[pc->lst_fb_idx].y_width + 5);
        vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);
        vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);

        for (i=1; i<pc->mb_rows; i++)
        {
            vpx_memset(pbi->mt_yabove_row[i] + VP8BORDERINPIXELS-1, (unsigned char)129, 1);
            vpx_memset(pbi->mt_uabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
            vpx_memset(pbi->mt_vabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
        }

        /* Set left_col to 129 initially */
        for (i=0; i<pc->mb_rows; i++)
        {
            vpx_memset(pbi->mt_yleft_col[i], (unsigned char)129, 16);
            vpx_memset(pbi->mt_uleft_col[i], (unsigned char)129, 8);
            vpx_memset(pbi->mt_vleft_col[i], (unsigned char)129, 8);
        }

        vp8mt_lpf_init(pbi, pc->filter_level);
    }

    vp8_setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);

    for (i = 0; i < pbi->decoding_thread_count; i++)
        sem_post(&pbi->h_event_start_decoding[i]);
    for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
    {
        xd->current_bc = &pbi->mbc[mb_row%num_part];

        /* vp8_decode_mb_row(pbi, pc, mb_row, xd); */
        {
            int recon_yoffset, recon_uvoffset;
            int mb_col;
            int ref_fb_idx = pc->lst_fb_idx;
            int dst_fb_idx = pc->new_fb_idx;
            int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
            int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;

            /* volatile int *last_row_current_mb_col = NULL; */
            if (mb_row > 0)
                last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row-1];

            vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
            recon_yoffset = mb_row * recon_y_stride * 16;
            recon_uvoffset = mb_row * recon_uv_stride * 8;
            /* reset above block coeffs */

            xd->above_context = pc->above_context;
            xd->up_available = (mb_row != 0);

            xd->mb_to_top_edge = -((mb_row * 16)) << 3;
            xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;

            for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
            {
                if ( mb_row > 0 && (mb_col & (nsync-1)) == 0)
                {
                    while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
                    {
                        x86_pause_hint();
                        thread_sleep(0);
                    }
                }

                if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
                {
                    for (i = 0; i < 16; i++)
                    {
                        BLOCKD *d = &xd->block[i];
                        vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
                    }
                }

                if(pbi->common.filter_level)
                {
                    /* update loopfilter info */
                    Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
                    filter_level = pbi->mt_baseline_filter_level[Segment];
                    /* Distance of Mb to the various image edges.
                     * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
                     * Apply any context driven MB level adjustment
                     */
                    vp8_adjust_mb_lf_value(xd, &filter_level);
                }

                /* Distance of Mb to the various image edges.
                 * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
                 */
                xd->mb_to_left_edge = -((mb_col * 16) << 3);
                xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;

                xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
                xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
                xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;

                xd->left_available = (mb_col != 0);

                /* Select the appropriate reference frame for this MB */
                if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
                    ref_fb_idx = pc->lst_fb_idx;
                else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
                    ref_fb_idx = pc->gld_fb_idx;
                else
                    ref_fb_idx = pc->alt_fb_idx;

                xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
                xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
                xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;

                vp8_build_uvmvs(xd, pc->full_pixel);
                vp8mt_decode_macroblock(pbi, xd, mb_row, mb_col);

                if (pbi->common.filter_level)
                {
                    /* Save decoded MB last row data for next-row decoding */
                    if(mb_row != pc->mb_rows-1)
                    {
                        vpx_memcpy((pbi->mt_yabove_row[mb_row+1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
                        vpx_memcpy((pbi->mt_uabove_row[mb_row+1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
                        vpx_memcpy((pbi->mt_vabove_row[mb_row+1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
                    }

                    /* save left_col for next MB decoding */
                    if(mb_col != pc->mb_cols-1)
                    {
                        MODE_INFO *next = xd->mode_info_context +1;

                        if (xd->frame_type == KEY_FRAME || next->mbmi.ref_frame == INTRA_FRAME)
                        {
                            for (i = 0; i < 16; i++)
                                pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer[i * recon_y_stride + 15];
                            for (i = 0; i < 8; i++)
                            {
                                pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer[i * recon_uv_stride + 7];
                                pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer[i * recon_uv_stride + 7];
                            }
                        }
                    }

                    /* loopfilter on this macroblock. */
                    if (filter_level)
                    {
                        if (mb_col > 0)
                            pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);

                        if (xd->mode_info_context->mbmi.dc_diff > 0)
                            pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);

                        /* don't apply across umv border */
                        if (mb_row > 0)
                            pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);

                        if (xd->mode_info_context->mbmi.dc_diff > 0)
                            pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
                    }
                }

                recon_yoffset += 16;
                recon_uvoffset += 8;

                ++xd->mode_info_context;  /* next mb */

                xd->above_context++;

                pbi->mt_current_mb_col[mb_row] = mb_col;
            }

            /* adjust to the next row of mbs */
            if (pbi->common.filter_level)
            {
                if(mb_row != pc->mb_rows-1)
                {
                    int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS;
                    int lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1);

                    for (i = 0; i < 4; i++)
                    {
                        pbi->mt_yabove_row[mb_row+1][lasty + i] = pbi->mt_yabove_row[mb_row+1][lasty-1];
                        pbi->mt_uabove_row[mb_row+1][lastuv + i] = pbi->mt_uabove_row[mb_row+1][lastuv-1];
                        pbi->mt_vabove_row[mb_row+1][lastuv + i] = pbi->mt_vabove_row[mb_row+1][lastuv-1];
                    }
                }
            }
            else
                vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);

            ++xd->mode_info_context;  /* skip prediction column */
        }
        xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
    }

    sem_wait(&pbi->h_event_end_decoding);  /* add back for each frame */
#else
    (void) pbi;
    (void) xd;
#endif
}