/*
 *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license and patent
 *  grant that can be found in the LICENSE file in the root of the source
 *  tree. All contributing project authors may be found in the AUTHORS
 *  file in the root of the source tree.
 */
#include "onyxd_int.h"
#include "vpx_mem/vpx_mem.h"
#include "threading.h"

#include "loopfilter.h"
#include "extend.h"
#include "vpx_ports/vpx_timer.h"
extern void vp8_decode_mb_row(VP8D_COMP *pbi,
                              VP8_COMMON *pc,
                              int mb_row,
                              MACROBLOCKD *xd);

extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);
extern void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd);
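
// Copies the per-frame decoding state from the main thread's MACROBLOCKD into
// each worker thread's private MACROBLOCKD (one per MB_ROW_DEC entry) so the
// row-decoding threads can run independently of the main thread.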
void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
{
#if CONFIG_MULTITHREAD
    VP8_COMMON *const pc = &pbi->common;
    int i, j;

    for (i = 0; i < count; i++)
    {
        MACROBLOCKD *mbd = &mbrd[i].mbd;
#if CONFIG_RUNTIME_CPU_DETECT
        mbd->rtcd = xd->rtcd;   // share the runtime-selected function tables
#endif

        mbd->subpixel_predict      = xd->subpixel_predict;
        mbd->subpixel_predict8x4   = xd->subpixel_predict8x4;
        mbd->subpixel_predict8x8   = xd->subpixel_predict8x8;
        mbd->subpixel_predict16x16 = xd->subpixel_predict16x16;
        mbd->gf_active_ptr         = xd->gf_active_ptr;

        mbd->mode_info         = pc->mi - 1;
        mbd->mode_info_context = pc->mi + pc->mode_info_stride * (i + 1);
        mbd->mode_info_stride  = pc->mode_info_stride;

        mbd->frame_type                = pc->frame_type;
        mbd->frames_since_golden       = pc->frames_since_golden;
        mbd->frames_till_alt_ref_frame = pc->frames_till_alt_ref_frame;

        mbd->pre = pc->last_frame;
        mbd->dst = pc->new_frame;

        vp8_setup_block_dptrs(mbd);
        vp8_build_block_doffsets(mbd);
        mbd->segmentation_enabled  = xd->segmentation_enabled;
        mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
        vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));

        mbd->mbmi.mode    = DC_PRED;
        mbd->mbmi.uv_mode = DC_PRED;

        mbd->current_bc = &pbi->bc2;

        for (j = 0; j < 25; j++)
        {
            mbd->block[j].dequant = xd->block[j].dequant;
        }
    }
#else
    (void) pbi;
    (void) xd;
    (void) mbrd;
    (void) count;
#endif
}
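
// Worker thread: repeatedly waits on its semaphore, then decodes the macroblock
// row assigned to it in pbi->mb_row_di[ithread], synchronizing column-by-column
// with the thread that is decoding the row above.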
THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
{
#if CONFIG_MULTITHREAD
    int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
    VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
    MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
    ENTROPY_CONTEXT mb_row_left_context[4][4];

    while (1)
    {
        if (pbi->b_multithreaded_rd == 0)
            break;

        //if(WaitForSingleObject(pbi->h_event_mbrdecoding[ithread], INFINITE) == WAIT_OBJECT_0)
        if (sem_wait(&pbi->h_event_mbrdecoding[ithread]) == 0)
        {
            if (pbi->b_multithreaded_rd == 0)
                break;
            else
            {
                VP8_COMMON *pc = &pbi->common;
                int mb_row = mbrd->mb_row;
                MACROBLOCKD *xd = &mbrd->mbd;

                //printf("ithread:%d mb_row %d\n", ithread, mb_row);
                int i;
                int mb_col;
                int recon_yoffset, recon_uvoffset;

                int recon_y_stride = pc->last_frame.y_stride;
                int recon_uv_stride = pc->last_frame.uv_stride;

                volatile int *last_row_current_mb_col;

                if (ithread > 0)
                    last_row_current_mb_col = &pbi->mb_row_di[ithread-1].current_mb_col;
                else
                    last_row_current_mb_col = &pbi->current_mb_col_main;

                recon_yoffset = mb_row * recon_y_stride * 16;
                recon_uvoffset = mb_row * recon_uv_stride * 8;

                // reset above block coeffs
                xd->above_context[Y1CONTEXT] = pc->above_context[Y1CONTEXT];
                xd->above_context[UCONTEXT]  = pc->above_context[UCONTEXT];
                xd->above_context[VCONTEXT]  = pc->above_context[VCONTEXT];
                xd->above_context[Y2CONTEXT] = pc->above_context[Y2CONTEXT];
                xd->left_context = mb_row_left_context;
                vpx_memset(mb_row_left_context, 0, sizeof(mb_row_left_context));
                xd->up_available = (mb_row != 0);

                xd->mb_to_top_edge = -((mb_row * 16)) << 3;
                xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;

                for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
                {
                    // Spin until the thread decoding the row above is at least one MB ahead
                    while (mb_col > (*last_row_current_mb_col - 1) && *last_row_current_mb_col != pc->mb_cols - 1)
                    {
                    }

                    // Take a copy of the mode and Mv information for this macroblock into the xd->mbmi
                    vpx_memcpy(&xd->mbmi, &xd->mode_info_context->mbmi, 32); //sizeof(MB_MODE_INFO) );

                    if (xd->mbmi.mode == SPLITMV || xd->mbmi.mode == B_PRED)
                    {
                        for (i = 0; i < 16; i++)
                        {
                            BLOCKD *d = &xd->block[i];
                            vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
                        }
                    }

                    // Distance of MB to the various image edges.
                    // These are specified to 1/8th pel as they are always compared to values in 1/8th pel units.
                    xd->mb_to_left_edge = -((mb_col * 16) << 3);
                    xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;

                    xd->dst.y_buffer = pc->new_frame.y_buffer + recon_yoffset;
                    xd->dst.u_buffer = pc->new_frame.u_buffer + recon_uvoffset;
                    xd->dst.v_buffer = pc->new_frame.v_buffer + recon_uvoffset;

                    xd->left_available = (mb_col != 0);

                    // Select the appropriate reference frame for this MB
                    if (xd->mbmi.ref_frame == LAST_FRAME)
                    {
                        xd->pre.y_buffer = pc->last_frame.y_buffer + recon_yoffset;
                        xd->pre.u_buffer = pc->last_frame.u_buffer + recon_uvoffset;
                        xd->pre.v_buffer = pc->last_frame.v_buffer + recon_uvoffset;
                    }
                    else if (xd->mbmi.ref_frame == GOLDEN_FRAME)
                    {
                        // Golden frame reconstruction buffer
                        xd->pre.y_buffer = pc->golden_frame.y_buffer + recon_yoffset;
                        xd->pre.u_buffer = pc->golden_frame.u_buffer + recon_uvoffset;
                        xd->pre.v_buffer = pc->golden_frame.v_buffer + recon_uvoffset;
                    }
                    else
                    {
                        // Alternate reference frame reconstruction buffer
                        xd->pre.y_buffer = pc->alt_ref_frame.y_buffer + recon_yoffset;
                        xd->pre.u_buffer = pc->alt_ref_frame.u_buffer + recon_uvoffset;
                        xd->pre.v_buffer = pc->alt_ref_frame.v_buffer + recon_uvoffset;
                    }

                    vp8_build_uvmvs(xd, pc->full_pixel);

                    vp8dx_bool_decoder_fill(xd->current_bc);
                    vp8_decode_macroblock(pbi, xd);

                    // move to the next MB
                    recon_yoffset += 16;
                    recon_uvoffset += 8;

                    ++xd->mode_info_context;  /* next mb */

                    xd->gf_active_ptr++;      // GF usage flag for next MB

                    xd->above_context[Y1CONTEXT] += 4;
                    xd->above_context[UCONTEXT]  += 2;
                    xd->above_context[VCONTEXT]  += 2;
                    xd->above_context[Y2CONTEXT] ++;

                    pbi->mb_row_di[ithread].current_mb_col = mb_col;
                }

                // adjust to the next row of MBs: extend the borders of the reconstructed row
                vp8_extend_mb_row(&pc->new_frame, xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);

                ++xd->mode_info_context;      /* skip prediction column */

                // skip the rows handled by the other decoding threads
                xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;

                //memcpy(&pbi->lpfmb, &pbi->mb, sizeof(pbi->mb));
                if ((mb_row & 1) == 1)
                {
                    pbi->last_mb_row_decoded = mb_row;
                    //printf("S%d", pbi->last_mb_row_decoded);
                }

                if (ithread == (pbi->decoding_thread_count - 1) || mb_row == pc->mb_rows - 1)
                {
                    //SetEvent(pbi->h_event_main);
                    sem_post(&pbi->h_event_main);
                }
            }
        }
    }
#else
    (void) p_data;
#endif

    return 0;
}
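
// Loop filter worker thread: waits for vp8_start_lfthread() to signal a new
// frame, then filters each MB row as soon as the decoding threads report (via
// last_mb_row_decoded) that the row has been reconstructed.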
THREAD_FUNCTION vp8_thread_loop_filter(void *p_data)
{
#if CONFIG_MULTITHREAD
    VP8D_COMP *pbi = (VP8D_COMP *)p_data;

    while (1)
    {
        if (pbi->b_multithreaded_lf == 0)
            break;

        //printf("before waiting for start_lpf\n");

        //if(WaitForSingleObject(pbi->h_event_start_lpf, INFINITE) == WAIT_OBJECT_0)
        if (sem_wait(&pbi->h_event_start_lpf) == 0)
        {
            if (pbi->b_multithreaded_lf == 0) // we're shutting down
                break;
            else
            {
                VP8_COMMON *cm = &pbi->common;
                MACROBLOCKD *mbd = &pbi->lpfmb;
                int default_filt_lvl = pbi->common.filter_level;

                YV12_BUFFER_CONFIG *post = &cm->new_frame;
                loop_filter_info *lfi = cm->lf_info;

                int mb_row;
                int mb_col;

                int baseline_filter_level[MAX_MB_SEGMENTS];
                int filter_level;
                int alt_flt_enabled = mbd->segmentation_enabled;

                int i;
                unsigned char *y_ptr, *u_ptr, *v_ptr;

                volatile int *last_mb_row_decoded = &pbi->last_mb_row_decoded;

                //MODE_INFO * this_mb_mode_info = cm->mi;
                mbd->mode_info_context = cm->mi;   // Point at base of MB MODE_INFO list

                // Note the baseline filter values for each segment
                if (alt_flt_enabled)
                {
                    for (i = 0; i < MAX_MB_SEGMENTS; i++)
                    {
                        if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
                            baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
                        else
                        {
                            baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
                            baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; // Clamp to valid range
                        }
                    }
                }
                else
                {
                    for (i = 0; i < MAX_MB_SEGMENTS; i++)
                        baseline_filter_level[i] = default_filt_lvl;
                }

                // Initialize the loop filter for this frame.
                vp8_init_loop_filter(cm);

                // Set up the buffer pointers
                y_ptr = post->y_buffer;
                u_ptr = post->u_buffer;
                v_ptr = post->v_buffer;

                // vp8_filter each macroblock
                for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
                {
                    // Spin until the decoding threads have moved past this row
                    while (mb_row >= *last_mb_row_decoded)
                    {
                    }

                    //printf("R%d", mb_row);
                    for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
                    {
                        int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;

                        filter_level = baseline_filter_level[Segment];

                        // Apply any context driven MB level adjustment
                        vp8_adjust_mb_lf_value(mbd, &filter_level);

                        if (filter_level)
                        {
                            if (mb_col > 0)
                                cm->lf_mbv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);

                            if (mbd->mode_info_context->mbmi.dc_diff > 0)
                                cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);

                            // don't apply across umv border
                            if (mb_row > 0)
                                cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);

                            if (mbd->mode_info_context->mbmi.dc_diff > 0)
                                cm->lf_bh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
                        }

                        y_ptr += 16;
                        u_ptr += 8;
                        v_ptr += 8;

                        mbd->mode_info_context++;     // step to next MB
                    }

                    y_ptr += post->y_stride  * 16 - post->y_width;
                    u_ptr += post->uv_stride *  8 - post->uv_width;
                    v_ptr += post->uv_stride *  8 - post->uv_width;

                    mbd->mode_info_context++;         // Skip border mb
                }

                //printf("R%d\n", mb_row);
                // When done, signal the main thread that loop filtering is finished
                //SetEvent(pbi->h_event_lpf);
                sem_post(&pbi->h_event_lpf);
            }
        }
    }
#else
    (void) p_data;
#endif

    return 0;
}
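
// Creates the loop filter thread and (core_count - 1) MB-row decoding threads,
// where core_count is pbi->max_threads capped at 16, along with the semaphores
// used to hand work to them.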
void vp8_decoder_create_threads(VP8D_COMP *pbi)
{
#if CONFIG_MULTITHREAD
    int core_count = 0;
    int ithread;

    pbi->b_multithreaded_rd = 0;
    pbi->b_multithreaded_lf = 0;
    pbi->allocated_decoding_thread_count = 0;
    core_count = (pbi->max_threads > 16) ? 16 : pbi->max_threads;    //vp8_get_proc_core_count();

    if (core_count > 1)
    {
        sem_init(&pbi->h_event_lpf, 0, 0);
        sem_init(&pbi->h_event_start_lpf, 0, 0);
        pbi->b_multithreaded_lf = 1;
        pthread_create(&pbi->h_thread_lpf, 0, vp8_thread_loop_filter, (pbi));
    }

    if (core_count > 1)
    {
        pbi->b_multithreaded_rd = 1;
        pbi->decoding_thread_count = core_count - 1;

        CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count));
        CHECK_MEM_ERROR(pbi->h_event_mbrdecoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
        CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count));
        vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count);
        CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count));

        for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++)
        {
            sem_init(&pbi->h_event_mbrdecoding[ithread], 0, 0);

            pbi->de_thread_data[ithread].ithread = ithread;
            pbi->de_thread_data[ithread].ptr1 = (void *)pbi;
            pbi->de_thread_data[ithread].ptr2 = (void *) &pbi->mb_row_di[ithread];

            pthread_create(&pbi->h_decoding_thread[ithread], 0, vp8_thread_decoding_proc, (&pbi->de_thread_data[ithread]));
        }

        sem_init(&pbi->h_event_main, 0, 0);
        pbi->allocated_decoding_thread_count = pbi->decoding_thread_count;
    }
#else
    (void) pbi;
#endif
}
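
// Shuts down and joins the worker threads, then releases the semaphores and
// per-thread data that vp8_decoder_create_threads() allocated.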
void vp8_decoder_remove_threads(VP8D_COMP *pbi)
{
#if CONFIG_MULTITHREAD
    int i;

    // shutdown the loop filter thread
    if (pbi->b_multithreaded_lf)
    {
        pbi->b_multithreaded_lf = 0;
        sem_post(&pbi->h_event_start_lpf);
        pthread_join(pbi->h_thread_lpf, 0);
        sem_destroy(&pbi->h_event_start_lpf);
    }

    //shutdown MB Decoding thread;
    if (pbi->b_multithreaded_rd)
    {
        pbi->b_multithreaded_rd = 0;

        // allow all threads to exit
        for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
        {
            sem_post(&pbi->h_event_mbrdecoding[i]);
            pthread_join(pbi->h_decoding_thread[i], NULL);
        }

        for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
        {
            sem_destroy(&pbi->h_event_mbrdecoding[i]);
        }

        sem_destroy(&pbi->h_event_main);

        if (pbi->h_decoding_thread)
        {
            vpx_free(pbi->h_decoding_thread);
            pbi->h_decoding_thread = NULL;
        }

        if (pbi->h_event_mbrdecoding)
        {
            vpx_free(pbi->h_event_mbrdecoding);
            pbi->h_event_mbrdecoding = NULL;
        }

        if (pbi->mb_row_di)
        {
            vpx_free(pbi->mb_row_di);
            pbi->mb_row_di = NULL;
        }

        if (pbi->de_thread_data)
        {
            vpx_free(pbi->de_thread_data);
            pbi->de_thread_data = NULL;
        }
    }
#else
    (void) pbi;
#endif
}
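
// Hands the just-decoded frame's MACROBLOCKD state to the loop filter thread
// (pbi->lpfmb) and signals it to start filtering.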
void vp8_start_lfthread(VP8D_COMP *pbi)
{
#if CONFIG_MULTITHREAD
    memcpy(&pbi->lpfmb, &pbi->mb, sizeof(pbi->mb));
    pbi->last_mb_row_decoded = 0;
    sem_post(&pbi->h_event_start_lpf);
#else
    (void) pbi;
#endif
}
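
// Blocks until the loop filter thread signals that it has finished the frame,
// accumulating the time spent waiting into the loop filtering timer.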
void vp8_stop_lfthread(VP8D_COMP *pbi)
{
#if CONFIG_MULTITHREAD
    struct vpx_usec_timer timer;

    vpx_usec_timer_start(&timer);

    sem_wait(&pbi->h_event_lpf);

    vpx_usec_timer_mark(&timer);
    pbi->time_loop_filtering += vpx_usec_timer_elapsed(&timer);
#else
    (void) pbi;
#endif
}
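
// Multithreaded MB-row decode: the main thread decodes every
// (decoding_thread_count + 1)-th row itself, farms the rows in between out to
// the worker threads, and waits on h_event_main before starting the next group.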
void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
                          MACROBLOCKD *xd)
{
#if CONFIG_MULTITHREAD
    int mb_row;
    VP8_COMMON *pc = &pbi->common;

    int ibc = 0;
    int num_part = 1 << pbi->common.multi_token_partition;

    vp8_setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);

    for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
    {
        int i;

        pbi->current_mb_col_main = -1;

        // hand out the token-partition bool decoders round-robin
        xd->current_bc = &pbi->mbc[ibc];
        ibc++;

        if (ibc == num_part)
            ibc = 0;

        for (i = 0; i < pbi->decoding_thread_count; i++)
        {
            if ((mb_row + i + 1) >= pc->mb_rows)
                break;

            pbi->mb_row_di[i].mb_row = mb_row + i + 1;
            pbi->mb_row_di[i].mbd.current_bc = &pbi->mbc[ibc];
            ibc++;

            if (ibc == num_part)
                ibc = 0;

            pbi->mb_row_di[i].current_mb_col = -1;
            sem_post(&pbi->h_event_mbrdecoding[i]);
        }

        vp8_decode_mb_row(pbi, pc, mb_row, xd);

        xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;

        if (mb_row < pc->mb_rows - 1)
        {
            sem_wait(&pbi->h_event_main);
        }
    }

    pbi->last_mb_row_decoded = mb_row;
#else
    (void) pbi;
    (void) xd;
#endif
}