/*
 * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
#include "onyxd_int.h"
#include "vpx_mem/vpx_mem.h"
#include "threading.h"

#include "loopfilter.h"
#include "extend.h"
#include "vpx_ports/vpx_timer.h"
extern void vp8_decode_mb_row(VP8D_COMP *pbi,
                              VP8_COMMON *pc,
                              int mb_row,
                              MACROBLOCKD *xd);
extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);
extern void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd);
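
/*
 * Copy the per-frame decoding state that each worker row needs from the
 * main macroblock descriptor (xd) into the MACROBLOCKD embedded in every
 * MB_ROW_DEC, so the decoding threads start each frame from a consistent
 * configuration (prediction function pointers, mode info pointers,
 * segmentation data and dequant tables).
 */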
void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
{
#if CONFIG_MULTITHREAD
    VP8_COMMON *const pc = &pbi->common;
    int i, j;

    for (i = 0; i < count; i++)
    {
        MACROBLOCKD *mbd = &mbrd[i].mbd;

#if CONFIG_RUNTIME_CPU_DETECT
        mbd->rtcd = xd->rtcd;
#endif
        mbd->subpixel_predict      = xd->subpixel_predict;
        mbd->subpixel_predict8x4   = xd->subpixel_predict8x4;
        mbd->subpixel_predict8x8   = xd->subpixel_predict8x8;
        mbd->subpixel_predict16x16 = xd->subpixel_predict16x16;
        mbd->gf_active_ptr         = xd->gf_active_ptr;

        mbd->mode_info         = pc->mi - 1;
        mbd->mode_info_context = pc->mi + pc->mode_info_stride * (i + 1);
        mbd->mode_info_stride  = pc->mode_info_stride;

        mbd->frame_type                = pc->frame_type;
        mbd->frames_since_golden       = pc->frames_since_golden;
        mbd->frames_till_alt_ref_frame = pc->frames_till_alt_ref_frame;

        mbd->pre = pc->last_frame;
        mbd->dst = pc->new_frame;

        vp8_setup_block_dptrs(mbd);
        vp8_build_block_doffsets(mbd);

        mbd->segmentation_enabled  = xd->segmentation_enabled;
        mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
        vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));

        mbd->mbmi.mode    = DC_PRED;
        mbd->mbmi.uv_mode = DC_PRED;

        mbd->current_bc = &pbi->bc2;

        for (j = 0; j < 25; j++)
        {
            mbd->block[j].dequant = xd->block[j].dequant;
        }
    }
#else
    (void) pbi;
    (void) xd;
    (void) mbrd;
    (void) count;
#endif
}
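
/*
 * Worker-thread entry point for macroblock-row decoding.  Each worker
 * blocks on its h_event_mbrdecoding semaphore until the main thread posts
 * a row to decode, then works across that row while staying at least one
 * macroblock behind the thread handling the row above it (progress is
 * tracked through the volatile current_mb_col counters), and signals
 * h_event_main once the last row of its group is done.
 */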
THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
{
#if CONFIG_MULTITHREAD
    int ithread      = ((DECODETHREAD_DATA *)p_data)->ithread;
    VP8D_COMP *pbi   = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
    MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
    ENTROPY_CONTEXT mb_row_left_context[4][4];

    while (1)
    {
        if (pbi->b_multithreaded_rd == 0)
            break;

        //if(WaitForSingleObject(pbi->h_event_mbrdecoding[ithread], INFINITE) == WAIT_OBJECT_0)
        if (sem_wait(&pbi->h_event_mbrdecoding[ithread]) == 0)
        {
            if (pbi->b_multithreaded_rd == 0)
                break;
            else
            {
                VP8_COMMON *pc  = &pbi->common;
                int mb_row      = mbrd->mb_row;
                MACROBLOCKD *xd = &mbrd->mbd;

                //printf("ithread:%d mb_row %d\n", ithread, mb_row);
                int i;
                int mb_col;
                int recon_yoffset, recon_uvoffset;

                int recon_y_stride  = pc->last_frame.y_stride;
                int recon_uv_stride = pc->last_frame.uv_stride;

                volatile int *last_row_current_mb_col;

                if (ithread > 0)
                    last_row_current_mb_col = &pbi->mb_row_di[ithread-1].current_mb_col;
                else
                    last_row_current_mb_col = &pbi->current_mb_col_main;

                recon_yoffset  = mb_row * recon_y_stride * 16;
                recon_uvoffset = mb_row * recon_uv_stride * 8;

                // reset above block coeffs
                xd->above_context[Y1CONTEXT] = pc->above_context[Y1CONTEXT];
                xd->above_context[UCONTEXT]  = pc->above_context[UCONTEXT];
                xd->above_context[VCONTEXT]  = pc->above_context[VCONTEXT];
                xd->above_context[Y2CONTEXT] = pc->above_context[Y2CONTEXT];
                xd->left_context = mb_row_left_context;
                vpx_memset(mb_row_left_context, 0, sizeof(mb_row_left_context));
                xd->up_available = (mb_row != 0);

                xd->mb_to_top_edge    = -((mb_row * 16)) << 3;
                xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;

                for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
                {
                    // Spin until the thread working on the row above is far enough ahead
                    while (mb_col > (*last_row_current_mb_col - 1) && *last_row_current_mb_col != pc->mb_cols - 1)
                    {
                    }

                    // Take a copy of the mode and Mv information for this macroblock into the xd->mbmi
                    // the partition_bmi array is unused in the decoder, so don't copy it.
                    vpx_memcpy(&xd->mbmi, &xd->mode_info_context->mbmi,
                               sizeof(MB_MODE_INFO) - sizeof(xd->mbmi.partition_bmi));

                    if (xd->mbmi.mode == SPLITMV || xd->mbmi.mode == B_PRED)
                    {
                        for (i = 0; i < 16; i++)
                        {
                            BLOCKD *d = &xd->block[i];
                            vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
                        }
                    }

                    // Distance of Mb to the various image edges.
                    // These are specified to 1/8th pel as they are always compared to values that are in 1/8th pel units
                    xd->mb_to_left_edge  = -((mb_col * 16) << 3);
                    xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;

                    xd->dst.y_buffer = pc->new_frame.y_buffer + recon_yoffset;
                    xd->dst.u_buffer = pc->new_frame.u_buffer + recon_uvoffset;
                    xd->dst.v_buffer = pc->new_frame.v_buffer + recon_uvoffset;

                    xd->left_available = (mb_col != 0);

                    // Select the appropriate reference frame for this MB
                    if (xd->mbmi.ref_frame == LAST_FRAME)
                    {
                        xd->pre.y_buffer = pc->last_frame.y_buffer + recon_yoffset;
                        xd->pre.u_buffer = pc->last_frame.u_buffer + recon_uvoffset;
                        xd->pre.v_buffer = pc->last_frame.v_buffer + recon_uvoffset;
                    }
                    else if (xd->mbmi.ref_frame == GOLDEN_FRAME)
                    {
                        // Golden frame reconstruction buffer
                        xd->pre.y_buffer = pc->golden_frame.y_buffer + recon_yoffset;
                        xd->pre.u_buffer = pc->golden_frame.u_buffer + recon_uvoffset;
                        xd->pre.v_buffer = pc->golden_frame.v_buffer + recon_uvoffset;
                    }
                    else
                    {
                        // Alternate reference frame reconstruction buffer
                        xd->pre.y_buffer = pc->alt_ref_frame.y_buffer + recon_yoffset;
                        xd->pre.u_buffer = pc->alt_ref_frame.u_buffer + recon_uvoffset;
                        xd->pre.v_buffer = pc->alt_ref_frame.v_buffer + recon_uvoffset;
                    }

                    vp8_build_uvmvs(xd, pc->full_pixel);
                    vp8_decode_macroblock(pbi, xd);

                    recon_yoffset  += 16;
                    recon_uvoffset += 8;

                    ++xd->mode_info_context;  /* next mb */

                    xd->gf_active_ptr++;      // GF usage flag for next MB

                    xd->above_context[Y1CONTEXT] += 4;
                    xd->above_context[UCONTEXT]  += 2;
                    xd->above_context[VCONTEXT]  += 2;
                    xd->above_context[Y2CONTEXT] ++;

                    pbi->mb_row_di[ithread].current_mb_col = mb_col;
                }

                // adjust to the next row of mbs
                vp8_extend_mb_row(
                    &pc->new_frame,
                    xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8
                );

                ++xd->mode_info_context;      /* skip prediction column */

                // since we have multithread
                xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;

                //memcpy(&pbi->lpfmb, &pbi->mb, sizeof(pbi->mb));
                if ((mb_row & 1) == 1)
                {
                    pbi->last_mb_row_decoded = mb_row;
                    //printf("S%d", pbi->last_mb_row_decoded);
                }

                if (ithread == (pbi->decoding_thread_count - 1) || mb_row == pc->mb_rows - 1)
                {
                    //SetEvent(pbi->h_event_main);
                    sem_post(&pbi->h_event_main);
                }
            }
        }
    }
#else
    (void) p_data;
#endif

    return 0;
}
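
/*
 * Loop-filter thread.  Waits on h_event_start_lpf, then filters the frame
 * row by row without running ahead of what the decoding threads have
 * reconstructed (tracked through pbi->last_mb_row_decoded), and posts
 * h_event_lpf when the whole frame has been filtered.
 */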
THREAD_FUNCTION vp8_thread_loop_filter(void *p_data)
{
#if CONFIG_MULTITHREAD
    VP8D_COMP *pbi = (VP8D_COMP *)p_data;

    while (1)
    {
        if (pbi->b_multithreaded_lf == 0)
            break;

        //printf("before waiting for start_lpf\n");

        //if(WaitForSingleObject(pbi->h_event_start_lpf, INFINITE) == WAIT_OBJECT_0)
        if (sem_wait(&pbi->h_event_start_lpf) == 0)
        {
            if (pbi->b_multithreaded_lf == 0) // we're shutting down
                break;
            else
            {
                VP8_COMMON *cm   = &pbi->common;
                MACROBLOCKD *mbd = &pbi->lpfmb;
                int default_filt_lvl = pbi->common.filter_level;

                YV12_BUFFER_CONFIG *post = &cm->new_frame;
                loop_filter_info *lfi = cm->lf_info;
                int frame_type = cm->frame_type;

                int mb_row;
                int mb_col;
                int filter_level;
                int baseline_filter_level[MAX_MB_SEGMENTS];

                int alt_flt_enabled = mbd->segmentation_enabled;
                int i;

                unsigned char *y_ptr, *u_ptr, *v_ptr;

                volatile int *last_mb_row_decoded = &pbi->last_mb_row_decoded;

                //MODE_INFO * this_mb_mode_info = cm->mi;
                mbd->mode_info_context = cm->mi;          // Point at base of Mb MODE_INFO list

                // Note the baseline filter values for each segment
                if (alt_flt_enabled)
                {
                    for (i = 0; i < MAX_MB_SEGMENTS; i++)
                    {
                        if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
                            baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
                        else
                        {
                            baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
                            baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  // Clamp to valid range
                        }
                    }
                }
                else
                {
                    for (i = 0; i < MAX_MB_SEGMENTS; i++)
                        baseline_filter_level[i] = default_filt_lvl;
                }

                // Initialize the loop filter for this frame.
                if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
                    vp8_init_loop_filter(cm);
                else if (frame_type != cm->last_frame_type)
                    vp8_frame_init_loop_filter(lfi, frame_type);

                // Set up the buffer pointers
                y_ptr = post->y_buffer;
                u_ptr = post->u_buffer;
                v_ptr = post->v_buffer;

                // vp8_filter each macro block
                for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
                {
                    // Spin until the decoding threads have reconstructed this row
                    while (mb_row >= *last_mb_row_decoded)
                    {
                    }

                    //printf("R%d", mb_row);
                    for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
                    {
                        int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;

                        filter_level = baseline_filter_level[Segment];

                        // Apply any context driven MB level adjustment
                        vp8_adjust_mb_lf_value(mbd, &filter_level);

                        if (filter_level)
                        {
                            if (mb_col > 0)
                                cm->lf_mbv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);

                            if (mbd->mode_info_context->mbmi.dc_diff > 0)
                                cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);

                            // don't apply across umv border
                            if (mb_row > 0)
                                cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);

                            if (mbd->mode_info_context->mbmi.dc_diff > 0)
                                cm->lf_bh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
                        }

                        y_ptr += 16;
                        u_ptr += 8;
                        v_ptr += 8;

                        mbd->mode_info_context++;     // step to next MB
                    }

                    y_ptr += post->y_stride  * 16 - post->y_width;
                    u_ptr += post->uv_stride *  8 - post->uv_width;
                    v_ptr += post->uv_stride *  8 - post->uv_width;

                    mbd->mode_info_context++;         // Skip border mb
                }

                //printf("R%d\n", mb_row);
                // When done, signal main thread that loop filtering is finished
                //SetEvent(pbi->h_event_lpf);
                sem_post(&pbi->h_event_lpf);
            }
        }
    }
#else
    (void) p_data;
#endif

    return 0;
}
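
/*
 * Create the loop-filter thread and (on multi-core machines) the
 * macroblock-row decoding threads, along with the semaphores used to
 * hand work to them.  A rough sketch of the intended call sequence, as
 * suggested by the functions in this file (the actual caller lives
 * elsewhere in the decoder):
 *
 *     vp8_decoder_create_threads(pbi);   // once, at decoder creation
 *     ...per frame...
 *     vp8_start_lfthread(pbi);           // arm the loop-filter thread
 *     vp8_mtdecode_mb_rows(pbi, xd);     // decode the frame's MB rows
 *     vp8_stop_lfthread(pbi);            // wait for filtering to finish
 *     ...
 *     vp8_decoder_remove_threads(pbi);   // once, at decoder shutdown
 */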
void vp8_decoder_create_threads(VP8D_COMP *pbi)
{
#if CONFIG_MULTITHREAD
    int core_count = 0;
    int ithread;

    pbi->b_multithreaded_rd = 0;
    pbi->b_multithreaded_lf = 0;
    pbi->allocated_decoding_thread_count = 0;
    core_count = (pbi->max_threads > 16) ? 16 : pbi->max_threads;    //vp8_get_proc_core_count();

    // loop filter thread
    sem_init(&pbi->h_event_lpf, 0, 0);
    sem_init(&pbi->h_event_start_lpf, 0, 0);
    pbi->b_multithreaded_lf = 1;
    pthread_create(&pbi->h_thread_lpf, 0, vp8_thread_loop_filter, (pbi));

    if (core_count > 1)
    {
        pbi->b_multithreaded_rd = 1;
        pbi->decoding_thread_count = core_count - 1;

        CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count));
        CHECK_MEM_ERROR(pbi->h_event_mbrdecoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
        CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count));
        vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count);
        CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count));

        for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++)
        {
            sem_init(&pbi->h_event_mbrdecoding[ithread], 0, 0);

            pbi->de_thread_data[ithread].ithread = ithread;
            pbi->de_thread_data[ithread].ptr1    = (void *)pbi;
            pbi->de_thread_data[ithread].ptr2    = (void *) &pbi->mb_row_di[ithread];

            pthread_create(&pbi->h_decoding_thread[ithread], 0, vp8_thread_decoding_proc, (&pbi->de_thread_data[ithread]));
        }

        sem_init(&pbi->h_event_main, 0, 0);
        pbi->allocated_decoding_thread_count = pbi->decoding_thread_count;
    }
#else
    (void) pbi;
#endif
}
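
/*
 * Shut down and join the loop-filter and decoding threads, destroy their
 * semaphores, and release the per-thread allocations made in
 * vp8_decoder_create_threads().
 */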
void vp8_decoder_remove_threads(VP8D_COMP *pbi)
{
#if CONFIG_MULTITHREAD
    int i;

    // shutdown loop filter thread
    if (pbi->b_multithreaded_lf)
    {
        pbi->b_multithreaded_lf = 0;
        sem_post(&pbi->h_event_start_lpf);
        pthread_join(pbi->h_thread_lpf, 0);
        sem_destroy(&pbi->h_event_start_lpf);
    }

    //shutdown MB Decoding thread;
    if (pbi->b_multithreaded_rd)
    {
        pbi->b_multithreaded_rd = 0;

        // allow all threads to exit
        for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
        {
            sem_post(&pbi->h_event_mbrdecoding[i]);
            pthread_join(pbi->h_decoding_thread[i], NULL);
        }

        for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
        {
            sem_destroy(&pbi->h_event_mbrdecoding[i]);
        }

        sem_destroy(&pbi->h_event_main);

        if (pbi->h_decoding_thread)
        {
            vpx_free(pbi->h_decoding_thread);
            pbi->h_decoding_thread = NULL;
        }

        if (pbi->h_event_mbrdecoding)
        {
            vpx_free(pbi->h_event_mbrdecoding);
            pbi->h_event_mbrdecoding = NULL;
        }

        if (pbi->mb_row_di)
        {
            vpx_free(pbi->mb_row_di);
            pbi->mb_row_di = NULL;
        }

        if (pbi->de_thread_data)
        {
            vpx_free(pbi->de_thread_data);
            pbi->de_thread_data = NULL;
        }
    }
#else
    (void) pbi;
#endif
}
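
/*
 * Arm the loop-filter thread for the frame that is about to be decoded:
 * snapshot the main macroblock descriptor into pbi->lpfmb, reset the
 * decoded-row counter and post h_event_start_lpf.
 */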
void vp8_start_lfthread(VP8D_COMP *pbi)
{
#if CONFIG_MULTITHREAD
    memcpy(&pbi->lpfmb, &pbi->mb, sizeof(pbi->mb));
    pbi->last_mb_row_decoded = 0;
    sem_post(&pbi->h_event_start_lpf);
#else
    (void) pbi;
#endif
}
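
/*
 * Block until the loop-filter thread signals that it has finished the
 * frame, and account the wait towards pbi->time_loop_filtering.
 */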
void vp8_stop_lfthread(VP8D_COMP *pbi)
{
#if CONFIG_MULTITHREAD
    struct vpx_usec_timer timer;

    vpx_usec_timer_start(&timer);

    sem_wait(&pbi->h_event_lpf);

    vpx_usec_timer_mark(&timer);
    pbi->time_loop_filtering += vpx_usec_timer_elapsed(&timer);
#else
    (void) pbi;
#endif
}
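
/*
 * Main-thread driver for multi-threaded decoding: hand out groups of
 * macroblock rows to the worker threads (one row per worker plus one row
 * decoded on the main thread), cycling the bool decoder used for each row
 * across the token partitions, and wait on h_event_main before starting
 * the next group.
 */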
void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
                          MACROBLOCKD *xd)
{
#if CONFIG_MULTITHREAD
    int mb_row;
    VP8_COMMON *pc = &pbi->common;

    int ibc = 0;
    int num_part = 1 << pbi->common.multi_token_partition;

    vp8_setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);

    for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
    {
        int i;

        pbi->current_mb_col_main = -1;

        xd->current_bc = &pbi->mbc[ibc];
        ibc++;

        if (ibc == num_part)
            ibc = 0;

        for (i = 0; i < pbi->decoding_thread_count; i++)
        {
            if ((mb_row + i + 1) >= pc->mb_rows)
                break;

            pbi->mb_row_di[i].mb_row = mb_row + i + 1;
            pbi->mb_row_di[i].mbd.current_bc = &pbi->mbc[ibc];
            ibc++;

            if (ibc == num_part)
                ibc = 0;

            pbi->mb_row_di[i].current_mb_col = -1;
            sem_post(&pbi->h_event_mbrdecoding[i]);
        }

        vp8_decode_mb_row(pbi, pc, mb_row, xd);

        xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;

        if (mb_row < pc->mb_rows - 1)
        {
            sem_wait(&pbi->h_event_main);
        }
    }

    pbi->last_mb_row_decoded = mb_row;
#else
    (void) pbi;
    (void) xd;
#endif
}