/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
12 #include "vp8/common/threading.h"
13 #include "vp8/common/common.h"
14 #include "vp8/common/extend.h"
16 #if CONFIG_MULTITHREAD
18 extern int vp8cx_encode_inter_macroblock(VP8_COMP
*cpi
, MACROBLOCK
*x
,
19 TOKENEXTRA
**t
, int recon_yoffset
,
21 extern int vp8cx_encode_intra_macro_block(VP8_COMP
*cpi
, MACROBLOCK
*x
,
23 extern void vp8cx_mb_init_quantizer(VP8_COMP
*cpi
, MACROBLOCK
*x
);
24 extern void vp8_build_block_offsets(MACROBLOCK
*x
);
25 extern void vp8_setup_block_ptrs(MACROBLOCK
*x
);
27 extern void loopfilter_frame(VP8_COMP
*cpi
, VP8_COMMON
*cm
);
29 static THREAD_FUNCTION
loopfilter_thread(void *p_data
)
31 VP8_COMP
*cpi
= (VP8_COMP
*)(((LPFTHREAD_DATA
*)p_data
)->ptr1
);
32 VP8_COMMON
*cm
= &cpi
->common
;
36 if (cpi
->b_multi_threaded
== 0)
39 if (sem_wait(&cpi
->h_event_start_lpf
) == 0)
41 if (cpi
->b_multi_threaded
== FALSE
) // we're shutting down
44 loopfilter_frame(cpi
, cm
);
46 sem_post(&cpi
->h_event_end_lpf
);
/*
 * thread_encoding_proc: entry point of an encoder worker thread.
 *
 * p_data is an ENCODETHREAD_DATA carrying the thread index (ithread), the
 * VP8_COMP instance (ptr1) and this thread's MB_ROW_COMP (ptr2).  After each
 * successful wait on h_event_start_encoding[ithread] the thread encodes
 * macroblock rows ithread + 1, ithread + 1 + (encoding_thread_count + 1), ...
 * Within a row it stays behind the row above by polling
 * mt_current_mb_col[mb_row - 1], and it records its own progress in
 * mt_current_mb_col[mb_row].  The thread that encodes the last row posts
 * h_event_end_encoding.
 *
 * NOTE(review): the embedded original line numbers in this listing have gaps
 * (e.g. 123 -> 130, 229 -> 232, 242 -> 247), so braces, control keywords and
 * possibly whole statements are missing here; restore them from the upstream
 * file before building.
 */
54 THREAD_FUNCTION
thread_encoding_proc(void *p_data
)
/* Unpack the per-thread arguments. */
56 int ithread
= ((ENCODETHREAD_DATA
*)p_data
)->ithread
;
57 VP8_COMP
*cpi
= (VP8_COMP
*)(((ENCODETHREAD_DATA
*)p_data
)->ptr1
);
58 MB_ROW_COMP
*mbri
= (MB_ROW_COMP
*)(((ENCODETHREAD_DATA
*)p_data
)->ptr2
);
59 ENTROPY_CONTEXT_PLANES mb_row_left_context
;
/* nsync: column interval at which the progress of the row above is checked. */
61 const int nsync
= cpi
->mt_sync_range
;
62 //printf("Started thread %d\n", ithread);
66 if (cpi
->b_multi_threaded
== 0)
69 //if(WaitForSingleObject(cpi->h_event_mbrencoding[ithread], INFINITE) == WAIT_OBJECT_0)
/* Block until this thread's start semaphore is posted. */
70 if (sem_wait(&cpi
->h_event_start_encoding
[ithread
]) == 0)
72 VP8_COMMON
*cm
= &cpi
->common
;
74 MACROBLOCK
*x
= &mbri
->mb
;
75 MACROBLOCKD
*xd
= &x
->e_mbd
;
78 int *segment_counts
= mbri
->segment_counts
;
79 int *totalrate
= &mbri
->totalrate
;
81 if (cpi
->b_multi_threaded
== FALSE
) // we're shutting down
/* Rows are interleaved across threads: start at ithread + 1 and step by encoding_thread_count + 1. */
84 for (mb_row
= ithread
+ 1; mb_row
< cm
->mb_rows
; mb_row
+= (cpi
->encoding_thread_count
+ 1))
88 int recon_yoffset
, recon_uvoffset
;
90 int ref_fb_idx
= cm
->lst_fb_idx
;
91 int dst_fb_idx
= cm
->new_fb_idx
;
92 int recon_y_stride
= cm
->yv12_fb
[ref_fb_idx
].y_stride
;
93 int recon_uv_stride
= cm
->yv12_fb
[ref_fb_idx
].uv_stride
;
94 int map_index
= (mb_row
* cm
->mb_cols
);
95 volatile int *last_row_current_mb_col
;
/* Token write position for this row: cpi->tok offset by mb_row * mb_cols * 16 * 24. */
97 tp
= cpi
->tok
+ (mb_row
* (cm
->mb_cols
* 16 * 24));
/* Progress counter of the row above; polled in the column loop below. */
99 last_row_current_mb_col
= &cpi
->mt_current_mb_col
[mb_row
- 1];
101 // reset above block coeffs
102 xd
->above_context
= cm
->above_context
;
103 xd
->left_context
= &mb_row_left_context
;
105 vp8_zero(mb_row_left_context
);
107 xd
->up_available
= (mb_row
!= 0);
108 recon_yoffset
= (mb_row
* recon_y_stride
* 16);
109 recon_uvoffset
= (mb_row
* recon_uv_stride
* 8);
111 cpi
->tplist
[mb_row
].start
= tp
;
113 //printf("Thread mb_row = %d\n", mb_row);
115 // Set the mb activity pointer to the start of the row.
116 x
->mb_activity_ptr
= &cpi
->mb_activity_map
[map_index
];
118 // for each macroblock col in image
119 for (mb_col
= 0; mb_col
< cm
->mb_cols
; mb_col
++)
/*
 * When (mb_col & (nsync - 1)) == 0, i.e. every nsync columns if nsync is a
 * power of two, wait while the row above is fewer than nsync columns ahead
 * and has not yet reached its last column.
 */
121 if ((mb_col
& (nsync
- 1)) == 0)
123 while (mb_col
> (*last_row_current_mb_col
- nsync
) && *last_row_current_mb_col
!= cm
->mb_cols
- 1)
130 // Distance of Mb to the various image edges.
131 // These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
132 xd
->mb_to_left_edge
= -((mb_col
* 16) << 3);
133 xd
->mb_to_right_edge
= ((cm
->mb_cols
- 1 - mb_col
) * 16) << 3;
134 xd
->mb_to_top_edge
= -((mb_row
* 16) << 3);
135 xd
->mb_to_bottom_edge
= ((cm
->mb_rows
- 1 - mb_row
) * 16) << 3;
137 // Set up limit values for motion vectors used to prevent them extending outside the UMV borders
138 x
->mv_col_min
= -((mb_col
* 16) + (VP8BORDERINPIXELS
- 16));
139 x
->mv_col_max
= ((cm
->mb_cols
- 1 - mb_col
) * 16) + (VP8BORDERINPIXELS
- 16);
140 x
->mv_row_min
= -((mb_row
* 16) + (VP8BORDERINPIXELS
- 16));
141 x
->mv_row_max
= ((cm
->mb_rows
- 1 - mb_row
) * 16) + (VP8BORDERINPIXELS
- 16);
143 xd
->dst
.y_buffer
= cm
->yv12_fb
[dst_fb_idx
].y_buffer
+ recon_yoffset
;
144 xd
->dst
.u_buffer
= cm
->yv12_fb
[dst_fb_idx
].u_buffer
+ recon_uvoffset
;
145 xd
->dst
.v_buffer
= cm
->yv12_fb
[dst_fb_idx
].v_buffer
+ recon_uvoffset
;
146 xd
->left_available
= (mb_col
!= 0);
148 x
->rddiv
= cpi
->RDDIV
;
149 x
->rdmult
= cpi
->RDMULT
;
151 //Copy current mb to a buffer
152 RECON_INVOKE(&xd
->rtcd
->recon
, copy16x16
)(x
->src
.y_buffer
, x
->src
.y_stride
, x
->thismb
, 16);
154 if (cpi
->oxcf
.tuning
== VP8_TUNE_SSIM
)
155 vp8_activity_masking(cpi
, x
);
157 // Is segmentation enabled
158 // MB level adjustment to quantizer
159 if (xd
->segmentation_enabled
)
161 // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking)
162 if (cpi
->segmentation_map
[map_index
+ mb_col
] <= 3)
163 xd
->mode_info_context
->mbmi
.segment_id
= cpi
->segmentation_map
[map_index
+ mb_col
];
165 xd
->mode_info_context
->mbmi
.segment_id
= 0;
167 vp8cx_mb_init_quantizer(cpi
, x
);
170 xd
->mode_info_context
->mbmi
.segment_id
= 0; // Set to Segment 0 by default
172 x
->active_ptr
= cpi
->active_map
+ map_index
+ mb_col
;
/* Key frames use the intra encoder; all other frames use the inter encoder. */
174 if (cm
->frame_type
== KEY_FRAME
)
176 *totalrate
+= vp8cx_encode_intra_macro_block(cpi
, x
, &tp
);
/* NOTE(review): y_modes / inter_y_modes / inter_b_modes are not declared in this listing; presumably statistics guarded by a preprocessor conditional - confirm. */
178 y_modes
[xd
->mbmi
.mode
] ++;
183 *totalrate
+= vp8cx_encode_inter_macroblock(cpi
, x
, &tp
, recon_yoffset
, recon_uvoffset
);
186 inter_y_modes
[xd
->mbmi
.mode
] ++;
188 if (xd
->mbmi
.mode
== SPLITMV
)
192 for (b
= 0; b
< xd
->mbmi
.partition_count
; b
++)
194 inter_b_modes
[x
->partition
->bmi
[b
].mode
] ++;
200 // Count of last ref frame 0,0 usage
201 if ((xd
->mode_info_context
->mbmi
.mode
== ZEROMV
) && (xd
->mode_info_context
->mbmi
.ref_frame
== LAST_FRAME
))
202 cpi
->inter_zz_count
++;
204 // Special case code for cyclic refresh
205 // If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode
206 // during vp8cx_encode_inter_macroblock()) back into the global segmentation map
207 if (cpi
->cyclic_refresh_mode_enabled
&& xd
->segmentation_enabled
)
209 const MB_MODE_INFO
* mbmi
= &xd
->mode_info_context
->mbmi
;
210 cpi
->segmentation_map
[map_index
+ mb_col
] = mbmi
->segment_id
;
212 // If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh):
213 // Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0)
214 // else mark it as dirty (1).
215 if (mbmi
->segment_id
)
216 cpi
->cyclic_refresh_map
[map_index
+ mb_col
] = -1;
217 else if ((mbmi
->mode
== ZEROMV
) && (mbmi
->ref_frame
== LAST_FRAME
))
219 if (cpi
->cyclic_refresh_map
[map_index
+ mb_col
] == 1)
220 cpi
->cyclic_refresh_map
[map_index
+ mb_col
] = 0;
223 cpi
->cyclic_refresh_map
[map_index
+ mb_col
] = 1;
227 cpi
->tplist
[mb_row
].stop
= tp
;
229 // Increment pointer into gf usage flags structure.
/* NOTE(review): no statement follows the comment above in this listing; the increment it describes appears to be missing. */
232 // Increment the activity mask pointers.
233 x
->mb_activity_ptr
++;
235 /* save the block info */
236 for (i
= 0; i
< 16; i
++)
237 xd
->mode_info_context
->bmi
[i
] = xd
->block
[i
].bmi
;
239 // adjust to the next column of macroblocks
240 x
->src
.y_buffer
+= 16;
241 x
->src
.u_buffer
+= 8;
242 x
->src
.v_buffer
+= 8;
247 // Keep track of segment usage
248 segment_counts
[xd
->mode_info_context
->mbmi
.segment_id
]++;
251 xd
->mode_info_context
++;
/* Publish this row's progress; the row below reads it through mt_current_mb_col[mb_row - 1]. */
255 cpi
->mt_current_mb_col
[mb_row
] = mb_col
;
258 //extend the recon for intra prediction
/* NOTE(review): the name of the function these four arguments are passed to is missing from this listing. */
260 &cm
->yv12_fb
[dst_fb_idx
],
261 xd
->dst
.y_buffer
+ 16,
262 xd
->dst
.u_buffer
+ 8,
263 xd
->dst
.v_buffer
+ 8);
265 // this is to account for the border
266 xd
->mode_info_context
++;
/*
 * Move the source pointers to the next row handled by this thread: forward
 * by (encoding_thread_count + 1) macroblock rows, and back by the mb_cols
 * columns stepped through in the column loop.
 */
269 x
->src
.y_buffer
+= 16 * x
->src
.y_stride
* (cpi
->encoding_thread_count
+ 1) - 16 * cm
->mb_cols
;
270 x
->src
.u_buffer
+= 8 * x
->src
.uv_stride
* (cpi
->encoding_thread_count
+ 1) - 8 * cm
->mb_cols
;
271 x
->src
.v_buffer
+= 8 * x
->src
.uv_stride
* (cpi
->encoding_thread_count
+ 1) - 8 * cm
->mb_cols
;
/* Skip the per-macroblock info of the rows encoded by the other threads. */
273 xd
->mode_info_context
+= xd
->mode_info_stride
* cpi
->encoding_thread_count
;
274 x
->partition_info
+= xd
->mode_info_stride
* cpi
->encoding_thread_count
;
275 x
->gf_active_ptr
+= cm
->mb_cols
* cpi
->encoding_thread_count
;
277 if (mb_row
== cm
->mb_rows
- 1)
279 //SetEvent(cpi->h_event_main);
280 sem_post(&cpi
->h_event_end_encoding
); /* signal frame encoding end */
286 //printf("exit thread %d\n", ithread);
/*
 * setup_mbby_copy: copy encoder state from one MACROBLOCK (mbsrc) into
 * another (mbdst).
 *
 * Copied: motion-search parameters, forward-transform and quantizer function
 * pointers, the optimize flag, the motion-vector / token / mode cost tables,
 * the per-block quantizer pointers for blocks 0..24, and from the embedded
 * MACROBLOCKD the sub-pixel prediction function pointers, the segmentation
 * settings and the per-block dequant pointers.
 *
 * NOTE(review): this listing is missing lines (the embedded original line
 * numbers jump, e.g. 294 -> 298), including braces and the declaration of i;
 * restore them from the upstream file before building.
 */
290 static void setup_mbby_copy(MACROBLOCK
*mbdst
, MACROBLOCK
*mbsrc
)
/* x is the source, z the destination. */
293 MACROBLOCK
*x
= mbsrc
;
294 MACROBLOCK
*z
= mbdst
;
298 z
->ss_count
= x
->ss_count
;
299 z
->searches_per_step
= x
->searches_per_step
;
300 z
->errorperbit
= x
->errorperbit
;
302 z
->sadperbit16
= x
->sadperbit16
;
303 z
->sadperbit4
= x
->sadperbit4
;
/* NOTE(review): the next five assignments look like commented-out code whose comment markers were lost; confirm upstream. */
306 z->mv_col_min = x->mv_col_min;
307 z->mv_col_max = x->mv_col_max;
308 z->mv_row_min = x->mv_row_min;
309 z->mv_row_max = x->mv_row_max;
310 z->vector_range = x->vector_range ;
/* Transform and quantizer entry points are shared by pointer. */
313 z
->vp8_short_fdct4x4
= x
->vp8_short_fdct4x4
;
314 z
->vp8_short_fdct8x4
= x
->vp8_short_fdct8x4
;
315 z
->short_walsh4x4
= x
->short_walsh4x4
;
316 z
->quantize_b
= x
->quantize_b
;
317 z
->quantize_b_pair
= x
->quantize_b_pair
;
318 z
->optimize
= x
->optimize
;
/* NOTE(review): the next three assignments also look like commented-out code; confirm upstream. */
322 z->src.y_buffer = x->src.y_buffer;
323 z->src.u_buffer = x->src.u_buffer;
324 z->src.v_buffer = x->src.v_buffer;
328 vpx_memcpy(z
->mvcosts
, x
->mvcosts
, sizeof(x
->mvcosts
));
/* Aim the cost pointers into z's own tables rather than x's. */
329 z
->mvcost
[0] = &z
->mvcosts
[0][mv_max
+1];
330 z
->mvcost
[1] = &z
->mvcosts
[1][mv_max
+1];
/* NOTE(review): no copy of mvsadcosts is visible here although mvsadcost points into it; confirm it is filled elsewhere. */
331 z
->mvsadcost
[0] = &z
->mvsadcosts
[0][mvfp_max
+1];
332 z
->mvsadcost
[1] = &z
->mvsadcosts
[1][mvfp_max
+1];
335 vpx_memcpy(z
->token_costs
, x
->token_costs
, sizeof(x
->token_costs
));
336 vpx_memcpy(z
->inter_bmode_costs
, x
->inter_bmode_costs
, sizeof(x
->inter_bmode_costs
));
337 //memcpy(z->mvcosts, x->mvcosts, sizeof(x->mvcosts));
338 //memcpy(z->mvcost, x->mvcost, sizeof(x->mvcost));
339 vpx_memcpy(z
->mbmode_cost
, x
->mbmode_cost
, sizeof(x
->mbmode_cost
));
340 vpx_memcpy(z
->intra_uv_mode_cost
, x
->intra_uv_mode_cost
, sizeof(x
->intra_uv_mode_cost
));
341 vpx_memcpy(z
->bmode_costs
, x
->bmode_costs
, sizeof(x
->bmode_costs
));
/* Per-block quantizer state for all 25 blocks. */
343 for (i
= 0; i
< 25; i
++)
345 z
->block
[i
].quant
= x
->block
[i
].quant
;
346 z
->block
[i
].quant_fast
= x
->block
[i
].quant_fast
;
347 z
->block
[i
].quant_shift
= x
->block
[i
].quant_shift
;
348 z
->block
[i
].zbin
= x
->block
[i
].zbin
;
349 z
->block
[i
].zrun_zbin_boost
= x
->block
[i
].zrun_zbin_boost
;
350 z
->block
[i
].round
= x
->block
[i
].round
;
/* NOTE(review): the next assignment looks like commented-out code; confirm upstream. */
352 z->block[i].src = x->block[i].src;
354 z
->block
[i
].src_stride
= x
->block
[i
].src_stride
;
355 z
->block
[i
].force_empty
= x
->block
[i
].force_empty
;
/* Decoder-side (MACROBLOCKD) state embedded in each MACROBLOCK. */
360 MACROBLOCKD
*xd
= &x
->e_mbd
;
361 MACROBLOCKD
*zd
= &z
->e_mbd
;
/* NOTE(review): the assignments numbered 364-382 look like one block of commented-out code; confirm upstream. */
364 zd->mode_info_context = xd->mode_info_context;
365 zd->mode_info = xd->mode_info;
367 zd->mode_info_stride = xd->mode_info_stride;
368 zd->frame_type = xd->frame_type;
369 zd->up_available = xd->up_available ;
370 zd->left_available = xd->left_available;
371 zd->left_context = xd->left_context;
372 zd->last_frame_dc = xd->last_frame_dc;
373 zd->last_frame_dccons = xd->last_frame_dccons;
374 zd->gold_frame_dc = xd->gold_frame_dc;
375 zd->gold_frame_dccons = xd->gold_frame_dccons;
376 zd->mb_to_left_edge = xd->mb_to_left_edge;
377 zd->mb_to_right_edge = xd->mb_to_right_edge;
378 zd->mb_to_top_edge = xd->mb_to_top_edge ;
379 zd->mb_to_bottom_edge = xd->mb_to_bottom_edge;
380 zd->gf_active_ptr = xd->gf_active_ptr;
381 zd->frames_since_golden = xd->frames_since_golden;
382 zd->frames_till_alt_ref_frame = xd->frames_till_alt_ref_frame;
384 zd
->subpixel_predict
= xd
->subpixel_predict
;
385 zd
->subpixel_predict8x4
= xd
->subpixel_predict8x4
;
386 zd
->subpixel_predict8x8
= xd
->subpixel_predict8x8
;
387 zd
->subpixel_predict16x16
= xd
->subpixel_predict16x16
;
388 zd
->segmentation_enabled
= xd
->segmentation_enabled
;
389 zd
->mb_segement_abs_delta
= xd
->mb_segement_abs_delta
;
390 vpx_memcpy(zd
->segment_feature_data
, xd
->segment_feature_data
, sizeof(xd
->segment_feature_data
));
/* Dequantizer pointers for all 25 blocks. */
392 for (i
= 0; i
< 25; i
++)
394 zd
->block
[i
].dequant
= xd
->block
[i
].dequant
;
/*
 * vp8cx_init_mbrthread_data: prepare the per-thread MACROBLOCK contexts.
 *
 * For each i in [0, count) the context mbr_ei[i].mb is set up for macroblock
 * row i + 1: prediction function pointers are taken from x, the per-thread
 * segment counts and total rate are cleared, partition_info and
 * mode_info_context are offset by (i + 1) * mode_info_stride, and the source
 * buffers are advanced by (i + 1) * 16 * y_stride (luma) and
 * (i + 1) * 8 * uv_stride (chroma).  Remaining state is copied from x with
 * setup_mbby_copy().
 *
 * NOTE(review): only the first parameter (cpi) is visible in this listing;
 * x, mbr_ei, count and i are used below but their declarations are missing,
 * as are the braces.  Restore them from the upstream file before building.
 */
399 void vp8cx_init_mbrthread_data(VP8_COMP
*cpi
,
407 VP8_COMMON
*const cm
= & cpi
->common
;
408 MACROBLOCKD
*const xd
= & x
->e_mbd
;
412 for (i
= 0; i
< count
; i
++)
414 MACROBLOCK
*mb
= & mbr_ei
[i
].mb
;
415 MACROBLOCKD
*mbd
= &mb
->e_mbd
;
417 mbd
->subpixel_predict
= xd
->subpixel_predict
;
418 mbd
->subpixel_predict8x4
= xd
->subpixel_predict8x4
;
419 mbd
->subpixel_predict8x8
= xd
->subpixel_predict8x8
;
420 mbd
->subpixel_predict16x16
= xd
->subpixel_predict16x16
;
/* NOTE(review): the matching #endif for this conditional is not visible in this listing. */
421 #if CONFIG_RUNTIME_CPU_DETECT
422 mbd
->rtcd
= xd
->rtcd
;
424 mb
->gf_active_ptr
= x
->gf_active_ptr
;
426 mb
->vector_range
= 32;
/* Start each frame with empty per-thread statistics. */
428 vpx_memset(mbr_ei
[i
].segment_counts
, 0, sizeof(mbr_ei
[i
].segment_counts
));
429 mbr_ei
[i
].totalrate
= 0;
/* Thread i starts on macroblock row i + 1, so skip i + 1 rows of per-MB info. */
431 mb
->partition_info
= x
->pi
+ x
->e_mbd
.mode_info_stride
* (i
+ 1);
433 mbd
->mode_info_context
= cm
->mi
+ x
->e_mbd
.mode_info_stride
* (i
+ 1);
434 mbd
->mode_info_stride
= cm
->mode_info_stride
;
436 mbd
->frame_type
= cm
->frame_type
;
438 mbd
->frames_since_golden
= cm
->frames_since_golden
;
439 mbd
->frames_till_alt_ref_frame
= cm
->frames_till_alt_ref_frame
;
/* Source, prediction (last frame) and destination (new frame) buffers. */
441 mb
->src
= * cpi
->Source
;
442 mbd
->pre
= cm
->yv12_fb
[cm
->lst_fb_idx
];
443 mbd
->dst
= cm
->yv12_fb
[cm
->new_fb_idx
];
/* Advance the source pointers to the start of macroblock row i + 1. */
445 mb
->src
.y_buffer
+= 16 * x
->src
.y_stride
* (i
+ 1);
446 mb
->src
.u_buffer
+= 8 * x
->src
.uv_stride
* (i
+ 1);
447 mb
->src
.v_buffer
+= 8 * x
->src
.uv_stride
* (i
+ 1);
449 vp8_build_block_offsets(mb
);
451 vp8_setup_block_dptrs(mbd
);
453 vp8_setup_block_ptrs(mb
);
/* Every context points at cm->left_context here; thread_encoding_proc re-points left_context at its own local context per row. */
455 mbd
->left_context
= &cm
->left_context
;
456 mb
->mvc
= cm
->fc
.mvc
;
458 setup_mbby_copy(&mbr_ei
[i
].mb
, x
);
463 void vp8cx_create_encoder_threads(VP8_COMP
*cpi
)
465 const VP8_COMMON
* cm
= &cpi
->common
;
467 cpi
->b_multi_threaded
= 0;
468 cpi
->encoding_thread_count
= 0;
470 if (cm
->processor_core_count
> 1 && cpi
->oxcf
.multi_threaded
> 1)
473 int th_count
= cpi
->oxcf
.multi_threaded
- 1;
475 /* don't allocate more threads than cores available */
476 if (cpi
->oxcf
.multi_threaded
> cm
->processor_core_count
)
477 th_count
= cm
->processor_core_count
- 1;
479 /* we have th_count + 1 (main) threads processing one row each */
480 /* no point to have more threads than the sync range allows */
481 if(th_count
> ((cm
->mb_cols
/ cpi
->mt_sync_range
) - 1))
483 th_count
= (cm
->mb_cols
/ cpi
->mt_sync_range
) - 1;
489 CHECK_MEM_ERROR(cpi
->h_encoding_thread
, vpx_malloc(sizeof(pthread_t
) * th_count
));
490 CHECK_MEM_ERROR(cpi
->h_event_start_encoding
, vpx_malloc(sizeof(sem_t
) * th_count
));
491 CHECK_MEM_ERROR(cpi
->mb_row_ei
, vpx_memalign(32, sizeof(MB_ROW_COMP
) * th_count
));
492 vpx_memset(cpi
->mb_row_ei
, 0, sizeof(MB_ROW_COMP
) * th_count
);
493 CHECK_MEM_ERROR(cpi
->en_thread_data
,
494 vpx_malloc(sizeof(ENCODETHREAD_DATA
) * th_count
));
495 CHECK_MEM_ERROR(cpi
->mt_current_mb_col
,
496 vpx_malloc(sizeof(*cpi
->mt_current_mb_col
) * cm
->mb_rows
));
498 sem_init(&cpi
->h_event_end_encoding
, 0, 0);
500 cpi
->b_multi_threaded
= 1;
501 cpi
->encoding_thread_count
= th_count
;
504 printf("[VP8:] multi_threaded encoding is enabled with %d threads\n\n",
505 (cpi->encoding_thread_count +1));
508 for (ithread
= 0; ithread
< th_count
; ithread
++)
510 ENCODETHREAD_DATA
* ethd
= &cpi
->en_thread_data
[ithread
];
512 sem_init(&cpi
->h_event_start_encoding
[ithread
], 0, 0);
513 ethd
->ithread
= ithread
;
514 ethd
->ptr1
= (void *)cpi
;
515 ethd
->ptr2
= (void *)&cpi
->mb_row_ei
[ithread
];
517 pthread_create(&cpi
->h_encoding_thread
[ithread
], 0, thread_encoding_proc
, ethd
);
521 LPFTHREAD_DATA
* lpfthd
= &cpi
->lpf_thread_data
;
523 sem_init(&cpi
->h_event_start_lpf
, 0, 0);
524 sem_init(&cpi
->h_event_end_lpf
, 0, 0);
526 lpfthd
->ptr1
= (void *)cpi
;
527 pthread_create(&cpi
->h_filter_thread
, 0, loopfilter_thread
, lpfthd
);
/*
 * vp8cx_remove_encoder_threads: stop the worker threads and release the
 * threading resources held by cpi.
 *
 * Nothing is done unless cpi->b_multi_threaded is set.  The flag is cleared
 * first and each start semaphore is then posted before joining, so a thread
 * blocked in sem_wait() wakes up and can observe the cleared flag.
 *
 * NOTE(review): braces and the declaration of i are missing from this
 * listing, and the end of the function is not visible; restore them from
 * the upstream file before building.
 */
533 void vp8cx_remove_encoder_threads(VP8_COMP
*cpi
)
535 if (cpi
->b_multi_threaded
)
537 //shutdown other threads
538 cpi
->b_multi_threaded
= 0;
/* Wake, join and clean up each encoding thread in turn. */
542 for (i
= 0; i
< cpi
->encoding_thread_count
; i
++)
544 //SetEvent(cpi->h_event_mbrencoding[i]);
545 sem_post(&cpi
->h_event_start_encoding
[i
]);
546 pthread_join(cpi
->h_encoding_thread
[i
], 0);
548 sem_destroy(&cpi
->h_event_start_encoding
[i
]);
/* Same wake-then-join sequence for the loop-filter thread. */
551 sem_post(&cpi
->h_event_start_lpf
);
552 pthread_join(cpi
->h_filter_thread
, 0);
/* The remaining semaphores are destroyed only after the joins above. */
555 sem_destroy(&cpi
->h_event_end_encoding
);
556 sem_destroy(&cpi
->h_event_end_lpf
);
557 sem_destroy(&cpi
->h_event_start_lpf
);
559 //free thread related resources
560 vpx_free(cpi
->h_event_start_encoding
);
561 vpx_free(cpi
->h_encoding_thread
);
562 vpx_free(cpi
->mb_row_ei
);
563 vpx_free(cpi
->en_thread_data
);
564 vpx_free(cpi
->mt_current_mb_col
);