2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
12 #include "vp8/common/threading.h"
13 #include "vp8/common/common.h"
14 #include "vp8/common/extend.h"
16 #if CONFIG_MULTITHREAD
/*
 * Macroblock encode entry points defined outside this file. Both return int;
 * thread_encoding_proc adds the returned value to its per-thread totalrate.
 *
 * NOTE(review): both parameter lists end on a trailing comma in this view;
 * the remaining parameters and the closing ");" are not visible here.
 * thread_encoding_proc calls the inter variant with five arguments
 * (cpi, x, &tp, recon_yoffset, recon_uvoffset) and the intra variant with
 * three (cpi, x, &tp) -- confirm the full prototypes against the definitions.
 */
18 extern int vp8cx_encode_inter_macroblock(VP8_COMP
*cpi
, MACROBLOCK
*x
,
19 TOKENEXTRA
**t
, int recon_yoffset
,
/* Intra counterpart, used by thread_encoding_proc when cm->frame_type == KEY_FRAME. */
21 extern int vp8cx_encode_intra_macro_block(VP8_COMP
*cpi
, MACROBLOCK
*x
,
/*
 * Helpers defined outside this file.
 *
 * vp8cx_mb_init_quantizer is called by thread_encoding_proc for each
 * macroblock when xd->segmentation_enabled is set, right after the
 * segment id has been chosen.
 */
23 extern void vp8cx_mb_init_quantizer(VP8_COMP
*cpi
, MACROBLOCK
*x
);
/* Called once per worker MACROBLOCK from vp8cx_init_mbrthread_data. */
24 extern void vp8_build_block_offsets(MACROBLOCK
*x
);
/* Called once per worker MACROBLOCK from vp8cx_init_mbrthread_data. */
25 extern void vp8_setup_block_ptrs(MACROBLOCK
*x
);
/* Whole-frame loop filter; invoked from loopfilter_thread in this file. */
27 extern void loopfilter_frame(VP8_COMP
*cpi
, VP8_COMMON
*cm
);
/*
 * Thread entry point for the loop-filter worker.
 *
 * p_data is read as an LPFTHREAD_DATA whose ptr1 field holds the VP8_COMP
 * encoder instance. The visible sequence is: check cpi->b_multi_threaded,
 * wait on h_event_start_lpf, re-check b_multi_threaded (shutdown request),
 * call loopfilter_frame(cpi, cm), then post h_event_end_lpf.
 *
 * NOTE(review): the braces, any enclosing loop, the statements taken when
 * the b_multi_threaded checks succeed, and the return statement are not
 * visible in this view -- confirm the control flow against the full file.
 */
29 static THREAD_FUNCTION
loopfilter_thread(void *p_data
)
31 VP8_COMP
*cpi
= (VP8_COMP
*)(((LPFTHREAD_DATA
*)p_data
)->ptr1
);
32 VP8_COMMON
*cm
= &cpi
->common
;
// Checked before blocking on the semaphore.
36 if (cpi
->b_multi_threaded
== 0)
// sem_wait returns 0 on success; only then is the request handled.
39 if (sem_wait(&cpi
->h_event_start_lpf
) == 0)
// vp8cx_remove_encoder_threads clears the flag and then posts
// h_event_start_lpf, so a wake-up with the flag cleared means "exit".
41 if (cpi
->b_multi_threaded
== FALSE
) // we're shutting down
44 loopfilter_frame(cpi
, cm
);
// Signal that the loop-filter call has returned.
46 sem_post(&cpi
->h_event_end_lpf
);
/*
 * Thread entry point for one macroblock-row encoding worker.
 *
 * p_data is read as an ENCODETHREAD_DATA: ithread is this worker's index,
 * ptr1 the VP8_COMP encoder instance and ptr2 this worker's MB_ROW_COMP.
 *
 * After being released through h_event_start_encoding[ithread], the worker
 * encodes rows ithread + 1, ithread + 1 + (encoding_thread_count + 1), ...
 * up to cm->mb_rows. Progress is published per row in
 * cpi->mt_current_mb_col[] and the row above is polled through
 * last_row_current_mb_col. The worker that handles the last row posts
 * h_event_end_encoding.
 *
 * NOTE(review): braces, the outer loop, else keywords, break/return
 * statements and several local declarations (mb_row, mb_col, tp, i, b,
 * y_modes, inter_y_modes, inter_b_modes) are not visible in this view, so
 * the exact nesting below must be confirmed against the full file.
 */
54 THREAD_FUNCTION
thread_encoding_proc(void *p_data
)
56 int ithread
= ((ENCODETHREAD_DATA
*)p_data
)->ithread
;
57 VP8_COMP
*cpi
= (VP8_COMP
*)(((ENCODETHREAD_DATA
*)p_data
)->ptr1
);
58 MB_ROW_COMP
*mbri
= (MB_ROW_COMP
*)(((ENCODETHREAD_DATA
*)p_data
)->ptr2
);
// Left-neighbour entropy context for this worker; xd->left_context is
// pointed at it and it is zeroed at the start of every row.
59 ENTROPY_CONTEXT_PLANES mb_row_left_context
;
// Column granularity used when synchronising with the row above.
61 const int nsync
= cpi
->mt_sync_range
;
62 //printf("Started thread %d\n", ithread);
66 if (cpi
->b_multi_threaded
== 0)
69 //if(WaitForSingleObject(cpi->h_event_mbrencoding[ithread], INFINITE) == WAIT_OBJECT_0)
// Block until this worker's start semaphore is posted.
70 if (sem_wait(&cpi
->h_event_start_encoding
[ithread
]) == 0)
72 VP8_COMMON
*cm
= &cpi
->common
;
74 MACROBLOCK
*x
= &mbri
->mb
;
75 MACROBLOCKD
*xd
= &x
->e_mbd
;
// Per-worker accumulators stored in this worker's MB_ROW_COMP.
78 int *segment_counts
= mbri
->segment_counts
;
79 int *totalrate
= &mbri
->totalrate
;
// vp8cx_remove_encoder_threads clears the flag before posting the start
// semaphore, so a wake-up with the flag cleared is the exit request.
81 if (cpi
->b_multi_threaded
== FALSE
) // we're shutting down
// Rows are interleaved across encoding_thread_count + 1 participants;
// this worker starts at row ithread + 1.
84 for (mb_row
= ithread
+ 1; mb_row
< cm
->mb_rows
; mb_row
+= (cpi
->encoding_thread_count
+ 1))
88 int recon_yoffset
, recon_uvoffset
;
90 int ref_fb_idx
= cm
->lst_fb_idx
;
91 int dst_fb_idx
= cm
->new_fb_idx
;
92 int recon_y_stride
= cm
->yv12_fb
[ref_fb_idx
].y_stride
;
93 int recon_uv_stride
= cm
->yv12_fb
[ref_fb_idx
].uv_stride
;
// Declared volatile because the pointee is written by the worker that
// encodes the row above.
94 volatile int *last_row_current_mb_col
;
95 INT64 activity_sum
= 0;
// Token output for this row starts at a fixed offset of
// mb_cols * 16 * 24 tokens per row from cpi->tok.
97 tp
= cpi
->tok
+ (mb_row
* (cm
->mb_cols
* 16 * 24));
// mb_row is at least 1 here (loop starts at ithread + 1), so mb_row - 1
// is a valid row index.
99 last_row_current_mb_col
= &cpi
->mt_current_mb_col
[mb_row
- 1];
101 // reset above block coeffs
102 xd
->above_context
= cm
->above_context
;
103 xd
->left_context
= &mb_row_left_context
;
105 vp8_zero(mb_row_left_context
);
107 xd
->up_available
= (mb_row
!= 0);
// Offsets of this row's first macroblock: 16 luma / 8 chroma lines per row.
108 recon_yoffset
= (mb_row
* recon_y_stride
* 16);
109 recon_uvoffset
= (mb_row
* recon_uv_stride
* 8);
111 cpi
->tplist
[mb_row
].start
= tp
;
113 //printf("Thread mb_row = %d\n", mb_row);
115 // for each macroblock col in image
116 for (mb_col
= 0; mb_col
< cm
->mb_cols
; mb_col
++)
// Index of this row's first entry in the per-macroblock maps.
118 int seg_map_index
= (mb_row
* cm
->mb_cols
);
// Synchronise with the row above whenever the low bits of mb_col are zero.
// NOTE(review): the mask only means "every nsync columns" if nsync is a
// power of two -- confirm how mt_sync_range is chosen.
120 if ((mb_col
& (nsync
- 1)) == 0)
// Loop while the row above is less than nsync columns ahead and has not
// reached its last column. NOTE(review): the loop body is not visible here.
122 while (mb_col
> (*last_row_current_mb_col
- nsync
) && *last_row_current_mb_col
!= cm
->mb_cols
- 1)
129 // Distance of Mb to the various image edges.
130 // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
131 xd
->mb_to_left_edge
= -((mb_col
* 16) << 3);
132 xd
->mb_to_right_edge
= ((cm
->mb_cols
- 1 - mb_col
) * 16) << 3;
133 xd
->mb_to_top_edge
= -((mb_row
* 16) << 3);
134 xd
->mb_to_bottom_edge
= ((cm
->mb_rows
- 1 - mb_row
) * 16) << 3;
136 // Set up limit values for motion vectors used to prevent them extending outside the UMV borders
137 x
->mv_col_min
= -((mb_col
* 16) + (VP8BORDERINPIXELS
- 16));
138 x
->mv_col_max
= ((cm
->mb_cols
- 1 - mb_col
) * 16) + (VP8BORDERINPIXELS
- 16);
139 x
->mv_row_min
= -((mb_row
* 16) + (VP8BORDERINPIXELS
- 16));
140 x
->mv_row_max
= ((cm
->mb_rows
- 1 - mb_row
) * 16) + (VP8BORDERINPIXELS
- 16);
// Point the destination planes at this macroblock in the new frame buffer.
// NOTE(review): no per-column update of recon_yoffset / recon_uvoffset is
// visible in this view -- confirm where they advance.
142 xd
->dst
.y_buffer
= cm
->yv12_fb
[dst_fb_idx
].y_buffer
+ recon_yoffset
;
143 xd
->dst
.u_buffer
= cm
->yv12_fb
[dst_fb_idx
].u_buffer
+ recon_uvoffset
;
144 xd
->dst
.v_buffer
= cm
->yv12_fb
[dst_fb_idx
].v_buffer
+ recon_uvoffset
;
145 xd
->left_available
= (mb_col
!= 0);
// Refresh the rate-distortion constants from the encoder instance.
147 x
->rddiv
= cpi
->RDDIV
;
148 x
->rdmult
= cpi
->RDMULT
;
// Activity is only accumulated when tuning for SSIM.
150 if (cpi
->oxcf
.tuning
== VP8_TUNE_SSIM
)
151 activity_sum
+= vp8_activity_masking(cpi
, x
);
153 // Is segmentation enabled
154 // MB level adjustment to quantizer
155 if (xd
->segmentation_enabled
)
157 // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking)
158 if (cpi
->segmentation_map
[seg_map_index
+ mb_col
] <= 3)
159 xd
->mode_info_context
->mbmi
.segment_id
= cpi
->segmentation_map
[seg_map_index
+ mb_col
];
// NOTE(review): presumably the else branch of the range check above (the
// else keyword is not visible); out-of-range ids fall back to segment 0.
161 xd
->mode_info_context
->mbmi
.segment_id
= 0;
163 vp8cx_mb_init_quantizer(cpi
, x
);
// NOTE(review): presumably the else branch of the segmentation_enabled test.
166 xd
->mode_info_context
->mbmi
.segment_id
= 0; // Set to Segment 0 by default
168 x
->active_ptr
= cpi
->active_map
+ seg_map_index
+ mb_col
;
// Key frames use the intra encoder; the inter call below is presumably the
// else branch (keyword not visible).
170 if (cm
->frame_type
== KEY_FRAME
)
172 *totalrate
+= vp8cx_encode_intra_macro_block(cpi
, x
, &tp
);
// NOTE(review): y_modes, inter_y_modes and inter_b_modes are not declared
// in this view; confirm where they live and whether these statistics lines
// are conditionally compiled.
174 y_modes
[xd
->mbmi
.mode
] ++;
179 *totalrate
+= vp8cx_encode_inter_macroblock(cpi
, x
, &tp
, recon_yoffset
, recon_uvoffset
);
182 inter_y_modes
[xd
->mbmi
.mode
] ++;
184 if (xd
->mbmi
.mode
== SPLITMV
)
188 for (b
= 0; b
< xd
->mbmi
.partition_count
; b
++)
190 inter_b_modes
[x
->partition
->bmi
[b
].mode
] ++;
196 // Count of last ref frame 0,0 usage
// NOTE(review): cpi->inter_zz_count is a field of the shared encoder
// instance and no locking is visible around this increment -- confirm
// whether concurrent workers can race here.
197 if ((xd
->mode_info_context
->mbmi
.mode
== ZEROMV
) && (xd
->mode_info_context
->mbmi
.ref_frame
== LAST_FRAME
))
198 cpi
->inter_zz_count
++;
200 // Special case code for cyclic refresh
201 // If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode
202 // during vp8cx_encode_inter_macroblock()) back into the global segmentation map
203 if (cpi
->cyclic_refresh_mode_enabled
&& xd
->segmentation_enabled
)
205 const MB_MODE_INFO
* mbmi
= &xd
->mode_info_context
->mbmi
;
206 cpi
->segmentation_map
[seg_map_index
+ mb_col
] = mbmi
->segment_id
;
208 // If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh):
209 // Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0)
210 // else mark it as dirty (1).
211 if (mbmi
->segment_id
)
212 cpi
->cyclic_refresh_map
[seg_map_index
+ mb_col
] = -1;
213 else if ((mbmi
->mode
== ZEROMV
) && (mbmi
->ref_frame
== LAST_FRAME
))
215 if (cpi
->cyclic_refresh_map
[seg_map_index
+ mb_col
] == 1)
216 cpi
->cyclic_refresh_map
[seg_map_index
+ mb_col
] = 0;
// NOTE(review): presumably the final else of the chain above ("dirty").
219 cpi
->cyclic_refresh_map
[seg_map_index
+ mb_col
] = 1;
// Record the current token pointer as this row's end-of-tokens marker.
223 cpi
->tplist
[mb_row
].stop
= tp
;
225 x
->gf_active_ptr
++; // Increment pointer into gf usage flags structure for next mb
// Copy the 16 per-block bmi entries into the mode info entry for this MB.
227 for (i
= 0; i
< 16; i
++)
228 vpx_memcpy(&xd
->mode_info_context
->bmi
[i
], &xd
->block
[i
].bmi
, sizeof(xd
->block
[i
].bmi
));
230 // adjust to the next column of macroblocks
231 x
->src
.y_buffer
+= 16;
232 x
->src
.u_buffer
+= 8;
233 x
->src
.v_buffer
+= 8;
238 // Keep track of segment usage
239 segment_counts
[xd
->mode_info_context
->mbmi
.segment_id
]++;
242 xd
->mode_info_context
++;
// Publish progress so the worker on the next row can advance.
246 cpi
->mt_current_mb_col
[mb_row
] = mb_col
;
249 //extend the recon for intra prediction
// NOTE(review): the line naming the function being called is not visible
// in this view; only its four arguments follow.
251 &cm
->yv12_fb
[dst_fb_idx
],
252 xd
->dst
.y_buffer
+ 16,
253 xd
->dst
.u_buffer
+ 8,
254 xd
->dst
.v_buffer
+ 8);
256 // this is to account for the border
257 xd
->mode_info_context
++;
// Fold this row's activity into the worker's running total.
259 x
->activity_sum
+= activity_sum
;
// Jump the source pointers to the next row this worker handles:
// encoding_thread_count + 1 rows down, minus the 16 (or 8) added per column.
261 x
->src
.y_buffer
+= 16 * x
->src
.y_stride
* (cpi
->encoding_thread_count
+ 1) - 16 * cm
->mb_cols
;
262 x
->src
.u_buffer
+= 8 * x
->src
.uv_stride
* (cpi
->encoding_thread_count
+ 1) - 8 * cm
->mb_cols
;
263 x
->src
.v_buffer
+= 8 * x
->src
.uv_stride
* (cpi
->encoding_thread_count
+ 1) - 8 * cm
->mb_cols
;
// Skip the mode-info rows that belong to the other participants.
265 xd
->mode_info_context
+= xd
->mode_info_stride
* cpi
->encoding_thread_count
;
266 x
->partition_info
+= xd
->mode_info_stride
* cpi
->encoding_thread_count
;
// Only the worker that encoded the final row signals frame completion.
268 if (mb_row
== cm
->mb_rows
- 1)
270 //SetEvent(cpi->h_event_main);
271 sem_post(&cpi
->h_event_end_encoding
); /* signal frame encoding end */
277 //printf("exit thread %d\n", ithread);
/*
 * Copy the encoder-wide MACROBLOCK settings from mbsrc into mbdst.
 *
 * Copies search parameters, function pointers, cost tables, the per-block
 * quantizer fields and the MACROBLOCKD prediction/segmentation settings.
 * Called from vp8cx_init_mbrthread_data with each worker's MACROBLOCK as
 * mbdst.
 *
 * NOTE(review): braces and the declaration of the loop index i are not
 * visible in this view.
 */
281 static void setup_mbby_copy(MACROBLOCK
*mbdst
, MACROBLOCK
*mbsrc
)
// Short aliases: x is the source, z the destination.
284 MACROBLOCK
*x
= mbsrc
;
285 MACROBLOCK
*z
= mbdst
;
// Motion-search parameters.
289 z
->ss_count
= x
->ss_count
;
290 z
->searches_per_step
= x
->searches_per_step
;
291 z
->errorperbit
= x
->errorperbit
;
293 z
->sadperbit16
= x
->sadperbit16
;
294 z
->sadperbit4
= x
->sadperbit4
;
295 z
->errthresh
= x
->errthresh
;
// NOTE(review): as shown, vector_range overwrites the value 32 that
// vp8cx_init_mbrthread_data assigns just before calling this function, and
// the mv_* limits are recomputed per macroblock in thread_encoding_proc.
// These lines may sit inside a comment whose delimiters are not visible
// here -- confirm whether they are active.
298 z->mv_col_min = x->mv_col_min;
299 z->mv_col_max = x->mv_col_max;
300 z->mv_row_min = x->mv_row_min;
301 z->mv_row_max = x->mv_row_max;
302 z->vector_range = x->vector_range ;
// Transform / quantize function pointers and the optimize flag.
305 z
->vp8_short_fdct4x4
= x
->vp8_short_fdct4x4
;
306 z
->vp8_short_fdct8x4
= x
->vp8_short_fdct8x4
;
307 z
->short_walsh4x4
= x
->short_walsh4x4
;
308 z
->quantize_b
= x
->quantize_b
;
309 z
->optimize
= x
->optimize
;
// NOTE(review): as shown, these assignments replace the per-row source
// pointers that vp8cx_init_mbrthread_data offsets before calling this
// function. Confirm whether they are active (possibly commented out in the
// full file).
313 z->src.y_buffer = x->src.y_buffer;
314 z->src.u_buffer = x->src.u_buffer;
315 z->src.v_buffer = x->src.v_buffer;
// Copy the MV cost tables, then point mvcost[] into the destination's own
// copy at index mv_max + 1.
319 vpx_memcpy(z
->mvcosts
, x
->mvcosts
, sizeof(x
->mvcosts
));
320 z
->mvcost
[0] = &z
->mvcosts
[0][mv_max
+1];
321 z
->mvcost
[1] = &z
->mvcosts
[1][mv_max
+1];
// NOTE(review): mvsadcost[] is pointed into z->mvsadcosts, but no copy of
// x->mvsadcosts into z->mvsadcosts is visible in this function -- confirm
// the table is filled elsewhere before it is read.
322 z
->mvsadcost
[0] = &z
->mvsadcosts
[0][mvfp_max
+1];
323 z
->mvsadcost
[1] = &z
->mvsadcosts
[1][mvfp_max
+1];
// Remaining cost tables are copied by value.
326 vpx_memcpy(z
->token_costs
, x
->token_costs
, sizeof(x
->token_costs
));
327 vpx_memcpy(z
->inter_bmode_costs
, x
->inter_bmode_costs
, sizeof(x
->inter_bmode_costs
));
328 //memcpy(z->mvcosts, x->mvcosts, sizeof(x->mvcosts));
329 //memcpy(z->mvcost, x->mvcost, sizeof(x->mvcost));
330 vpx_memcpy(z
->mbmode_cost
, x
->mbmode_cost
, sizeof(x
->mbmode_cost
));
331 vpx_memcpy(z
->intra_uv_mode_cost
, x
->intra_uv_mode_cost
, sizeof(x
->intra_uv_mode_cost
));
332 vpx_memcpy(z
->bmode_costs
, x
->bmode_costs
, sizeof(x
->bmode_costs
));
// Per-block quantizer state for block[0..24].
334 for (i
= 0; i
< 25; i
++)
336 z
->block
[i
].quant
= x
->block
[i
].quant
;
337 z
->block
[i
].quant_fast
= x
->block
[i
].quant_fast
;
338 z
->block
[i
].quant_shift
= x
->block
[i
].quant_shift
;
339 z
->block
[i
].zbin
= x
->block
[i
].zbin
;
340 z
->block
[i
].zrun_zbin_boost
= x
->block
[i
].zrun_zbin_boost
;
341 z
->block
[i
].round
= x
->block
[i
].round
;
// NOTE(review): confirm whether this line is active; the caller runs
// vp8_setup_block_ptrs(mb) on the destination before calling this function.
343 z->block[i].src = x->block[i].src;
345 z
->block
[i
].src_stride
= x
->block
[i
].src_stride
;
346 z
->block
[i
].force_empty
= x
->block
[i
].force_empty
;
// MACROBLOCKD part: xd is the source descriptor, zd the destination.
351 MACROBLOCKD
*xd
= &x
->e_mbd
;
352 MACROBLOCKD
*zd
= &z
->e_mbd
;
// NOTE(review): as shown, the following assignments replace fields that
// vp8cx_init_mbrthread_data sets per worker before this call (for example
// mode_info_context and left_context). Confirm whether this run of lines is
// active or sits inside a comment whose delimiters are not visible here.
355 zd->mode_info_context = xd->mode_info_context;
356 zd->mode_info = xd->mode_info;
358 zd->mode_info_stride = xd->mode_info_stride;
359 zd->frame_type = xd->frame_type;
360 zd->up_available = xd->up_available ;
361 zd->left_available = xd->left_available;
362 zd->left_context = xd->left_context;
363 zd->last_frame_dc = xd->last_frame_dc;
364 zd->last_frame_dccons = xd->last_frame_dccons;
365 zd->gold_frame_dc = xd->gold_frame_dc;
366 zd->gold_frame_dccons = xd->gold_frame_dccons;
367 zd->mb_to_left_edge = xd->mb_to_left_edge;
368 zd->mb_to_right_edge = xd->mb_to_right_edge;
369 zd->mb_to_top_edge = xd->mb_to_top_edge ;
370 zd->mb_to_bottom_edge = xd->mb_to_bottom_edge;
371 zd->gf_active_ptr = xd->gf_active_ptr;
372 zd->frames_since_golden = xd->frames_since_golden;
373 zd->frames_till_alt_ref_frame = xd->frames_till_alt_ref_frame;
// Sub-pixel prediction function pointers and segmentation settings.
375 zd
->subpixel_predict
= xd
->subpixel_predict
;
376 zd
->subpixel_predict8x4
= xd
->subpixel_predict8x4
;
377 zd
->subpixel_predict8x8
= xd
->subpixel_predict8x8
;
378 zd
->subpixel_predict16x16
= xd
->subpixel_predict16x16
;
379 zd
->segmentation_enabled
= xd
->segmentation_enabled
;
380 zd
->mb_segement_abs_delta
= xd
->mb_segement_abs_delta
;
381 vpx_memcpy(zd
->segment_feature_data
, xd
->segment_feature_data
, sizeof(xd
->segment_feature_data
));
// Dequantizer pointers for block[0..24].
383 for (i
= 0; i
< 25; i
++)
385 zd
->block
[i
].dequant
= xd
->block
[i
].dequant
;
/*
 * Prepare the per-worker MB_ROW_COMP entries before a frame is encoded.
 *
 * For each i in [0, count) the worker MACROBLOCK mbr_ei[i].mb is set up to
 * start at macroblock row i + 1: its source planes, partition_info and
 * mode_info_context are offset by i + 1 rows, its counters are cleared, and
 * the shared settings are copied from x with setup_mbby_copy.
 *
 * NOTE(review): only the first parameter (cpi) is visible in this view; the
 * body also uses x, mbr_ei and count, which are presumably the remaining
 * parameters. Braces, the declaration of i and the #endif matching the
 * CONFIG_RUNTIME_CPU_DETECT block are not visible either.
 */
390 void vp8cx_init_mbrthread_data(VP8_COMP
*cpi
,
398 VP8_COMMON
*const cm
= & cpi
->common
;
399 MACROBLOCKD
*const xd
= & x
->e_mbd
;
403 for (i
= 0; i
< count
; i
++)
405 MACROBLOCK
*mb
= & mbr_ei
[i
].mb
;
406 MACROBLOCKD
*mbd
= &mb
->e_mbd
;
// Share the caller's sub-pixel prediction function pointers.
408 mbd
->subpixel_predict
= xd
->subpixel_predict
;
409 mbd
->subpixel_predict8x4
= xd
->subpixel_predict8x4
;
410 mbd
->subpixel_predict8x8
= xd
->subpixel_predict8x8
;
411 mbd
->subpixel_predict16x16
= xd
->subpixel_predict16x16
;
412 #if CONFIG_RUNTIME_CPU_DETECT
413 mbd
->rtcd
= xd
->rtcd
;
415 mb
->gf_active_ptr
= x
->gf_active_ptr
;
417 mb
->vector_range
= 32;
// Reset this worker's per-frame accumulators.
419 vpx_memset(mbr_ei
[i
].segment_counts
, 0, sizeof(mbr_ei
[i
].segment_counts
));
420 mbr_ei
[i
].totalrate
= 0;
// Worker i starts on row i + 1, so skip i + 1 mode-info rows.
422 mb
->partition_info
= x
->pi
+ x
->e_mbd
.mode_info_stride
* (i
+ 1);
424 mbd
->mode_info_context
= cm
->mi
+ x
->e_mbd
.mode_info_stride
* (i
+ 1);
425 mbd
->mode_info_stride
= cm
->mode_info_stride
;
427 mbd
->frame_type
= cm
->frame_type
;
429 mbd
->frames_since_golden
= cm
->frames_since_golden
;
430 mbd
->frames_till_alt_ref_frame
= cm
->frames_till_alt_ref_frame
;
// Source frame, prediction reference (last frame) and reconstruction target.
432 mb
->src
= * cpi
->Source
;
433 mbd
->pre
= cm
->yv12_fb
[cm
->lst_fb_idx
];
434 mbd
->dst
= cm
->yv12_fb
[cm
->new_fb_idx
];
// Advance the source planes to row i + 1 (16 luma / 8 chroma lines per row).
436 mb
->src
.y_buffer
+= 16 * x
->src
.y_stride
* (i
+ 1);
437 mb
->src
.u_buffer
+= 8 * x
->src
.uv_stride
* (i
+ 1);
438 mb
->src
.v_buffer
+= 8 * x
->src
.uv_stride
* (i
+ 1);
440 vp8_build_block_offsets(mb
);
442 vp8_setup_block_dptrs(mbd
);
444 vp8_setup_block_ptrs(mb
);
446 mb
->activity_sum
= 0;
// NOTE(review): thread_encoding_proc later repoints left_context at its own
// local context for every row it encodes.
448 mbd
->left_context
= &cm
->left_context
;
449 mb
->mvc
= cm
->fc
.mvc
;
// Copy the remaining shared settings (costs, quantizers, ...) from x.
451 setup_mbby_copy(&mbr_ei
[i
].mb
, x
);
/*
 * Allocate the threading resources and start the worker threads.
 *
 * Starts th_count row-encoding workers (thread_encoding_proc) and one
 * loop-filter worker (loopfilter_thread). th_count is derived from
 * cpi->oxcf.multi_threaded, capped by processor_core_count and by
 * (mb_cols / mt_sync_range) - 1. On this path cpi->b_multi_threaded is set
 * to 1 and cpi->encoding_thread_count to th_count; both are initialised to
 * 0 on entry.
 *
 * NOTE(review): braces and the declaration of ithread are not visible in
 * this view. No guard for th_count <= 0 is visible either (the cap can
 * reach 0 or below when mb_cols / mt_sync_range <= 1) -- confirm one exists
 * before the allocations. The division also assumes mt_sync_range is
 * non-zero.
 */
456 void vp8cx_create_encoder_threads(VP8_COMP
*cpi
)
458 const VP8_COMMON
* cm
= &cpi
->common
;
460 cpi
->b_multi_threaded
= 0;
461 cpi
->encoding_thread_count
= 0;
// NOTE(review): the core count is hard-coded to 32; the detection call is
// commented out, so the core-count cap below only applies for
// multi_threaded values above 32.
462 cpi
->processor_core_count
= 32; //vp8_get_proc_core_count();
464 if (cpi
->processor_core_count
> 1 && cpi
->oxcf
.multi_threaded
> 1)
// One of the requested threads is the caller itself, hence the - 1.
467 int th_count
= cpi
->oxcf
.multi_threaded
- 1;
469 if (cpi
->oxcf
.multi_threaded
> cpi
->processor_core_count
)
470 th_count
= cpi
->processor_core_count
- 1;
472 /* we have th_count + 1 (main) threads processing one row each */
473 /* no point to have more threads than the sync range allows */
474 if(th_count
> ((cm
->mb_cols
/ cpi
->mt_sync_range
) - 1))
476 th_count
= (cm
->mb_cols
/ cpi
->mt_sync_range
) - 1;
// Per-worker arrays: thread handles, start semaphores, row contexts
// (32-byte aligned and zeroed) and thread arguments.
482 CHECK_MEM_ERROR(cpi
->h_encoding_thread
, vpx_malloc(sizeof(pthread_t
) * th_count
));
483 CHECK_MEM_ERROR(cpi
->h_event_start_encoding
, vpx_malloc(sizeof(sem_t
) * th_count
));
484 CHECK_MEM_ERROR(cpi
->mb_row_ei
, vpx_memalign(32, sizeof(MB_ROW_COMP
) * th_count
));
485 vpx_memset(cpi
->mb_row_ei
, 0, sizeof(MB_ROW_COMP
) * th_count
);
486 CHECK_MEM_ERROR(cpi
->en_thread_data
,
487 vpx_malloc(sizeof(ENCODETHREAD_DATA
) * th_count
));
// One progress slot per macroblock row, used for row-to-row synchronisation.
488 CHECK_MEM_ERROR(cpi
->mt_current_mb_col
,
489 vpx_malloc(sizeof(*cpi
->mt_current_mb_col
) * cm
->mb_rows
));
// NOTE(review): return values of sem_init and pthread_create are not
// checked anywhere in this function.
491 sem_init(&cpi
->h_event_end_encoding
, 0, 0);
// The flag must be set before the workers start: both thread functions
// treat a cleared flag as a shutdown request.
493 cpi
->b_multi_threaded
= 1;
494 cpi
->encoding_thread_count
= th_count
;
// NOTE(review): confirm whether this diagnostic is active or sits inside a
// comment whose delimiters are not visible here.
497 printf("[VP8:] multi_threaded encoding is enabled with %d threads\n\n",
498 (cpi->encoding_thread_count +1));
501 for (ithread
= 0; ithread
< th_count
; ithread
++)
503 ENCODETHREAD_DATA
* ethd
= &cpi
->en_thread_data
[ithread
];
// Each worker gets its own start semaphore, initially 0 (blocked).
505 sem_init(&cpi
->h_event_start_encoding
[ithread
], 0, 0);
// Argument block read back by thread_encoding_proc.
506 ethd
->ithread
= ithread
;
507 ethd
->ptr1
= (void *)cpi
;
508 ethd
->ptr2
= (void *)&cpi
->mb_row_ei
[ithread
];
510 pthread_create(&cpi
->h_encoding_thread
[ithread
], 0, thread_encoding_proc
, ethd
);
// Loop-filter worker: its argument block lives inside cpi.
514 LPFTHREAD_DATA
* lpfthd
= &cpi
->lpf_thread_data
;
516 sem_init(&cpi
->h_event_start_lpf
, 0, 0);
517 sem_init(&cpi
->h_event_end_lpf
, 0, 0);
519 lpfthd
->ptr1
= (void *)cpi
;
520 pthread_create(&cpi
->h_filter_thread
, 0, loopfilter_thread
, lpfthd
);
/*
 * Stop the worker threads and release the threading resources.
 *
 * Does nothing unless cpi->b_multi_threaded is set. Otherwise the flag is
 * cleared first, each worker is woken through its start semaphore and
 * joined, the semaphores are destroyed and the arrays allocated in
 * vp8cx_create_encoder_threads are freed.
 *
 * NOTE(review): braces and the declaration of i are not visible in this
 * view. The freed pointers are not reset to NULL here -- confirm nothing
 * reads them after this call.
 */
526 void vp8cx_remove_encoder_threads(VP8_COMP
*cpi
)
528 if (cpi
->b_multi_threaded
)
530 //shutdown other threads
// Clearing the flag before posting is what tells a woken worker to exit.
531 cpi
->b_multi_threaded
= 0;
535 for (i
= 0; i
< cpi
->encoding_thread_count
; i
++)
537 //SetEvent(cpi->h_event_mbrencoding[i]);
// Wake the worker, wait for it to finish, then retire its semaphore.
538 sem_post(&cpi
->h_event_start_encoding
[i
]);
539 pthread_join(cpi
->h_encoding_thread
[i
], 0);
541 sem_destroy(&cpi
->h_event_start_encoding
[i
]);
// Same wake-and-join handshake for the loop-filter worker.
544 sem_post(&cpi
->h_event_start_lpf
);
545 pthread_join(cpi
->h_filter_thread
, 0);
// The remaining semaphores are destroyed only after the joins above.
548 sem_destroy(&cpi
->h_event_end_encoding
);
549 sem_destroy(&cpi
->h_event_end_lpf
);
550 sem_destroy(&cpi
->h_event_start_lpf
);
552 //free thread related resources
553 vpx_free(cpi
->h_event_start_encoding
);
554 vpx_free(cpi
->h_encoding_thread
);
555 vpx_free(cpi
->mb_row_ei
);
556 vpx_free(cpi
->en_thread_data
);
557 vpx_free(cpi
->mt_current_mb_col
);