/*
 * H.264 HW decode acceleration through VA API
 *
 * Copyright (C) 2008-2009 Splitted-Desktop Systems
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
26 #include "vaapi_decode.h"
/**
 * @file
 * This file implements the glue code between Libav's and VA API's
 * structures for H.264 decoding.
 */
35 * Initialize an empty VA API picture.
37 * VA API requires a fixed-size reference picture array.
39 static void init_vaapi_pic(VAPictureH264
*va_pic
)
41 va_pic
->picture_id
= VA_INVALID_ID
;
42 va_pic
->flags
= VA_PICTURE_H264_INVALID
;
43 va_pic
->TopFieldOrderCnt
= 0;
44 va_pic
->BottomFieldOrderCnt
= 0;
48 * Translate an Libav Picture into its VA API form.
50 * @param[out] va_pic A pointer to VA API's own picture struct
51 * @param[in] pic A pointer to the Libav picture struct to convert
52 * @param[in] pic_structure The picture field type (as defined in mpegvideo.h),
53 * supersedes pic's field type if nonzero.
55 static void fill_vaapi_pic(VAPictureH264
*va_pic
,
56 const H264Picture
*pic
,
59 if (pic_structure
== 0)
60 pic_structure
= pic
->reference
;
61 pic_structure
&= PICT_FRAME
; /* PICT_TOP_FIELD|PICT_BOTTOM_FIELD */
63 va_pic
->picture_id
= ff_vaapi_get_surface_id(pic
->f
);
64 va_pic
->frame_idx
= pic
->long_ref
? pic
->pic_id
: pic
->frame_num
;
67 if (pic_structure
!= PICT_FRAME
)
68 va_pic
->flags
|= (pic_structure
& PICT_TOP_FIELD
) ? VA_PICTURE_H264_TOP_FIELD
: VA_PICTURE_H264_BOTTOM_FIELD
;
70 va_pic
->flags
|= pic
->long_ref
? VA_PICTURE_H264_LONG_TERM_REFERENCE
: VA_PICTURE_H264_SHORT_TERM_REFERENCE
;
72 va_pic
->TopFieldOrderCnt
= 0;
73 if (pic
->field_poc
[0] != INT_MAX
)
74 va_pic
->TopFieldOrderCnt
= pic
->field_poc
[0];
76 va_pic
->BottomFieldOrderCnt
= 0;
77 if (pic
->field_poc
[1] != INT_MAX
)
78 va_pic
->BottomFieldOrderCnt
= pic
->field_poc
[1];
81 /** Decoded Picture Buffer (DPB). */
83 int size
; ///< Current number of reference frames in the DPB
84 int max_size
; ///< Max number of reference frames. This is FF_ARRAY_ELEMS(VAPictureParameterBufferH264.ReferenceFrames)
85 VAPictureH264
*va_pics
; ///< Pointer to VAPictureParameterBufferH264.ReferenceFrames array
89 * Append picture to the decoded picture buffer, in a VA API form that
90 * merges the second field picture attributes with the first, if
91 * available. The decoded picture buffer's size must be large enough
92 * to receive the new VA API picture object.
94 static int dpb_add(DPB
*dpb
, const H264Picture
*pic
)
98 if (dpb
->size
>= dpb
->max_size
)
101 for (i
= 0; i
< dpb
->size
; i
++) {
102 VAPictureH264
* const va_pic
= &dpb
->va_pics
[i
];
103 if (va_pic
->picture_id
== ff_vaapi_get_surface_id(pic
->f
)) {
104 VAPictureH264 temp_va_pic
;
105 fill_vaapi_pic(&temp_va_pic
, pic
, 0);
107 if ((temp_va_pic
.flags
^ va_pic
->flags
) & (VA_PICTURE_H264_TOP_FIELD
| VA_PICTURE_H264_BOTTOM_FIELD
)) {
108 va_pic
->flags
|= temp_va_pic
.flags
& (VA_PICTURE_H264_TOP_FIELD
| VA_PICTURE_H264_BOTTOM_FIELD
);
109 /* Merge second field */
110 if (temp_va_pic
.flags
& VA_PICTURE_H264_TOP_FIELD
) {
111 va_pic
->TopFieldOrderCnt
= temp_va_pic
.TopFieldOrderCnt
;
113 va_pic
->BottomFieldOrderCnt
= temp_va_pic
.BottomFieldOrderCnt
;
120 fill_vaapi_pic(&dpb
->va_pics
[dpb
->size
++], pic
, 0);
124 /** Fill in VA API reference frames array. */
125 static int fill_vaapi_ReferenceFrames(VAPictureParameterBufferH264
*pic_param
,
126 const H264Context
*h
)
132 dpb
.max_size
= FF_ARRAY_ELEMS(pic_param
->ReferenceFrames
);
133 dpb
.va_pics
= pic_param
->ReferenceFrames
;
134 for (i
= 0; i
< dpb
.max_size
; i
++)
135 init_vaapi_pic(&dpb
.va_pics
[i
]);
137 for (i
= 0; i
< h
->short_ref_count
; i
++) {
138 const H264Picture
*pic
= h
->short_ref
[i
];
139 if (pic
&& pic
->reference
&& dpb_add(&dpb
, pic
) < 0)
143 for (i
= 0; i
< 16; i
++) {
144 const H264Picture
*pic
= h
->long_ref
[i
];
145 if (pic
&& pic
->reference
&& dpb_add(&dpb
, pic
) < 0)
152 * Fill in VA API reference picture lists from the Libav reference
155 * @param[out] RefPicList VA API internal reference picture list
156 * @param[in] ref_list A pointer to the Libav reference list
157 * @param[in] ref_count The number of reference pictures in ref_list
159 static void fill_vaapi_RefPicList(VAPictureH264 RefPicList
[32],
160 const H264Ref
*ref_list
,
161 unsigned int ref_count
)
163 unsigned int i
, n
= 0;
164 for (i
= 0; i
< ref_count
; i
++)
165 if (ref_list
[i
].reference
)
166 fill_vaapi_pic(&RefPicList
[n
++], ref_list
[i
].parent
,
167 ref_list
[i
].reference
);
170 init_vaapi_pic(&RefPicList
[n
]);
174 * Fill in prediction weight table.
176 * VA API requires a plain prediction weight table as it does not infer
179 * @param[in] h A pointer to the current H.264 context
180 * @param[in] list The reference frame list index to use
181 * @param[out] luma_weight_flag VA API plain luma weight flag
182 * @param[out] luma_weight VA API plain luma weight table
183 * @param[out] luma_offset VA API plain luma offset table
184 * @param[out] chroma_weight_flag VA API plain chroma weight flag
185 * @param[out] chroma_weight VA API plain chroma weight table
186 * @param[out] chroma_offset VA API plain chroma offset table
188 static void fill_vaapi_plain_pred_weight_table(const H264Context
*h
,
190 unsigned char *luma_weight_flag
,
191 short luma_weight
[32],
192 short luma_offset
[32],
193 unsigned char *chroma_weight_flag
,
194 short chroma_weight
[32][2],
195 short chroma_offset
[32][2])
197 const H264SliceContext
*sl
= &h
->slice_ctx
[0];
200 *luma_weight_flag
= sl
->pwt
.luma_weight_flag
[list
];
201 *chroma_weight_flag
= sl
->pwt
.chroma_weight_flag
[list
];
203 for (i
= 0; i
< sl
->ref_count
[list
]; i
++) {
204 /* VA API also wants the inferred (default) values, not
205 only what is available in the bitstream (7.4.3.2). */
206 if (sl
->pwt
.luma_weight_flag
[list
]) {
207 luma_weight
[i
] = sl
->pwt
.luma_weight
[i
][list
][0];
208 luma_offset
[i
] = sl
->pwt
.luma_weight
[i
][list
][1];
210 luma_weight
[i
] = 1 << sl
->pwt
.luma_log2_weight_denom
;
213 for (j
= 0; j
< 2; j
++) {
214 if (sl
->pwt
.chroma_weight_flag
[list
]) {
215 chroma_weight
[i
][j
] = sl
->pwt
.chroma_weight
[i
][list
][j
][0];
216 chroma_offset
[i
][j
] = sl
->pwt
.chroma_weight
[i
][list
][j
][1];
218 chroma_weight
[i
][j
] = 1 << sl
->pwt
.chroma_log2_weight_denom
;
219 chroma_offset
[i
][j
] = 0;
225 /** Initialize and start decoding a frame with VA API. */
226 static int vaapi_h264_start_frame(AVCodecContext
*avctx
,
227 av_unused
const uint8_t *buffer
,
228 av_unused
uint32_t size
)
230 const H264Context
*h
= avctx
->priv_data
;
231 VAAPIDecodePicture
*pic
= h
->cur_pic_ptr
->hwaccel_picture_private
;
232 const PPS
*pps
= h
->ps
.pps
;
233 const SPS
*sps
= h
->ps
.sps
;
234 VAPictureParameterBufferH264 pic_param
;
235 VAIQMatrixBufferH264 iq_matrix
;
238 pic
->output_surface
= ff_vaapi_get_surface_id(h
->cur_pic_ptr
->f
);
240 pic_param
= (VAPictureParameterBufferH264
) {
241 .picture_width_in_mbs_minus1
= h
->mb_width
- 1,
242 .picture_height_in_mbs_minus1
= h
->mb_height
- 1,
243 .bit_depth_luma_minus8
= sps
->bit_depth_luma
- 8,
244 .bit_depth_chroma_minus8
= sps
->bit_depth_chroma
- 8,
245 .num_ref_frames
= sps
->ref_frame_count
,
247 .chroma_format_idc
= sps
->chroma_format_idc
,
248 .residual_colour_transform_flag
= sps
->residual_color_transform_flag
,
249 .gaps_in_frame_num_value_allowed_flag
= sps
->gaps_in_frame_num_allowed_flag
,
250 .frame_mbs_only_flag
= sps
->frame_mbs_only_flag
,
251 .mb_adaptive_frame_field_flag
= sps
->mb_aff
,
252 .direct_8x8_inference_flag
= sps
->direct_8x8_inference_flag
,
253 .MinLumaBiPredSize8x8
= sps
->level_idc
>= 31, /* A.3.3.2 */
254 .log2_max_frame_num_minus4
= sps
->log2_max_frame_num
- 4,
255 .pic_order_cnt_type
= sps
->poc_type
,
256 .log2_max_pic_order_cnt_lsb_minus4
= sps
->log2_max_poc_lsb
- 4,
257 .delta_pic_order_always_zero_flag
= sps
->delta_pic_order_always_zero_flag
,
259 .num_slice_groups_minus1
= pps
->slice_group_count
- 1,
260 .slice_group_map_type
= pps
->mb_slice_group_map_type
,
261 .slice_group_change_rate_minus1
= 0, /* FMO is not implemented */
262 .pic_init_qp_minus26
= pps
->init_qp
- 26,
263 .pic_init_qs_minus26
= pps
->init_qs
- 26,
264 .chroma_qp_index_offset
= pps
->chroma_qp_index_offset
[0],
265 .second_chroma_qp_index_offset
= pps
->chroma_qp_index_offset
[1],
267 .entropy_coding_mode_flag
= pps
->cabac
,
268 .weighted_pred_flag
= pps
->weighted_pred
,
269 .weighted_bipred_idc
= pps
->weighted_bipred_idc
,
270 .transform_8x8_mode_flag
= pps
->transform_8x8_mode
,
271 .field_pic_flag
= h
->picture_structure
!= PICT_FRAME
,
272 .constrained_intra_pred_flag
= pps
->constrained_intra_pred
,
273 .pic_order_present_flag
= pps
->pic_order_present
,
274 .deblocking_filter_control_present_flag
= pps
->deblocking_filter_parameters_present
,
275 .redundant_pic_cnt_present_flag
= pps
->redundant_pic_cnt_present
,
276 .reference_pic_flag
= h
->nal_ref_idc
!= 0,
278 .frame_num
= h
->poc
.frame_num
,
281 fill_vaapi_pic(&pic_param
.CurrPic
, h
->cur_pic_ptr
, h
->picture_structure
);
282 err
= fill_vaapi_ReferenceFrames(&pic_param
, h
);
286 err
= ff_vaapi_decode_make_param_buffer(avctx
, pic
,
287 VAPictureParameterBufferType
,
288 &pic_param
, sizeof(pic_param
));
292 memcpy(iq_matrix
.ScalingList4x4
,
293 pps
->scaling_matrix4
, sizeof(iq_matrix
.ScalingList4x4
));
294 memcpy(iq_matrix
.ScalingList8x8
[0],
295 pps
->scaling_matrix8
[0], sizeof(iq_matrix
.ScalingList8x8
[0]));
296 memcpy(iq_matrix
.ScalingList8x8
[1],
297 pps
->scaling_matrix8
[3], sizeof(iq_matrix
.ScalingList8x8
[0]));
299 err
= ff_vaapi_decode_make_param_buffer(avctx
, pic
,
300 VAIQMatrixBufferType
,
301 &iq_matrix
, sizeof(iq_matrix
));
308 ff_vaapi_decode_cancel(avctx
, pic
);
312 /** End a hardware decoding based frame. */
313 static int vaapi_h264_end_frame(AVCodecContext
*avctx
)
315 const H264Context
*h
= avctx
->priv_data
;
316 VAAPIDecodePicture
*pic
= h
->cur_pic_ptr
->hwaccel_picture_private
;
317 H264SliceContext
*sl
= &h
->slice_ctx
[0];
320 ret
= ff_vaapi_decode_issue(avctx
, pic
);
324 ff_h264_draw_horiz_band(h
, sl
, 0, h
->avctx
->height
);
330 /** Decode the given H.264 slice with VA API. */
331 static int vaapi_h264_decode_slice(AVCodecContext
*avctx
,
332 const uint8_t *buffer
,
335 const H264Context
*h
= avctx
->priv_data
;
336 VAAPIDecodePicture
*pic
= h
->cur_pic_ptr
->hwaccel_picture_private
;
337 const H264SliceContext
*sl
= &h
->slice_ctx
[0];
338 VASliceParameterBufferH264 slice_param
;
341 slice_param
= (VASliceParameterBufferH264
) {
342 .slice_data_size
= size
,
343 .slice_data_offset
= 0,
344 .slice_data_flag
= VA_SLICE_DATA_FLAG_ALL
,
345 .slice_data_bit_offset
= get_bits_count(&sl
->gb
),
346 .first_mb_in_slice
= (sl
->mb_y
>> FIELD_OR_MBAFF_PICTURE(h
)) * h
->mb_width
+ sl
->mb_x
,
347 .slice_type
= ff_h264_get_slice_type(sl
),
348 .direct_spatial_mv_pred_flag
= sl
->slice_type
== AV_PICTURE_TYPE_B
? sl
->direct_spatial_mv_pred
: 0,
349 .num_ref_idx_l0_active_minus1
= sl
->list_count
> 0 ? sl
->ref_count
[0] - 1 : 0,
350 .num_ref_idx_l1_active_minus1
= sl
->list_count
> 1 ? sl
->ref_count
[1] - 1 : 0,
351 .cabac_init_idc
= sl
->cabac_init_idc
,
352 .slice_qp_delta
= sl
->qscale
- h
->ps
.pps
->init_qp
,
353 .disable_deblocking_filter_idc
= sl
->deblocking_filter
< 2 ? !sl
->deblocking_filter
: sl
->deblocking_filter
,
354 .slice_alpha_c0_offset_div2
= sl
->slice_alpha_c0_offset
/ 2,
355 .slice_beta_offset_div2
= sl
->slice_beta_offset
/ 2,
356 .luma_log2_weight_denom
= sl
->pwt
.luma_log2_weight_denom
,
357 .chroma_log2_weight_denom
= sl
->pwt
.chroma_log2_weight_denom
,
360 fill_vaapi_RefPicList(slice_param
.RefPicList0
, sl
->ref_list
[0],
361 sl
->list_count
> 0 ? sl
->ref_count
[0] : 0);
362 fill_vaapi_RefPicList(slice_param
.RefPicList1
, sl
->ref_list
[1],
363 sl
->list_count
> 1 ? sl
->ref_count
[1] : 0);
365 fill_vaapi_plain_pred_weight_table(h
, 0,
366 &slice_param
.luma_weight_l0_flag
,
367 slice_param
.luma_weight_l0
,
368 slice_param
.luma_offset_l0
,
369 &slice_param
.chroma_weight_l0_flag
,
370 slice_param
.chroma_weight_l0
,
371 slice_param
.chroma_offset_l0
);
372 fill_vaapi_plain_pred_weight_table(h
, 1,
373 &slice_param
.luma_weight_l1_flag
,
374 slice_param
.luma_weight_l1
,
375 slice_param
.luma_offset_l1
,
376 &slice_param
.chroma_weight_l1_flag
,
377 slice_param
.chroma_weight_l1
,
378 slice_param
.chroma_offset_l1
);
380 err
= ff_vaapi_decode_make_slice_buffer(avctx
, pic
,
381 &slice_param
, sizeof(slice_param
),
384 ff_vaapi_decode_cancel(avctx
, pic
);
391 const AVHWAccel ff_h264_vaapi_hwaccel
= {
392 .name
= "h264_vaapi",
393 .type
= AVMEDIA_TYPE_VIDEO
,
394 .id
= AV_CODEC_ID_H264
,
395 .pix_fmt
= AV_PIX_FMT_VAAPI
,
396 .start_frame
= &vaapi_h264_start_frame
,
397 .end_frame
= &vaapi_h264_end_frame
,
398 .decode_slice
= &vaapi_h264_decode_slice
,
399 .frame_priv_data_size
= sizeof(VAAPIDecodePicture
),
400 .init
= &ff_vaapi_decode_init
,
401 .uninit
= &ff_vaapi_decode_uninit
,
402 .frame_params
= &ff_vaapi_common_frame_params
,
403 .priv_data_size
= sizeof(VAAPIDecodeContext
),
404 .caps_internal
= HWACCEL_CAP_ASYNC_SAFE
,