2 * AV1 HW decode acceleration through VDPAU
4 * Copyright (c) 2022 Manoj Gupta Bonda
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software Foundation,
20 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 #include <vdpau/vdpau.h>
24 #include "libavutil/pixdesc.h"
28 #include "hwaccel_internal.h"
30 #include "vdpau_internal.h"
32 static int get_bit_depth_from_seq(const AV1RawSequenceHeader
*seq
)
34 if (seq
->seq_profile
== 2 && seq
->color_config
.high_bitdepth
) {
35 return seq
->color_config
.twelve_bit
? 12 : 10;
36 } else if (seq
->seq_profile
<= 2 && seq
->color_config
.high_bitdepth
) {
43 static int vdpau_av1_start_frame(AVCodecContext
*avctx
,
44 const uint8_t *buffer
, uint32_t size
)
46 AV1DecContext
*s
= avctx
->priv_data
;
47 const AV1RawSequenceHeader
*seq
= s
->raw_seq
;
48 const AV1RawFrameHeader
*frame_header
= s
->raw_frame_header
;
49 const AV1RawFilmGrainParams
*film_grain
= &s
->cur_frame
.film_grain
;
51 struct vdpau_picture_context
*pic_ctx
= s
->cur_frame
.hwaccel_picture_private
;
54 unsigned char remap_lr_type
[4] = { AV1_RESTORE_NONE
, AV1_RESTORE_SWITCHABLE
, AV1_RESTORE_WIENER
, AV1_RESTORE_SGRPROJ
};
57 VdpPictureInfoAV1
*info
= &pic_ctx
->info
.av1
;
58 const AVPixFmtDescriptor
*pixdesc
= av_pix_fmt_desc_get(avctx
->sw_pix_fmt
);
60 return AV_PIX_FMT_NONE
;
63 info
->width
= avctx
->width
;
64 info
->height
= avctx
->height
;
67 info
->frame_offset
= frame_header
->order_hint
;
70 info
->profile
= seq
->seq_profile
;
71 info
->use_128x128_superblock
= seq
->use_128x128_superblock
;
72 info
->subsampling_x
= seq
->color_config
.subsampling_x
;
73 info
->subsampling_y
= seq
->color_config
.subsampling_y
;
74 info
->mono_chrome
= seq
->color_config
.mono_chrome
;
75 info
->bit_depth_minus8
= get_bit_depth_from_seq(seq
) - 8;
76 info
->enable_filter_intra
= seq
->enable_filter_intra
;
77 info
->enable_intra_edge_filter
= seq
->enable_intra_edge_filter
;
78 info
->enable_interintra_compound
= seq
->enable_interintra_compound
;
79 info
->enable_masked_compound
= seq
->enable_masked_compound
;
80 info
->enable_dual_filter
= seq
->enable_dual_filter
;
81 info
->enable_order_hint
= seq
->enable_order_hint
;
82 info
->order_hint_bits_minus1
= seq
->order_hint_bits_minus_1
;
83 info
->enable_jnt_comp
= seq
->enable_jnt_comp
;
84 info
->enable_superres
= seq
->enable_superres
;
85 info
->enable_cdef
= seq
->enable_cdef
;
86 info
->enable_restoration
= seq
->enable_restoration
;
87 info
->enable_fgs
= seq
->film_grain_params_present
;
90 info
->frame_type
= frame_header
->frame_type
;
91 info
->show_frame
= frame_header
->show_frame
;
92 info
->disable_cdf_update
= frame_header
->disable_cdf_update
;
93 info
->allow_screen_content_tools
= frame_header
->allow_screen_content_tools
;
94 info
->force_integer_mv
= s
->cur_frame
.force_integer_mv
;
95 info
->coded_denom
= frame_header
->coded_denom
;
96 info
->allow_intrabc
= frame_header
->allow_intrabc
;
97 info
->allow_high_precision_mv
= frame_header
->allow_high_precision_mv
;
98 info
->interp_filter
= frame_header
->interpolation_filter
;
99 info
->switchable_motion_mode
= frame_header
->is_motion_mode_switchable
;
100 info
->use_ref_frame_mvs
= frame_header
->use_ref_frame_mvs
;
101 info
->disable_frame_end_update_cdf
= frame_header
->disable_frame_end_update_cdf
;
102 info
->delta_q_present
= frame_header
->delta_q_present
;
103 info
->delta_q_res
= frame_header
->delta_q_res
;
104 info
->using_qmatrix
= frame_header
->using_qmatrix
;
105 info
->coded_lossless
= s
->cur_frame
.coded_lossless
;
106 info
->use_superres
= frame_header
->use_superres
;
107 info
->tx_mode
= frame_header
->tx_mode
;
108 info
->reference_mode
= frame_header
->reference_select
;
109 info
->allow_warped_motion
= frame_header
->allow_warped_motion
;
110 info
->reduced_tx_set
= frame_header
->reduced_tx_set
;
111 info
->skip_mode
= frame_header
->skip_mode_present
;
114 info
->num_tile_cols
= frame_header
->tile_cols
;
115 info
->num_tile_rows
= frame_header
->tile_rows
;
116 info
->context_update_tile_id
= frame_header
->context_update_tile_id
;
119 info
->cdef_damping_minus_3
= frame_header
->cdef_damping_minus_3
;
120 info
->cdef_bits
= frame_header
->cdef_bits
;
123 info
->SkipModeFrame0
= frame_header
->skip_mode_present
?
124 s
->cur_frame
.skip_mode_frame_idx
[0] : 0;
125 info
->SkipModeFrame1
= frame_header
->skip_mode_present
?
126 s
->cur_frame
.skip_mode_frame_idx
[1] : 0;
129 info
->base_qindex
= frame_header
->base_q_idx
;
130 info
->qp_y_dc_delta_q
= frame_header
->delta_q_y_dc
;
131 info
->qp_u_dc_delta_q
= frame_header
->delta_q_u_dc
;
132 info
->qp_v_dc_delta_q
= frame_header
->delta_q_v_dc
;
133 info
->qp_u_ac_delta_q
= frame_header
->delta_q_u_ac
;
134 info
->qp_v_ac_delta_q
= frame_header
->delta_q_v_ac
;
135 info
->qm_y
= frame_header
->qm_y
;
136 info
->qm_u
= frame_header
->qm_u
;
137 info
->qm_v
= frame_header
->qm_v
;
140 info
->segmentation_enabled
= frame_header
->segmentation_enabled
;
141 info
->segmentation_update_map
= frame_header
->segmentation_update_map
;
142 info
->segmentation_update_data
= frame_header
->segmentation_update_data
;
143 info
->segmentation_temporal_update
= frame_header
->segmentation_temporal_update
;
146 info
->loop_filter_level
[0] = frame_header
->loop_filter_level
[0];
147 info
->loop_filter_level
[1] = frame_header
->loop_filter_level
[1];
148 info
->loop_filter_level_u
= frame_header
->loop_filter_level
[2];
149 info
->loop_filter_level_v
= frame_header
->loop_filter_level
[3];
150 info
->loop_filter_sharpness
= frame_header
->loop_filter_sharpness
;
151 info
->loop_filter_delta_enabled
= frame_header
->loop_filter_delta_enabled
;
152 info
->loop_filter_delta_update
= frame_header
->loop_filter_delta_update
;
153 info
->loop_filter_mode_deltas
[0] = frame_header
->loop_filter_mode_deltas
[0];
154 info
->loop_filter_mode_deltas
[1] = frame_header
->loop_filter_mode_deltas
[1];
155 info
->delta_lf_present
= frame_header
->delta_lf_present
;
156 info
->delta_lf_res
= frame_header
->delta_lf_res
;
157 info
->delta_lf_multi
= frame_header
->delta_lf_multi
;
160 info
->lr_type
[0] = remap_lr_type
[frame_header
->lr_type
[0]];
161 info
->lr_type
[1] = remap_lr_type
[frame_header
->lr_type
[1]];
162 info
->lr_type
[2] = remap_lr_type
[frame_header
->lr_type
[2]];
163 info
->lr_unit_size
[0] = 1 + frame_header
->lr_unit_shift
;
164 info
->lr_unit_size
[1] = 1 + frame_header
->lr_unit_shift
- frame_header
->lr_uv_shift
;
165 info
->lr_unit_size
[2] = 1 + frame_header
->lr_unit_shift
- frame_header
->lr_uv_shift
;
167 /* Reference Frames */
168 info
->temporal_layer_id
= s
->cur_frame
.temporal_id
;
169 info
->spatial_layer_id
= s
->cur_frame
.spatial_id
;
171 /* Film Grain Params */
172 info
->apply_grain
= film_grain
->apply_grain
;
173 info
->overlap_flag
= film_grain
->overlap_flag
;
174 info
->scaling_shift_minus8
= film_grain
->grain_scaling_minus_8
;
175 info
->chroma_scaling_from_luma
= film_grain
->chroma_scaling_from_luma
;
176 info
->ar_coeff_lag
= film_grain
->ar_coeff_lag
;
177 info
->ar_coeff_shift_minus6
= film_grain
->ar_coeff_shift_minus_6
;
178 info
->grain_scale_shift
= film_grain
->grain_scale_shift
;
179 info
->clip_to_restricted_range
= film_grain
->clip_to_restricted_range
;
180 info
->num_y_points
= film_grain
->num_y_points
;
181 info
->num_cb_points
= film_grain
->num_cb_points
;
182 info
->num_cr_points
= film_grain
->num_cr_points
;
183 info
->random_seed
= film_grain
->grain_seed
;
184 info
->cb_mult
= film_grain
->cb_mult
;
185 info
->cb_luma_mult
= film_grain
->cb_luma_mult
;
186 info
->cb_offset
= film_grain
->cb_offset
;
187 info
->cr_mult
= film_grain
->cr_mult
;
188 info
->cr_luma_mult
= film_grain
->cr_luma_mult
;
189 info
->cr_offset
= film_grain
->cr_offset
;
192 for (i
= 0; i
< frame_header
->tile_cols
; ++i
) {
193 info
->tile_widths
[i
] = frame_header
->width_in_sbs_minus_1
[i
] + 1;
195 for (i
= 0; i
< frame_header
->tile_rows
; ++i
) {
196 info
->tile_heights
[i
] = frame_header
->height_in_sbs_minus_1
[i
] + 1;
200 for (i
= 0; i
< (1 << frame_header
->cdef_bits
); ++i
) {
201 info
->cdef_y_strength
[i
] = (frame_header
->cdef_y_pri_strength
[i
] & 0x0F) | (frame_header
->cdef_y_sec_strength
[i
] << 4);
202 info
->cdef_uv_strength
[i
] = (frame_header
->cdef_uv_pri_strength
[i
] & 0x0F) | (frame_header
->cdef_uv_sec_strength
[i
] << 4);
207 for (i
= 0; i
< AV1_MAX_SEGMENTS
; ++i
) {
208 info
->segmentation_feature_mask
[i
] = 0;
209 for (j
= 0; j
< AV1_SEG_LVL_MAX
; ++j
) {
210 info
->segmentation_feature_mask
[i
] |= frame_header
->feature_enabled
[i
][j
] << j
;
211 info
->segmentation_feature_data
[i
][j
] = frame_header
->feature_value
[i
][j
];
215 for (i
= 0; i
< AV1_NUM_REF_FRAMES
; ++i
) {
217 info
->loop_filter_ref_deltas
[i
] = frame_header
->loop_filter_ref_deltas
[i
];
219 /* Reference Frames */
220 info
->ref_frame_map
[i
] = s
->ref
[i
].f
&& ff_vdpau_get_surface_id(s
->ref
[i
].f
) ?
221 ff_vdpau_get_surface_id(s
->ref
[i
].f
) : VDP_INVALID_HANDLE
;
224 if (frame_header
->primary_ref_frame
== AV1_PRIMARY_REF_NONE
) {
225 info
->primary_ref_frame
= -1;
227 int8_t pri_ref_idx
= frame_header
->ref_frame_idx
[frame_header
->primary_ref_frame
];
228 info
->primary_ref_frame
= info
->ref_frame_map
[pri_ref_idx
];
231 for (i
= 0; i
< AV1_REFS_PER_FRAME
; ++i
) {
233 int8_t ref_idx
= frame_header
->ref_frame_idx
[i
];
234 AVFrame
*ref_frame
= s
->ref
[ref_idx
].f
;
236 info
->ref_frame
[i
].index
= info
->ref_frame_map
[ref_idx
];
237 info
->ref_frame
[i
].width
= ref_frame
? ref_frame
->width
: 0;
238 info
->ref_frame
[i
].height
= ref_frame
? ref_frame
->height
: 0;
241 info
->global_motion
[i
].invalid
= !frame_header
->is_global
[AV1_REF_FRAME_LAST
+ i
];
242 info
->global_motion
[i
].wmtype
= s
->cur_frame
.gm_type
[AV1_REF_FRAME_LAST
+ i
];
243 for (j
= 0; j
< 6; ++j
) {
244 info
->global_motion
[i
].wmmat
[j
] = s
->cur_frame
.gm_params
[AV1_REF_FRAME_LAST
+ i
][j
];
248 /* Film Grain Params */
249 if (film_grain
->apply_grain
) {
250 for (i
= 0; i
< 14; ++i
) {
251 info
->scaling_points_y
[i
][0] = film_grain
->point_y_value
[i
];
252 info
->scaling_points_y
[i
][1] = film_grain
->point_y_scaling
[i
];
254 for (i
= 0; i
< 10; ++i
) {
255 info
->scaling_points_cb
[i
][0] = film_grain
->point_cb_value
[i
];
256 info
->scaling_points_cb
[i
][1] = film_grain
->point_cb_scaling
[i
];
257 info
->scaling_points_cr
[i
][0] = film_grain
->point_cr_value
[i
];
258 info
->scaling_points_cr
[i
][1] = film_grain
->point_cr_scaling
[i
];
260 for (i
= 0; i
< 24; ++i
) {
261 info
->ar_coeffs_y
[i
] = (short)film_grain
->ar_coeffs_y_plus_128
[i
] - 128;
263 for (i
= 0; i
< 25; ++i
) {
264 info
->ar_coeffs_cb
[i
] = (short)film_grain
->ar_coeffs_cb_plus_128
[i
] - 128;
265 info
->ar_coeffs_cr
[i
] = (short)film_grain
->ar_coeffs_cr_plus_128
[i
] - 128;
270 return ff_vdpau_common_start_frame(pic_ctx
, buffer
, size
);
274 static int vdpau_av1_decode_slice(AVCodecContext
*avctx
,
275 const uint8_t *buffer
, uint32_t size
)
277 const AV1DecContext
*s
= avctx
->priv_data
;
278 const AV1RawFrameHeader
*frame_header
= s
->raw_frame_header
;
279 struct vdpau_picture_context
*pic_ctx
= s
->cur_frame
.hwaccel_picture_private
;
280 VdpPictureInfoAV1
*info
= &pic_ctx
->info
.av1
;
283 VdpBitstreamBuffer
*buffers
= pic_ctx
->bitstream_buffers
;
284 int bitstream_len
= 0;
286 nb_slices
= frame_header
->tile_cols
* frame_header
->tile_rows
;
287 /* Shortcut if all tiles are in the same buffer*/
288 if (nb_slices
== s
->tg_end
- s
->tg_start
+ 1) {
289 for (int i
= 0; i
< nb_slices
; ++i
) {
290 info
->tile_info
[i
*2 ] = s
->tile_group_info
[i
].tile_offset
;
291 info
->tile_info
[i
*2 + 1] = info
->tile_info
[i
*2] + s
->tile_group_info
[i
].tile_size
;
293 val
= ff_vdpau_add_buffer(pic_ctx
, buffer
, size
);
301 for(int i
= 0; i
< pic_ctx
->bitstream_buffers_used
; i
++) {
302 bitstream_len
+= buffers
->bitstream_bytes
;
306 for (uint32_t tile_num
= s
->tg_start
; tile_num
<= s
->tg_end
; ++tile_num
) {
307 info
->tile_info
[tile_num
*2 ] = bitstream_len
+ s
->tile_group_info
[tile_num
].tile_offset
;
308 info
->tile_info
[tile_num
*2 + 1] = info
->tile_info
[tile_num
*2] + s
->tile_group_info
[tile_num
].tile_size
;
311 val
= ff_vdpau_add_buffer(pic_ctx
, buffer
, size
);
319 static int vdpau_av1_end_frame(AVCodecContext
*avctx
)
321 const AV1DecContext
*s
= avctx
->priv_data
;
322 struct vdpau_picture_context
*pic_ctx
= s
->cur_frame
.hwaccel_picture_private
;
326 val
= ff_vdpau_common_end_frame(avctx
, s
->cur_frame
.f
, pic_ctx
);
333 static int vdpau_av1_init(AVCodecContext
*avctx
)
335 VdpDecoderProfile profile
;
336 uint32_t level
= avctx
->level
;
338 switch (avctx
->profile
) {
339 case AV_PROFILE_AV1_MAIN
:
340 profile
= VDP_DECODER_PROFILE_AV1_MAIN
;
342 case AV_PROFILE_AV1_HIGH
:
343 profile
= VDP_DECODER_PROFILE_AV1_HIGH
;
345 case AV_PROFILE_AV1_PROFESSIONAL
:
346 profile
= VDP_DECODER_PROFILE_AV1_PROFESSIONAL
;
349 return AVERROR(ENOTSUP
);
352 return ff_vdpau_common_init(avctx
, profile
, level
);
355 const FFHWAccel ff_av1_vdpau_hwaccel
= {
356 .p
.name
= "av1_vdpau",
357 .p
.type
= AVMEDIA_TYPE_VIDEO
,
358 .p
.id
= AV_CODEC_ID_AV1
,
359 .p
.pix_fmt
= AV_PIX_FMT_VDPAU
,
360 .start_frame
= vdpau_av1_start_frame
,
361 .end_frame
= vdpau_av1_end_frame
,
362 .decode_slice
= vdpau_av1_decode_slice
,
363 .frame_priv_data_size
= sizeof(struct vdpau_picture_context
),
364 .init
= vdpau_av1_init
,
365 .uninit
= ff_vdpau_common_uninit
,
366 .frame_params
= ff_vdpau_common_frame_params
,
367 .priv_data_size
= sizeof(VDPAUContext
),
368 .caps_internal
= HWACCEL_CAP_ASYNC_SAFE
,