/*
 * Copyright (C) 2018 Philip Langdale <philipl@overt.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

21 #include "libavutil/avassert.h"
22 #include "libavutil/hwcontext.h"
23 #include "libavutil/hwcontext_cuda_internal.h"
24 #include "libavutil/cuda_check.h"
29 #include "cuda/load_helper.h"
/* Kernel image (PTX, going by the symbol names) and its length. Both are
 * defined outside this file and are passed to ff_cuda_load_module(). */
extern const unsigned int  ff_vf_yadif_cuda_ptx_len;
extern const unsigned char ff_vf_yadif_cuda_ptx_data[];

34 typedef struct DeintCUDAContext
{
37 AVCUDADeviceContext
*hwctx
;
38 AVBufferRef
*device_ref
;
39 AVBufferRef
*input_frames_ref
;
40 AVHWFramesContext
*input_frames
;
43 CUfunction cu_func_uchar
;
44 CUfunction cu_func_uchar2
;
45 CUfunction cu_func_ushort
;
46 CUfunction cu_func_ushort2
;
/* Integer division of a by b, rounded up (intended for positive operands). */
#define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
/* Round a up to the next multiple of b; only valid when b is a power of two. */
#define ALIGN_UP(a, b) (((a) + (b) - 1) & ~((b) - 1))
/* Thread block dimensions used for every kernel launch in this file. */
#define BLOCKX 32
#define BLOCKY 16

/* Run a CUDA driver call through FF_CUDA_CHECK_DL.
 * Requires `ctx` (AVFilterContext *) and `s` (DeintCUDAContext *) in scope. */
#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x)

56 static CUresult
call_kernel(AVFilterContext
*ctx
, CUfunction func
,
57 CUdeviceptr prev
, CUdeviceptr cur
, CUdeviceptr next
,
58 CUarray_format format
, int channels
,
59 int src_width
, // Width is pixels per channel
60 int src_height
, // Height is pixels per channel
61 int src_pitch
, // Pitch is bytes
63 int dst_width
, // Width is pixels per channel
64 int dst_height
, // Height is pixels per channel
65 int dst_pitch
, // Pitch is pixels per channel
68 DeintCUDAContext
*s
= ctx
->priv
;
69 CudaFunctions
*cu
= s
->hwctx
->internal
->cuda_dl
;
70 CUtexObject tex_prev
= 0, tex_cur
= 0, tex_next
= 0;
72 int skip_spatial_check
= s
->yadif
.mode
&2;
74 void *args
[] = { &dst
, &tex_prev
, &tex_cur
, &tex_next
,
75 &dst_width
, &dst_height
, &dst_pitch
,
76 &src_width
, &src_height
, &parity
, &tff
,
77 &skip_spatial_check
};
79 CUDA_TEXTURE_DESC tex_desc
= {
80 .filterMode
= CU_TR_FILTER_MODE_POINT
,
81 .flags
= CU_TRSF_READ_AS_INTEGER
,
84 CUDA_RESOURCE_DESC res_desc
= {
85 .resType
= CU_RESOURCE_TYPE_PITCH2D
,
86 .res
.pitch2D
.format
= format
,
87 .res
.pitch2D
.numChannels
= channels
,
88 .res
.pitch2D
.width
= src_width
,
89 .res
.pitch2D
.height
= src_height
,
90 .res
.pitch2D
.pitchInBytes
= src_pitch
,
93 res_desc
.res
.pitch2D
.devPtr
= (CUdeviceptr
)prev
;
94 ret
= CHECK_CU(cu
->cuTexObjectCreate(&tex_prev
, &res_desc
, &tex_desc
, NULL
));
98 res_desc
.res
.pitch2D
.devPtr
= (CUdeviceptr
)cur
;
99 ret
= CHECK_CU(cu
->cuTexObjectCreate(&tex_cur
, &res_desc
, &tex_desc
, NULL
));
103 res_desc
.res
.pitch2D
.devPtr
= (CUdeviceptr
)next
;
104 ret
= CHECK_CU(cu
->cuTexObjectCreate(&tex_next
, &res_desc
, &tex_desc
, NULL
));
108 ret
= CHECK_CU(cu
->cuLaunchKernel(func
,
109 DIV_UP(dst_width
, BLOCKX
), DIV_UP(dst_height
, BLOCKY
), 1,
111 0, s
->hwctx
->stream
, args
, NULL
));
115 CHECK_CU(cu
->cuTexObjectDestroy(tex_prev
));
117 CHECK_CU(cu
->cuTexObjectDestroy(tex_cur
));
119 CHECK_CU(cu
->cuTexObjectDestroy(tex_next
));
124 static void filter(AVFilterContext
*ctx
, AVFrame
*dst
,
127 DeintCUDAContext
*s
= ctx
->priv
;
128 YADIFContext
*y
= &s
->yadif
;
129 CudaFunctions
*cu
= s
->hwctx
->internal
->cuda_dl
;
133 ret
= CHECK_CU(cu
->cuCtxPushCurrent(s
->hwctx
->cuda_ctx
));
137 for (i
= 0; i
< y
->csp
->nb_components
; i
++) {
139 CUarray_format format
;
140 int pixel_size
, channels
;
141 const AVComponentDescriptor
*comp
= &y
->csp
->comp
[i
];
143 if (comp
->plane
< i
) {
144 // We process planes as a whole, so don't reprocess
145 // them for additional components
149 pixel_size
= (comp
->depth
+ comp
->shift
) / 8;
150 channels
= comp
->step
/ pixel_size
;
151 if (pixel_size
> 2 || channels
> 2) {
152 av_log(ctx
, AV_LOG_ERROR
, "Unsupported pixel format: %s\n", y
->csp
->name
);
155 switch (pixel_size
) {
157 func
= channels
== 1 ? s
->cu_func_uchar
: s
->cu_func_uchar2
;
158 format
= CU_AD_FORMAT_UNSIGNED_INT8
;
161 func
= channels
== 1 ? s
->cu_func_ushort
: s
->cu_func_ushort2
;
162 format
= CU_AD_FORMAT_UNSIGNED_INT16
;
165 av_log(ctx
, AV_LOG_ERROR
, "Unsupported pixel format: %s\n", y
->csp
->name
);
168 av_log(ctx
, AV_LOG_TRACE
,
169 "Deinterlacing plane %d: pixel_size: %d channels: %d\n",
170 comp
->plane
, pixel_size
, channels
);
171 call_kernel(ctx
, func
,
172 (CUdeviceptr
)y
->prev
->data
[i
],
173 (CUdeviceptr
)y
->cur
->data
[i
],
174 (CUdeviceptr
)y
->next
->data
[i
],
176 AV_CEIL_RSHIFT(y
->cur
->width
, i
? y
->csp
->log2_chroma_w
: 0),
177 AV_CEIL_RSHIFT(y
->cur
->height
, i
? y
->csp
->log2_chroma_h
: 0),
179 (CUdeviceptr
)dst
->data
[i
],
180 AV_CEIL_RSHIFT(dst
->width
, i
? y
->csp
->log2_chroma_w
: 0),
181 AV_CEIL_RSHIFT(dst
->height
, i
? y
->csp
->log2_chroma_h
: 0),
182 dst
->linesize
[i
] / comp
->step
,
187 CHECK_CU(cu
->cuCtxPopCurrent(&dummy
));
191 static av_cold
void deint_cuda_uninit(AVFilterContext
*ctx
)
194 DeintCUDAContext
*s
= ctx
->priv
;
196 if (s
->hwctx
&& s
->cu_module
) {
197 CudaFunctions
*cu
= s
->hwctx
->internal
->cuda_dl
;
198 CHECK_CU(cu
->cuCtxPushCurrent(s
->hwctx
->cuda_ctx
));
199 CHECK_CU(cu
->cuModuleUnload(s
->cu_module
));
200 CHECK_CU(cu
->cuCtxPopCurrent(&dummy
));
203 ff_yadif_uninit(ctx
);
205 av_buffer_unref(&s
->device_ref
);
207 av_buffer_unref(&s
->input_frames_ref
);
208 s
->input_frames
= NULL
;
211 static int config_input(AVFilterLink
*inlink
)
213 FilterLink
*l
= ff_filter_link(inlink
);
214 AVFilterContext
*ctx
= inlink
->dst
;
215 DeintCUDAContext
*s
= ctx
->priv
;
217 if (!l
->hw_frames_ctx
) {
218 av_log(ctx
, AV_LOG_ERROR
, "A hardware frames reference is "
219 "required to associate the processing device.\n");
220 return AVERROR(EINVAL
);
223 s
->input_frames_ref
= av_buffer_ref(l
->hw_frames_ctx
);
224 if (!s
->input_frames_ref
) {
225 av_log(ctx
, AV_LOG_ERROR
, "A input frames reference create "
227 return AVERROR(ENOMEM
);
229 s
->input_frames
= (AVHWFramesContext
*)s
->input_frames_ref
->data
;
234 static int config_output(AVFilterLink
*link
)
236 FilterLink
*l
= ff_filter_link(link
);
237 AVHWFramesContext
*output_frames
;
238 AVFilterContext
*ctx
= link
->src
;
239 DeintCUDAContext
*s
= ctx
->priv
;
240 YADIFContext
*y
= &s
->yadif
;
245 av_assert0(s
->input_frames
);
246 s
->device_ref
= av_buffer_ref(s
->input_frames
->device_ref
);
247 if (!s
->device_ref
) {
248 av_log(ctx
, AV_LOG_ERROR
, "A device reference create "
250 return AVERROR(ENOMEM
);
252 s
->hwctx
= ((AVHWDeviceContext
*)s
->device_ref
->data
)->hwctx
;
253 cu
= s
->hwctx
->internal
->cuda_dl
;
255 l
->hw_frames_ctx
= av_hwframe_ctx_alloc(s
->device_ref
);
256 if (!l
->hw_frames_ctx
) {
257 av_log(ctx
, AV_LOG_ERROR
, "Failed to create HW frame context "
259 ret
= AVERROR(ENOMEM
);
263 output_frames
= (AVHWFramesContext
*)l
->hw_frames_ctx
->data
;
265 output_frames
->format
= AV_PIX_FMT_CUDA
;
266 output_frames
->sw_format
= s
->input_frames
->sw_format
;
267 output_frames
->width
= ctx
->inputs
[0]->w
;
268 output_frames
->height
= ctx
->inputs
[0]->h
;
270 output_frames
->initial_pool_size
= 4;
272 ret
= ff_filter_init_hw_frames(ctx
, link
, 10);
276 ret
= av_hwframe_ctx_init(l
->hw_frames_ctx
);
278 av_log(ctx
, AV_LOG_ERROR
, "Failed to initialise CUDA frame "
279 "context for output: %d\n", ret
);
283 ret
= ff_yadif_config_output_common(link
);
287 y
->csp
= av_pix_fmt_desc_get(output_frames
->sw_format
);
290 ret
= CHECK_CU(cu
->cuCtxPushCurrent(s
->hwctx
->cuda_ctx
));
294 ret
= ff_cuda_load_module(ctx
, s
->hwctx
, &s
->cu_module
, ff_vf_yadif_cuda_ptx_data
, ff_vf_yadif_cuda_ptx_len
);
298 ret
= CHECK_CU(cu
->cuModuleGetFunction(&s
->cu_func_uchar
, s
->cu_module
, "yadif_uchar"));
302 ret
= CHECK_CU(cu
->cuModuleGetFunction(&s
->cu_func_uchar2
, s
->cu_module
, "yadif_uchar2"));
306 ret
= CHECK_CU(cu
->cuModuleGetFunction(&s
->cu_func_ushort
, s
->cu_module
, "yadif_ushort"));
310 ret
= CHECK_CU(cu
->cuModuleGetFunction(&s
->cu_func_ushort2
, s
->cu_module
, "yadif_ushort2"));
315 CHECK_CU(cu
->cuCtxPopCurrent(&dummy
));
320 static const AVClass yadif_cuda_class
= {
321 .class_name
= "yadif_cuda",
322 .item_name
= av_default_item_name
,
323 .option
= ff_yadif_options
,
324 .version
= LIBAVUTIL_VERSION_INT
,
325 .category
= AV_CLASS_CATEGORY_FILTER
,
328 static const AVFilterPad deint_cuda_inputs
[] = {
331 .type
= AVMEDIA_TYPE_VIDEO
,
332 .filter_frame
= ff_yadif_filter_frame
,
333 .config_props
= config_input
,
337 static const AVFilterPad deint_cuda_outputs
[] = {
340 .type
= AVMEDIA_TYPE_VIDEO
,
341 .request_frame
= ff_yadif_request_frame
,
342 .config_props
= config_output
,
346 const FFFilter ff_vf_yadif_cuda
= {
347 .p
.name
= "yadif_cuda",
348 .p
.description
= NULL_IF_CONFIG_SMALL("Deinterlace CUDA frames"),
349 .p
.priv_class
= &yadif_cuda_class
,
350 .p
.flags
= AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL
,
351 .priv_size
= sizeof(DeintCUDAContext
),
352 .uninit
= deint_cuda_uninit
,
353 FILTER_SINGLE_PIXFMT(AV_PIX_FMT_CUDA
),
354 FILTER_INPUTS(deint_cuda_inputs
),
355 FILTER_OUTPUTS(deint_cuda_outputs
),
356 .flags_internal
= FF_FILTER_FLAG_HWFRAME_AWARE
,