/*
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21 #include "hwcontext.h"
22 #include "hwcontext_internal.h"
23 #include "hwcontext_cuda.h"
/**
 * Private per-frames-context state of the CUDA hwcontext backend.
 */
typedef struct CUDAFramesContext {
    /* Chroma subsampling shifts of the software pixel format, as reported by
     * av_pix_fmt_get_chroma_sub_sample(). The transfer functions use
     * shift_height to derive the row count of the non-luma planes. */
    int shift_width;
    int shift_height;
} CUDAFramesContext;
32 static const enum AVPixelFormat supported_formats
[] = {
40 static int cuda_frames_get_constraints(AVHWDeviceContext
*ctx
,
42 AVHWFramesConstraints
*constraints
)
46 constraints
->valid_sw_formats
= av_malloc_array(FF_ARRAY_ELEMS(supported_formats
) + 1,
47 sizeof(*constraints
->valid_sw_formats
));
48 if (!constraints
->valid_sw_formats
)
49 return AVERROR(ENOMEM
);
51 for (i
= 0; i
< FF_ARRAY_ELEMS(supported_formats
); i
++)
52 constraints
->valid_sw_formats
[i
] = supported_formats
[i
];
53 constraints
->valid_sw_formats
[FF_ARRAY_ELEMS(supported_formats
)] = AV_PIX_FMT_NONE
;
55 constraints
->valid_hw_formats
= av_malloc_array(2, sizeof(*constraints
->valid_hw_formats
));
56 if (!constraints
->valid_hw_formats
)
57 return AVERROR(ENOMEM
);
59 constraints
->valid_hw_formats
[0] = AV_PIX_FMT_CUDA
;
60 constraints
->valid_hw_formats
[1] = AV_PIX_FMT_NONE
;
65 static void cuda_buffer_free(void *opaque
, uint8_t *data
)
67 AVHWFramesContext
*ctx
= opaque
;
68 AVCUDADeviceContext
*hwctx
= ctx
->device_ctx
->hwctx
;
72 cuCtxPushCurrent(hwctx
->cuda_ctx
);
74 cuMemFree((CUdeviceptr
)data
);
76 cuCtxPopCurrent(&dummy
);
79 static AVBufferRef
*cuda_pool_alloc(void *opaque
, int size
)
81 AVHWFramesContext
*ctx
= opaque
;
82 AVCUDADeviceContext
*hwctx
= ctx
->device_ctx
->hwctx
;
84 AVBufferRef
*ret
= NULL
;
85 CUcontext dummy
= NULL
;
89 err
= cuCtxPushCurrent(hwctx
->cuda_ctx
);
90 if (err
!= CUDA_SUCCESS
) {
91 av_log(ctx
, AV_LOG_ERROR
, "Error setting current CUDA context\n");
95 err
= cuMemAlloc(&data
, size
);
96 if (err
!= CUDA_SUCCESS
)
99 ret
= av_buffer_create((uint8_t*)data
, size
, cuda_buffer_free
, ctx
, 0);
106 cuCtxPopCurrent(&dummy
);
110 static int cuda_frames_init(AVHWFramesContext
*ctx
)
112 CUDAFramesContext
*priv
= ctx
->internal
->priv
;
115 for (i
= 0; i
< FF_ARRAY_ELEMS(supported_formats
); i
++) {
116 if (ctx
->sw_format
== supported_formats
[i
])
119 if (i
== FF_ARRAY_ELEMS(supported_formats
)) {
120 av_log(ctx
, AV_LOG_ERROR
, "Pixel format '%s' is not supported\n",
121 av_get_pix_fmt_name(ctx
->sw_format
));
122 return AVERROR(ENOSYS
);
125 av_pix_fmt_get_chroma_sub_sample(ctx
->sw_format
, &priv
->shift_width
, &priv
->shift_height
);
130 switch (ctx
->sw_format
) {
131 case AV_PIX_FMT_NV12
:
132 case AV_PIX_FMT_YUV420P
:
133 size
= ctx
->width
* ctx
->height
* 3 / 2;
135 case AV_PIX_FMT_P010
:
136 size
= ctx
->width
* ctx
->height
* 3;
138 case AV_PIX_FMT_YUV444P
:
139 size
= ctx
->width
* ctx
->height
* 3;
141 case AV_PIX_FMT_YUV444P16
:
142 size
= ctx
->width
* ctx
->height
* 6;
146 ctx
->internal
->pool_internal
= av_buffer_pool_init2(size
, ctx
, cuda_pool_alloc
, NULL
);
147 if (!ctx
->internal
->pool_internal
)
148 return AVERROR(ENOMEM
);
154 static int cuda_get_buffer(AVHWFramesContext
*ctx
, AVFrame
*frame
)
156 frame
->buf
[0] = av_buffer_pool_get(ctx
->pool
);
158 return AVERROR(ENOMEM
);
160 switch (ctx
->sw_format
) {
161 case AV_PIX_FMT_NV12
:
162 frame
->data
[0] = frame
->buf
[0]->data
;
163 frame
->data
[1] = frame
->data
[0] + ctx
->width
* ctx
->height
;
164 frame
->linesize
[0] = ctx
->width
;
165 frame
->linesize
[1] = ctx
->width
;
167 case AV_PIX_FMT_YUV420P
:
168 frame
->data
[0] = frame
->buf
[0]->data
;
169 frame
->data
[2] = frame
->data
[0] + ctx
->width
* ctx
->height
;
170 frame
->data
[1] = frame
->data
[2] + ctx
->width
* ctx
->height
/ 4;
171 frame
->linesize
[0] = ctx
->width
;
172 frame
->linesize
[1] = ctx
->width
/ 2;
173 frame
->linesize
[2] = ctx
->width
/ 2;
175 case AV_PIX_FMT_P010
:
176 frame
->data
[0] = frame
->buf
[0]->data
;
177 frame
->data
[1] = frame
->data
[0] + 2 * ctx
->width
* ctx
->height
;
178 frame
->linesize
[0] = 2 * ctx
->width
;
179 frame
->linesize
[1] = 2 * ctx
->width
;
181 case AV_PIX_FMT_YUV444P
:
182 frame
->data
[0] = frame
->buf
[0]->data
;
183 frame
->data
[1] = frame
->data
[0] + ctx
->width
* ctx
->height
;
184 frame
->data
[2] = frame
->data
[1] + ctx
->width
* ctx
->height
;
185 frame
->linesize
[0] = ctx
->width
;
186 frame
->linesize
[1] = ctx
->width
;
187 frame
->linesize
[2] = ctx
->width
;
189 case AV_PIX_FMT_YUV444P16
:
190 frame
->data
[0] = frame
->buf
[0]->data
;
191 frame
->data
[1] = frame
->data
[0] + 2 * ctx
->width
* ctx
->height
;
192 frame
->data
[2] = frame
->data
[1] + 2 * ctx
->width
* ctx
->height
;
193 frame
->linesize
[0] = 2 * ctx
->width
;
194 frame
->linesize
[1] = 2 * ctx
->width
;
195 frame
->linesize
[2] = 2 * ctx
->width
;
198 av_frame_unref(frame
);
202 frame
->format
= AV_PIX_FMT_CUDA
;
203 frame
->width
= ctx
->width
;
204 frame
->height
= ctx
->height
;
209 static int cuda_transfer_get_formats(AVHWFramesContext
*ctx
,
210 enum AVHWFrameTransferDirection dir
,
211 enum AVPixelFormat
**formats
)
213 enum AVPixelFormat
*fmts
;
215 fmts
= av_malloc_array(2, sizeof(*fmts
));
217 return AVERROR(ENOMEM
);
219 fmts
[0] = ctx
->sw_format
;
220 fmts
[1] = AV_PIX_FMT_NONE
;
227 static int cuda_transfer_data_from(AVHWFramesContext
*ctx
, AVFrame
*dst
,
230 CUDAFramesContext
*priv
= ctx
->internal
->priv
;
231 AVCUDADeviceContext
*device_hwctx
= ctx
->device_ctx
->hwctx
;
237 err
= cuCtxPushCurrent(device_hwctx
->cuda_ctx
);
238 if (err
!= CUDA_SUCCESS
)
239 return AVERROR_UNKNOWN
;
241 for (i
= 0; i
< FF_ARRAY_ELEMS(src
->data
) && src
->data
[i
]; i
++) {
242 CUDA_MEMCPY2D cpy
= {
243 .srcMemoryType
= CU_MEMORYTYPE_DEVICE
,
244 .dstMemoryType
= CU_MEMORYTYPE_HOST
,
245 .srcDevice
= (CUdeviceptr
)src
->data
[i
],
246 .dstHost
= dst
->data
[i
],
247 .srcPitch
= src
->linesize
[i
],
248 .dstPitch
= dst
->linesize
[i
],
249 .WidthInBytes
= FFMIN(src
->linesize
[i
], dst
->linesize
[i
]),
250 .Height
= src
->height
>> (i
? priv
->shift_height
: 0),
253 err
= cuMemcpy2D(&cpy
);
254 if (err
!= CUDA_SUCCESS
) {
255 av_log(ctx
, AV_LOG_ERROR
, "Error transferring the data from the CUDA frame\n");
256 return AVERROR_UNKNOWN
;
260 cuCtxPopCurrent(&dummy
);
265 static int cuda_transfer_data_to(AVHWFramesContext
*ctx
, AVFrame
*dst
,
268 CUDAFramesContext
*priv
= ctx
->internal
->priv
;
269 AVCUDADeviceContext
*device_hwctx
= ctx
->device_ctx
->hwctx
;
275 err
= cuCtxPushCurrent(device_hwctx
->cuda_ctx
);
276 if (err
!= CUDA_SUCCESS
)
277 return AVERROR_UNKNOWN
;
279 for (i
= 0; i
< FF_ARRAY_ELEMS(src
->data
) && src
->data
[i
]; i
++) {
280 CUDA_MEMCPY2D cpy
= {
281 .srcMemoryType
= CU_MEMORYTYPE_HOST
,
282 .dstMemoryType
= CU_MEMORYTYPE_DEVICE
,
283 .srcHost
= src
->data
[i
],
284 .dstDevice
= (CUdeviceptr
)dst
->data
[i
],
285 .srcPitch
= src
->linesize
[i
],
286 .dstPitch
= dst
->linesize
[i
],
287 .WidthInBytes
= FFMIN(src
->linesize
[i
], dst
->linesize
[i
]),
288 .Height
= src
->height
>> (i
? priv
->shift_height
: 0),
291 err
= cuMemcpy2D(&cpy
);
292 if (err
!= CUDA_SUCCESS
) {
293 av_log(ctx
, AV_LOG_ERROR
, "Error transferring the data from the CUDA frame\n");
294 return AVERROR_UNKNOWN
;
298 cuCtxPopCurrent(&dummy
);
303 static void cuda_device_free(AVHWDeviceContext
*ctx
)
305 AVCUDADeviceContext
*hwctx
= ctx
->hwctx
;
306 cuCtxDestroy(hwctx
->cuda_ctx
);
309 static int cuda_device_create(AVHWDeviceContext
*ctx
, const char *device
,
310 AVDictionary
*opts
, int flags
)
312 AVCUDADeviceContext
*hwctx
= ctx
->hwctx
;
319 device_idx
= strtol(device
, NULL
, 0);
322 if (err
!= CUDA_SUCCESS
) {
323 av_log(ctx
, AV_LOG_ERROR
, "Could not initialize the CUDA driver API\n");
324 return AVERROR_UNKNOWN
;
327 err
= cuDeviceGet(&cu_device
, device_idx
);
328 if (err
!= CUDA_SUCCESS
) {
329 av_log(ctx
, AV_LOG_ERROR
, "Could not get the device number %d\n", device_idx
);
330 return AVERROR_UNKNOWN
;
333 err
= cuCtxCreate(&hwctx
->cuda_ctx
, 0, cu_device
);
334 if (err
!= CUDA_SUCCESS
) {
335 av_log(ctx
, AV_LOG_ERROR
, "Error creating a CUDA context\n");
336 return AVERROR_UNKNOWN
;
339 cuCtxPopCurrent(&dummy
);
341 ctx
->free
= cuda_device_free
;
346 const HWContextType ff_hwcontext_type_cuda
= {
347 .type
= AV_HWDEVICE_TYPE_CUDA
,
350 .device_hwctx_size
= sizeof(AVCUDADeviceContext
),
351 .frames_priv_size
= sizeof(CUDAFramesContext
),
353 .device_create
= cuda_device_create
,
354 .frames_get_constraints
= cuda_frames_get_constraints
,
355 .frames_init
= cuda_frames_init
,
356 .frames_get_buffer
= cuda_get_buffer
,
357 .transfer_get_formats
= cuda_transfer_get_formats
,
358 .transfer_data_to
= cuda_transfer_data_to
,
359 .transfer_data_from
= cuda_transfer_data_from
,
361 .pix_fmts
= (const enum AVPixelFormat
[]){ AV_PIX_FMT_CUDA
, AV_PIX_FMT_NONE
},