aarch64: Add assembly support for -fsanitize=hwaddress tagged globals.
[libav.git] / libavutil / hwcontext_cuda.c
blobfc9b8b429859b60722e7566734acdd536acdba9d
/*
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
19 #include "buffer.h"
20 #include "common.h"
21 #include "hwcontext.h"
22 #include "hwcontext_internal.h"
23 #include "hwcontext_cuda.h"
24 #include "mem.h"
25 #include "pixdesc.h"
26 #include "pixfmt.h"
/**
 * Private per-frames-context state of the CUDA backend.
 */
typedef struct CUDAFramesContext {
    /* Chroma shift amounts of the software format, filled in by
     * av_pix_fmt_get_chroma_sub_sample() in cuda_frames_init(). */
    int shift_width;
    /* The transfer functions shift the frame height right by this amount
     * for every plane after the first. */
    int shift_height;
} CUDAFramesContext;
32 static const enum AVPixelFormat supported_formats[] = {
33 AV_PIX_FMT_NV12,
34 AV_PIX_FMT_YUV420P,
35 AV_PIX_FMT_P010,
36 AV_PIX_FMT_YUV444P,
37 AV_PIX_FMT_YUV444P16,
40 static int cuda_frames_get_constraints(AVHWDeviceContext *ctx,
41 const void *hwconfig,
42 AVHWFramesConstraints *constraints)
44 int i;
46 constraints->valid_sw_formats = av_malloc_array(FF_ARRAY_ELEMS(supported_formats) + 1,
47 sizeof(*constraints->valid_sw_formats));
48 if (!constraints->valid_sw_formats)
49 return AVERROR(ENOMEM);
51 for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
52 constraints->valid_sw_formats[i] = supported_formats[i];
53 constraints->valid_sw_formats[FF_ARRAY_ELEMS(supported_formats)] = AV_PIX_FMT_NONE;
55 constraints->valid_hw_formats = av_malloc_array(2, sizeof(*constraints->valid_hw_formats));
56 if (!constraints->valid_hw_formats)
57 return AVERROR(ENOMEM);
59 constraints->valid_hw_formats[0] = AV_PIX_FMT_CUDA;
60 constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
62 return 0;
65 static void cuda_buffer_free(void *opaque, uint8_t *data)
67 AVHWFramesContext *ctx = opaque;
68 AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
70 CUcontext dummy;
72 cuCtxPushCurrent(hwctx->cuda_ctx);
74 cuMemFree((CUdeviceptr)data);
76 cuCtxPopCurrent(&dummy);
79 static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
81 AVHWFramesContext *ctx = opaque;
82 AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
84 AVBufferRef *ret = NULL;
85 CUcontext dummy = NULL;
86 CUdeviceptr data;
87 CUresult err;
89 err = cuCtxPushCurrent(hwctx->cuda_ctx);
90 if (err != CUDA_SUCCESS) {
91 av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
92 return NULL;
95 err = cuMemAlloc(&data, size);
96 if (err != CUDA_SUCCESS)
97 goto fail;
99 ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0);
100 if (!ret) {
101 cuMemFree(data);
102 goto fail;
105 fail:
106 cuCtxPopCurrent(&dummy);
107 return ret;
110 static int cuda_frames_init(AVHWFramesContext *ctx)
112 CUDAFramesContext *priv = ctx->internal->priv;
113 int i;
115 for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
116 if (ctx->sw_format == supported_formats[i])
117 break;
119 if (i == FF_ARRAY_ELEMS(supported_formats)) {
120 av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n",
121 av_get_pix_fmt_name(ctx->sw_format));
122 return AVERROR(ENOSYS);
125 av_pix_fmt_get_chroma_sub_sample(ctx->sw_format, &priv->shift_width, &priv->shift_height);
127 if (!ctx->pool) {
128 int size;
130 switch (ctx->sw_format) {
131 case AV_PIX_FMT_NV12:
132 case AV_PIX_FMT_YUV420P:
133 size = ctx->width * ctx->height * 3 / 2;
134 break;
135 case AV_PIX_FMT_P010:
136 size = ctx->width * ctx->height * 3;
137 break;
138 case AV_PIX_FMT_YUV444P:
139 size = ctx->width * ctx->height * 3;
140 break;
141 case AV_PIX_FMT_YUV444P16:
142 size = ctx->width * ctx->height * 6;
143 break;
146 ctx->internal->pool_internal = av_buffer_pool_init2(size, ctx, cuda_pool_alloc, NULL);
147 if (!ctx->internal->pool_internal)
148 return AVERROR(ENOMEM);
151 return 0;
154 static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
156 frame->buf[0] = av_buffer_pool_get(ctx->pool);
157 if (!frame->buf[0])
158 return AVERROR(ENOMEM);
160 switch (ctx->sw_format) {
161 case AV_PIX_FMT_NV12:
162 frame->data[0] = frame->buf[0]->data;
163 frame->data[1] = frame->data[0] + ctx->width * ctx->height;
164 frame->linesize[0] = ctx->width;
165 frame->linesize[1] = ctx->width;
166 break;
167 case AV_PIX_FMT_YUV420P:
168 frame->data[0] = frame->buf[0]->data;
169 frame->data[2] = frame->data[0] + ctx->width * ctx->height;
170 frame->data[1] = frame->data[2] + ctx->width * ctx->height / 4;
171 frame->linesize[0] = ctx->width;
172 frame->linesize[1] = ctx->width / 2;
173 frame->linesize[2] = ctx->width / 2;
174 break;
175 case AV_PIX_FMT_P010:
176 frame->data[0] = frame->buf[0]->data;
177 frame->data[1] = frame->data[0] + 2 * ctx->width * ctx->height;
178 frame->linesize[0] = 2 * ctx->width;
179 frame->linesize[1] = 2 * ctx->width;
180 break;
181 case AV_PIX_FMT_YUV444P:
182 frame->data[0] = frame->buf[0]->data;
183 frame->data[1] = frame->data[0] + ctx->width * ctx->height;
184 frame->data[2] = frame->data[1] + ctx->width * ctx->height;
185 frame->linesize[0] = ctx->width;
186 frame->linesize[1] = ctx->width;
187 frame->linesize[2] = ctx->width;
188 break;
189 case AV_PIX_FMT_YUV444P16:
190 frame->data[0] = frame->buf[0]->data;
191 frame->data[1] = frame->data[0] + 2 * ctx->width * ctx->height;
192 frame->data[2] = frame->data[1] + 2 * ctx->width * ctx->height;
193 frame->linesize[0] = 2 * ctx->width;
194 frame->linesize[1] = 2 * ctx->width;
195 frame->linesize[2] = 2 * ctx->width;
196 break;
197 default:
198 av_frame_unref(frame);
199 return AVERROR_BUG;
202 frame->format = AV_PIX_FMT_CUDA;
203 frame->width = ctx->width;
204 frame->height = ctx->height;
206 return 0;
209 static int cuda_transfer_get_formats(AVHWFramesContext *ctx,
210 enum AVHWFrameTransferDirection dir,
211 enum AVPixelFormat **formats)
213 enum AVPixelFormat *fmts;
215 fmts = av_malloc_array(2, sizeof(*fmts));
216 if (!fmts)
217 return AVERROR(ENOMEM);
219 fmts[0] = ctx->sw_format;
220 fmts[1] = AV_PIX_FMT_NONE;
222 *formats = fmts;
224 return 0;
227 static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
228 const AVFrame *src)
230 CUDAFramesContext *priv = ctx->internal->priv;
231 AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
233 CUcontext dummy;
234 CUresult err;
235 int i;
237 err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
238 if (err != CUDA_SUCCESS)
239 return AVERROR_UNKNOWN;
241 for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
242 CUDA_MEMCPY2D cpy = {
243 .srcMemoryType = CU_MEMORYTYPE_DEVICE,
244 .dstMemoryType = CU_MEMORYTYPE_HOST,
245 .srcDevice = (CUdeviceptr)src->data[i],
246 .dstHost = dst->data[i],
247 .srcPitch = src->linesize[i],
248 .dstPitch = dst->linesize[i],
249 .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]),
250 .Height = src->height >> (i ? priv->shift_height : 0),
253 err = cuMemcpy2D(&cpy);
254 if (err != CUDA_SUCCESS) {
255 av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
256 return AVERROR_UNKNOWN;
260 cuCtxPopCurrent(&dummy);
262 return 0;
265 static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
266 const AVFrame *src)
268 CUDAFramesContext *priv = ctx->internal->priv;
269 AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
271 CUcontext dummy;
272 CUresult err;
273 int i;
275 err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
276 if (err != CUDA_SUCCESS)
277 return AVERROR_UNKNOWN;
279 for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
280 CUDA_MEMCPY2D cpy = {
281 .srcMemoryType = CU_MEMORYTYPE_HOST,
282 .dstMemoryType = CU_MEMORYTYPE_DEVICE,
283 .srcHost = src->data[i],
284 .dstDevice = (CUdeviceptr)dst->data[i],
285 .srcPitch = src->linesize[i],
286 .dstPitch = dst->linesize[i],
287 .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]),
288 .Height = src->height >> (i ? priv->shift_height : 0),
291 err = cuMemcpy2D(&cpy);
292 if (err != CUDA_SUCCESS) {
293 av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
294 return AVERROR_UNKNOWN;
298 cuCtxPopCurrent(&dummy);
300 return 0;
303 static void cuda_device_free(AVHWDeviceContext *ctx)
305 AVCUDADeviceContext *hwctx = ctx->hwctx;
306 cuCtxDestroy(hwctx->cuda_ctx);
309 static int cuda_device_create(AVHWDeviceContext *ctx, const char *device,
310 AVDictionary *opts, int flags)
312 AVCUDADeviceContext *hwctx = ctx->hwctx;
313 CUdevice cu_device;
314 CUcontext dummy;
315 CUresult err;
316 int device_idx = 0;
318 if (device)
319 device_idx = strtol(device, NULL, 0);
321 err = cuInit(0);
322 if (err != CUDA_SUCCESS) {
323 av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n");
324 return AVERROR_UNKNOWN;
327 err = cuDeviceGet(&cu_device, device_idx);
328 if (err != CUDA_SUCCESS) {
329 av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", device_idx);
330 return AVERROR_UNKNOWN;
333 err = cuCtxCreate(&hwctx->cuda_ctx, 0, cu_device);
334 if (err != CUDA_SUCCESS) {
335 av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n");
336 return AVERROR_UNKNOWN;
339 cuCtxPopCurrent(&dummy);
341 ctx->free = cuda_device_free;
343 return 0;
346 const HWContextType ff_hwcontext_type_cuda = {
347 .type = AV_HWDEVICE_TYPE_CUDA,
348 .name = "CUDA",
350 .device_hwctx_size = sizeof(AVCUDADeviceContext),
351 .frames_priv_size = sizeof(CUDAFramesContext),
353 .device_create = cuda_device_create,
354 .frames_get_constraints = cuda_frames_get_constraints,
355 .frames_init = cuda_frames_init,
356 .frames_get_buffer = cuda_get_buffer,
357 .transfer_get_formats = cuda_transfer_get_formats,
358 .transfer_data_to = cuda_transfer_data_to,
359 .transfer_data_from = cuda_transfer_data_from,
361 .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE },