libavfilter/vf_scale_vulkan.c

   1 /*
   2  * Copyright (c) Lynne
   3  *
   4  * This file is part of FFmpeg.
   5  *
   6  * FFmpeg is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * FFmpeg is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with FFmpeg; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 #include "libavutil/random_seed.h"
  22 #include "libavutil/opt.h"
  23 #include "libavutil/vulkan_spirv.h"
  24 #include "vulkan_filter.h"
  25 #include "scale_eval.h"
  26 #include "filters.h"
  27 #include "colorspace.h"
  28 #include "video.h"
  29
  30 enum ScalerFunc {
  31     F_BILINEAR = 0,
  32     F_NEAREST,
  33
  34     F_NB,
  35 };
  36
  37 typedef struct ScaleVulkanContext {
  38     FFVulkanContext vkctx;
  39
  40     int initialized;
  41     FFVkExecPool e;
  42     AVVulkanDeviceQueueFamily *qf;
  43     FFVulkanShader shd;
  44     VkSampler sampler;
  45
  46     /* Push constants / options */
  47     struct {
  48         float yuv_matrix[4][4];
  49         int crop_x;
  50         int crop_y;
  51         int crop_w;
  52         int crop_h;
  53     } opts;
  54
  55     char *out_format_string;
  56     char *w_expr;
  57     char *h_expr;
  58
  59     enum ScalerFunc scaler;
  60     enum AVColorRange out_range;
  61 } ScaleVulkanContext;
  62
  63 static const char scale_bilinear[] = {
  64     C(0, vec4 scale_bilinear(int idx, ivec2 pos, vec2 crop_range, vec2 crop_off))
  65     C(0, {                                                                      )
  66     C(1,     vec2 npos = (vec2(pos) + 0.5f) / imageSize(output_img[idx]);       )
  67     C(1,     npos *= crop_range;    /* Reduce the range */                      )
  68     C(1,     npos += crop_off;      /* Offset the start */                      )
  69     C(1,     return texture(input_img[idx], npos);                              )
  70     C(0, }                                                                      )
  71 };
  72
  73 static const char rgb2yuv[] = {
  74     C(0, vec4 rgb2yuv(vec4 src, int fullrange)                                  )
  75     C(0, {                                                                      )
  76     C(1,     src *= yuv_matrix;                                                 )
  77     C(1,     if (fullrange == 1) {                                              )
  78     C(2,         src += vec4(0.0, 0.5, 0.5, 0.0);                               )
  79     C(1,     } else {                                                           )
  80     C(2,         src *= vec4(219.0 / 255.0, 224.0 / 255.0, 224.0 / 255.0, 1.0); )
  81     C(2,         src += vec4(16.0 / 255.0, 128.0 / 255.0, 128.0 / 255.0, 0.0);  )
  82     C(1,     }                                                                  )
  83     C(1,     return src;                                                        )
  84     C(0, }                                                                      )
  85 };
  86
  87 static const char write_nv12[] = {
  88     C(0, void write_nv12(vec4 src, ivec2 pos)                                   )
  89     C(0, {                                                                      )
  90     C(1,     imageStore(output_img[0], pos, vec4(src.r, 0.0, 0.0, 0.0));        )
  91     C(1,     pos /= ivec2(2);                                                   )
  92     C(1,     imageStore(output_img[1], pos, vec4(src.g, src.b, 0.0, 0.0));      )
  93     C(0, }                                                                      )
  94 };
  95
  96 static const char write_420[] = {
  97     C(0, void write_420(vec4 src, ivec2 pos)                                    )
  98     C(0, {                                                                      )
  99     C(1,     imageStore(output_img[0], pos, vec4(src.r, 0.0, 0.0, 0.0));        )
 100     C(1,     pos /= ivec2(2);                                                   )
 101     C(1,     imageStore(output_img[1], pos, vec4(src.g, 0.0, 0.0, 0.0));        )
 102     C(1,     imageStore(output_img[2], pos, vec4(src.b, 0.0, 0.0, 0.0));        )
 103     C(0, }                                                                      )
 104 };
 105
 106 static const char write_444[] = {
 107     C(0, void write_444(vec4 src, ivec2 pos)                                    )
 108     C(0, {                                                                      )
 109     C(1,     imageStore(output_img[0], pos, vec4(src.r, 0.0, 0.0, 0.0));        )
 110     C(1,     imageStore(output_img[1], pos, vec4(src.g, 0.0, 0.0, 0.0));        )
 111     C(1,     imageStore(output_img[2], pos, vec4(src.b, 0.0, 0.0, 0.0));        )
 112     C(0, }                                                                      )
 113 };
 114
 115 static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
 116 {
 117     int err;
 118     uint8_t *spv_data;
 119     size_t spv_len;
 120     void *spv_opaque = NULL;
 121     VkFilter sampler_mode;
 122     ScaleVulkanContext *s = ctx->priv;
 123     FFVulkanContext *vkctx = &s->vkctx;
 124     FFVulkanShader *shd = &s->shd;
 125     FFVkSPIRVCompiler *spv;
 126     FFVulkanDescriptorSetBinding *desc;
 127
 128     int in_planes = av_pix_fmt_count_planes(s->vkctx.input_format);
 129
 130     switch (s->scaler) {
 131     case F_NEAREST:
 132         sampler_mode = VK_FILTER_NEAREST;
 133         break;
 134     case F_BILINEAR:
 135         sampler_mode = VK_FILTER_LINEAR;
 136         break;
 137     };
 138
 139     spv = ff_vk_spirv_init();
 140     if (!spv) {
 141         av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
 142         return AVERROR_EXTERNAL;
 143     }
 144
 145     s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0);
 146     if (!s->qf) {
 147         av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n");
 148         err = AVERROR(ENOTSUP);
 149         goto fail;
 150     }
 151
 152     RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL));
 153     RET(ff_vk_init_sampler(vkctx, &s->sampler, 0, sampler_mode));
 154     RET(ff_vk_shader_init(vkctx, &s->shd, "scale",
 155                           VK_SHADER_STAGE_COMPUTE_BIT,
 156                           NULL, 0,
 157                           32, 32, 1,
 158                           0));
 159
 160     GLSLC(0, layout(push_constant, std430) uniform pushConstants {        );
 161     GLSLC(1,    mat4 yuv_matrix;                                          );
 162     GLSLC(1,    int crop_x;                                               );
 163     GLSLC(1,    int crop_y;                                               );
 164     GLSLC(1,    int crop_w;                                               );
 165     GLSLC(1,    int crop_h;                                               );
 166     GLSLC(0, };                                                           );
 167     GLSLC(0,                                                              );
 168
 169     ff_vk_shader_add_push_const(&s->shd, 0, sizeof(s->opts),
 170                                 VK_SHADER_STAGE_COMPUTE_BIT);
 171
 172     desc = (FFVulkanDescriptorSetBinding []) {
 173         {
 174             .name       = "input_img",
 175             .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
 176             .dimensions = 2,
 177             .elems      = in_planes,
 178             .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
 179             .samplers   = DUP_SAMPLER(s->sampler),
 180         },
 181         {
 182             .name       = "output_img",
 183             .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
 184             .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format, FF_VK_REP_FLOAT),
 185             .mem_quali  = "writeonly",
 186             .dimensions = 2,
 187             .elems      = av_pix_fmt_count_planes(s->vkctx.output_format),
 188             .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
 189         },
 190     };
 191
 192     RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 2, 0, 0));
 193
 194     GLSLD(   scale_bilinear                                                  );
 195
 196     if (s->vkctx.output_format != s->vkctx.input_format) {
 197         GLSLD(   rgb2yuv                                                     );
 198     }
 199
 200     switch (s->vkctx.output_format) {
 201     case AV_PIX_FMT_NV12:    GLSLD(write_nv12); break;
 202     case AV_PIX_FMT_YUV420P: GLSLD( write_420); break;
 203     case AV_PIX_FMT_YUV444P: GLSLD( write_444); break;
 204     default: break;
 205     }
 206
 207     GLSLC(0, void main()                                                     );
 208     GLSLC(0, {                                                               );
 209     GLSLC(1,     ivec2 size;                                                 );
 210     GLSLC(1,     ivec2 pos = ivec2(gl_GlobalInvocationID.xy);                );
 211     GLSLF(1,     vec2 in_d = vec2(%i, %i);             ,in->width, in->height);
 212     GLSLC(1,     vec2 c_r = vec2(crop_w, crop_h) / in_d;                     );
 213     GLSLC(1,     vec2 c_o = vec2(crop_x, crop_y) / in_d;                     );
 214     GLSLC(0,                                                                 );
 215
 216     if (s->vkctx.output_format == s->vkctx.input_format) {
 217         for (int i = 0; i < desc[1].elems; i++) {
 218             GLSLF(1,  size = imageSize(output_img[%i]);                    ,i);
 219             GLSLC(1,  if (IS_WITHIN(pos, size)) {                            );
 220             switch (s->scaler) {
 221             case F_NEAREST:
 222             case F_BILINEAR:
 223                 GLSLF(2, vec4 res = scale_bilinear(%i, pos, c_r, c_o);     ,i);
 224                 GLSLF(2, imageStore(output_img[%i], pos, res);             ,i);
 225                 break;
 226             };
 227             GLSLC(1, }                                                       );
 228         }
 229     } else {
 230         GLSLC(1, vec4 res = scale_bilinear(0, pos, c_r, c_o);                );
 231         GLSLF(1, res = rgb2yuv(res, %i);    ,s->out_range == AVCOL_RANGE_JPEG);
 232         switch (s->vkctx.output_format) {
 233         case AV_PIX_FMT_NV12:    GLSLC(1, write_nv12(res, pos); ); break;
 234         case AV_PIX_FMT_YUV420P: GLSLC(1,  write_420(res, pos); ); break;
 235         case AV_PIX_FMT_YUV444P: GLSLC(1,  write_444(res, pos); ); break;
 236         default: return AVERROR(EINVAL);
 237         }
 238     }
 239
 240     GLSLC(0, }                                                               );
 241
 242     if (s->vkctx.output_format != s->vkctx.input_format) {
 243         const AVLumaCoefficients *lcoeffs;
 244         double tmp_mat[3][3];
 245
 246         lcoeffs = av_csp_luma_coeffs_from_avcsp(in->colorspace);
 247         if (!lcoeffs) {
 248             av_log(ctx, AV_LOG_ERROR, "Unsupported colorspace\n");
 249             return AVERROR(EINVAL);
 250         }
 251
 252         ff_fill_rgb2yuv_table(lcoeffs, tmp_mat);
 253
 254         for (int y = 0; y < 3; y++)
 255             for (int x = 0; x < 3; x++)
 256                 s->opts.yuv_matrix[x][y] = tmp_mat[x][y];
 257         s->opts.yuv_matrix[3][3] = 1.0;
 258     }
 259
 260     RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main",
 261                             &spv_opaque));
 262     RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main"));
 263
 264     RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd));
 265
 266     s->initialized = 1;
 267
 268 fail:
 269     if (spv_opaque)
 270         spv->free_shader(spv, &spv_opaque);
 271     if (spv)
 272         spv->uninit(&spv);
 273
 274     return err;
 275 }
 276
 277 static int scale_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
 278 {
 279     int err;
 280     AVFilterContext *ctx = link->dst;
 281     ScaleVulkanContext *s = ctx->priv;
 282     AVFilterLink *outlink = ctx->outputs[0];
 283
 284     AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
 285     if (!out) {
 286         err = AVERROR(ENOMEM);
 287         goto fail;
 288     }
 289
 290     if (!s->initialized)
 291         RET(init_filter(ctx, in));
 292
 293     s->opts.crop_x = in->crop_left;
 294     s->opts.crop_y = in->crop_top;
 295     s->opts.crop_w = in->width - (in->crop_left + in->crop_right);
 296     s->opts.crop_h = in->height - (in->crop_top + in->crop_bottom);
 297
 298     RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->shd, out, in,
 299                                     s->sampler, &s->opts, sizeof(s->opts)));
 300
 301     err = av_frame_copy_props(out, in);
 302     if (err < 0)
 303         goto fail;
 304
 305     if (out->width != in->width || out->height != in->height) {
 306         av_frame_side_data_remove_by_props(&out->side_data, &out->nb_side_data,
 307                                            AV_SIDE_DATA_PROP_SIZE_DEPENDENT);
 308     }
 309
 310     if (s->out_range != AVCOL_RANGE_UNSPECIFIED)
 311         out->color_range = s->out_range;
 312     if (s->vkctx.output_format != s->vkctx.input_format)
 313         out->chroma_location = AVCHROMA_LOC_TOPLEFT;
 314
 315     av_frame_free(&in);
 316
 317     return ff_filter_frame(outlink, out);
 318
 319 fail:
 320     av_frame_free(&in);
 321     av_frame_free(&out);
 322     return err;
 323 }
 324
 325 static int scale_vulkan_config_output(AVFilterLink *outlink)
 326 {
 327     int err;
 328     AVFilterContext *avctx = outlink->src;
 329     ScaleVulkanContext *s  = avctx->priv;
 330     FFVulkanContext *vkctx = &s->vkctx;
 331     AVFilterLink *inlink   = outlink->src->inputs[0];
 332
 333     err = ff_scale_eval_dimensions(s, s->w_expr, s->h_expr, inlink, outlink,
 334                                    &vkctx->output_width,
 335                                    &vkctx->output_height);
 336     if (err < 0)
 337         return err;
 338
 339     ff_scale_adjust_dimensions(inlink, &vkctx->output_width, &vkctx->output_height, 0, 1);
 340
 341     outlink->w = vkctx->output_width;
 342     outlink->h = vkctx->output_height;
 343
 344     if (s->out_format_string) {
 345         s->vkctx.output_format = av_get_pix_fmt(s->out_format_string);
 346         if (s->vkctx.output_format == AV_PIX_FMT_NONE) {
 347             av_log(avctx, AV_LOG_ERROR, "Invalid output format.\n");
 348             return AVERROR(EINVAL);
 349         }
 350     } else {
 351         s->vkctx.output_format = s->vkctx.input_format;
 352     }
 353
 354     if (s->vkctx.output_format != s->vkctx.input_format) {
 355         if (!ff_vk_mt_is_np_rgb(s->vkctx.input_format)) {
 356             av_log(avctx, AV_LOG_ERROR, "Unsupported input format for conversion\n");
 357             return AVERROR(EINVAL);
 358         }
 359         if (s->vkctx.output_format != AV_PIX_FMT_NV12 &&
 360             s->vkctx.output_format != AV_PIX_FMT_YUV420P &&
 361             s->vkctx.output_format != AV_PIX_FMT_YUV444P) {
 362             av_log(avctx, AV_LOG_ERROR, "Unsupported output format\n");
 363             return AVERROR(EINVAL);
 364         }
 365     } else if (s->out_range != AVCOL_RANGE_UNSPECIFIED) {
 366         av_log(avctx, AV_LOG_ERROR, "Cannot change range without converting format\n");
 367         return AVERROR(EINVAL);
 368     }
 369
 370     return ff_vk_filter_config_output(outlink);
 371 }
 372
 373 static void scale_vulkan_uninit(AVFilterContext *avctx)
 374 {
 375     ScaleVulkanContext *s = avctx->priv;
 376     FFVulkanContext *vkctx = &s->vkctx;
 377     FFVulkanFunctions *vk = &vkctx->vkfn;
 378
 379     ff_vk_exec_pool_free(vkctx, &s->e);
 380     ff_vk_shader_free(vkctx, &s->shd);
 381
 382     if (s->sampler)
 383         vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
 384                            vkctx->hwctx->alloc);
 385
 386     ff_vk_uninit(&s->vkctx);
 387
 388     s->initialized = 0;
 389 }
 390
 391 #define OFFSET(x) offsetof(ScaleVulkanContext, x)
 392 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
 393 static const AVOption scale_vulkan_options[] = {
 394     { "w", "Output video width",  OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, .flags = FLAGS },
 395     { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = FLAGS },
 396     { "scaler", "Scaler function", OFFSET(scaler), AV_OPT_TYPE_INT, {.i64 = F_BILINEAR}, 0, F_NB, .flags = FLAGS, .unit = "scaler" },
 397         { "bilinear", "Bilinear interpolation (fastest)", 0, AV_OPT_TYPE_CONST, {.i64 = F_BILINEAR}, 0, 0, .flags = FLAGS, .unit = "scaler" },
 398         { "nearest", "Nearest (useful for pixel art)", 0, AV_OPT_TYPE_CONST, {.i64 = F_NEAREST}, 0, 0, .flags = FLAGS, .unit = "scaler" },
 399     { "format", "Output video format (software format of hardware frames)", OFFSET(out_format_string), AV_OPT_TYPE_STRING, .flags = FLAGS },
 400     { "out_range", "Output colour range (from 0 to 2) (default 0)", OFFSET(out_range), AV_OPT_TYPE_INT, {.i64 = AVCOL_RANGE_UNSPECIFIED}, AVCOL_RANGE_UNSPECIFIED, AVCOL_RANGE_JPEG, .flags = FLAGS, .unit = "range" },
 401         { "full", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, .unit = "range" },
 402         { "limited", "Limited range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, .unit = "range" },
 403         { "jpeg", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, .unit = "range" },
 404         { "mpeg", "Limited range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, .unit = "range" },
 405         { "tv", "Limited range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, .unit = "range" },
 406         { "pc", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, .unit = "range" },
 407     { NULL },
 408 };
 409
 410 AVFILTER_DEFINE_CLASS(scale_vulkan);
 411
 412 static const AVFilterPad scale_vulkan_inputs[] = {
 413     {
 414         .name         = "default",
 415         .type         = AVMEDIA_TYPE_VIDEO,
 416         .filter_frame = &scale_vulkan_filter_frame,
 417         .config_props = &ff_vk_filter_config_input,
 418     },
 419 };
 420
 421 static const AVFilterPad scale_vulkan_outputs[] = {
 422     {
 423         .name = "default",
 424         .type = AVMEDIA_TYPE_VIDEO,
 425         .config_props = &scale_vulkan_config_output,
 426     },
 427 };
 428
 429 const FFFilter ff_vf_scale_vulkan = {
 430     .p.name         = "scale_vulkan",
 431     .p.description  = NULL_IF_CONFIG_SMALL("Scale Vulkan frames"),
 432     .p.priv_class   = &scale_vulkan_class,
 433     .p.flags        = AVFILTER_FLAG_HWDEVICE,
 434     .priv_size      = sizeof(ScaleVulkanContext),
 435     .init           = &ff_vk_filter_init,
 436     .uninit         = &scale_vulkan_uninit,
 437     FILTER_INPUTS(scale_vulkan_inputs),
 438     FILTER_OUTPUTS(scale_vulkan_outputs),
 439     FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN),
 440     .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
 441 };