/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/mem.h"
#include "libavutil/random_seed.h"
#include "libavutil/vulkan_spirv.h"
#include "libavutil/opt.h"
#include "vulkan_filter.h"

#include "filters.h"
#include "video.h"

#define TYPE_NAME  "vec4"
#define TYPE_ELEMS 4
#define TYPE_SIZE  (TYPE_ELEMS*4)

typedef struct NLMeansVulkanContext {
    FFVulkanContext vkctx;

    int initialized;
    FFVkExecPool e;
    AVVulkanDeviceQueueFamily *qf;
    VkSampler sampler;

    AVBufferPool *integral_buf_pool;
    AVBufferPool *ws_buf_pool;

    FFVkBuffer xyoffsets_buf;

    int pl_weights_rows;
    FFVulkanShader shd_weights;
    FFVulkanShader shd_denoise;

    int *xoffsets;
    int *yoffsets;
    int nb_offsets;
    float strength[4];
    int patch[4];

    struct nlmeans_opts {
        int r;
        double s;
        double sc[4];
        int p;
        int pc[4];
        int t;
    } opts;
} NLMeansVulkanContext;
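
/* Emits GLSL that samples the reference pixel (s1) and the four offset pixels
 * (s2[0..3]) for one plane/component, then replaces s2 with the squared
 * differences that feed the integral image. */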
static void insert_first(FFVulkanShader *shd, int r, const char *off, int horiz, int plane, int comp)
{
    GLSLF(4, s1    = texture(input_img[%i], pos + ivec2(%i + %s, %i + %s))[%i];
             ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);

    GLSLF(4, s2[0] = texture(input_img[%i], pos + offs[0] + ivec2(%i + %s, %i + %s))[%i];
             ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
    GLSLF(4, s2[1] = texture(input_img[%i], pos + offs[1] + ivec2(%i + %s, %i + %s))[%i];
             ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
    GLSLF(4, s2[2] = texture(input_img[%i], pos + offs[2] + ivec2(%i + %s, %i + %s))[%i];
             ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
    GLSLF(4, s2[3] = texture(input_img[%i], pos + offs[3] + ivec2(%i + %s, %i + %s))[%i];
             ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);

    GLSLC(4, s2 = (s1 - s2) * (s1 - s2);                                      );
}
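
/* Integral image construction: a horizontal prefix-sum pass over the squared
 * differences, combined with a matching vertical pass. On the first pass the
 * values are generated via insert_first(); later passes re-read them from the
 * integral buffer. */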
static void insert_horizontal_pass(FFVulkanShader *shd, int nb_rows, int first, int plane, int comp)
{
    GLSLF(1, pos.y = int(gl_GlobalInvocationID.x) * %i;                       ,nb_rows);
    if (!first)
        GLSLC(1, barrier();                                                   );
    GLSLF(1, if (pos.y < height[%i]) {                                        ,plane);
    GLSLC(2,     #pragma unroll(1)                                            );
    GLSLF(2,     for (r = 0; r < %i; r++) {                                   ,nb_rows);
    GLSLC(3,         prefix_sum = DTYPE(0);                                   );
    GLSLC(3,         offset = int_stride * uint64_t(pos.y + r);               );
    GLSLC(3,         dst = DataBuffer(uint64_t(integral_data) + offset);      );
    GLSLF(3,         for (pos.x = 0; pos.x < width[%i]; pos.x++) {            ,plane);
    if (first)
        insert_first(shd, 0, "r", 0, plane, comp);
    else
        GLSLC(4,             s2 = dst.v[pos.x];                               );
    GLSLC(4,             dst.v[pos.x] = s2 + prefix_sum;                      );
    GLSLC(4,             prefix_sum += s2;                                    );
    GLSLC(3,         }                                                        );
    GLSLC(2,     }                                                            );
    GLSLC(1, }                                                                );
}

static void insert_vertical_pass(FFVulkanShader *shd, int nb_rows, int first, int plane, int comp)
{
    GLSLF(1, pos.x = int(gl_GlobalInvocationID.x) * %i;                       ,nb_rows);
    GLSLC(1, #pragma unroll(1)                                                );
    GLSLF(1, for (r = 0; r < %i; r++)                                         ,nb_rows);
    GLSLC(2,     psum[r] = DTYPE(0);                                          );
    if (!first)
        GLSLC(1, barrier();                                                   );
    GLSLF(1, if (pos.x < width[%i]) {                                         ,plane);
    GLSLF(2,     for (pos.y = 0; pos.y < height[%i]; pos.y++) {               ,plane);
    GLSLC(3,         offset = int_stride * uint64_t(pos.y);                   );
    GLSLC(3,         dst = DataBuffer(uint64_t(integral_data) + offset);      );
    GLSLC(3,         #pragma unroll(1)                                        );
    GLSLF(3,         for (r = 0; r < %i; r++) {                               ,nb_rows);
    if (first)
        insert_first(shd, 0, "r", 1, plane, comp);
    else
        GLSLC(4,             s2 = dst.v[pos.x + r];                           );
    GLSLC(4,             dst.v[pos.x + r] = s2 + psum[r];                     );
    GLSLC(4,             psum[r] += s2;                                       );
    GLSLC(3,         }                                                        );
    GLSLC(2,     }                                                            );
    GLSLC(1, }                                                                );
}
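
/* Weights pass: for every pixel, the summed squared patch difference is read
 * from four corners of the integral image (d + a - b - c), converted to a
 * weight with exp(patch_diff * strength), and the weighted pixel values and
 * weight totals are accumulated into the sums_/weights_ buffers. */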
static void insert_weights_pass(FFVulkanShader *shd, int nb_rows, int vert,
                                int t, int dst_comp, int plane, int comp)
{
    GLSLF(1, p = patch_size[%i];                                              ,dst_comp);
    GLSLC(1, barrier();                                                       );
    if (!vert) {
        GLSLF(1, for (pos.y = 0; pos.y < height[%i]; pos.y++) {               ,plane);
        GLSLF(2,     if (gl_GlobalInvocationID.x*%i >= width[%i])             ,nb_rows, plane);
        GLSLC(3,         break;                                               );
        GLSLF(2,     for (r = 0; r < %i; r++) {                               ,nb_rows);
        GLSLF(3,         pos.x = int(gl_GlobalInvocationID.x) * %i + r;       ,nb_rows);
    } else {
        GLSLF(1, for (pos.x = 0; pos.x < width[%i]; pos.x++) {                ,plane);
        GLSLF(2,     if (gl_GlobalInvocationID.x*%i >= height[%i])            ,nb_rows, plane);
        GLSLC(3,         break;                                               );
        GLSLF(2,     for (r = 0; r < %i; r++) {                               ,nb_rows);
        GLSLF(3,         pos.y = int(gl_GlobalInvocationID.x) * %i + r;       ,nb_rows);
    }
    GLSLC(3,         a = DTYPE(0);                                            );
    GLSLC(3,         b = DTYPE(0);                                            );
    GLSLC(3,         c = DTYPE(0);                                            );
    GLSLC(3,         d = DTYPE(0);                                            );
    GLSLC(3,         lt = ((pos.x - p) < 0) || ((pos.y - p) < 0);             );
    GLSLF(3,         src[0] = texture(input_img[%i], pos + offs[0])[%i];      ,plane, comp);
    GLSLF(3,         src[1] = texture(input_img[%i], pos + offs[1])[%i];      ,plane, comp);
    GLSLF(3,         src[2] = texture(input_img[%i], pos + offs[2])[%i];      ,plane, comp);
    GLSLF(3,         src[3] = texture(input_img[%i], pos + offs[3])[%i];      ,plane, comp);
    GLSLC(3,         if (lt == false) {                                       );
    GLSLC(3,             offset = int_stride * uint64_t(pos.y - p);           );
    GLSLC(3,             dst = DataBuffer(uint64_t(integral_data) + offset);  );
    GLSLC(4,             a = dst.v[pos.x - p];                                );
    GLSLC(4,             c = dst.v[pos.x + p];                                );
    GLSLC(3,             offset = int_stride * uint64_t(pos.y + p);           );
    GLSLC(3,             dst = DataBuffer(uint64_t(integral_data) + offset);  );
    GLSLC(4,             b = dst.v[pos.x - p];                                );
    GLSLC(4,             d = dst.v[pos.x + p];                                );
    GLSLC(3,         }                                                        );
    GLSLC(3,         patch_diff = d + a - b - c;                              );
    GLSLF(3,         w = exp(patch_diff * strength[%i]);                      ,dst_comp);
    GLSLC(3,         w_sum = w[0] + w[1] + w[2] + w[3];                       );
    GLSLC(3,         sum = dot(w, src*255);                                   );
    if (t > 1) {
        GLSLF(3,     atomicAdd(weights_%i[pos.y*ws_stride[%i] + pos.x], w_sum); ,dst_comp, dst_comp);
        GLSLF(3,     atomicAdd(sums_%i[pos.y*ws_stride[%i] + pos.x], sum);      ,dst_comp, dst_comp);
    } else {
        GLSLF(3,     weights_%i[pos.y*ws_stride[%i] + pos.x] += w_sum;          ,dst_comp, dst_comp);
        GLSLF(3,     sums_%i[pos.y*ws_stride[%i] + pos.x] += sum;               ,dst_comp, dst_comp);
    }
    GLSLC(2,     }                                                            );
    GLSLC(1, }                                                                );
}
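
/* Push-constant data for the weights shader; the field order and types must
 * match the pushConstants block declared in init_weights_pipeline(). */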
typedef struct HorizontalPushData {
    uint32_t width[4];
    uint32_t height[4];
    uint32_t ws_stride[4];
    int32_t  patch_size[4];
    float    strength[4];
    VkDeviceAddress integral_base;
    uint64_t integral_size;
    uint64_t int_stride;
    uint32_t xyoffs_start;
} HorizontalPushData;
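
/* Builds the compute shader that generates the integral images and
 * accumulates the NLM weights. The workgroup is sized to cover the larger
 * frame dimension; when the frame is bigger than the device's maximum
 * workgroup size, each invocation handles *nb_rows rows/columns. */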
static av_cold int init_weights_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec,
                                         FFVulkanShader *shd,
                                         VkSampler sampler, FFVkSPIRVCompiler *spv,
                                         int width, int height, int t,
                                         const AVPixFmtDescriptor *desc,
                                         int planes, int *nb_rows)
{
    int err;
    uint8_t *spv_data;
    size_t spv_len;
    void *spv_opaque = NULL;
    FFVulkanDescriptorSetBinding *desc_set;
    int max_dim = FFMAX(width, height);
    uint32_t max_wg = vkctx->props.properties.limits.maxComputeWorkGroupSize[0];
    int wg_size, wg_rows;

    /* Round the max workgroup size to the previous power of two */
    wg_size = max_wg;
    wg_rows = 1;

    if (max_wg > max_dim) {
        wg_size = max_dim;
    } else if (max_wg < max_dim) {
        /* Make it fit */
        while (wg_size*wg_rows < max_dim)
            wg_rows++;
    }

    RET(ff_vk_shader_init(vkctx, shd, "nlmeans_weights",
                          VK_SHADER_STAGE_COMPUTE_BIT,
                          (const char *[]) { "GL_EXT_buffer_reference",
                                             "GL_EXT_buffer_reference2" }, 2,
                          wg_size, 1, 1,
                          0));
    *nb_rows = wg_rows;

    if (t > 1)
        GLSLC(0, #extension GL_EXT_shader_atomic_float : require             );
    GLSLC(0, #extension GL_ARB_gpu_shader_int64 : require                    );

    GLSLF(0, #define DTYPE %s                                                 ,TYPE_NAME);
    GLSLF(0, #define T_ALIGN %i                                               ,TYPE_SIZE);

    GLSLC(0, layout(buffer_reference, buffer_reference_align = T_ALIGN) buffer DataBuffer { );
    GLSLC(1,     DTYPE v[];                                                   );
    GLSLC(0, };                                                               );

    GLSLC(0, layout(push_constant, std430) uniform pushConstants {            );
    GLSLC(1,     uvec4 width;                                                 );
    GLSLC(1,     uvec4 height;                                                );
    GLSLC(1,     uvec4 ws_stride;                                             );
    GLSLC(1,     ivec4 patch_size;                                            );
    GLSLC(1,     vec4 strength;                                               );
    GLSLC(1,     DataBuffer integral_base;                                    );
    GLSLC(1,     uint64_t integral_size;                                      );
    GLSLC(1,     uint64_t int_stride;                                         );
    GLSLC(1,     uint xyoffs_start;                                           );
    GLSLC(0, };                                                               );

    ff_vk_shader_add_push_const(shd, 0, sizeof(HorizontalPushData),
                                VK_SHADER_STAGE_COMPUTE_BIT);

    desc_set = (FFVulkanDescriptorSetBinding []) {
        {
            .name       = "input_img",
            .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
            .dimensions = 2,
            .elems      = planes,
            .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
            .samplers   = DUP_SAMPLER(sampler),
        },
        {
            .name        = "weights_buffer_0",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float weights_0[];",
        },
        {
            .name        = "sums_buffer_0",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float sums_0[];",
        },
        {
            .name        = "weights_buffer_1",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float weights_1[];",
        },
        {
            .name        = "sums_buffer_1",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float sums_1[];",
        },
        {
            .name        = "weights_buffer_2",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float weights_2[];",
        },
        {
            .name        = "sums_buffer_2",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float sums_2[];",
        },
        {
            .name        = "weights_buffer_3",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float weights_3[];",
        },
        {
            .name        = "sums_buffer_3",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float sums_3[];",
        },
    };
    RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 1 + 2*desc->nb_components, 0, 0));

    desc_set = (FFVulkanDescriptorSetBinding []) {
        {
            .name        = "xyoffsets_buffer",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .mem_quali   = "readonly",
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "ivec2 xyoffsets[];",
        },
    };
    RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 1, 1, 0));

    GLSLC(0, void main()                                                      );
    GLSLC(0, {                                                                );
    GLSLC(1,     uint64_t offset;                                             );
    GLSLC(1,     DataBuffer dst;                                              );
    GLSLC(1,     float s1;                                                    );
    GLSLC(1,     DTYPE s2;                                                    );
    GLSLC(1,     DTYPE prefix_sum;                                            );
    GLSLF(1,     DTYPE psum[%i];                                              ,*nb_rows);
    GLSLC(1,     int r;                                                       );
    GLSLC(1,     ivec2 pos;                                                   );

    GLSLC(1,     DataBuffer integral_data;                                    );
    GLSLF(1,     ivec2 offs[%i];                                              ,TYPE_ELEMS);

    GLSLC(1,     int invoc_idx = int(gl_WorkGroupID.z);                       );

    GLSLC(1,     offset = integral_size * invoc_idx;                          );
    GLSLC(1,     integral_data = DataBuffer(uint64_t(integral_base) + offset); );
    for (int i = 0; i < TYPE_ELEMS; i++)
        GLSLF(1, offs[%i] = xyoffsets[xyoffs_start + %i*invoc_idx + %i];      ,i,TYPE_ELEMS,i);

    GLSLC(1,     int p;                                                       );
    GLSLC(1,     bool lt;                                                     );
    GLSLC(1,     DTYPE a;                                                     );
    GLSLC(1,     DTYPE b;                                                     );
    GLSLC(1,     DTYPE c;                                                     );
    GLSLC(1,     DTYPE d;                                                     );

    GLSLC(1,     DTYPE patch_diff;                                            );
    if (TYPE_ELEMS == 4) {
        GLSLC(1, vec4 src;                                                    );
        GLSLC(1, vec4 w;                                                      );
    } else {
        GLSLC(1, vec4 src[4];                                                 );
        GLSLC(1, vec4 w[4];                                                   );
    }
    GLSLC(1,     float w_sum;                                                 );
    GLSLC(1,     float sum;                                                   );

    for (int i = 0; i < desc->nb_components; i++) {
        int off = desc->comp[i].offset / (FFALIGN(desc->comp[i].depth, 8)/8);
        if (width >= height) {
            insert_horizontal_pass(shd, *nb_rows, 1, desc->comp[i].plane, off);
            insert_vertical_pass(shd, *nb_rows, 0, desc->comp[i].plane, off);
            insert_weights_pass(shd, *nb_rows, 0, t, i, desc->comp[i].plane, off);
        } else {
            insert_vertical_pass(shd, *nb_rows, 1, desc->comp[i].plane, off);
            insert_horizontal_pass(shd, *nb_rows, 0, desc->comp[i].plane, off);
            insert_weights_pass(shd, *nb_rows, 1, t, i, desc->comp[i].plane, off);
        }
    }

    GLSLC(0, }                                                                );

    RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", &spv_opaque));
    RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main"));

    RET(ff_vk_shader_register_exec(vkctx, exec, shd));

fail:
    if (spv_opaque)
        spv->free_shader(spv, &spv_opaque);

    return err;
}
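
/* The denoise pipeline reads the accumulated weights/sums buffers and writes
 * the final normalized result to the output image. */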
typedef struct DenoisePushData {
    uint32_t ws_stride[4];
} DenoisePushData;

static av_cold int init_denoise_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec,
                                         FFVulkanShader *shd,
                                         VkSampler sampler, FFVkSPIRVCompiler *spv,
                                         const AVPixFmtDescriptor *desc, int planes)
{
    int err;
    uint8_t *spv_data;
    size_t spv_len;
    void *spv_opaque = NULL;
    FFVulkanDescriptorSetBinding *desc_set;

    RET(ff_vk_shader_init(vkctx, shd, "nlmeans_denoise",
                          VK_SHADER_STAGE_COMPUTE_BIT,
                          (const char *[]) { "GL_EXT_buffer_reference",
                                             "GL_EXT_buffer_reference2" }, 2,
                          32, 32, 1,
                          0));

    GLSLC(0, layout(push_constant, std430) uniform pushConstants {            );
    GLSLC(1,     uvec4 ws_stride;                                             );
    GLSLC(0, };                                                               );

    ff_vk_shader_add_push_const(shd, 0, sizeof(DenoisePushData),
                                VK_SHADER_STAGE_COMPUTE_BIT);

    desc_set = (FFVulkanDescriptorSetBinding []) {
        {
            .name       = "input_img",
            .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
            .dimensions = 2,
            .elems      = planes,
            .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
            .samplers   = DUP_SAMPLER(sampler),
        },
        {
            .name       = "output_img",
            .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
            .mem_layout = ff_vk_shader_rep_fmt(vkctx->output_format, FF_VK_REP_FLOAT),
            .mem_quali  = "writeonly",
            .dimensions = 2,
            .elems      = planes,
            .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
        },
    };
    RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 2, 0, 0));

    desc_set = (FFVulkanDescriptorSetBinding []) {
        {
            .name        = "weights_buffer_0",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .mem_quali   = "readonly",
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float weights_0[];",
        },
        {
            .name        = "sums_buffer_0",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .mem_quali   = "readonly",
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float sums_0[];",
        },
        {
            .name        = "weights_buffer_1",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .mem_quali   = "readonly",
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float weights_1[];",
        },
        {
            .name        = "sums_buffer_1",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .mem_quali   = "readonly",
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float sums_1[];",
        },
        {
            .name        = "weights_buffer_2",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .mem_quali   = "readonly",
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float weights_2[];",
        },
        {
            .name        = "sums_buffer_2",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .mem_quali   = "readonly",
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float sums_2[];",
        },
        {
            .name        = "weights_buffer_3",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .mem_quali   = "readonly",
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float weights_3[];",
        },
        {
            .name        = "sums_buffer_3",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .mem_quali   = "readonly",
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float sums_3[];",
        },
    };
    RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 2*desc->nb_components, 0, 0));

    GLSLC(0, void main()                                                      );
    GLSLC(0, {                                                                );
    GLSLC(1,     ivec2 size;                                                  );
    GLSLC(1,     const ivec2 pos = ivec2(gl_GlobalInvocationID.xy);           );
    GLSLC(1,     const uint plane = uint(gl_WorkGroupID.z);                   );

    GLSLC(1,     float w_sum;                                                 );
    GLSLC(1,     float sum;                                                   );
    GLSLC(1,     vec4 src;                                                    );
    GLSLC(1,     vec4 r;                                                      );

    GLSLC(1,     size = imageSize(output_img[plane]);                         );
    GLSLC(1,     if (!IS_WITHIN(pos, size))                                   );
    GLSLC(2,         return;                                                  );

    GLSLC(1,     src = texture(input_img[plane], pos);                        );

    for (int c = 0; c < desc->nb_components; c++) {
        int off = desc->comp[c].offset / (FFALIGN(desc->comp[c].depth, 8)/8);
        GLSLF(1, if (plane == %i) {                                           ,desc->comp[c].plane);
        GLSLF(2,     w_sum = weights_%i[pos.y*ws_stride[%i] + pos.x];         ,c, c);
        GLSLF(2,     sum = sums_%i[pos.y*ws_stride[%i] + pos.x];              ,c, c);
        GLSLF(2,     r[%i] = (sum + src[%i]*255) / (1.0 + w_sum) / 255;       ,off, off);
        GLSLC(1, }                                                            );
    }

    GLSLC(1,     imageStore(output_img[plane], pos, r);                       );
    GLSLC(0, }                                                                );

    RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", &spv_opaque));
    RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main"));

    RET(ff_vk_shader_register_exec(vkctx, exec, shd));

fail:
    if (spv_opaque)
        spv->free_shader(spv, &spv_opaque);

    return err;
}
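
/* One-time filter setup: validates the options, precomputes the search-window
 * offsets and per-component strengths, uploads the offsets to a GPU buffer and
 * builds both compute pipelines. */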
static av_cold int init_filter(AVFilterContext *ctx)
{
    int err;
    int xcnt = 0, ycnt = 0;
    NLMeansVulkanContext *s = ctx->priv;
    FFVulkanContext *vkctx = &s->vkctx;
    const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
    FFVkSPIRVCompiler *spv = NULL;
    int *offsets_buf;
    int offsets_dispatched = 0, nb_dispatches = 0;

    const AVPixFmtDescriptor *desc;
    desc = av_pix_fmt_desc_get(vkctx->output_format);
    if (!desc)
        return AVERROR(EINVAL);

    if (!(s->opts.r & 1)) {
        s->opts.r |= 1;
        av_log(ctx, AV_LOG_WARNING, "Research size should be odd, setting to %i",
               s->opts.r);
    }

    if (!(s->opts.p & 1)) {
        s->opts.p |= 1;
        av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i",
               s->opts.p);
    }

    for (int i = 0; i < 4; i++) {
        double str = (s->opts.sc[i] > 1.0) ? s->opts.sc[i] : s->opts.s;
        int ps = (s->opts.pc[i] ? s->opts.pc[i] : s->opts.p);
        str = 255.0*255.0 / str;
        s->strength[i] = str;
        if (!(ps & 1)) {
            ps |= 1;
            av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i",
                   ps);
        }
        s->patch[i] = ps / 2;
    }

    const int rad = s->opts.r/2;
    s->nb_offsets = (2*rad + 1)*(2*rad + 1) - 1;
    s->xoffsets = av_malloc(s->nb_offsets*sizeof(*s->xoffsets));
    s->yoffsets = av_malloc(s->nb_offsets*sizeof(*s->yoffsets));

    for (int x = -rad; x <= rad; x++) {
        for (int y = -rad; y <= rad; y++) {
            if (!x && !y)
                continue;

            s->xoffsets[xcnt++] = x;
            s->yoffsets[ycnt++] = y;
        }
    }

    RET(ff_vk_create_buf(&s->vkctx, &s->xyoffsets_buf, 2*s->nb_offsets*sizeof(int32_t), NULL, NULL,
                         VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
                         VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
    RET(ff_vk_map_buffer(&s->vkctx, &s->xyoffsets_buf, (uint8_t **)&offsets_buf, 0));

    for (int i = 0; i < 2*s->nb_offsets; i += 2) {
        offsets_buf[i + 0] = s->xoffsets[i >> 1];
        offsets_buf[i + 1] = s->yoffsets[i >> 1];
    }

    RET(ff_vk_unmap_buffer(&s->vkctx, &s->xyoffsets_buf, 1));

    s->opts.t = FFMIN(s->opts.t, (FFALIGN(s->nb_offsets, TYPE_ELEMS) / TYPE_ELEMS));
    if (!vkctx->atomic_float_feats.shaderBufferFloat32AtomicAdd) {
        av_log(ctx, AV_LOG_WARNING, "Device doesn't support atomic float adds, "
               "disabling dispatch parallelism\n");
        s->opts.t = 1;
    }

    spv = ff_vk_spirv_init();
    if (!spv) {
        av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
        return AVERROR_EXTERNAL;
    }

    s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0);
    if (!s->qf) {
        av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n");
        err = AVERROR(ENOTSUP);
        goto fail;
    }

    RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, 1, 0, 0, 0, NULL));
    RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST));

    RET(init_weights_pipeline(vkctx, &s->e, &s->shd_weights, s->sampler,
                              spv, s->vkctx.output_width, s->vkctx.output_height,
                              s->opts.t, desc, planes, &s->pl_weights_rows));

    RET(init_denoise_pipeline(vkctx, &s->e, &s->shd_denoise, s->sampler,
                              spv, desc, planes));

    RET(ff_vk_shader_update_desc_buffer(vkctx, &s->e.contexts[0], &s->shd_weights, 1, 0, 0,
                                        &s->xyoffsets_buf, 0, s->xyoffsets_buf.size,
                                        VK_FORMAT_UNDEFINED));

    do {
        int wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t);
        wg_invoc = FFMIN(wg_invoc, vkctx->props.properties.limits.maxComputeWorkGroupCount[2]);
        offsets_dispatched += wg_invoc * TYPE_ELEMS;
        nb_dispatches++;
    } while (offsets_dispatched < s->nb_offsets);

    av_log(ctx, AV_LOG_VERBOSE, "Filter initialized, %i x/y offsets, %i dispatches\n",
           s->nb_offsets, nb_dispatches);

    s->initialized = 1;

fail:
    if (spv)
        spv->uninit(&spv);

    return err;
}
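
/* Records the final compute pass: waits for the weights/sums writes, then
 * dispatches the denoise shader over the output frame. */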
static int denoise_pass(NLMeansVulkanContext *s, FFVkExecContext *exec,
                        FFVkBuffer *ws_vk, uint32_t ws_stride[4])
{
    FFVulkanContext *vkctx = &s->vkctx;
    FFVulkanFunctions *vk = &vkctx->vkfn;
    VkBufferMemoryBarrier2 buf_bar[8];
    int nb_buf_bar = 0;

    DenoisePushData pd = {
        { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
    };

    /* Denoise pass pipeline */
    ff_vk_exec_bind_shader(vkctx, exec, &s->shd_denoise);

    /* Push data */
    ff_vk_shader_update_push_const(vkctx, exec, &s->shd_denoise,
                                   VK_SHADER_STAGE_COMPUTE_BIT,
                                   0, sizeof(pd), &pd);

    buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
        .srcStageMask = ws_vk->stage,
        .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
        .srcAccessMask = ws_vk->access,
        .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .buffer = ws_vk->buf,
        .size = ws_vk->size,
        .offset = 0,
    };

    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
            .pBufferMemoryBarriers = buf_bar,
            .bufferMemoryBarrierCount = nb_buf_bar,
        });
    ws_vk->stage = buf_bar[0].dstStageMask;
    ws_vk->access = buf_bar[0].dstAccessMask;

    /* End of denoise pass */
    vk->CmdDispatch(exec->buf,
                    FFALIGN(vkctx->output_width,  s->shd_denoise.lg_size[0])/s->shd_denoise.lg_size[0],
                    FFALIGN(vkctx->output_height, s->shd_denoise.lg_size[1])/s->shd_denoise.lg_size[1],
                    av_pix_fmt_count_planes(s->vkctx.output_format));

    return 0;
}
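
/* Per-frame entry point: allocates the integral and weights/sums buffers,
 * records the weights dispatches (one per group of offsets) followed by the
 * denoise pass, and submits the command buffer. */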
static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
{
    int err;
    AVFrame *out = NULL;
    AVFilterContext *ctx = link->dst;
    NLMeansVulkanContext *s = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];
    FFVulkanContext *vkctx = &s->vkctx;
    FFVulkanFunctions *vk = &vkctx->vkfn;

    const AVPixFmtDescriptor *desc;
    int plane_widths[4];
    int plane_heights[4];

    int offsets_dispatched = 0;

    /* Integral image */
    AVBufferRef *integral_buf = NULL;
    FFVkBuffer *integral_vk;
    uint32_t int_stride;
    uint32_t int_size;

    /* Weights/sums */
    AVBufferRef *ws_buf = NULL;
    FFVkBuffer *ws_vk;
    VkDeviceSize weights_offs[4];
    VkDeviceSize sums_offs[4];
    uint32_t ws_stride[4];
    size_t ws_size[4];
    size_t ws_total_size = 0;

    FFVkExecContext *exec;
    VkImageView in_views[AV_NUM_DATA_POINTERS];
    VkImageView out_views[AV_NUM_DATA_POINTERS];
    VkImageMemoryBarrier2 img_bar[8];
    int nb_img_bar = 0;
    VkBufferMemoryBarrier2 buf_bar[8];
    int nb_buf_bar = 0;

    if (!s->initialized)
        RET(init_filter(ctx));

    desc = av_pix_fmt_desc_get(vkctx->output_format);
    if (!desc)
        return AVERROR(EINVAL);

    int_stride = s->shd_weights.lg_size[0]*s->pl_weights_rows*TYPE_SIZE;
    int_size   = s->shd_weights.lg_size[0]*s->pl_weights_rows*int_stride;

    /* Plane dimensions */
    for (int i = 0; i < desc->nb_components; i++) {
        plane_widths[i]  = !i || (i == 3) ? vkctx->output_width  : AV_CEIL_RSHIFT(vkctx->output_width,  desc->log2_chroma_w);
        plane_heights[i] = !i || (i == 3) ? vkctx->output_height : AV_CEIL_RSHIFT(vkctx->output_height, desc->log2_chroma_h);
        plane_widths[i]  = FFALIGN(plane_widths[i],  s->shd_denoise.lg_size[0]);
        plane_heights[i] = FFALIGN(plane_heights[i], s->shd_denoise.lg_size[1]);

        ws_stride[i] = plane_widths[i];
        ws_size[i]   = ws_stride[i] * plane_heights[i] * sizeof(float);
        ws_total_size += ws_size[i];
    }

    /* Integral image buffer */
    err = ff_vk_get_pooled_buffer(&s->vkctx, &s->integral_buf_pool, &integral_buf,
                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
                                  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
                                  NULL,
                                  s->opts.t * int_size,
                                  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
    if (err < 0)
        return err;
    integral_vk = (FFVkBuffer *)integral_buf->data;

    /* Weights/sums buffer */
    err = ff_vk_get_pooled_buffer(&s->vkctx, &s->ws_buf_pool, &ws_buf,
                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
                                  VK_BUFFER_USAGE_TRANSFER_DST_BIT |
                                  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
                                  NULL,
                                  ws_total_size*2,
                                  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
    if (err < 0)
        return err;
    ws_vk = (FFVkBuffer *)ws_buf->data;

    /* Offsets of each plane's weights and sums within the buffer */
    weights_offs[0] = 0;
    sums_offs[0] = ws_total_size;
    for (int i = 1; i < desc->nb_components; i++) {
        weights_offs[i] = weights_offs[i - 1] + ws_size[i - 1];
        sums_offs[i]    = sums_offs[i - 1]    + ws_size[i - 1];
    }

    /* Output frame */
    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
    if (!out) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    /* Execution context */
    exec = ff_vk_exec_get(&s->vkctx, &s->e);
    ff_vk_exec_start(vkctx, exec);

    /* Dependencies */
    RET(ff_vk_exec_add_dep_frame(vkctx, exec, in,
                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
    RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));

    RET(ff_vk_exec_add_dep_buf(vkctx, exec, &integral_buf, 1, 0));
    integral_buf = NULL;

    RET(ff_vk_exec_add_dep_buf(vkctx, exec, &ws_buf, 1, 0));
    ws_buf = NULL;

    /* Input frame prep */
    RET(ff_vk_create_imageviews(vkctx, exec, in_views, in, FF_VK_REP_FLOAT));
    ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                        VK_ACCESS_SHADER_READ_BIT,
                        VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                        VK_QUEUE_FAMILY_IGNORED);

    /* Output frame prep */
    RET(ff_vk_create_imageviews(vkctx, exec, out_views, out, FF_VK_REP_FLOAT));
    ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                        VK_ACCESS_SHADER_WRITE_BIT,
                        VK_IMAGE_LAYOUT_GENERAL,
                        VK_QUEUE_FAMILY_IGNORED);

    buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
        .srcStageMask = ws_vk->stage,
        .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
        .srcAccessMask = ws_vk->access,
        .dstAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .buffer = ws_vk->buf,
        .size = ws_vk->size,
        .offset = 0,
    };
    buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
        .srcStageMask = integral_vk->stage,
        .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
        .srcAccessMask = integral_vk->access,
        .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
                         VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .buffer = integral_vk->buf,
        .size = integral_vk->size,
        .offset = 0,
    };

    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
            .pImageMemoryBarriers = img_bar,
            .imageMemoryBarrierCount = nb_img_bar,
            .pBufferMemoryBarriers = buf_bar,
            .bufferMemoryBarrierCount = nb_buf_bar,
        });
    ws_vk->stage = buf_bar[0].dstStageMask;
    ws_vk->access = buf_bar[0].dstAccessMask;
    integral_vk->stage = buf_bar[1].dstStageMask;
    integral_vk->access = buf_bar[1].dstAccessMask;

    /* Zero the weights/sums buffer before accumulation */
    vk->CmdFillBuffer(exec->buf, ws_vk->buf, 0, ws_vk->size, 0x0);

    nb_buf_bar = 0;
    buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
        .srcStageMask = ws_vk->stage,
        .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
        .srcAccessMask = ws_vk->access,
        .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
                         VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .buffer = ws_vk->buf,
        .size = ws_vk->size,
        .offset = 0,
    };

    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
            .pBufferMemoryBarriers = buf_bar,
            .bufferMemoryBarrierCount = nb_buf_bar,
        });
    ws_vk->stage = buf_bar[0].dstStageMask;
    ws_vk->access = buf_bar[0].dstAccessMask;

    /* Update weights descriptors */
    ff_vk_shader_update_img_array(vkctx, exec, &s->shd_weights, in, in_views, 0, 0,
                                  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                                  s->sampler);
    for (int i = 0; i < desc->nb_components; i++) {
        RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_weights, 0, 1 + i*2 + 0, 0,
                                            ws_vk, weights_offs[i], ws_size[i],
                                            VK_FORMAT_UNDEFINED));
        RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_weights, 0, 1 + i*2 + 1, 0,
                                            ws_vk, sums_offs[i], ws_size[i],
                                            VK_FORMAT_UNDEFINED));
    }

    /* Update denoise descriptors */
    ff_vk_shader_update_img_array(vkctx, exec, &s->shd_denoise, in, in_views, 0, 0,
                                  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                                  s->sampler);
    ff_vk_shader_update_img_array(vkctx, exec, &s->shd_denoise, out, out_views, 0, 1,
                                  VK_IMAGE_LAYOUT_GENERAL, s->sampler);
    for (int i = 0; i < desc->nb_components; i++) {
        RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_denoise, 1, i*2 + 0, 0,
                                            ws_vk, weights_offs[i], ws_size[i],
                                            VK_FORMAT_UNDEFINED));
        RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_denoise, 1, i*2 + 1, 0,
                                            ws_vk, sums_offs[i], ws_size[i],
                                            VK_FORMAT_UNDEFINED));
    }

    /* Weights pipeline */
    ff_vk_exec_bind_shader(vkctx, exec, &s->shd_weights);

    do {
        int wg_invoc;
        HorizontalPushData pd = {
            { plane_widths[0],  plane_widths[1],  plane_widths[2],  plane_widths[3]  },
            { plane_heights[0], plane_heights[1], plane_heights[2], plane_heights[3] },
            { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
            { s->patch[0], s->patch[1], s->patch[2], s->patch[3] },
            { s->strength[0], s->strength[1], s->strength[2], s->strength[3], },
            integral_vk->address,
            (uint64_t)int_size,
            (uint64_t)int_stride,
            offsets_dispatched,
        };

        /* Push data */
        ff_vk_shader_update_push_const(vkctx, exec, &s->shd_weights,
                                       VK_SHADER_STAGE_COMPUTE_BIT,
                                       0, sizeof(pd), &pd);

        if (offsets_dispatched) {
            nb_buf_bar = 0;
            buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
                .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
                .srcStageMask = integral_vk->stage,
                .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                .srcAccessMask = integral_vk->access,
                .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
                                 VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .buffer = integral_vk->buf,
                .size = integral_vk->size,
                .offset = 0,
            };

            vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
                    .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
                    .pBufferMemoryBarriers = buf_bar,
                    .bufferMemoryBarrierCount = nb_buf_bar,
                });
            integral_vk->stage = buf_bar[0].dstStageMask;
            integral_vk->access = buf_bar[0].dstAccessMask;
        }

        wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t);
        wg_invoc = FFMIN(wg_invoc, vkctx->props.properties.limits.maxComputeWorkGroupCount[2]);

        /* End of horizontal pass */
        vk->CmdDispatch(exec->buf, 1, 1, wg_invoc);

        offsets_dispatched += wg_invoc * TYPE_ELEMS;
    } while (offsets_dispatched < s->nb_offsets);

    RET(denoise_pass(s, exec, ws_vk, ws_stride));

    err = ff_vk_exec_submit(vkctx, exec);
    if (err < 0)
        return err;

    err = av_frame_copy_props(out, in);
    if (err < 0)
        goto fail;

    av_frame_free(&in);

    return ff_filter_frame(outlink, out);

fail:
    av_buffer_unref(&integral_buf);
    av_buffer_unref(&ws_buf);
    av_frame_free(&in);
    av_frame_free(&out);
    return err;
}

static void nlmeans_vulkan_uninit(AVFilterContext *avctx)
{
    NLMeansVulkanContext *s = avctx->priv;
    FFVulkanContext *vkctx = &s->vkctx;
    FFVulkanFunctions *vk = &vkctx->vkfn;

    ff_vk_exec_pool_free(vkctx, &s->e);
    ff_vk_shader_free(vkctx, &s->shd_weights);
    ff_vk_shader_free(vkctx, &s->shd_denoise);

    av_buffer_pool_uninit(&s->integral_buf_pool);
    av_buffer_pool_uninit(&s->ws_buf_pool);

    if (s->sampler)
        vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
                           vkctx->hwctx->alloc);

    ff_vk_uninit(&s->vkctx);

    av_freep(&s->xoffsets);
    av_freep(&s->yoffsets);

    s->initialized = 0;
}

#define OFFSET(x) offsetof(NLMeansVulkanContext, x)
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
static const AVOption nlmeans_vulkan_options[] = {
    { "s",  "denoising strength for all components", OFFSET(opts.s), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
    { "p",  "patch size for all components", OFFSET(opts.p), AV_OPT_TYPE_INT, { .i64 = 3*2+1 }, 0, 99, FLAGS },
    { "r",  "research window radius", OFFSET(opts.r), AV_OPT_TYPE_INT, { .i64 = 7*2+1 }, 0, 99, FLAGS },
    { "t",  "parallelism", OFFSET(opts.t), AV_OPT_TYPE_INT, { .i64 = 36 }, 1, 168, FLAGS },

    { "s1", "denoising strength for component 1", OFFSET(opts.sc[0]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
    { "s2", "denoising strength for component 2", OFFSET(opts.sc[1]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
    { "s3", "denoising strength for component 3", OFFSET(opts.sc[2]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
    { "s4", "denoising strength for component 4", OFFSET(opts.sc[3]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },

    { "p1", "patch size for component 1", OFFSET(opts.pc[0]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
    { "p2", "patch size for component 2", OFFSET(opts.pc[1]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
    { "p3", "patch size for component 3", OFFSET(opts.pc[2]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
    { "p4", "patch size for component 4", OFFSET(opts.pc[3]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },

    { NULL }
};

AVFILTER_DEFINE_CLASS(nlmeans_vulkan);

static const AVFilterPad nlmeans_vulkan_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .filter_frame = &nlmeans_vulkan_filter_frame,
        .config_props = &ff_vk_filter_config_input,
    },
};

static const AVFilterPad nlmeans_vulkan_outputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .config_props = &ff_vk_filter_config_output,
    },
};

const FFFilter ff_vf_nlmeans_vulkan = {
    .p.name         = "nlmeans_vulkan",
    .p.description  = NULL_IF_CONFIG_SMALL("Non-local means denoiser (Vulkan)"),
    .p.priv_class   = &nlmeans_vulkan_class,
    .p.flags        = AVFILTER_FLAG_HWDEVICE,
    .priv_size      = sizeof(NLMeansVulkanContext),
    .init           = &ff_vk_filter_init,
    .uninit         = &nlmeans_vulkan_uninit,
    FILTER_INPUTS(nlmeans_vulkan_inputs),
    FILTER_OUTPUTS(nlmeans_vulkan_outputs),
    FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN),
    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
};