2 * Copyright (c) 2015-2016 mawen1250
3 * Copyright (c) 2018 Paul B Mahol
5 * This file is part of FFmpeg.
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in all
15 * copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 * - opponent color space
34 #include "libavutil/cpu.h"
35 #include "libavutil/imgutils.h"
36 #include "libavutil/mem.h"
37 #include "libavutil/opt.h"
38 #include "libavutil/pixdesc.h"
39 #include "libavutil/tx.h"
42 #include "framesync.h"
45 #define MAX_NB_THREADS 32
53 typedef struct ThreadData
{
61 typedef struct PosCode
{
65 typedef struct PosPairCode
{
70 typedef struct SliceContext
{
71 AVTXContext
*gdctf
, *gdcti
;
72 av_tx_fn tx_fn_g
, itx_fn_g
;
73 AVTXContext
*dctf
, *dcti
;
74 av_tx_fn tx_fn
, itx_fn
;
85 PosPairCode match_blocks
[256];
87 PosCode
*search_positions
;
90 typedef struct BM3DContext
{
100 float hard_threshold
;
113 SliceContext slices
[MAX_NB_THREADS
];
118 void (*get_block_row
)(const uint8_t *srcp
, int src_linesize
,
119 int y
, int x
, int block_size
, float *dst
);
120 double (*do_block_ssd
)(struct BM3DContext
*s
, PosCode
*pos
,
121 const uint8_t *src
, int src_stride
,
123 void (*do_output
)(struct BM3DContext
*s
, uint8_t *dst
, int dst_linesize
,
124 int plane
, int nb_jobs
);
125 void (*block_filtering
)(struct BM3DContext
*s
,
126 const uint8_t *src
, int src_linesize
,
127 const uint8_t *ref
, int ref_linesize
,
128 int y
, int x
, int plane
, int jobnr
);
131 #define OFFSET(x) offsetof(BM3DContext, x)
132 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
134 static const AVOption bm3d_options
[] = {
135 { "sigma", "set denoising strength",
136 OFFSET(sigma
), AV_OPT_TYPE_FLOAT
, {.dbl
=1}, 0, 99999.9, FLAGS
},
137 { "block", "set size of local patch",
138 OFFSET(block_size
), AV_OPT_TYPE_INT
, {.i64
=16}, 8, 64, FLAGS
},
139 { "bstep", "set sliding step for processing blocks",
140 OFFSET(block_step
), AV_OPT_TYPE_INT
, {.i64
=4}, 1, 64, FLAGS
},
141 { "group", "set maximal number of similar blocks",
142 OFFSET(group_size
), AV_OPT_TYPE_INT
, {.i64
=1}, 1, 256, FLAGS
},
143 { "range", "set block matching range",
144 OFFSET(bm_range
), AV_OPT_TYPE_INT
, {.i64
=9}, 1, INT32_MAX
, FLAGS
},
145 { "mstep", "set step for block matching",
146 OFFSET(bm_step
), AV_OPT_TYPE_INT
, {.i64
=1}, 1, 64, FLAGS
},
147 { "thmse", "set threshold of mean square error for block matching",
148 OFFSET(th_mse
), AV_OPT_TYPE_FLOAT
, {.dbl
=0}, 0, INT32_MAX
, FLAGS
},
149 { "hdthr", "set hard threshold for 3D transfer domain",
150 OFFSET(hard_threshold
), AV_OPT_TYPE_FLOAT
, {.dbl
=2.7}, 0, INT32_MAX
, FLAGS
},
151 { "estim", "set filtering estimation mode",
152 OFFSET(mode
), AV_OPT_TYPE_INT
, {.i64
=BASIC
}, 0, NB_MODES
-1, FLAGS
, .unit
= "mode" },
153 { "basic", "basic estimate",
154 0, AV_OPT_TYPE_CONST
, {.i64
=BASIC
}, 0, 0, FLAGS
, .unit
= "mode" },
155 { "final", "final estimate",
156 0, AV_OPT_TYPE_CONST
, {.i64
=FINAL
}, 0, 0, FLAGS
, .unit
= "mode" },
157 { "ref", "have reference stream",
158 OFFSET(ref
), AV_OPT_TYPE_BOOL
, {.i64
=0}, 0, 1, FLAGS
},
159 { "planes", "set planes to filter",
160 OFFSET(planes
), AV_OPT_TYPE_INT
, {.i64
=7}, 0, 15, FLAGS
},
164 AVFILTER_DEFINE_CLASS(bm3d
);
166 static const enum AVPixelFormat pix_fmts
[] = {
167 AV_PIX_FMT_GRAY8
, AV_PIX_FMT_GRAY9
, AV_PIX_FMT_GRAY10
,
168 AV_PIX_FMT_GRAY12
, AV_PIX_FMT_GRAY14
, AV_PIX_FMT_GRAY16
,
169 AV_PIX_FMT_YUV410P
, AV_PIX_FMT_YUV411P
,
170 AV_PIX_FMT_YUV420P
, AV_PIX_FMT_YUV422P
,
171 AV_PIX_FMT_YUV440P
, AV_PIX_FMT_YUV444P
,
172 AV_PIX_FMT_YUVJ420P
, AV_PIX_FMT_YUVJ422P
,
173 AV_PIX_FMT_YUVJ440P
, AV_PIX_FMT_YUVJ444P
,
175 AV_PIX_FMT_YUV420P9
, AV_PIX_FMT_YUV422P9
, AV_PIX_FMT_YUV444P9
,
176 AV_PIX_FMT_YUV420P10
, AV_PIX_FMT_YUV422P10
, AV_PIX_FMT_YUV444P10
,
177 AV_PIX_FMT_YUV440P10
,
178 AV_PIX_FMT_YUV444P12
, AV_PIX_FMT_YUV422P12
, AV_PIX_FMT_YUV420P12
,
179 AV_PIX_FMT_YUV440P12
,
180 AV_PIX_FMT_YUV444P14
, AV_PIX_FMT_YUV422P14
, AV_PIX_FMT_YUV420P14
,
181 AV_PIX_FMT_YUV420P16
, AV_PIX_FMT_YUV422P16
, AV_PIX_FMT_YUV444P16
,
182 AV_PIX_FMT_GBRP
, AV_PIX_FMT_GBRP9
, AV_PIX_FMT_GBRP10
,
183 AV_PIX_FMT_GBRP12
, AV_PIX_FMT_GBRP14
, AV_PIX_FMT_GBRP16
,
184 AV_PIX_FMT_YUVA420P
, AV_PIX_FMT_YUVA422P
, AV_PIX_FMT_YUVA444P
,
185 AV_PIX_FMT_YUVA444P9
, AV_PIX_FMT_YUVA444P10
, AV_PIX_FMT_YUVA444P12
, AV_PIX_FMT_YUVA444P16
,
186 AV_PIX_FMT_YUVA422P9
, AV_PIX_FMT_YUVA422P10
, AV_PIX_FMT_YUVA422P12
, AV_PIX_FMT_YUVA422P16
,
187 AV_PIX_FMT_YUVA420P9
, AV_PIX_FMT_YUVA420P10
, AV_PIX_FMT_YUVA420P16
,
188 AV_PIX_FMT_GBRAP
, AV_PIX_FMT_GBRAP10
, AV_PIX_FMT_GBRAP12
, AV_PIX_FMT_GBRAP16
,
192 static int do_search_boundary(int pos
, int plane_boundary
, int search_range
, int search_step
)
196 search_range
= search_range
/ search_step
* search_step
;
198 if (pos
== plane_boundary
) {
199 search_boundary
= plane_boundary
;
200 } else if (pos
> plane_boundary
) {
201 search_boundary
= pos
- search_range
;
203 while (search_boundary
< plane_boundary
) {
204 search_boundary
+= search_step
;
207 search_boundary
= pos
+ search_range
;
209 while (search_boundary
> plane_boundary
) {
210 search_boundary
-= search_step
;
214 return search_boundary
;
217 static int search_boundary(int plane_boundary
, int search_range
, int search_step
, int vertical
, int y
, int x
)
219 return do_search_boundary(vertical
? y
: x
, plane_boundary
, search_range
, search_step
);
222 static int cmp_scores(const void *a
, const void *b
)
224 const struct PosPairCode
*pair1
= a
;
225 const struct PosPairCode
*pair2
= b
;
226 return FFDIFFSIGN(pair1
->score
, pair2
->score
);
229 static double do_block_ssd(BM3DContext
*s
, PosCode
*pos
, const uint8_t *src
, int src_stride
, int r_y
, int r_x
)
231 const uint8_t *srcp
= src
+ pos
->y
* src_stride
+ pos
->x
;
232 const uint8_t *refp
= src
+ r_y
* src_stride
+ r_x
;
233 const int block_size
= s
->block_size
;
237 for (y
= 0; y
< block_size
; y
++) {
238 for (x
= 0; x
< block_size
; x
++) {
239 double temp
= refp
[x
] - srcp
[x
];
250 static double do_block_ssd16(BM3DContext
*s
, PosCode
*pos
, const uint8_t *src
, int src_stride
, int r_y
, int r_x
)
252 const uint16_t *srcp
= (uint16_t *)src
+ pos
->y
* src_stride
/ 2 + pos
->x
;
253 const uint16_t *refp
= (uint16_t *)src
+ r_y
* src_stride
/ 2 + r_x
;
254 const int block_size
= s
->block_size
;
258 for (y
= 0; y
< block_size
; y
++) {
259 for (x
= 0; x
< block_size
; x
++) {
260 double temp
= refp
[x
] - srcp
[x
];
264 srcp
+= src_stride
/ 2;
265 refp
+= src_stride
/ 2;
271 static void do_block_matching_multi(BM3DContext
*s
, const uint8_t *src
, int src_stride
, int src_range
,
272 const PosCode
*search_pos
, int search_size
, float th_mse
,
273 int r_y
, int r_x
, int plane
, int jobnr
)
275 SliceContext
*sc
= &s
->slices
[jobnr
];
276 double MSE2SSE
= s
->group_size
* s
->block_size
* s
->block_size
* src_range
* src_range
/ (double)(s
->max
* s
->max
);
277 double distMul
= 1. / MSE2SSE
;
278 double th_sse
= th_mse
* MSE2SSE
;
279 int index
= sc
->nb_match_blocks
;
281 for (int i
= 0; i
< search_size
; i
++) {
282 PosCode pos
= search_pos
[i
];
285 dist
= s
->do_block_ssd(s
, &pos
, src
, src_stride
, r_y
, r_x
);
287 // Only match similar blocks but not identical blocks
288 if (dist
<= th_sse
&& dist
!= 0) {
289 const double score
= dist
* distMul
;
291 if (index
>= s
->group_size
&& score
>= sc
->match_blocks
[index
- 1].score
) {
295 if (index
>= s
->group_size
)
296 index
= s
->group_size
- 1;
298 sc
->match_blocks
[index
].score
= score
;
299 sc
->match_blocks
[index
].y
= pos
.y
;
300 sc
->match_blocks
[index
].x
= pos
.x
;
302 qsort(sc
->match_blocks
, index
, sizeof(PosPairCode
), cmp_scores
);
306 sc
->nb_match_blocks
= index
;
309 static void block_matching_multi(BM3DContext
*s
, const uint8_t *ref
, int ref_linesize
, int y
, int x
,
310 int exclude_cur_pos
, int plane
, int jobnr
)
312 SliceContext
*sc
= &s
->slices
[jobnr
];
313 const int width
= s
->planewidth
[plane
];
314 const int height
= s
->planeheight
[plane
];
315 const int block_size
= s
->block_size
;
316 const int step
= s
->bm_step
;
317 const int range
= s
->bm_range
/ step
* step
;
318 int l
= search_boundary(0, range
, step
, 0, y
, x
);
319 int r
= search_boundary(width
- block_size
, range
, step
, 0, y
, x
);
320 int t
= search_boundary(0, range
, step
, 1, y
, x
);
321 int b
= search_boundary(height
- block_size
, range
, step
, 1, y
, x
);
324 for (int j
= t
; j
<= b
; j
+= step
) {
325 for (int i
= l
; i
<= r
; i
+= step
) {
328 if (exclude_cur_pos
> 0 && j
== y
&& i
== x
) {
334 sc
->search_positions
[index
++] = pos
;
338 if (exclude_cur_pos
== 1) {
339 sc
->match_blocks
[0].score
= 0;
340 sc
->match_blocks
[0].y
= y
;
341 sc
->match_blocks
[0].x
= x
;
342 sc
->nb_match_blocks
= 1;
345 do_block_matching_multi(s
, ref
, ref_linesize
, s
->bm_range
,
346 sc
->search_positions
, index
, s
->th_mse
, y
, x
, plane
, jobnr
);
349 static void block_matching(BM3DContext
*s
, const uint8_t *ref
, int ref_linesize
,
350 int j
, int i
, int plane
, int jobnr
)
352 SliceContext
*sc
= &s
->slices
[jobnr
];
354 if (s
->group_size
== 1 || s
->th_mse
<= 0.f
) {
355 sc
->match_blocks
[0].score
= 1;
356 sc
->match_blocks
[0].x
= i
;
357 sc
->match_blocks
[0].y
= j
;
358 sc
->nb_match_blocks
= 1;
362 sc
->nb_match_blocks
= 0;
363 block_matching_multi(s
, ref
, ref_linesize
, j
, i
, 1, plane
, jobnr
);
366 static void get_block_row(const uint8_t *srcp
, int src_linesize
,
367 int y
, int x
, int block_size
, float *dst
)
369 const uint8_t *src
= srcp
+ y
* src_linesize
+ x
;
371 for (int j
= 0; j
< block_size
; j
++)
375 static void get_block_row16(const uint8_t *srcp
, int src_linesize
,
376 int y
, int x
, int block_size
, float *dst
)
378 const uint16_t *src
= (uint16_t *)srcp
+ y
* src_linesize
/ 2 + x
;
380 for (int j
= 0; j
< block_size
; j
++)
384 static void basic_block_filtering(BM3DContext
*s
, const uint8_t *src
, int src_linesize
,
385 const uint8_t *ref
, int ref_linesize
,
386 int y
, int x
, int plane
, int jobnr
)
388 SliceContext
*sc
= &s
->slices
[jobnr
];
389 const int pblock_size
= s
->pblock_size
;
390 const int buffer_linesize
= s
->pblock_size
* s
->pblock_size
;
391 const int nb_match_blocks
= sc
->nb_match_blocks
;
392 const int block_size
= s
->block_size
;
393 const int width
= s
->planewidth
[plane
];
394 const int pgroup_size
= s
->pgroup_size
;
395 const int group_size
= s
->group_size
;
396 float *buffer
= sc
->buffer
;
397 float *bufferh
= sc
->bufferh
;
398 float *buffert
= sc
->buffert
;
399 float *bufferv
= sc
->bufferv
;
400 float *bufferz
= sc
->bufferz
;
402 float den_weight
, num_weight
;
405 for (int k
= 0; k
< nb_match_blocks
; k
++) {
406 const int y
= sc
->match_blocks
[k
].y
;
407 const int x
= sc
->match_blocks
[k
].x
;
409 for (int i
= 0; i
< block_size
; i
++) {
410 s
->get_block_row(src
, src_linesize
, y
+ i
, x
, block_size
, bufferh
+ pblock_size
* i
);
411 sc
->tx_fn(sc
->dctf
, buffert
, bufferh
+ pblock_size
* i
, sizeof(float));
412 for (int j
= 0; j
< block_size
; j
++)
413 bufferv
[j
* pblock_size
+ i
] = buffert
[j
];
416 for (int i
= 0; i
< block_size
; i
++) {
417 sc
->tx_fn(sc
->dctf
, buffert
, bufferv
+ i
* pblock_size
, sizeof(float));
418 memcpy(buffer
+ k
* buffer_linesize
+ i
* pblock_size
,
419 buffert
, block_size
* sizeof(float));
423 for (int i
= 0; i
< block_size
; i
++) {
424 for (int j
= 0; j
< block_size
; j
++) {
425 for (int k
= 0; k
< nb_match_blocks
; k
++)
426 bufferz
[k
] = buffer
[buffer_linesize
* k
+ i
* pblock_size
+ j
];
428 sc
->tx_fn_g(sc
->gdctf
, bufferz
, bufferz
, sizeof(float));
429 bufferz
+= pgroup_size
;
433 threshold
[0] = s
->hard_threshold
* s
->sigma
* M_SQRT2
* 4.f
* block_size
* block_size
* (1 << (s
->depth
- 8)) / 255.f
;
434 threshold
[1] = threshold
[0] * sqrtf(2.f
);
435 threshold
[2] = threshold
[0] * 2.f
;
436 threshold
[3] = threshold
[0] * sqrtf(8.f
);
437 bufferz
= sc
->bufferz
;
439 for (int i
= 0; i
< block_size
; i
++) {
440 for (int j
= 0; j
< block_size
; j
++) {
441 for (int k
= 0; k
< nb_match_blocks
; k
++) {
442 const float thresh
= threshold
[(j
== 0) + (i
== 0) + (k
== 0)];
444 if (bufferz
[k
] > thresh
|| bufferz
[k
] < -thresh
) {
450 bufferz
+= pgroup_size
;
454 bufferz
= sc
->bufferz
;
456 for (int i
= 0; i
< block_size
; i
++) {
457 for (int j
= 0; j
< block_size
; j
++) {
459 sc
->itx_fn_g(sc
->gdcti
, bufferz
, bufferz
, sizeof(float));
460 for (int k
= 0; k
< nb_match_blocks
; k
++)
461 buffer
[buffer_linesize
* k
+ i
* pblock_size
+ j
] = bufferz
[k
];
462 bufferz
+= pgroup_size
;
466 den_weight
= retained
< 1 ? 1.f
: 1.f
/ retained
;
467 num_weight
= den_weight
;
470 for (int k
= 0; k
< nb_match_blocks
; k
++) {
471 float *num
= sc
->num
+ y
* width
+ x
;
472 float *den
= sc
->den
+ y
* width
+ x
;
474 for (int i
= 0; i
< block_size
; i
++) {
475 memcpy(bufferv
+ i
* pblock_size
,
476 buffer
+ k
* buffer_linesize
+ i
* pblock_size
,
477 block_size
* sizeof(float));
480 for (int i
= 0; i
< block_size
; i
++) {
481 sc
->itx_fn(sc
->dcti
, buffert
, bufferv
+ i
* pblock_size
, sizeof(float));
482 for (int j
= 0; j
< block_size
; j
++)
483 bufferh
[j
* pblock_size
+ i
] = buffert
[j
];
486 for (int i
= 0; i
< block_size
; i
++) {
487 sc
->itx_fn(sc
->dcti
, buffert
, bufferh
+ pblock_size
* i
, sizeof(float));
488 for (int j
= 0; j
< block_size
; j
++) {
489 num
[j
] += buffert
[j
] * num_weight
;
490 den
[j
] += den_weight
;
498 static void final_block_filtering(BM3DContext
*s
, const uint8_t *src
, int src_linesize
,
499 const uint8_t *ref
, int ref_linesize
,
500 int y
, int x
, int plane
, int jobnr
)
502 SliceContext
*sc
= &s
->slices
[jobnr
];
503 const int pblock_size
= s
->pblock_size
;
504 const int buffer_linesize
= s
->pblock_size
* s
->pblock_size
;
505 const int nb_match_blocks
= sc
->nb_match_blocks
;
506 const int block_size
= s
->block_size
;
507 const int width
= s
->planewidth
[plane
];
508 const int pgroup_size
= s
->pgroup_size
;
509 const int group_size
= s
->group_size
;
510 const float sigma_sqr
= s
->sigma
* s
->sigma
;
511 float *buffer
= sc
->buffer
;
512 float *bufferh
= sc
->bufferh
;
513 float *bufferv
= sc
->bufferv
;
514 float *bufferz
= sc
->bufferz
;
515 float *rbuffer
= sc
->rbuffer
;
516 float *rbufferh
= sc
->rbufferh
;
517 float *rbufferv
= sc
->rbufferv
;
518 float *rbufferz
= sc
->rbufferz
;
519 float den_weight
, num_weight
;
522 for (int k
= 0; k
< nb_match_blocks
; k
++) {
523 const int y
= sc
->match_blocks
[k
].y
;
524 const int x
= sc
->match_blocks
[k
].x
;
526 for (int i
= 0; i
< block_size
; i
++) {
527 s
->get_block_row(src
, src_linesize
, y
+ i
, x
, block_size
, bufferh
+ pblock_size
* i
);
528 s
->get_block_row(ref
, ref_linesize
, y
+ i
, x
, block_size
, rbufferh
+ pblock_size
* i
);
529 sc
->tx_fn(sc
->dctf
, bufferh
+ pblock_size
* i
, bufferh
+ pblock_size
* i
, sizeof(float));
530 sc
->tx_fn(sc
->dctf
, rbufferh
+ pblock_size
* i
, rbufferh
+ pblock_size
* i
, sizeof(float));
533 for (int i
= 0; i
< block_size
; i
++) {
534 for (int j
= 0; j
< block_size
; j
++) {
535 bufferv
[i
* pblock_size
+ j
] = bufferh
[j
* pblock_size
+ i
];
536 rbufferv
[i
* pblock_size
+ j
] = rbufferh
[j
* pblock_size
+ i
];
538 sc
->tx_fn(sc
->dctf
, bufferv
+ i
* pblock_size
, bufferv
+ i
* pblock_size
, sizeof(float));
539 sc
->tx_fn(sc
->dctf
, rbufferv
+ i
* pblock_size
, rbufferv
+ i
* pblock_size
, sizeof(float));
542 for (int i
= 0; i
< block_size
; i
++) {
543 memcpy(buffer
+ k
* buffer_linesize
+ i
* pblock_size
,
544 bufferv
+ i
* pblock_size
, block_size
* sizeof(float));
545 memcpy(rbuffer
+ k
* buffer_linesize
+ i
* pblock_size
,
546 rbufferv
+ i
* pblock_size
, block_size
* sizeof(float));
550 for (int i
= 0; i
< block_size
; i
++) {
551 for (int j
= 0; j
< block_size
; j
++) {
552 for (int k
= 0; k
< nb_match_blocks
; k
++) {
553 bufferz
[k
] = buffer
[buffer_linesize
* k
+ i
* pblock_size
+ j
];
554 rbufferz
[k
] = rbuffer
[buffer_linesize
* k
+ i
* pblock_size
+ j
];
556 if (group_size
> 1) {
557 sc
->tx_fn_g(sc
->gdctf
, bufferz
, bufferz
, sizeof(float));
558 sc
->tx_fn_g(sc
->gdctf
, rbufferz
, rbufferz
, sizeof(float));
560 bufferz
+= pgroup_size
;
561 rbufferz
+= pgroup_size
;
565 bufferz
= sc
->bufferz
;
566 rbufferz
= sc
->rbufferz
;
568 for (int i
= 0; i
< block_size
; i
++) {
569 for (int j
= 0; j
< block_size
; j
++) {
570 for (int k
= 0; k
< nb_match_blocks
; k
++) {
571 const float ref_sqr
= rbufferz
[k
] * rbufferz
[k
];
572 float wiener_coef
= ref_sqr
/ (ref_sqr
+ sigma_sqr
);
574 if (isnan(wiener_coef
))
576 bufferz
[k
] *= wiener_coef
;
577 l2_wiener
+= wiener_coef
* wiener_coef
;
579 bufferz
+= pgroup_size
;
580 rbufferz
+= pgroup_size
;
584 bufferz
= sc
->bufferz
;
586 for (int i
= 0; i
< block_size
; i
++) {
587 for (int j
= 0; j
< block_size
; j
++) {
589 sc
->itx_fn_g(sc
->gdcti
, bufferz
, bufferz
, sizeof(float));
590 for (int k
= 0; k
< nb_match_blocks
; k
++) {
591 buffer
[buffer_linesize
* k
+ i
* pblock_size
+ j
] = bufferz
[k
];
593 bufferz
+= pgroup_size
;
597 l2_wiener
= FFMAX(l2_wiener
, 1e-15f
);
598 den_weight
= 1.f
/ l2_wiener
;
599 num_weight
= den_weight
;
601 for (int k
= 0; k
< nb_match_blocks
; k
++) {
602 float *num
= sc
->num
+ y
* width
+ x
;
603 float *den
= sc
->den
+ y
* width
+ x
;
605 for (int i
= 0; i
< block_size
; i
++) {
606 memcpy(bufferv
+ i
* pblock_size
,
607 buffer
+ k
* buffer_linesize
+ i
* pblock_size
,
608 block_size
* sizeof(float));
611 for (int i
= 0; i
< block_size
; i
++) {
612 sc
->itx_fn(sc
->dcti
, bufferv
+ pblock_size
* i
, bufferv
+ pblock_size
* i
, sizeof(float));
613 for (int j
= 0; j
< block_size
; j
++) {
614 bufferh
[j
* pblock_size
+ i
] = bufferv
[i
* pblock_size
+ j
];
618 for (int i
= 0; i
< block_size
; i
++) {
619 sc
->itx_fn(sc
->dcti
, bufferh
+ pblock_size
* i
, bufferh
+ pblock_size
* i
, sizeof(float));
620 for (int j
= 0; j
< block_size
; j
++) {
621 num
[j
] += bufferh
[i
* pblock_size
+ j
] * num_weight
;
622 den
[j
] += den_weight
;
630 static void do_output(BM3DContext
*s
, uint8_t *dst
, int dst_linesize
,
631 int plane
, int nb_jobs
)
633 const int height
= s
->planeheight
[plane
];
634 const int width
= s
->planewidth
[plane
];
636 for (int i
= 0; i
< height
; i
++) {
637 for (int j
= 0; j
< width
; j
++) {
638 uint8_t *dstp
= dst
+ i
* dst_linesize
;
642 for (int k
= 0; k
< nb_jobs
; k
++) {
643 SliceContext
*sc
= &s
->slices
[k
];
644 float num
= sc
->num
[i
* width
+ j
];
645 float den
= sc
->den
[i
* width
+ j
];
651 dstp
[j
] = av_clip_uint8(lrintf(sum_num
/ sum_den
));
656 static void do_output16(BM3DContext
*s
, uint8_t *dst
, int dst_linesize
,
657 int plane
, int nb_jobs
)
659 const int height
= s
->planeheight
[plane
];
660 const int width
= s
->planewidth
[plane
];
661 const int depth
= s
->depth
;
663 for (int i
= 0; i
< height
; i
++) {
664 for (int j
= 0; j
< width
; j
++) {
665 uint16_t *dstp
= (uint16_t *)dst
+ i
* dst_linesize
/ 2;
669 for (int k
= 0; k
< nb_jobs
; k
++) {
670 SliceContext
*sc
= &s
->slices
[k
];
671 float num
= sc
->num
[i
* width
+ j
];
672 float den
= sc
->den
[i
* width
+ j
];
678 dstp
[j
] = av_clip_uintp2_c(lrintf(sum_num
/ sum_den
), depth
);
683 static int filter_slice(AVFilterContext
*ctx
, void *arg
, int jobnr
, int nb_jobs
)
685 BM3DContext
*s
= ctx
->priv
;
686 SliceContext
*sc
= &s
->slices
[jobnr
];
687 const int block_step
= s
->block_step
;
688 ThreadData
*td
= arg
;
689 const uint8_t *src
= td
->src
;
690 const uint8_t *ref
= td
->ref
;
691 const int src_linesize
= td
->src_linesize
;
692 const int ref_linesize
= td
->ref_linesize
;
693 const int plane
= td
->plane
;
694 const int width
= s
->planewidth
[plane
];
695 const int height
= s
->planeheight
[plane
];
696 const int block_pos_bottom
= FFMAX(0, height
- s
->block_size
);
697 const int block_pos_right
= FFMAX(0, width
- s
->block_size
);
698 const int slice_start
= (((height
+ block_step
- 1) / block_step
) * jobnr
/ nb_jobs
) * block_step
;
699 const int slice_end
= (jobnr
== nb_jobs
- 1) ? block_pos_bottom
+ block_step
:
700 (((height
+ block_step
- 1) / block_step
) * (jobnr
+ 1) / nb_jobs
) * block_step
;
702 memset(sc
->num
, 0, width
* height
* sizeof(float));
703 memset(sc
->den
, 0, width
* height
* sizeof(float));
705 for (int j
= slice_start
; j
< slice_end
; j
+= block_step
) {
706 if (j
> block_pos_bottom
) {
707 j
= block_pos_bottom
;
710 for (int i
= 0; i
< block_pos_right
+ block_step
; i
+= block_step
) {
711 if (i
> block_pos_right
) {
715 block_matching(s
, ref
, ref_linesize
, j
, i
, plane
, jobnr
);
717 s
->block_filtering(s
, src
, src_linesize
,
718 ref
, ref_linesize
, j
, i
, plane
, jobnr
);
725 static int filter_frame(AVFilterContext
*ctx
, AVFrame
**out
, AVFrame
*in
, AVFrame
*ref
)
727 BM3DContext
*s
= ctx
->priv
;
728 AVFilterLink
*outlink
= ctx
->outputs
[0];
731 *out
= ff_get_video_buffer(outlink
, outlink
->w
, outlink
->h
);
733 return AVERROR(ENOMEM
);
734 av_frame_copy_props(*out
, in
);
736 for (p
= 0; p
< s
->nb_planes
; p
++) {
737 const int nb_jobs
= FFMAX(1, FFMIN(s
->nb_threads
, s
->planeheight
[p
] / s
->block_size
));
740 if (!((1 << p
) & s
->planes
) || ctx
->is_disabled
) {
741 av_image_copy_plane((*out
)->data
[p
], (*out
)->linesize
[p
],
742 in
->data
[p
], in
->linesize
[p
],
743 s
->planewidth
[p
] * (1 + (s
->depth
> 8)), s
->planeheight
[p
]);
747 td
.src
= in
->data
[p
];
748 td
.src_linesize
= in
->linesize
[p
];
749 td
.ref
= ref
->data
[p
];
750 td
.ref_linesize
= ref
->linesize
[p
];
752 ff_filter_execute(ctx
, filter_slice
, &td
, NULL
, nb_jobs
);
754 s
->do_output(s
, (*out
)->data
[p
], (*out
)->linesize
[p
], p
, nb_jobs
);
760 #define SQR(x) ((x) * (x))
762 static int config_input(AVFilterLink
*inlink
)
764 const AVPixFmtDescriptor
*desc
= av_pix_fmt_desc_get(inlink
->format
);
765 AVFilterContext
*ctx
= inlink
->dst
;
766 BM3DContext
*s
= ctx
->priv
;
768 s
->nb_threads
= FFMIN(ff_filter_get_nb_threads(ctx
), MAX_NB_THREADS
);
769 s
->nb_planes
= av_pix_fmt_count_planes(inlink
->format
);
770 s
->depth
= desc
->comp
[0].depth
;
771 s
->max
= (1 << s
->depth
) - 1;
772 s
->planeheight
[1] = s
->planeheight
[2] = AV_CEIL_RSHIFT(inlink
->h
, desc
->log2_chroma_h
);
773 s
->planeheight
[0] = s
->planeheight
[3] = inlink
->h
;
774 s
->planewidth
[1] = s
->planewidth
[2] = AV_CEIL_RSHIFT(inlink
->w
, desc
->log2_chroma_w
);
775 s
->planewidth
[0] = s
->planewidth
[3] = inlink
->w
;
776 s
->pblock_size
= FFALIGN(s
->block_size
* 2, av_cpu_max_align());
777 s
->pgroup_size
= FFALIGN(s
->group_size
* 2, av_cpu_max_align());
779 for (int i
= 0; i
< s
->nb_threads
; i
++) {
780 SliceContext
*sc
= &s
->slices
[i
];
781 float iscale
= 0.5f
/ s
->block_size
;
785 sc
->num
= av_calloc(FFALIGN(s
->planewidth
[0], s
->block_size
) * FFALIGN(s
->planeheight
[0], s
->block_size
), sizeof(float));
786 sc
->den
= av_calloc(FFALIGN(s
->planewidth
[0], s
->block_size
) * FFALIGN(s
->planeheight
[0], s
->block_size
), sizeof(float));
787 if (!sc
->num
|| !sc
->den
)
788 return AVERROR(ENOMEM
);
790 ret
= av_tx_init(&sc
->dctf
, &sc
->tx_fn
, AV_TX_FLOAT_DCT
, 0, s
->block_size
>> 0, &scale
, 0);
794 ret
= av_tx_init(&sc
->dcti
, &sc
->itx_fn
, AV_TX_FLOAT_DCT
, 1, s
->block_size
>> 1, &iscale
, 0);
798 if (s
->group_size
> 1) {
799 float iscale
= 0.5f
/ s
->group_size
;
801 ret
= av_tx_init(&sc
->gdctf
, &sc
->tx_fn_g
, AV_TX_FLOAT_DCT
, 0, s
->group_size
>> 0, &scale
, 0);
805 ret
= av_tx_init(&sc
->gdcti
, &sc
->itx_fn_g
, AV_TX_FLOAT_DCT
, 1, s
->group_size
>> 1, &iscale
, 0);
810 sc
->buffer
= av_calloc(s
->pblock_size
* s
->pblock_size
* s
->pgroup_size
, sizeof(*sc
->buffer
));
811 sc
->bufferz
= av_calloc(s
->pblock_size
* s
->pblock_size
* s
->pgroup_size
, sizeof(*sc
->bufferz
));
812 sc
->bufferh
= av_calloc(s
->pblock_size
* s
->pblock_size
, sizeof(*sc
->bufferh
));
813 sc
->bufferv
= av_calloc(s
->pblock_size
* s
->pblock_size
, sizeof(*sc
->bufferv
));
814 sc
->buffert
= av_calloc(s
->pblock_size
, sizeof(*sc
->buffert
));
815 if (!sc
->bufferh
|| !sc
->bufferv
|| !sc
->buffer
|| !sc
->bufferz
|| !sc
->buffert
)
816 return AVERROR(ENOMEM
);
818 if (s
->mode
== FINAL
) {
819 sc
->rbuffer
= av_calloc(s
->pblock_size
* s
->pblock_size
* s
->pgroup_size
, sizeof(*sc
->rbuffer
));
820 sc
->rbufferz
= av_calloc(s
->pblock_size
* s
->pblock_size
* s
->pgroup_size
, sizeof(*sc
->rbufferz
));
821 sc
->rbufferh
= av_calloc(s
->pblock_size
* s
->pblock_size
, sizeof(*sc
->rbufferh
));
822 sc
->rbufferv
= av_calloc(s
->pblock_size
* s
->pblock_size
, sizeof(*sc
->rbufferv
));
823 if (!sc
->rbufferh
|| !sc
->rbufferv
|| !sc
->rbuffer
|| !sc
->rbufferz
)
824 return AVERROR(ENOMEM
);
827 sc
->search_positions
= av_calloc(SQR(2 * s
->bm_range
/ s
->bm_step
+ 1), sizeof(*sc
->search_positions
));
828 if (!sc
->search_positions
)
829 return AVERROR(ENOMEM
);
832 s
->do_output
= do_output
;
833 s
->do_block_ssd
= do_block_ssd
;
834 s
->get_block_row
= get_block_row
;
837 s
->do_output
= do_output16
;
838 s
->do_block_ssd
= do_block_ssd16
;
839 s
->get_block_row
= get_block_row16
;
845 static int activate(AVFilterContext
*ctx
)
847 BM3DContext
*s
= ctx
->priv
;
850 AVFrame
*frame
= NULL
;
855 FF_FILTER_FORWARD_STATUS_BACK(ctx
->outputs
[0], ctx
->inputs
[0]);
857 if ((ret
= ff_inlink_consume_frame(ctx
->inputs
[0], &frame
)) > 0) {
858 ret
= filter_frame(ctx
, &out
, frame
, frame
);
859 av_frame_free(&frame
);
862 ret
= ff_filter_frame(ctx
->outputs
[0], out
);
866 } else if (ff_inlink_acknowledge_status(ctx
->inputs
[0], &status
, &pts
)) {
867 ff_outlink_set_status(ctx
->outputs
[0], status
, pts
);
870 if (ff_outlink_frame_wanted(ctx
->outputs
[0]))
871 ff_inlink_request_frame(ctx
->inputs
[0]);
875 return ff_framesync_activate(&s
->fs
);
879 static int process_frame(FFFrameSync
*fs
)
881 AVFilterContext
*ctx
= fs
->parent
;
882 BM3DContext
*s
= fs
->opaque
;
883 AVFilterLink
*outlink
= ctx
->outputs
[0];
884 AVFrame
*out
= NULL
, *src
, *ref
;
887 if ((ret
= ff_framesync_get_frame(&s
->fs
, 0, &src
, 0)) < 0 ||
888 (ret
= ff_framesync_get_frame(&s
->fs
, 1, &ref
, 0)) < 0)
891 if ((ret
= filter_frame(ctx
, &out
, src
, ref
)) < 0)
894 out
->pts
= av_rescale_q(src
->pts
, s
->fs
.time_base
, outlink
->time_base
);
896 return ff_filter_frame(outlink
, out
);
899 static av_cold
int init(AVFilterContext
*ctx
)
901 BM3DContext
*s
= ctx
->priv
;
902 AVFilterPad pad
= { 0 };
905 if (s
->mode
== BASIC
) {
906 if (s
->th_mse
== 0.f
)
907 s
->th_mse
= 400.f
+ s
->sigma
* 80.f
;
908 s
->block_filtering
= basic_block_filtering
;
909 } else if (s
->mode
== FINAL
) {
911 av_log(ctx
, AV_LOG_WARNING
, "Reference stream is mandatory in final estimation mode.\n");
914 if (s
->th_mse
== 0.f
)
915 s
->th_mse
= 200.f
+ s
->sigma
* 10.f
;
917 s
->block_filtering
= final_block_filtering
;
922 if (s
->block_step
> s
->block_size
) {
923 av_log(ctx
, AV_LOG_WARNING
, "bstep: %d can't be bigger than block size. Changing to %d.\n",
924 s
->block_step
, s
->block_size
);
925 s
->block_step
= s
->block_size
;
928 if (s
->bm_step
> s
->bm_range
) {
929 av_log(ctx
, AV_LOG_WARNING
, "mstep: %d can't be bigger than block matching range. Changing to %d.\n",
930 s
->bm_step
, s
->bm_range
);
931 s
->bm_step
= s
->bm_range
;
934 pad
.type
= AVMEDIA_TYPE_VIDEO
;
936 pad
.config_props
= config_input
;
938 if ((ret
= ff_append_inpad(ctx
, &pad
)) < 0)
942 pad
.type
= AVMEDIA_TYPE_VIDEO
;
943 pad
.name
= "reference";
944 pad
.config_props
= NULL
;
946 if ((ret
= ff_append_inpad(ctx
, &pad
)) < 0)
953 static int config_output(AVFilterLink
*outlink
)
955 FilterLink
*outl
= ff_filter_link(outlink
);
956 AVFilterContext
*ctx
= outlink
->src
;
957 BM3DContext
*s
= ctx
->priv
;
958 AVFilterLink
*src
= ctx
->inputs
[0];
959 FilterLink
*srcl
= ff_filter_link(src
);
965 ref
= ctx
->inputs
[1];
967 if (src
->w
!= ref
->w
||
969 av_log(ctx
, AV_LOG_ERROR
, "First input link %s parameters "
970 "(size %dx%d) do not match the corresponding "
971 "second input link %s parameters (%dx%d) ",
972 ctx
->input_pads
[0].name
, src
->w
, src
->h
,
973 ctx
->input_pads
[1].name
, ref
->w
, ref
->h
);
974 return AVERROR(EINVAL
);
980 outlink
->time_base
= src
->time_base
;
981 outlink
->sample_aspect_ratio
= src
->sample_aspect_ratio
;
982 outl
->frame_rate
= srcl
->frame_rate
;
987 if ((ret
= ff_framesync_init(&s
->fs
, ctx
, 2)) < 0)
991 in
[0].time_base
= src
->time_base
;
992 in
[1].time_base
= ref
->time_base
;
994 in
[0].before
= EXT_STOP
;
995 in
[0].after
= EXT_STOP
;
997 in
[1].before
= EXT_STOP
;
998 in
[1].after
= EXT_STOP
;
1000 s
->fs
.on_event
= process_frame
;
1002 return ff_framesync_configure(&s
->fs
);
1005 static av_cold
void uninit(AVFilterContext
*ctx
)
1007 BM3DContext
*s
= ctx
->priv
;
1010 ff_framesync_uninit(&s
->fs
);
1012 for (int i
= 0; i
< s
->nb_threads
; i
++) {
1013 SliceContext
*sc
= &s
->slices
[i
];
1018 av_tx_uninit(&sc
->gdctf
);
1019 av_tx_uninit(&sc
->gdcti
);
1020 av_tx_uninit(&sc
->dctf
);
1021 av_tx_uninit(&sc
->dcti
);
1023 av_freep(&sc
->buffer
);
1024 av_freep(&sc
->bufferh
);
1025 av_freep(&sc
->buffert
);
1026 av_freep(&sc
->bufferv
);
1027 av_freep(&sc
->bufferz
);
1028 av_freep(&sc
->rbuffer
);
1029 av_freep(&sc
->rbufferh
);
1030 av_freep(&sc
->rbufferv
);
1031 av_freep(&sc
->rbufferz
);
1033 av_freep(&sc
->search_positions
);
1037 static const AVFilterPad bm3d_outputs
[] = {
1040 .type
= AVMEDIA_TYPE_VIDEO
,
1041 .config_props
= config_output
,
1045 const FFFilter ff_vf_bm3d
= {
1047 .p
.description
= NULL_IF_CONFIG_SMALL("Block-Matching 3D denoiser."),
1049 .p
.priv_class
= &bm3d_class
,
1050 .p
.flags
= AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL
|
1051 AVFILTER_FLAG_DYNAMIC_INPUTS
|
1052 AVFILTER_FLAG_SLICE_THREADS
,
1053 .priv_size
= sizeof(BM3DContext
),
1056 .activate
= activate
,
1057 FILTER_OUTPUTS(bm3d_outputs
),
1058 FILTER_PIXFMTS_ARRAY(pix_fmts
),