2 * Copyright (c) 2016 Clément Bœsch <u pkh me>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * - better automatic defaults? see "Parameters" @ http://www.ipol.im/pub/art/2011/bcm_nlm/
24 * - temporal support (probably doesn't need any displacement according to
25 * "Denoising image sequences does not require motion estimation")
26 * - Bayer pixel format support for at least raw photos? (DNG support would be
28 * - FATE test (probably needs visual threshold test mechanism due to the use
32 #include "libavutil/avassert.h"
33 #include "libavutil/mem.h"
34 #include "libavutil/opt.h"
35 #include "libavutil/pixdesc.h"
38 #include "vf_nlmeans.h"
39 #include "vf_nlmeans_init.h"
42 typedef struct NLMeansContext
{
45 int chroma_w
, chroma_h
;
46 double pdiff_scale
; // invert of the filtering parameter (sigma*10) squared
47 double sigma
; // denoising strength
48 int patch_size
, patch_hsize
; // patch size and half size
49 int patch_size_uv
, patch_hsize_uv
; // patch size and half size for chroma planes
50 int research_size
, research_hsize
; // research size and half size
51 int research_size_uv
, research_hsize_uv
; // research size and half size for chroma planes
52 uint32_t *ii_orig
; // integral image
53 uint32_t *ii
; // integral image starting after the 0-line and 0-column
54 int ii_w
, ii_h
; // width and height of the integral image
55 ptrdiff_t ii_lz_32
; // linesize in 32-bit units of the integral image
56 float *total_weight
; // total weight for every pixel
57 float *sum
; // weighted sum for every pixel
58 int linesize
; // sum and total_weight linesize
59 float *weight_lut
; // lookup table mapping (scaled) patch differences to their associated weights
60 uint32_t max_meaningful_diff
; // maximum difference considered (if the patch difference is too high we ignore the pixel)
61 NLMeansDSPContext dsp
;
64 #define OFFSET(x) offsetof(NLMeansContext, x)
65 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
66 static const AVOption nlmeans_options
[] = {
67 { "s", "denoising strength", OFFSET(sigma
), AV_OPT_TYPE_DOUBLE
, { .dbl
= 1.0 }, 1.0, 30.0, FLAGS
},
68 { "p", "patch size", OFFSET(patch_size
), AV_OPT_TYPE_INT
, { .i64
= 3*2+1 }, 0, 99, FLAGS
},
69 { "pc", "patch size for chroma planes", OFFSET(patch_size_uv
), AV_OPT_TYPE_INT
, { .i64
= 0 }, 0, 99, FLAGS
},
70 { "r", "research window", OFFSET(research_size
), AV_OPT_TYPE_INT
, { .i64
= 7*2+1 }, 0, 99, FLAGS
},
71 { "rc", "research window for chroma planes", OFFSET(research_size_uv
), AV_OPT_TYPE_INT
, { .i64
= 0 }, 0, 99, FLAGS
},
75 AVFILTER_DEFINE_CLASS(nlmeans
);
77 static const enum AVPixelFormat pix_fmts
[] = {
78 AV_PIX_FMT_YUV410P
, AV_PIX_FMT_YUV411P
,
79 AV_PIX_FMT_YUV420P
, AV_PIX_FMT_YUV422P
,
80 AV_PIX_FMT_YUV440P
, AV_PIX_FMT_YUV444P
,
81 AV_PIX_FMT_YUVJ444P
, AV_PIX_FMT_YUVJ440P
,
82 AV_PIX_FMT_YUVJ422P
, AV_PIX_FMT_YUVJ420P
,
84 AV_PIX_FMT_GRAY8
, AV_PIX_FMT_GBRP
,
89 * Compute squared difference of an unsafe area (the zone nor s1 nor s2 could
92 * On the other hand, the line above dst and the column to its left are always
95 * There is little point in having this function SIMDified as it is likely too
96 * complex and only handle small portions of the image.
98 * @param dst integral image
99 * @param dst_linesize_32 integral image linesize (in 32-bit integers unit)
100 * @param startx integral starting x position
101 * @param starty integral starting y position
102 * @param src source plane buffer
103 * @param linesize source plane linesize
104 * @param offx source offsetting in x
105 * @param offy source offsetting in y
106 * @paran r absolute maximum source offsetting
107 * @param sw source width
108 * @param sh source height
109 * @param w width to compute
110 * @param h height to compute
112 static inline void compute_unsafe_ssd_integral_image(uint32_t *dst
, ptrdiff_t dst_linesize_32
,
113 int startx
, int starty
,
114 const uint8_t *src
, ptrdiff_t linesize
,
115 int offx
, int offy
, int r
, int sw
, int sh
,
118 for (int y
= starty
; y
< starty
+ h
; y
++) {
119 uint32_t acc
= dst
[y
*dst_linesize_32
+ startx
- 1] - dst
[(y
-1)*dst_linesize_32
+ startx
- 1];
120 const int s1y
= av_clip(y
- r
, 0, sh
- 1);
121 const int s2y
= av_clip(y
- (r
+ offy
), 0, sh
- 1);
123 for (int x
= startx
; x
< startx
+ w
; x
++) {
124 const int s1x
= av_clip(x
- r
, 0, sw
- 1);
125 const int s2x
= av_clip(x
- (r
+ offx
), 0, sw
- 1);
126 const uint8_t v1
= src
[s1y
*linesize
+ s1x
];
127 const uint8_t v2
= src
[s2y
*linesize
+ s2x
];
128 const int d
= v1
- v2
;
130 dst
[y
*dst_linesize_32
+ x
] = dst
[(y
-1)*dst_linesize_32
+ x
] + acc
;
136 * Compute the sum of squared difference integral image
137 * http://www.ipol.im/pub/art/2014/57/
138 * Integral Images for Block Matching - Gabriele Facciolo, Nicolas Limare, Enric Meinhardt-Llopis
140 * @param ii integral image of dimension (w+e*2) x (h+e*2) with
141 * an additional zeroed top line and column already
142 * "applied" to the pointer value
143 * @param ii_linesize_32 integral image linesize (in 32-bit integers unit)
144 * @param src source plane buffer
145 * @param linesize source plane linesize
146 * @param offx x-offsetting ranging in [-e;e]
147 * @param offy y-offsetting ranging in [-e;e]
148 * @param w source width
149 * @param h source height
150 * @param e research padding edge
152 static void compute_ssd_integral_image(const NLMeansDSPContext
*dsp
,
153 uint32_t *ii
, ptrdiff_t ii_linesize_32
,
154 const uint8_t *src
, ptrdiff_t linesize
, int offx
, int offy
,
157 // ii has a surrounding padding of thickness "e"
158 const int ii_w
= w
+ e
*2;
159 const int ii_h
= h
+ e
*2;
161 // we center the first source
165 // 2nd source is the frame with offsetting
166 const int s2x
= e
+ offx
;
167 const int s2y
= e
+ offy
;
169 // get the dimension of the overlapping rectangle where it is always safe
170 // to compare the 2 sources pixels
171 const int startx_safe
= FFMAX(s1x
, s2x
);
172 const int starty_safe
= FFMAX(s1y
, s2y
);
173 const int u_endx_safe
= FFMIN(s1x
+ w
, s2x
+ w
); // unaligned
174 const int endy_safe
= FFMIN(s1y
+ h
, s2y
+ h
);
176 // deduce the safe area width and height
177 const int safe_pw
= (u_endx_safe
- startx_safe
) & ~0xf;
178 const int safe_ph
= endy_safe
- starty_safe
;
180 // adjusted end x position of the safe area after width of the safe area gets aligned
181 const int endx_safe
= startx_safe
+ safe_pw
;
183 // top part where only one of s1 and s2 is still readable, or none at all
184 compute_unsafe_ssd_integral_image(ii
, ii_linesize_32
,
190 // fill the left column integral required to compute the central
192 compute_unsafe_ssd_integral_image(ii
, ii_linesize_32
,
196 startx_safe
, safe_ph
);
198 // main and safe part of the integral
199 av_assert1(startx_safe
- s1x
>= 0); av_assert1(startx_safe
- s1x
< w
);
200 av_assert1(starty_safe
- s1y
>= 0); av_assert1(starty_safe
- s1y
< h
);
201 av_assert1(startx_safe
- s2x
>= 0); av_assert1(startx_safe
- s2x
< w
);
202 av_assert1(starty_safe
- s2y
>= 0); av_assert1(starty_safe
- s2y
< h
);
203 if (safe_pw
&& safe_ph
)
204 dsp
->compute_safe_ssd_integral_image(ii
+ starty_safe
*ii_linesize_32
+ startx_safe
, ii_linesize_32
,
205 src
+ (starty_safe
- s1y
) * linesize
+ (startx_safe
- s1x
), linesize
,
206 src
+ (starty_safe
- s2y
) * linesize
+ (startx_safe
- s2x
), linesize
,
209 // right part of the integral
210 compute_unsafe_ssd_integral_image(ii
, ii_linesize_32
,
211 endx_safe
, starty_safe
,
214 ii_w
- endx_safe
, safe_ph
);
216 // bottom part where only one of s1 and s2 is still readable, or none at all
217 compute_unsafe_ssd_integral_image(ii
, ii_linesize_32
,
221 ii_w
, ii_h
- endy_safe
);
224 static int config_input(AVFilterLink
*inlink
)
226 AVFilterContext
*ctx
= inlink
->dst
;
227 NLMeansContext
*s
= ctx
->priv
;
228 const AVPixFmtDescriptor
*desc
= av_pix_fmt_desc_get(inlink
->format
);
229 const int e
= FFMAX(s
->research_hsize
, s
->research_hsize_uv
)
230 + FFMAX(s
->patch_hsize
, s
->patch_hsize_uv
);
232 s
->chroma_w
= AV_CEIL_RSHIFT(inlink
->w
, desc
->log2_chroma_w
);
233 s
->chroma_h
= AV_CEIL_RSHIFT(inlink
->h
, desc
->log2_chroma_h
);
234 s
->nb_planes
= av_pix_fmt_count_planes(inlink
->format
);
236 /* Allocate the integral image with extra edges of thickness "e"
238 * +_+-------------------------------+
239 * |0|0000000000000000000000000000000|
240 * +-x-------------------------------+
244 * |0| +-----------------------+ |
249 * |0| +-----------------------+ |
253 * +-+-------------------------------+
255 s
->ii_w
= inlink
->w
+ e
*2;
256 s
->ii_h
= inlink
->h
+ e
*2;
258 // align to 4 the linesize, "+1" is for the space of the left 0-column
259 s
->ii_lz_32
= FFALIGN(s
->ii_w
+ 1, 4);
261 // "+1" is for the space of the top 0-line
262 s
->ii_orig
= av_calloc(s
->ii_h
+ 1, s
->ii_lz_32
* sizeof(*s
->ii_orig
));
264 return AVERROR(ENOMEM
);
266 // skip top 0-line and left 0-column
267 s
->ii
= s
->ii_orig
+ s
->ii_lz_32
+ 1;
269 // allocate weighted average for every pixel
270 s
->linesize
= inlink
->w
+ 100;
271 s
->total_weight
= av_malloc_array(s
->linesize
, inlink
->h
* sizeof(*s
->total_weight
));
272 s
->sum
= av_malloc_array(s
->linesize
, inlink
->h
* sizeof(*s
->sum
));
273 if (!s
->total_weight
|| !s
->sum
)
274 return AVERROR(ENOMEM
);
281 ptrdiff_t src_linesize
;
284 const uint32_t *ii_start
;
288 static int nlmeans_slice(AVFilterContext
*ctx
, void *arg
, int jobnr
, int nb_jobs
)
290 NLMeansContext
*s
= ctx
->priv
;
291 const uint32_t max_meaningful_diff
= s
->max_meaningful_diff
;
292 const struct thread_data
*td
= arg
;
293 const ptrdiff_t src_linesize
= td
->src_linesize
;
294 const int process_h
= td
->endy
- td
->starty
;
295 const int slice_start
= (process_h
* jobnr
) / nb_jobs
;
296 const int slice_end
= (process_h
* (jobnr
+1)) / nb_jobs
;
297 const int starty
= td
->starty
+ slice_start
;
298 const int endy
= td
->starty
+ slice_end
;
300 const uint32_t *ii
= td
->ii_start
+ (starty
- p
- 1) * s
->ii_lz_32
- p
- 1;
301 const int dist_b
= 2*p
+ 1;
302 const int dist_d
= dist_b
* s
->ii_lz_32
;
303 const int dist_e
= dist_d
+ dist_b
;
304 const float *const weight_lut
= s
->weight_lut
;
305 NLMeansDSPContext
*dsp
= &s
->dsp
;
307 for (int y
= starty
; y
< endy
; y
++) {
308 const uint8_t *const src
= td
->src
+ y
*src_linesize
;
309 float *total_weight
= s
->total_weight
+ y
*s
->linesize
;
310 float *sum
= s
->sum
+ y
*s
->linesize
;
311 const uint32_t *const iia
= ii
;
312 const uint32_t *const iib
= ii
+ dist_b
;
313 const uint32_t *const iid
= ii
+ dist_d
;
314 const uint32_t *const iie
= ii
+ dist_e
;
316 dsp
->compute_weights_line(iia
, iib
, iid
, iie
, src
, total_weight
, sum
,
317 weight_lut
, max_meaningful_diff
,
318 td
->startx
, td
->endx
);
324 static void weight_averages(uint8_t *dst
, ptrdiff_t dst_linesize
,
325 const uint8_t *src
, ptrdiff_t src_linesize
,
326 float *total_weight
, float *sum
, ptrdiff_t linesize
,
329 for (int y
= 0; y
< h
; y
++) {
330 for (int x
= 0; x
< w
; x
++) {
331 // Also weight the centered pixel
332 total_weight
[x
] += 1.f
;
333 sum
[x
] += 1.f
* src
[x
];
334 dst
[x
] = av_clip_uint8(sum
[x
] / total_weight
[x
] + 0.5f
);
338 total_weight
+= linesize
;
343 static int nlmeans_plane(AVFilterContext
*ctx
, int w
, int h
, int p
, int r
,
344 uint8_t *dst
, ptrdiff_t dst_linesize
,
345 const uint8_t *src
, ptrdiff_t src_linesize
)
347 NLMeansContext
*s
= ctx
->priv
;
348 /* patches center points cover the whole research window so the patches
349 * themselves overflow the research window */
351 /* focus an integral pointer on the centered image (s1) */
352 const uint32_t *centered_ii
= s
->ii
+ e
*s
->ii_lz_32
+ e
;
354 memset(s
->total_weight
, 0, s
->linesize
* h
* sizeof(*s
->total_weight
));
355 memset(s
->sum
, 0, s
->linesize
* h
* sizeof(*s
->sum
));
357 for (int offy
= -r
; offy
<= r
; offy
++) {
358 for (int offx
= -r
; offx
<= r
; offx
++) {
360 struct thread_data td
= {
361 .src
= src
+ offy
*src_linesize
+ offx
,
362 .src_linesize
= src_linesize
,
363 .startx
= FFMAX(0, -offx
),
364 .starty
= FFMAX(0, -offy
),
365 .endx
= FFMIN(w
, w
- offx
),
366 .endy
= FFMIN(h
, h
- offy
),
367 .ii_start
= centered_ii
+ offy
*s
->ii_lz_32
+ offx
,
371 compute_ssd_integral_image(&s
->dsp
, s
->ii
, s
->ii_lz_32
,
373 offx
, offy
, e
, w
, h
);
374 ff_filter_execute(ctx
, nlmeans_slice
, &td
, NULL
,
375 FFMIN(td
.endy
- td
.starty
, ff_filter_get_nb_threads(ctx
)));
380 weight_averages(dst
, dst_linesize
, src
, src_linesize
,
381 s
->total_weight
, s
->sum
, s
->linesize
, w
, h
);
386 static int filter_frame(AVFilterLink
*inlink
, AVFrame
*in
)
388 AVFilterContext
*ctx
= inlink
->dst
;
389 NLMeansContext
*s
= ctx
->priv
;
390 AVFilterLink
*outlink
= ctx
->outputs
[0];
392 AVFrame
*out
= ff_get_video_buffer(outlink
, outlink
->w
, outlink
->h
);
395 return AVERROR(ENOMEM
);
397 av_frame_copy_props(out
, in
);
399 for (int i
= 0; i
< s
->nb_planes
; i
++) {
400 const int w
= i
? s
->chroma_w
: inlink
->w
;
401 const int h
= i
? s
->chroma_h
: inlink
->h
;
402 const int p
= i
? s
->patch_hsize_uv
: s
->patch_hsize
;
403 const int r
= i
? s
->research_hsize_uv
: s
->research_hsize
;
404 nlmeans_plane(ctx
, w
, h
, p
, r
,
405 out
->data
[i
], out
->linesize
[i
],
406 in
->data
[i
], in
->linesize
[i
]);
410 return ff_filter_frame(outlink
, out
);
413 #define CHECK_ODD_FIELD(field, name) do { \
414 if (!(s->field & 1)) { \
416 av_log(ctx, AV_LOG_WARNING, name " size must be odd, " \
417 "setting it to %d\n", s->field); \
421 static av_cold
int init(AVFilterContext
*ctx
)
423 NLMeansContext
*s
= ctx
->priv
;
424 const double h
= s
->sigma
* 10.;
426 s
->pdiff_scale
= 1. / (h
* h
);
427 s
->max_meaningful_diff
= log(255.) / s
->pdiff_scale
;
428 s
->weight_lut
= av_calloc(s
->max_meaningful_diff
+ 1, sizeof(*s
->weight_lut
));
430 return AVERROR(ENOMEM
);
431 for (int i
= 0; i
< s
->max_meaningful_diff
; i
++)
432 s
->weight_lut
[i
] = exp(-i
* s
->pdiff_scale
);
434 CHECK_ODD_FIELD(research_size
, "Luma research window");
435 CHECK_ODD_FIELD(patch_size
, "Luma patch");
437 if (!s
->research_size_uv
) s
->research_size_uv
= s
->research_size
;
438 if (!s
->patch_size_uv
) s
->patch_size_uv
= s
->patch_size
;
440 CHECK_ODD_FIELD(research_size_uv
, "Chroma research window");
441 CHECK_ODD_FIELD(patch_size_uv
, "Chroma patch");
443 s
->research_hsize
= s
->research_size
/ 2;
444 s
->research_hsize_uv
= s
->research_size_uv
/ 2;
445 s
->patch_hsize
= s
->patch_size
/ 2;
446 s
->patch_hsize_uv
= s
->patch_size_uv
/ 2;
448 av_log(ctx
, AV_LOG_DEBUG
, "Research window: %dx%d / %dx%d, patch size: %dx%d / %dx%d\n",
449 s
->research_size
, s
->research_size
, s
->research_size_uv
, s
->research_size_uv
,
450 s
->patch_size
, s
->patch_size
, s
->patch_size_uv
, s
->patch_size_uv
);
452 ff_nlmeans_init(&s
->dsp
);
457 static av_cold
void uninit(AVFilterContext
*ctx
)
459 NLMeansContext
*s
= ctx
->priv
;
460 av_freep(&s
->weight_lut
);
461 av_freep(&s
->ii_orig
);
462 av_freep(&s
->total_weight
);
466 static const AVFilterPad nlmeans_inputs
[] = {
469 .type
= AVMEDIA_TYPE_VIDEO
,
470 .config_props
= config_input
,
471 .filter_frame
= filter_frame
,
475 const FFFilter ff_vf_nlmeans
= {
477 .p
.description
= NULL_IF_CONFIG_SMALL("Non-local means denoiser."),
478 .p
.priv_class
= &nlmeans_class
,
479 .p
.flags
= AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC
| AVFILTER_FLAG_SLICE_THREADS
,
480 .priv_size
= sizeof(NLMeansContext
),
483 FILTER_INPUTS(nlmeans_inputs
),
484 FILTER_OUTPUTS(ff_video_default_filterpad
),
485 FILTER_PIXFMTS_ARRAY(pix_fmts
),