2 * Copyright (c) 2013-2014 Clément Bœsch
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 * A simple, relatively efficient and slow DCT image denoiser.
24 * @see http://www.ipol.im/pub/art/2011/ys-dct/
26 * The DCT factorization used is based on "Fast and numerically stable
27 * algorithms for discrete cosine transforms" from Gerlind Plonkaa & Manfred
28 * Tasche (DOI: 10.1016/j.laa.2004.07.015).
31 #include "libavutil/avassert.h"
32 #include "libavutil/eval.h"
33 #include "libavutil/mem.h"
34 #include "libavutil/mem_internal.h"
35 #include "libavutil/opt.h"
40 static const char *const var_names
[] = { "c", NULL
};
41 enum { VAR_C
, VAR_VARS_NB
};
45 typedef struct DCTdnoizContext
{
48 /* coefficient factor expression */
50 AVExpr
*expr
[MAX_THREADS
];
51 double var_values
[MAX_THREADS
][VAR_VARS_NB
];
54 int pr_width
, pr_height
; // width and height to process
55 float sigma
; // used when no expression are st
56 float th
; // threshold (3*sigma)
57 float *cbuf
[2][3]; // two planar rgb color buffers
58 float *slices
[MAX_THREADS
]; // slices buffers (1 slice buffer per thread)
59 float *weights
; // dct coeff are cumulated with overlapping; these values are used for averaging
60 int p_linesize
; // line sizes for color and weights
61 int overlap
; // number of block overlapping pixels
62 int step
; // block step increment (blocksize - overlap)
63 int n
; // 1<<n is the block size
64 int bsize
; // block size, 1<<n
65 void (*filter_freq_func
)(struct DCTdnoizContext
*s
,
66 const float *src
, int src_linesize
,
67 float *dst
, int dst_linesize
,
69 void (*color_decorrelation
)(float **dst
, int dst_linesize
,
70 const uint8_t **src
, int src_linesize
,
72 void (*color_correlation
)(uint8_t **dst
, int dst_linesize
,
73 float **src
, int src_linesize
,
77 #define MIN_NBITS 3 /* blocksize = 1<<3 = 8 */
78 #define MAX_NBITS 4 /* blocksize = 1<<4 = 16 */
79 #define DEFAULT_NBITS 3
81 #define OFFSET(x) offsetof(DCTdnoizContext, x)
82 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
83 static const AVOption dctdnoiz_options
[] = {
84 { "sigma", "set noise sigma constant", OFFSET(sigma
), AV_OPT_TYPE_FLOAT
, {.dbl
=0}, 0, 999, .flags
= FLAGS
},
85 { "s", "set noise sigma constant", OFFSET(sigma
), AV_OPT_TYPE_FLOAT
, {.dbl
=0}, 0, 999, .flags
= FLAGS
},
86 { "overlap", "set number of block overlapping pixels", OFFSET(overlap
), AV_OPT_TYPE_INT
, {.i64
=-1}, -1, (1<<MAX_NBITS
)-1, .flags
= FLAGS
},
87 { "expr", "set coefficient factor expression", OFFSET(expr_str
), AV_OPT_TYPE_STRING
, {.str
=NULL
}, .flags
= FLAGS
},
88 { "e", "set coefficient factor expression", OFFSET(expr_str
), AV_OPT_TYPE_STRING
, {.str
=NULL
}, .flags
= FLAGS
},
89 { "n", "set the block size, expressed in bits", OFFSET(n
), AV_OPT_TYPE_INT
, {.i64
=DEFAULT_NBITS
}, MIN_NBITS
, MAX_NBITS
, .flags
= FLAGS
},
93 AVFILTER_DEFINE_CLASS(dctdnoiz
);
95 static void av_always_inline
fdct8_1d(float *dst
, const float *src
,
96 int dst_stridea
, int dst_strideb
,
97 int src_stridea
, int src_strideb
)
101 for (i
= 0; i
< 8; i
++) {
102 const float x00
= src
[0*src_stridea
] + src
[7*src_stridea
];
103 const float x01
= src
[1*src_stridea
] + src
[6*src_stridea
];
104 const float x02
= src
[2*src_stridea
] + src
[5*src_stridea
];
105 const float x03
= src
[3*src_stridea
] + src
[4*src_stridea
];
106 const float x04
= src
[0*src_stridea
] - src
[7*src_stridea
];
107 const float x05
= src
[1*src_stridea
] - src
[6*src_stridea
];
108 const float x06
= src
[2*src_stridea
] - src
[5*src_stridea
];
109 const float x07
= src
[3*src_stridea
] - src
[4*src_stridea
];
110 const float x08
= x00
+ x03
;
111 const float x09
= x01
+ x02
;
112 const float x0a
= x00
- x03
;
113 const float x0b
= x01
- x02
;
114 const float x0c
= 1.38703984532215f
*x04
+ 0.275899379282943f
*x07
;
115 const float x0d
= 1.17587560241936f
*x05
+ 0.785694958387102f
*x06
;
116 const float x0e
= -0.785694958387102f
*x05
+ 1.17587560241936f
*x06
;
117 const float x0f
= 0.275899379282943f
*x04
- 1.38703984532215f
*x07
;
118 const float x10
= 0.353553390593274f
* (x0c
- x0d
);
119 const float x11
= 0.353553390593274f
* (x0e
- x0f
);
120 dst
[0*dst_stridea
] = 0.353553390593274f
* (x08
+ x09
);
121 dst
[1*dst_stridea
] = 0.353553390593274f
* (x0c
+ x0d
);
122 dst
[2*dst_stridea
] = 0.461939766255643f
*x0a
+ 0.191341716182545f
*x0b
;
123 dst
[3*dst_stridea
] = 0.707106781186547f
* (x10
- x11
);
124 dst
[4*dst_stridea
] = 0.353553390593274f
* (x08
- x09
);
125 dst
[5*dst_stridea
] = 0.707106781186547f
* (x10
+ x11
);
126 dst
[6*dst_stridea
] = 0.191341716182545f
*x0a
- 0.461939766255643f
*x0b
;
127 dst
[7*dst_stridea
] = 0.353553390593274f
* (x0e
+ x0f
);
133 static void av_always_inline
idct8_1d(float *dst
, const float *src
,
134 int dst_stridea
, int dst_strideb
,
135 int src_stridea
, int src_strideb
,
140 for (i
= 0; i
< 8; i
++) {
141 const float x00
= 1.4142135623731f
*src
[0*src_stridea
];
142 const float x01
= 1.38703984532215f
*src
[1*src_stridea
] + 0.275899379282943f
*src
[7*src_stridea
];
143 const float x02
= 1.30656296487638f
*src
[2*src_stridea
] + 0.541196100146197f
*src
[6*src_stridea
];
144 const float x03
= 1.17587560241936f
*src
[3*src_stridea
] + 0.785694958387102f
*src
[5*src_stridea
];
145 const float x04
= 1.4142135623731f
*src
[4*src_stridea
];
146 const float x05
= -0.785694958387102f
*src
[3*src_stridea
] + 1.17587560241936f
*src
[5*src_stridea
];
147 const float x06
= 0.541196100146197f
*src
[2*src_stridea
] - 1.30656296487638f
*src
[6*src_stridea
];
148 const float x07
= -0.275899379282943f
*src
[1*src_stridea
] + 1.38703984532215f
*src
[7*src_stridea
];
149 const float x09
= x00
+ x04
;
150 const float x0a
= x01
+ x03
;
151 const float x0b
= 1.4142135623731f
*x02
;
152 const float x0c
= x00
- x04
;
153 const float x0d
= x01
- x03
;
154 const float x0e
= 0.353553390593274f
* (x09
- x0b
);
155 const float x0f
= 0.353553390593274f
* (x0c
+ x0d
);
156 const float x10
= 0.353553390593274f
* (x0c
- x0d
);
157 const float x11
= 1.4142135623731f
*x06
;
158 const float x12
= x05
+ x07
;
159 const float x13
= x05
- x07
;
160 const float x14
= 0.353553390593274f
* (x11
+ x12
);
161 const float x15
= 0.353553390593274f
* (x11
- x12
);
162 const float x16
= 0.5f
*x13
;
163 dst
[0*dst_stridea
] = (add
? dst
[ 0*dst_stridea
] : 0) + 0.25f
* (x09
+ x0b
) + 0.353553390593274f
*x0a
;
164 dst
[1*dst_stridea
] = (add
? dst
[ 1*dst_stridea
] : 0) + 0.707106781186547f
* (x0f
+ x15
);
165 dst
[2*dst_stridea
] = (add
? dst
[ 2*dst_stridea
] : 0) + 0.707106781186547f
* (x0f
- x15
);
166 dst
[3*dst_stridea
] = (add
? dst
[ 3*dst_stridea
] : 0) + 0.707106781186547f
* (x0e
+ x16
);
167 dst
[4*dst_stridea
] = (add
? dst
[ 4*dst_stridea
] : 0) + 0.707106781186547f
* (x0e
- x16
);
168 dst
[5*dst_stridea
] = (add
? dst
[ 5*dst_stridea
] : 0) + 0.707106781186547f
* (x10
- x14
);
169 dst
[6*dst_stridea
] = (add
? dst
[ 6*dst_stridea
] : 0) + 0.707106781186547f
* (x10
+ x14
);
170 dst
[7*dst_stridea
] = (add
? dst
[ 7*dst_stridea
] : 0) + 0.25f
* (x09
+ x0b
) - 0.353553390593274f
*x0a
;
177 static void av_always_inline
fdct16_1d(float *dst
, const float *src
,
178 int dst_stridea
, int dst_strideb
,
179 int src_stridea
, int src_strideb
)
183 for (i
= 0; i
< 16; i
++) {
184 const float x00
= src
[ 0*src_stridea
] + src
[15*src_stridea
];
185 const float x01
= src
[ 1*src_stridea
] + src
[14*src_stridea
];
186 const float x02
= src
[ 2*src_stridea
] + src
[13*src_stridea
];
187 const float x03
= src
[ 3*src_stridea
] + src
[12*src_stridea
];
188 const float x04
= src
[ 4*src_stridea
] + src
[11*src_stridea
];
189 const float x05
= src
[ 5*src_stridea
] + src
[10*src_stridea
];
190 const float x06
= src
[ 6*src_stridea
] + src
[ 9*src_stridea
];
191 const float x07
= src
[ 7*src_stridea
] + src
[ 8*src_stridea
];
192 const float x08
= src
[ 0*src_stridea
] - src
[15*src_stridea
];
193 const float x09
= src
[ 1*src_stridea
] - src
[14*src_stridea
];
194 const float x0a
= src
[ 2*src_stridea
] - src
[13*src_stridea
];
195 const float x0b
= src
[ 3*src_stridea
] - src
[12*src_stridea
];
196 const float x0c
= src
[ 4*src_stridea
] - src
[11*src_stridea
];
197 const float x0d
= src
[ 5*src_stridea
] - src
[10*src_stridea
];
198 const float x0e
= src
[ 6*src_stridea
] - src
[ 9*src_stridea
];
199 const float x0f
= src
[ 7*src_stridea
] - src
[ 8*src_stridea
];
200 const float x10
= x00
+ x07
;
201 const float x11
= x01
+ x06
;
202 const float x12
= x02
+ x05
;
203 const float x13
= x03
+ x04
;
204 const float x14
= x00
- x07
;
205 const float x15
= x01
- x06
;
206 const float x16
= x02
- x05
;
207 const float x17
= x03
- x04
;
208 const float x18
= x10
+ x13
;
209 const float x19
= x11
+ x12
;
210 const float x1a
= x10
- x13
;
211 const float x1b
= x11
- x12
;
212 const float x1c
= 1.38703984532215f
*x14
+ 0.275899379282943f
*x17
;
213 const float x1d
= 1.17587560241936f
*x15
+ 0.785694958387102f
*x16
;
214 const float x1e
= -0.785694958387102f
*x15
+ 1.17587560241936f
*x16
;
215 const float x1f
= 0.275899379282943f
*x14
- 1.38703984532215f
*x17
;
216 const float x20
= 0.25f
* (x1c
- x1d
);
217 const float x21
= 0.25f
* (x1e
- x1f
);
218 const float x22
= 1.40740373752638f
*x08
+ 0.138617169199091f
*x0f
;
219 const float x23
= 1.35331800117435f
*x09
+ 0.410524527522357f
*x0e
;
220 const float x24
= 1.24722501298667f
*x0a
+ 0.666655658477747f
*x0d
;
221 const float x25
= 1.09320186700176f
*x0b
+ 0.897167586342636f
*x0c
;
222 const float x26
= -0.897167586342636f
*x0b
+ 1.09320186700176f
*x0c
;
223 const float x27
= 0.666655658477747f
*x0a
- 1.24722501298667f
*x0d
;
224 const float x28
= -0.410524527522357f
*x09
+ 1.35331800117435f
*x0e
;
225 const float x29
= 0.138617169199091f
*x08
- 1.40740373752638f
*x0f
;
226 const float x2a
= x22
+ x25
;
227 const float x2b
= x23
+ x24
;
228 const float x2c
= x22
- x25
;
229 const float x2d
= x23
- x24
;
230 const float x2e
= 0.25f
* (x2a
- x2b
);
231 const float x2f
= 0.326640741219094f
*x2c
+ 0.135299025036549f
*x2d
;
232 const float x30
= 0.135299025036549f
*x2c
- 0.326640741219094f
*x2d
;
233 const float x31
= x26
+ x29
;
234 const float x32
= x27
+ x28
;
235 const float x33
= x26
- x29
;
236 const float x34
= x27
- x28
;
237 const float x35
= 0.25f
* (x31
- x32
);
238 const float x36
= 0.326640741219094f
*x33
+ 0.135299025036549f
*x34
;
239 const float x37
= 0.135299025036549f
*x33
- 0.326640741219094f
*x34
;
240 dst
[ 0*dst_stridea
] = 0.25f
* (x18
+ x19
);
241 dst
[ 1*dst_stridea
] = 0.25f
* (x2a
+ x2b
);
242 dst
[ 2*dst_stridea
] = 0.25f
* (x1c
+ x1d
);
243 dst
[ 3*dst_stridea
] = 0.707106781186547f
* (x2f
- x37
);
244 dst
[ 4*dst_stridea
] = 0.326640741219094f
*x1a
+ 0.135299025036549f
*x1b
;
245 dst
[ 5*dst_stridea
] = 0.707106781186547f
* (x2f
+ x37
);
246 dst
[ 6*dst_stridea
] = 0.707106781186547f
* (x20
- x21
);
247 dst
[ 7*dst_stridea
] = 0.707106781186547f
* (x2e
+ x35
);
248 dst
[ 8*dst_stridea
] = 0.25f
* (x18
- x19
);
249 dst
[ 9*dst_stridea
] = 0.707106781186547f
* (x2e
- x35
);
250 dst
[10*dst_stridea
] = 0.707106781186547f
* (x20
+ x21
);
251 dst
[11*dst_stridea
] = 0.707106781186547f
* (x30
- x36
);
252 dst
[12*dst_stridea
] = 0.135299025036549f
*x1a
- 0.326640741219094f
*x1b
;
253 dst
[13*dst_stridea
] = 0.707106781186547f
* (x30
+ x36
);
254 dst
[14*dst_stridea
] = 0.25f
* (x1e
+ x1f
);
255 dst
[15*dst_stridea
] = 0.25f
* (x31
+ x32
);
261 static void av_always_inline
idct16_1d(float *dst
, const float *src
,
262 int dst_stridea
, int dst_strideb
,
263 int src_stridea
, int src_strideb
,
268 for (i
= 0; i
< 16; i
++) {
269 const float x00
= 1.4142135623731f
*src
[ 0*src_stridea
];
270 const float x01
= 1.40740373752638f
*src
[ 1*src_stridea
] + 0.138617169199091f
*src
[15*src_stridea
];
271 const float x02
= 1.38703984532215f
*src
[ 2*src_stridea
] + 0.275899379282943f
*src
[14*src_stridea
];
272 const float x03
= 1.35331800117435f
*src
[ 3*src_stridea
] + 0.410524527522357f
*src
[13*src_stridea
];
273 const float x04
= 1.30656296487638f
*src
[ 4*src_stridea
] + 0.541196100146197f
*src
[12*src_stridea
];
274 const float x05
= 1.24722501298667f
*src
[ 5*src_stridea
] + 0.666655658477747f
*src
[11*src_stridea
];
275 const float x06
= 1.17587560241936f
*src
[ 6*src_stridea
] + 0.785694958387102f
*src
[10*src_stridea
];
276 const float x07
= 1.09320186700176f
*src
[ 7*src_stridea
] + 0.897167586342636f
*src
[ 9*src_stridea
];
277 const float x08
= 1.4142135623731f
*src
[ 8*src_stridea
];
278 const float x09
= -0.897167586342636f
*src
[ 7*src_stridea
] + 1.09320186700176f
*src
[ 9*src_stridea
];
279 const float x0a
= 0.785694958387102f
*src
[ 6*src_stridea
] - 1.17587560241936f
*src
[10*src_stridea
];
280 const float x0b
= -0.666655658477747f
*src
[ 5*src_stridea
] + 1.24722501298667f
*src
[11*src_stridea
];
281 const float x0c
= 0.541196100146197f
*src
[ 4*src_stridea
] - 1.30656296487638f
*src
[12*src_stridea
];
282 const float x0d
= -0.410524527522357f
*src
[ 3*src_stridea
] + 1.35331800117435f
*src
[13*src_stridea
];
283 const float x0e
= 0.275899379282943f
*src
[ 2*src_stridea
] - 1.38703984532215f
*src
[14*src_stridea
];
284 const float x0f
= -0.138617169199091f
*src
[ 1*src_stridea
] + 1.40740373752638f
*src
[15*src_stridea
];
285 const float x12
= x00
+ x08
;
286 const float x13
= x01
+ x07
;
287 const float x14
= x02
+ x06
;
288 const float x15
= x03
+ x05
;
289 const float x16
= 1.4142135623731f
*x04
;
290 const float x17
= x00
- x08
;
291 const float x18
= x01
- x07
;
292 const float x19
= x02
- x06
;
293 const float x1a
= x03
- x05
;
294 const float x1d
= x12
+ x16
;
295 const float x1e
= x13
+ x15
;
296 const float x1f
= 1.4142135623731f
*x14
;
297 const float x20
= x12
- x16
;
298 const float x21
= x13
- x15
;
299 const float x22
= 0.25f
* (x1d
- x1f
);
300 const float x23
= 0.25f
* (x20
+ x21
);
301 const float x24
= 0.25f
* (x20
- x21
);
302 const float x25
= 1.4142135623731f
*x17
;
303 const float x26
= 1.30656296487638f
*x18
+ 0.541196100146197f
*x1a
;
304 const float x27
= 1.4142135623731f
*x19
;
305 const float x28
= -0.541196100146197f
*x18
+ 1.30656296487638f
*x1a
;
306 const float x29
= 0.176776695296637f
* (x25
+ x27
) + 0.25f
*x26
;
307 const float x2a
= 0.25f
* (x25
- x27
);
308 const float x2b
= 0.176776695296637f
* (x25
+ x27
) - 0.25f
*x26
;
309 const float x2c
= 0.353553390593274f
*x28
;
310 const float x1b
= 0.707106781186547f
* (x2a
- x2c
);
311 const float x1c
= 0.707106781186547f
* (x2a
+ x2c
);
312 const float x2d
= 1.4142135623731f
*x0c
;
313 const float x2e
= x0b
+ x0d
;
314 const float x2f
= x0a
+ x0e
;
315 const float x30
= x09
+ x0f
;
316 const float x31
= x09
- x0f
;
317 const float x32
= x0a
- x0e
;
318 const float x33
= x0b
- x0d
;
319 const float x37
= 1.4142135623731f
*x2d
;
320 const float x38
= 1.30656296487638f
*x2e
+ 0.541196100146197f
*x30
;
321 const float x39
= 1.4142135623731f
*x2f
;
322 const float x3a
= -0.541196100146197f
*x2e
+ 1.30656296487638f
*x30
;
323 const float x3b
= 0.176776695296637f
* (x37
+ x39
) + 0.25f
*x38
;
324 const float x3c
= 0.25f
* (x37
- x39
);
325 const float x3d
= 0.176776695296637f
* (x37
+ x39
) - 0.25f
*x38
;
326 const float x3e
= 0.353553390593274f
*x3a
;
327 const float x34
= 0.707106781186547f
* (x3c
- x3e
);
328 const float x35
= 0.707106781186547f
* (x3c
+ x3e
);
329 const float x3f
= 1.4142135623731f
*x32
;
330 const float x40
= x31
+ x33
;
331 const float x41
= x31
- x33
;
332 const float x42
= 0.25f
* (x3f
+ x40
);
333 const float x43
= 0.25f
* (x3f
- x40
);
334 const float x44
= 0.353553390593274f
*x41
;
335 dst
[ 0*dst_stridea
] = (add
? dst
[ 0*dst_stridea
] : 0) + 0.176776695296637f
* (x1d
+ x1f
) + 0.25f
*x1e
;
336 dst
[ 1*dst_stridea
] = (add
? dst
[ 1*dst_stridea
] : 0) + 0.707106781186547f
* (x29
+ x3d
);
337 dst
[ 2*dst_stridea
] = (add
? dst
[ 2*dst_stridea
] : 0) + 0.707106781186547f
* (x29
- x3d
);
338 dst
[ 3*dst_stridea
] = (add
? dst
[ 3*dst_stridea
] : 0) + 0.707106781186547f
* (x23
- x43
);
339 dst
[ 4*dst_stridea
] = (add
? dst
[ 4*dst_stridea
] : 0) + 0.707106781186547f
* (x23
+ x43
);
340 dst
[ 5*dst_stridea
] = (add
? dst
[ 5*dst_stridea
] : 0) + 0.707106781186547f
* (x1b
- x35
);
341 dst
[ 6*dst_stridea
] = (add
? dst
[ 6*dst_stridea
] : 0) + 0.707106781186547f
* (x1b
+ x35
);
342 dst
[ 7*dst_stridea
] = (add
? dst
[ 7*dst_stridea
] : 0) + 0.707106781186547f
* (x22
+ x44
);
343 dst
[ 8*dst_stridea
] = (add
? dst
[ 8*dst_stridea
] : 0) + 0.707106781186547f
* (x22
- x44
);
344 dst
[ 9*dst_stridea
] = (add
? dst
[ 9*dst_stridea
] : 0) + 0.707106781186547f
* (x1c
+ x34
);
345 dst
[10*dst_stridea
] = (add
? dst
[10*dst_stridea
] : 0) + 0.707106781186547f
* (x1c
- x34
);
346 dst
[11*dst_stridea
] = (add
? dst
[11*dst_stridea
] : 0) + 0.707106781186547f
* (x24
+ x42
);
347 dst
[12*dst_stridea
] = (add
? dst
[12*dst_stridea
] : 0) + 0.707106781186547f
* (x24
- x42
);
348 dst
[13*dst_stridea
] = (add
? dst
[13*dst_stridea
] : 0) + 0.707106781186547f
* (x2b
- x3b
);
349 dst
[14*dst_stridea
] = (add
? dst
[14*dst_stridea
] : 0) + 0.707106781186547f
* (x2b
+ x3b
);
350 dst
[15*dst_stridea
] = (add
? dst
[15*dst_stridea
] : 0) + 0.176776695296637f
* (x1d
+ x1f
) - 0.25f
*x1e
;
356 #define DEF_FILTER_FREQ_FUNCS(bsize) \
357 static av_always_inline void filter_freq_##bsize(const float *src, int src_linesize, \
358 float *dst, int dst_linesize, \
359 AVExpr *expr, double *var_values, \
363 DECLARE_ALIGNED(32, float, tmp_block1)[bsize * bsize]; \
364 DECLARE_ALIGNED(32, float, tmp_block2)[bsize * bsize]; \
367 fdct##bsize##_1d(tmp_block1, src, 1, bsize, 1, src_linesize); \
368 fdct##bsize##_1d(tmp_block2, tmp_block1, bsize, 1, bsize, 1); \
370 for (i = 0; i < bsize*bsize; i++) { \
371 float *b = &tmp_block2[i]; \
372 /* frequency filtering */ \
374 var_values[VAR_C] = fabsf(*b); \
375 *b *= av_expr_eval(expr, var_values, NULL); \
377 if (fabsf(*b) < sigma_th) \
383 idct##bsize##_1d(tmp_block1, tmp_block2, 1, bsize, 1, bsize, 0); \
384 idct##bsize##_1d(dst, tmp_block1, dst_linesize, 1, bsize, 1, 1); \
387 static void filter_freq_sigma_##bsize(DCTdnoizContext *s, \
388 const float *src, int src_linesize, \
389 float *dst, int dst_linesize, int thread_id) \
391 filter_freq_##bsize(src, src_linesize, dst, dst_linesize, NULL, NULL, s->th); \
394 static void filter_freq_expr_##bsize(DCTdnoizContext *s, \
395 const float *src, int src_linesize, \
396 float *dst, int dst_linesize, int thread_id) \
398 filter_freq_##bsize(src, src_linesize, dst, dst_linesize, \
399 s->expr[thread_id], s->var_values[thread_id], 0); \
402 DEF_FILTER_FREQ_FUNCS(8)
403 DEF_FILTER_FREQ_FUNCS(16)
405 #define DCT3X3_0_0 0.5773502691896258f /* 1/sqrt(3) */
406 #define DCT3X3_0_1 0.5773502691896258f /* 1/sqrt(3) */
407 #define DCT3X3_0_2 0.5773502691896258f /* 1/sqrt(3) */
408 #define DCT3X3_1_0 0.7071067811865475f /* 1/sqrt(2) */
409 #define DCT3X3_1_2 -0.7071067811865475f /* -1/sqrt(2) */
410 #define DCT3X3_2_0 0.4082482904638631f /* 1/sqrt(6) */
411 #define DCT3X3_2_1 -0.8164965809277261f /* -2/sqrt(6) */
412 #define DCT3X3_2_2 0.4082482904638631f /* 1/sqrt(6) */
414 static av_always_inline
void color_decorrelation(float **dst
, int dst_linesize
,
415 const uint8_t **src
, int src_linesize
,
420 float *dstp_r
= dst
[0];
421 float *dstp_g
= dst
[1];
422 float *dstp_b
= dst
[2];
423 const uint8_t *srcp
= src
[0];
425 for (y
= 0; y
< h
; y
++) {
426 for (x
= 0; x
< w
; x
++) {
427 dstp_r
[x
] = srcp
[r
] * DCT3X3_0_0
+ srcp
[g
] * DCT3X3_0_1
+ srcp
[b
] * DCT3X3_0_2
;
428 dstp_g
[x
] = srcp
[r
] * DCT3X3_1_0
+ srcp
[b
] * DCT3X3_1_2
;
429 dstp_b
[x
] = srcp
[r
] * DCT3X3_2_0
+ srcp
[g
] * DCT3X3_2_1
+ srcp
[b
] * DCT3X3_2_2
;
432 srcp
+= src_linesize
- w
* 3;
433 dstp_r
+= dst_linesize
;
434 dstp_g
+= dst_linesize
;
435 dstp_b
+= dst_linesize
;
439 static av_always_inline
void color_correlation(uint8_t **dst
, int dst_linesize
,
440 float **src
, int src_linesize
,
445 const float *src_r
= src
[0];
446 const float *src_g
= src
[1];
447 const float *src_b
= src
[2];
448 uint8_t *dstp
= dst
[0];
450 for (y
= 0; y
< h
; y
++) {
451 for (x
= 0; x
< w
; x
++) {
452 dstp
[r
] = av_clip_uint8(src_r
[x
] * DCT3X3_0_0
+ src_g
[x
] * DCT3X3_1_0
+ src_b
[x
] * DCT3X3_2_0
);
453 dstp
[g
] = av_clip_uint8(src_r
[x
] * DCT3X3_0_1
+ src_b
[x
] * DCT3X3_2_1
);
454 dstp
[b
] = av_clip_uint8(src_r
[x
] * DCT3X3_0_2
+ src_g
[x
] * DCT3X3_1_2
+ src_b
[x
] * DCT3X3_2_2
);
457 dstp
+= dst_linesize
- w
* 3;
458 src_r
+= src_linesize
;
459 src_g
+= src_linesize
;
460 src_b
+= src_linesize
;
464 #define DECLARE_COLOR_FUNCS(name, r, g, b) \
465 static void color_decorrelation_##name(float **dst, int dst_linesize, \
466 const uint8_t **src, int src_linesize, \
469 color_decorrelation(dst, dst_linesize, src, src_linesize, w, h, r, g, b); \
472 static void color_correlation_##name(uint8_t **dst, int dst_linesize, \
473 float **src, int src_linesize, \
476 color_correlation(dst, dst_linesize, src, src_linesize, w, h, r, g, b); \
479 DECLARE_COLOR_FUNCS(rgb
, 0, 1, 2)
480 DECLARE_COLOR_FUNCS(bgr
, 2, 1, 0)
482 static av_always_inline
void color_decorrelation_gbrp(float **dst
, int dst_linesize
,
483 const uint8_t **src
, int src_linesize
,
487 float *dstp_r
= dst
[0];
488 float *dstp_g
= dst
[1];
489 float *dstp_b
= dst
[2];
490 const uint8_t *srcp_r
= src
[2];
491 const uint8_t *srcp_g
= src
[0];
492 const uint8_t *srcp_b
= src
[1];
494 for (y
= 0; y
< h
; y
++) {
495 for (x
= 0; x
< w
; x
++) {
496 dstp_r
[x
] = srcp_r
[x
] * DCT3X3_0_0
+ srcp_g
[x
] * DCT3X3_0_1
+ srcp_b
[x
] * DCT3X3_0_2
;
497 dstp_g
[x
] = srcp_r
[x
] * DCT3X3_1_0
+ srcp_b
[x
] * DCT3X3_1_2
;
498 dstp_b
[x
] = srcp_r
[x
] * DCT3X3_2_0
+ srcp_g
[x
] * DCT3X3_2_1
+ srcp_b
[x
] * DCT3X3_2_2
;
500 srcp_r
+= src_linesize
;
501 srcp_g
+= src_linesize
;
502 srcp_b
+= src_linesize
;
503 dstp_r
+= dst_linesize
;
504 dstp_g
+= dst_linesize
;
505 dstp_b
+= dst_linesize
;
509 static av_always_inline
void color_correlation_gbrp(uint8_t **dst
, int dst_linesize
,
510 float **src
, int src_linesize
,
514 const float *src_r
= src
[0];
515 const float *src_g
= src
[1];
516 const float *src_b
= src
[2];
517 uint8_t *dstp_r
= dst
[2];
518 uint8_t *dstp_g
= dst
[0];
519 uint8_t *dstp_b
= dst
[1];
521 for (y
= 0; y
< h
; y
++) {
522 for (x
= 0; x
< w
; x
++) {
523 dstp_r
[x
] = av_clip_uint8(src_r
[x
] * DCT3X3_0_0
+ src_g
[x
] * DCT3X3_1_0
+ src_b
[x
] * DCT3X3_2_0
);
524 dstp_g
[x
] = av_clip_uint8(src_r
[x
] * DCT3X3_0_1
+ src_b
[x
] * DCT3X3_2_1
);
525 dstp_b
[x
] = av_clip_uint8(src_r
[x
] * DCT3X3_0_2
+ src_g
[x
] * DCT3X3_1_2
+ src_b
[x
] * DCT3X3_2_2
);
527 dstp_r
+= dst_linesize
;
528 dstp_g
+= dst_linesize
;
529 dstp_b
+= dst_linesize
;
530 src_r
+= src_linesize
;
531 src_g
+= src_linesize
;
532 src_b
+= src_linesize
;
536 static int config_input(AVFilterLink
*inlink
)
538 AVFilterContext
*ctx
= inlink
->dst
;
539 DCTdnoizContext
*s
= ctx
->priv
;
540 int i
, x
, y
, bx
, by
, linesize
, *iweights
, max_slice_h
, slice_h
;
541 const int bsize
= 1 << s
->n
;
543 switch (inlink
->format
) {
544 case AV_PIX_FMT_BGR24
:
545 s
->color_decorrelation
= color_decorrelation_bgr
;
546 s
->color_correlation
= color_correlation_bgr
;
548 case AV_PIX_FMT_RGB24
:
549 s
->color_decorrelation
= color_decorrelation_rgb
;
550 s
->color_correlation
= color_correlation_rgb
;
552 case AV_PIX_FMT_GBRP
:
553 s
->color_decorrelation
= color_decorrelation_gbrp
;
554 s
->color_correlation
= color_correlation_gbrp
;
560 s
->pr_width
= inlink
->w
- (inlink
->w
- bsize
) % s
->step
;
561 s
->pr_height
= inlink
->h
- (inlink
->h
- bsize
) % s
->step
;
562 if (s
->pr_width
!= inlink
->w
)
563 av_log(ctx
, AV_LOG_WARNING
, "The last %d horizontal pixels won't be denoised\n",
564 inlink
->w
- s
->pr_width
);
565 if (s
->pr_height
!= inlink
->h
)
566 av_log(ctx
, AV_LOG_WARNING
, "The last %d vertical pixels won't be denoised\n",
567 inlink
->h
- s
->pr_height
);
569 max_slice_h
= s
->pr_height
/ ((s
->bsize
- 1) * 2);
570 if (max_slice_h
== 0)
571 return AVERROR(EINVAL
);
573 s
->nb_threads
= FFMIN3(MAX_THREADS
, ff_filter_get_nb_threads(ctx
), max_slice_h
);
574 av_log(ctx
, AV_LOG_DEBUG
, "threads: [max=%d hmax=%d user=%d] => %d\n",
575 MAX_THREADS
, max_slice_h
, ff_filter_get_nb_threads(ctx
), s
->nb_threads
);
577 s
->p_linesize
= linesize
= FFALIGN(s
->pr_width
, 32);
578 for (i
= 0; i
< 2; i
++) {
579 s
->cbuf
[i
][0] = av_malloc_array(linesize
* s
->pr_height
, sizeof(*s
->cbuf
[i
][0]));
580 s
->cbuf
[i
][1] = av_malloc_array(linesize
* s
->pr_height
, sizeof(*s
->cbuf
[i
][1]));
581 s
->cbuf
[i
][2] = av_malloc_array(linesize
* s
->pr_height
, sizeof(*s
->cbuf
[i
][2]));
582 if (!s
->cbuf
[i
][0] || !s
->cbuf
[i
][1] || !s
->cbuf
[i
][2])
583 return AVERROR(ENOMEM
);
586 /* eval expressions are probably not thread safe when the eval internal
587 * state can be changed (typically through load & store operations) */
589 for (i
= 0; i
< s
->nb_threads
; i
++) {
590 int ret
= av_expr_parse(&s
->expr
[i
], s
->expr_str
, var_names
,
591 NULL
, NULL
, NULL
, NULL
, 0, ctx
);
597 /* each slice will need to (pre & re)process the top and bottom block of
598 * the previous one in in addition to its processing area. This is because
599 * each pixel is averaged by all the surrounding blocks */
600 slice_h
= (int)ceilf(s
->pr_height
/ (float)s
->nb_threads
) + (s
->bsize
- 1) * 2;
601 for (i
= 0; i
< s
->nb_threads
; i
++) {
602 s
->slices
[i
] = av_malloc_array(linesize
, slice_h
* sizeof(*s
->slices
[i
]));
604 return AVERROR(ENOMEM
);
607 s
->weights
= av_malloc(s
->pr_height
* linesize
* sizeof(*s
->weights
));
609 return AVERROR(ENOMEM
);
610 iweights
= av_calloc(s
->pr_height
, linesize
* sizeof(*iweights
));
612 return AVERROR(ENOMEM
);
613 for (y
= 0; y
< s
->pr_height
- bsize
+ 1; y
+= s
->step
)
614 for (x
= 0; x
< s
->pr_width
- bsize
+ 1; x
+= s
->step
)
615 for (by
= 0; by
< bsize
; by
++)
616 for (bx
= 0; bx
< bsize
; bx
++)
617 iweights
[(y
+ by
)*linesize
+ x
+ bx
]++;
618 for (y
= 0; y
< s
->pr_height
; y
++)
619 for (x
= 0; x
< s
->pr_width
; x
++)
620 s
->weights
[y
*linesize
+ x
] = 1. / iweights
[y
*linesize
+ x
];
626 static av_cold
int init(AVFilterContext
*ctx
)
628 DCTdnoizContext
*s
= ctx
->priv
;
630 s
->bsize
= 1 << s
->n
;
631 if (s
->overlap
== -1)
632 s
->overlap
= s
->bsize
- 1;
634 if (s
->overlap
> s
->bsize
- 1) {
635 av_log(s
, AV_LOG_ERROR
, "Overlap value can not except %d "
636 "with a block size of %dx%d\n",
637 s
->bsize
- 1, s
->bsize
, s
->bsize
);
638 return AVERROR(EINVAL
);
643 case 3: s
->filter_freq_func
= filter_freq_expr_8
; break;
644 case 4: s
->filter_freq_func
= filter_freq_expr_16
; break;
645 default: av_assert0(0);
649 case 3: s
->filter_freq_func
= filter_freq_sigma_8
; break;
650 case 4: s
->filter_freq_func
= filter_freq_sigma_16
; break;
651 default: av_assert0(0);
655 s
->th
= s
->sigma
* 3.;
656 s
->step
= s
->bsize
- s
->overlap
;
660 static const enum AVPixelFormat pix_fmts
[] = {
661 AV_PIX_FMT_BGR24
, AV_PIX_FMT_RGB24
,
666 typedef struct ThreadData
{
670 static int filter_slice(AVFilterContext
*ctx
,
671 void *arg
, int jobnr
, int nb_jobs
)
674 DCTdnoizContext
*s
= ctx
->priv
;
675 const ThreadData
*td
= arg
;
676 const int w
= s
->pr_width
;
677 const int h
= s
->pr_height
;
678 const int slice_start
= (h
* jobnr
) / nb_jobs
;
679 const int slice_end
= (h
* (jobnr
+1)) / nb_jobs
;
680 const int slice_start_ctx
= FFMAX(slice_start
- s
->bsize
+ 1, 0);
681 const int slice_end_ctx
= FFMIN(slice_end
, h
- s
->bsize
+ 1);
682 const int slice_h
= slice_end_ctx
- slice_start_ctx
;
683 const int src_linesize
= s
->p_linesize
;
684 const int dst_linesize
= s
->p_linesize
;
685 const int slice_linesize
= s
->p_linesize
;
687 const float *src
= td
->src
+ slice_start_ctx
* src_linesize
;
688 const float *weights
= s
->weights
+ slice_start
* dst_linesize
;
689 float *slice
= s
->slices
[jobnr
];
692 memset(slice
, 0, (slice_h
+ s
->bsize
- 1) * dst_linesize
* sizeof(*slice
));
695 for (y
= 0; y
< slice_h
; y
+= s
->step
) {
696 for (x
= 0; x
< w
- s
->bsize
+ 1; x
+= s
->step
)
697 s
->filter_freq_func(s
, src
+ x
, src_linesize
,
698 slice
+ x
, slice_linesize
,
700 src
+= s
->step
* src_linesize
;
701 slice
+= s
->step
* slice_linesize
;
705 slice
= s
->slices
[jobnr
] + (slice_start
- slice_start_ctx
) * slice_linesize
;
706 dst
= td
->dst
+ slice_start
* dst_linesize
;
707 for (y
= slice_start
; y
< slice_end
; y
++) {
708 for (x
= 0; x
< w
; x
++)
709 dst
[x
] = slice
[x
] * weights
[x
];
710 slice
+= slice_linesize
;
712 weights
+= dst_linesize
;
718 static int filter_frame(AVFilterLink
*inlink
, AVFrame
*in
)
720 AVFilterContext
*ctx
= inlink
->dst
;
721 DCTdnoizContext
*s
= ctx
->priv
;
722 AVFilterLink
*outlink
= inlink
->dst
->outputs
[0];
726 if (av_frame_is_writable(in
)) {
731 out
= ff_get_video_buffer(outlink
, outlink
->w
, outlink
->h
);
734 return AVERROR(ENOMEM
);
736 av_frame_copy_props(out
, in
);
739 s
->color_decorrelation(s
->cbuf
[0], s
->p_linesize
,
740 (const uint8_t **)in
->data
, in
->linesize
[0],
741 s
->pr_width
, s
->pr_height
);
742 for (plane
= 0; plane
< 3; plane
++) {
744 .src
= s
->cbuf
[0][plane
],
745 .dst
= s
->cbuf
[1][plane
],
747 ff_filter_execute(ctx
, filter_slice
, &td
, NULL
, s
->nb_threads
);
749 s
->color_correlation(out
->data
, out
->linesize
[0],
750 s
->cbuf
[1], s
->p_linesize
,
751 s
->pr_width
, s
->pr_height
);
755 uint8_t *dst
= out
->data
[0];
756 const uint8_t *src
= in
->data
[0];
757 const int dst_linesize
= out
->linesize
[0];
758 const int src_linesize
= in
->linesize
[0];
759 const int hpad
= (inlink
->w
- s
->pr_width
) * 3;
760 const int vpad
= (inlink
->h
- s
->pr_height
);
763 uint8_t *dstp
= dst
+ s
->pr_width
* 3;
764 const uint8_t *srcp
= src
+ s
->pr_width
* 3;
766 for (y
= 0; y
< s
->pr_height
; y
++) {
767 memcpy(dstp
, srcp
, hpad
);
768 dstp
+= dst_linesize
;
769 srcp
+= src_linesize
;
773 uint8_t *dstp
= dst
+ s
->pr_height
* dst_linesize
;
774 const uint8_t *srcp
= src
+ s
->pr_height
* src_linesize
;
776 for (y
= 0; y
< vpad
; y
++) {
777 memcpy(dstp
, srcp
, inlink
->w
* 3);
778 dstp
+= dst_linesize
;
779 srcp
+= src_linesize
;
786 return ff_filter_frame(outlink
, out
);
789 static av_cold
void uninit(AVFilterContext
*ctx
)
792 DCTdnoizContext
*s
= ctx
->priv
;
794 av_freep(&s
->weights
);
795 for (i
= 0; i
< 2; i
++) {
796 av_freep(&s
->cbuf
[i
][0]);
797 av_freep(&s
->cbuf
[i
][1]);
798 av_freep(&s
->cbuf
[i
][2]);
800 for (i
= 0; i
< s
->nb_threads
; i
++) {
801 av_freep(&s
->slices
[i
]);
802 av_expr_free(s
->expr
[i
]);
806 static const AVFilterPad dctdnoiz_inputs
[] = {
809 .type
= AVMEDIA_TYPE_VIDEO
,
810 .filter_frame
= filter_frame
,
811 .config_props
= config_input
,
815 const FFFilter ff_vf_dctdnoiz
= {
816 .p
.name
= "dctdnoiz",
817 .p
.description
= NULL_IF_CONFIG_SMALL("Denoise frames using 2D DCT."),
818 .p
.priv_class
= &dctdnoiz_class
,
819 .p
.flags
= AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC
| AVFILTER_FLAG_SLICE_THREADS
,
820 .priv_size
= sizeof(DCTdnoizContext
),
823 FILTER_INPUTS(dctdnoiz_inputs
),
824 FILTER_OUTPUTS(ff_video_default_filterpad
),
825 FILTER_PIXFMTS_ARRAY(pix_fmts
),