2 * Copyright (c) 2003-2013 Loren Merritt
3 * Copyright (c) 2015 Paul B Mahol
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 /* Computes the Structural Similarity Metric between two video streams.
24 * Z. Wang, A. C. Bovik, H. R. Sheikh and E. P. Simoncelli,
25 * "Image quality assessment: From error visibility to structural similarity,"
26 * IEEE Transactions on Image Processing, vol. 13, no. 4, pp. 600-612, Apr. 2004.
28 * To improve speed, this implementation uses the standard approximation of
29 * overlapped 8x8 block sums, rather than the original gaussian weights.
34 * Calculate the SSIM between two input videos.
37 #include "libavutil/avstring.h"
38 #include "libavutil/file_open.h"
39 #include "libavutil/mem.h"
40 #include "libavutil/opt.h"
41 #include "libavutil/pixdesc.h"
43 #include "drawutils.h"
45 #include "framesync.h"
48 typedef struct SSIMContext
{
57 double ssim
[4], ssim_total
;
66 int (*ssim_plane
)(AVFilterContext
*ctx
, void *arg
,
67 int jobnr
, int nb_jobs
);
71 #define OFFSET(x) offsetof(SSIMContext, x)
72 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
74 static const AVOption ssim_options
[] = {
75 {"stats_file", "Set file where to store per-frame difference information", OFFSET(stats_file_str
), AV_OPT_TYPE_STRING
, {.str
=NULL
}, 0, 0, FLAGS
},
76 {"f", "Set file where to store per-frame difference information", OFFSET(stats_file_str
), AV_OPT_TYPE_STRING
, {.str
=NULL
}, 0, 0, FLAGS
},
80 FRAMESYNC_DEFINE_CLASS(ssim
, SSIMContext
, fs
);
82 static void set_meta(AVDictionary
**metadata
, const char *key
, char comp
, float d
)
85 snprintf(value
, sizeof(value
), "%f", d
);
88 snprintf(key2
, sizeof(key2
), "%s%c", key
, comp
);
89 av_dict_set(metadata
, key2
, value
, 0);
91 av_dict_set(metadata
, key
, value
, 0);
95 static void ssim_4x4xn_16bit(const uint8_t *main8
, ptrdiff_t main_stride
,
96 const uint8_t *ref8
, ptrdiff_t ref_stride
,
97 int64_t (*sums
)[4], int width
)
99 const uint16_t *main16
= (const uint16_t *)main8
;
100 const uint16_t *ref16
= (const uint16_t *)ref8
;
106 for (z
= 0; z
< width
; z
++) {
107 uint64_t s1
= 0, s2
= 0, ss
= 0, s12
= 0;
109 for (y
= 0; y
< 4; y
++) {
110 for (x
= 0; x
< 4; x
++) {
111 unsigned a
= main16
[x
+ y
* main_stride
];
112 unsigned b
= ref16
[x
+ y
* ref_stride
];
131 static void ssim_4x4xn_8bit(const uint8_t *main
, ptrdiff_t main_stride
,
132 const uint8_t *ref
, ptrdiff_t ref_stride
,
133 int (*sums
)[4], int width
)
137 for (z
= 0; z
< width
; z
++) {
138 uint32_t s1
= 0, s2
= 0, ss
= 0, s12
= 0;
140 for (y
= 0; y
< 4; y
++) {
141 for (x
= 0; x
< 4; x
++) {
142 int a
= main
[x
+ y
* main_stride
];
143 int b
= ref
[x
+ y
* ref_stride
];
162 static float ssim_end1x(int64_t s1
, int64_t s2
, int64_t ss
, int64_t s12
, int max
)
164 int64_t ssim_c1
= (int64_t)(.01*.01*max
*max
*64 + .5);
165 int64_t ssim_c2
= (int64_t)(.03*.03*max
*max
*64*63 + .5);
171 int64_t vars
= fss
* 64 - fs1
* fs1
- fs2
* fs2
;
172 int64_t covar
= fs12
* 64 - fs1
* fs2
;
174 return (float)(2 * fs1
* fs2
+ ssim_c1
) * (float)(2 * covar
+ ssim_c2
)
175 / ((float)(fs1
* fs1
+ fs2
* fs2
+ ssim_c1
) * (float)(vars
+ ssim_c2
));
178 static float ssim_end1(int s1
, int s2
, int ss
, int s12
)
180 static const int ssim_c1
= (int)(.01*.01*255*255*64 + .5);
181 static const int ssim_c2
= (int)(.03*.03*255*255*64*63 + .5);
187 int vars
= fss
* 64 - fs1
* fs1
- fs2
* fs2
;
188 int covar
= fs12
* 64 - fs1
* fs2
;
190 return (float)(2 * fs1
* fs2
+ ssim_c1
) * (float)(2 * covar
+ ssim_c2
)
191 / ((float)(fs1
* fs1
+ fs2
* fs2
+ ssim_c1
) * (float)(vars
+ ssim_c2
));
194 static float ssim_endn_16bit(const int64_t (*sum0
)[4], const int64_t (*sum1
)[4], int width
, int max
)
198 for (int i
= 0; i
< width
; i
++)
199 ssim
+= ssim_end1x(sum0
[i
][0] + sum0
[i
+ 1][0] + sum1
[i
][0] + sum1
[i
+ 1][0],
200 sum0
[i
][1] + sum0
[i
+ 1][1] + sum1
[i
][1] + sum1
[i
+ 1][1],
201 sum0
[i
][2] + sum0
[i
+ 1][2] + sum1
[i
][2] + sum1
[i
+ 1][2],
202 sum0
[i
][3] + sum0
[i
+ 1][3] + sum1
[i
][3] + sum1
[i
+ 1][3],
207 static double ssim_endn_8bit(const int (*sum0
)[4], const int (*sum1
)[4], int width
)
211 for (int i
= 0; i
< width
; i
++)
212 ssim
+= ssim_end1(sum0
[i
][0] + sum0
[i
+ 1][0] + sum1
[i
][0] + sum1
[i
+ 1][0],
213 sum0
[i
][1] + sum0
[i
+ 1][1] + sum1
[i
][1] + sum1
[i
+ 1][1],
214 sum0
[i
][2] + sum0
[i
+ 1][2] + sum1
[i
][2] + sum1
[i
+ 1][2],
215 sum0
[i
][3] + sum0
[i
+ 1][3] + sum1
[i
][3] + sum1
[i
+ 1][3]);
219 #define SUM_LEN(w) (((w) >> 2) + 3)
221 typedef struct ThreadData
{
222 const uint8_t *main_data
[4];
223 const uint8_t *ref_data
[4];
224 int main_linesize
[4];
235 static int ssim_plane_16bit(AVFilterContext
*ctx
, void *arg
,
236 int jobnr
, int nb_jobs
)
238 ThreadData
*td
= arg
;
239 double *score
= td
->score
[jobnr
];
240 void *temp
= td
->temp
[jobnr
];
241 const int max
= td
->max
;
243 for (int c
= 0; c
< td
->nb_components
; c
++) {
244 const uint8_t *main_data
= td
->main_data
[c
];
245 const uint8_t *ref_data
= td
->ref_data
[c
];
246 const int main_stride
= td
->main_linesize
[c
];
247 const int ref_stride
= td
->ref_linesize
[c
];
248 int width
= td
->planewidth
[c
];
249 int height
= td
->planeheight
[c
];
250 const int slice_start
= ((height
>> 2) * jobnr
) / nb_jobs
;
251 const int slice_end
= ((height
>> 2) * (jobnr
+1)) / nb_jobs
;
252 const int ystart
= FFMAX(1, slice_start
);
255 int64_t (*sum0
)[4] = temp
;
256 int64_t (*sum1
)[4] = sum0
+ SUM_LEN(width
);
261 for (int y
= ystart
; y
< slice_end
; y
++) {
262 for (; z
<= y
; z
++) {
263 FFSWAP(void*, sum0
, sum1
);
264 ssim_4x4xn_16bit(&main_data
[4 * z
* main_stride
], main_stride
,
265 &ref_data
[4 * z
* ref_stride
], ref_stride
,
269 ssim
+= ssim_endn_16bit((const int64_t (*)[4])sum0
, (const int64_t (*)[4])sum1
, width
- 1, max
);
278 static int ssim_plane(AVFilterContext
*ctx
, void *arg
,
279 int jobnr
, int nb_jobs
)
281 ThreadData
*td
= arg
;
282 double *score
= td
->score
[jobnr
];
283 void *temp
= td
->temp
[jobnr
];
284 SSIMDSPContext
*dsp
= td
->dsp
;
286 for (int c
= 0; c
< td
->nb_components
; c
++) {
287 const uint8_t *main_data
= td
->main_data
[c
];
288 const uint8_t *ref_data
= td
->ref_data
[c
];
289 const int main_stride
= td
->main_linesize
[c
];
290 const int ref_stride
= td
->ref_linesize
[c
];
291 int width
= td
->planewidth
[c
];
292 int height
= td
->planeheight
[c
];
293 const int slice_start
= ((height
>> 2) * jobnr
) / nb_jobs
;
294 const int slice_end
= ((height
>> 2) * (jobnr
+1)) / nb_jobs
;
295 const int ystart
= FFMAX(1, slice_start
);
298 int (*sum0
)[4] = temp
;
299 int (*sum1
)[4] = sum0
+ SUM_LEN(width
);
304 for (int y
= ystart
; y
< slice_end
; y
++) {
305 for (; z
<= y
; z
++) {
306 FFSWAP(void*, sum0
, sum1
);
307 dsp
->ssim_4x4_line(&main_data
[4 * z
* main_stride
], main_stride
,
308 &ref_data
[4 * z
* ref_stride
], ref_stride
,
312 ssim
+= dsp
->ssim_end_line((const int (*)[4])sum0
, (const int (*)[4])sum1
, width
- 1);
321 static double ssim_db(double ssim
, double weight
)
323 return (fabs(weight
- ssim
) > 1e-9) ? 10.0 * log10(weight
/ (weight
- ssim
)) : INFINITY
;
326 static int do_ssim(FFFrameSync
*fs
)
328 AVFilterContext
*ctx
= fs
->parent
;
329 SSIMContext
*s
= ctx
->priv
;
330 AVFrame
*master
, *ref
;
331 AVDictionary
**metadata
;
332 double c
[4] = {0}, ssimv
= 0.0;
336 ret
= ff_framesync_dualinput_get(fs
, &master
, &ref
);
339 if (ctx
->is_disabled
|| !ref
)
340 return ff_filter_frame(ctx
->outputs
[0], master
);
341 metadata
= &master
->metadata
;
345 td
.nb_components
= s
->nb_components
;
351 for (int n
= 0; n
< s
->nb_components
; n
++) {
352 td
.main_data
[n
] = master
->data
[n
];
353 td
.ref_data
[n
] = ref
->data
[n
];
354 td
.main_linesize
[n
] = master
->linesize
[n
];
355 td
.ref_linesize
[n
] = ref
->linesize
[n
];
356 td
.planewidth
[n
] = s
->planewidth
[n
];
357 td
.planeheight
[n
] = s
->planeheight
[n
];
360 if (master
->color_range
!= ref
->color_range
) {
361 av_log(ctx
, AV_LOG_WARNING
, "master and reference "
362 "frames use different color ranges (%s != %s)\n",
363 av_color_range_name(master
->color_range
),
364 av_color_range_name(ref
->color_range
));
367 ff_filter_execute(ctx
, s
->ssim_plane
, &td
, NULL
,
368 FFMIN((s
->planeheight
[1] + 3) >> 2, s
->nb_threads
));
370 for (i
= 0; i
< s
->nb_components
; i
++) {
371 for (int j
= 0; j
< s
->nb_threads
; j
++)
372 c
[i
] += s
->score
[j
][i
];
373 c
[i
] = c
[i
] / (((s
->planewidth
[i
] >> 2) - 1) * ((s
->planeheight
[i
] >> 2) - 1));
376 for (i
= 0; i
< s
->nb_components
; i
++) {
377 ssimv
+= s
->coefs
[i
] * c
[i
];
381 for (i
= 0; i
< s
->nb_components
; i
++) {
382 int cidx
= s
->is_rgb
? s
->rgba_map
[i
] : i
;
383 set_meta(metadata
, "lavfi.ssim.", s
->comps
[i
], c
[cidx
]);
385 s
->ssim_total
+= ssimv
;
387 set_meta(metadata
, "lavfi.ssim.All", 0, ssimv
);
388 set_meta(metadata
, "lavfi.ssim.dB", 0, ssim_db(ssimv
, 1.0));
391 fprintf(s
->stats_file
, "n:%"PRId64
" ", s
->nb_frames
);
393 for (i
= 0; i
< s
->nb_components
; i
++) {
394 int cidx
= s
->is_rgb
? s
->rgba_map
[i
] : i
;
395 fprintf(s
->stats_file
, "%c:%f ", s
->comps
[i
], c
[cidx
]);
398 fprintf(s
->stats_file
, "All:%f (%f)\n", ssimv
, ssim_db(ssimv
, 1.0));
401 return ff_filter_frame(ctx
->outputs
[0], master
);
404 static av_cold
int init(AVFilterContext
*ctx
)
406 SSIMContext
*s
= ctx
->priv
;
408 if (s
->stats_file_str
) {
409 if (!strcmp(s
->stats_file_str
, "-")) {
410 s
->stats_file
= stdout
;
412 s
->stats_file
= avpriv_fopen_utf8(s
->stats_file_str
, "w");
413 if (!s
->stats_file
) {
414 int err
= AVERROR(errno
);
415 av_log(ctx
, AV_LOG_ERROR
, "Could not open stats file %s: %s\n",
416 s
->stats_file_str
, av_err2str(err
));
422 s
->fs
.on_event
= do_ssim
;
426 static const enum AVPixelFormat pix_fmts
[] = {
427 AV_PIX_FMT_GRAY8
, AV_PIX_FMT_GRAY9
, AV_PIX_FMT_GRAY10
,
428 AV_PIX_FMT_GRAY12
, AV_PIX_FMT_GRAY14
, AV_PIX_FMT_GRAY16
,
429 AV_PIX_FMT_YUV420P
, AV_PIX_FMT_YUV422P
, AV_PIX_FMT_YUV444P
,
430 AV_PIX_FMT_YUV440P
, AV_PIX_FMT_YUV411P
, AV_PIX_FMT_YUV410P
,
431 AV_PIX_FMT_YUVJ411P
, AV_PIX_FMT_YUVJ420P
, AV_PIX_FMT_YUVJ422P
,
432 AV_PIX_FMT_YUVJ440P
, AV_PIX_FMT_YUVJ444P
,
434 #define PF(suf) AV_PIX_FMT_YUV420##suf, AV_PIX_FMT_YUV422##suf, AV_PIX_FMT_YUV444##suf, AV_PIX_FMT_GBR##suf
435 PF(P9
), PF(P10
), PF(P12
), PF(P14
), PF(P16
),
439 static int config_input_ref(AVFilterLink
*inlink
)
441 const AVPixFmtDescriptor
*desc
= av_pix_fmt_desc_get(inlink
->format
);
442 AVFilterContext
*ctx
= inlink
->dst
;
443 SSIMContext
*s
= ctx
->priv
;
446 s
->nb_threads
= ff_filter_get_nb_threads(ctx
);
447 s
->nb_components
= desc
->nb_components
;
449 if (ctx
->inputs
[0]->w
!= ctx
->inputs
[1]->w
||
450 ctx
->inputs
[0]->h
!= ctx
->inputs
[1]->h
) {
451 av_log(ctx
, AV_LOG_ERROR
, "Width and height of input videos must be same.\n");
452 return AVERROR(EINVAL
);
455 s
->is_rgb
= ff_fill_rgba_map(s
->rgba_map
, inlink
->format
) >= 0;
456 s
->comps
[0] = s
->is_rgb
? 'R' : 'Y';
457 s
->comps
[1] = s
->is_rgb
? 'G' : 'U';
458 s
->comps
[2] = s
->is_rgb
? 'B' : 'V';
461 s
->planeheight
[1] = s
->planeheight
[2] = AV_CEIL_RSHIFT(inlink
->h
, desc
->log2_chroma_h
);
462 s
->planeheight
[0] = s
->planeheight
[3] = inlink
->h
;
463 s
->planewidth
[1] = s
->planewidth
[2] = AV_CEIL_RSHIFT(inlink
->w
, desc
->log2_chroma_w
);
464 s
->planewidth
[0] = s
->planewidth
[3] = inlink
->w
;
465 for (int i
= 0; i
< s
->nb_components
; i
++)
466 sum
+= s
->planeheight
[i
] * s
->planewidth
[i
];
467 for (int i
= 0; i
< s
->nb_components
; i
++)
468 s
->coefs
[i
] = (double) s
->planeheight
[i
] * s
->planewidth
[i
] / sum
;
470 s
->temp
= av_calloc(s
->nb_threads
, sizeof(*s
->temp
));
472 return AVERROR(ENOMEM
);
474 for (int t
= 0; t
< s
->nb_threads
; t
++) {
475 s
->temp
[t
] = av_calloc(2 * SUM_LEN(inlink
->w
), (desc
->comp
[0].depth
> 8) ? sizeof(int64_t[4]) : sizeof(int[4]));
477 return AVERROR(ENOMEM
);
479 s
->max
= (1 << desc
->comp
[0].depth
) - 1;
481 s
->ssim_plane
= desc
->comp
[0].depth
> 8 ? ssim_plane_16bit
: ssim_plane
;
482 s
->dsp
.ssim_4x4_line
= ssim_4x4xn_8bit
;
483 s
->dsp
.ssim_end_line
= ssim_endn_8bit
;
485 ff_ssim_init_x86(&s
->dsp
);
488 s
->score
= av_calloc(s
->nb_threads
, sizeof(*s
->score
));
490 return AVERROR(ENOMEM
);
492 for (int t
= 0; t
< s
->nb_threads
; t
++) {
493 s
->score
[t
] = av_calloc(s
->nb_components
, sizeof(*s
->score
[0]));
495 return AVERROR(ENOMEM
);
501 static int config_output(AVFilterLink
*outlink
)
503 AVFilterContext
*ctx
= outlink
->src
;
504 SSIMContext
*s
= ctx
->priv
;
505 AVFilterLink
*mainlink
= ctx
->inputs
[0];
506 FilterLink
*il
= ff_filter_link(mainlink
);
507 FilterLink
*ol
= ff_filter_link(outlink
);
510 ret
= ff_framesync_init_dualinput(&s
->fs
, ctx
);
513 outlink
->w
= mainlink
->w
;
514 outlink
->h
= mainlink
->h
;
515 outlink
->time_base
= mainlink
->time_base
;
516 outlink
->sample_aspect_ratio
= mainlink
->sample_aspect_ratio
;
517 ol
->frame_rate
= il
->frame_rate
;
519 if ((ret
= ff_framesync_configure(&s
->fs
)) < 0)
522 outlink
->time_base
= s
->fs
.time_base
;
524 if (av_cmp_q(mainlink
->time_base
, outlink
->time_base
) ||
525 av_cmp_q(ctx
->inputs
[1]->time_base
, outlink
->time_base
))
526 av_log(ctx
, AV_LOG_WARNING
, "not matching timebases found between first input: %d/%d and second input %d/%d, results may be incorrect!\n",
527 mainlink
->time_base
.num
, mainlink
->time_base
.den
,
528 ctx
->inputs
[1]->time_base
.num
, ctx
->inputs
[1]->time_base
.den
);
533 static int activate(AVFilterContext
*ctx
)
535 SSIMContext
*s
= ctx
->priv
;
536 return ff_framesync_activate(&s
->fs
);
539 static av_cold
void uninit(AVFilterContext
*ctx
)
541 SSIMContext
*s
= ctx
->priv
;
543 if (s
->nb_frames
> 0) {
546 for (int i
= 0; i
< s
->nb_components
; i
++) {
547 int c
= s
->is_rgb
? s
->rgba_map
[i
] : i
;
548 av_strlcatf(buf
, sizeof(buf
), " %c:%f (%f)", s
->comps
[i
], s
->ssim
[c
] / s
->nb_frames
,
549 ssim_db(s
->ssim
[c
], s
->nb_frames
));
551 av_log(ctx
, AV_LOG_INFO
, "SSIM%s All:%f (%f)\n", buf
,
552 s
->ssim_total
/ s
->nb_frames
, ssim_db(s
->ssim_total
, s
->nb_frames
));
555 ff_framesync_uninit(&s
->fs
);
557 if (s
->stats_file
&& s
->stats_file
!= stdout
)
558 fclose(s
->stats_file
);
560 for (int t
= 0; t
< s
->nb_threads
&& s
->score
; t
++)
561 av_freep(&s
->score
[t
]);
564 for (int t
= 0; t
< s
->nb_threads
&& s
->temp
; t
++)
565 av_freep(&s
->temp
[t
]);
569 static const AVFilterPad ssim_inputs
[] = {
572 .type
= AVMEDIA_TYPE_VIDEO
,
575 .type
= AVMEDIA_TYPE_VIDEO
,
576 .config_props
= config_input_ref
,
580 static const AVFilterPad ssim_outputs
[] = {
583 .type
= AVMEDIA_TYPE_VIDEO
,
584 .config_props
= config_output
,
588 const FFFilter ff_vf_ssim
= {
590 .p
.description
= NULL_IF_CONFIG_SMALL("Calculate the SSIM between two video streams."),
591 .p
.priv_class
= &ssim_class
,
592 .p
.flags
= AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL
|
593 AVFILTER_FLAG_SLICE_THREADS
|
594 AVFILTER_FLAG_METADATA_ONLY
,
595 .preinit
= ssim_framesync_preinit
,
598 .activate
= activate
,
599 .priv_size
= sizeof(SSIMContext
),
600 FILTER_INPUTS(ssim_inputs
),
601 FILTER_OUTPUTS(ssim_outputs
),
602 FILTER_PIXFMTS_ARRAY(pix_fmts
),