1 diff --git a/configure b/configure
2 index 94f513288a..493493b4c5 100755
5 @@ -3751,7 +3751,7 @@ vaguedenoiser_filter_deps="gpl"
6 vflip_vulkan_filter_deps="vulkan spirv_compiler"
7 vidstabdetect_filter_deps="libvidstab"
8 vidstabtransform_filter_deps="libvidstab"
9 -libvmaf_filter_deps="libvmaf pthreads"
10 +libvmaf_filter_deps="libvmaf"
11 zmq_filter_deps="libzmq"
12 zoompan_filter_deps="swscale"
13 zscale_filter_deps="libzimg const_nan"
14 @@ -6626,7 +6626,7 @@ enabled libtwolame && require libtwolame twolame.h twolame_init -ltwolame
15 enabled libuavs3d && require_pkg_config libuavs3d "uavs3d >= 1.1.41" uavs3d.h uavs3d_decode
16 enabled libv4l2 && require_pkg_config libv4l2 libv4l2 libv4l2.h v4l2_ioctl
17 enabled libvidstab && require_pkg_config libvidstab "vidstab >= 0.98" vid.stab/libvidstab.h vsMotionDetectInit
18 -enabled libvmaf && require_pkg_config libvmaf "libvmaf >= 1.5.2" libvmaf.h compute_vmaf
19 +enabled libvmaf && require_pkg_config libvmaf "libvmaf >= 2.0.0" libvmaf.h vmaf_init
20 enabled libvo_amrwbenc && require libvo_amrwbenc vo-amrwbenc/enc_if.h E_IF_init -lvo-amrwbenc
21 enabled libvorbis && require_pkg_config libvorbis vorbis vorbis/codec.h vorbis_info_init &&
22 require_pkg_config libvorbisenc vorbisenc vorbis/vorbisenc.h vorbis_encode_init
23 diff --git a/doc/filters.texi b/doc/filters.texi
24 index 248c09caf8..9a890d1555 100644
25 --- a/doc/filters.texi
26 +++ b/doc/filters.texi
27 @@ -14666,68 +14666,60 @@ ffmpeg -i input.mov -vf lensfun=make=Canon:model="Canon EOS 100D":lens_model="Ca
31 -Obtain the VMAF (Video Multi-Method Assessment Fusion)
32 -score between two input videos.
33 +Calculate the VMAF (Video Multi-Method Assessment Fusion) score for a
34 +reference/distorted pair of input videos.
36 -The first input is the encoded video, and the second input is the reference video.
37 +The first input is the distorted video, and the second input is the reference video.
39 The obtained VMAF score is printed through the logging system.
41 It requires Netflix's vmaf library (libvmaf) as a pre-requisite.
42 After installing the library it can be enabled using:
43 @code{./configure --enable-libvmaf}.
44 -If no model path is specified it uses the default model: @code{vmaf_v0.6.1.pkl}.
46 The filter has following options:
50 -Set the model path which is to be used for SVM.
51 -Default value: @code{"/usr/local/share/model/vmaf_v0.6.1.pkl"}
54 -Set the file path to be used to store logs.
56 +A `|` delimited list of vmaf models. Each model can be configured with a number of parameters.
57 +Default value: @code{"version=vmaf_v0.6.1"}
60 -Set the format of the log file (csv, json or xml).
62 +Deprecated, use model='path=...'.
64 @item enable_transform
65 -This option can enable/disable the @code{score_transform} applied to the final predicted VMAF score,
66 -if you have specified score_transform option in the input parameter file passed to @code{run_vmaf_training.py}
67 -Default value: @code{false}
68 +Deprecated, use model='enable_transform=true'.
71 -Invokes the phone model which will generate VMAF scores higher than in the
72 -regular model, which is more suitable for laptop, TV, etc. viewing conditions.
73 -Default value: @code{false}
74 +Deprecated, use model='enable_transform=true'.
76 +@item enable_conf_interval
77 +Deprecated, use model='enable_conf_interval=true'.
80 +A `|` delimited list of features. Each feature can be configured with a number of parameters.
83 -Enables computing psnr along with vmaf.
84 -Default value: @code{false}
85 +Deprecated, use feature='name=psnr'.
88 -Enables computing ssim along with vmaf.
89 -Default value: @code{false}
90 +Deprecated, use feature='name=ssim'.
93 -Enables computing ms_ssim along with vmaf.
94 -Default value: @code{false}
95 +Deprecated, use feature='name=ms_ssim'.
98 -Set the pool method to be used for computing vmaf.
99 -Options are @code{min}, @code{harmonic_mean} or @code{mean} (default).
101 +Set the file path to be used to store log files.
104 +Set the format of the log file (xml, json, csv, or sub).
107 -Set number of threads to be used when computing vmaf.
108 -Default value: @code{0}, which makes use of all available logical processors.
109 +Set number of threads to be used when initializing libvmaf.
110 +Default value: @code{0}, no threads.
113 -Set interval for frame subsampling used when computing vmaf.
114 -Default value: @code{1}
116 -@item enable_conf_interval
117 -Enables confidence interval.
118 -Default value: @code{false}
119 +Set frame subsampling interval to be used.
122 This filter also supports the @ref{framesync} options.
123 @@ -14735,23 +14727,31 @@ This filter also supports the @ref{framesync} options.
127 -On the below examples the input file @file{main.mpg} being processed is
128 -compared with the reference file @file{ref.mpg}.
129 +In the examples below, a distorted video @file{distorted.mpg} is
130 +compared with a reference file @file{reference.mpg}.
135 +ffmpeg -i distorted.mpg -i reference.mpg -lavfi libvmaf=log_path=output.xml -f null -
139 +Example with multiple models:
141 -ffmpeg -i main.mpg -i ref.mpg -lavfi libvmaf -f null -
142 +ffmpeg -i distorted.mpg -i reference.mpg -lavfi libvmaf='model=version=vmaf_v0.6.1\\:name=vmaf|version=vmaf_v0.6.1neg\\:name=vmaf_neg' -f null -
146 -Example with options:
147 +Example with multiple additional features:
149 -ffmpeg -i main.mpg -i ref.mpg -lavfi libvmaf="psnr=1:log_fmt=json" -f null -
150 +ffmpeg -i distorted.mpg -i reference.mpg -lavfi libvmaf='feature=name=psnr|name=ciede' -f null -
154 Example with options and different containers:
156 -ffmpeg -i main.mpg -i ref.mkv -lavfi "[0:v]settb=AVTB,setpts=PTS-STARTPTS[main];[1:v]settb=AVTB,setpts=PTS-STARTPTS[ref];[main][ref]libvmaf=psnr=1:log_fmt=json" -f null -
157 +ffmpeg -i distorted.mpg -i reference.mkv -lavfi "[0:v]settb=AVTB,setpts=PTS-STARTPTS[main];[1:v]settb=AVTB,setpts=PTS-STARTPTS[ref];[main][ref]libvmaf=log_fmt=json:log_path=output.json" -f null -
161 diff --git a/libavfilter/vf_libvmaf.c b/libavfilter/vf_libvmaf.c
162 index 5d492126eb..eee1c280ef 100644
163 --- a/libavfilter/vf_libvmaf.c
164 +++ b/libavfilter/vf_libvmaf.c
166 * Calculate the VMAF between two input videos.
169 -#include <pthread.h>
172 #include "libavutil/avstring.h"
173 #include "libavutil/opt.h"
174 #include "libavutil/pixdesc.h"
176 typedef struct LIBVMAFContext {
177 const AVClass *class;
179 - const AVPixFmtDescriptor *desc;
183 - int vmaf_thread_created;
184 - pthread_t vmaf_thread;
185 - pthread_mutex_t lock;
186 - pthread_cond_t cond;
196 int enable_transform;
199 @@ -65,184 +51,487 @@ typedef struct LIBVMAFContext {
202 int enable_conf_interval;
208 + unsigned model_cnt;
209 + unsigned frame_cnt;
213 #define OFFSET(x) offsetof(LIBVMAFContext, x)
214 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
216 static const AVOption libvmaf_options[] = {
217 - {"model_path", "Set the model to be used for computing vmaf.", OFFSET(model_path), AV_OPT_TYPE_STRING, {.str="/usr/local/share/model/vmaf_v0.6.1.pkl"}, 0, 1, FLAGS},
218 - {"log_path", "Set the file path to be used to store logs.", OFFSET(log_path), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
219 - {"log_fmt", "Set the format of the log (csv, json or xml).", OFFSET(log_fmt), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
220 - {"enable_transform", "Enables transform for computing vmaf.", OFFSET(enable_transform), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
221 - {"phone_model", "Invokes the phone model that will generate higher VMAF scores.", OFFSET(phone_model), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
222 - {"psnr", "Enables computing psnr along with vmaf.", OFFSET(psnr), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
223 - {"ssim", "Enables computing ssim along with vmaf.", OFFSET(ssim), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
224 - {"ms_ssim", "Enables computing ms-ssim along with vmaf.", OFFSET(ms_ssim), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
225 + {"model_path", "use model='path=...'.", OFFSET(model_path), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS|AV_OPT_FLAG_DEPRECATED},
226 + {"log_path", "Set the file path to be used to write log.", OFFSET(log_path), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
227 + {"log_fmt", "Set the format of the log (csv, json, xml, or sub).", OFFSET(log_fmt), AV_OPT_TYPE_STRING, {.str="xml"}, 0, 1, FLAGS},
228 + {"enable_transform", "use model='enable_transform=true'.", OFFSET(enable_transform), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS|AV_OPT_FLAG_DEPRECATED},
229 + {"phone_model", "use model='enable_transform=true'.", OFFSET(phone_model), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS|AV_OPT_FLAG_DEPRECATED},
230 + {"psnr", "use feature='name=psnr'.", OFFSET(psnr), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS|AV_OPT_FLAG_DEPRECATED},
231 + {"ssim", "use feature='name=ssim'.", OFFSET(ssim), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS|AV_OPT_FLAG_DEPRECATED},
232 + {"ms_ssim", "use feature='name=ms_ssim'.", OFFSET(ms_ssim), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS|AV_OPT_FLAG_DEPRECATED},
233 {"pool", "Set the pool method to be used for computing vmaf.", OFFSET(pool), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
234 {"n_threads", "Set number of threads to be used when computing vmaf.", OFFSET(n_threads), AV_OPT_TYPE_INT, {.i64=0}, 0, UINT_MAX, FLAGS},
235 {"n_subsample", "Set interval for frame subsampling used when computing vmaf.", OFFSET(n_subsample), AV_OPT_TYPE_INT, {.i64=1}, 1, UINT_MAX, FLAGS},
236 - {"enable_conf_interval", "Enables confidence interval.", OFFSET(enable_conf_interval), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
237 + {"enable_conf_interval", "model='enable_conf_interval=true'.", OFFSET(enable_conf_interval), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS|AV_OPT_FLAG_DEPRECATED},
238 + {"model", "Set the model to be used for computing vmaf.", OFFSET(model_cfg), AV_OPT_TYPE_STRING, {.str="version=vmaf_v0.6.1"}, 0, 1, FLAGS},
239 + {"feature", "Set the feature to be used for computing vmaf.", OFFSET(feature_cfg), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
243 FRAMESYNC_DEFINE_CLASS(libvmaf, LIBVMAFContext, fs);
245 -#define read_frame_fn(type, bits) \
246 - static int read_frame_##bits##bit(float *ref_data, float *main_data, \
247 - float *temp_data, int stride, void *ctx) \
249 - LIBVMAFContext *s = (LIBVMAFContext *) ctx; \
252 - pthread_mutex_lock(&s->lock); \
254 - while (!s->frame_set && !s->eof) { \
255 - pthread_cond_wait(&s->cond, &s->lock); \
258 - if (s->frame_set) { \
259 - int ref_stride = s->gref->linesize[0]; \
260 - int main_stride = s->gmain->linesize[0]; \
262 - const type *ref_ptr = (const type *) s->gref->data[0]; \
263 - const type *main_ptr = (const type *) s->gmain->data[0]; \
265 - float *ptr = ref_data; \
266 - float factor = 1.f / (1 << (bits - 8)); \
268 - int h = s->height; \
269 - int w = s->width; \
273 - for (i = 0; i < h; i++) { \
274 - for ( j = 0; j < w; j++) { \
275 - ptr[j] = ref_ptr[j] * factor; \
277 - ref_ptr += ref_stride / sizeof(*ref_ptr); \
278 - ptr += stride / sizeof(*ptr); \
283 - for (i = 0; i < h; i++) { \
284 - for (j = 0; j < w; j++) { \
285 - ptr[j] = main_ptr[j] * factor; \
287 - main_ptr += main_stride / sizeof(*main_ptr); \
288 - ptr += stride / sizeof(*ptr); \
292 - ret = !s->frame_set; \
294 - av_frame_unref(s->gref); \
295 - av_frame_unref(s->gmain); \
296 - s->frame_set = 0; \
298 - pthread_cond_signal(&s->cond); \
299 - pthread_mutex_unlock(&s->lock); \
306 +static enum VmafPixelFormat pix_fmt_map(enum AVPixelFormat av_pix_fmt)
308 + switch (av_pix_fmt) {
309 + case AV_PIX_FMT_YUV420P:
310 + case AV_PIX_FMT_YUV420P10LE:
311 + case AV_PIX_FMT_YUV420P12LE:
312 + case AV_PIX_FMT_YUV420P16LE:
313 + return VMAF_PIX_FMT_YUV420P;
314 + case AV_PIX_FMT_YUV422P:
315 + case AV_PIX_FMT_YUV422P10LE:
316 + case AV_PIX_FMT_YUV422P12LE:
317 + case AV_PIX_FMT_YUV422P16LE:
318 + return VMAF_PIX_FMT_YUV422P;
319 + case AV_PIX_FMT_YUV444P:
320 + case AV_PIX_FMT_YUV444P10LE:
321 + case AV_PIX_FMT_YUV444P12LE:
322 + case AV_PIX_FMT_YUV444P16LE:
323 + return VMAF_PIX_FMT_YUV444P;
325 + return VMAF_PIX_FMT_UNKNOWN;
329 -read_frame_fn(uint8_t, 8);
330 -read_frame_fn(uint16_t, 10);
331 +static int copy_picture_data(AVFrame *src, VmafPicture *dst, unsigned bpc)
333 + int err = vmaf_picture_alloc(dst, pix_fmt_map(src->format), bpc,
334 + src->width, src->height);
336 + return AVERROR(ENOMEM);
338 + for (unsigned i = 0; i < 3; i++) {
339 + uint8_t *src_data = src->data[i];
340 + uint8_t *dst_data = dst->data[i];
341 + for (unsigned j = 0; j < dst->h[i]; j++) {
342 + memcpy(dst_data, src_data, sizeof(*dst_data) * dst->w[i]);
343 + src_data += src->linesize[i];
344 + dst_data += dst->stride[i];
351 -static void compute_vmaf_score(LIBVMAFContext *s)
352 +static int do_vmaf(FFFrameSync *fs)
354 - int (*read_frame)(float *ref_data, float *main_data, float *temp_data,
355 - int stride, void *ctx);
357 + AVFilterContext *ctx = fs->parent;
358 + LIBVMAFContext *s = ctx->priv;
359 + VmafPicture pic_ref, pic_dist;
360 + AVFrame *ref, *dist;
363 - if (s->desc->comp[0].depth <= 8) {
364 - read_frame = read_frame_8bit;
366 - read_frame = read_frame_10bit;
367 + int ret = ff_framesync_dualinput_get(fs, &dist, &ref);
370 + if (ctx->is_disabled || !ref)
371 + return ff_filter_frame(ctx->outputs[0], dist);
373 + err = copy_picture_data(ref, &pic_ref, s->bpc);
375 + av_log(s, AV_LOG_ERROR, "problem during vmaf_picture_alloc.\n");
376 + return AVERROR(ENOMEM);
379 + err = copy_picture_data(dist, &pic_dist, s->bpc);
381 + av_log(s, AV_LOG_ERROR, "problem during vmaf_picture_alloc.\n");
382 + vmaf_picture_unref(&pic_ref);
383 + return AVERROR(ENOMEM);
386 - format = (char *) s->desc->name;
387 + err = vmaf_read_pictures(s->vmaf, &pic_ref, &pic_dist, s->frame_cnt++);
389 + av_log(s, AV_LOG_ERROR, "problem during vmaf_read_pictures.\n");
390 + return AVERROR(EINVAL);
393 - s->error = compute_vmaf(&s->vmaf_score, format, s->width, s->height,
394 - read_frame, s, s->model_path, s->log_path,
395 - s->log_fmt, 0, 0, s->enable_transform,
396 - s->phone_model, s->psnr, s->ssim,
397 - s->ms_ssim, s->pool,
398 - s->n_threads, s->n_subsample, s->enable_conf_interval);
399 + return ff_filter_frame(ctx->outputs[0], dist);
402 -static void *call_vmaf(void *ctx)
404 +static AVDictionary **delimited_dict_parse(char *str, unsigned *cnt)
406 - LIBVMAFContext *s = (LIBVMAFContext *) ctx;
407 - compute_vmaf_score(s);
409 - av_log(ctx, AV_LOG_INFO, "VMAF score: %f\n",s->vmaf_score);
411 - pthread_mutex_lock(&s->lock);
412 - pthread_cond_signal(&s->cond);
413 - pthread_mutex_unlock(&s->lock);
414 + AVDictionary **dict = NULL;
415 + char *str_copy = NULL;
416 + char *saveptr = NULL;
424 + for (char *p = str; *p; p++) {
429 + dict = av_calloc(cnt2, sizeof(*dict));
433 + str_copy = av_strdup(str);
438 + for (unsigned i = 0; i < cnt2; i++) {
439 + char *s = av_strtok(i == 0 ? str_copy : NULL, "|", &saveptr);
442 + err = av_dict_parse_string(&dict[(*cnt)++], s, "=", ":", 0);
452 + for (unsigned i = 0; i < *cnt; i++) {
454 + av_dict_free(&dict[i]);
458 - pthread_exit(NULL);
465 -static int do_vmaf(FFFrameSync *fs)
466 +static int parse_features(AVFilterContext *ctx)
468 - AVFilterContext *ctx = fs->parent;
469 LIBVMAFContext *s = ctx->priv;
470 - AVFrame *master, *ref;
472 + AVDictionary **dict = NULL;
476 - ret = ff_framesync_dualinput_get(fs, &master, &ref);
480 - return ff_filter_frame(ctx->outputs[0], master);
481 + if (!s->feature_cfg)
484 + dict = delimited_dict_parse(s->feature_cfg, &dict_cnt);
486 + av_log(ctx, AV_LOG_ERROR,
487 + "could not parse feature config: %s\n", s->feature_cfg);
488 + return AVERROR(EINVAL);
491 - pthread_mutex_lock(&s->lock);
492 + for (unsigned i = 0; i < dict_cnt; i++) {
493 + char *feature_name = NULL;
494 + VmafFeatureDictionary *feature_opts_dict = NULL;
495 + AVDictionaryEntry *e = NULL;
497 + while (e = av_dict_get(dict[i], "", e, AV_DICT_IGNORE_SUFFIX)) {
498 + if (av_stristr(e->key, "name")) {
499 + feature_name = e->value;
503 + err = vmaf_feature_dictionary_set(&feature_opts_dict, e->key,
506 + av_log(ctx, AV_LOG_ERROR,
507 + "could not set feature option: %s.%s=%s\n",
508 + feature_name, e->key, e->value);
513 + err = vmaf_use_feature(s->vmaf, feature_name, feature_opts_dict);
515 + av_log(ctx, AV_LOG_ERROR,
516 + "problem during vmaf_use_feature: %s\n", feature_name);
521 - while (s->frame_set && !s->error) {
522 - pthread_cond_wait(&s->cond, &s->lock);
524 + for (unsigned i = 0; i < dict_cnt; i++) {
526 + av_dict_free(&dict[i]);
532 +static int parse_models(AVFilterContext *ctx)
534 + LIBVMAFContext *s = ctx->priv;
535 + AVDictionary **dict;
539 + if (!s->model_cfg) return 0;
543 + dict = delimited_dict_parse(s->model_cfg, &dict_cnt);
545 av_log(ctx, AV_LOG_ERROR,
546 - "libvmaf encountered an error, check log for details\n");
547 - pthread_mutex_unlock(&s->lock);
548 + "could not parse model config: %s\n", s->model_cfg);
549 return AVERROR(EINVAL);
552 - av_frame_ref(s->gref, ref);
553 - av_frame_ref(s->gmain, master);
554 + s->model_cnt = dict_cnt;
555 + s->model = av_calloc(s->model_cnt, sizeof(*s->model));
557 + return AVERROR(ENOMEM);
559 + for (unsigned i = 0; i < dict_cnt; i++) {
560 + VmafModelConfig model_cfg = { 0 };
561 + AVDictionaryEntry *e = NULL;
562 + char *version = NULL;
565 + while (e = av_dict_get(dict[i], "", e, AV_DICT_IGNORE_SUFFIX)) {
566 + if (av_stristr(e->key, "disable_clip")) {
567 + model_cfg.flags |= av_stristr(e->value, "true") ?
568 + VMAF_MODEL_FLAG_DISABLE_CLIP : 0;
572 + if (av_stristr(e->key, "enable_transform")) {
573 + model_cfg.flags |= av_stristr(e->value, "true") ?
574 + VMAF_MODEL_FLAG_ENABLE_TRANSFORM : 0;
578 + if (av_stristr(e->key, "name")) {
579 + model_cfg.name = e->value;
583 + if (av_stristr(e->key, "version")) {
584 + version = e->value;
588 + if (av_stristr(e->key, "path")) {
595 + err = vmaf_model_load(&s->model[i], &model_cfg, version);
597 + av_log(ctx, AV_LOG_ERROR,
598 + "could not load libvmaf model with version: %s\n",
604 + if (path && !s->model[i]) {
605 + err = vmaf_model_load_from_path(&s->model[i], &model_cfg, path);
607 + av_log(ctx, AV_LOG_ERROR,
608 + "could not load libvmaf model with path: %s\n",
614 + if (!s->model[i]) {
615 + av_log(ctx, AV_LOG_ERROR,
616 + "could not load libvmaf model with config: %s\n",
621 + while (e = av_dict_get(dict[i], "", e, AV_DICT_IGNORE_SUFFIX)) {
622 + VmafFeatureDictionary *feature_opts_dict = NULL;
623 + char *feature_opt = NULL;
625 + char *feature_name = av_strtok(e->key, ".", &feature_opt);
629 + err = vmaf_feature_dictionary_set(&feature_opts_dict,
630 + feature_opt, e->value);
632 + av_log(ctx, AV_LOG_ERROR,
633 + "could not set feature option: %s.%s=%s\n",
634 + feature_name, feature_opt, e->value);
635 + err = AVERROR(EINVAL);
639 + err = vmaf_model_feature_overload(s->model[i], feature_name,
640 + feature_opts_dict);
642 + av_log(ctx, AV_LOG_ERROR,
643 + "could not overload feature: %s\n", feature_name);
644 + err = AVERROR(EINVAL);
650 + for (unsigned i = 0; i < s->model_cnt; i++) {
651 + err = vmaf_use_features_from_model(s->vmaf, s->model[i]);
653 + av_log(ctx, AV_LOG_ERROR,
654 + "problem during vmaf_use_features_from_model\n");
655 + err = AVERROR(EINVAL);
661 + for (unsigned i = 0; i < dict_cnt; i++) {
663 + av_dict_free(&dict[i]);
669 +static enum VmafLogLevel log_level_map(int log_level)
671 + switch (log_level) {
673 + return VMAF_LOG_LEVEL_NONE;
675 + return VMAF_LOG_LEVEL_ERROR;
676 + case AV_LOG_WARNING:
677 + return VMAF_LOG_LEVEL_WARNING;
679 + return VMAF_LOG_LEVEL_INFO;
681 + return VMAF_LOG_LEVEL_DEBUG;
683 + return VMAF_LOG_LEVEL_INFO;
687 +static int parse_deprecated_options(AVFilterContext *ctx)
689 + LIBVMAFContext *s = ctx->priv;
690 + VmafModel *model = NULL;
691 + VmafModelCollection *model_collection = NULL;
692 + enum VmafModelFlags flags = VMAF_MODEL_FLAGS_DEFAULT;
695 + VmafModelConfig model_cfg = {
700 + if (s->enable_transform || s->phone_model)
701 + flags |= VMAF_MODEL_FLAG_ENABLE_TRANSFORM;
703 + if (!s->model_path)
704 + goto extra_metrics_only;
706 + if (s->enable_conf_interval) {
707 + err = vmaf_model_collection_load_from_path(&model, &model_collection,
708 + &model_cfg, s->model_path);
710 + av_log(ctx, AV_LOG_ERROR,
711 + "problem loading model file: %s\n", s->model_path);
715 + err = vmaf_use_features_from_model_collection(s->vmaf, model_collection);
717 + av_log(ctx, AV_LOG_ERROR,
718 + "problem loading feature extractors from model file: %s\n",
723 + err = vmaf_model_load_from_path(&model, &model_cfg, s->model_path);
725 + av_log(ctx, AV_LOG_ERROR,
726 + "problem loading model file: %s\n", s->model_path);
729 + err = vmaf_use_features_from_model(s->vmaf, model);
731 + av_log(ctx, AV_LOG_ERROR,
732 + "problem loading feature extractors from model file: %s\n",
740 + VmafFeatureDictionary *d = NULL;
741 + vmaf_feature_dictionary_set(&d, "enable_chroma", "false");
743 + err = vmaf_use_feature(s->vmaf, "psnr", d);
745 + av_log(ctx, AV_LOG_ERROR,
746 + "problem loading feature extractor: psnr\n");
753 + err = vmaf_use_feature(s->vmaf, "float_ssim", NULL);
755 + av_log(ctx, AV_LOG_ERROR,
756 + "problem loading feature extractor: ssim\n");
761 - pthread_cond_signal(&s->cond);
762 - pthread_mutex_unlock(&s->lock);
764 + err = vmaf_use_feature(s->vmaf, "float_ms_ssim", NULL);
766 + av_log(ctx, AV_LOG_ERROR,
767 + "problem loading feature extractor: ms_ssim\n");
772 - return ff_filter_frame(ctx->outputs[0], master);
777 static av_cold int init(AVFilterContext *ctx)
779 LIBVMAFContext *s = ctx->priv;
782 - s->gref = av_frame_alloc();
783 - s->gmain = av_frame_alloc();
784 - if (!s->gref || !s->gmain)
785 - return AVERROR(ENOMEM);
786 + VmafConfiguration cfg = {
787 + .log_level = log_level_map(av_log_get_level()),
788 + .n_subsample = s->n_subsample,
789 + .n_threads = s->n_threads,
792 + err = vmaf_init(&s->vmaf, cfg);
794 + return AVERROR(EINVAL);
796 + err = parse_deprecated_options(ctx);
801 + err = parse_models(ctx);
805 - s->vmaf_thread_created = 0;
806 - pthread_mutex_init(&s->lock, NULL);
807 - pthread_cond_init (&s->cond, NULL);
808 + err = parse_features(ctx);
812 s->fs.on_event = do_vmaf;
814 @@ -256,26 +545,31 @@ static const enum AVPixelFormat pix_fmts[] = {
816 static int config_input_ref(AVFilterLink *inlink)
818 - AVFilterContext *ctx = inlink->dst;
819 + AVFilterContext *ctx = inlink->dst;
820 LIBVMAFContext *s = ctx->priv;
822 + const AVPixFmtDescriptor *desc;
825 - if (ctx->inputs[0]->w != ctx->inputs[1]->w ||
826 - ctx->inputs[0]->h != ctx->inputs[1]->h) {
827 - av_log(ctx, AV_LOG_ERROR, "Width and height of input videos must be same.\n");
828 - return AVERROR(EINVAL);
829 + if (ctx->inputs[0]->w != ctx->inputs[1]->w) {
830 + av_log(ctx, AV_LOG_ERROR, "input width must match.\n");
831 + err |= AVERROR(EINVAL);
834 - s->desc = av_pix_fmt_desc_get(inlink->format);
835 - s->width = ctx->inputs[0]->w;
836 - s->height = ctx->inputs[0]->h;
837 + if (ctx->inputs[0]->h != ctx->inputs[1]->h) {
838 + av_log(ctx, AV_LOG_ERROR, "input height must match.\n");
839 + err |= AVERROR(EINVAL);
842 - th = pthread_create(&s->vmaf_thread, NULL, call_vmaf, (void *) s);
844 - av_log(ctx, AV_LOG_ERROR, "Thread creation failed.\n");
845 - return AVERROR(EINVAL);
846 + if (ctx->inputs[0]->format != ctx->inputs[1]->format) {
847 + av_log(ctx, AV_LOG_ERROR, "input pix_fmt must match.\n");
848 + err |= AVERROR(EINVAL);
850 - s->vmaf_thread_created = 1;
855 + desc = av_pix_fmt_desc_get(inlink->format);
856 + s->bpc = desc->comp[0].depth;
860 @@ -307,28 +601,80 @@ static int activate(AVFilterContext *ctx)
861 return ff_framesync_activate(&s->fs);
864 +static enum VmafOutputFormat log_fmt_map(const char *log_fmt)
867 + if (av_stristr(log_fmt, "xml"))
868 + return VMAF_OUTPUT_FORMAT_XML;
869 + if (av_stristr(log_fmt, "json"))
870 + return VMAF_OUTPUT_FORMAT_JSON;
871 + if (av_stristr(log_fmt, "csv"))
872 + return VMAF_OUTPUT_FORMAT_CSV;
873 + if (av_stristr(log_fmt, "sub"))
874 + return VMAF_OUTPUT_FORMAT_SUB;
877 + return VMAF_OUTPUT_FORMAT_XML;
880 +static enum VmafPoolingMethod pool_method_map(const char *pool_method)
883 + if (av_stristr(pool_method, "min"))
884 + return VMAF_POOL_METHOD_MIN;
885 + if (av_stristr(pool_method, "mean"))
886 + return VMAF_POOL_METHOD_MEAN;
887 + if (av_stristr(pool_method, "harmonic_mean"))
888 + return VMAF_POOL_METHOD_HARMONIC_MEAN;
891 + return VMAF_POOL_METHOD_MEAN;
894 static av_cold void uninit(AVFilterContext *ctx)
896 LIBVMAFContext *s = ctx->priv;
899 ff_framesync_uninit(&s->fs);
901 - pthread_mutex_lock(&s->lock);
903 - pthread_cond_signal(&s->cond);
904 - pthread_mutex_unlock(&s->lock);
908 - if (s->vmaf_thread_created)
910 - pthread_join(s->vmaf_thread, NULL);
911 - s->vmaf_thread_created = 0;
912 + err = vmaf_read_pictures(s->vmaf, NULL, NULL, 0);
914 + av_log(ctx, AV_LOG_ERROR,
915 + "problem flushing libvmaf context.\n");
918 - av_frame_free(&s->gref);
919 - av_frame_free(&s->gmain);
920 + for (unsigned i = 0; i < s->model_cnt; i++) {
922 + err = vmaf_score_pooled(s->vmaf, s->model[i], pool_method_map(s->pool),
923 + &vmaf_score, 0, s->frame_cnt - 1);
925 + av_log(ctx, AV_LOG_ERROR,
926 + "problem getting pooled vmaf score.\n");
929 + av_log(ctx, AV_LOG_INFO, "VMAF score: %f\n", vmaf_score);
933 + if (s->log_path && !err)
934 + vmaf_write_output(s->vmaf, s->log_path, log_fmt_map(s->log_fmt));
939 + for (unsigned i = 0; i < s->model_cnt; i++) {
941 + vmaf_model_destroy(s->model[i]);
946 - pthread_mutex_destroy(&s->lock);
947 - pthread_cond_destroy(&s->cond);
949 + vmaf_close(s->vmaf);
952 static const AVFilterPad libvmaf_inputs[] = {