archrelease: copy trunk to extra-x86_64
[arch-packages.git] / ffmpeg / trunk / ffmpeg-vmaf2.x.patch
blob 52ed69fffc4edebd251934cc4c8f50236fc6f7f2
1 diff --git a/configure b/configure
2 index 94f513288a..493493b4c5 100755
3 --- a/configure
4 +++ b/configure
5 @@ -3751,7 +3751,7 @@ vaguedenoiser_filter_deps="gpl"
6 vflip_vulkan_filter_deps="vulkan spirv_compiler"
7 vidstabdetect_filter_deps="libvidstab"
8 vidstabtransform_filter_deps="libvidstab"
9 -libvmaf_filter_deps="libvmaf pthreads"
10 +libvmaf_filter_deps="libvmaf"
11 zmq_filter_deps="libzmq"
12 zoompan_filter_deps="swscale"
13 zscale_filter_deps="libzimg const_nan"
14 @@ -6626,7 +6626,7 @@ enabled libtwolame && require libtwolame twolame.h twolame_init -ltwolame
15 enabled libuavs3d && require_pkg_config libuavs3d "uavs3d >= 1.1.41" uavs3d.h uavs3d_decode
16 enabled libv4l2 && require_pkg_config libv4l2 libv4l2 libv4l2.h v4l2_ioctl
17 enabled libvidstab && require_pkg_config libvidstab "vidstab >= 0.98" vid.stab/libvidstab.h vsMotionDetectInit
18 -enabled libvmaf && require_pkg_config libvmaf "libvmaf >= 1.5.2" libvmaf.h compute_vmaf
19 +enabled libvmaf && require_pkg_config libvmaf "libvmaf >= 2.0.0" libvmaf.h vmaf_init
20 enabled libvo_amrwbenc && require libvo_amrwbenc vo-amrwbenc/enc_if.h E_IF_init -lvo-amrwbenc
21 enabled libvorbis && require_pkg_config libvorbis vorbis vorbis/codec.h vorbis_info_init &&
22 require_pkg_config libvorbisenc vorbisenc vorbis/vorbisenc.h vorbis_encode_init
23 diff --git a/doc/filters.texi b/doc/filters.texi
24 index 248c09caf8..9a890d1555 100644
25 --- a/doc/filters.texi
26 +++ b/doc/filters.texi
27 @@ -14666,68 +14666,60 @@ ffmpeg -i input.mov -vf lensfun=make=Canon:model="Canon EOS 100D":lens_model="Ca
29 @section libvmaf
31 -Obtain the VMAF (Video Multi-Method Assessment Fusion)
32 -score between two input videos.
33 +Calculate the VMAF (Video Multi-Method Assessment Fusion) score for a
34 +reference/distorted pair of input videos.
36 -The first input is the encoded video, and the second input is the reference video.
37 +The first input is the distorted video, and the second input is the reference video.
39 The obtained VMAF score is printed through the logging system.
41 It requires Netflix's vmaf library (libvmaf) as a pre-requisite.
42 After installing the library it can be enabled using:
43 @code{./configure --enable-libvmaf}.
44 -If no model path is specified it uses the default model: @code{vmaf_v0.6.1.pkl}.
46 The filter has following options:
48 @table @option
49 -@item model_path
50 -Set the model path which is to be used for SVM.
51 -Default value: @code{"/usr/local/share/model/vmaf_v0.6.1.pkl"}
53 -@item log_path
54 -Set the file path to be used to store logs.
55 +@item model
56 +A `|` delimited list of vmaf models. Each model can be configured with a number of parameters.
57 +Default value: @code{"version=vmaf_v0.6.1"}
59 -@item log_fmt
60 -Set the format of the log file (csv, json or xml).
61 +@item model_path
62 +Deprecated, use model='path=...'.
64 @item enable_transform
65 -This option can enable/disable the @code{score_transform} applied to the final predicted VMAF score,
66 -if you have specified score_transform option in the input parameter file passed to @code{run_vmaf_training.py}
67 -Default value: @code{false}
68 +Deprecated, use model='enable_transform=true'.
70 @item phone_model
71 -Invokes the phone model which will generate VMAF scores higher than in the
72 -regular model, which is more suitable for laptop, TV, etc. viewing conditions.
73 -Default value: @code{false}
74 +Deprecated, use model='enable_transform=true'.
76 +@item enable_conf_interval
77 +Deprecated, use model='enable_conf_interval=true'.
79 +@item feature
80 +A `|` delimited list of features. Each feature can be configured with a number of parameters.
82 @item psnr
83 -Enables computing psnr along with vmaf.
84 -Default value: @code{false}
85 +Deprecated, use feature='name=psnr'.
87 @item ssim
88 -Enables computing ssim along with vmaf.
89 -Default value: @code{false}
90 +Deprecated, use feature='name=ssim'.
92 @item ms_ssim
93 -Enables computing ms_ssim along with vmaf.
94 -Default value: @code{false}
95 +Deprecated, use feature='name=ms_ssim'.
97 -@item pool
98 -Set the pool method to be used for computing vmaf.
99 -Options are @code{min}, @code{harmonic_mean} or @code{mean} (default).
100 +@item log_path
101 +Set the file path to be used to store log files.
103 +@item log_fmt
104 +Set the format of the log file (xml, json, csv, or sub).
106 @item n_threads
107 -Set number of threads to be used when computing vmaf.
108 -Default value: @code{0}, which makes use of all available logical processors.
109 +Set number of threads to be used when initializing libvmaf.
110 +Default value: @code{0}, no threads.
112 @item n_subsample
113 -Set interval for frame subsampling used when computing vmaf.
114 -Default value: @code{1}
116 -@item enable_conf_interval
117 -Enables confidence interval.
118 -Default value: @code{false}
119 +Set frame subsampling interval to be used.
120 @end table
122 This filter also supports the @ref{framesync} options.
123 @@ -14735,23 +14727,31 @@ This filter also supports the @ref{framesync} options.
124 @subsection Examples
125 @itemize
126 @item
127 -On the below examples the input file @file{main.mpg} being processed is
128 -compared with the reference file @file{ref.mpg}.
129 +In the examples below, a distorted video @file{distorted.mpg} is
130 +compared with a reference file @file{reference.mpg}.
132 +@item
133 +Basic usage:
134 +@example
135 +ffmpeg -i distorted.mpg -i reference.mpg -lavfi libvmaf=log_path=output.xml -f null -
136 +@end example
138 +@item
139 +Example with multiple models:
140 @example
141 -ffmpeg -i main.mpg -i ref.mpg -lavfi libvmaf -f null -
142 +ffmpeg -i distorted.mpg -i reference.mpg -lavfi libvmaf='model=version=vmaf_v0.6.1\\:name=vmaf|version=vmaf_v0.6.1neg\\:name=vmaf_neg' -f null -
143 @end example
145 @item
146 -Example with options:
147 +Example with multiple additional features:
148 @example
149 -ffmpeg -i main.mpg -i ref.mpg -lavfi libvmaf="psnr=1:log_fmt=json" -f null -
150 +ffmpeg -i distorted.mpg -i reference.mpg -lavfi libvmaf='feature=name=psnr|name=ciede' -f null -
151 @end example
153 @item
154 Example with options and different containers:
155 @example
156 -ffmpeg -i main.mpg -i ref.mkv -lavfi "[0:v]settb=AVTB,setpts=PTS-STARTPTS[main];[1:v]settb=AVTB,setpts=PTS-STARTPTS[ref];[main][ref]libvmaf=psnr=1:log_fmt=json" -f null -
157 +ffmpeg -i distorted.mpg -i reference.mkv -lavfi "[0:v]settb=AVTB,setpts=PTS-STARTPTS[main];[1:v]settb=AVTB,setpts=PTS-STARTPTS[ref];[main][ref]libvmaf=log_fmt=json:log_path=output.json" -f null -
158 @end example
159 @end itemize
161 diff --git a/libavfilter/vf_libvmaf.c b/libavfilter/vf_libvmaf.c
162 index 5d492126eb..eee1c280ef 100644
163 --- a/libavfilter/vf_libvmaf.c
164 +++ b/libavfilter/vf_libvmaf.c
165 @@ -24,8 +24,8 @@
166 * Calculate the VMAF between two input videos.
169 -#include <pthread.h>
170 #include <libvmaf.h>
172 #include "libavutil/avstring.h"
173 #include "libavutil/opt.h"
174 #include "libavutil/pixdesc.h"
175 @@ -39,23 +39,9 @@
176 typedef struct LIBVMAFContext {
177 const AVClass *class;
178 FFFrameSync fs;
179 - const AVPixFmtDescriptor *desc;
180 - int width;
181 - int height;
182 - double vmaf_score;
183 - int vmaf_thread_created;
184 - pthread_t vmaf_thread;
185 - pthread_mutex_t lock;
186 - pthread_cond_t cond;
187 - int eof;
188 - AVFrame *gmain;
189 - AVFrame *gref;
190 - int frame_set;
191 char *model_path;
192 char *log_path;
193 char *log_fmt;
194 - int disable_clip;
195 - int disable_avx;
196 int enable_transform;
197 int phone_model;
198 int psnr;
199 @@ -65,184 +51,487 @@ typedef struct LIBVMAFContext {
200 int n_threads;
201 int n_subsample;
202 int enable_conf_interval;
203 - int error;
204 + char *model_cfg;
205 + char *feature_cfg;
206 + VmafContext *vmaf;
207 + VmafModel **model;
208 + unsigned model_cnt;
209 + unsigned frame_cnt;
210 + unsigned bpc;
211 } LIBVMAFContext;
213 #define OFFSET(x) offsetof(LIBVMAFContext, x)
214 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
216 static const AVOption libvmaf_options[] = {
217 - {"model_path", "Set the model to be used for computing vmaf.", OFFSET(model_path), AV_OPT_TYPE_STRING, {.str="/usr/local/share/model/vmaf_v0.6.1.pkl"}, 0, 1, FLAGS},
218 - {"log_path", "Set the file path to be used to store logs.", OFFSET(log_path), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
219 - {"log_fmt", "Set the format of the log (csv, json or xml).", OFFSET(log_fmt), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
220 - {"enable_transform", "Enables transform for computing vmaf.", OFFSET(enable_transform), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
221 - {"phone_model", "Invokes the phone model that will generate higher VMAF scores.", OFFSET(phone_model), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
222 - {"psnr", "Enables computing psnr along with vmaf.", OFFSET(psnr), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
223 - {"ssim", "Enables computing ssim along with vmaf.", OFFSET(ssim), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
224 - {"ms_ssim", "Enables computing ms-ssim along with vmaf.", OFFSET(ms_ssim), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
225 + {"model_path", "use model='path=...'.", OFFSET(model_path), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS|AV_OPT_FLAG_DEPRECATED},
226 + {"log_path", "Set the file path to be used to write log.", OFFSET(log_path), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
227 + {"log_fmt", "Set the format of the log (csv, json, xml, or sub).", OFFSET(log_fmt), AV_OPT_TYPE_STRING, {.str="xml"}, 0, 1, FLAGS},
228 + {"enable_transform", "use model='enable_transform=true'.", OFFSET(enable_transform), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS|AV_OPT_FLAG_DEPRECATED},
229 + {"phone_model", "use model='enable_transform=true'.", OFFSET(phone_model), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS|AV_OPT_FLAG_DEPRECATED},
230 + {"psnr", "use feature='name=psnr'.", OFFSET(psnr), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS|AV_OPT_FLAG_DEPRECATED},
231 + {"ssim", "use feature='name=ssim'.", OFFSET(ssim), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS|AV_OPT_FLAG_DEPRECATED},
232 + {"ms_ssim", "use feature='name=ms_ssim'.", OFFSET(ms_ssim), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS|AV_OPT_FLAG_DEPRECATED},
233 {"pool", "Set the pool method to be used for computing vmaf.", OFFSET(pool), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
234 {"n_threads", "Set number of threads to be used when computing vmaf.", OFFSET(n_threads), AV_OPT_TYPE_INT, {.i64=0}, 0, UINT_MAX, FLAGS},
235 {"n_subsample", "Set interval for frame subsampling used when computing vmaf.", OFFSET(n_subsample), AV_OPT_TYPE_INT, {.i64=1}, 1, UINT_MAX, FLAGS},
236 - {"enable_conf_interval", "Enables confidence interval.", OFFSET(enable_conf_interval), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
237 + {"enable_conf_interval", "model='enable_conf_interval=true'.", OFFSET(enable_conf_interval), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS|AV_OPT_FLAG_DEPRECATED},
238 + {"model", "Set the model to be used for computing vmaf.", OFFSET(model_cfg), AV_OPT_TYPE_STRING, {.str="version=vmaf_v0.6.1"}, 0, 1, FLAGS},
239 + {"feature", "Set the feature to be used for computing vmaf.", OFFSET(feature_cfg), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
240 { NULL }
243 FRAMESYNC_DEFINE_CLASS(libvmaf, LIBVMAFContext, fs);
245 -#define read_frame_fn(type, bits) \
246 - static int read_frame_##bits##bit(float *ref_data, float *main_data, \
247 - float *temp_data, int stride, void *ctx) \
248 -{ \
249 - LIBVMAFContext *s = (LIBVMAFContext *) ctx; \
250 - int ret; \
252 - pthread_mutex_lock(&s->lock); \
254 - while (!s->frame_set && !s->eof) { \
255 - pthread_cond_wait(&s->cond, &s->lock); \
256 - } \
258 - if (s->frame_set) { \
259 - int ref_stride = s->gref->linesize[0]; \
260 - int main_stride = s->gmain->linesize[0]; \
262 - const type *ref_ptr = (const type *) s->gref->data[0]; \
263 - const type *main_ptr = (const type *) s->gmain->data[0]; \
265 - float *ptr = ref_data; \
266 - float factor = 1.f / (1 << (bits - 8)); \
268 - int h = s->height; \
269 - int w = s->width; \
271 - int i,j; \
273 - for (i = 0; i < h; i++) { \
274 - for ( j = 0; j < w; j++) { \
275 - ptr[j] = ref_ptr[j] * factor; \
276 - } \
277 - ref_ptr += ref_stride / sizeof(*ref_ptr); \
278 - ptr += stride / sizeof(*ptr); \
279 - } \
281 - ptr = main_data; \
283 - for (i = 0; i < h; i++) { \
284 - for (j = 0; j < w; j++) { \
285 - ptr[j] = main_ptr[j] * factor; \
286 - } \
287 - main_ptr += main_stride / sizeof(*main_ptr); \
288 - ptr += stride / sizeof(*ptr); \
289 - } \
290 - } \
292 - ret = !s->frame_set; \
294 - av_frame_unref(s->gref); \
295 - av_frame_unref(s->gmain); \
296 - s->frame_set = 0; \
298 - pthread_cond_signal(&s->cond); \
299 - pthread_mutex_unlock(&s->lock); \
301 - if (ret) { \
302 - return 2; \
303 - } \
305 - return 0; \
306 +static enum VmafPixelFormat pix_fmt_map(enum AVPixelFormat av_pix_fmt)
308 + switch (av_pix_fmt) {
309 + case AV_PIX_FMT_YUV420P:
310 + case AV_PIX_FMT_YUV420P10LE:
311 + case AV_PIX_FMT_YUV420P12LE:
312 + case AV_PIX_FMT_YUV420P16LE:
313 + return VMAF_PIX_FMT_YUV420P;
314 + case AV_PIX_FMT_YUV422P:
315 + case AV_PIX_FMT_YUV422P10LE:
316 + case AV_PIX_FMT_YUV422P12LE:
317 + case AV_PIX_FMT_YUV422P16LE:
318 + return VMAF_PIX_FMT_YUV422P;
319 + case AV_PIX_FMT_YUV444P:
320 + case AV_PIX_FMT_YUV444P10LE:
321 + case AV_PIX_FMT_YUV444P12LE:
322 + case AV_PIX_FMT_YUV444P16LE:
323 + return VMAF_PIX_FMT_YUV444P;
324 + default:
325 + return VMAF_PIX_FMT_UNKNOWN;
329 -read_frame_fn(uint8_t, 8);
330 -read_frame_fn(uint16_t, 10);
331 +static int copy_picture_data(AVFrame *src, VmafPicture *dst, unsigned bpc)
333 + int err = vmaf_picture_alloc(dst, pix_fmt_map(src->format), bpc,
334 + src->width, src->height);
335 + if (err)
336 + return AVERROR(ENOMEM);
338 + for (unsigned i = 0; i < 3; i++) {
339 + uint8_t *src_data = src->data[i];
340 + uint8_t *dst_data = dst->data[i];
341 + for (unsigned j = 0; j < dst->h[i]; j++) {
342 + memcpy(dst_data, src_data, sizeof(*dst_data) * dst->w[i]);
343 + src_data += src->linesize[i];
344 + dst_data += dst->stride[i];
348 + return 0;
351 -static void compute_vmaf_score(LIBVMAFContext *s)
352 +static int do_vmaf(FFFrameSync *fs)
354 - int (*read_frame)(float *ref_data, float *main_data, float *temp_data,
355 - int stride, void *ctx);
356 - char *format;
357 + AVFilterContext *ctx = fs->parent;
358 + LIBVMAFContext *s = ctx->priv;
359 + VmafPicture pic_ref, pic_dist;
360 + AVFrame *ref, *dist;
361 + int err = 0;
363 - if (s->desc->comp[0].depth <= 8) {
364 - read_frame = read_frame_8bit;
365 - } else {
366 - read_frame = read_frame_10bit;
367 + int ret = ff_framesync_dualinput_get(fs, &dist, &ref);
368 + if (ret < 0)
369 + return ret;
370 + if (ctx->is_disabled || !ref)
371 + return ff_filter_frame(ctx->outputs[0], dist);
373 + err = copy_picture_data(ref, &pic_ref, s->bpc);
374 + if (err) {
375 + av_log(s, AV_LOG_ERROR, "problem during vmaf_picture_alloc.\n");
376 + return AVERROR(ENOMEM);
379 + err = copy_picture_data(dist, &pic_dist, s->bpc);
380 + if (err) {
381 + av_log(s, AV_LOG_ERROR, "problem during vmaf_picture_alloc.\n");
382 + vmaf_picture_unref(&pic_ref);
383 + return AVERROR(ENOMEM);
386 - format = (char *) s->desc->name;
387 + err = vmaf_read_pictures(s->vmaf, &pic_ref, &pic_dist, s->frame_cnt++);
388 + if (err) {
389 + av_log(s, AV_LOG_ERROR, "problem during vmaf_read_pictures.\n");
390 + return AVERROR(EINVAL);
393 - s->error = compute_vmaf(&s->vmaf_score, format, s->width, s->height,
394 - read_frame, s, s->model_path, s->log_path,
395 - s->log_fmt, 0, 0, s->enable_transform,
396 - s->phone_model, s->psnr, s->ssim,
397 - s->ms_ssim, s->pool,
398 - s->n_threads, s->n_subsample, s->enable_conf_interval);
399 + return ff_filter_frame(ctx->outputs[0], dist);
402 -static void *call_vmaf(void *ctx)
404 +static AVDictionary **delimited_dict_parse(char *str, unsigned *cnt)
406 - LIBVMAFContext *s = (LIBVMAFContext *) ctx;
407 - compute_vmaf_score(s);
408 - if (!s->error) {
409 - av_log(ctx, AV_LOG_INFO, "VMAF score: %f\n",s->vmaf_score);
410 - } else {
411 - pthread_mutex_lock(&s->lock);
412 - pthread_cond_signal(&s->cond);
413 - pthread_mutex_unlock(&s->lock);
414 + AVDictionary **dict = NULL;
415 + char *str_copy = NULL;
416 + char *saveptr = NULL;
417 + unsigned cnt2;
418 + int err = 0;
420 + if (!str)
421 + return NULL;
423 + cnt2 = 1;
424 + for (char *p = str; *p; p++) {
425 + if (*p == '|')
426 + cnt2++;
429 + dict = av_calloc(cnt2, sizeof(*dict));
430 + if (!dict)
431 + goto fail;
433 + str_copy = av_strdup(str);
434 + if (!str_copy)
435 + goto fail;
437 + *cnt = 0;
438 + for (unsigned i = 0; i < cnt2; i++) {
439 + char *s = av_strtok(i == 0 ? str_copy : NULL, "|", &saveptr);
440 + if (!s)
441 + continue;
442 + err = av_dict_parse_string(&dict[(*cnt)++], s, "=", ":", 0);
443 + if (err)
444 + goto fail;
447 + av_free(str_copy);
448 + return dict;
450 +fail:
451 + if (dict) {
452 + for (unsigned i = 0; i < *cnt; i++) {
453 + if (dict[i])
454 + av_dict_free(&dict[i]);
456 + av_free(dict);
458 - pthread_exit(NULL);
460 + av_free(str_copy);
461 + *cnt = 0;
462 return NULL;
465 -static int do_vmaf(FFFrameSync *fs)
466 +static int parse_features(AVFilterContext *ctx)
468 - AVFilterContext *ctx = fs->parent;
469 LIBVMAFContext *s = ctx->priv;
470 - AVFrame *master, *ref;
471 - int ret;
472 + AVDictionary **dict = NULL;
473 + unsigned dict_cnt;
474 + int err = 0;
476 - ret = ff_framesync_dualinput_get(fs, &master, &ref);
477 - if (ret < 0)
478 - return ret;
479 - if (!ref)
480 - return ff_filter_frame(ctx->outputs[0], master);
481 + if (!s->feature_cfg)
482 + return 0;
484 + dict = delimited_dict_parse(s->feature_cfg, &dict_cnt);
485 + if (!dict) {
486 + av_log(ctx, AV_LOG_ERROR,
487 + "could not parse feature config: %s\n", s->feature_cfg);
488 + return AVERROR(EINVAL);
491 - pthread_mutex_lock(&s->lock);
492 + for (unsigned i = 0; i < dict_cnt; i++) {
493 + char *feature_name = NULL;
494 + VmafFeatureDictionary *feature_opts_dict = NULL;
495 + AVDictionaryEntry *e = NULL;
497 + while (e = av_dict_get(dict[i], "", e, AV_DICT_IGNORE_SUFFIX)) {
498 + if (av_stristr(e->key, "name")) {
499 + feature_name = e->value;
500 + continue;
503 + err = vmaf_feature_dictionary_set(&feature_opts_dict, e->key,
504 + e->value);
505 + if (err) {
506 + av_log(ctx, AV_LOG_ERROR,
507 + "could not set feature option: %s.%s=%s\n",
508 + feature_name, e->key, e->value);
509 + goto exit;
513 + err = vmaf_use_feature(s->vmaf, feature_name, feature_opts_dict);
514 + if (err) {
515 + av_log(ctx, AV_LOG_ERROR,
516 + "problem during vmaf_use_feature: %s\n", feature_name);
517 + goto exit;
521 - while (s->frame_set && !s->error) {
522 - pthread_cond_wait(&s->cond, &s->lock);
523 +exit:
524 + for (unsigned i = 0; i < dict_cnt; i++) {
525 + if (dict[i])
526 + av_dict_free(&dict[i]);
528 + av_free(dict);
529 + return err;
532 +static int parse_models(AVFilterContext *ctx)
534 + LIBVMAFContext *s = ctx->priv;
535 + AVDictionary **dict;
536 + unsigned dict_cnt;
537 + int err = 0;
539 + if (!s->model_cfg) return 0;
541 - if (s->error) {
542 + dict_cnt = 0;
543 + dict = delimited_dict_parse(s->model_cfg, &dict_cnt);
544 + if (!dict) {
545 av_log(ctx, AV_LOG_ERROR,
546 - "libvmaf encountered an error, check log for details\n");
547 - pthread_mutex_unlock(&s->lock);
548 + "could not parse model config: %s\n", s->model_cfg);
549 return AVERROR(EINVAL);
552 - av_frame_ref(s->gref, ref);
553 - av_frame_ref(s->gmain, master);
554 + s->model_cnt = dict_cnt;
555 + s->model = av_calloc(s->model_cnt, sizeof(*s->model));
556 + if (!s->model)
557 + return AVERROR(ENOMEM);
559 + for (unsigned i = 0; i < dict_cnt; i++) {
560 + VmafModelConfig model_cfg = { 0 };
561 + AVDictionaryEntry *e = NULL;
562 + char *version = NULL;
563 + char *path = NULL;
565 + while (e = av_dict_get(dict[i], "", e, AV_DICT_IGNORE_SUFFIX)) {
566 + if (av_stristr(e->key, "disable_clip")) {
567 + model_cfg.flags |= av_stristr(e->value, "true") ?
568 + VMAF_MODEL_FLAG_DISABLE_CLIP : 0;
569 + continue;
572 + if (av_stristr(e->key, "enable_transform")) {
573 + model_cfg.flags |= av_stristr(e->value, "true") ?
574 + VMAF_MODEL_FLAG_ENABLE_TRANSFORM : 0;
575 + continue;
578 + if (av_stristr(e->key, "name")) {
579 + model_cfg.name = e->value;
580 + continue;
583 + if (av_stristr(e->key, "version")) {
584 + version = e->value;
585 + continue;
588 + if (av_stristr(e->key, "path")) {
589 + path = e->value;
590 + continue;
594 + if (version) {
595 + err = vmaf_model_load(&s->model[i], &model_cfg, version);
596 + if (err) {
597 + av_log(ctx, AV_LOG_ERROR,
598 + "could not load libvmaf model with version: %s\n",
599 + version);
600 + goto exit;
604 + if (path && !s->model[i]) {
605 + err = vmaf_model_load_from_path(&s->model[i], &model_cfg, path);
606 + if (err) {
607 + av_log(ctx, AV_LOG_ERROR,
608 + "could not load libvmaf model with path: %s\n",
609 + path);
610 + goto exit;
614 + if (!s->model[i]) {
615 + av_log(ctx, AV_LOG_ERROR,
616 + "could not load libvmaf model with config: %s\n",
617 + s->model_cfg);
618 + goto exit;
621 + while (e = av_dict_get(dict[i], "", e, AV_DICT_IGNORE_SUFFIX)) {
622 + VmafFeatureDictionary *feature_opts_dict = NULL;
623 + char *feature_opt = NULL;
625 + char *feature_name = av_strtok(e->key, ".", &feature_opt);
626 + if (!feature_opt)
627 + continue;
629 + err = vmaf_feature_dictionary_set(&feature_opts_dict,
630 + feature_opt, e->value);
631 + if (err) {
632 + av_log(ctx, AV_LOG_ERROR,
633 + "could not set feature option: %s.%s=%s\n",
634 + feature_name, feature_opt, e->value);
635 + err = AVERROR(EINVAL);
636 + goto exit;
639 + err = vmaf_model_feature_overload(s->model[i], feature_name,
640 + feature_opts_dict);
641 + if (err) {
642 + av_log(ctx, AV_LOG_ERROR,
643 + "could not overload feature: %s\n", feature_name);
644 + err = AVERROR(EINVAL);
645 + goto exit;
650 + for (unsigned i = 0; i < s->model_cnt; i++) {
651 + err = vmaf_use_features_from_model(s->vmaf, s->model[i]);
652 + if (err) {
653 + av_log(ctx, AV_LOG_ERROR,
654 + "problem during vmaf_use_features_from_model\n");
655 + err = AVERROR(EINVAL);
656 + goto exit;
660 +exit:
661 + for (unsigned i = 0; i < dict_cnt; i++) {
662 + if (dict[i])
663 + av_dict_free(&dict[i]);
665 + av_free(dict);
666 + return err;
669 +static enum VmafLogLevel log_level_map(int log_level)
671 + switch (log_level) {
672 + case AV_LOG_QUIET:
673 + return VMAF_LOG_LEVEL_NONE;
674 + case AV_LOG_ERROR:
675 + return VMAF_LOG_LEVEL_ERROR;
676 + case AV_LOG_WARNING:
677 + return VMAF_LOG_LEVEL_WARNING;
678 + case AV_LOG_INFO:
679 + return VMAF_LOG_LEVEL_INFO;
680 + case AV_LOG_DEBUG:
681 + return VMAF_LOG_LEVEL_DEBUG;
682 + default:
683 + return VMAF_LOG_LEVEL_INFO;
687 +static int parse_deprecated_options(AVFilterContext *ctx)
689 + LIBVMAFContext *s = ctx->priv;
690 + VmafModel *model = NULL;
691 + VmafModelCollection *model_collection = NULL;
692 + enum VmafModelFlags flags = VMAF_MODEL_FLAGS_DEFAULT;
693 + int err = 0;
695 + VmafModelConfig model_cfg = {
696 + .name = "vmaf",
697 + .flags = flags,
698 + };
700 + if (s->enable_transform || s->phone_model)
701 + flags |= VMAF_MODEL_FLAG_ENABLE_TRANSFORM;
703 + if (!s->model_path)
704 + goto extra_metrics_only;
706 + if (s->enable_conf_interval) {
707 + err = vmaf_model_collection_load_from_path(&model, &model_collection,
708 + &model_cfg, s->model_path);
709 + if (err) {
710 + av_log(ctx, AV_LOG_ERROR,
711 + "problem loading model file: %s\n", s->model_path);
712 + goto exit;
715 + err = vmaf_use_features_from_model_collection(s->vmaf, model_collection);
716 + if (err) {
717 + av_log(ctx, AV_LOG_ERROR,
718 + "problem loading feature extractors from model file: %s\n",
719 + s->model_path);
720 + goto exit;
722 + } else {
723 + err = vmaf_model_load_from_path(&model, &model_cfg, s->model_path);
724 + if (err) {
725 + av_log(ctx, AV_LOG_ERROR,
726 + "problem loading model file: %s\n", s->model_path);
727 + goto exit;
729 + err = vmaf_use_features_from_model(s->vmaf, model);
730 + if (err) {
731 + av_log(ctx, AV_LOG_ERROR,
732 + "problem loading feature extractors from model file: %s\n",
733 + s->model_path);
734 + goto exit;
738 +extra_metrics_only:
739 + if (s->psnr) {
740 + VmafFeatureDictionary *d = NULL;
741 + vmaf_feature_dictionary_set(&d, "enable_chroma", "false");
743 + err = vmaf_use_feature(s->vmaf, "psnr", d);
744 + if (err) {
745 + av_log(ctx, AV_LOG_ERROR,
746 + "problem loading feature extractor: psnr\n");
747 + goto exit;
751 - s->frame_set = 1;
752 + if (s->ssim) {
753 + err = vmaf_use_feature(s->vmaf, "float_ssim", NULL);
754 + if (err) {
755 + av_log(ctx, AV_LOG_ERROR,
756 + "problem loading feature extractor: ssim\n");
757 + goto exit;
761 - pthread_cond_signal(&s->cond);
762 - pthread_mutex_unlock(&s->lock);
763 + if (s->ms_ssim) {
764 + err = vmaf_use_feature(s->vmaf, "float_ms_ssim", NULL);
765 + if (err) {
766 + av_log(ctx, AV_LOG_ERROR,
767 + "problem loading feature extractor: ms_ssim\n");
768 + goto exit;
772 - return ff_filter_frame(ctx->outputs[0], master);
773 +exit:
774 + return err;
777 static av_cold int init(AVFilterContext *ctx)
779 LIBVMAFContext *s = ctx->priv;
780 + int err = 0;
782 - s->gref = av_frame_alloc();
783 - s->gmain = av_frame_alloc();
784 - if (!s->gref || !s->gmain)
785 - return AVERROR(ENOMEM);
786 + VmafConfiguration cfg = {
787 + .log_level = log_level_map(av_log_get_level()),
788 + .n_subsample = s->n_subsample,
789 + .n_threads = s->n_threads,
790 + };
792 + err = vmaf_init(&s->vmaf, cfg);
793 + if (err)
794 + return AVERROR(EINVAL);
796 + err = parse_deprecated_options(ctx);
797 + if (err)
798 + return err;
800 - s->error = 0;
801 + err = parse_models(ctx);
802 + if (err)
803 + return err;
805 - s->vmaf_thread_created = 0;
806 - pthread_mutex_init(&s->lock, NULL);
807 - pthread_cond_init (&s->cond, NULL);
808 + err = parse_features(ctx);
809 + if (err)
810 + return err;
812 s->fs.on_event = do_vmaf;
813 return 0;
814 @@ -256,26 +545,31 @@ static const enum AVPixelFormat pix_fmts[] = {
816 static int config_input_ref(AVFilterLink *inlink)
818 - AVFilterContext *ctx = inlink->dst;
819 + AVFilterContext *ctx = inlink->dst;
820 LIBVMAFContext *s = ctx->priv;
821 - int th;
822 + const AVPixFmtDescriptor *desc;
823 + int err = 0;
825 - if (ctx->inputs[0]->w != ctx->inputs[1]->w ||
826 - ctx->inputs[0]->h != ctx->inputs[1]->h) {
827 - av_log(ctx, AV_LOG_ERROR, "Width and height of input videos must be same.\n");
828 - return AVERROR(EINVAL);
829 + if (ctx->inputs[0]->w != ctx->inputs[1]->w) {
830 + av_log(ctx, AV_LOG_ERROR, "input width must match.\n");
831 + err |= AVERROR(EINVAL);
834 - s->desc = av_pix_fmt_desc_get(inlink->format);
835 - s->width = ctx->inputs[0]->w;
836 - s->height = ctx->inputs[0]->h;
837 + if (ctx->inputs[0]->h != ctx->inputs[1]->h) {
838 + av_log(ctx, AV_LOG_ERROR, "input height must match.\n");
839 + err |= AVERROR(EINVAL);
842 - th = pthread_create(&s->vmaf_thread, NULL, call_vmaf, (void *) s);
843 - if (th) {
844 - av_log(ctx, AV_LOG_ERROR, "Thread creation failed.\n");
845 - return AVERROR(EINVAL);
846 + if (ctx->inputs[0]->format != ctx->inputs[1]->format) {
847 + av_log(ctx, AV_LOG_ERROR, "input pix_fmt must match.\n");
848 + err |= AVERROR(EINVAL);
850 - s->vmaf_thread_created = 1;
852 + if (err)
853 + return err;
855 + desc = av_pix_fmt_desc_get(inlink->format);
856 + s->bpc = desc->comp[0].depth;
858 return 0;
860 @@ -307,28 +601,80 @@ static int activate(AVFilterContext *ctx)
861 return ff_framesync_activate(&s->fs);
864 +static enum VmafOutputFormat log_fmt_map(const char *log_fmt)
866 + if (log_fmt) {
867 + if (av_stristr(log_fmt, "xml"))
868 + return VMAF_OUTPUT_FORMAT_XML;
869 + if (av_stristr(log_fmt, "json"))
870 + return VMAF_OUTPUT_FORMAT_JSON;
871 + if (av_stristr(log_fmt, "csv"))
872 + return VMAF_OUTPUT_FORMAT_CSV;
873 + if (av_stristr(log_fmt, "sub"))
874 + return VMAF_OUTPUT_FORMAT_SUB;
877 + return VMAF_OUTPUT_FORMAT_XML;
880 +static enum VmafPoolingMethod pool_method_map(const char *pool_method)
882 + if (pool_method) {
883 + if (av_stristr(pool_method, "min"))
884 + return VMAF_POOL_METHOD_MIN;
885 + if (av_stristr(pool_method, "mean"))
886 + return VMAF_POOL_METHOD_MEAN;
887 + if (av_stristr(pool_method, "harmonic_mean"))
888 + return VMAF_POOL_METHOD_HARMONIC_MEAN;
891 + return VMAF_POOL_METHOD_MEAN;
894 static av_cold void uninit(AVFilterContext *ctx)
896 LIBVMAFContext *s = ctx->priv;
897 + int err = 0;
899 ff_framesync_uninit(&s->fs);
901 - pthread_mutex_lock(&s->lock);
902 - s->eof = 1;
903 - pthread_cond_signal(&s->cond);
904 - pthread_mutex_unlock(&s->lock);
905 + if (!s->frame_cnt)
906 + goto clean_up;
908 - if (s->vmaf_thread_created)
910 - pthread_join(s->vmaf_thread, NULL);
911 - s->vmaf_thread_created = 0;
912 + err = vmaf_read_pictures(s->vmaf, NULL, NULL, 0);
913 + if (err) {
914 + av_log(ctx, AV_LOG_ERROR,
915 + "problem flushing libvmaf context.\n");
918 - av_frame_free(&s->gref);
919 - av_frame_free(&s->gmain);
920 + for (unsigned i = 0; i < s->model_cnt; i++) {
921 + double vmaf_score;
922 + err = vmaf_score_pooled(s->vmaf, s->model[i], pool_method_map(s->pool),
923 + &vmaf_score, 0, s->frame_cnt - 1);
924 + if (err) {
925 + av_log(ctx, AV_LOG_ERROR,
926 + "problem getting pooled vmaf score.\n");
929 + av_log(ctx, AV_LOG_INFO, "VMAF score: %f\n", vmaf_score);
932 + if (s->vmaf) {
933 + if (s->log_path && !err)
934 + vmaf_write_output(s->vmaf, s->log_path, log_fmt_map(s->log_fmt));
937 +clean_up:
938 + if (s->model) {
939 + for (unsigned i = 0; i < s->model_cnt; i++) {
940 + if (s->model[i])
941 + vmaf_model_destroy(s->model[i]);
943 + av_free(s->model);
946 - pthread_mutex_destroy(&s->lock);
947 - pthread_cond_destroy(&s->cond);
948 + if (s->vmaf)
949 + vmaf_close(s->vmaf);
952 static const AVFilterPad libvmaf_inputs[] = {
954 2.20.1