libavfilter/vf_signature.c

   1 /*
   2  * Copyright (c) 2017 Gerion Entrup
   3  *
   4  * This file is part of FFmpeg.
   5  *
   6  * FFmpeg is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License as published by
   8  * the Free Software Foundation; either version 2 of the License, or
   9  * (at your option) any later version.
  10  *
  11  * FFmpeg is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * GNU General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU General Public License along
  17  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
  18  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  19  */
  20
  21 /**
  22  * @file
  23  * MPEG-7 video signature calculation and lookup filter
  24  * @see http://epubs.surrey.ac.uk/531590/1/MPEG-7%20Video%20Signature%20Author%27s%20Copy.pdf
  25  */
  26
#include <errno.h>
#include <limits.h>
#include <stdio.h>

#include "libavcodec/put_bits.h"
#include "libavformat/avformat.h"
#include "libavutil/frame.h"
#include "libavutil/mem.h"
#include "libavutil/opt.h"
#include "libavutil/avstring.h"
#include "libavutil/file_open.h"
#include "avfilter.h"
#include "filters.h"
#include "signature.h"
#include "signature_lookup.c"
  37
  38 #define OFFSET(x) offsetof(SignatureContext, x)
  39 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
  40 #define BLOCK_LCM (int64_t) 476985600
  41
  42 static const AVOption signature_options[] = {
  43     { "detectmode", "set the detectmode",
  44         OFFSET(mode),         AV_OPT_TYPE_INT,    {.i64 = MODE_OFF}, 0, NB_LOOKUP_MODE-1, FLAGS, .unit = "mode" },
  45         { "off",  NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_OFF},  0, 0, .flags = FLAGS, .unit = "mode" },
  46         { "full", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_FULL}, 0, 0, .flags = FLAGS, .unit = "mode" },
  47         { "fast", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_FAST}, 0, 0, .flags = FLAGS, .unit = "mode" },
  48     { "nb_inputs",  "number of inputs",
  49         OFFSET(nb_inputs),    AV_OPT_TYPE_INT,    {.i64 = 1},        1, INT_MAX,          FLAGS },
  50     { "filename",   "filename for output files",
  51         OFFSET(filename),     AV_OPT_TYPE_STRING, {.str = ""},       0, NB_FORMATS-1,     FLAGS },
  52     { "format",     "set output format",
  53         OFFSET(format),       AV_OPT_TYPE_INT,    {.i64 = FORMAT_BINARY}, 0, 1,           FLAGS , .unit = "format" },
  54         { "binary", 0, 0, AV_OPT_TYPE_CONST, {.i64=FORMAT_BINARY}, 0, 0, FLAGS, .unit = "format" },
  55         { "xml",    0, 0, AV_OPT_TYPE_CONST, {.i64=FORMAT_XML},    0, 0, FLAGS, .unit = "format" },
  56     { "th_d",       "threshold to detect one word as similar",
  57         OFFSET(thworddist),   AV_OPT_TYPE_INT,    {.i64 = 9000},     1, INT_MAX,          FLAGS },
  58     { "th_dc",      "threshold to detect all words as similar",
  59         OFFSET(thcomposdist), AV_OPT_TYPE_INT,    {.i64 = 60000},    1, INT_MAX,          FLAGS },
  60     { "th_xh",      "threshold to detect frames as similar",
  61         OFFSET(thl1),         AV_OPT_TYPE_INT,    {.i64 = 116},      1, INT_MAX,          FLAGS },
  62     { "th_di",      "minimum length of matching sequence in frames",
  63         OFFSET(thdi),         AV_OPT_TYPE_INT,    {.i64 = 0},        0, INT_MAX,          FLAGS },
  64     { "th_it",      "threshold for relation of good to all frames",
  65         OFFSET(thit),         AV_OPT_TYPE_DOUBLE, {.dbl = 0.5},    0.0, 1.0,              FLAGS },
  66     { NULL }
  67 };
  68
  69 AVFILTER_DEFINE_CLASS(signature);
  70
  71 /* all formats with a separate gray value */
  72 static const enum AVPixelFormat pix_fmts[] = {
  73     AV_PIX_FMT_GRAY8,
  74     AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
  75     AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
  76     AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P,
  77     AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUVJ420P,
  78     AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ444P,
  79     AV_PIX_FMT_YUVJ440P,
  80     AV_PIX_FMT_NV12, AV_PIX_FMT_NV21,
  81     AV_PIX_FMT_NONE
  82 };
  83
  84 static int config_input(AVFilterLink *inlink)
  85 {
  86     AVFilterContext *ctx = inlink->dst;
  87     SignatureContext *sic = ctx->priv;
  88     StreamContext *sc = &(sic->streamcontexts[FF_INLINK_IDX(inlink)]);
  89
  90     sc->time_base = inlink->time_base;
  91     /* test for overflow */
  92     sc->divide = (((uint64_t) inlink->w/32) * (inlink->w/32 + 1) * (inlink->h/32 * inlink->h/32 + 1) > INT64_MAX / (BLOCK_LCM * 255));
  93     if (sc->divide) {
  94         av_log(ctx, AV_LOG_WARNING, "Input dimension too high for precise calculation, numbers will be rounded.\n");
  95     }
  96     sc->w = inlink->w;
  97     sc->h = inlink->h;
  98     return 0;
  99 }
 100
 101 static int get_block_size(const Block *b)
 102 {
 103     return (b->to.y - b->up.y + 1) * (b->to.x - b->up.x + 1);
 104 }
 105
 106 static uint64_t get_block_sum(StreamContext *sc, uint64_t intpic[32][32], const Block *b)
 107 {
 108     uint64_t sum = 0;
 109
 110     int x0, y0, x1, y1;
 111
 112     x0 = b->up.x;
 113     y0 = b->up.y;
 114     x1 = b->to.x;
 115     y1 = b->to.y;
 116
 117     if (x0-1 >= 0 && y0-1 >= 0) {
 118         sum = intpic[y1][x1] + intpic[y0-1][x0-1] - intpic[y1][x0-1] - intpic[y0-1][x1];
 119     } else if (x0-1 >= 0) {
 120         sum = intpic[y1][x1] - intpic[y1][x0-1];
 121     } else if (y0-1 >= 0) {
 122         sum = intpic[y1][x1] - intpic[y0-1][x1];
 123     } else {
 124         sum = intpic[y1][x1];
 125     }
 126     return sum;
 127 }
 128
 129 static int cmp(const void *x, const void *y)
 130 {
 131     const uint64_t *a = x, *b = y;
 132     return *a < *b ? -1 : ( *a > *b ? 1 : 0 );
 133 }
 134
 135 /**
 136  * sets the bit at position pos to 1 in data
 137  */
 138 static void set_bit(uint8_t* data, size_t pos)
 139 {
 140     uint8_t mask = 1 << 7-(pos%8);
 141     data[pos/8] |= mask;
 142 }
 143
 144 static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
 145 {
 146     AVFilterContext *ctx = inlink->dst;
 147     SignatureContext *sic = ctx->priv;
 148     StreamContext *sc = &(sic->streamcontexts[FF_INLINK_IDX(inlink)]);
 149     FineSignature* fs;
 150
 151     static const uint8_t pot3[5] = { 3*3*3*3, 3*3*3, 3*3, 3, 1 };
 152     /* indexes of words : 210,217,219,274,334  44,175,233,270,273  57,70,103,237,269  100,285,295,337,354  101,102,111,275,296
 153     s2usw = sorted to unsorted wordvec: 44 is at index 5, 57 at index 10...
 154     */
 155     static const unsigned int wordvec[25] = {44,57,70,100,101,102,103,111,175,210,217,219,233,237,269,270,273,274,275,285,295,296,334,337,354};
 156     static const uint8_t      s2usw[25]   = { 5,10,11, 15, 20, 21, 12, 22,  6,  0,  1,  2,  7, 13, 14,  8,  9,  3, 23, 16, 17, 24,  4, 18, 19};
 157
 158     uint8_t wordt2b[5] = { 0, 0, 0, 0, 0 }; /* word ternary to binary */
 159     uint64_t intpic[32][32];
 160     uint64_t rowcount;
 161     uint8_t *p = picref->data[0];
 162     int inti, intj;
 163     int *intjlut;
 164
 165     uint64_t conflist[DIFFELEM_SIZE];
 166     int f = 0, g = 0, w = 0;
 167     int32_t dh1 = 1, dh2 = 1, dw1 = 1, dw2 = 1, a, b;
 168     int64_t denom;
 169     int i, j, k, ternary;
 170     uint64_t blocksum;
 171     int blocksize;
 172     int64_t th; /* threshold */
 173     int64_t sum;
 174
 175     int64_t precfactor = (sc->divide) ? 65536 : BLOCK_LCM;
 176
 177     /* initialize fs */
 178     if (sc->curfinesig) {
 179         fs = av_mallocz(sizeof(FineSignature));
 180         if (!fs)
 181             return AVERROR(ENOMEM);
 182         sc->curfinesig->next = fs;
 183         fs->prev = sc->curfinesig;
 184         sc->curfinesig = fs;
 185     } else {
 186         fs = sc->curfinesig = sc->finesiglist;
 187         sc->curcoarsesig1->first = fs;
 188     }
 189
 190     fs->pts = picref->pts;
 191     fs->index = sc->lastindex++;
 192
 193     memset(intpic, 0, sizeof(uint64_t)*32*32);
 194     intjlut = av_malloc_array(inlink->w, sizeof(int));
 195     if (!intjlut)
 196         return AVERROR(ENOMEM);
 197     for (i = 0; i < inlink->w; i++) {
 198         intjlut[i] = (i*32)/inlink->w;
 199     }
 200
 201     for (i = 0; i < inlink->h; i++) {
 202         inti = (i*32)/inlink->h;
 203         for (j = 0; j < inlink->w; j++) {
 204             intj = intjlut[j];
 205             intpic[inti][intj] += p[j];
 206         }
 207         p += picref->linesize[0];
 208     }
 209     av_freep(&intjlut);
 210
 211     /* The following calculates a summed area table (intpic) and brings the numbers
 212      * in intpic to the same denominator.
 213      * So you only have to handle the numinator in the following sections.
 214      */
 215     dh1 = inlink->h / 32;
 216     if (inlink->h % 32)
 217         dh2 = dh1 + 1;
 218     dw1 = inlink->w / 32;
 219     if (inlink->w % 32)
 220         dw2 = dw1 + 1;
 221     denom = (sc->divide) ? dh1 * (int64_t)dh2 * dw1 * dw2 : 1;
 222
 223     for (i = 0; i < 32; i++) {
 224         rowcount = 0;
 225         a = 1;
 226         if (dh2 > 1) {
 227             a = ((inlink->h*(i+1))%32 == 0) ? (inlink->h*(i+1))/32 - 1 : (inlink->h*(i+1))/32;
 228             a -= ((inlink->h*i)%32 == 0) ? (inlink->h*i)/32 - 1 : (inlink->h*i)/32;
 229             a = (a == dh1)? dh2 : dh1;
 230         }
 231         for (j = 0; j < 32; j++) {
 232             b = 1;
 233             if (dw2 > 1) {
 234                 b = ((inlink->w*(j+1))%32 == 0) ? (inlink->w*(j+1))/32 - 1 : (inlink->w*(j+1))/32;
 235                 b -= ((inlink->w*j)%32 == 0) ? (inlink->w*j)/32 - 1 : (inlink->w*j)/32;
 236                 b = (b == dw1)? dw2 : dw1;
 237             }
 238             rowcount += intpic[i][j] * a * b * precfactor / denom;
 239             if (i > 0) {
 240                 intpic[i][j] = intpic[i-1][j] + rowcount;
 241             } else {
 242                 intpic[i][j] = rowcount;
 243             }
 244         }
 245     }
 246
 247     denom = (sc->divide) ? 1 : dh1 * (int64_t)dh2 * dw1 * dw2;
 248
 249     for (i = 0; i < ELEMENT_COUNT; i++) {
 250         const ElemCat* elemcat = elements[i];
 251         int64_t* elemsignature;
 252         uint64_t* sortsignature;
 253
 254         elemsignature = av_malloc_array(elemcat->elem_count, 2 * sizeof(int64_t));
 255         if (!elemsignature)
 256             return AVERROR(ENOMEM);
 257         sortsignature = elemsignature + elemcat->elem_count;
 258
 259         for (j = 0; j < elemcat->elem_count; j++) {
 260             blocksum = 0;
 261             blocksize = 0;
 262             for (k = 0; k < elemcat->left_count; k++) {
 263                 blocksum += get_block_sum(sc, intpic, &elemcat->blocks[j*elemcat->block_count+k]);
 264                 blocksize += get_block_size(&elemcat->blocks[j*elemcat->block_count+k]);
 265             }
 266             sum = blocksum / blocksize;
 267             if (elemcat->av_elem) {
 268                 sum -= 128 * precfactor * denom;
 269             } else {
 270                 blocksum = 0;
 271                 blocksize = 0;
 272                 for (; k < elemcat->block_count; k++) {
 273                     blocksum += get_block_sum(sc, intpic, &elemcat->blocks[j*elemcat->block_count+k]);
 274                     blocksize += get_block_size(&elemcat->blocks[j*elemcat->block_count+k]);
 275                 }
 276                 sum -= blocksum / blocksize;
 277                 conflist[g++] = FFABS(sum * 8 / (precfactor * denom));
 278             }
 279
 280             elemsignature[j] = sum;
 281             sortsignature[j] = FFABS(sum);
 282         }
 283
 284         /* get threshold */
 285         qsort(sortsignature, elemcat->elem_count, sizeof(uint64_t), cmp);
 286         th = sortsignature[(int) (elemcat->elem_count*0.333)];
 287
 288         /* ternarize */
 289         for (j = 0; j < elemcat->elem_count; j++) {
 290             if (elemsignature[j] < -th) {
 291                 ternary = 0;
 292             } else if (elemsignature[j] <= th) {
 293                 ternary = 1;
 294             } else {
 295                 ternary = 2;
 296             }
 297             fs->framesig[f/5] += ternary * pot3[f%5];
 298
 299             if (f == wordvec[w]) {
 300                 fs->words[s2usw[w]/5] += ternary * pot3[wordt2b[s2usw[w]/5]++];
 301                 if (w < 24)
 302                     w++;
 303             }
 304             f++;
 305         }
 306         av_freep(&elemsignature);
 307     }
 308
 309     /* confidence */
 310     qsort(conflist, DIFFELEM_SIZE, sizeof(uint64_t), cmp);
 311     fs->confidence = FFMIN(conflist[DIFFELEM_SIZE/2], 255);
 312
 313     /* coarsesignature */
 314     if (sc->coarsecount == 0) {
 315         if (sc->curcoarsesig2) {
 316             sc->curcoarsesig1 = av_mallocz(sizeof(CoarseSignature));
 317             if (!sc->curcoarsesig1)
 318                 return AVERROR(ENOMEM);
 319             sc->curcoarsesig1->first = fs;
 320             sc->curcoarsesig2->next = sc->curcoarsesig1;
 321             sc->coarseend = sc->curcoarsesig1;
 322         }
 323     }
 324     if (sc->coarsecount == 45) {
 325         sc->midcoarse = 1;
 326         sc->curcoarsesig2 = av_mallocz(sizeof(CoarseSignature));
 327         if (!sc->curcoarsesig2)
 328             return AVERROR(ENOMEM);
 329         sc->curcoarsesig2->first = fs;
 330         sc->curcoarsesig1->next = sc->curcoarsesig2;
 331         sc->coarseend = sc->curcoarsesig2;
 332     }
 333     for (i = 0; i < 5; i++) {
 334         set_bit(sc->curcoarsesig1->data[i], fs->words[i]);
 335     }
 336     /* assuming the actual frame is the last */
 337     sc->curcoarsesig1->last = fs;
 338     if (sc->midcoarse) {
 339         for (i = 0; i < 5; i++) {
 340             set_bit(sc->curcoarsesig2->data[i], fs->words[i]);
 341         }
 342         sc->curcoarsesig2->last = fs;
 343     }
 344
 345     sc->coarsecount = (sc->coarsecount+1)%90;
 346
 347     /* debug printing finesignature */
 348     if (av_log_get_level() == AV_LOG_DEBUG) {
 349         av_log(ctx, AV_LOG_DEBUG, "input %d, confidence: %d\n", FF_INLINK_IDX(inlink), fs->confidence);
 350
 351         av_log(ctx, AV_LOG_DEBUG, "words:");
 352         for (i = 0; i < 5; i++) {
 353             av_log(ctx, AV_LOG_DEBUG, " %d:", fs->words[i] );
 354             av_log(ctx, AV_LOG_DEBUG, " %d", fs->words[i] / pot3[0] );
 355             for (j = 1; j < 5; j++)
 356                 av_log(ctx, AV_LOG_DEBUG, ",%d", fs->words[i] % pot3[j-1] / pot3[j] );
 357             av_log(ctx, AV_LOG_DEBUG, ";");
 358         }
 359         av_log(ctx, AV_LOG_DEBUG, "\n");
 360
 361         av_log(ctx, AV_LOG_DEBUG, "framesignature:");
 362         for (i = 0; i < SIGELEM_SIZE/5; i++) {
 363             av_log(ctx, AV_LOG_DEBUG, " %d", fs->framesig[i] / pot3[0] );
 364             for (j = 1; j < 5; j++)
 365                 av_log(ctx, AV_LOG_DEBUG, ",%d", fs->framesig[i] % pot3[j-1] / pot3[j] );
 366         }
 367         av_log(ctx, AV_LOG_DEBUG, "\n");
 368     }
 369
 370     if (FF_INLINK_IDX(inlink) == 0)
 371         return ff_filter_frame(inlink->dst->outputs[0], picref);
 372     return 1;
 373 }
 374
 375 static int xml_export(AVFilterContext *ctx, StreamContext *sc, const char* filename)
 376 {
 377     FineSignature* fs;
 378     CoarseSignature* cs;
 379     int i, j;
 380     FILE* f;
 381     unsigned int pot3[5] = { 3*3*3*3, 3*3*3, 3*3, 3, 1 };
 382
 383     if (!sc->coarseend->last)
 384         return AVERROR(EINVAL); // No frames ?
 385
 386     f = avpriv_fopen_utf8(filename, "w");
 387     if (!f) {
 388         int err = AVERROR(EINVAL);
 389         av_log(ctx, AV_LOG_ERROR, "cannot open xml file %s: %s\n", filename, av_err2str(err));
 390         return err;
 391     }
 392
 393     /* header */
 394     fprintf(f, "<?xml version='1.0' encoding='ASCII' ?>\n");
 395     fprintf(f, "<Mpeg7 xmlns=\"urn:mpeg:mpeg7:schema:2001\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"urn:mpeg:mpeg7:schema:2001 schema/Mpeg7-2001.xsd\">\n");
 396     fprintf(f, "  <DescriptionUnit xsi:type=\"DescriptorCollectionType\">\n");
 397     fprintf(f, "    <Descriptor xsi:type=\"VideoSignatureType\">\n");
 398     fprintf(f, "      <VideoSignatureRegion>\n");
 399     fprintf(f, "        <VideoSignatureSpatialRegion>\n");
 400     fprintf(f, "          <Pixel>0 0 </Pixel>\n");
 401     fprintf(f, "          <Pixel>%d %d </Pixel>\n", sc->w - 1, sc->h - 1);
 402     fprintf(f, "        </VideoSignatureSpatialRegion>\n");
 403     fprintf(f, "        <StartFrameOfSpatialRegion>0</StartFrameOfSpatialRegion>\n");
 404     /* hoping num is 1, other values are vague */
 405     fprintf(f, "        <MediaTimeUnit>%d</MediaTimeUnit>\n", sc->time_base.den / sc->time_base.num);
 406     fprintf(f, "        <MediaTimeOfSpatialRegion>\n");
 407     fprintf(f, "          <StartMediaTimeOfSpatialRegion>0</StartMediaTimeOfSpatialRegion>\n");
 408     fprintf(f, "          <EndMediaTimeOfSpatialRegion>%" PRIu64 "</EndMediaTimeOfSpatialRegion>\n", sc->coarseend->last->pts);
 409     fprintf(f, "        </MediaTimeOfSpatialRegion>\n");
 410
 411     /* coarsesignatures */
 412     for (cs = sc->coarsesiglist; cs; cs = cs->next) {
 413         fprintf(f, "        <VSVideoSegment>\n");
 414         fprintf(f, "          <StartFrameOfSegment>%" PRIu32 "</StartFrameOfSegment>\n", cs->first->index);
 415         fprintf(f, "          <EndFrameOfSegment>%" PRIu32 "</EndFrameOfSegment>\n", cs->last->index);
 416         fprintf(f, "          <MediaTimeOfSegment>\n");
 417         fprintf(f, "            <StartMediaTimeOfSegment>%" PRIu64 "</StartMediaTimeOfSegment>\n", cs->first->pts);
 418         fprintf(f, "            <EndMediaTimeOfSegment>%" PRIu64 "</EndMediaTimeOfSegment>\n", cs->last->pts);
 419         fprintf(f, "          </MediaTimeOfSegment>\n");
 420         for (i = 0; i < 5; i++) {
 421             fprintf(f, "          <BagOfWords>");
 422             for (j = 0; j < 31; j++) {
 423                 uint8_t n = cs->data[i][j];
 424                 if (j < 30) {
 425                     fprintf(f, "%d  %d  %d  %d  %d  %d  %d  %d  ", (n & 0x80) >> 7,
 426                                                                    (n & 0x40) >> 6,
 427                                                                    (n & 0x20) >> 5,
 428                                                                    (n & 0x10) >> 4,
 429                                                                    (n & 0x08) >> 3,
 430                                                                    (n & 0x04) >> 2,
 431                                                                    (n & 0x02) >> 1,
 432                                                                    (n & 0x01));
 433                 } else {
 434                     /* print only 3 bit in last byte */
 435                     fprintf(f, "%d  %d  %d ", (n & 0x80) >> 7,
 436                                               (n & 0x40) >> 6,
 437                                               (n & 0x20) >> 5);
 438                 }
 439             }
 440             fprintf(f, "</BagOfWords>\n");
 441         }
 442         fprintf(f, "        </VSVideoSegment>\n");
 443     }
 444
 445     /* finesignatures */
 446     for (fs = sc->finesiglist; fs; fs = fs->next) {
 447         fprintf(f, "        <VideoFrame>\n");
 448         fprintf(f, "          <MediaTimeOfFrame>%" PRIu64 "</MediaTimeOfFrame>\n", fs->pts);
 449         /* confidence */
 450         fprintf(f, "          <FrameConfidence>%d</FrameConfidence>\n", fs->confidence);
 451         /* words */
 452         fprintf(f, "          <Word>");
 453         for (i = 0; i < 5; i++) {
 454             fprintf(f, "%d ", fs->words[i]);
 455             if (i < 4) {
 456                 fprintf(f, " ");
 457             }
 458         }
 459         fprintf(f, "</Word>\n");
 460         /* framesignature */
 461         fprintf(f, "          <FrameSignature>");
 462         for (i = 0; i< SIGELEM_SIZE/5; i++) {
 463             if (i > 0) {
 464                 fprintf(f, " ");
 465             }
 466             fprintf(f, "%d ", fs->framesig[i] / pot3[0]);
 467             for (j = 1; j < 5; j++)
 468                 fprintf(f, " %d ", fs->framesig[i] % pot3[j-1] / pot3[j] );
 469         }
 470         fprintf(f, "</FrameSignature>\n");
 471         fprintf(f, "        </VideoFrame>\n");
 472     }
 473     fprintf(f, "      </VideoSignatureRegion>\n");
 474     fprintf(f, "    </Descriptor>\n");
 475     fprintf(f, "  </DescriptionUnit>\n");
 476     fprintf(f, "</Mpeg7>\n");
 477
 478     fclose(f);
 479     return 0;
 480 }
 481
 482 static int binary_export(AVFilterContext *ctx, StreamContext *sc, const char* filename)
 483 {
 484     FILE* f;
 485     FineSignature* fs;
 486     CoarseSignature* cs;
 487     uint32_t numofsegments = (sc->lastindex + 44)/45;
 488     int i, j;
 489     PutBitContext buf;
 490     /* buffer + header + coarsesignatures + finesignature */
 491     int len = (512 + 6 * 32 + 3*16 + 2 +
 492         numofsegments * (4*32 + 1 + 5*243) +
 493         sc->lastindex * (2 + 32 + 6*8 + 608)) / 8;
 494     uint8_t* buffer = av_malloc_array(len, sizeof(uint8_t));
 495     if (!buffer)
 496         return AVERROR(ENOMEM);
 497
 498     f = avpriv_fopen_utf8(filename, "wb");
 499     if (!f) {
 500         int err = AVERROR(EINVAL);
 501         av_log(ctx, AV_LOG_ERROR, "cannot open file %s: %s\n", filename, av_err2str(err));
 502         av_freep(&buffer);
 503         return err;
 504     }
 505     init_put_bits(&buf, buffer, len);
 506
 507     put_bits32(&buf, 1); /* NumOfSpatial Regions, only 1 supported */
 508     put_bits(&buf, 1, 1); /* SpatialLocationFlag, always the whole image */
 509     put_bits32(&buf, 0); /* PixelX,1 PixelY,1, 0,0 */
 510     put_bits(&buf, 16, sc->w-1 & 0xFFFF); /* PixelX,2 */
 511     put_bits(&buf, 16, sc->h-1 & 0xFFFF); /* PixelY,2 */
 512     put_bits32(&buf, 0); /* StartFrameOfSpatialRegion */
 513     put_bits32(&buf, sc->lastindex); /* NumOfFrames */
 514     /* hoping num is 1, other values are vague */
 515     /* den/num might be greater than 16 bit, so cutting it */
 516     put_bits(&buf, 16, 0xFFFF & (sc->time_base.den / sc->time_base.num)); /* MediaTimeUnit */
 517     put_bits(&buf, 1, 1); /* MediaTimeFlagOfSpatialRegion */
 518     put_bits32(&buf, 0); /* StartMediaTimeOfSpatialRegion */
 519     put_bits32(&buf, 0xFFFFFFFF & sc->coarseend->last->pts); /* EndMediaTimeOfSpatialRegion */
 520     put_bits32(&buf, numofsegments); /* NumOfSegments */
 521     /* coarsesignatures */
 522     for (cs = sc->coarsesiglist; cs; cs = cs->next) {
 523         put_bits32(&buf, cs->first->index); /* StartFrameOfSegment */
 524         put_bits32(&buf, cs->last->index); /* EndFrameOfSegment */
 525         put_bits(&buf, 1, 1); /* MediaTimeFlagOfSegment */
 526         put_bits32(&buf, 0xFFFFFFFF & cs->first->pts); /* StartMediaTimeOfSegment */
 527         put_bits32(&buf, 0xFFFFFFFF & cs->last->pts); /* EndMediaTimeOfSegment */
 528         for (i = 0; i < 5; i++) {
 529             /* put 243 bits ( = 7 * 32 + 19 = 8 * 28 + 19) into buffer */
 530             for (j = 0; j < 30; j++) {
 531                 put_bits(&buf, 8, cs->data[i][j]);
 532             }
 533             put_bits(&buf, 3, cs->data[i][30] >> 5);
 534         }
 535     }
 536     /* finesignatures */
 537     put_bits(&buf, 1, 0); /* CompressionFlag, only 0 supported */
 538     for (fs = sc->finesiglist; fs; fs = fs->next) {
 539         put_bits(&buf, 1, 1); /* MediaTimeFlagOfFrame */
 540         put_bits32(&buf, 0xFFFFFFFF & fs->pts); /* MediaTimeOfFrame */
 541         put_bits(&buf, 8, fs->confidence); /* FrameConfidence */
 542         for (i = 0; i < 5; i++) {
 543             put_bits(&buf, 8, fs->words[i]); /* Words */
 544         }
 545         /* framesignature */
 546         for (i = 0; i < SIGELEM_SIZE/5; i++) {
 547             put_bits(&buf, 8, fs->framesig[i]);
 548         }
 549     }
 550
 551     flush_put_bits(&buf);
 552     fwrite(buffer, 1, put_bytes_output(&buf), f);
 553     fclose(f);
 554     av_freep(&buffer);
 555     return 0;
 556 }
 557
 558 static int export(AVFilterContext *ctx, StreamContext *sc, int input)
 559 {
 560     SignatureContext* sic = ctx->priv;
 561     char filename[1024];
 562
 563     if (sic->nb_inputs > 1) {
 564         /* error already handled */
 565         av_assert0(av_get_frame_filename(filename, sizeof(filename), sic->filename, input) == 0);
 566     } else {
 567         if (av_strlcpy(filename, sic->filename, sizeof(filename)) >= sizeof(filename))
 568             return AVERROR(EINVAL);
 569     }
 570     if (sic->format == FORMAT_XML) {
 571         return xml_export(ctx, sc, filename);
 572     } else {
 573         return binary_export(ctx, sc, filename);
 574     }
 575 }
 576
 577 static int request_frame(AVFilterLink *outlink)
 578 {
 579     AVFilterContext *ctx = outlink->src;
 580     SignatureContext *sic = ctx->priv;
 581     StreamContext *sc, *sc2;
 582     MatchingInfo match;
 583     int i, j, ret;
 584     int lookup = 1; /* indicates wheather EOF of all files is reached */
 585
 586     /* process all inputs */
 587     for (i = 0; i < sic->nb_inputs; i++){
 588         sc = &(sic->streamcontexts[i]);
 589
 590         ret = ff_request_frame(ctx->inputs[i]);
 591
 592         /* return if unexpected error occurs in input stream */
 593         if (ret < 0 && ret != AVERROR_EOF)
 594             return ret;
 595
 596         /* export signature at EOF */
 597         if (ret == AVERROR_EOF && !sc->exported) {
 598             /* export if wanted */
 599             if (strlen(sic->filename) > 0) {
 600                 if (export(ctx, sc, i) < 0)
 601                     return ret;
 602             }
 603             sc->exported = 1;
 604         }
 605         lookup &= sc->exported;
 606     }
 607
 608     /* signature lookup */
 609     if (lookup && sic->mode != MODE_OFF) {
 610         /* iterate over every pair */
 611         for (i = 0; i < sic->nb_inputs; i++) {
 612             sc = &(sic->streamcontexts[i]);
 613             for (j = i+1; j < sic->nb_inputs; j++) {
 614                 sc2 = &(sic->streamcontexts[j]);
 615                 match = lookup_signatures(ctx, sic, sc, sc2, sic->mode);
 616                 if (match.score != 0) {
 617                     av_log(ctx, AV_LOG_INFO, "matching of video %d at %f and %d at %f, %d frames matching\n",
 618                             i, ((double) match.first->pts * sc->time_base.num) / sc->time_base.den,
 619                             j, ((double) match.second->pts * sc2->time_base.num) / sc2->time_base.den,
 620                             match.matchframes);
 621                     if (match.whole)
 622                         av_log(ctx, AV_LOG_INFO, "whole video matching\n");
 623                 } else {
 624                     av_log(ctx, AV_LOG_INFO, "no matching of video %d and %d\n", i, j);
 625                 }
 626             }
 627         }
 628     }
 629
 630     return ret;
 631 }
 632
 633 static av_cold int init(AVFilterContext *ctx)
 634 {
 635
 636     SignatureContext *sic = ctx->priv;
 637     StreamContext *sc;
 638     int i, ret;
 639     char tmp[1024];
 640
 641     sic->streamcontexts = av_mallocz(sic->nb_inputs * sizeof(StreamContext));
 642     if (!sic->streamcontexts)
 643         return AVERROR(ENOMEM);
 644
 645     for (i = 0; i < sic->nb_inputs; i++) {
 646         AVFilterPad pad = {
 647             .type = AVMEDIA_TYPE_VIDEO,
 648             .name = av_asprintf("in%d", i),
 649             .config_props = config_input,
 650             .filter_frame = filter_frame,
 651         };
 652
 653         if (!pad.name)
 654             return AVERROR(ENOMEM);
 655         if ((ret = ff_append_inpad_free_name(ctx, &pad)) < 0)
 656             return ret;
 657
 658         sc = &(sic->streamcontexts[i]);
 659
 660         sc->lastindex = 0;
 661         sc->finesiglist = av_mallocz(sizeof(FineSignature));
 662         if (!sc->finesiglist)
 663             return AVERROR(ENOMEM);
 664         sc->curfinesig = NULL;
 665
 666         sc->coarsesiglist = av_mallocz(sizeof(CoarseSignature));
 667         if (!sc->coarsesiglist)
 668             return AVERROR(ENOMEM);
 669         sc->curcoarsesig1 = sc->coarsesiglist;
 670         sc->coarseend = sc->coarsesiglist;
 671         sc->coarsecount = 0;
 672         sc->midcoarse = 0;
 673     }
 674
 675     /* check filename */
 676     if (sic->nb_inputs > 1 && strlen(sic->filename) > 0 && av_get_frame_filename(tmp, sizeof(tmp), sic->filename, 0) == -1) {
 677         av_log(ctx, AV_LOG_ERROR, "The filename must contain %%d or %%0nd, if you have more than one input.\n");
 678         return AVERROR(EINVAL);
 679     }
 680
 681     return 0;
 682 }
 683
 684
 685
 686 static av_cold void uninit(AVFilterContext *ctx)
 687 {
 688     SignatureContext *sic = ctx->priv;
 689     StreamContext *sc;
 690     void* tmp;
 691     FineSignature* finsig;
 692     CoarseSignature* cousig;
 693     int i;
 694
 695
 696     /* free the lists */
 697     if (sic->streamcontexts != NULL) {
 698         for (i = 0; i < sic->nb_inputs; i++) {
 699             sc = &(sic->streamcontexts[i]);
 700             finsig = sc->finesiglist;
 701             cousig = sc->coarsesiglist;
 702
 703             while (finsig) {
 704                 tmp = finsig;
 705                 finsig = finsig->next;
 706                 av_freep(&tmp);
 707             }
 708             sc->finesiglist = NULL;
 709
 710             while (cousig) {
 711                 tmp = cousig;
 712                 cousig = cousig->next;
 713                 av_freep(&tmp);
 714             }
 715             sc->coarsesiglist = NULL;
 716         }
 717         av_freep(&sic->streamcontexts);
 718     }
 719 }
 720
 721 static int config_output(AVFilterLink *outlink)
 722 {
 723     AVFilterContext *ctx = outlink->src;
 724     AVFilterLink *inlink = ctx->inputs[0];
 725     FilterLink       *il = ff_filter_link(inlink);
 726     FilterLink       *ol = ff_filter_link(outlink);
 727
 728     outlink->time_base = inlink->time_base;
 729     ol->frame_rate = il->frame_rate;
 730     outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
 731     outlink->w = inlink->w;
 732     outlink->h = inlink->h;
 733
 734     return 0;
 735 }
 736
 737 static const AVFilterPad signature_outputs[] = {
 738     {
 739         .name          = "default",
 740         .type          = AVMEDIA_TYPE_VIDEO,
 741         .request_frame = request_frame,
 742         .config_props  = config_output,
 743     },
 744 };
 745
 746 const FFFilter ff_vf_signature = {
 747     .p.name        = "signature",
 748     .p.description = NULL_IF_CONFIG_SMALL("Calculate the MPEG-7 video signature"),
 749     .p.priv_class  = &signature_class,
 750     .p.inputs      = NULL,
 751     .p.flags       = AVFILTER_FLAG_DYNAMIC_INPUTS,
 752     .priv_size     = sizeof(SignatureContext),
 753     .init          = init,
 754     .uninit        = uninit,
 755     FILTER_OUTPUTS(signature_outputs),
 756     FILTER_PIXFMTS_ARRAY(pix_fmts),
 757 };