2 * Copyright (c) 2017 Gerion Entrup
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23 * MPEG-7 video signature calculation and lookup filter
24 * @see http://epubs.surrey.ac.uk/531590/1/MPEG-7%20Video%20Signature%20Author%27s%20Copy.pdf
27 #include "libavcodec/put_bits.h"
28 #include "libavformat/avformat.h"
29 #include "libavutil/mem.h"
30 #include "libavutil/opt.h"
31 #include "libavutil/avstring.h"
32 #include "libavutil/file_open.h"
35 #include "signature.h"
36 #include "signature_lookup.c"
38 #define OFFSET(x) offsetof(SignatureContext, x)
39 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
40 #define BLOCK_LCM (int64_t) 476985600
42 static const AVOption signature_options
[] = {
43 { "detectmode", "set the detectmode",
44 OFFSET(mode
), AV_OPT_TYPE_INT
, {.i64
= MODE_OFF
}, 0, NB_LOOKUP_MODE
-1, FLAGS
, .unit
= "mode" },
45 { "off", NULL
, 0, AV_OPT_TYPE_CONST
, {.i64
= MODE_OFF
}, 0, 0, .flags
= FLAGS
, .unit
= "mode" },
46 { "full", NULL
, 0, AV_OPT_TYPE_CONST
, {.i64
= MODE_FULL
}, 0, 0, .flags
= FLAGS
, .unit
= "mode" },
47 { "fast", NULL
, 0, AV_OPT_TYPE_CONST
, {.i64
= MODE_FAST
}, 0, 0, .flags
= FLAGS
, .unit
= "mode" },
48 { "nb_inputs", "number of inputs",
49 OFFSET(nb_inputs
), AV_OPT_TYPE_INT
, {.i64
= 1}, 1, INT_MAX
, FLAGS
},
50 { "filename", "filename for output files",
51 OFFSET(filename
), AV_OPT_TYPE_STRING
, {.str
= ""}, 0, NB_FORMATS
-1, FLAGS
},
52 { "format", "set output format",
53 OFFSET(format
), AV_OPT_TYPE_INT
, {.i64
= FORMAT_BINARY
}, 0, 1, FLAGS
, .unit
= "format" },
54 { "binary", 0, 0, AV_OPT_TYPE_CONST
, {.i64
=FORMAT_BINARY
}, 0, 0, FLAGS
, .unit
= "format" },
55 { "xml", 0, 0, AV_OPT_TYPE_CONST
, {.i64
=FORMAT_XML
}, 0, 0, FLAGS
, .unit
= "format" },
56 { "th_d", "threshold to detect one word as similar",
57 OFFSET(thworddist
), AV_OPT_TYPE_INT
, {.i64
= 9000}, 1, INT_MAX
, FLAGS
},
58 { "th_dc", "threshold to detect all words as similar",
59 OFFSET(thcomposdist
), AV_OPT_TYPE_INT
, {.i64
= 60000}, 1, INT_MAX
, FLAGS
},
60 { "th_xh", "threshold to detect frames as similar",
61 OFFSET(thl1
), AV_OPT_TYPE_INT
, {.i64
= 116}, 1, INT_MAX
, FLAGS
},
62 { "th_di", "minimum length of matching sequence in frames",
63 OFFSET(thdi
), AV_OPT_TYPE_INT
, {.i64
= 0}, 0, INT_MAX
, FLAGS
},
64 { "th_it", "threshold for relation of good to all frames",
65 OFFSET(thit
), AV_OPT_TYPE_DOUBLE
, {.dbl
= 0.5}, 0.0, 1.0, FLAGS
},
69 AVFILTER_DEFINE_CLASS(signature
);
71 /* all formats with a separate gray value */
72 static const enum AVPixelFormat pix_fmts
[] = {
74 AV_PIX_FMT_YUV410P
, AV_PIX_FMT_YUV411P
,
75 AV_PIX_FMT_YUV420P
, AV_PIX_FMT_YUV422P
,
76 AV_PIX_FMT_YUV440P
, AV_PIX_FMT_YUV444P
,
77 AV_PIX_FMT_YUVJ411P
, AV_PIX_FMT_YUVJ420P
,
78 AV_PIX_FMT_YUVJ422P
, AV_PIX_FMT_YUVJ444P
,
80 AV_PIX_FMT_NV12
, AV_PIX_FMT_NV21
,
84 static int config_input(AVFilterLink
*inlink
)
86 AVFilterContext
*ctx
= inlink
->dst
;
87 SignatureContext
*sic
= ctx
->priv
;
88 StreamContext
*sc
= &(sic
->streamcontexts
[FF_INLINK_IDX(inlink
)]);
90 sc
->time_base
= inlink
->time_base
;
91 /* test for overflow */
92 sc
->divide
= (((uint64_t) inlink
->w
/32) * (inlink
->w
/32 + 1) * (inlink
->h
/32 * inlink
->h
/32 + 1) > INT64_MAX
/ (BLOCK_LCM
* 255));
94 av_log(ctx
, AV_LOG_WARNING
, "Input dimension too high for precise calculation, numbers will be rounded.\n");
101 static int get_block_size(const Block
*b
)
103 return (b
->to
.y
- b
->up
.y
+ 1) * (b
->to
.x
- b
->up
.x
+ 1);
106 static uint64_t get_block_sum(StreamContext
*sc
, uint64_t intpic
[32][32], const Block
*b
)
117 if (x0
-1 >= 0 && y0
-1 >= 0) {
118 sum
= intpic
[y1
][x1
] + intpic
[y0
-1][x0
-1] - intpic
[y1
][x0
-1] - intpic
[y0
-1][x1
];
119 } else if (x0
-1 >= 0) {
120 sum
= intpic
[y1
][x1
] - intpic
[y1
][x0
-1];
121 } else if (y0
-1 >= 0) {
122 sum
= intpic
[y1
][x1
] - intpic
[y0
-1][x1
];
124 sum
= intpic
[y1
][x1
];
/**
 * Three-way comparison of two uint64_t values, usable as a qsort() callback.
 *
 * @param x pointer to the first uint64_t
 * @param y pointer to the second uint64_t
 * @return  -1 if *x < *y, 1 if *x > *y, 0 if they are equal
 */
static int cmp(const void *x, const void *y)
{
    const uint64_t lhs = *(const uint64_t *)x;
    const uint64_t rhs = *(const uint64_t *)y;

    /* each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
    return (lhs > rhs) - (lhs < rhs);
}
/**
 * Set the bit at position pos in data to 1.
 *
 * Bits are addressed most-significant first within each byte: position 0 is
 * the 0x80 bit of data[0], position 7 its 0x01 bit, position 8 the 0x80 bit
 * of data[1], and so on. Other bits are left untouched.
 *
 * @param data byte array to modify; must hold at least pos/8 + 1 bytes
 * @param pos  zero-based bit position
 */
static void set_bit(uint8_t* data, size_t pos)
{
    const size_t   byte_idx = pos >> 3;
    const unsigned shift    = 7 - (unsigned)(pos & 7);

    data[byte_idx] |= (uint8_t)(1u << shift);
}
144 static int filter_frame(AVFilterLink
*inlink
, AVFrame
*picref
)
146 AVFilterContext
*ctx
= inlink
->dst
;
147 SignatureContext
*sic
= ctx
->priv
;
148 StreamContext
*sc
= &(sic
->streamcontexts
[FF_INLINK_IDX(inlink
)]);
151 static const uint8_t pot3
[5] = { 3*3*3*3, 3*3*3, 3*3, 3, 1 };
152 /* indexes of words : 210,217,219,274,334 44,175,233,270,273 57,70,103,237,269 100,285,295,337,354 101,102,111,275,296
153 s2usw = sorted to unsorted wordvec: 44 is at index 5, 57 at index 10...
155 static const unsigned int wordvec
[25] = {44,57,70,100,101,102,103,111,175,210,217,219,233,237,269,270,273,274,275,285,295,296,334,337,354};
156 static const uint8_t s2usw
[25] = { 5,10,11, 15, 20, 21, 12, 22, 6, 0, 1, 2, 7, 13, 14, 8, 9, 3, 23, 16, 17, 24, 4, 18, 19};
158 uint8_t wordt2b
[5] = { 0, 0, 0, 0, 0 }; /* word ternary to binary */
159 uint64_t intpic
[32][32];
161 uint8_t *p
= picref
->data
[0];
165 uint64_t conflist
[DIFFELEM_SIZE
];
166 int f
= 0, g
= 0, w
= 0;
167 int32_t dh1
= 1, dh2
= 1, dw1
= 1, dw2
= 1, a
, b
;
169 int i
, j
, k
, ternary
;
172 int64_t th
; /* threshold */
175 int64_t precfactor
= (sc
->divide
) ? 65536 : BLOCK_LCM
;
178 if (sc
->curfinesig
) {
179 fs
= av_mallocz(sizeof(FineSignature
));
181 return AVERROR(ENOMEM
);
182 sc
->curfinesig
->next
= fs
;
183 fs
->prev
= sc
->curfinesig
;
186 fs
= sc
->curfinesig
= sc
->finesiglist
;
187 sc
->curcoarsesig1
->first
= fs
;
190 fs
->pts
= picref
->pts
;
191 fs
->index
= sc
->lastindex
++;
193 memset(intpic
, 0, sizeof(uint64_t)*32*32);
194 intjlut
= av_malloc_array(inlink
->w
, sizeof(int));
196 return AVERROR(ENOMEM
);
197 for (i
= 0; i
< inlink
->w
; i
++) {
198 intjlut
[i
] = (i
*32)/inlink
->w
;
201 for (i
= 0; i
< inlink
->h
; i
++) {
202 inti
= (i
*32)/inlink
->h
;
203 for (j
= 0; j
< inlink
->w
; j
++) {
205 intpic
[inti
][intj
] += p
[j
];
207 p
+= picref
->linesize
[0];
211 /* The following calculates a summed area table (intpic) and brings the numbers
212 * in intpic to the same denominator.
213 * So you only have to handle the numerator in the following sections.
215 dh1
= inlink
->h
/ 32;
218 dw1
= inlink
->w
/ 32;
221 denom
= (sc
->divide
) ? dh1
* (int64_t)dh2
* dw1
* dw2
: 1;
223 for (i
= 0; i
< 32; i
++) {
227 a
= ((inlink
->h
*(i
+1))%32 == 0) ? (inlink
->h
*(i
+1))/32 - 1 : (inlink
->h
*(i
+1))/32;
228 a
-= ((inlink
->h
*i
)%32 == 0) ? (inlink
->h
*i
)/32 - 1 : (inlink
->h
*i
)/32;
229 a
= (a
== dh1
)? dh2
: dh1
;
231 for (j
= 0; j
< 32; j
++) {
234 b
= ((inlink
->w
*(j
+1))%32 == 0) ? (inlink
->w
*(j
+1))/32 - 1 : (inlink
->w
*(j
+1))/32;
235 b
-= ((inlink
->w
*j
)%32 == 0) ? (inlink
->w
*j
)/32 - 1 : (inlink
->w
*j
)/32;
236 b
= (b
== dw1
)? dw2
: dw1
;
238 rowcount
+= intpic
[i
][j
] * a
* b
* precfactor
/ denom
;
240 intpic
[i
][j
] = intpic
[i
-1][j
] + rowcount
;
242 intpic
[i
][j
] = rowcount
;
247 denom
= (sc
->divide
) ? 1 : dh1
* (int64_t)dh2
* dw1
* dw2
;
249 for (i
= 0; i
< ELEMENT_COUNT
; i
++) {
250 const ElemCat
* elemcat
= elements
[i
];
251 int64_t* elemsignature
;
252 uint64_t* sortsignature
;
254 elemsignature
= av_malloc_array(elemcat
->elem_count
, 2 * sizeof(int64_t));
256 return AVERROR(ENOMEM
);
257 sortsignature
= elemsignature
+ elemcat
->elem_count
;
259 for (j
= 0; j
< elemcat
->elem_count
; j
++) {
262 for (k
= 0; k
< elemcat
->left_count
; k
++) {
263 blocksum
+= get_block_sum(sc
, intpic
, &elemcat
->blocks
[j
*elemcat
->block_count
+k
]);
264 blocksize
+= get_block_size(&elemcat
->blocks
[j
*elemcat
->block_count
+k
]);
266 sum
= blocksum
/ blocksize
;
267 if (elemcat
->av_elem
) {
268 sum
-= 128 * precfactor
* denom
;
272 for (; k
< elemcat
->block_count
; k
++) {
273 blocksum
+= get_block_sum(sc
, intpic
, &elemcat
->blocks
[j
*elemcat
->block_count
+k
]);
274 blocksize
+= get_block_size(&elemcat
->blocks
[j
*elemcat
->block_count
+k
]);
276 sum
-= blocksum
/ blocksize
;
277 conflist
[g
++] = FFABS(sum
* 8 / (precfactor
* denom
));
280 elemsignature
[j
] = sum
;
281 sortsignature
[j
] = FFABS(sum
);
285 qsort(sortsignature
, elemcat
->elem_count
, sizeof(uint64_t), cmp
);
286 th
= sortsignature
[(int) (elemcat
->elem_count
*0.333)];
289 for (j
= 0; j
< elemcat
->elem_count
; j
++) {
290 if (elemsignature
[j
] < -th
) {
292 } else if (elemsignature
[j
] <= th
) {
297 fs
->framesig
[f
/5] += ternary
* pot3
[f
%5];
299 if (f
== wordvec
[w
]) {
300 fs
->words
[s2usw
[w
]/5] += ternary
* pot3
[wordt2b
[s2usw
[w
]/5]++];
306 av_freep(&elemsignature
);
310 qsort(conflist
, DIFFELEM_SIZE
, sizeof(uint64_t), cmp
);
311 fs
->confidence
= FFMIN(conflist
[DIFFELEM_SIZE
/2], 255);
313 /* coarsesignature */
314 if (sc
->coarsecount
== 0) {
315 if (sc
->curcoarsesig2
) {
316 sc
->curcoarsesig1
= av_mallocz(sizeof(CoarseSignature
));
317 if (!sc
->curcoarsesig1
)
318 return AVERROR(ENOMEM
);
319 sc
->curcoarsesig1
->first
= fs
;
320 sc
->curcoarsesig2
->next
= sc
->curcoarsesig1
;
321 sc
->coarseend
= sc
->curcoarsesig1
;
324 if (sc
->coarsecount
== 45) {
326 sc
->curcoarsesig2
= av_mallocz(sizeof(CoarseSignature
));
327 if (!sc
->curcoarsesig2
)
328 return AVERROR(ENOMEM
);
329 sc
->curcoarsesig2
->first
= fs
;
330 sc
->curcoarsesig1
->next
= sc
->curcoarsesig2
;
331 sc
->coarseend
= sc
->curcoarsesig2
;
333 for (i
= 0; i
< 5; i
++) {
334 set_bit(sc
->curcoarsesig1
->data
[i
], fs
->words
[i
]);
336 /* assuming the actual frame is the last */
337 sc
->curcoarsesig1
->last
= fs
;
339 for (i
= 0; i
< 5; i
++) {
340 set_bit(sc
->curcoarsesig2
->data
[i
], fs
->words
[i
]);
342 sc
->curcoarsesig2
->last
= fs
;
345 sc
->coarsecount
= (sc
->coarsecount
+1)%90;
347 /* debug printing finesignature */
348 if (av_log_get_level() == AV_LOG_DEBUG
) {
349 av_log(ctx
, AV_LOG_DEBUG
, "input %d, confidence: %d\n", FF_INLINK_IDX(inlink
), fs
->confidence
);
351 av_log(ctx
, AV_LOG_DEBUG
, "words:");
352 for (i
= 0; i
< 5; i
++) {
353 av_log(ctx
, AV_LOG_DEBUG
, " %d:", fs
->words
[i
] );
354 av_log(ctx
, AV_LOG_DEBUG
, " %d", fs
->words
[i
] / pot3
[0] );
355 for (j
= 1; j
< 5; j
++)
356 av_log(ctx
, AV_LOG_DEBUG
, ",%d", fs
->words
[i
] % pot3
[j
-1] / pot3
[j
] );
357 av_log(ctx
, AV_LOG_DEBUG
, ";");
359 av_log(ctx
, AV_LOG_DEBUG
, "\n");
361 av_log(ctx
, AV_LOG_DEBUG
, "framesignature:");
362 for (i
= 0; i
< SIGELEM_SIZE
/5; i
++) {
363 av_log(ctx
, AV_LOG_DEBUG
, " %d", fs
->framesig
[i
] / pot3
[0] );
364 for (j
= 1; j
< 5; j
++)
365 av_log(ctx
, AV_LOG_DEBUG
, ",%d", fs
->framesig
[i
] % pot3
[j
-1] / pot3
[j
] );
367 av_log(ctx
, AV_LOG_DEBUG
, "\n");
370 if (FF_INLINK_IDX(inlink
) == 0)
371 return ff_filter_frame(inlink
->dst
->outputs
[0], picref
);
375 static int xml_export(AVFilterContext
*ctx
, StreamContext
*sc
, const char* filename
)
381 unsigned int pot3
[5] = { 3*3*3*3, 3*3*3, 3*3, 3, 1 };
383 if (!sc
->coarseend
->last
)
384 return AVERROR(EINVAL
); // No frames ?
386 f
= avpriv_fopen_utf8(filename
, "w");
388 int err
= AVERROR(EINVAL
);
389 av_log(ctx
, AV_LOG_ERROR
, "cannot open xml file %s: %s\n", filename
, av_err2str(err
));
394 fprintf(f
, "<?xml version='1.0' encoding='ASCII' ?>\n");
395 fprintf(f
, "<Mpeg7 xmlns=\"urn:mpeg:mpeg7:schema:2001\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"urn:mpeg:mpeg7:schema:2001 schema/Mpeg7-2001.xsd\">\n");
396 fprintf(f
, " <DescriptionUnit xsi:type=\"DescriptorCollectionType\">\n");
397 fprintf(f
, " <Descriptor xsi:type=\"VideoSignatureType\">\n");
398 fprintf(f
, " <VideoSignatureRegion>\n");
399 fprintf(f
, " <VideoSignatureSpatialRegion>\n");
400 fprintf(f
, " <Pixel>0 0 </Pixel>\n");
401 fprintf(f
, " <Pixel>%d %d </Pixel>\n", sc
->w
- 1, sc
->h
- 1);
402 fprintf(f
, " </VideoSignatureSpatialRegion>\n");
403 fprintf(f
, " <StartFrameOfSpatialRegion>0</StartFrameOfSpatialRegion>\n");
404 /* hoping num is 1, other values are vague */
405 fprintf(f
, " <MediaTimeUnit>%d</MediaTimeUnit>\n", sc
->time_base
.den
/ sc
->time_base
.num
);
406 fprintf(f
, " <MediaTimeOfSpatialRegion>\n");
407 fprintf(f
, " <StartMediaTimeOfSpatialRegion>0</StartMediaTimeOfSpatialRegion>\n");
408 fprintf(f
, " <EndMediaTimeOfSpatialRegion>%" PRIu64
"</EndMediaTimeOfSpatialRegion>\n", sc
->coarseend
->last
->pts
);
409 fprintf(f
, " </MediaTimeOfSpatialRegion>\n");
411 /* coarsesignatures */
412 for (cs
= sc
->coarsesiglist
; cs
; cs
= cs
->next
) {
413 fprintf(f
, " <VSVideoSegment>\n");
414 fprintf(f
, " <StartFrameOfSegment>%" PRIu32
"</StartFrameOfSegment>\n", cs
->first
->index
);
415 fprintf(f
, " <EndFrameOfSegment>%" PRIu32
"</EndFrameOfSegment>\n", cs
->last
->index
);
416 fprintf(f
, " <MediaTimeOfSegment>\n");
417 fprintf(f
, " <StartMediaTimeOfSegment>%" PRIu64
"</StartMediaTimeOfSegment>\n", cs
->first
->pts
);
418 fprintf(f
, " <EndMediaTimeOfSegment>%" PRIu64
"</EndMediaTimeOfSegment>\n", cs
->last
->pts
);
419 fprintf(f
, " </MediaTimeOfSegment>\n");
420 for (i
= 0; i
< 5; i
++) {
421 fprintf(f
, " <BagOfWords>");
422 for (j
= 0; j
< 31; j
++) {
423 uint8_t n
= cs
->data
[i
][j
];
425 fprintf(f
, "%d %d %d %d %d %d %d %d ", (n
& 0x80) >> 7,
434 /* print only 3 bit in last byte */
435 fprintf(f
, "%d %d %d ", (n
& 0x80) >> 7,
440 fprintf(f
, "</BagOfWords>\n");
442 fprintf(f
, " </VSVideoSegment>\n");
446 for (fs
= sc
->finesiglist
; fs
; fs
= fs
->next
) {
447 fprintf(f
, " <VideoFrame>\n");
448 fprintf(f
, " <MediaTimeOfFrame>%" PRIu64
"</MediaTimeOfFrame>\n", fs
->pts
);
450 fprintf(f
, " <FrameConfidence>%d</FrameConfidence>\n", fs
->confidence
);
452 fprintf(f
, " <Word>");
453 for (i
= 0; i
< 5; i
++) {
454 fprintf(f
, "%d ", fs
->words
[i
]);
459 fprintf(f
, "</Word>\n");
461 fprintf(f
, " <FrameSignature>");
462 for (i
= 0; i
< SIGELEM_SIZE
/5; i
++) {
466 fprintf(f
, "%d ", fs
->framesig
[i
] / pot3
[0]);
467 for (j
= 1; j
< 5; j
++)
468 fprintf(f
, " %d ", fs
->framesig
[i
] % pot3
[j
-1] / pot3
[j
] );
470 fprintf(f
, "</FrameSignature>\n");
471 fprintf(f
, " </VideoFrame>\n");
473 fprintf(f
, " </VideoSignatureRegion>\n");
474 fprintf(f
, " </Descriptor>\n");
475 fprintf(f
, " </DescriptionUnit>\n");
476 fprintf(f
, "</Mpeg7>\n");
482 static int binary_export(AVFilterContext
*ctx
, StreamContext
*sc
, const char* filename
)
487 uint32_t numofsegments
= (sc
->lastindex
+ 44)/45;
490 /* buffer + header + coarsesignatures + finesignature */
491 int len
= (512 + 6 * 32 + 3*16 + 2 +
492 numofsegments
* (4*32 + 1 + 5*243) +
493 sc
->lastindex
* (2 + 32 + 6*8 + 608)) / 8;
494 uint8_t* buffer
= av_malloc_array(len
, sizeof(uint8_t));
496 return AVERROR(ENOMEM
);
498 f
= avpriv_fopen_utf8(filename
, "wb");
500 int err
= AVERROR(EINVAL
);
501 av_log(ctx
, AV_LOG_ERROR
, "cannot open file %s: %s\n", filename
, av_err2str(err
));
505 init_put_bits(&buf
, buffer
, len
);
507 put_bits32(&buf
, 1); /* NumOfSpatial Regions, only 1 supported */
508 put_bits(&buf
, 1, 1); /* SpatialLocationFlag, always the whole image */
509 put_bits32(&buf
, 0); /* PixelX,1 PixelY,1, 0,0 */
510 put_bits(&buf
, 16, sc
->w
-1 & 0xFFFF); /* PixelX,2 */
511 put_bits(&buf
, 16, sc
->h
-1 & 0xFFFF); /* PixelY,2 */
512 put_bits32(&buf
, 0); /* StartFrameOfSpatialRegion */
513 put_bits32(&buf
, sc
->lastindex
); /* NumOfFrames */
514 /* hoping num is 1, other values are vague */
515 /* den/num might be greater than 16 bit, so cutting it */
516 put_bits(&buf
, 16, 0xFFFF & (sc
->time_base
.den
/ sc
->time_base
.num
)); /* MediaTimeUnit */
517 put_bits(&buf
, 1, 1); /* MediaTimeFlagOfSpatialRegion */
518 put_bits32(&buf
, 0); /* StartMediaTimeOfSpatialRegion */
519 put_bits32(&buf
, 0xFFFFFFFF & sc
->coarseend
->last
->pts
); /* EndMediaTimeOfSpatialRegion */
520 put_bits32(&buf
, numofsegments
); /* NumOfSegments */
521 /* coarsesignatures */
522 for (cs
= sc
->coarsesiglist
; cs
; cs
= cs
->next
) {
523 put_bits32(&buf
, cs
->first
->index
); /* StartFrameOfSegment */
524 put_bits32(&buf
, cs
->last
->index
); /* EndFrameOfSegment */
525 put_bits(&buf
, 1, 1); /* MediaTimeFlagOfSegment */
526 put_bits32(&buf
, 0xFFFFFFFF & cs
->first
->pts
); /* StartMediaTimeOfSegment */
527 put_bits32(&buf
, 0xFFFFFFFF & cs
->last
->pts
); /* EndMediaTimeOfSegment */
528 for (i
= 0; i
< 5; i
++) {
529 /* put 243 bits ( = 7 * 32 + 19 = 8 * 28 + 19) into buffer */
530 for (j
= 0; j
< 30; j
++) {
531 put_bits(&buf
, 8, cs
->data
[i
][j
]);
533 put_bits(&buf
, 3, cs
->data
[i
][30] >> 5);
537 put_bits(&buf
, 1, 0); /* CompressionFlag, only 0 supported */
538 for (fs
= sc
->finesiglist
; fs
; fs
= fs
->next
) {
539 put_bits(&buf
, 1, 1); /* MediaTimeFlagOfFrame */
540 put_bits32(&buf
, 0xFFFFFFFF & fs
->pts
); /* MediaTimeOfFrame */
541 put_bits(&buf
, 8, fs
->confidence
); /* FrameConfidence */
542 for (i
= 0; i
< 5; i
++) {
543 put_bits(&buf
, 8, fs
->words
[i
]); /* Words */
546 for (i
= 0; i
< SIGELEM_SIZE
/5; i
++) {
547 put_bits(&buf
, 8, fs
->framesig
[i
]);
551 flush_put_bits(&buf
);
552 fwrite(buffer
, 1, put_bytes_output(&buf
), f
);
558 static int export(AVFilterContext
*ctx
, StreamContext
*sc
, int input
)
560 SignatureContext
* sic
= ctx
->priv
;
563 if (sic
->nb_inputs
> 1) {
564 /* error already handled */
565 av_assert0(av_get_frame_filename(filename
, sizeof(filename
), sic
->filename
, input
) == 0);
567 if (av_strlcpy(filename
, sic
->filename
, sizeof(filename
)) >= sizeof(filename
))
568 return AVERROR(EINVAL
);
570 if (sic
->format
== FORMAT_XML
) {
571 return xml_export(ctx
, sc
, filename
);
573 return binary_export(ctx
, sc
, filename
);
577 static int request_frame(AVFilterLink
*outlink
)
579 AVFilterContext
*ctx
= outlink
->src
;
580 SignatureContext
*sic
= ctx
->priv
;
581 StreamContext
*sc
, *sc2
;
584 int lookup
= 1; /* indicates whether EOF of all files is reached */
586 /* process all inputs */
587 for (i
= 0; i
< sic
->nb_inputs
; i
++){
588 sc
= &(sic
->streamcontexts
[i
]);
590 ret
= ff_request_frame(ctx
->inputs
[i
]);
592 /* return if unexpected error occurs in input stream */
593 if (ret
< 0 && ret
!= AVERROR_EOF
)
596 /* export signature at EOF */
597 if (ret
== AVERROR_EOF
&& !sc
->exported
) {
598 /* export if wanted */
599 if (strlen(sic
->filename
) > 0) {
600 if (export(ctx
, sc
, i
) < 0)
605 lookup
&= sc
->exported
;
608 /* signature lookup */
609 if (lookup
&& sic
->mode
!= MODE_OFF
) {
610 /* iterate over every pair */
611 for (i
= 0; i
< sic
->nb_inputs
; i
++) {
612 sc
= &(sic
->streamcontexts
[i
]);
613 for (j
= i
+1; j
< sic
->nb_inputs
; j
++) {
614 sc2
= &(sic
->streamcontexts
[j
]);
615 match
= lookup_signatures(ctx
, sic
, sc
, sc2
, sic
->mode
);
616 if (match
.score
!= 0) {
617 av_log(ctx
, AV_LOG_INFO
, "matching of video %d at %f and %d at %f, %d frames matching\n",
618 i
, ((double) match
.first
->pts
* sc
->time_base
.num
) / sc
->time_base
.den
,
619 j
, ((double) match
.second
->pts
* sc2
->time_base
.num
) / sc2
->time_base
.den
,
622 av_log(ctx
, AV_LOG_INFO
, "whole video matching\n");
624 av_log(ctx
, AV_LOG_INFO
, "no matching of video %d and %d\n", i
, j
);
633 static av_cold
int init(AVFilterContext
*ctx
)
636 SignatureContext
*sic
= ctx
->priv
;
641 sic
->streamcontexts
= av_mallocz(sic
->nb_inputs
* sizeof(StreamContext
));
642 if (!sic
->streamcontexts
)
643 return AVERROR(ENOMEM
);
645 for (i
= 0; i
< sic
->nb_inputs
; i
++) {
647 .type
= AVMEDIA_TYPE_VIDEO
,
648 .name
= av_asprintf("in%d", i
),
649 .config_props
= config_input
,
650 .filter_frame
= filter_frame
,
654 return AVERROR(ENOMEM
);
655 if ((ret
= ff_append_inpad_free_name(ctx
, &pad
)) < 0)
658 sc
= &(sic
->streamcontexts
[i
]);
661 sc
->finesiglist
= av_mallocz(sizeof(FineSignature
));
662 if (!sc
->finesiglist
)
663 return AVERROR(ENOMEM
);
664 sc
->curfinesig
= NULL
;
666 sc
->coarsesiglist
= av_mallocz(sizeof(CoarseSignature
));
667 if (!sc
->coarsesiglist
)
668 return AVERROR(ENOMEM
);
669 sc
->curcoarsesig1
= sc
->coarsesiglist
;
670 sc
->coarseend
= sc
->coarsesiglist
;
676 if (sic
->nb_inputs
> 1 && strlen(sic
->filename
) > 0 && av_get_frame_filename(tmp
, sizeof(tmp
), sic
->filename
, 0) == -1) {
677 av_log(ctx
, AV_LOG_ERROR
, "The filename must contain %%d or %%0nd, if you have more than one input.\n");
678 return AVERROR(EINVAL
);
686 static av_cold
void uninit(AVFilterContext
*ctx
)
688 SignatureContext
*sic
= ctx
->priv
;
691 FineSignature
* finsig
;
692 CoarseSignature
* cousig
;
697 if (sic
->streamcontexts
!= NULL
) {
698 for (i
= 0; i
< sic
->nb_inputs
; i
++) {
699 sc
= &(sic
->streamcontexts
[i
]);
700 finsig
= sc
->finesiglist
;
701 cousig
= sc
->coarsesiglist
;
705 finsig
= finsig
->next
;
708 sc
->finesiglist
= NULL
;
712 cousig
= cousig
->next
;
715 sc
->coarsesiglist
= NULL
;
717 av_freep(&sic
->streamcontexts
);
721 static int config_output(AVFilterLink
*outlink
)
723 AVFilterContext
*ctx
= outlink
->src
;
724 AVFilterLink
*inlink
= ctx
->inputs
[0];
725 FilterLink
*il
= ff_filter_link(inlink
);
726 FilterLink
*ol
= ff_filter_link(outlink
);
728 outlink
->time_base
= inlink
->time_base
;
729 ol
->frame_rate
= il
->frame_rate
;
730 outlink
->sample_aspect_ratio
= inlink
->sample_aspect_ratio
;
731 outlink
->w
= inlink
->w
;
732 outlink
->h
= inlink
->h
;
737 static const AVFilterPad signature_outputs
[] = {
740 .type
= AVMEDIA_TYPE_VIDEO
,
741 .request_frame
= request_frame
,
742 .config_props
= config_output
,
746 const FFFilter ff_vf_signature
= {
747 .p
.name
= "signature",
748 .p
.description
= NULL_IF_CONFIG_SMALL("Calculate the MPEG-7 video signature"),
749 .p
.priv_class
= &signature_class
,
751 .p
.flags
= AVFILTER_FLAG_DYNAMIC_INPUTS
,
752 .priv_size
= sizeof(SignatureContext
),
755 FILTER_OUTPUTS(signature_outputs
),
756 FILTER_PIXFMTS_ARRAY(pix_fmts
),