2 * Copyright (c) 2014-2015 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22 * @todo switch to dualinput
25 #include "libavutil/mem.h"
26 #include "libavutil/opt.h"
31 #include "lavfutils.h"
35 typedef struct FOCContext
{
39 int xmin
, ymin
, xmax
, ymax
;
43 AVFrame
*needle_frame
[MAX_MIPMAPS
];
44 AVFrame
*haystack_frame
[MAX_MIPMAPS
];
48 #define OFFSET(x) offsetof(FOCContext, x)
49 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
50 static const AVOption find_rect_options
[] = {
51 { "object", "object bitmap filename", OFFSET(obj_filename
), AV_OPT_TYPE_STRING
, {.str
= NULL
}, .flags
= FLAGS
},
52 { "threshold", "set threshold", OFFSET(threshold
), AV_OPT_TYPE_FLOAT
, {.dbl
= 0.5}, 0, 1.0, FLAGS
},
53 { "mipmaps", "set mipmaps", OFFSET(mipmaps
), AV_OPT_TYPE_INT
, {.i64
= 3}, 1, MAX_MIPMAPS
, FLAGS
},
54 { "xmin", "", OFFSET(xmin
), AV_OPT_TYPE_INT
, {.i64
= 0}, 0, INT_MAX
, FLAGS
},
55 { "ymin", "", OFFSET(ymin
), AV_OPT_TYPE_INT
, {.i64
= 0}, 0, INT_MAX
, FLAGS
},
56 { "xmax", "", OFFSET(xmax
), AV_OPT_TYPE_INT
, {.i64
= 0}, 0, INT_MAX
, FLAGS
},
57 { "ymax", "", OFFSET(ymax
), AV_OPT_TYPE_INT
, {.i64
= 0}, 0, INT_MAX
, FLAGS
},
58 { "discard", "", OFFSET(discard
), AV_OPT_TYPE_BOOL
, {.i64
= 0}, 0, 1, FLAGS
},
62 AVFILTER_DEFINE_CLASS(find_rect
);
64 static AVFrame
*downscale(AVFrame
*in
)
67 AVFrame
*frame
= av_frame_alloc();
72 frame
->format
= in
->format
;
73 frame
->width
= (in
->width
+ 1) / 2;
74 frame
->height
= (in
->height
+ 1) / 2;
76 if (av_frame_get_buffer(frame
, 0) < 0) {
77 av_frame_free(&frame
);
83 for(y
= 0; y
< frame
->height
; y
++) {
84 for(x
= 0; x
< frame
->width
; x
++) {
87 + src
[2*x
+0 + in
->linesize
[0]]
88 + src
[2*x
+1 + in
->linesize
[0]]
91 src
+= 2*in
->linesize
[0];
92 dst
+= frame
->linesize
[0];
97 static float compare(const AVFrame
*haystack
, const AVFrame
*obj
, int offx
, int offy
)
102 int64_t oo_sum_v
= 0;
103 int64_t hh_sum_v
= 0;
104 int64_t oh_sum_v
= 0;
106 int n
= obj
->height
* obj
->width
;
107 const uint8_t *odat
= obj
->data
[0];
108 const uint8_t *hdat
= haystack
->data
[0] + offx
+ offy
* haystack
->linesize
[0];
109 int64_t o_sigma
, h_sigma
;
111 for(y
= 0; y
< obj
->height
; y
++) {
112 for(x
= 0; x
< obj
->width
; x
++) {
117 oo_sum_v
+= o_v
* o_v
;
118 hh_sum_v
+= h_v
* h_v
;
119 oh_sum_v
+= o_v
* h_v
;
121 odat
+= obj
->linesize
[0];
122 hdat
+= haystack
->linesize
[0];
124 o_sigma
= n
*oo_sum_v
- o_sum_v
*(int64_t)o_sum_v
;
125 h_sigma
= n
*hh_sum_v
- h_sum_v
*(int64_t)h_sum_v
;
127 if (o_sigma
== 0 || h_sigma
== 0)
130 c
= (n
*oh_sum_v
- o_sum_v
*(int64_t)h_sum_v
) / (sqrt(o_sigma
)*sqrt(h_sigma
));
135 static int config_input(AVFilterLink
*inlink
)
137 AVFilterContext
*ctx
= inlink
->dst
;
138 FOCContext
*foc
= ctx
->priv
;
141 foc
->xmax
= inlink
->w
- foc
->obj_frame
->width
;
143 foc
->ymax
= inlink
->h
- foc
->obj_frame
->height
;
148 static float search(FOCContext
*foc
, int pass
, int maxpass
, int xmin
, int xmax
, int ymin
, int ymax
, int *best_x
, int *best_y
, float best_score
)
152 if (pass
+ 1 <= maxpass
) {
154 search(foc
, pass
+1, maxpass
, xmin
>>1, (xmax
+1)>>1, ymin
>>1, (ymax
+1)>>1, &sub_x
, &sub_y
, 2.0);
155 xmin
= FFMAX(xmin
, 2*sub_x
- 4);
156 xmax
= FFMIN(xmax
, 2*sub_x
+ 4);
157 ymin
= FFMAX(ymin
, 2*sub_y
- 4);
158 ymax
= FFMIN(ymax
, 2*sub_y
+ 4);
161 for (y
= ymin
; y
<= ymax
; y
++) {
162 for (x
= xmin
; x
<= xmax
; x
++) {
163 float score
= compare(foc
->haystack_frame
[pass
], foc
->needle_frame
[pass
], x
, y
);
164 if (score
< best_score
) {
174 static int filter_frame(AVFilterLink
*inlink
, AVFrame
*in
)
176 FilterLink
*inl
= ff_filter_link(inlink
);
177 AVFilterContext
*ctx
= inlink
->dst
;
178 FOCContext
*foc
= ctx
->priv
;
184 foc
->haystack_frame
[0] = av_frame_clone(in
);
185 for (i
=1; i
<foc
->mipmaps
; i
++) {
186 foc
->haystack_frame
[i
] = downscale(foc
->haystack_frame
[i
-1]);
189 best_score
= search(foc
, 0, 0,
190 FFMAX(foc
->xmin
, foc
->last_x
- 8),
191 FFMIN(foc
->xmax
, foc
->last_x
+ 8),
192 FFMAX(foc
->ymin
, foc
->last_y
- 8),
193 FFMIN(foc
->ymax
, foc
->last_y
+ 8),
194 &best_x
, &best_y
, 2.0);
196 best_score
= search(foc
, 0, foc
->mipmaps
- 1, foc
->xmin
, foc
->xmax
, foc
->ymin
, foc
->ymax
,
197 &best_x
, &best_y
, best_score
);
199 for (i
=0; i
<MAX_MIPMAPS
; i
++) {
200 av_frame_free(&foc
->haystack_frame
[i
]);
203 if (best_score
> foc
->threshold
) {
208 return ff_filter_frame(ctx
->outputs
[0], in
);
212 av_log(ctx
, AV_LOG_INFO
, "Found at n=%"PRId64
" pts_time=%f x=%d y=%d with score=%f\n",
213 inl
->frame_count_out
, TS2D(in
->pts
) * av_q2d(inlink
->time_base
),
214 best_x
, best_y
, best_score
);
215 foc
->last_x
= best_x
;
216 foc
->last_y
= best_y
;
218 snprintf(buf
, sizeof(buf
), "%f", best_score
);
220 av_dict_set_int(&in
->metadata
, "lavfi.rect.w", foc
->obj_frame
->width
, 0);
221 av_dict_set_int(&in
->metadata
, "lavfi.rect.h", foc
->obj_frame
->height
, 0);
222 av_dict_set_int(&in
->metadata
, "lavfi.rect.x", best_x
, 0);
223 av_dict_set_int(&in
->metadata
, "lavfi.rect.y", best_y
, 0);
224 av_dict_set(&in
->metadata
, "lavfi.rect.score", buf
, 0);
226 return ff_filter_frame(ctx
->outputs
[0], in
);
229 static av_cold
void uninit(AVFilterContext
*ctx
)
231 FOCContext
*foc
= ctx
->priv
;
234 for (i
= 0; i
< MAX_MIPMAPS
; i
++) {
235 av_frame_free(&foc
->needle_frame
[i
]);
236 av_frame_free(&foc
->haystack_frame
[i
]);
240 av_freep(&foc
->obj_frame
->data
[0]);
241 av_frame_free(&foc
->obj_frame
);
244 static av_cold
int init(AVFilterContext
*ctx
)
246 FOCContext
*foc
= ctx
->priv
;
249 if (!foc
->obj_filename
) {
250 av_log(ctx
, AV_LOG_ERROR
, "object filename not set\n");
251 return AVERROR(EINVAL
);
254 foc
->obj_frame
= av_frame_alloc();
256 return AVERROR(ENOMEM
);
258 if ((ret
= ff_load_image(foc
->obj_frame
->data
, foc
->obj_frame
->linesize
,
259 &foc
->obj_frame
->width
, &foc
->obj_frame
->height
,
260 &foc
->obj_frame
->format
, foc
->obj_filename
, ctx
)) < 0)
263 if (foc
->obj_frame
->format
!= AV_PIX_FMT_GRAY8
) {
264 av_log(ctx
, AV_LOG_ERROR
, "object image is not a grayscale image\n");
265 return AVERROR(EINVAL
);
268 foc
->needle_frame
[0] = av_frame_clone(foc
->obj_frame
);
269 for (i
= 1; i
< foc
->mipmaps
; i
++) {
270 foc
->needle_frame
[i
] = downscale(foc
->needle_frame
[i
-1]);
271 if (!foc
->needle_frame
[i
])
272 return AVERROR(ENOMEM
);
278 static const AVFilterPad foc_inputs
[] = {
281 .type
= AVMEDIA_TYPE_VIDEO
,
282 .config_props
= config_input
,
283 .filter_frame
= filter_frame
,
287 const FFFilter ff_vf_find_rect
= {
288 .p
.name
= "find_rect",
289 .p
.description
= NULL_IF_CONFIG_SMALL("Find a user specified object."),
290 .p
.flags
= AVFILTER_FLAG_METADATA_ONLY
,
291 .p
.priv_class
= &find_rect_class
,
292 .priv_size
= sizeof(FOCContext
),
295 FILTER_INPUTS(foc_inputs
),
296 FILTER_OUTPUTS(ff_video_default_filterpad
),
297 FILTER_PIXFMTS(AV_PIX_FMT_YUV420P
, AV_PIX_FMT_YUVJ420P
),