avcodec/jpegxl_parse{,r}: fix integer overflow for some malformed files
[FFMpeg-mirror.git] / libavfilter / vf_deshake_opencl.c
blobdc3df0e98931c7d8c6957a10505151771646a291
1 /*
2 * This file is part of FFmpeg.
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 * Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
19 * Copyright (C) 2009, Willow Garage Inc., all rights reserved.
20 * Copyright (C) 2013, OpenCV Foundation, all rights reserved.
21 * Third party copyrights are property of their respective owners.
23 * Redistribution and use in source and binary forms, with or without modification,
24 * are permitted provided that the following conditions are met:
26 * * Redistribution's of source code must retain the above copyright notice,
27 * this list of conditions and the following disclaimer.
29 * * Redistribution's in binary form must reproduce the above copyright notice,
30 * this list of conditions and the following disclaimer in the documentation
31 * and/or other materials provided with the distribution.
33 * * The name of the copyright holders may not be used to endorse or promote products
34 * derived from this software without specific prior written permission.
36 * This software is provided by the copyright holders and contributors "as is" and
37 * any express or implied warranties, including, but not limited to, the implied
38 * warranties of merchantability and fitness for a particular purpose are disclaimed.
39 * In no event shall the Intel Corporation or contributors be liable for any direct,
40 * indirect, incidental, special, exemplary, or consequential damages
41 * (including, but not limited to, procurement of substitute goods or services;
42 * loss of use, data, or profits; or business interruption) however caused
43 * and on any theory of liability, whether in contract, strict liability,
44 * or tort (including negligence or otherwise) arising in any way out of
45 * the use of this software, even if advised of the possibility of such damage.
48 #include <float.h>
49 #include <libavutil/lfg.h>
50 #include "libavutil/opt.h"
51 #include "libavutil/mem.h"
52 #include "libavutil/fifo.h"
53 #include "libavutil/common.h"
54 #include "libavutil/avassert.h"
55 #include "libavutil/pixdesc.h"
56 #include "libavutil/pixfmt.h"
57 #include "avfilter.h"
58 #include "framequeue.h"
59 #include "filters.h"
60 #include "transform.h"
61 #include "opencl.h"
62 #include "opencl_source.h"
63 #include "video.h"
66 This filter matches feature points between frames (dealing with outliers) and then
67 uses the matches to estimate an affine transform between frames. This transform is
68 decomposed into various values (translation, scale, rotation) and the values are
69 summed relative to the start of the video to obtain on absolute camera position
70 for each frame. This "camera path" is then smoothed via a gaussian filter, resulting
71 in a new path that is turned back into an affine transform and applied to each
72 frame to render it.
74 High-level overview:
76 All of the work to extract motion data from frames occurs in queue_frame. Motion data
77 is buffered in a smoothing window, so queue_frame simply computes the absolute camera
78 positions and places them in ringbuffers.
80 filter_frame is responsible for looking at the absolute camera positions currently
81 in the ringbuffers, applying the gaussian filter, and then transforming the frames.
84 // Number of bits for BRIEF descriptors
85 #define BREIFN 512
86 // Size of the patch from which a BRIEF descriptor is extracted
87 // This is the size used in OpenCV
88 #define BRIEF_PATCH_SIZE 31
89 #define BRIEF_PATCH_SIZE_HALF (BRIEF_PATCH_SIZE / 2)
91 #define MATCHES_CONTIG_SIZE 2000
93 #define ROUNDED_UP_DIV(a, b) ((a + (b - 1)) / b)
95 typedef struct PointPair {
96 // Previous frame
97 cl_float2 p1;
98 // Current frame
99 cl_float2 p2;
100 } PointPair;
102 typedef struct MotionVector {
103 PointPair p;
104 // Used to mark vectors as potential outliers
105 cl_int should_consider;
106 } MotionVector;
108 // Denotes the indices for the different types of motion in the ringbuffers array
109 enum RingbufferIndices {
110 RingbufX,
111 RingbufY,
112 RingbufRot,
113 RingbufScaleX,
114 RingbufScaleY,
116 // Should always be last
117 RingbufCount
120 // Struct that holds data for drawing point match debug data
121 typedef struct DebugMatches {
122 MotionVector *matches;
123 // The points used to calculate the affine transform for a frame
124 MotionVector model_matches[3];
126 int num_matches;
127 // For cases where we couldn't calculate a model
128 int num_model_matches;
129 } DebugMatches;
131 // Groups together the ringbuffers that store absolute distortion / position values
132 // for each frame
133 typedef struct AbsoluteFrameMotion {
134 // Array with the various ringbuffers, indexed via the RingbufferIndices enum
135 AVFifo *ringbuffers[RingbufCount];
137 // Offset to get to the current frame being processed
138 // (not in bytes)
139 int curr_frame_offset;
140 // Keeps track of where the start and end of contiguous motion data is (to
141 // deal with cases where no motion data is found between two frames)
142 int data_start_offset;
143 int data_end_offset;
145 AVFifo *debug_matches;
146 } AbsoluteFrameMotion;
148 // Takes care of freeing the arrays within the DebugMatches inside of the
149 // debug_matches ringbuffer and then freeing the buffer itself.
150 static void free_debug_matches(AbsoluteFrameMotion *afm) {
151 DebugMatches dm;
153 if (!afm->debug_matches) {
154 return;
157 while (av_fifo_read(afm->debug_matches, &dm, 1) >= 0)
158 av_freep(&dm.matches);
160 av_fifo_freep2(&afm->debug_matches);
163 // Stores the translation, scale, rotation, and skew deltas between two frames
164 typedef struct FrameDelta {
165 cl_float2 translation;
166 float rotation;
167 cl_float2 scale;
168 cl_float2 skew;
169 } FrameDelta;
171 typedef struct SimilarityMatrix {
172 // The 2x3 similarity matrix
173 double matrix[6];
174 } SimilarityMatrix;
176 typedef struct CropInfo {
177 // The top left corner of the bounding box for the crop
178 cl_float2 top_left;
179 // The bottom right corner of the bounding box for the crop
180 cl_float2 bottom_right;
181 } CropInfo;
183 // Returned from function that determines start and end values for iteration
184 // around the current frame in a ringbuffer
185 typedef struct IterIndices {
186 int start;
187 int end;
188 } IterIndices;
190 typedef struct DeshakeOpenCLContext {
191 OpenCLFilterContext ocf;
192 // Whether or not the above `OpenCLFilterContext` has been initialized
193 int initialized;
195 // These variables are used in the activate callback
196 int64_t duration;
197 int eof;
199 // State for random number generation
200 AVLFG alfg;
202 // FIFO frame queue used to buffer future frames for processing
203 FFFrameQueue fq;
204 // Ringbuffers for frame positions
205 AbsoluteFrameMotion abs_motion;
207 // The number of frames' motion to consider before and after the frame we are
208 // smoothing
209 int smooth_window;
210 // The number of the frame we are currently processing
211 int curr_frame;
213 // Stores a 1d array of normalised gaussian kernel values for convolution
214 float *gauss_kernel;
216 // Buffer for error values used in RANSAC code
217 float *ransac_err;
219 // Information regarding how to crop the smoothed luminance (or RGB) planes
220 CropInfo crop_y;
221 // Information regarding how to crop the smoothed chroma planes
222 CropInfo crop_uv;
224 // Whether or not we are processing YUV input (as oppposed to RGB)
225 int is_yuv;
226 // The underlying format of the hardware surfaces
227 int sw_format;
229 // Buffer to copy `matches` into for the CPU to work with
230 MotionVector *matches_host;
231 MotionVector *matches_contig_host;
233 MotionVector *inliers;
235 cl_command_queue command_queue;
236 cl_kernel kernel_grayscale;
237 cl_kernel kernel_harris_response;
238 cl_kernel kernel_refine_features;
239 cl_kernel kernel_brief_descriptors;
240 cl_kernel kernel_match_descriptors;
241 cl_kernel kernel_transform;
242 cl_kernel kernel_crop_upscale;
244 // Stores a frame converted to grayscale
245 cl_mem grayscale;
246 // Stores the harris response for a frame (measure of "cornerness" for each pixel)
247 cl_mem harris_buf;
249 // Detected features after non-maximum suppression and sub-pixel refinement
250 cl_mem refined_features;
251 // Saved from the previous frame
252 cl_mem prev_refined_features;
254 // BRIEF sampling pattern that is randomly initialized
255 cl_mem brief_pattern;
256 // Feature point descriptors for the current frame
257 cl_mem descriptors;
258 // Feature point descriptors for the previous frame
259 cl_mem prev_descriptors;
260 // Vectors between points in current and previous frame
261 cl_mem matches;
262 cl_mem matches_contig;
263 // Holds the matrix to transform luminance (or RGB) with
264 cl_mem transform_y;
265 // Holds the matrix to transform chroma with
266 cl_mem transform_uv;
268 // Configurable options
270 int tripod_mode;
271 int debug_on;
272 int should_crop;
274 // Whether or not feature points should be refined at a sub-pixel level
275 cl_int refine_features;
276 // If the user sets a value other than the default, 0, this percentage is
277 // translated into a sigma value ranging from 0.5 to 40.0
278 float smooth_percent;
279 // This number is multiplied by the video frame rate to determine the size
280 // of the smooth window
281 float smooth_window_multiplier;
283 // Debug stuff
285 cl_kernel kernel_draw_debug_info;
286 cl_mem debug_matches;
287 cl_mem debug_model_matches;
289 // These store the total time spent executing the different kernels in nanoseconds
290 unsigned long long grayscale_time;
291 unsigned long long harris_response_time;
292 unsigned long long refine_features_time;
293 unsigned long long brief_descriptors_time;
294 unsigned long long match_descriptors_time;
295 unsigned long long transform_time;
296 unsigned long long crop_upscale_time;
298 // Time spent copying matched features from the device to the host
299 unsigned long long read_buf_time;
300 } DeshakeOpenCLContext;
302 // Returns a random uniformly-distributed number in [low, high]
303 static int rand_in(int low, int high, AVLFG *alfg) {
304 return (av_lfg_get(alfg) % (high - low)) + low;
307 // Returns the average execution time for an event given the total time and the
308 // number of frames processed.
309 static double averaged_event_time_ms(unsigned long long total_time, int num_frames) {
310 return (double)total_time / (double)num_frames / 1000000.0;
313 // The following code is loosely ported from OpenCV
315 // Estimates affine transform from 3 point pairs
316 // model is a 2x3 matrix:
317 // a b c
318 // d e f
319 static void run_estimate_kernel(const MotionVector *point_pairs, double *model)
321 // src points
322 double x1 = point_pairs[0].p.p1.s[0];
323 double y1 = point_pairs[0].p.p1.s[1];
324 double x2 = point_pairs[1].p.p1.s[0];
325 double y2 = point_pairs[1].p.p1.s[1];
326 double x3 = point_pairs[2].p.p1.s[0];
327 double y3 = point_pairs[2].p.p1.s[1];
329 // dest points
330 double X1 = point_pairs[0].p.p2.s[0];
331 double Y1 = point_pairs[0].p.p2.s[1];
332 double X2 = point_pairs[1].p.p2.s[0];
333 double Y2 = point_pairs[1].p.p2.s[1];
334 double X3 = point_pairs[2].p.p2.s[0];
335 double Y3 = point_pairs[2].p.p2.s[1];
337 double d = 1.0 / ( x1*(y2-y3) + x2*(y3-y1) + x3*(y1-y2) );
339 model[0] = d * ( X1*(y2-y3) + X2*(y3-y1) + X3*(y1-y2) );
340 model[1] = d * ( X1*(x3-x2) + X2*(x1-x3) + X3*(x2-x1) );
341 model[2] = d * ( X1*(x2*y3 - x3*y2) + X2*(x3*y1 - x1*y3) + X3*(x1*y2 - x2*y1) );
343 model[3] = d * ( Y1*(y2-y3) + Y2*(y3-y1) + Y3*(y1-y2) );
344 model[4] = d * ( Y1*(x3-x2) + Y2*(x1-x3) + Y3*(x2-x1) );
345 model[5] = d * ( Y1*(x2*y3 - x3*y2) + Y2*(x3*y1 - x1*y3) + Y3*(x1*y2 - x2*y1) );
348 // Checks that the 3 points in the given array are not collinear
349 static int points_not_collinear(const cl_float2 **points)
351 int j, k, i = 2;
353 for (j = 0; j < i; j++) {
354 double dx1 = points[j]->s[0] - points[i]->s[0];
355 double dy1 = points[j]->s[1] - points[i]->s[1];
357 for (k = 0; k < j; k++) {
358 double dx2 = points[k]->s[0] - points[i]->s[0];
359 double dy2 = points[k]->s[1] - points[i]->s[1];
361 // Assuming a 3840 x 2160 video with a point at (0, 0) and one at
362 // (3839, 2159), this prevents a third point from being within roughly
363 // 0.5 of a pixel of the line connecting the two on both axes
364 if (fabs(dx2*dy1 - dy2*dx1) <= 1.0) {
365 return 0;
370 return 1;
373 // Checks a subset of 3 point pairs to make sure that the points are not collinear
374 // and not too close to each other
375 static int check_subset(const MotionVector *pairs_subset)
377 const cl_float2 *prev_points[] = {
378 &pairs_subset[0].p.p1,
379 &pairs_subset[1].p.p1,
380 &pairs_subset[2].p.p1
383 const cl_float2 *curr_points[] = {
384 &pairs_subset[0].p.p2,
385 &pairs_subset[1].p.p2,
386 &pairs_subset[2].p.p2
389 return points_not_collinear(prev_points) && points_not_collinear(curr_points);
392 // Selects a random subset of 3 points from point_pairs and places them in pairs_subset
393 static int get_subset(
394 AVLFG *alfg,
395 const MotionVector *point_pairs,
396 const int num_point_pairs,
397 MotionVector *pairs_subset,
398 int max_attempts
400 int idx[3];
401 int i = 0, j, iters = 0;
403 for (; iters < max_attempts; iters++) {
404 for (i = 0; i < 3 && iters < max_attempts;) {
405 int idx_i = 0;
407 for (;;) {
408 idx_i = idx[i] = rand_in(0, num_point_pairs, alfg);
410 for (j = 0; j < i; j++) {
411 if (idx_i == idx[j]) {
412 break;
416 if (j == i) {
417 break;
421 pairs_subset[i] = point_pairs[idx[i]];
422 i++;
425 if (i == 3 && !check_subset(pairs_subset)) {
426 continue;
428 break;
431 return i == 3 && iters < max_attempts;
434 // Computes the error for each of the given points based on the given model.
435 static void compute_error(
436 const MotionVector *point_pairs,
437 const int num_point_pairs,
438 const double *model,
439 float *err
441 double F0 = model[0], F1 = model[1], F2 = model[2];
442 double F3 = model[3], F4 = model[4], F5 = model[5];
444 for (int i = 0; i < num_point_pairs; i++) {
445 const cl_float2 *f = &point_pairs[i].p.p1;
446 const cl_float2 *t = &point_pairs[i].p.p2;
448 double a = F0*f->s[0] + F1*f->s[1] + F2 - t->s[0];
449 double b = F3*f->s[0] + F4*f->s[1] + F5 - t->s[1];
451 err[i] = a*a + b*b;
455 // Determines which of the given point matches are inliers for the given model
456 // based on the specified threshold.
458 // err must be an array of num_point_pairs length
459 static int find_inliers(
460 MotionVector *point_pairs,
461 const int num_point_pairs,
462 const double *model,
463 float *err,
464 double thresh
466 float t = (float)(thresh * thresh);
467 int i, n = num_point_pairs, num_inliers = 0;
469 compute_error(point_pairs, num_point_pairs, model, err);
471 for (i = 0; i < n; i++) {
472 if (err[i] <= t) {
473 // This is an inlier
474 point_pairs[i].should_consider = 1;
475 num_inliers += 1;
476 } else {
477 point_pairs[i].should_consider = 0;
481 return num_inliers;
484 // Determines the number of iterations required to achieve the desired confidence level.
486 // The equation used to determine the number of iterations to do is:
487 // 1 - confidence = (1 - inlier_probability^num_points)^num_iters
489 // Solving for num_iters:
491 // num_iters = log(1 - confidence) / log(1 - inlier_probability^num_points)
493 // A more in-depth explanation can be found at https://en.wikipedia.org/wiki/Random_sample_consensus
494 // under the 'Parameters' heading
495 static int ransac_update_num_iters(double confidence, double num_outliers, int max_iters)
497 double num, denom;
499 confidence = av_clipd(confidence, 0.0, 1.0);
500 num_outliers = av_clipd(num_outliers, 0.0, 1.0);
502 // avoid inf's & nan's
503 num = FFMAX(1.0 - confidence, DBL_MIN);
504 denom = 1.0 - pow(1.0 - num_outliers, 3);
505 if (denom < DBL_MIN) {
506 return 0;
509 num = log(num);
510 denom = log(denom);
512 return denom >= 0 || -num >= max_iters * (-denom) ? max_iters : (int)round(num / denom);
515 // Estimates an affine transform between the given pairs of points using RANdom
516 // SAmple Consensus
517 static int estimate_affine_2d(
518 DeshakeOpenCLContext *deshake_ctx,
519 MotionVector *point_pairs,
520 DebugMatches *debug_matches,
521 const int num_point_pairs,
522 double *model_out,
523 const double threshold,
524 const int max_iters,
525 const double confidence
527 int result = 0;
528 double best_model[6], model[6];
529 MotionVector pairs_subset[3], best_pairs[3];
531 int iter, niters = FFMAX(max_iters, 1);
532 int good_count, max_good_count = 0;
534 // We need at least 3 points to build a model from
535 if (num_point_pairs < 3) {
536 return 0;
537 } else if (num_point_pairs == 3) {
538 // There are only 3 points, so RANSAC doesn't apply here
539 run_estimate_kernel(point_pairs, model_out);
541 for (int i = 0; i < 3; ++i) {
542 point_pairs[i].should_consider = 1;
545 return 1;
548 for (iter = 0; iter < niters; ++iter) {
549 int found = get_subset(&deshake_ctx->alfg, point_pairs, num_point_pairs, pairs_subset, 10000);
551 if (!found) {
552 if (iter == 0) {
553 return 0;
556 break;
559 run_estimate_kernel(pairs_subset, model);
560 good_count = find_inliers(point_pairs, num_point_pairs, model, deshake_ctx->ransac_err, threshold);
562 if (good_count > FFMAX(max_good_count, 2)) {
563 for (int mi = 0; mi < 6; ++mi) {
564 best_model[mi] = model[mi];
567 for (int pi = 0; pi < 3; pi++) {
568 best_pairs[pi] = pairs_subset[pi];
571 max_good_count = good_count;
572 niters = ransac_update_num_iters(
573 confidence,
574 (double)(num_point_pairs - good_count) / num_point_pairs,
575 niters
580 if (max_good_count > 0) {
581 for (int mi = 0; mi < 6; ++mi) {
582 model_out[mi] = best_model[mi];
585 for (int pi = 0; pi < 3; ++pi) {
586 debug_matches->model_matches[pi] = best_pairs[pi];
588 debug_matches->num_model_matches = 3;
590 // Find the inliers again for the best model for debugging
591 find_inliers(point_pairs, num_point_pairs, best_model, deshake_ctx->ransac_err, threshold);
592 result = 1;
595 return result;
598 // "Wiggles" the first point in best_pairs around a tiny bit in order to decrease the
599 // total error
600 static void optimize_model(
601 DeshakeOpenCLContext *deshake_ctx,
602 MotionVector *best_pairs,
603 MotionVector *inliers,
604 const int num_inliers,
605 float best_err,
606 double *model_out
608 float move_x_val = 0.01;
609 float move_y_val = 0.01;
610 int move_x = 1;
611 float old_move_x_val = 0;
612 double model[6];
613 int last_changed = 0;
615 for (int iters = 0; iters < 200; iters++) {
616 float total_err = 0;
618 if (move_x) {
619 best_pairs[0].p.p2.s[0] += move_x_val;
620 } else {
621 best_pairs[0].p.p2.s[0] += move_y_val;
624 run_estimate_kernel(best_pairs, model);
625 compute_error(inliers, num_inliers, model, deshake_ctx->ransac_err);
627 for (int j = 0; j < num_inliers; j++) {
628 total_err += deshake_ctx->ransac_err[j];
631 if (total_err < best_err) {
632 for (int mi = 0; mi < 6; ++mi) {
633 model_out[mi] = model[mi];
636 best_err = total_err;
637 last_changed = iters;
638 } else {
639 // Undo the change
640 if (move_x) {
641 best_pairs[0].p.p2.s[0] -= move_x_val;
642 } else {
643 best_pairs[0].p.p2.s[0] -= move_y_val;
646 if (iters - last_changed > 4) {
647 // We've already improved the model as much as we can
648 break;
651 old_move_x_val = move_x_val;
653 if (move_x) {
654 move_x_val *= -1;
655 } else {
656 move_y_val *= -1;
659 if (old_move_x_val < 0) {
660 move_x = 0;
661 } else {
662 move_x = 1;
668 // Uses a process similar to that of RANSAC to find a transform that minimizes
669 // the total error for a set of point matches determined to be inliers
671 // (Pick random subsets, compute model, find total error, iterate until error
672 // is minimized.)
673 static int minimize_error(
674 DeshakeOpenCLContext *deshake_ctx,
675 MotionVector *inliers,
676 DebugMatches *debug_matches,
677 const int num_inliers,
678 double *model_out,
679 const int max_iters
681 int result = 0;
682 float best_err = FLT_MAX;
683 double best_model[6], model[6];
684 MotionVector pairs_subset[3], best_pairs[3];
686 for (int i = 0; i < max_iters; i++) {
687 float total_err = 0;
688 int found = get_subset(&deshake_ctx->alfg, inliers, num_inliers, pairs_subset, 10000);
690 if (!found) {
691 if (i == 0) {
692 return 0;
695 break;
698 run_estimate_kernel(pairs_subset, model);
699 compute_error(inliers, num_inliers, model, deshake_ctx->ransac_err);
701 for (int j = 0; j < num_inliers; j++) {
702 total_err += deshake_ctx->ransac_err[j];
705 if (i == 0 || total_err < best_err) {
706 for (int mi = 0; mi < 6; ++mi) {
707 best_model[mi] = model[mi];
710 for (int pi = 0; pi < 3; pi++) {
711 best_pairs[pi] = pairs_subset[pi];
714 best_err = total_err;
718 for (int mi = 0; mi < 6; ++mi) {
719 model_out[mi] = best_model[mi];
722 for (int pi = 0; pi < 3; ++pi) {
723 debug_matches->model_matches[pi] = best_pairs[pi];
725 debug_matches->num_model_matches = 3;
726 result = 1;
728 optimize_model(deshake_ctx, best_pairs, inliers, num_inliers, best_err, model_out);
729 return result;
732 // End code from OpenCV
734 // Decomposes a similarity matrix into translation, rotation, scale, and skew
736 // See http://frederic-wang.fr/decomposition-of-2d-transform-matrices.html
737 static FrameDelta decompose_transform(double *model)
739 FrameDelta ret;
741 double a = model[0];
742 double c = model[1];
743 double e = model[2];
744 double b = model[3];
745 double d = model[4];
746 double f = model[5];
747 double delta = a * d - b * c;
749 memset(&ret, 0, sizeof(ret));
751 ret.translation.s[0] = e;
752 ret.translation.s[1] = f;
754 // This is the QR method
755 if (a != 0 || b != 0) {
756 double r = hypot(a, b);
758 ret.rotation = FFSIGN(b) * acos(a / r);
759 ret.scale.s[0] = r;
760 ret.scale.s[1] = delta / r;
761 ret.skew.s[0] = atan((a * c + b * d) / (r * r));
762 ret.skew.s[1] = 0;
763 } else if (c != 0 || d != 0) {
764 double s = sqrt(c * c + d * d);
766 ret.rotation = M_PI / 2 - FFSIGN(d) * acos(-c / s);
767 ret.scale.s[0] = delta / s;
768 ret.scale.s[1] = s;
769 ret.skew.s[0] = 0;
770 ret.skew.s[1] = atan((a * c + b * d) / (s * s));
771 } // otherwise there is only translation
773 return ret;
776 // Move valid vectors from the 2d buffer into a 1d buffer where they are contiguous
777 static int make_vectors_contig(
778 DeshakeOpenCLContext *deshake_ctx,
779 int size_y,
780 int size_x
782 int num_vectors = 0;
784 for (int i = 0; i < size_y; ++i) {
785 for (int j = 0; j < size_x; ++j) {
786 MotionVector v = deshake_ctx->matches_host[j + i * size_x];
788 if (v.should_consider) {
789 deshake_ctx->matches_contig_host[num_vectors] = v;
790 ++num_vectors;
793 // Make sure we do not exceed the amount of space we allocated for these vectors
794 if (num_vectors == MATCHES_CONTIG_SIZE - 1) {
795 return num_vectors;
799 return num_vectors;
802 // Returns the gaussian kernel value for the given x coordinate and sigma value
803 static float gaussian_for(int x, float sigma) {
804 return 1.0f / expf(((float)x * (float)x) / (2.0f * sigma * sigma));
807 // Makes a normalized gaussian kernel of the given length for the given sigma
808 // and places it in gauss_kernel
809 static void make_gauss_kernel(float *gauss_kernel, float length, float sigma)
811 float gauss_sum = 0;
812 int window_half = length / 2;
814 for (int i = 0; i < length; ++i) {
815 float val = gaussian_for(i - window_half, sigma);
817 gauss_sum += val;
818 gauss_kernel[i] = val;
821 // Normalize the gaussian values
822 for (int i = 0; i < length; ++i) {
823 gauss_kernel[i] /= gauss_sum;
827 // Returns indices to start and end iteration at in order to iterate over a window
828 // of length size centered at the current frame in a ringbuffer
830 // Always returns numbers that result in a window of length size, even if that
831 // means specifying negative indices or indices past the end of the values in the
832 // ringbuffers. Make sure you clip indices appropriately within your loop.
833 static IterIndices start_end_for(DeshakeOpenCLContext *deshake_ctx, int length) {
834 IterIndices indices;
836 indices.start = deshake_ctx->abs_motion.curr_frame_offset - (length / 2);
837 indices.end = deshake_ctx->abs_motion.curr_frame_offset + (length / 2) + (length % 2);
839 return indices;
842 // Sets val to the value in the given ringbuffer at the given offset, taking care of
843 // clipping the offset into the appropriate range
844 static void ringbuf_float_at(
845 DeshakeOpenCLContext *deshake_ctx,
846 AVFifo *values,
847 float *val,
848 int offset
850 int clip_start, clip_end, offset_clipped;
851 if (deshake_ctx->abs_motion.data_end_offset != -1) {
852 clip_end = deshake_ctx->abs_motion.data_end_offset;
853 } else {
854 // This expression represents the last valid index in the buffer,
855 // which we use repeatedly at the end of the video.
856 clip_end = deshake_ctx->smooth_window - av_fifo_can_write(values) - 1;
859 if (deshake_ctx->abs_motion.data_start_offset != -1) {
860 clip_start = deshake_ctx->abs_motion.data_start_offset;
861 } else {
862 // Negative indices will occur at the start of the video, and we want
863 // them to be clipped to 0 in order to repeatedly use the position of
864 // the first frame.
865 clip_start = 0;
868 offset_clipped = av_clip(
869 offset,
870 clip_start,
871 clip_end
874 av_fifo_peek(values, val, 1, offset_clipped);
877 // Returns smoothed current frame value of the given buffer of floats based on the
878 // given Gaussian kernel and its length (also the window length, centered around the
879 // current frame) and the "maximum value" of the motion.
881 // This "maximum value" should be the width / height of the image in the case of
882 // translation and an empirically chosen constant for rotation / scale.
884 // The sigma chosen to generate the final gaussian kernel with used to smooth the
885 // camera path is either hardcoded (set by user, deshake_ctx->smooth_percent) or
886 // adaptively chosen.
887 static float smooth(
888 DeshakeOpenCLContext *deshake_ctx,
889 float *gauss_kernel,
890 int length,
891 float max_val,
892 AVFifo *values
894 float new_large_s = 0, new_small_s = 0, new_best = 0, old, diff_between,
895 percent_of_max, inverted_percent;
896 IterIndices indices = start_end_for(deshake_ctx, length);
897 float large_sigma = 40.0f;
898 float small_sigma = 2.0f;
899 float best_sigma;
901 if (deshake_ctx->smooth_percent) {
902 best_sigma = (large_sigma - 0.5f) * deshake_ctx->smooth_percent + 0.5f;
903 } else {
904 // Strategy to adaptively smooth trajectory:
906 // 1. Smooth path with large and small sigma values
907 // 2. Take the absolute value of the difference between them
908 // 3. Get a percentage by putting the difference over the "max value"
909 // 4, Invert the percentage
910 // 5. Calculate a new sigma value weighted towards the larger sigma value
911 // 6. Determine final smoothed trajectory value using that sigma
913 make_gauss_kernel(gauss_kernel, length, large_sigma);
914 for (int i = indices.start, j = 0; i < indices.end; ++i, ++j) {
915 ringbuf_float_at(deshake_ctx, values, &old, i);
916 new_large_s += old * gauss_kernel[j];
919 make_gauss_kernel(gauss_kernel, length, small_sigma);
920 for (int i = indices.start, j = 0; i < indices.end; ++i, ++j) {
921 ringbuf_float_at(deshake_ctx, values, &old, i);
922 new_small_s += old * gauss_kernel[j];
925 diff_between = fabsf(new_large_s - new_small_s);
926 percent_of_max = diff_between / max_val;
927 inverted_percent = 1 - percent_of_max;
928 best_sigma = large_sigma * powf(inverted_percent, 40);
931 make_gauss_kernel(gauss_kernel, length, best_sigma);
932 for (int i = indices.start, j = 0; i < indices.end; ++i, ++j) {
933 ringbuf_float_at(deshake_ctx, values, &old, i);
934 new_best += old * gauss_kernel[j];
937 return new_best;
940 // Returns the position of the given point after the transform is applied
941 static cl_float2 transformed_point(float x, float y, float *transform) {
942 cl_float2 ret;
944 ret.s[0] = x * transform[0] + y * transform[1] + transform[2];
945 ret.s[1] = x * transform[3] + y * transform[4] + transform[5];
947 return ret;
950 // Creates an affine transform that scales from the center of a frame
951 static void transform_center_scale(
952 float x_shift,
953 float y_shift,
954 float angle,
955 float scale_x,
956 float scale_y,
957 float center_w,
958 float center_h,
959 float *matrix
961 cl_float2 center_s;
962 float center_s_w, center_s_h;
964 ff_get_matrix(
968 scale_x,
969 scale_y,
970 matrix
973 center_s = transformed_point(center_w, center_h, matrix);
974 center_s_w = center_w - center_s.s[0];
975 center_s_h = center_h - center_s.s[1];
977 ff_get_matrix(
978 x_shift + center_s_w,
979 y_shift + center_s_h,
980 angle,
981 scale_x,
982 scale_y,
983 matrix
987 // Determines the crop necessary to eliminate black borders from a smoothed frame
988 // and updates target crop accordingly
989 static void update_needed_crop(
990 CropInfo* crop,
991 float *transform,
992 float frame_width,
993 float frame_height
995 float new_width, new_height, adjusted_width, adjusted_height, adjusted_x, adjusted_y;
997 cl_float2 top_left = transformed_point(0, 0, transform);
998 cl_float2 top_right = transformed_point(frame_width, 0, transform);
999 cl_float2 bottom_left = transformed_point(0, frame_height, transform);
1000 cl_float2 bottom_right = transformed_point(frame_width, frame_height, transform);
1001 float ar_h = frame_height / frame_width;
1002 float ar_w = frame_width / frame_height;
1004 if (crop->bottom_right.s[0] == 0) {
1005 // The crop hasn't been set to the original size of the plane
1006 crop->bottom_right.s[0] = frame_width;
1007 crop->bottom_right.s[1] = frame_height;
1010 crop->top_left.s[0] = FFMAX3(
1011 crop->top_left.s[0],
1012 top_left.s[0],
1013 bottom_left.s[0]
1016 crop->top_left.s[1] = FFMAX3(
1017 crop->top_left.s[1],
1018 top_left.s[1],
1019 top_right.s[1]
1022 crop->bottom_right.s[0] = FFMIN3(
1023 crop->bottom_right.s[0],
1024 bottom_right.s[0],
1025 top_right.s[0]
1028 crop->bottom_right.s[1] = FFMIN3(
1029 crop->bottom_right.s[1],
1030 bottom_right.s[1],
1031 bottom_left.s[1]
1034 // Make sure our potentially new bounding box has the same aspect ratio
1035 new_height = crop->bottom_right.s[1] - crop->top_left.s[1];
1036 new_width = crop->bottom_right.s[0] - crop->top_left.s[0];
1038 adjusted_width = new_height * ar_w;
1039 adjusted_x = crop->bottom_right.s[0] - adjusted_width;
1041 if (adjusted_x >= crop->top_left.s[0]) {
1042 crop->top_left.s[0] = adjusted_x;
1043 } else {
1044 adjusted_height = new_width * ar_h;
1045 adjusted_y = crop->bottom_right.s[1] - adjusted_height;
1046 crop->top_left.s[1] = adjusted_y;
1050 static av_cold void deshake_opencl_uninit(AVFilterContext *avctx)
1052 DeshakeOpenCLContext *ctx = avctx->priv;
1053 cl_int cle;
1055 for (int i = 0; i < RingbufCount; i++)
1056 av_fifo_freep2(&ctx->abs_motion.ringbuffers[i]);
1058 if (ctx->debug_on)
1059 free_debug_matches(&ctx->abs_motion);
1061 if (ctx->gauss_kernel)
1062 av_freep(&ctx->gauss_kernel);
1064 if (ctx->ransac_err)
1065 av_freep(&ctx->ransac_err);
1067 if (ctx->matches_host)
1068 av_freep(&ctx->matches_host);
1070 if (ctx->matches_contig_host)
1071 av_freep(&ctx->matches_contig_host);
1073 if (ctx->inliers)
1074 av_freep(&ctx->inliers);
1076 ff_framequeue_free(&ctx->fq);
1078 CL_RELEASE_KERNEL(ctx->kernel_grayscale);
1079 CL_RELEASE_KERNEL(ctx->kernel_harris_response);
1080 CL_RELEASE_KERNEL(ctx->kernel_refine_features);
1081 CL_RELEASE_KERNEL(ctx->kernel_brief_descriptors);
1082 CL_RELEASE_KERNEL(ctx->kernel_match_descriptors);
1083 CL_RELEASE_KERNEL(ctx->kernel_crop_upscale);
1084 if (ctx->debug_on)
1085 CL_RELEASE_KERNEL(ctx->kernel_draw_debug_info);
1087 CL_RELEASE_QUEUE(ctx->command_queue);
1089 if (!ctx->is_yuv)
1090 CL_RELEASE_MEMORY(ctx->grayscale);
1091 CL_RELEASE_MEMORY(ctx->harris_buf);
1092 CL_RELEASE_MEMORY(ctx->refined_features);
1093 CL_RELEASE_MEMORY(ctx->prev_refined_features);
1094 CL_RELEASE_MEMORY(ctx->brief_pattern);
1095 CL_RELEASE_MEMORY(ctx->descriptors);
1096 CL_RELEASE_MEMORY(ctx->prev_descriptors);
1097 CL_RELEASE_MEMORY(ctx->matches);
1098 CL_RELEASE_MEMORY(ctx->matches_contig);
1099 CL_RELEASE_MEMORY(ctx->transform_y);
1100 CL_RELEASE_MEMORY(ctx->transform_uv);
1101 if (ctx->debug_on) {
1102 CL_RELEASE_MEMORY(ctx->debug_matches);
1103 CL_RELEASE_MEMORY(ctx->debug_model_matches);
1106 ff_opencl_filter_uninit(avctx);
1109 static int deshake_opencl_init(AVFilterContext *avctx)
1111 DeshakeOpenCLContext *ctx = avctx->priv;
1112 AVFilterLink *outlink = avctx->outputs[0];
1113 AVFilterLink *inlink = avctx->inputs[0];
1114 FilterLink *inl = ff_filter_link(inlink);
1115 // Pointer to the host-side pattern buffer to be initialized and then copied
1116 // to the GPU
1117 PointPair *pattern_host = NULL;
1118 cl_int cle;
1119 int err;
1120 cl_ulong8 zeroed_ulong8;
1121 FFFrameQueueGlobal fqg;
1122 cl_image_format grayscale_format;
1123 cl_image_desc grayscale_desc;
1124 cl_command_queue_properties queue_props;
1126 const enum AVPixelFormat disallowed_formats[14] = {
1127 AV_PIX_FMT_GBRP,
1128 AV_PIX_FMT_GBRP9BE,
1129 AV_PIX_FMT_GBRP9LE,
1130 AV_PIX_FMT_GBRP10BE,
1131 AV_PIX_FMT_GBRP10LE,
1132 AV_PIX_FMT_GBRP16BE,
1133 AV_PIX_FMT_GBRP16LE,
1134 AV_PIX_FMT_GBRAP,
1135 AV_PIX_FMT_GBRAP16BE,
1136 AV_PIX_FMT_GBRAP16LE,
1137 AV_PIX_FMT_GBRAP12BE,
1138 AV_PIX_FMT_GBRAP12LE,
1139 AV_PIX_FMT_GBRAP10BE,
1140 AV_PIX_FMT_GBRAP10LE
1143 // Number of elements for an array
1144 const int image_grid_32 = ROUNDED_UP_DIV(outlink->h, 32) * ROUNDED_UP_DIV(outlink->w, 32);
1146 const int descriptor_buf_size = image_grid_32 * (BREIFN / 8);
1147 const int features_buf_size = image_grid_32 * sizeof(cl_float2);
1149 const AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)inl->hw_frames_ctx->data;
1150 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hw_frames_ctx->sw_format);
1152 av_assert0(hw_frames_ctx);
1153 av_assert0(desc);
1155 ff_framequeue_global_init(&fqg);
1156 ff_framequeue_init(&ctx->fq, &fqg);
1157 ctx->eof = 0;
1158 ctx->smooth_window = (int)(av_q2d(inl->frame_rate) * ctx->smooth_window_multiplier);
1159 ctx->curr_frame = 0;
1161 memset(&zeroed_ulong8, 0, sizeof(cl_ulong8));
1163 ctx->gauss_kernel = av_malloc_array(ctx->smooth_window, sizeof(float));
1164 if (!ctx->gauss_kernel) {
1165 err = AVERROR(ENOMEM);
1166 goto fail;
1169 ctx->ransac_err = av_malloc_array(MATCHES_CONTIG_SIZE, sizeof(float));
1170 if (!ctx->ransac_err) {
1171 err = AVERROR(ENOMEM);
1172 goto fail;
1175 for (int i = 0; i < RingbufCount; i++) {
1176 ctx->abs_motion.ringbuffers[i] = av_fifo_alloc2(ctx->smooth_window,
1177 sizeof(float), 0);
1179 if (!ctx->abs_motion.ringbuffers[i]) {
1180 err = AVERROR(ENOMEM);
1181 goto fail;
1185 if (ctx->debug_on) {
1186 ctx->abs_motion.debug_matches = av_fifo_alloc2(
1187 ctx->smooth_window / 2,
1188 sizeof(DebugMatches), 0
1191 if (!ctx->abs_motion.debug_matches) {
1192 err = AVERROR(ENOMEM);
1193 goto fail;
1197 ctx->abs_motion.curr_frame_offset = 0;
1198 ctx->abs_motion.data_start_offset = -1;
1199 ctx->abs_motion.data_end_offset = -1;
1201 pattern_host = av_malloc_array(BREIFN, sizeof(PointPair));
1202 if (!pattern_host) {
1203 err = AVERROR(ENOMEM);
1204 goto fail;
1207 ctx->matches_host = av_malloc_array(image_grid_32, sizeof(MotionVector));
1208 if (!ctx->matches_host) {
1209 err = AVERROR(ENOMEM);
1210 goto fail;
1213 ctx->matches_contig_host = av_malloc_array(MATCHES_CONTIG_SIZE, sizeof(MotionVector));
1214 if (!ctx->matches_contig_host) {
1215 err = AVERROR(ENOMEM);
1216 goto fail;
1219 ctx->inliers = av_malloc_array(MATCHES_CONTIG_SIZE, sizeof(MotionVector));
1220 if (!ctx->inliers) {
1221 err = AVERROR(ENOMEM);
1222 goto fail;
1225 // Initializing the patch pattern for building BREIF descriptors with
1226 av_lfg_init(&ctx->alfg, 234342424);
1227 for (int i = 0; i < BREIFN; ++i) {
1228 PointPair pair;
1230 for (int j = 0; j < 2; ++j) {
1231 pair.p1.s[j] = rand_in(-BRIEF_PATCH_SIZE_HALF, BRIEF_PATCH_SIZE_HALF + 1, &ctx->alfg);
1232 pair.p2.s[j] = rand_in(-BRIEF_PATCH_SIZE_HALF, BRIEF_PATCH_SIZE_HALF + 1, &ctx->alfg);
1235 pattern_host[i] = pair;
1238 for (int i = 0; i < 14; i++) {
1239 if (ctx->sw_format == disallowed_formats[i]) {
1240 av_log(avctx, AV_LOG_ERROR, "unsupported format in deshake_opencl.\n");
1241 err = AVERROR(ENOSYS);
1242 goto fail;
1246 if (desc->flags & AV_PIX_FMT_FLAG_RGB) {
1247 ctx->is_yuv = 0;
1248 } else {
1249 ctx->is_yuv = 1;
1251 ctx->sw_format = hw_frames_ctx->sw_format;
1253 err = ff_opencl_filter_load_program(avctx, &ff_source_deshake_cl, 1);
1254 if (err < 0)
1255 goto fail;
1257 if (ctx->debug_on) {
1258 queue_props = CL_QUEUE_PROFILING_ENABLE;
1259 } else {
1260 queue_props = 0;
1262 ctx->command_queue = clCreateCommandQueue(
1263 ctx->ocf.hwctx->context,
1264 ctx->ocf.hwctx->device_id,
1265 queue_props,
1266 &cle
1268 CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL command queue %d.\n", cle);
1270 CL_CREATE_KERNEL(ctx, grayscale);
1271 CL_CREATE_KERNEL(ctx, harris_response);
1272 CL_CREATE_KERNEL(ctx, refine_features);
1273 CL_CREATE_KERNEL(ctx, brief_descriptors);
1274 CL_CREATE_KERNEL(ctx, match_descriptors);
1275 CL_CREATE_KERNEL(ctx, transform);
1276 CL_CREATE_KERNEL(ctx, crop_upscale);
1277 if (ctx->debug_on)
1278 CL_CREATE_KERNEL(ctx, draw_debug_info);
1280 if (!ctx->is_yuv) {
1281 grayscale_format.image_channel_order = CL_R;
1282 grayscale_format.image_channel_data_type = CL_FLOAT;
1284 grayscale_desc = (cl_image_desc) {
1285 .image_type = CL_MEM_OBJECT_IMAGE2D,
1286 .image_width = outlink->w,
1287 .image_height = outlink->h,
1288 .image_depth = 0,
1289 .image_array_size = 0,
1290 .image_row_pitch = 0,
1291 .image_slice_pitch = 0,
1292 .num_mip_levels = 0,
1293 .num_samples = 0,
1294 .buffer = NULL,
1297 ctx->grayscale = clCreateImage(
1298 ctx->ocf.hwctx->context,
1300 &grayscale_format,
1301 &grayscale_desc,
1302 NULL,
1303 &cle
1305 CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create grayscale image: %d.\n", cle);
1308 CL_CREATE_BUFFER(ctx, harris_buf, outlink->h * outlink->w * sizeof(float));
1309 CL_CREATE_BUFFER(ctx, refined_features, features_buf_size);
1310 CL_CREATE_BUFFER(ctx, prev_refined_features, features_buf_size);
1311 CL_CREATE_BUFFER_FLAGS(
1312 ctx,
1313 brief_pattern,
1314 CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
1315 BREIFN * sizeof(PointPair),
1316 pattern_host
1318 CL_CREATE_BUFFER(ctx, descriptors, descriptor_buf_size);
1319 CL_CREATE_BUFFER(ctx, prev_descriptors, descriptor_buf_size);
1320 CL_CREATE_BUFFER(ctx, matches, image_grid_32 * sizeof(MotionVector));
1321 CL_CREATE_BUFFER(ctx, matches_contig, MATCHES_CONTIG_SIZE * sizeof(MotionVector));
1322 CL_CREATE_BUFFER(ctx, transform_y, 9 * sizeof(float));
1323 CL_CREATE_BUFFER(ctx, transform_uv, 9 * sizeof(float));
1324 if (ctx->debug_on) {
1325 CL_CREATE_BUFFER(ctx, debug_matches, MATCHES_CONTIG_SIZE * sizeof(MotionVector));
1326 CL_CREATE_BUFFER(ctx, debug_model_matches, 3 * sizeof(MotionVector));
1329 ctx->initialized = 1;
1330 av_freep(&pattern_host);
1332 return 0;
1334 fail:
1335 av_freep(&pattern_host);
1336 return err;
1339 // Logs debug information about the transform data
1340 static void transform_debug(AVFilterContext *avctx, float *new_vals, float *old_vals, int curr_frame) {
1341 av_log(avctx, AV_LOG_VERBOSE,
1342 "Frame %d:\n"
1343 "\tframe moved from: %f x, %f y\n"
1344 "\t to: %f x, %f y\n"
1345 "\t rotated from: %f degrees\n"
1346 "\t to: %f degrees\n"
1347 "\t scaled from: %f x, %f y\n"
1348 "\t to: %f x, %f y\n"
1349 "\n"
1350 "\tframe moved by: %f x, %f y\n"
1351 "\t rotated by: %f degrees\n"
1352 "\t scaled by: %f x, %f y\n",
1353 curr_frame,
1354 old_vals[RingbufX], old_vals[RingbufY],
1355 new_vals[RingbufX], new_vals[RingbufY],
1356 old_vals[RingbufRot] * (180.0 / M_PI),
1357 new_vals[RingbufRot] * (180.0 / M_PI),
1358 old_vals[RingbufScaleX], old_vals[RingbufScaleY],
1359 new_vals[RingbufScaleX], new_vals[RingbufScaleY],
1360 old_vals[RingbufX] - new_vals[RingbufX], old_vals[RingbufY] - new_vals[RingbufY],
1361 old_vals[RingbufRot] * (180.0 / M_PI) - new_vals[RingbufRot] * (180.0 / M_PI),
1362 new_vals[RingbufScaleX] / old_vals[RingbufScaleX], new_vals[RingbufScaleY] / old_vals[RingbufScaleY]
1366 // Uses the buffered motion information to determine a transform that smooths the
1367 // given frame and applies it
1368 static int filter_frame(AVFilterLink *link, AVFrame *input_frame)
1370 AVFilterContext *avctx = link->dst;
1371 AVFilterLink *outlink = avctx->outputs[0];
1372 FilterLink *outl = ff_filter_link(outlink);
1373 DeshakeOpenCLContext *deshake_ctx = avctx->priv;
1374 AVFrame *cropped_frame = NULL, *transformed_frame = NULL;
1375 int err;
1376 cl_int cle;
1377 float new_vals[RingbufCount];
1378 float old_vals[RingbufCount];
1379 // Luma (in the case of YUV) transform, or just the transform in the case of RGB
1380 float transform_y[9];
1381 // Chroma transform
1382 float transform_uv[9];
1383 // Luma crop transform (or RGB)
1384 float transform_crop_y[9];
1385 // Chroma crop transform
1386 float transform_crop_uv[9];
1387 float transform_debug_rgb[9];
1388 size_t global_work[2];
1389 int64_t duration;
1390 cl_mem src, transformed, dst;
1391 cl_mem transforms[AV_VIDEO_MAX_PLANES];
1392 CropInfo crops[AV_VIDEO_MAX_PLANES];
1393 cl_event transform_event, crop_upscale_event;
1394 DebugMatches debug_matches;
1395 cl_int num_model_matches;
1397 const float center_w = (float)input_frame->width / 2;
1398 const float center_h = (float)input_frame->height / 2;
1400 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(deshake_ctx->sw_format);
1401 const int chroma_width = AV_CEIL_RSHIFT(input_frame->width, desc->log2_chroma_w);
1402 const int chroma_height = AV_CEIL_RSHIFT(input_frame->height, desc->log2_chroma_h);
1404 const float center_w_chroma = (float)chroma_width / 2;
1405 const float center_h_chroma = (float)chroma_height / 2;
1407 const float luma_w_over_chroma_w = ((float)input_frame->width / (float)chroma_width);
1408 const float luma_h_over_chroma_h = ((float)input_frame->height / (float)chroma_height);
1410 if (deshake_ctx->debug_on) {
1411 av_fifo_read(
1412 deshake_ctx->abs_motion.debug_matches,
1413 &debug_matches, 1);
1416 if (input_frame->duration) {
1417 duration = input_frame->duration;
1418 } else {
1419 duration = av_rescale_q(1, av_inv_q(outl->frame_rate), outlink->time_base);
1421 deshake_ctx->duration = input_frame->pts + duration;
1423 // Get the absolute transform data for this frame
1424 for (int i = 0; i < RingbufCount; i++) {
1425 av_fifo_peek(deshake_ctx->abs_motion.ringbuffers[i],
1426 &old_vals[i], 1,
1427 deshake_ctx->abs_motion.curr_frame_offset);
1430 if (deshake_ctx->tripod_mode) {
1431 // If tripod mode is turned on we simply undo all motion relative to the
1432 // first frame
1434 new_vals[RingbufX] = 0.0f;
1435 new_vals[RingbufY] = 0.0f;
1436 new_vals[RingbufRot] = 0.0f;
1437 new_vals[RingbufScaleX] = 1.0f;
1438 new_vals[RingbufScaleY] = 1.0f;
1439 } else {
1440 // Tripod mode is off and we need to smooth a moving camera
1442 new_vals[RingbufX] = smooth(
1443 deshake_ctx,
1444 deshake_ctx->gauss_kernel,
1445 deshake_ctx->smooth_window,
1446 input_frame->width,
1447 deshake_ctx->abs_motion.ringbuffers[RingbufX]
1449 new_vals[RingbufY] = smooth(
1450 deshake_ctx,
1451 deshake_ctx->gauss_kernel,
1452 deshake_ctx->smooth_window,
1453 input_frame->height,
1454 deshake_ctx->abs_motion.ringbuffers[RingbufY]
1456 new_vals[RingbufRot] = smooth(
1457 deshake_ctx,
1458 deshake_ctx->gauss_kernel,
1459 deshake_ctx->smooth_window,
1460 M_PI / 4,
1461 deshake_ctx->abs_motion.ringbuffers[RingbufRot]
1463 new_vals[RingbufScaleX] = smooth(
1464 deshake_ctx,
1465 deshake_ctx->gauss_kernel,
1466 deshake_ctx->smooth_window,
1467 2.0f,
1468 deshake_ctx->abs_motion.ringbuffers[RingbufScaleX]
1470 new_vals[RingbufScaleY] = smooth(
1471 deshake_ctx,
1472 deshake_ctx->gauss_kernel,
1473 deshake_ctx->smooth_window,
1474 2.0f,
1475 deshake_ctx->abs_motion.ringbuffers[RingbufScaleY]
1479 transform_center_scale(
1480 old_vals[RingbufX] - new_vals[RingbufX],
1481 old_vals[RingbufY] - new_vals[RingbufY],
1482 old_vals[RingbufRot] - new_vals[RingbufRot],
1483 new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
1484 new_vals[RingbufScaleY] / old_vals[RingbufScaleY],
1485 center_w,
1486 center_h,
1487 transform_y
1490 transform_center_scale(
1491 (old_vals[RingbufX] - new_vals[RingbufX]) / luma_w_over_chroma_w,
1492 (old_vals[RingbufY] - new_vals[RingbufY]) / luma_h_over_chroma_h,
1493 old_vals[RingbufRot] - new_vals[RingbufRot],
1494 new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
1495 new_vals[RingbufScaleY] / old_vals[RingbufScaleY],
1496 center_w_chroma,
1497 center_h_chroma,
1498 transform_uv
1501 CL_BLOCKING_WRITE_BUFFER(deshake_ctx->command_queue, deshake_ctx->transform_y, 9 * sizeof(float), transform_y, NULL);
1502 CL_BLOCKING_WRITE_BUFFER(deshake_ctx->command_queue, deshake_ctx->transform_uv, 9 * sizeof(float), transform_uv, NULL);
1504 if (deshake_ctx->debug_on)
1505 transform_debug(avctx, new_vals, old_vals, deshake_ctx->curr_frame);
1507 cropped_frame = ff_get_video_buffer(outlink, outlink->w, outlink->h);
1508 if (!cropped_frame) {
1509 err = AVERROR(ENOMEM);
1510 goto fail;
1513 transformed_frame = ff_get_video_buffer(outlink, outlink->w, outlink->h);
1514 if (!transformed_frame) {
1515 err = AVERROR(ENOMEM);
1516 goto fail;
1519 transforms[0] = deshake_ctx->transform_y;
1520 transforms[1] = transforms[2] = deshake_ctx->transform_uv;
1522 for (int p = 0; p < AV_VIDEO_MAX_PLANES; p++) {
1523 // Transform all of the planes appropriately
1524 src = (cl_mem)input_frame->data[p];
1525 transformed = (cl_mem)transformed_frame->data[p];
1527 if (!transformed)
1528 break;
1530 err = ff_opencl_filter_work_size_from_image(avctx, global_work, input_frame, p, 0);
1531 if (err < 0)
1532 goto fail;
1534 CL_RUN_KERNEL_WITH_ARGS(
1535 deshake_ctx->command_queue,
1536 deshake_ctx->kernel_transform,
1537 global_work,
1538 NULL,
1539 &transform_event,
1540 { sizeof(cl_mem), &src },
1541 { sizeof(cl_mem), &transformed },
1542 { sizeof(cl_mem), &transforms[p] },
1546 if (deshake_ctx->debug_on && !deshake_ctx->is_yuv && debug_matches.num_matches > 0) {
1547 CL_BLOCKING_WRITE_BUFFER(
1548 deshake_ctx->command_queue,
1549 deshake_ctx->debug_matches,
1550 debug_matches.num_matches * sizeof(MotionVector),
1551 debug_matches.matches,
1552 NULL
1555 CL_BLOCKING_WRITE_BUFFER(
1556 deshake_ctx->command_queue,
1557 deshake_ctx->debug_model_matches,
1558 debug_matches.num_model_matches * sizeof(MotionVector),
1559 debug_matches.model_matches,
1560 NULL
1563 num_model_matches = debug_matches.num_model_matches;
1565 // Invert the transform
1566 transform_center_scale(
1567 new_vals[RingbufX] - old_vals[RingbufX],
1568 new_vals[RingbufY] - old_vals[RingbufY],
1569 new_vals[RingbufRot] - old_vals[RingbufRot],
1570 old_vals[RingbufScaleX] / new_vals[RingbufScaleX],
1571 old_vals[RingbufScaleY] / new_vals[RingbufScaleY],
1572 center_w,
1573 center_h,
1574 transform_debug_rgb
1577 CL_BLOCKING_WRITE_BUFFER(deshake_ctx->command_queue, deshake_ctx->transform_y, 9 * sizeof(float), transform_debug_rgb, NULL);
1579 transformed = (cl_mem)transformed_frame->data[0];
1580 CL_RUN_KERNEL_WITH_ARGS(
1581 deshake_ctx->command_queue,
1582 deshake_ctx->kernel_draw_debug_info,
1583 (size_t[]){ debug_matches.num_matches },
1584 NULL,
1585 NULL,
1586 { sizeof(cl_mem), &transformed },
1587 { sizeof(cl_mem), &deshake_ctx->debug_matches },
1588 { sizeof(cl_mem), &deshake_ctx->debug_model_matches },
1589 { sizeof(cl_int), &num_model_matches },
1590 { sizeof(cl_mem), &deshake_ctx->transform_y }
1594 if (deshake_ctx->should_crop) {
1595 // Generate transforms for cropping
1596 transform_center_scale(
1597 (old_vals[RingbufX] - new_vals[RingbufX]) / 5,
1598 (old_vals[RingbufY] - new_vals[RingbufY]) / 5,
1599 (old_vals[RingbufRot] - new_vals[RingbufRot]) / 5,
1600 new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
1601 new_vals[RingbufScaleY] / old_vals[RingbufScaleY],
1602 center_w,
1603 center_h,
1604 transform_crop_y
1606 update_needed_crop(&deshake_ctx->crop_y, transform_crop_y, input_frame->width, input_frame->height);
1608 transform_center_scale(
1609 (old_vals[RingbufX] - new_vals[RingbufX]) / (5 * luma_w_over_chroma_w),
1610 (old_vals[RingbufY] - new_vals[RingbufY]) / (5 * luma_h_over_chroma_h),
1611 (old_vals[RingbufRot] - new_vals[RingbufRot]) / 5,
1612 new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
1613 new_vals[RingbufScaleY] / old_vals[RingbufScaleY],
1614 center_w_chroma,
1615 center_h_chroma,
1616 transform_crop_uv
1618 update_needed_crop(&deshake_ctx->crop_uv, transform_crop_uv, chroma_width, chroma_height);
1620 crops[0] = deshake_ctx->crop_y;
1621 crops[1] = crops[2] = deshake_ctx->crop_uv;
1623 for (int p = 0; p < AV_VIDEO_MAX_PLANES; p++) {
1624 // Crop all of the planes appropriately
1625 dst = (cl_mem)cropped_frame->data[p];
1626 transformed = (cl_mem)transformed_frame->data[p];
1628 if (!dst)
1629 break;
1631 err = ff_opencl_filter_work_size_from_image(avctx, global_work, input_frame, p, 0);
1632 if (err < 0)
1633 goto fail;
1635 CL_RUN_KERNEL_WITH_ARGS(
1636 deshake_ctx->command_queue,
1637 deshake_ctx->kernel_crop_upscale,
1638 global_work,
1639 NULL,
1640 &crop_upscale_event,
1641 { sizeof(cl_mem), &transformed },
1642 { sizeof(cl_mem), &dst },
1643 { sizeof(cl_float2), &crops[p].top_left },
1644 { sizeof(cl_float2), &crops[p].bottom_right },
1649 if (deshake_ctx->curr_frame < deshake_ctx->smooth_window / 2) {
1650 // This means we are somewhere at the start of the video. We need to
1651 // increment the current frame offset until it reaches the center of
1652 // the ringbuffers (as the current frame will be located there for
1653 // the rest of the video).
1655 // The end of the video is taken care of by draining motion data
1656 // one-by-one out of the buffer, causing the (at that point fixed)
1657 // offset to move towards later frames' data.
1658 ++deshake_ctx->abs_motion.curr_frame_offset;
1661 if (deshake_ctx->abs_motion.data_end_offset != -1) {
1662 // Keep the end offset in sync with the frame it's supposed to be
1663 // positioned at
1664 --deshake_ctx->abs_motion.data_end_offset;
1666 if (deshake_ctx->abs_motion.data_end_offset == deshake_ctx->abs_motion.curr_frame_offset - 1) {
1667 // The end offset would be the start of the new video sequence; flip to
1668 // start offset
1669 deshake_ctx->abs_motion.data_end_offset = -1;
1670 deshake_ctx->abs_motion.data_start_offset = deshake_ctx->abs_motion.curr_frame_offset;
1672 } else if (deshake_ctx->abs_motion.data_start_offset != -1) {
1673 // Keep the start offset in sync with the frame it's supposed to be
1674 // positioned at
1675 --deshake_ctx->abs_motion.data_start_offset;
1678 if (deshake_ctx->debug_on) {
1679 deshake_ctx->transform_time += ff_opencl_get_event_time(transform_event);
1680 if (deshake_ctx->should_crop) {
1681 deshake_ctx->crop_upscale_time += ff_opencl_get_event_time(crop_upscale_event);
1685 ++deshake_ctx->curr_frame;
1687 if (deshake_ctx->debug_on)
1688 av_freep(&debug_matches.matches);
1690 if (deshake_ctx->should_crop) {
1691 err = av_frame_copy_props(cropped_frame, input_frame);
1692 if (err < 0)
1693 goto fail;
1695 av_frame_free(&transformed_frame);
1696 av_frame_free(&input_frame);
1697 return ff_filter_frame(outlink, cropped_frame);
1699 } else {
1700 err = av_frame_copy_props(transformed_frame, input_frame);
1701 if (err < 0)
1702 goto fail;
1704 av_frame_free(&cropped_frame);
1705 av_frame_free(&input_frame);
1706 return ff_filter_frame(outlink, transformed_frame);
1709 fail:
1710 clFinish(deshake_ctx->command_queue);
1712 if (deshake_ctx->debug_on)
1713 if (debug_matches.matches)
1714 av_freep(&debug_matches.matches);
1716 av_frame_free(&input_frame);
1717 av_frame_free(&transformed_frame);
1718 av_frame_free(&cropped_frame);
1719 return err;
1722 // Add the given frame to the frame queue to eventually be processed.
1724 // Also determines the motion from the previous frame and updates the stored
1725 // motion information accordingly.
1726 static int queue_frame(AVFilterLink *link, AVFrame *input_frame)
1728 AVFilterContext *avctx = link->dst;
1729 DeshakeOpenCLContext *deshake_ctx = avctx->priv;
1730 int err;
1731 int num_vectors;
1732 int num_inliers = 0;
1733 cl_int cle;
1734 FrameDelta relative;
1735 SimilarityMatrix model;
1736 size_t global_work[2];
1737 size_t harris_global_work[2];
1738 size_t grid_32_global_work[2];
1739 int grid_32_h, grid_32_w;
1740 size_t local_work[2];
1741 cl_mem src, temp;
1742 float prev_vals[5];
1743 float new_vals[5];
1744 cl_event grayscale_event, harris_response_event, refine_features_event,
1745 brief_event, match_descriptors_event, read_buf_event;
1746 DebugMatches debug_matches;
1748 num_vectors = 0;
1750 local_work[0] = 8;
1751 local_work[1] = 8;
1753 err = ff_opencl_filter_work_size_from_image(avctx, global_work, input_frame, 0, 0);
1754 if (err < 0)
1755 goto fail;
1757 err = ff_opencl_filter_work_size_from_image(avctx, harris_global_work, input_frame, 0, 8);
1758 if (err < 0)
1759 goto fail;
1761 err = ff_opencl_filter_work_size_from_image(avctx, grid_32_global_work, input_frame, 0, 32);
1762 if (err < 0)
1763 goto fail;
1765 // We want a single work-item for each 32x32 block of pixels in the input frame
1766 grid_32_global_work[0] /= 32;
1767 grid_32_global_work[1] /= 32;
1769 grid_32_h = ROUNDED_UP_DIV(input_frame->height, 32);
1770 grid_32_w = ROUNDED_UP_DIV(input_frame->width, 32);
1772 if (deshake_ctx->is_yuv) {
1773 deshake_ctx->grayscale = (cl_mem)input_frame->data[0];
1774 } else {
1775 src = (cl_mem)input_frame->data[0];
1777 CL_RUN_KERNEL_WITH_ARGS(
1778 deshake_ctx->command_queue,
1779 deshake_ctx->kernel_grayscale,
1780 global_work,
1781 NULL,
1782 &grayscale_event,
1783 { sizeof(cl_mem), &src },
1784 { sizeof(cl_mem), &deshake_ctx->grayscale }
1788 CL_RUN_KERNEL_WITH_ARGS(
1789 deshake_ctx->command_queue,
1790 deshake_ctx->kernel_harris_response,
1791 harris_global_work,
1792 local_work,
1793 &harris_response_event,
1794 { sizeof(cl_mem), &deshake_ctx->grayscale },
1795 { sizeof(cl_mem), &deshake_ctx->harris_buf }
1798 CL_RUN_KERNEL_WITH_ARGS(
1799 deshake_ctx->command_queue,
1800 deshake_ctx->kernel_refine_features,
1801 grid_32_global_work,
1802 NULL,
1803 &refine_features_event,
1804 { sizeof(cl_mem), &deshake_ctx->grayscale },
1805 { sizeof(cl_mem), &deshake_ctx->harris_buf },
1806 { sizeof(cl_mem), &deshake_ctx->refined_features },
1807 { sizeof(cl_int), &deshake_ctx->refine_features }
1810 CL_RUN_KERNEL_WITH_ARGS(
1811 deshake_ctx->command_queue,
1812 deshake_ctx->kernel_brief_descriptors,
1813 grid_32_global_work,
1814 NULL,
1815 &brief_event,
1816 { sizeof(cl_mem), &deshake_ctx->grayscale },
1817 { sizeof(cl_mem), &deshake_ctx->refined_features },
1818 { sizeof(cl_mem), &deshake_ctx->descriptors },
1819 { sizeof(cl_mem), &deshake_ctx->brief_pattern}
1822 if (!av_fifo_can_read(deshake_ctx->abs_motion.ringbuffers[RingbufX])) {
1823 // This is the first frame we've been given to queue, meaning there is
1824 // no previous frame to match descriptors to
1826 goto no_motion_data;
1829 CL_RUN_KERNEL_WITH_ARGS(
1830 deshake_ctx->command_queue,
1831 deshake_ctx->kernel_match_descriptors,
1832 grid_32_global_work,
1833 NULL,
1834 &match_descriptors_event,
1835 { sizeof(cl_mem), &deshake_ctx->prev_refined_features },
1836 { sizeof(cl_mem), &deshake_ctx->refined_features },
1837 { sizeof(cl_mem), &deshake_ctx->descriptors },
1838 { sizeof(cl_mem), &deshake_ctx->prev_descriptors },
1839 { sizeof(cl_mem), &deshake_ctx->matches }
1842 cle = clEnqueueReadBuffer(
1843 deshake_ctx->command_queue,
1844 deshake_ctx->matches,
1845 CL_TRUE,
1847 grid_32_h * grid_32_w * sizeof(MotionVector),
1848 deshake_ctx->matches_host,
1850 NULL,
1851 &read_buf_event
1853 CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to read matches to host: %d.\n", cle);
1855 num_vectors = make_vectors_contig(deshake_ctx, grid_32_h, grid_32_w);
1857 if (num_vectors < 10) {
1858 // Not enough matches to get reliable motion data for this frame
1860 // From this point on all data is relative to this frame rather than the
1861 // original frame. We have to make sure that we don't mix values that were
1862 // relative to the original frame with the new values relative to this
1863 // frame when doing the gaussian smoothing. We keep track of where the old
1864 // values end using this data_end_offset field in order to accomplish
1865 // that goal.
1867 // If no motion data is present for multiple frames in a short window of
1868 // time, we leave the end where it was to avoid mixing 0s in with the
1869 // old data (and just treat them all as part of the new values)
1870 if (deshake_ctx->abs_motion.data_end_offset == -1) {
1871 deshake_ctx->abs_motion.data_end_offset =
1872 av_fifo_can_read(deshake_ctx->abs_motion.ringbuffers[RingbufX]) - 1;
1875 goto no_motion_data;
1878 if (!estimate_affine_2d(
1879 deshake_ctx,
1880 deshake_ctx->matches_contig_host,
1881 &debug_matches,
1882 num_vectors,
1883 model.matrix,
1884 10.0,
1885 3000,
1886 0.999999999999
1887 )) {
1888 goto no_motion_data;
1891 for (int i = 0; i < num_vectors; i++) {
1892 if (deshake_ctx->matches_contig_host[i].should_consider) {
1893 deshake_ctx->inliers[num_inliers] = deshake_ctx->matches_contig_host[i];
1894 num_inliers++;
1898 if (!minimize_error(
1899 deshake_ctx,
1900 deshake_ctx->inliers,
1901 &debug_matches,
1902 num_inliers,
1903 model.matrix,
1905 )) {
1906 goto no_motion_data;
1910 relative = decompose_transform(model.matrix);
1912 // Get the absolute transform data for the previous frame
1913 for (int i = 0; i < RingbufCount; i++) {
1914 av_fifo_peek(
1915 deshake_ctx->abs_motion.ringbuffers[i],
1916 &prev_vals[i], 1,
1917 av_fifo_can_read(deshake_ctx->abs_motion.ringbuffers[i]) - 1);
1920 new_vals[RingbufX] = prev_vals[RingbufX] + relative.translation.s[0];
1921 new_vals[RingbufY] = prev_vals[RingbufY] + relative.translation.s[1];
1922 new_vals[RingbufRot] = prev_vals[RingbufRot] + relative.rotation;
1923 new_vals[RingbufScaleX] = prev_vals[RingbufScaleX] / relative.scale.s[0];
1924 new_vals[RingbufScaleY] = prev_vals[RingbufScaleY] / relative.scale.s[1];
1926 if (deshake_ctx->debug_on) {
1927 if (!deshake_ctx->is_yuv) {
1928 deshake_ctx->grayscale_time += ff_opencl_get_event_time(grayscale_event);
1930 deshake_ctx->harris_response_time += ff_opencl_get_event_time(harris_response_event);
1931 deshake_ctx->refine_features_time += ff_opencl_get_event_time(refine_features_event);
1932 deshake_ctx->brief_descriptors_time += ff_opencl_get_event_time(brief_event);
1933 deshake_ctx->match_descriptors_time += ff_opencl_get_event_time(match_descriptors_event);
1934 deshake_ctx->read_buf_time += ff_opencl_get_event_time(read_buf_event);
1937 goto end;
1939 no_motion_data:
1940 new_vals[RingbufX] = 0.0f;
1941 new_vals[RingbufY] = 0.0f;
1942 new_vals[RingbufRot] = 0.0f;
1943 new_vals[RingbufScaleX] = 1.0f;
1944 new_vals[RingbufScaleY] = 1.0f;
1946 for (int i = 0; i < num_vectors; i++) {
1947 deshake_ctx->matches_contig_host[i].should_consider = 0;
1949 debug_matches.num_model_matches = 0;
1951 if (deshake_ctx->debug_on) {
1952 av_log(avctx, AV_LOG_VERBOSE,
1953 "\n[ALERT] No motion data found in queue_frame, motion reset to 0\n\n"
1957 goto end;
1959 end:
1960 // Swap the descriptor buffers (we don't need the previous frame's descriptors
1961 // again so we will use that space for the next frame's descriptors)
1962 temp = deshake_ctx->prev_descriptors;
1963 deshake_ctx->prev_descriptors = deshake_ctx->descriptors;
1964 deshake_ctx->descriptors = temp;
1966 // Same for the refined features
1967 temp = deshake_ctx->prev_refined_features;
1968 deshake_ctx->prev_refined_features = deshake_ctx->refined_features;
1969 deshake_ctx->refined_features = temp;
1971 if (deshake_ctx->debug_on) {
1972 if (num_vectors == 0) {
1973 debug_matches.matches = NULL;
1974 } else {
1975 debug_matches.matches = av_malloc_array(num_vectors, sizeof(MotionVector));
1977 if (!debug_matches.matches) {
1978 err = AVERROR(ENOMEM);
1979 goto fail;
1983 for (int i = 0; i < num_vectors; i++) {
1984 debug_matches.matches[i] = deshake_ctx->matches_contig_host[i];
1986 debug_matches.num_matches = num_vectors;
1988 av_fifo_write(
1989 deshake_ctx->abs_motion.debug_matches,
1990 &debug_matches, 1);
1993 for (int i = 0; i < RingbufCount; i++) {
1994 av_fifo_write(deshake_ctx->abs_motion.ringbuffers[i], &new_vals[i], 1);
1997 return ff_framequeue_add(&deshake_ctx->fq, input_frame);
1999 fail:
2000 clFinish(deshake_ctx->command_queue);
2001 av_frame_free(&input_frame);
2002 return err;
2005 static int activate(AVFilterContext *ctx)
2007 AVFilterLink *inlink = ctx->inputs[0];
2008 AVFilterLink *outlink = ctx->outputs[0];
2009 DeshakeOpenCLContext *deshake_ctx = ctx->priv;
2010 AVFrame *frame = NULL;
2011 int ret, status;
2012 int64_t pts;
2014 FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
2016 if (!deshake_ctx->eof) {
2017 ret = ff_inlink_consume_frame(inlink, &frame);
2018 if (ret < 0)
2019 return ret;
2020 if (ret > 0) {
2021 if (!frame->hw_frames_ctx)
2022 return AVERROR(EINVAL);
2024 if (!deshake_ctx->initialized) {
2025 ret = deshake_opencl_init(ctx);
2026 if (ret < 0)
2027 return ret;
2030 // If there is no more space in the ringbuffers, remove the oldest
2031 // values to make room for the new ones
2032 if (!av_fifo_can_write(deshake_ctx->abs_motion.ringbuffers[RingbufX])) {
2033 for (int i = 0; i < RingbufCount; i++) {
2034 av_fifo_drain2(deshake_ctx->abs_motion.ringbuffers[i], 1);
2037 ret = queue_frame(inlink, frame);
2038 if (ret < 0)
2039 return ret;
2040 if (ret >= 0) {
2041 // See if we have enough buffered frames to process one
2043 // "enough" is half the smooth window of queued frames into the future
2044 if (ff_framequeue_queued_frames(&deshake_ctx->fq) >= deshake_ctx->smooth_window / 2) {
2045 return filter_frame(inlink, ff_framequeue_take(&deshake_ctx->fq));
2051 if (!deshake_ctx->eof && ff_inlink_acknowledge_status(inlink, &status, &pts)) {
2052 if (status == AVERROR_EOF) {
2053 deshake_ctx->eof = 1;
2057 if (deshake_ctx->eof) {
2058 // Finish processing the rest of the frames in the queue.
2059 while(ff_framequeue_queued_frames(&deshake_ctx->fq) != 0) {
2060 for (int i = 0; i < RingbufCount; i++) {
2061 av_fifo_drain2(deshake_ctx->abs_motion.ringbuffers[i], 1);
2064 ret = filter_frame(inlink, ff_framequeue_take(&deshake_ctx->fq));
2065 if (ret < 0) {
2066 return ret;
2070 if (deshake_ctx->debug_on) {
2071 av_log(ctx, AV_LOG_VERBOSE,
2072 "Average kernel execution times:\n"
2073 "\t grayscale: %0.3f ms\n"
2074 "\t harris_response: %0.3f ms\n"
2075 "\t refine_features: %0.3f ms\n"
2076 "\tbrief_descriptors: %0.3f ms\n"
2077 "\tmatch_descriptors: %0.3f ms\n"
2078 "\t transform: %0.3f ms\n"
2079 "\t crop_upscale: %0.3f ms\n"
2080 "Average buffer read times:\n"
2081 "\t features buf: %0.3f ms\n",
2082 averaged_event_time_ms(deshake_ctx->grayscale_time, deshake_ctx->curr_frame),
2083 averaged_event_time_ms(deshake_ctx->harris_response_time, deshake_ctx->curr_frame),
2084 averaged_event_time_ms(deshake_ctx->refine_features_time, deshake_ctx->curr_frame),
2085 averaged_event_time_ms(deshake_ctx->brief_descriptors_time, deshake_ctx->curr_frame),
2086 averaged_event_time_ms(deshake_ctx->match_descriptors_time, deshake_ctx->curr_frame),
2087 averaged_event_time_ms(deshake_ctx->transform_time, deshake_ctx->curr_frame),
2088 averaged_event_time_ms(deshake_ctx->crop_upscale_time, deshake_ctx->curr_frame),
2089 averaged_event_time_ms(deshake_ctx->read_buf_time, deshake_ctx->curr_frame)
2093 ff_outlink_set_status(outlink, AVERROR_EOF, deshake_ctx->duration);
2094 return 0;
2097 if (!deshake_ctx->eof) {
2098 FF_FILTER_FORWARD_WANTED(outlink, inlink);
2101 return FFERROR_NOT_READY;
2104 static const AVFilterPad deshake_opencl_inputs[] = {
2106 .name = "default",
2107 .type = AVMEDIA_TYPE_VIDEO,
2108 .config_props = &ff_opencl_filter_config_input,
2112 static const AVFilterPad deshake_opencl_outputs[] = {
2114 .name = "default",
2115 .type = AVMEDIA_TYPE_VIDEO,
2116 .config_props = &ff_opencl_filter_config_output,
2120 #define OFFSET(x) offsetof(DeshakeOpenCLContext, x)
2121 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
2123 static const AVOption deshake_opencl_options[] = {
2125 "tripod", "simulates a tripod by preventing any camera movement whatsoever "
2126 "from the original frame",
2127 OFFSET(tripod_mode), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS
2130 "debug", "turn on additional debugging information",
2131 OFFSET(debug_on), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS
2134 "adaptive_crop", "attempt to subtly crop borders to reduce mirrored content",
2135 OFFSET(should_crop), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS
2138 "refine_features", "refine feature point locations at a sub-pixel level",
2139 OFFSET(refine_features), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS
2142 "smooth_strength", "smoothing strength (0 attempts to adaptively determine optimal strength)",
2143 OFFSET(smooth_percent), AV_OPT_TYPE_FLOAT, {.dbl = 0.0f}, 0.0f, 1.0f, FLAGS
2146 "smooth_window_multiplier", "multiplier for number of frames to buffer for motion data",
2147 OFFSET(smooth_window_multiplier), AV_OPT_TYPE_FLOAT, {.dbl = 2.0}, 0.1, 10.0, FLAGS
2149 { NULL }
2152 AVFILTER_DEFINE_CLASS(deshake_opencl);
2154 const FFFilter ff_vf_deshake_opencl = {
2155 .p.name = "deshake_opencl",
2156 .p.description = NULL_IF_CONFIG_SMALL("Feature-point based video stabilization filter"),
2157 .p.priv_class = &deshake_opencl_class,
2158 .p.flags = AVFILTER_FLAG_HWDEVICE,
2159 .priv_size = sizeof(DeshakeOpenCLContext),
2160 .init = &ff_opencl_filter_init,
2161 .uninit = &deshake_opencl_uninit,
2162 .activate = activate,
2163 FILTER_INPUTS(deshake_opencl_inputs),
2164 FILTER_OUTPUTS(deshake_opencl_outputs),
2165 FILTER_SINGLE_PIXFMT(AV_PIX_FMT_OPENCL),
2166 .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,