2 * Copyright (c) 2017, Alliance for Open Media. All rights reserved
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
12 #ifndef AOM_AOM_DSP_MATHUTILS_H_
13 #define AOM_AOM_DSP_MATHUTILS_H_
19 #include "aom_dsp/aom_dsp_common.h"
20 #include "aom_mem/aom_mem.h"
22 static const double TINY_NEAR_ZERO
= 1.0E-16;
24 // Solves Ax = b, where x and b are column vectors of size nx1 and A is nxn
25 static INLINE
int linsolve(int n
, double *A
, int stride
, double *b
, double *x
) {
28 // Forward elimination
29 for (k
= 0; k
< n
- 1; k
++) {
30 // Bring the largest magnitude to the diagonal position
31 for (i
= n
- 1; i
> k
; i
--) {
32 if (fabs(A
[(i
- 1) * stride
+ k
]) < fabs(A
[i
* stride
+ k
])) {
33 for (j
= 0; j
< n
; j
++) {
34 c
= A
[i
* stride
+ j
];
35 A
[i
* stride
+ j
] = A
[(i
- 1) * stride
+ j
];
36 A
[(i
- 1) * stride
+ j
] = c
;
43 for (i
= k
; i
< n
- 1; i
++) {
44 if (fabs(A
[k
* stride
+ k
]) < TINY_NEAR_ZERO
) return 0;
45 c
= A
[(i
+ 1) * stride
+ k
] / A
[k
* stride
+ k
];
46 for (j
= 0; j
< n
; j
++) A
[(i
+ 1) * stride
+ j
] -= c
* A
[k
* stride
+ j
];
50 // Backward substitution
51 for (i
= n
- 1; i
>= 0; i
--) {
52 if (fabs(A
[i
* stride
+ i
]) < TINY_NEAR_ZERO
) return 0;
54 for (j
= i
+ 1; j
<= n
- 1; j
++) c
+= A
[i
* stride
+ j
] * x
[j
];
55 x
[i
] = (b
[i
] - c
) / A
[i
* stride
+ i
];
61 ////////////////////////////////////////////////////////////////////////////////
63 // Solves for n-dim x in a least squares sense to minimize |Ax - b|^2
64 // The solution is simply x = (A'A)^-1 A'b or simply the solution for
65 // the system: A'A x = A'b
66 static INLINE
int least_squares(int n
, double *A
, int rows
, int stride
,
67 double *b
, double *scratch
, double *x
) {
69 double *scratch_
= NULL
;
72 scratch_
= (double *)aom_malloc(sizeof(*scratch
) * n
* (n
+ 1));
76 Atb
= scratch
+ n
* n
;
78 for (i
= 0; i
< n
; ++i
) {
79 for (j
= i
; j
< n
; ++j
) {
81 for (k
= 0; k
< rows
; ++k
)
82 AtA
[i
* n
+ j
] += A
[k
* stride
+ i
] * A
[k
* stride
+ j
];
83 AtA
[j
* n
+ i
] = AtA
[i
* n
+ j
];
86 for (k
= 0; k
< rows
; ++k
) Atb
[i
] += A
[k
* stride
+ i
] * b
[k
];
88 int ret
= linsolve(n
, AtA
, n
, Atb
, x
);
94 static INLINE
void multiply_mat(const double *m1
, const double *m2
, double *res
,
95 const int m1_rows
, const int inner_dim
,
100 for (row
= 0; row
< m1_rows
; ++row
) {
101 for (col
= 0; col
< m2_cols
; ++col
) {
103 for (inner
= 0; inner
< inner_dim
; ++inner
)
104 sum
+= m1
[row
* inner_dim
+ inner
] * m2
[inner
* m2_cols
+ col
];
111 // The functions below are needed only for homography computation
112 // Remove if the homography models are not used.
114 ///////////////////////////////////////////////////////////////////////////////
116 // Adopted from Numerical Recipes in C
118 static INLINE
double apply_sign(double a
, double b
) {
119 return ((b
) >= 0 ? fabs(a
) : -fabs(a
));
122 static INLINE
double pythag(double a
, double b
) {
124 const double absa
= fabs(a
);
125 const double absb
= fabs(b
);
129 return absa
* sqrt(1.0 + ct
* ct
);
132 return (absb
== 0) ? 0 : absb
* sqrt(1.0 + ct
* ct
);
136 static INLINE
int svdcmp(double **u
, int m
, int n
, double w
[], double **v
) {
137 const int max_its
= 30;
138 int flag
, i
, its
, j
, jj
, k
, l
, nm
;
139 double anorm
, c
, f
, g
, h
, s
, scale
, x
, y
, z
;
140 double *rv1
= (double *)aom_malloc(sizeof(*rv1
) * (n
+ 1));
141 g
= scale
= anorm
= 0.0;
142 for (i
= 0; i
< n
; i
++) {
147 for (k
= i
; k
< m
; k
++) scale
+= fabs(u
[k
][i
]);
149 for (k
= i
; k
< m
; k
++) {
151 s
+= u
[k
][i
] * u
[k
][i
];
154 g
= -apply_sign(sqrt(s
), f
);
157 for (j
= l
; j
< n
; j
++) {
158 for (s
= 0.0, k
= i
; k
< m
; k
++) s
+= u
[k
][i
] * u
[k
][j
];
160 for (k
= i
; k
< m
; k
++) u
[k
][j
] += f
* u
[k
][i
];
162 for (k
= i
; k
< m
; k
++) u
[k
][i
] *= scale
;
167 if (i
< m
&& i
!= n
- 1) {
168 for (k
= l
; k
< n
; k
++) scale
+= fabs(u
[i
][k
]);
170 for (k
= l
; k
< n
; k
++) {
172 s
+= u
[i
][k
] * u
[i
][k
];
175 g
= -apply_sign(sqrt(s
), f
);
178 for (k
= l
; k
< n
; k
++) rv1
[k
] = u
[i
][k
] / h
;
179 for (j
= l
; j
< m
; j
++) {
180 for (s
= 0.0, k
= l
; k
< n
; k
++) s
+= u
[j
][k
] * u
[i
][k
];
181 for (k
= l
; k
< n
; k
++) u
[j
][k
] += s
* rv1
[k
];
183 for (k
= l
; k
< n
; k
++) u
[i
][k
] *= scale
;
186 anorm
= fmax(anorm
, (fabs(w
[i
]) + fabs(rv1
[i
])));
189 for (i
= n
- 1; i
>= 0; i
--) {
192 for (j
= l
; j
< n
; j
++) v
[j
][i
] = (u
[i
][j
] / u
[i
][l
]) / g
;
193 for (j
= l
; j
< n
; j
++) {
194 for (s
= 0.0, k
= l
; k
< n
; k
++) s
+= u
[i
][k
] * v
[k
][j
];
195 for (k
= l
; k
< n
; k
++) v
[k
][j
] += s
* v
[k
][i
];
198 for (j
= l
; j
< n
; j
++) v
[i
][j
] = v
[j
][i
] = 0.0;
204 for (i
= AOMMIN(m
, n
) - 1; i
>= 0; i
--) {
207 for (j
= l
; j
< n
; j
++) u
[i
][j
] = 0.0;
210 for (j
= l
; j
< n
; j
++) {
211 for (s
= 0.0, k
= l
; k
< m
; k
++) s
+= u
[k
][i
] * u
[k
][j
];
212 f
= (s
/ u
[i
][i
]) * g
;
213 for (k
= i
; k
< m
; k
++) u
[k
][j
] += f
* u
[k
][i
];
215 for (j
= i
; j
< m
; j
++) u
[j
][i
] *= g
;
217 for (j
= i
; j
< m
; j
++) u
[j
][i
] = 0.0;
221 for (k
= n
- 1; k
>= 0; k
--) {
222 for (its
= 0; its
< max_its
; its
++) {
224 for (l
= k
; l
>= 0; l
--) {
226 if ((double)(fabs(rv1
[l
]) + anorm
) == anorm
|| nm
< 0) {
230 if ((double)(fabs(w
[nm
]) + anorm
) == anorm
) break;
235 for (i
= l
; i
<= k
; i
++) {
238 if ((double)(fabs(f
) + anorm
) == anorm
) break;
245 for (j
= 0; j
< m
; j
++) {
248 u
[j
][nm
] = y
* c
+ z
* s
;
249 u
[j
][i
] = z
* c
- y
* s
;
257 for (j
= 0; j
< n
; j
++) v
[j
][k
] = -v
[j
][k
];
261 if (its
== max_its
- 1) {
271 f
= ((y
- z
) * (y
+ z
) + (g
- h
) * (g
+ h
)) / (2.0 * h
* y
);
273 f
= ((x
- z
) * (x
+ z
) + h
* ((y
/ (f
+ apply_sign(g
, f
))) - h
)) / x
;
275 for (j
= l
; j
<= nm
; j
++) {
289 for (jj
= 0; jj
< n
; jj
++) {
292 v
[jj
][j
] = x
* c
+ z
* s
;
293 v
[jj
][i
] = z
* c
- x
* s
;
304 for (jj
= 0; jj
< m
; jj
++) {
307 u
[jj
][j
] = y
* c
+ z
* s
;
308 u
[jj
][i
] = z
* c
- y
* s
;
320 static INLINE
int SVD(double *U
, double *W
, double *V
, double *matx
, int M
,
322 // Assumes allocation for U is MxN
323 double **nrU
= (double **)aom_malloc((M
) * sizeof(*nrU
));
324 double **nrV
= (double **)aom_malloc((N
) * sizeof(*nrV
));
327 problem
= !(nrU
&& nrV
);
329 for (i
= 0; i
< M
; i
++) {
332 for (i
= 0; i
< N
; i
++) {
336 if (nrU
) aom_free(nrU
);
337 if (nrV
) aom_free(nrV
);
341 /* copy from given matx into nrU */
342 for (i
= 0; i
< M
; i
++) {
343 memcpy(&(nrU
[i
][0]), matx
+ N
* i
, N
* sizeof(*matx
));
346 /* HERE IT IS: do SVD */
347 if (svdcmp(nrU
, M
, N
, W
, nrV
)) {
353 /* aom_free Numerical Recipes arrays */
360 #endif // AOM_AOM_DSP_MATHUTILS_H_