1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // MSVC++ requires this to be set before any other includes to get M_PI.
6 #define _USE_MATH_DEFINES
8 #include "media/filters/wsola_internals.h"
14 #include "base/logging.h"
15 #include "base/memory/scoped_ptr.h"
16 #include "media/base/audio_bus.h"
22 bool InInterval(int n
, Interval q
) {
23 return n
>= q
.first
&& n
<= q
.second
;
26 float MultiChannelSimilarityMeasure(const float* dot_prod_a_b
,
27 const float* energy_a
,
28 const float* energy_b
,
30 const float kEpsilon
= 1e-12f
;
31 float similarity_measure
= 0.0f
;
32 for (int n
= 0; n
< channels
; ++n
) {
33 similarity_measure
+= dot_prod_a_b
[n
] / sqrt(energy_a
[n
] * energy_b
[n
] +
36 return similarity_measure
;
39 void MultiChannelDotProduct(const AudioBus
* a
,
45 DCHECK_EQ(a
->channels(), b
->channels());
46 DCHECK_GE(frame_offset_a
, 0);
47 DCHECK_GE(frame_offset_b
, 0);
48 DCHECK_LE(frame_offset_a
+ num_frames
, a
->frames());
49 DCHECK_LE(frame_offset_b
+ num_frames
, b
->frames());
51 memset(dot_product
, 0, sizeof(*dot_product
) * a
->channels());
52 for (int k
= 0; k
< a
->channels(); ++k
) {
53 const float* ch_a
= a
->channel(k
) + frame_offset_a
;
54 const float* ch_b
= b
->channel(k
) + frame_offset_b
;
55 for (int n
= 0; n
< num_frames
; ++n
) {
56 dot_product
[k
] += *ch_a
++ * *ch_b
++;
61 void MultiChannelMovingBlockEnergies(const AudioBus
* input
,
64 int num_blocks
= input
->frames() - (frames_per_block
- 1);
65 int channels
= input
->channels();
67 for (int k
= 0; k
< input
->channels(); ++k
) {
68 const float* input_channel
= input
->channel(k
);
72 // First block of channel |k|.
73 for (int m
= 0; m
< frames_per_block
; ++m
) {
74 energy
[k
] += input_channel
[m
] * input_channel
[m
];
77 const float* slide_out
= input_channel
;
78 const float* slide_in
= input_channel
+ frames_per_block
;
79 for (int n
= 1; n
< num_blocks
; ++n
, ++slide_in
, ++slide_out
) {
80 energy
[k
+ n
* channels
] = energy
[k
+ (n
- 1) * channels
] - *slide_out
*
81 *slide_out
+ *slide_in
* *slide_in
;
86 // Fit the curve f(x) = a * x^2 + b * x + c such that
90 // and return the maximum, assuming that y[0] <= y[1] >= y[2].
91 void QuadraticInterpolation(const float* y_values
,
93 float* extremum_value
) {
94 float a
= 0.5f
* (y_values
[2] + y_values
[0]) - y_values
[1];
95 float b
= 0.5f
* (y_values
[2] - y_values
[0]);
96 float c
= y_values
[1];
99 // The coordinates are colinear (within floating-point error).
101 *extremum_value
= y_values
[1];
103 *extremum
= -b
/ (2.f
* a
);
104 *extremum_value
= a
* (*extremum
) * (*extremum
) + b
* (*extremum
) + c
;
108 int DecimatedSearch(int decimation
,
109 Interval exclude_interval
,
110 const AudioBus
* target_block
,
111 const AudioBus
* search_segment
,
112 const float* energy_target_block
,
113 const float* energy_candidate_blocks
) {
114 int channels
= search_segment
->channels();
115 int block_size
= target_block
->frames();
116 int num_candidate_blocks
= search_segment
->frames() - (block_size
- 1);
117 scoped_ptr
<float[]> dot_prod(new float[channels
]);
118 float similarity
[3]; // Three elements for cubic interpolation.
121 MultiChannelDotProduct(target_block
, 0, search_segment
, n
, block_size
,
123 similarity
[0] = MultiChannelSimilarityMeasure(
124 dot_prod
.get(), energy_target_block
,
125 &energy_candidate_blocks
[n
* channels
], channels
);
127 // Set the starting point as optimal point.
128 float best_similarity
= similarity
[0];
129 int optimal_index
= 0;
132 if (n
>= num_candidate_blocks
) {
136 MultiChannelDotProduct(target_block
, 0, search_segment
, n
, block_size
,
138 similarity
[1] = MultiChannelSimilarityMeasure(
139 dot_prod
.get(), energy_target_block
,
140 &energy_candidate_blocks
[n
* channels
], channels
);
143 if (n
>= num_candidate_blocks
) {
144 // We cannot do any more sampling. Compare these two values and return the
146 return similarity
[1] > similarity
[0] ? decimation
: 0;
149 for (; n
< num_candidate_blocks
; n
+= decimation
) {
150 MultiChannelDotProduct(target_block
, 0, search_segment
, n
, block_size
,
153 similarity
[2] = MultiChannelSimilarityMeasure(
154 dot_prod
.get(), energy_target_block
,
155 &energy_candidate_blocks
[n
* channels
], channels
);
157 if ((similarity
[1] > similarity
[0] && similarity
[1] >= similarity
[2]) ||
158 (similarity
[1] >= similarity
[0] && similarity
[1] > similarity
[2])) {
159 // A local maximum is found. Do a cubic interpolation for a better
160 // estimate of candidate maximum.
161 float normalized_candidate_index
;
162 float candidate_similarity
;
163 QuadraticInterpolation(similarity
, &normalized_candidate_index
,
164 &candidate_similarity
);
166 int candidate_index
= n
- decimation
+ static_cast<int>(
167 normalized_candidate_index
* decimation
+ 0.5f
);
168 if (candidate_similarity
> best_similarity
&&
169 !InInterval(candidate_index
, exclude_interval
)) {
170 optimal_index
= candidate_index
;
171 best_similarity
= candidate_similarity
;
173 } else if (n
+ decimation
>= num_candidate_blocks
&&
174 similarity
[2] > best_similarity
&&
175 !InInterval(n
, exclude_interval
)) {
176 // If this is the end-point and has a better similarity-measure than
177 // optimal, then we accept it as optimal point.
179 best_similarity
= similarity
[2];
181 memmove(similarity
, &similarity
[1], 2 * sizeof(*similarity
));
183 return optimal_index
;
186 int FullSearch(int low_limit
,
188 Interval exclude_interval
,
189 const AudioBus
* target_block
,
190 const AudioBus
* search_block
,
191 const float* energy_target_block
,
192 const float* energy_candidate_blocks
) {
193 int channels
= search_block
->channels();
194 int block_size
= target_block
->frames();
195 scoped_ptr
<float[]> dot_prod(new float[channels
]);
197 float best_similarity
= std::numeric_limits
<float>::min();
198 int optimal_index
= 0;
200 for (int n
= low_limit
; n
<= high_limit
; ++n
) {
201 if (InInterval(n
, exclude_interval
)) {
204 MultiChannelDotProduct(target_block
, 0, search_block
, n
, block_size
,
207 float similarity
= MultiChannelSimilarityMeasure(
208 dot_prod
.get(), energy_target_block
,
209 &energy_candidate_blocks
[n
* channels
], channels
);
211 if (similarity
> best_similarity
) {
212 best_similarity
= similarity
;
217 return optimal_index
;
220 int OptimalIndex(const AudioBus
* search_block
,
221 const AudioBus
* target_block
,
222 Interval exclude_interval
) {
223 int channels
= search_block
->channels();
224 DCHECK_EQ(channels
, target_block
->channels());
225 int target_size
= target_block
->frames();
226 int num_candidate_blocks
= search_block
->frames() - (target_size
- 1);
228 // This is a compromise between complexity reduction and search accuracy. I
229 // don't have a proof that down sample of order 5 is optimal. One can compute
230 // a decimation factor that minimizes complexity given the size of
231 // |search_block| and |target_block|. However, my experiments show the rate of
232 // missing the optimal index is significant. This value is chosen
233 // heuristically based on experiments.
234 const int kSearchDecimation
= 5;
236 scoped_ptr
<float[]> energy_target_block(new float[channels
]);
237 scoped_ptr
<float[]> energy_candidate_blocks(
238 new float[channels
* num_candidate_blocks
]);
240 // Energy of all candid frames.
241 MultiChannelMovingBlockEnergies(search_block
, target_size
,
242 energy_candidate_blocks
.get());
244 // Energy of target frame.
245 MultiChannelDotProduct(target_block
, 0, target_block
, 0,
246 target_size
, energy_target_block
.get());
248 int optimal_index
= DecimatedSearch(kSearchDecimation
,
249 exclude_interval
, target_block
,
250 search_block
, energy_target_block
.get(),
251 energy_candidate_blocks
.get());
253 int lim_low
= std::max(0, optimal_index
- kSearchDecimation
);
254 int lim_high
= std::min(num_candidate_blocks
- 1,
255 optimal_index
+ kSearchDecimation
);
256 return FullSearch(lim_low
, lim_high
, exclude_interval
, target_block
,
257 search_block
, energy_target_block
.get(),
258 energy_candidate_blocks
.get());
261 void GetSymmetricHanningWindow(int window_length
, float* window
) {
262 const float scale
= 2.0f
* M_PI
/ window_length
;
263 for (int n
= 0; n
< window_length
; ++n
)
264 window
[n
] = 0.5f
* (1.0f
- cosf(n
* scale
));
267 } // namespace internal