frameworks/ex/variablespeed/jni/sola_time_scaler.cc

   1 /*
   2  * Copyright (C) 2011 The Android Open Source Project
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include "sola_time_scaler.h"
  18
  19 #include <math.h>
  20 #include <hlogging.h>
  21 #include <algorithm>
  22
  23 #include "ring_buffer.h"
  24
  25 #define FLAGS_sola_ring_buffer 2.0
  26 #define FLAGS_sola_enable_correlation true
  27
  28
  29 namespace video_editing {
  30
  31 // Returns a cross-correlation score for the specified buffers.
  32 int SolaAnalyzer::Correlate(const float* buffer1, const float* buffer2,
  33                             int num_frames) {
  34   CHECK(initialized_);
  35
  36   int score = 0;
  37   num_frames *= num_channels_;
  38   while (num_frames-- > 0) {
  39     // Increment the score if the sign bits match.
  40     score += ((bit_cast<int32>(*buffer1++) ^ bit_cast<int32>(*buffer2++)) >= 0)
  41               ? 1 : 0;
  42   }
  43   return score;
  44 }
  45
  46 // Trivial SolaAnalyzer class to bypass correlation.
  47 class SolaBypassAnalyzer : public SolaAnalyzer {
  48  public:
  49   SolaBypassAnalyzer() { }
  50   virtual int Correlate(const float*, const float*, int num_frames) {
  51     return num_frames * num_channels_;
  52   }
  53 };
  54
  55
  56 // Default constructor.
  57 SolaTimeScaler::SolaTimeScaler()
  58     : input_buffer_(NULL), output_buffer_(NULL), analyzer_(NULL) {
  59   sample_rate_ = 0;
  60   num_channels_ = 0;
  61
  62   draining_ = false;
  63   initialized_ = false;
  64 }
  65
  66 SolaTimeScaler::~SolaTimeScaler() {
  67   delete input_buffer_;
  68   delete output_buffer_;
  69   delete analyzer_;
  70 }
  71
  72 // Injects a SolaAnalyzer instance for analyzing signal frames.
  73 void SolaTimeScaler::set_analyzer(SolaAnalyzer* analyzer) {
  74   MutexLock lock(&mutex_);  // lock out processing while updating
  75   delete analyzer_;
  76   analyzer_ = analyzer;
  77 }
  78
  79 // Initializes a SOLA timescaler.
  80 void SolaTimeScaler::Init(double sample_rate,
  81                           int num_channels,
  82                           double initial_speed,
  83                           double window_duration,
  84                           double overlap_duration) {
  85   MutexLock lock(&mutex_);  // lock out processing while updating
  86
  87   sample_rate_ = sample_rate;
  88   num_channels_ = num_channels;
  89   speed_ = initial_speed;
  90   window_duration_ = window_duration;
  91   overlap_duration_ = overlap_duration;
  92
  93   initialized_ = true;
  94   GenerateParameters();
  95   Reset();
  96 }
  97
  98 // Adjusts the rate scaling factor.
  99 void SolaTimeScaler::set_speed(double speed) {
 100   MutexLock lock(&mutex_);  // lock out processing while updating
 101
 102   speed_ = speed;
 103   GenerateParameters();
 104 }
 105
 106 // Generates processing parameters from the current settings.
 107 void SolaTimeScaler::GenerateParameters() {
 108   if (speed_ < 0.1) {
 109     LOGE("Requested speed %fx limited to 0.1x", speed_);
 110     speed_ = 0.1;
 111   } else if (speed_ > 8.0) {
 112     LOGE("Requested speed %fx limited to 8.0x", speed_);
 113     speed_ = 8.0;
 114   }
 115
 116   ratio_ = 1.0 / speed_;
 117
 118   num_window_frames_ = nearbyint(sample_rate_ * window_duration_);
 119
 120   // Limit the overlap to half the window size, and round up to an odd number.
 121   // Half of overlap window (rounded down) is also a useful number.
 122   overlap_duration_ = min(overlap_duration_, window_duration_ / 2.0);
 123   num_overlap_frames_ = nearbyint(sample_rate_ * overlap_duration_);
 124   num_overlap_frames_ |= 1;
 125   half_overlap_frames_ = num_overlap_frames_ >> 1;
 126
 127   if (speed_ >= 1.) {
 128     // For compression (speed up), adjacent input windows overlap in the output.
 129     input_window_offset_ = num_window_frames_;
 130     target_merge_offset_ = nearbyint(num_window_frames_ * ratio_);
 131   } else {
 132     // For expansion (slow down), each input window start point overlaps the
 133     // previous, and they are placed adjacently in the output
 134     // (+/- half the overlap size).
 135     input_window_offset_ = nearbyint(num_window_frames_ * speed_);
 136     target_merge_offset_ = num_window_frames_;
 137   }
 138
 139   // Make sure we copy enough extra data to be able to perform a
 140   // frame correlation over the range of target merge point +/- half overlap,
 141   // even when the previous merge point was adjusted backwards a half overlap.
 142   max_frames_to_merge_ = max(num_window_frames_,
 143       target_merge_offset_ + (2 * num_overlap_frames_));
 144   min_output_to_hold_=
 145       max_frames_to_merge_ + num_overlap_frames_ - target_merge_offset_;
 146 }
 147
 148 // The input buffer has one writer and reader.
 149 // The output buffer has one reader/updater, and one reader/consumer.
 150 static const int kInputReader = 0;
 151 static const int kOutputAnalysis = 0;
 152 static const int kOutputConsumer = 1;
 153
 154 void SolaTimeScaler::Reset() {
 155   CHECK(initialized_);
 156   double duration = max(FLAGS_sola_ring_buffer, 20. * window_duration_);
 157   draining_ = false;
 158
 159   delete input_buffer_;
 160   input_buffer_ = new RingBuffer();
 161   input_buffer_->Init(static_cast<int>
 162       (sample_rate_ * duration), num_channels_, 1);
 163
 164   delete output_buffer_;
 165   output_buffer_ = new RingBuffer();
 166   output_buffer_->Init(static_cast<int>
 167       (sample_rate_ * ratio_ * duration), num_channels_, 2);
 168
 169   if (analyzer_ == NULL) {
 170     if (FLAGS_sola_enable_correlation) {
 171       analyzer_ = new SolaAnalyzer();
 172     } else {
 173       analyzer_ = new SolaBypassAnalyzer();
 174     }
 175   }
 176   analyzer_->Init(sample_rate_, num_channels_);
 177 }
 178
 179 // Returns the number of frames that the input buffer can accept.
 180 int SolaTimeScaler::input_limit() const {
 181   CHECK(initialized_);
 182   return input_buffer_->overhead();
 183 }
 184
 185 // Returns the number of available output frames.
 186 int SolaTimeScaler::available() {
 187   CHECK(initialized_);
 188
 189   int available = output_buffer_->available(kOutputConsumer);
 190   if (available > min_output_to_hold_) {
 191     available -= min_output_to_hold_;
 192   } else if (draining_) {
 193     Process();
 194     available = output_buffer_->available(kOutputConsumer);
 195     if (available > min_output_to_hold_) {
 196       available -= min_output_to_hold_;
 197     }
 198   } else {
 199     available = 0;
 200   }
 201   return available;
 202 }
 203
 204 void SolaTimeScaler::Drain() {
 205   CHECK(initialized_);
 206
 207   draining_ = true;
 208 }
 209
 210
 211 // Feeds audio to the timescaler, and processes as much data as possible.
 212 int SolaTimeScaler::InjectSamples(float* buffer, int num_frames) {
 213   CHECK(initialized_);
 214
 215   // Do not write more frames than the buffer can accept.
 216   num_frames = min(input_limit(), num_frames);
 217   if (!num_frames) {
 218     return 0;
 219   }
 220
 221   // Copy samples to the input buffer and then process whatever can be consumed.
 222   input_buffer_->Write(buffer, num_frames);
 223   Process();
 224   return num_frames;
 225 }
 226
 227 // Retrieves audio data from the timescaler.
 228 int SolaTimeScaler::RetrieveSamples(float* buffer, int num_frames) {
 229   CHECK(initialized_);
 230
 231   // Do not read more frames than available.
 232   num_frames = min(available(), num_frames);
 233   if (!num_frames) {
 234     return 0;
 235   }
 236
 237   output_buffer_->Copy(kOutputConsumer, buffer, num_frames);
 238   output_buffer_->Seek(kOutputConsumer,
 239                        output_buffer_->Tell(kOutputConsumer) + num_frames);
 240
 241   return num_frames;
 242 }
 243
 244 // Munges input samples to produce output.
 245 bool SolaTimeScaler::Process() {
 246   CHECK(initialized_);
 247   bool generated_data = false;
 248
 249   // We can only process data if there is sufficient input available
 250   // (or we are draining the latency), and there is sufficient room
 251   // for output to be merged.
 252   while (((input_buffer_->available(kInputReader) > max_frames_to_merge_) ||
 253          draining_) && (output_buffer_->overhead() >= max_frames_to_merge_)) {
 254     MutexLock lock(&mutex_);  // lock out updates while processing each window
 255
 256     // Determine the number of samples to merge into the output.
 257     int input_count =
 258         min(input_buffer_->available(kInputReader), max_frames_to_merge_);
 259     if (input_count == 0) {
 260       break;
 261     }
 262     // The input reader always points to the next window to process.
 263     float* input_pointer = input_buffer_->GetPointer(kInputReader, input_count);
 264
 265     // The analysis reader always points to the ideal target merge point,
 266     // minus half an overlap window (ie, the starting point for correlation).
 267     // That means the available data from that point equals the number
 268     // of samples that must be cross-faded.
 269     int output_merge_cnt = output_buffer_->available(kOutputAnalysis);
 270     float* output_pointer =
 271         output_buffer_->GetPointer(kOutputAnalysis, output_merge_cnt);
 272
 273     // If there is not enough data to do a proper correlation,
 274     // just merge at the ideal target point. Otherwise,
 275     // find the best correlation score, working from the center out.
 276     int merge_offset = min(output_merge_cnt, half_overlap_frames_);
 277
 278     if ((output_merge_cnt >= (2 * num_overlap_frames_)) &&
 279         (input_count >= num_overlap_frames_)) {
 280       int best_offset = merge_offset;
 281       int best_score = 0;
 282       int score;
 283       for (int i = 0; i <= half_overlap_frames_; ++i) {
 284         score = analyzer_->Correlate(input_pointer,
 285             output_pointer + ((merge_offset + i) * num_channels_),
 286             num_overlap_frames_);
 287         if (score > best_score) {
 288           best_score = score;
 289           best_offset = merge_offset + i;
 290           if (score == (num_overlap_frames_ * num_channels_)) {
 291             break;  // It doesn't get better than perfect.
 292           }
 293         }
 294         if (i > 0) {
 295           score = analyzer_->Correlate(input_pointer,
 296               output_pointer + ((merge_offset - i) * num_channels_),
 297               num_overlap_frames_);
 298           if (score > best_score) {
 299             best_score = score;
 300             best_offset = merge_offset - i;
 301             if (score == (num_overlap_frames_ * num_channels_)) {
 302               break;  // It doesn't get better than perfect.
 303             }
 304           }
 305         }
 306       }
 307       merge_offset = best_offset;
 308     } else if ((output_merge_cnt > 0) && !draining_) {
 309       LOGE("no correlation performed");
 310     }
 311
 312     // Crossfade the overlap between input and output, and then
 313     // copy in the remaining input.
 314     int crossfade_count = max(0, (output_merge_cnt - merge_offset));
 315     crossfade_count = min(crossfade_count, input_count);
 316     int remaining_count = input_count - crossfade_count;
 317
 318     float* merge_pointer = output_pointer + (merge_offset * num_channels_);
 319     float flt_count = static_cast<float>(crossfade_count);
 320     for (int i = 0; i < crossfade_count; ++i) {
 321       // Linear cross-fade, for now.
 322       float input_scale = static_cast<float>(i) / flt_count;
 323       float output_scale = 1. - input_scale;
 324       for (int j = 0; j < num_channels_; ++j) {
 325         *merge_pointer = (*merge_pointer * output_scale) +
 326                          (*input_pointer++ * input_scale);
 327         ++merge_pointer;
 328       }
 329     }
 330     // Copy the merged buffer back into the output, if necessary, and
 331     // append the rest of the window.
 332     output_buffer_->MergeBack(kOutputAnalysis,
 333                               output_pointer, output_merge_cnt);
 334     output_buffer_->Write(input_pointer, remaining_count);
 335
 336     // Advance the output analysis pointer to the next target merge point,
 337     // minus half an overlap window.  The target merge point is always
 338     // calculated as a delta from the previous ideal target, not the actual
 339     // target, to avoid drift.
 340     int output_advance = target_merge_offset_;
 341     if (output_merge_cnt < half_overlap_frames_) {
 342       // On the first window, back up the pointer for the next correlation.
 343       // Thereafter, that compensation is preserved.
 344       output_advance -= half_overlap_frames_;
 345     }
 346
 347     // Don't advance beyond the available data, when finishing up.
 348     if (draining_) {
 349       output_advance =
 350           min(output_advance, output_buffer_->available(kOutputAnalysis));
 351     }
 352     output_buffer_->Seek(kOutputAnalysis,
 353         output_buffer_->Tell(kOutputAnalysis) + output_advance);
 354
 355     // Advance the input pointer beyond the frames that are no longer needed.
 356     input_buffer_->Seek(kInputReader, input_buffer_->Tell(kInputReader) +
 357                         min(input_count, input_window_offset_));
 358
 359     if ((crossfade_count + remaining_count) > 0) {
 360       generated_data = true;
 361     }
 362   }  // while (more to process)
 363   return generated_data;
 364 }
 365
 366 }  // namespace video_editing