media/base/audio_splicer.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "media/base/audio_splicer.h"
   6
   7 #include <cstdlib>
   8 #include <deque>
   9
  10 #include "base/logging.h"
  11 #include "media/base/audio_buffer.h"
  12 #include "media/base/audio_bus.h"
  13 #include "media/base/audio_decoder_config.h"
  14 #include "media/base/audio_timestamp_helper.h"
  15 #include "media/base/vector_math.h"
  16
  17 namespace media {
  18
  19 // Minimum gap size needed before the splicer will take action to
  20 // fill a gap. This avoids periodically inserting and then dropping samples
  21 // when the buffer timestamps are slightly off because of timestamp rounding
  22 // in the source content. Unit is frames.
  23 static const int kMinGapSize = 2;
  24
  25 // AudioBuffer::TrimStart() is not as accurate as the timestamp helper, so
  26 // manually adjust the duration and timestamp after trimming.
  27 static void AccurateTrimStart(int frames_to_trim,
  28                               const scoped_refptr<AudioBuffer> buffer,
  29                               const AudioTimestampHelper& timestamp_helper) {
  30   buffer->TrimStart(frames_to_trim);
  31   buffer->set_timestamp(timestamp_helper.GetTimestamp());
  32 }
  33
  34 // Returns an AudioBus whose frame buffer is backed by the provided AudioBuffer.
  35 static scoped_ptr<AudioBus> CreateAudioBufferWrapper(
  36     const scoped_refptr<AudioBuffer>& buffer) {
  37   scoped_ptr<AudioBus> wrapper =
  38       AudioBus::CreateWrapper(buffer->channel_count());
  39   wrapper->set_frames(buffer->frame_count());
  40   for (int ch = 0; ch < buffer->channel_count(); ++ch) {
  41     wrapper->SetChannelData(
  42         ch, reinterpret_cast<float*>(buffer->channel_data()[ch]));
  43   }
  44   return wrapper.Pass();
  45 }
  46
  47 class AudioStreamSanitizer {
  48  public:
  49   explicit AudioStreamSanitizer(int samples_per_second);
  50   ~AudioStreamSanitizer();
  51
  52   // Resets the sanitizer state by clearing the output buffers queue, and
  53   // resetting the timestamp helper.
  54   void Reset();
  55
  56   // Similar to Reset(), but initializes the timestamp helper with the given
  57   // parameters.
  58   void ResetTimestampState(int64 frame_count, base::TimeDelta base_timestamp);
  59
  60   // Adds a new buffer full of samples or end of stream buffer to the splicer.
  61   // Returns true if the buffer was accepted. False is returned if an error
  62   // occurred.
  63   bool AddInput(const scoped_refptr<AudioBuffer>& input);
  64
  65   // Returns true if the sanitizer has a buffer to return.
  66   bool HasNextBuffer() const;
  67
  68   // Removes the next buffer from the output buffer queue and returns it; should
  69   // only be called if HasNextBuffer() returns true.
  70   scoped_refptr<AudioBuffer> GetNextBuffer();
  71
  72   // Returns the total frame count of all buffers available for output.
  73   int GetFrameCount() const;
  74
  75   const AudioTimestampHelper& timestamp_helper() {
  76     return output_timestamp_helper_;
  77   }
  78
  79   // Transfer all buffers into |output|.  Returns false if AddInput() on the
  80   // |output| sanitizer fails for any buffer removed from |this|.
  81   bool DrainInto(AudioStreamSanitizer* output);
  82
  83  private:
  84   void AddOutputBuffer(const scoped_refptr<AudioBuffer>& buffer);
  85
  86   AudioTimestampHelper output_timestamp_helper_;
  87   bool received_end_of_stream_;
  88
  89   typedef std::deque<scoped_refptr<AudioBuffer> > BufferQueue;
  90   BufferQueue output_buffers_;
  91
  92   DISALLOW_ASSIGN(AudioStreamSanitizer);
  93 };
  94
  95 AudioStreamSanitizer::AudioStreamSanitizer(int samples_per_second)
  96     : output_timestamp_helper_(samples_per_second),
  97       received_end_of_stream_(false) {}
  98
  99 AudioStreamSanitizer::~AudioStreamSanitizer() {}
 100
 101 void AudioStreamSanitizer::Reset() {
 102   ResetTimestampState(0, kNoTimestamp());
 103 }
 104
 105 void AudioStreamSanitizer::ResetTimestampState(int64 frame_count,
 106                                                base::TimeDelta base_timestamp) {
 107   output_buffers_.clear();
 108   received_end_of_stream_ = false;
 109   output_timestamp_helper_.SetBaseTimestamp(base_timestamp);
 110   if (frame_count > 0)
 111     output_timestamp_helper_.AddFrames(frame_count);
 112 }
 113
 114 bool AudioStreamSanitizer::AddInput(const scoped_refptr<AudioBuffer>& input) {
 115   DCHECK(!received_end_of_stream_ || input->end_of_stream());
 116
 117   if (input->end_of_stream()) {
 118     output_buffers_.push_back(input);
 119     received_end_of_stream_ = true;
 120     return true;
 121   }
 122
 123   DCHECK(input->timestamp() != kNoTimestamp());
 124   DCHECK(input->duration() > base::TimeDelta());
 125   DCHECK_GT(input->frame_count(), 0);
 126
 127   if (output_timestamp_helper_.base_timestamp() == kNoTimestamp())
 128     output_timestamp_helper_.SetBaseTimestamp(input->timestamp());
 129
 130   if (output_timestamp_helper_.base_timestamp() > input->timestamp()) {
 131     DVLOG(1) << "Input timestamp is before the base timestamp.";
 132     return false;
 133   }
 134
 135   const base::TimeDelta timestamp = input->timestamp();
 136   const base::TimeDelta expected_timestamp =
 137       output_timestamp_helper_.GetTimestamp();
 138   const base::TimeDelta delta = timestamp - expected_timestamp;
 139
 140   if (std::abs(delta.InMilliseconds()) >
 141       AudioSplicer::kMaxTimeDeltaInMilliseconds) {
 142     DVLOG(1) << "Timestamp delta too large: " << delta.InMicroseconds() << "us";
 143     return false;
 144   }
 145
 146   int frames_to_fill = 0;
 147   if (delta != base::TimeDelta())
 148     frames_to_fill = output_timestamp_helper_.GetFramesToTarget(timestamp);
 149
 150   if (frames_to_fill == 0 || std::abs(frames_to_fill) < kMinGapSize) {
 151     AddOutputBuffer(input);
 152     return true;
 153   }
 154
 155   if (frames_to_fill > 0) {
 156     DVLOG(1) << "Gap detected @ " << expected_timestamp.InMicroseconds()
 157              << " us: " << delta.InMicroseconds() << " us";
 158
 159     // Create a buffer with enough silence samples to fill the gap and
 160     // add it to the output buffer.
 161     scoped_refptr<AudioBuffer> gap =
 162         AudioBuffer::CreateEmptyBuffer(input->channel_layout(),
 163                                        input->channel_count(),
 164                                        input->sample_rate(),
 165                                        frames_to_fill,
 166                                        expected_timestamp);
 167     AddOutputBuffer(gap);
 168
 169     // Add the input buffer now that the gap has been filled.
 170     AddOutputBuffer(input);
 171     return true;
 172   }
 173
 174   // Overlapping buffers marked as splice frames are handled by AudioSplicer,
 175   // but decoder and demuxer quirks may sometimes produce overlapping samples
 176   // which need to be sanitized.
 177   //
 178   // A crossfade can't be done here because only the current buffer is available
 179   // at this point, not previous buffers.
 180   DVLOG(1) << "Overlap detected @ " << expected_timestamp.InMicroseconds()
 181            << " us: " << -delta.InMicroseconds() << " us";
 182
 183   const int frames_to_skip = -frames_to_fill;
 184   if (input->frame_count() <= frames_to_skip) {
 185     DVLOG(1) << "Dropping whole buffer";
 186     return true;
 187   }
 188
 189   // Copy the trailing samples that do not overlap samples already output
 190   // into a new buffer.  Add this new buffer to the output queue.
 191   //
 192   // TODO(acolwell): Implement a cross-fade here so the transition is less
 193   // jarring.
 194   AccurateTrimStart(frames_to_skip, input, output_timestamp_helper_);
 195   AddOutputBuffer(input);
 196   return true;
 197 }
 198
 199 bool AudioStreamSanitizer::HasNextBuffer() const {
 200   return !output_buffers_.empty();
 201 }
 202
 203 scoped_refptr<AudioBuffer> AudioStreamSanitizer::GetNextBuffer() {
 204   scoped_refptr<AudioBuffer> ret = output_buffers_.front();
 205   output_buffers_.pop_front();
 206   return ret;
 207 }
 208
 209 void AudioStreamSanitizer::AddOutputBuffer(
 210     const scoped_refptr<AudioBuffer>& buffer) {
 211   output_timestamp_helper_.AddFrames(buffer->frame_count());
 212   output_buffers_.push_back(buffer);
 213 }
 214
 215 int AudioStreamSanitizer::GetFrameCount() const {
 216   int frame_count = 0;
 217   for (BufferQueue::const_iterator it = output_buffers_.begin();
 218        it != output_buffers_.end(); ++it) {
 219     frame_count += (*it)->frame_count();
 220   }
 221   return frame_count;
 222 }
 223
 224 bool AudioStreamSanitizer::DrainInto(AudioStreamSanitizer* output) {
 225   while (HasNextBuffer()) {
 226     if (!output->AddInput(GetNextBuffer()))
 227       return false;
 228   }
 229   return true;
 230 }
 231
 232 AudioSplicer::AudioSplicer(int samples_per_second)
 233     : max_crossfade_duration_(
 234           base::TimeDelta::FromMilliseconds(kCrossfadeDurationInMilliseconds)),
 235       splice_timestamp_(kNoTimestamp()),
 236       max_splice_end_timestamp_(kNoTimestamp()),
 237       output_sanitizer_(new AudioStreamSanitizer(samples_per_second)),
 238       pre_splice_sanitizer_(new AudioStreamSanitizer(samples_per_second)),
 239       post_splice_sanitizer_(new AudioStreamSanitizer(samples_per_second)),
 240       have_all_pre_splice_buffers_(false) {}
 241
 242 AudioSplicer::~AudioSplicer() {}
 243
 244 void AudioSplicer::Reset() {
 245   output_sanitizer_->Reset();
 246   pre_splice_sanitizer_->Reset();
 247   post_splice_sanitizer_->Reset();
 248   have_all_pre_splice_buffers_ = false;
 249   reset_splice_timestamps();
 250 }
 251
 252 bool AudioSplicer::AddInput(const scoped_refptr<AudioBuffer>& input) {
 253   // If we're not processing a splice, add the input to the output queue.
 254   if (splice_timestamp_ == kNoTimestamp()) {
 255     DCHECK(!pre_splice_sanitizer_->HasNextBuffer());
 256     DCHECK(!post_splice_sanitizer_->HasNextBuffer());
 257     return output_sanitizer_->AddInput(input);
 258   }
 259
 260   const AudioTimestampHelper& output_ts_helper =
 261       output_sanitizer_->timestamp_helper();
 262
 263   if (!have_all_pre_splice_buffers_) {
 264     DCHECK(!input->end_of_stream());
 265
 266     // If the provided buffer is entirely before the splice point it can also be
 267     // added to the output queue.
 268     if (input->timestamp() + input->duration() < splice_timestamp_) {
 269       DCHECK(!pre_splice_sanitizer_->HasNextBuffer());
 270       return output_sanitizer_->AddInput(input);
 271     }
 272
 273     // If we've encountered the first pre splice buffer, reset the pre splice
 274     // sanitizer based on |output_sanitizer_|.  This is done so that gaps and
 275     // overlaps between buffers across the sanitizers are accounted for prior
 276     // to calculating crossfade.
 277     if (!pre_splice_sanitizer_->HasNextBuffer()) {
 278       pre_splice_sanitizer_->ResetTimestampState(
 279           output_ts_helper.frame_count(), output_ts_helper.base_timestamp());
 280     }
 281
 282     return pre_splice_sanitizer_->AddInput(input);
 283   }
 284
 285   // The first post splice buffer is expected to match |splice_timestamp_|.
 286   if (!post_splice_sanitizer_->HasNextBuffer())
 287     CHECK(splice_timestamp_ == input->timestamp());
 288
 289   // At this point we have all the fade out preroll buffers from the decoder.
 290   // We now need to wait until we have enough data to perform the crossfade (or
 291   // we receive an end of stream).
 292   if (!post_splice_sanitizer_->AddInput(input))
 293     return false;
 294
 295   // Ensure |output_sanitizer_| has a valid base timestamp so we can use it for
 296   // timestamp calculations.
 297   if (output_ts_helper.base_timestamp() == kNoTimestamp()) {
 298     output_sanitizer_->ResetTimestampState(
 299         0, pre_splice_sanitizer_->timestamp_helper().base_timestamp());
 300   }
 301
 302   // If a splice frame was incorrectly marked due to poor demuxed timestamps, we
 303   // may not actually have a splice.  Here we check if any frames exist before
 304   // the splice.  In this case, just transfer all data to the output sanitizer.
 305   const int frames_before_splice =
 306       output_ts_helper.GetFramesToTarget(splice_timestamp_);
 307   if (frames_before_splice < 0 ||
 308       pre_splice_sanitizer_->GetFrameCount() <= frames_before_splice) {
 309     CHECK(pre_splice_sanitizer_->DrainInto(output_sanitizer_.get()));
 310
 311     // If the file contains incorrectly muxed timestamps, there may be huge gaps
 312     // between the demuxed and decoded timestamps.
 313     if (!post_splice_sanitizer_->DrainInto(output_sanitizer_.get()))
 314       return false;
 315
 316     reset_splice_timestamps();
 317     return true;
 318   }
 319
 320   // Wait until we have enough data to crossfade or end of stream.
 321   if (!input->end_of_stream() &&
 322       input->timestamp() + input->duration() < max_splice_end_timestamp_) {
 323     return true;
 324   }
 325
 326   scoped_refptr<AudioBuffer> crossfade_buffer;
 327   scoped_ptr<AudioBus> pre_splice =
 328       ExtractCrossfadeFromPreSplice(&crossfade_buffer);
 329
 330   // Crossfade the pre splice and post splice sections and transfer all relevant
 331   // buffers into |output_sanitizer_|.
 332   CrossfadePostSplice(pre_splice.Pass(), crossfade_buffer);
 333
 334   // Clear the splice timestamp so new splices can be accepted.
 335   reset_splice_timestamps();
 336   return true;
 337 }
 338
 339 bool AudioSplicer::HasNextBuffer() const {
 340   return output_sanitizer_->HasNextBuffer();
 341 }
 342
 343 scoped_refptr<AudioBuffer> AudioSplicer::GetNextBuffer() {
 344   return output_sanitizer_->GetNextBuffer();
 345 }
 346
 347 void AudioSplicer::SetSpliceTimestamp(base::TimeDelta splice_timestamp) {
 348   if (splice_timestamp == kNoTimestamp()) {
 349     DCHECK(splice_timestamp_ != kNoTimestamp());
 350     DCHECK(!have_all_pre_splice_buffers_);
 351     have_all_pre_splice_buffers_ = true;
 352     return;
 353   }
 354
 355   if (splice_timestamp_ == splice_timestamp)
 356     return;
 357
 358   // TODO(dalecurtis): We may need the concept of a future_splice_timestamp_ to
 359   // handle cases where another splice comes in before we've received 5ms of
 360   // data from the last one.  Leave this as a CHECK for now to figure out if
 361   // this case is possible.
 362   CHECK(splice_timestamp_ == kNoTimestamp());
 363   splice_timestamp_ = splice_timestamp;
 364   max_splice_end_timestamp_ = splice_timestamp_ + max_crossfade_duration_;
 365   pre_splice_sanitizer_->Reset();
 366   post_splice_sanitizer_->Reset();
 367   have_all_pre_splice_buffers_ = false;
 368 }
 369
 370 scoped_ptr<AudioBus> AudioSplicer::ExtractCrossfadeFromPreSplice(
 371     scoped_refptr<AudioBuffer>* crossfade_buffer) {
 372   DCHECK(crossfade_buffer);
 373   const AudioTimestampHelper& output_ts_helper =
 374       output_sanitizer_->timestamp_helper();
 375
 376   int frames_before_splice =
 377       output_ts_helper.GetFramesToTarget(splice_timestamp_);
 378
 379   // Determine crossfade frame count based on available frames in each splicer
 380   // and capping to the maximum crossfade duration.
 381   const int max_crossfade_frame_count =
 382       output_ts_helper.GetFramesToTarget(max_splice_end_timestamp_) -
 383       frames_before_splice;
 384   const int frames_to_crossfade = std::min(
 385       max_crossfade_frame_count,
 386       std::min(pre_splice_sanitizer_->GetFrameCount() - frames_before_splice,
 387                post_splice_sanitizer_->GetFrameCount()));
 388   // There must always be frames to crossfade, otherwise the splice should not
 389   // have been generated.
 390   DCHECK_GT(frames_to_crossfade, 0);
 391
 392   int frames_read = 0;
 393   scoped_ptr<AudioBus> output_bus;
 394   while (pre_splice_sanitizer_->HasNextBuffer() &&
 395          frames_read < frames_to_crossfade) {
 396     scoped_refptr<AudioBuffer> preroll = pre_splice_sanitizer_->GetNextBuffer();
 397
 398     // We don't know the channel count until we see the first buffer, so wait
 399     // until the first buffer to allocate the output AudioBus.
 400     if (!output_bus) {
 401       output_bus =
 402           AudioBus::Create(preroll->channel_count(), frames_to_crossfade);
 403       // Allocate output buffer for crossfade.
 404       *crossfade_buffer = AudioBuffer::CreateBuffer(kSampleFormatPlanarF32,
 405                                                     preroll->channel_layout(),
 406                                                     preroll->channel_count(),
 407                                                     preroll->sample_rate(),
 408                                                     frames_to_crossfade);
 409     }
 410
 411     // There may be enough of a gap introduced during decoding such that an
 412     // entire buffer exists before the splice point.
 413     if (frames_before_splice >= preroll->frame_count()) {
 414       // Adjust the number of frames remaining before the splice.  NOTE: This is
 415       // safe since |pre_splice_sanitizer_| is a continuation of the timeline in
 416       // |output_sanitizer_|.  As such we're guaranteed there are no gaps or
 417       // overlaps in the timeline between the two sanitizers.
 418       frames_before_splice -= preroll->frame_count();
 419       CHECK(output_sanitizer_->AddInput(preroll));
 420       continue;
 421     }
 422
 423     const int frames_to_read =
 424         std::min(preroll->frame_count() - frames_before_splice,
 425                  output_bus->frames() - frames_read);
 426     preroll->ReadFrames(
 427         frames_to_read, frames_before_splice, frames_read, output_bus.get());
 428     frames_read += frames_to_read;
 429
 430     // If only part of the buffer was consumed, trim it appropriately and stick
 431     // it into the output queue.
 432     if (frames_before_splice) {
 433       preroll->TrimEnd(preroll->frame_count() - frames_before_splice);
 434       CHECK(output_sanitizer_->AddInput(preroll));
 435       frames_before_splice = 0;
 436     }
 437   }
 438
 439   // Ensure outputs were properly allocated.  The method should not have been
 440   // called if there is not enough data to crossfade.
 441   // TODO(dalecurtis): Convert to DCHECK() once http://crbug.com/356073 fixed.
 442   CHECK(output_bus);
 443   CHECK(crossfade_buffer->get());
 444
 445   // All necessary buffers have been processed, it's safe to reset.
 446   pre_splice_sanitizer_->Reset();
 447   DCHECK_EQ(output_bus->frames(), frames_read);
 448   DCHECK_EQ(output_ts_helper.GetFramesToTarget(splice_timestamp_), 0);
 449   return output_bus.Pass();
 450 }
 451
 452 void AudioSplicer::CrossfadePostSplice(
 453     scoped_ptr<AudioBus> pre_splice_bus,
 454     scoped_refptr<AudioBuffer> crossfade_buffer) {
 455   // Use the calculated timestamp and duration to ensure there's no extra gaps
 456   // or overlaps to process when adding the buffer to |output_sanitizer_|.
 457   const AudioTimestampHelper& output_ts_helper =
 458       output_sanitizer_->timestamp_helper();
 459   crossfade_buffer->set_timestamp(output_ts_helper.GetTimestamp());
 460
 461   // AudioBuffer::ReadFrames() only allows output into an AudioBus, so wrap
 462   // our AudioBuffer in one so we can avoid extra data copies.
 463   scoped_ptr<AudioBus> output_bus = CreateAudioBufferWrapper(crossfade_buffer);
 464
 465   // Extract crossfade section from the |post_splice_sanitizer_|.
 466   int frames_read = 0, frames_to_trim = 0;
 467   scoped_refptr<AudioBuffer> remainder;
 468   while (post_splice_sanitizer_->HasNextBuffer() &&
 469          frames_read < output_bus->frames()) {
 470     scoped_refptr<AudioBuffer> postroll =
 471         post_splice_sanitizer_->GetNextBuffer();
 472     const int frames_to_read =
 473         std::min(postroll->frame_count(), output_bus->frames() - frames_read);
 474     postroll->ReadFrames(frames_to_read, 0, frames_read, output_bus.get());
 475     frames_read += frames_to_read;
 476
 477     // If only part of the buffer was consumed, save it for after we've added
 478     // the crossfade buffer
 479     if (frames_to_read < postroll->frame_count()) {
 480       DCHECK(!remainder.get());
 481       remainder.swap(postroll);
 482       frames_to_trim = frames_to_read;
 483     }
 484   }
 485
 486   DCHECK_EQ(output_bus->frames(), frames_read);
 487
 488   // Crossfade the audio into |crossfade_buffer|.
 489   for (int ch = 0; ch < output_bus->channels(); ++ch) {
 490     vector_math::Crossfade(pre_splice_bus->channel(ch),
 491                            pre_splice_bus->frames(),
 492                            output_bus->channel(ch));
 493   }
 494
 495   CHECK(output_sanitizer_->AddInput(crossfade_buffer));
 496   DCHECK_EQ(crossfade_buffer->frame_count(), output_bus->frames());
 497
 498   if (remainder.get()) {
 499     // Trim off consumed frames.
 500     AccurateTrimStart(frames_to_trim, remainder, output_ts_helper);
 501     CHECK(output_sanitizer_->AddInput(remainder));
 502   }
 503
 504   // Transfer all remaining buffers out and reset once empty.
 505   CHECK(post_splice_sanitizer_->DrainInto(output_sanitizer_.get()));
 506   post_splice_sanitizer_->Reset();
 507 }
 508
 509 }  // namespace media