media/base/audio_splicer.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "media/base/audio_splicer.h"
   6
   7 #include <cstdlib>
   8 #include <deque>
   9
  10 #include "base/logging.h"
  11 #include "media/base/audio_buffer.h"
  12 #include "media/base/audio_bus.h"
  13 #include "media/base/audio_decoder_config.h"
  14 #include "media/base/audio_timestamp_helper.h"
  15 #include "media/base/vector_math.h"
  16
  17 namespace media {
  18
  19 // Largest gap or overlap allowed by this class. Anything
  20 // larger than this will trigger an error.
  21 // This is an arbitrary value, but the initial selection of 50ms
  22 // roughly represents the duration of 2 compressed AAC or MP3 frames.
  23 static const int kMaxTimeDeltaInMilliseconds = 50;
  24
  25 // Minimum gap size needed before the splicer will take action to
  26 // fill a gap. This avoids periodically inserting and then dropping samples
  27 // when the buffer timestamps are slightly off because of timestamp rounding
  28 // in the source content. Unit is frames.
  29 static const int kMinGapSize = 2;
  30
  31 // AudioBuffer::TrimStart() is not as accurate as the timestamp helper, so
  32 // manually adjust the duration and timestamp after trimming.
  33 static void AccurateTrimStart(int frames_to_trim,
  34                               const scoped_refptr<AudioBuffer> buffer,
  35                               const AudioTimestampHelper& timestamp_helper) {
  36   buffer->TrimStart(frames_to_trim);
  37   buffer->set_timestamp(timestamp_helper.GetTimestamp());
  38 }
  39
  40 // Returns an AudioBus whose frame buffer is backed by the provided AudioBuffer.
  41 static scoped_ptr<AudioBus> CreateAudioBufferWrapper(
  42     const scoped_refptr<AudioBuffer>& buffer) {
  43   scoped_ptr<AudioBus> wrapper =
  44       AudioBus::CreateWrapper(buffer->channel_count());
  45   wrapper->set_frames(buffer->frame_count());
  46   for (int ch = 0; ch < buffer->channel_count(); ++ch) {
  47     wrapper->SetChannelData(
  48         ch, reinterpret_cast<float*>(buffer->channel_data()[ch]));
  49   }
  50   return wrapper.Pass();
  51 }
  52
  53 class AudioStreamSanitizer {
  54  public:
  55   explicit AudioStreamSanitizer(int samples_per_second);
  56   ~AudioStreamSanitizer();
  57
  58   // Resets the sanitizer state by clearing the output buffers queue, and
  59   // resetting the timestamp helper.
  60   void Reset();
  61
  62   // Similar to Reset(), but initializes the timestamp helper with the given
  63   // parameters.
  64   void ResetTimestampState(int64 frame_count, base::TimeDelta base_timestamp);
  65
  66   // Adds a new buffer full of samples or end of stream buffer to the splicer.
  67   // Returns true if the buffer was accepted. False is returned if an error
  68   // occurred.
  69   bool AddInput(const scoped_refptr<AudioBuffer>& input);
  70
  71   // Returns true if the sanitizer has a buffer to return.
  72   bool HasNextBuffer() const;
  73
  74   // Removes the next buffer from the output buffer queue and returns it; should
  75   // only be called if HasNextBuffer() returns true.
  76   scoped_refptr<AudioBuffer> GetNextBuffer();
  77
  78   // Returns the total frame count of all buffers available for output.
  79   int GetFrameCount() const;
  80
  81   const AudioTimestampHelper& timestamp_helper() {
  82     return output_timestamp_helper_;
  83   }
  84
  85   // Transfer all buffers into |output|.  Returns false if AddInput() on the
  86   // |output| sanitizer fails for any buffer removed from |this|.
  87   bool DrainInto(AudioStreamSanitizer* output);
  88
  89  private:
  90   void AddOutputBuffer(const scoped_refptr<AudioBuffer>& buffer);
  91
  92   AudioTimestampHelper output_timestamp_helper_;
  93   bool received_end_of_stream_;
  94
  95   typedef std::deque<scoped_refptr<AudioBuffer> > BufferQueue;
  96   BufferQueue output_buffers_;
  97
  98   DISALLOW_ASSIGN(AudioStreamSanitizer);
  99 };
 100
 101 AudioStreamSanitizer::AudioStreamSanitizer(int samples_per_second)
 102     : output_timestamp_helper_(samples_per_second),
 103       received_end_of_stream_(false) {}
 104
 105 AudioStreamSanitizer::~AudioStreamSanitizer() {}
 106
 107 void AudioStreamSanitizer::Reset() {
 108   ResetTimestampState(0, kNoTimestamp());
 109 }
 110
 111 void AudioStreamSanitizer::ResetTimestampState(int64 frame_count,
 112                                                base::TimeDelta base_timestamp) {
 113   output_buffers_.clear();
 114   received_end_of_stream_ = false;
 115   output_timestamp_helper_.SetBaseTimestamp(base_timestamp);
 116   if (frame_count > 0)
 117     output_timestamp_helper_.AddFrames(frame_count);
 118 }
 119
 120 bool AudioStreamSanitizer::AddInput(const scoped_refptr<AudioBuffer>& input) {
 121   DCHECK(!received_end_of_stream_ || input->end_of_stream());
 122
 123   if (input->end_of_stream()) {
 124     output_buffers_.push_back(input);
 125     received_end_of_stream_ = true;
 126     return true;
 127   }
 128
 129   DCHECK(input->timestamp() != kNoTimestamp());
 130   DCHECK(input->duration() > base::TimeDelta());
 131   DCHECK_GT(input->frame_count(), 0);
 132
 133   if (output_timestamp_helper_.base_timestamp() == kNoTimestamp())
 134     output_timestamp_helper_.SetBaseTimestamp(input->timestamp());
 135
 136   if (output_timestamp_helper_.base_timestamp() > input->timestamp()) {
 137     DVLOG(1) << "Input timestamp is before the base timestamp.";
 138     return false;
 139   }
 140
 141   const base::TimeDelta timestamp = input->timestamp();
 142   const base::TimeDelta expected_timestamp =
 143       output_timestamp_helper_.GetTimestamp();
 144   const base::TimeDelta delta = timestamp - expected_timestamp;
 145
 146   if (std::abs(delta.InMilliseconds()) > kMaxTimeDeltaInMilliseconds) {
 147     DVLOG(1) << "Timestamp delta too large: " << delta.InMicroseconds() << "us";
 148     return false;
 149   }
 150
 151   int frames_to_fill = 0;
 152   if (delta != base::TimeDelta())
 153     frames_to_fill = output_timestamp_helper_.GetFramesToTarget(timestamp);
 154
 155   if (frames_to_fill == 0 || std::abs(frames_to_fill) < kMinGapSize) {
 156     AddOutputBuffer(input);
 157     return true;
 158   }
 159
 160   if (frames_to_fill > 0) {
 161     DVLOG(1) << "Gap detected @ " << expected_timestamp.InMicroseconds()
 162              << " us: " << delta.InMicroseconds() << " us";
 163
 164     // Create a buffer with enough silence samples to fill the gap and
 165     // add it to the output buffer.
 166     scoped_refptr<AudioBuffer> gap =
 167         AudioBuffer::CreateEmptyBuffer(input->channel_layout(),
 168                                        input->channel_count(),
 169                                        input->sample_rate(),
 170                                        frames_to_fill,
 171                                        expected_timestamp);
 172     AddOutputBuffer(gap);
 173
 174     // Add the input buffer now that the gap has been filled.
 175     AddOutputBuffer(input);
 176     return true;
 177   }
 178
 179   // Overlapping buffers marked as splice frames are handled by AudioSplicer,
 180   // but decoder and demuxer quirks may sometimes produce overlapping samples
 181   // which need to be sanitized.
 182   //
 183   // A crossfade can't be done here because only the current buffer is available
 184   // at this point, not previous buffers.
 185   DVLOG(1) << "Overlap detected @ " << expected_timestamp.InMicroseconds()
 186            << " us: " << -delta.InMicroseconds() << " us";
 187
 188   const int frames_to_skip = -frames_to_fill;
 189   if (input->frame_count() <= frames_to_skip) {
 190     DVLOG(1) << "Dropping whole buffer";
 191     return true;
 192   }
 193
 194   // Copy the trailing samples that do not overlap samples already output
 195   // into a new buffer.  Add this new buffer to the output queue.
 196   //
 197   // TODO(acolwell): Implement a cross-fade here so the transition is less
 198   // jarring.
 199   AccurateTrimStart(frames_to_skip, input, output_timestamp_helper_);
 200   AddOutputBuffer(input);
 201   return true;
 202 }
 203
 204 bool AudioStreamSanitizer::HasNextBuffer() const {
 205   return !output_buffers_.empty();
 206 }
 207
 208 scoped_refptr<AudioBuffer> AudioStreamSanitizer::GetNextBuffer() {
 209   scoped_refptr<AudioBuffer> ret = output_buffers_.front();
 210   output_buffers_.pop_front();
 211   return ret;
 212 }
 213
 214 void AudioStreamSanitizer::AddOutputBuffer(
 215     const scoped_refptr<AudioBuffer>& buffer) {
 216   output_timestamp_helper_.AddFrames(buffer->frame_count());
 217   output_buffers_.push_back(buffer);
 218 }
 219
 220 int AudioStreamSanitizer::GetFrameCount() const {
 221   int frame_count = 0;
 222   for (BufferQueue::const_iterator it = output_buffers_.begin();
 223        it != output_buffers_.end(); ++it) {
 224     frame_count += (*it)->frame_count();
 225   }
 226   return frame_count;
 227 }
 228
 229 bool AudioStreamSanitizer::DrainInto(AudioStreamSanitizer* output) {
 230   while (HasNextBuffer()) {
 231     if (!output->AddInput(GetNextBuffer()))
 232       return false;
 233   }
 234   return true;
 235 }
 236
 237 AudioSplicer::AudioSplicer(int samples_per_second)
 238     : max_crossfade_duration_(
 239           base::TimeDelta::FromMilliseconds(kCrossfadeDurationInMilliseconds)),
 240       splice_timestamp_(kNoTimestamp()),
 241       max_splice_end_timestamp_(kNoTimestamp()),
 242       output_sanitizer_(new AudioStreamSanitizer(samples_per_second)),
 243       pre_splice_sanitizer_(new AudioStreamSanitizer(samples_per_second)),
 244       post_splice_sanitizer_(new AudioStreamSanitizer(samples_per_second)),
 245       have_all_pre_splice_buffers_(false) {}
 246
 247 AudioSplicer::~AudioSplicer() {}
 248
 249 void AudioSplicer::Reset() {
 250   output_sanitizer_->Reset();
 251   pre_splice_sanitizer_->Reset();
 252   post_splice_sanitizer_->Reset();
 253   have_all_pre_splice_buffers_ = false;
 254   reset_splice_timestamps();
 255 }
 256
 257 bool AudioSplicer::AddInput(const scoped_refptr<AudioBuffer>& input) {
 258   // If we're not processing a splice, add the input to the output queue.
 259   if (splice_timestamp_ == kNoTimestamp()) {
 260     DCHECK(!pre_splice_sanitizer_->HasNextBuffer());
 261     DCHECK(!post_splice_sanitizer_->HasNextBuffer());
 262     return output_sanitizer_->AddInput(input);
 263   }
 264
 265   const AudioTimestampHelper& output_ts_helper =
 266       output_sanitizer_->timestamp_helper();
 267
 268   if (!have_all_pre_splice_buffers_) {
 269     DCHECK(!input->end_of_stream());
 270
 271     // If the provided buffer is entirely before the splice point it can also be
 272     // added to the output queue.
 273     if (input->timestamp() + input->duration() < splice_timestamp_) {
 274       DCHECK(!pre_splice_sanitizer_->HasNextBuffer());
 275       return output_sanitizer_->AddInput(input);
 276     }
 277
 278     // If we've encountered the first pre splice buffer, reset the pre splice
 279     // sanitizer based on |output_sanitizer_|.  This is done so that gaps and
 280     // overlaps between buffers across the sanitizers are accounted for prior
 281     // to calculating crossfade.
 282     if (!pre_splice_sanitizer_->HasNextBuffer()) {
 283       pre_splice_sanitizer_->ResetTimestampState(
 284           output_ts_helper.frame_count(), output_ts_helper.base_timestamp());
 285     }
 286
 287     return pre_splice_sanitizer_->AddInput(input);
 288   }
 289
 290   // The first post splice buffer is expected to match |splice_timestamp_|.
 291   if (!post_splice_sanitizer_->HasNextBuffer())
 292     CHECK(splice_timestamp_ == input->timestamp());
 293
 294   // At this point we have all the fade out preroll buffers from the decoder.
 295   // We now need to wait until we have enough data to perform the crossfade (or
 296   // we receive an end of stream).
 297   if (!post_splice_sanitizer_->AddInput(input))
 298     return false;
 299
 300   // Ensure |output_sanitizer_| has a valid base timestamp so we can use it for
 301   // timestamp calculations.
 302   if (output_ts_helper.base_timestamp() == kNoTimestamp()) {
 303     output_sanitizer_->ResetTimestampState(
 304         0, pre_splice_sanitizer_->timestamp_helper().base_timestamp());
 305   }
 306
 307   // If a splice frame was incorrectly marked due to poor demuxed timestamps, we
 308   // may not actually have a splice.  Here we check if any frames exist before
 309   // the splice.  In this case, just transfer all data to the output sanitizer.
 310   if (pre_splice_sanitizer_->GetFrameCount() <=
 311       output_ts_helper.GetFramesToTarget(splice_timestamp_)) {
 312     CHECK(pre_splice_sanitizer_->DrainInto(output_sanitizer_.get()));
 313     CHECK(post_splice_sanitizer_->DrainInto(output_sanitizer_.get()));
 314     reset_splice_timestamps();
 315     return true;
 316   }
 317
 318   // Wait until we have enough data to crossfade or end of stream.
 319   if (!input->end_of_stream() &&
 320       input->timestamp() + input->duration() < max_splice_end_timestamp_) {
 321     return true;
 322   }
 323
 324   scoped_refptr<AudioBuffer> crossfade_buffer;
 325   scoped_ptr<AudioBus> pre_splice =
 326       ExtractCrossfadeFromPreSplice(&crossfade_buffer);
 327
 328   // Crossfade the pre splice and post splice sections and transfer all relevant
 329   // buffers into |output_sanitizer_|.
 330   CrossfadePostSplice(pre_splice.Pass(), crossfade_buffer);
 331
 332   // Clear the splice timestamp so new splices can be accepted.
 333   reset_splice_timestamps();
 334   return true;
 335 }
 336
 337 bool AudioSplicer::HasNextBuffer() const {
 338   return output_sanitizer_->HasNextBuffer();
 339 }
 340
 341 scoped_refptr<AudioBuffer> AudioSplicer::GetNextBuffer() {
 342   return output_sanitizer_->GetNextBuffer();
 343 }
 344
 345 void AudioSplicer::SetSpliceTimestamp(base::TimeDelta splice_timestamp) {
 346   if (splice_timestamp == kNoTimestamp()) {
 347     DCHECK(splice_timestamp_ != kNoTimestamp());
 348     DCHECK(!have_all_pre_splice_buffers_);
 349     have_all_pre_splice_buffers_ = true;
 350     return;
 351   }
 352
 353   if (splice_timestamp_ == splice_timestamp)
 354     return;
 355
 356   // TODO(dalecurtis): We may need the concept of a future_splice_timestamp_ to
 357   // handle cases where another splice comes in before we've received 5ms of
 358   // data from the last one.  Leave this as a CHECK for now to figure out if
 359   // this case is possible.
 360   CHECK(splice_timestamp_ == kNoTimestamp());
 361   splice_timestamp_ = splice_timestamp;
 362   max_splice_end_timestamp_ = splice_timestamp_ + max_crossfade_duration_;
 363   pre_splice_sanitizer_->Reset();
 364   post_splice_sanitizer_->Reset();
 365   have_all_pre_splice_buffers_ = false;
 366 }
 367
 368 scoped_ptr<AudioBus> AudioSplicer::ExtractCrossfadeFromPreSplice(
 369     scoped_refptr<AudioBuffer>* crossfade_buffer) {
 370   DCHECK(crossfade_buffer);
 371   const AudioTimestampHelper& output_ts_helper =
 372       output_sanitizer_->timestamp_helper();
 373
 374   int frames_before_splice =
 375       output_ts_helper.GetFramesToTarget(splice_timestamp_);
 376
 377   // Determine crossfade frame count based on available frames in each splicer
 378   // and capping to the maximum crossfade duration.
 379   const int max_crossfade_frame_count =
 380       output_ts_helper.GetFramesToTarget(max_splice_end_timestamp_) -
 381       frames_before_splice;
 382   const int frames_to_crossfade = std::min(
 383       max_crossfade_frame_count,
 384       std::min(pre_splice_sanitizer_->GetFrameCount() - frames_before_splice,
 385                post_splice_sanitizer_->GetFrameCount()));
 386   // There must always be frames to crossfade, otherwise the splice should not
 387   // have been generated.
 388   DCHECK_GT(frames_to_crossfade, 0);
 389
 390   int frames_read = 0;
 391   scoped_ptr<AudioBus> output_bus;
 392   while (pre_splice_sanitizer_->HasNextBuffer() &&
 393          frames_read < frames_to_crossfade) {
 394     scoped_refptr<AudioBuffer> preroll = pre_splice_sanitizer_->GetNextBuffer();
 395
 396     // We don't know the channel count until we see the first buffer, so wait
 397     // until the first buffer to allocate the output AudioBus.
 398     if (!output_bus) {
 399       output_bus =
 400           AudioBus::Create(preroll->channel_count(), frames_to_crossfade);
 401       // Allocate output buffer for crossfade.
 402       *crossfade_buffer = AudioBuffer::CreateBuffer(kSampleFormatPlanarF32,
 403                                                     preroll->channel_layout(),
 404                                                     preroll->channel_count(),
 405                                                     preroll->sample_rate(),
 406                                                     frames_to_crossfade);
 407     }
 408
 409     // There may be enough of a gap introduced during decoding such that an
 410     // entire buffer exists before the splice point.
 411     if (frames_before_splice >= preroll->frame_count()) {
 412       // Adjust the number of frames remaining before the splice.  NOTE: This is
 413       // safe since |pre_splice_sanitizer_| is a continuation of the timeline in
 414       // |output_sanitizer_|.  As such we're guaranteed there are no gaps or
 415       // overlaps in the timeline between the two sanitizers.
 416       frames_before_splice -= preroll->frame_count();
 417       CHECK(output_sanitizer_->AddInput(preroll));
 418       continue;
 419     }
 420
 421     const int frames_to_read =
 422         std::min(preroll->frame_count() - frames_before_splice,
 423                  output_bus->frames() - frames_read);
 424     preroll->ReadFrames(
 425         frames_to_read, frames_before_splice, frames_read, output_bus.get());
 426     frames_read += frames_to_read;
 427
 428     // If only part of the buffer was consumed, trim it appropriately and stick
 429     // it into the output queue.
 430     if (frames_before_splice) {
 431       preroll->TrimEnd(preroll->frame_count() - frames_before_splice);
 432       CHECK(output_sanitizer_->AddInput(preroll));
 433       frames_before_splice = 0;
 434     }
 435   }
 436
 437   // Ensure outputs were properly allocated.  The method should not have been
 438   // called if there is not enough data to crossfade.
 439   // TODO(dalecurtis): Convert to DCHECK() once http://crbug.com/356073 fixed.
 440   CHECK(output_bus);
 441   CHECK(*crossfade_buffer);
 442
 443   // All necessary buffers have been processed, it's safe to reset.
 444   pre_splice_sanitizer_->Reset();
 445   DCHECK_EQ(output_bus->frames(), frames_read);
 446   DCHECK_EQ(output_ts_helper.GetFramesToTarget(splice_timestamp_), 0);
 447   return output_bus.Pass();
 448 }
 449
 450 void AudioSplicer::CrossfadePostSplice(
 451     scoped_ptr<AudioBus> pre_splice_bus,
 452     scoped_refptr<AudioBuffer> crossfade_buffer) {
 453   // Use the calculated timestamp and duration to ensure there's no extra gaps
 454   // or overlaps to process when adding the buffer to |output_sanitizer_|.
 455   const AudioTimestampHelper& output_ts_helper =
 456       output_sanitizer_->timestamp_helper();
 457   crossfade_buffer->set_timestamp(output_ts_helper.GetTimestamp());
 458
 459   // AudioBuffer::ReadFrames() only allows output into an AudioBus, so wrap
 460   // our AudioBuffer in one so we can avoid extra data copies.
 461   scoped_ptr<AudioBus> output_bus = CreateAudioBufferWrapper(crossfade_buffer);
 462
 463   // Extract crossfade section from the |post_splice_sanitizer_|.
 464   int frames_read = 0, frames_to_trim = 0;
 465   scoped_refptr<AudioBuffer> remainder;
 466   while (post_splice_sanitizer_->HasNextBuffer() &&
 467          frames_read < output_bus->frames()) {
 468     scoped_refptr<AudioBuffer> postroll =
 469         post_splice_sanitizer_->GetNextBuffer();
 470     const int frames_to_read =
 471         std::min(postroll->frame_count(), output_bus->frames() - frames_read);
 472     postroll->ReadFrames(frames_to_read, 0, frames_read, output_bus.get());
 473     frames_read += frames_to_read;
 474
 475     // If only part of the buffer was consumed, save it for after we've added
 476     // the crossfade buffer
 477     if (frames_to_read < postroll->frame_count()) {
 478       DCHECK(!remainder);
 479       remainder.swap(postroll);
 480       frames_to_trim = frames_to_read;
 481     }
 482   }
 483
 484   DCHECK_EQ(output_bus->frames(), frames_read);
 485
 486   // Crossfade the audio into |crossfade_buffer|.
 487   for (int ch = 0; ch < output_bus->channels(); ++ch) {
 488     vector_math::Crossfade(pre_splice_bus->channel(ch),
 489                            pre_splice_bus->frames(),
 490                            output_bus->channel(ch));
 491   }
 492
 493   CHECK(output_sanitizer_->AddInput(crossfade_buffer));
 494   DCHECK_EQ(crossfade_buffer->frame_count(), output_bus->frames());
 495
 496   if (remainder) {
 497     // Trim off consumed frames.
 498     AccurateTrimStart(frames_to_trim, remainder, output_ts_helper);
 499     CHECK(output_sanitizer_->AddInput(remainder));
 500   }
 501
 502   // Transfer all remaining buffers out and reset once empty.
 503   CHECK(post_splice_sanitizer_->DrainInto(output_sanitizer_.get()));
 504   post_splice_sanitizer_->Reset();
 505 }
 506
 507 }  // namespace media