1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "media/base/audio_splicer.h"
10 #include "base/logging.h"
11 #include "media/base/audio_buffer.h"
12 #include "media/base/audio_bus.h"
13 #include "media/base/audio_decoder_config.h"
14 #include "media/base/audio_timestamp_helper.h"
15 #include "media/base/media_log.h"
16 #include "media/base/vector_math.h"
23 // Minimum gap size needed before the splicer will take action to
24 // fill a gap. This avoids periodically inserting and then dropping samples
25 // when the buffer timestamps are slightly off because of timestamp rounding
26 // in the source content. Unit is frames.
29 // Limits the number of MEDIA_LOG() messages per sanitizer instance that warn
30 // the user about splicer overlaps within |kMaxTimeDeltaInMilliseconds| or gaps
31 // larger than |kMinGapSize| and smaller than |kMaxTimeDeltaInMilliseconds|.
32 // These warnings may be frequent for some streams, and the number of sanitizer
33 // instances may be high, so keep this limit low to help reduce log spam.
34 kMaxSanitizerWarningLogs
= 5,
37 // AudioBuffer::TrimStart() is not as accurate as the timestamp helper, so
38 // manually adjust the duration and timestamp after trimming.
39 void AccurateTrimStart(int frames_to_trim
,
40 const scoped_refptr
<AudioBuffer
> buffer
,
41 const AudioTimestampHelper
& timestamp_helper
) {
42 buffer
->TrimStart(frames_to_trim
);
43 buffer
->set_timestamp(timestamp_helper
.GetTimestamp());
46 // Returns an AudioBus whose frame buffer is backed by the provided AudioBuffer.
47 scoped_ptr
<AudioBus
> CreateAudioBufferWrapper(
48 const scoped_refptr
<AudioBuffer
>& buffer
) {
49 scoped_ptr
<AudioBus
> wrapper
=
50 AudioBus::CreateWrapper(buffer
->channel_count());
51 wrapper
->set_frames(buffer
->frame_count());
52 for (int ch
= 0; ch
< buffer
->channel_count(); ++ch
) {
53 wrapper
->SetChannelData(
54 ch
, reinterpret_cast<float*>(buffer
->channel_data()[ch
]));
56 return wrapper
.Pass();
// Queues AudioBuffers for output while tracking, through an
// AudioTimestampHelper, the timestamp expected for the next buffer. The
// AddInput() definition later in this file inserts silence for small gaps and
// trims the start of buffers that overlap audio already queued.
// NOTE(review): the embedded numbering skips several lines inside this class
// (for example 62, 69, 92 and 98), so access specifiers and at least one
// declaration are not visible in this view — confirm against the full file.
61 class AudioStreamSanitizer
{
// |samples_per_second| seeds the timestamp helper; |media_log| is kept for
// the log messages emitted by AddInput().
63 AudioStreamSanitizer(int samples_per_second
,
64 const scoped_refptr
<MediaLog
>& media_log
);
65 ~AudioStreamSanitizer();
67 // Resets the sanitizer state by clearing the output buffers queue, and
68 // resetting the timestamp helper.
// NOTE(review): the declaration this comment describes (presumably Reset(),
// which is defined later in this file) is missing from this view.
71 // Similar to Reset(), but initializes the timestamp helper with the given
// |base_timestamp| and |frame_count|.
73 void ResetTimestampState(int64 frame_count
, base::TimeDelta base_timestamp
);
75 // Adds a new buffer full of samples or end of stream buffer to the splicer.
76 // Returns true if the buffer was accepted. False is returned if an error
// occurred.
78 bool AddInput(const scoped_refptr
<AudioBuffer
>& input
);
80 // Returns true if the sanitizer has a buffer to return.
81 bool HasNextBuffer() const;
83 // Removes the next buffer from the output buffer queue and returns it; should
84 // only be called if HasNextBuffer() returns true.
85 scoped_refptr
<AudioBuffer
> GetNextBuffer();
87 // Returns the total frame count of all buffers available for output.
88 int GetFrameCount() const;
// Read-only access to the helper that tracks the expected output timestamp.
// NOTE(review): this accessor is not const-qualified even though it only
// returns a const reference; consider adding const.
90 const AudioTimestampHelper
& timestamp_helper() {
91 return output_timestamp_helper_
;
94 // Transfer all buffers into |output|. Returns false if AddInput() on the
95 // |output| sanitizer fails for any buffer removed from |this|.
96 bool DrainInto(AudioStreamSanitizer
* output
);
// Appends |buffer| to the output queue and advances the timestamp helper by
// the buffer's frame count.
99 void AddOutputBuffer(const scoped_refptr
<AudioBuffer
>& buffer
);
// Tracks the timestamp expected for the next output buffer.
101 AudioTimestampHelper output_timestamp_helper_
;
// Set once an end of stream buffer has been added; cleared on reset.
102 bool received_end_of_stream_
;
// Buffers accepted by AddInput() and not yet handed out by GetNextBuffer().
104 typedef std::deque
<scoped_refptr
<AudioBuffer
> > BufferQueue
;
105 BufferQueue output_buffers_
;
// Destination for the MEDIA_LOG() messages emitted by AddInput().
107 scoped_refptr
<MediaLog
> media_log_
;
109 // To prevent log spam, counts the number of audio gap or overlaps warned in
// AddInput().
111 int num_warning_logs_
;
113 DISALLOW_ASSIGN(AudioStreamSanitizer
);
116 AudioStreamSanitizer::AudioStreamSanitizer(
117 int samples_per_second
,
118 const scoped_refptr
<MediaLog
>& media_log
)
119 : output_timestamp_helper_(samples_per_second
),
120 received_end_of_stream_(false),
121 media_log_(media_log
),
122 num_warning_logs_(0) {
125 AudioStreamSanitizer::~AudioStreamSanitizer() {}
127 void AudioStreamSanitizer::Reset() {
128 ResetTimestampState(0, kNoTimestamp());
// Clears the queued output buffers and the end-of-stream flag, then re-seeds
// the timestamp helper with |base_timestamp| and advances it by |frame_count|
// frames.
131 void AudioStreamSanitizer::ResetTimestampState(int64 frame_count
,
132 base::TimeDelta base_timestamp
) {
// Drop everything queued for output and forget any earlier end of stream.
133 output_buffers_
.clear();
134 received_end_of_stream_
= false;
135 output_timestamp_helper_
.SetBaseTimestamp(base_timestamp
);
// NOTE(review): the embedded numbering jumps from 135 to 137, so a line
// (possibly a guard on |frame_count|) may be missing from this view — confirm
// against the full file before relying on this call being unconditional.
137 output_timestamp_helper_
.AddFrames(frame_count
);
// Accepts |input| into the output queue, repairing small timestamp
// discontinuities against the timestamp expected by
// |output_timestamp_helper_|:
//   - an end of stream buffer is queued as-is;
//   - a buffer whose timestamp precedes the base timestamp, or differs from
//     the expected timestamp by more than
//     AudioSplicer::kMaxTimeDeltaInMilliseconds, is reported through
//     MEDIA_LOG(ERROR);
//   - a gap of at least kMinGapSize frames is filled with an empty buffer
//     before |input| is queued;
//   - an overlap of at least kMinGapSize frames is removed by trimming the
//     start of |input|.
// NOTE(review): the embedded numbering shows dropped lines throughout this
// function (for example 146-147, 159, 162-164, 180-181, 207), so the return
// statements, part of one log message and one call argument are not visible
// here — confirm the exact control flow against the full file.
140 bool AudioStreamSanitizer::AddInput(const scoped_refptr
<AudioBuffer
>& input
) {
// Once end of stream has been received, only end of stream buffers are
// expected.
141 DCHECK(!received_end_of_stream_
|| input
->end_of_stream());
143 if (input
->end_of_stream()) {
144 output_buffers_
.push_back(input
);
145 received_end_of_stream_
= true;
// Non end-of-stream buffers must carry a timestamp, a positive duration and
// at least one frame.
149 DCHECK(input
->timestamp() != kNoTimestamp());
150 DCHECK(input
->duration() > base::TimeDelta());
151 DCHECK_GT(input
->frame_count(), 0);
// The first buffer seen after a reset establishes the base timestamp.
153 if (output_timestamp_helper_
.base_timestamp() == kNoTimestamp())
154 output_timestamp_helper_
.SetBaseTimestamp(input
->timestamp());
// A buffer that starts before the base timestamp is reported as an error.
156 if (output_timestamp_helper_
.base_timestamp() > input
->timestamp()) {
157 MEDIA_LOG(ERROR
, media_log_
)
158 << "Audio splicing failed: unexpected timestamp sequence. base "
160 << output_timestamp_helper_
.base_timestamp().InMicroseconds()
161 << "us, input timestamp=" << input
->timestamp().InMicroseconds()
// |delta| is positive when |input| starts later than expected (a gap) and
// negative when it starts earlier (an overlap).
166 const base::TimeDelta timestamp
= input
->timestamp();
167 const base::TimeDelta expected_timestamp
=
168 output_timestamp_helper_
.GetTimestamp();
169 const base::TimeDelta delta
= timestamp
- expected_timestamp
;
// Discontinuities beyond the threshold are reported as errors rather than
// repaired.
171 if (std::abs(delta
.InMilliseconds()) >
172 AudioSplicer::kMaxTimeDeltaInMilliseconds
) {
173 MEDIA_LOG(ERROR
, media_log_
)
174 << "Audio splicing failed: coded frame timestamp differs from "
175 "expected timestamp " << expected_timestamp
.InMicroseconds()
176 << "us by " << delta
.InMicroseconds()
177 << "us, more than threshold of +/-"
178 << AudioSplicer::kMaxTimeDeltaInMilliseconds
179 << "ms. Expected timestamp is based on decoded frames and frame rate.";
// Convert the time delta into a frame count relative to the expected
// position; it stays 0 when the timestamps match exactly.
183 int frames_to_fill
= 0;
184 if (delta
!= base::TimeDelta())
185 frames_to_fill
= output_timestamp_helper_
.GetFramesToTarget(timestamp
);
// Discontinuities smaller than kMinGapSize frames are ignored and the buffer
// is queued unchanged.
187 if (frames_to_fill
== 0 || std::abs(frames_to_fill
) < kMinGapSize
) {
188 AddOutputBuffer(input
);
// Positive |frames_to_fill|: |input| starts after the expected position.
192 if (frames_to_fill
> 0) {
193 LIMITED_MEDIA_LOG(DEBUG
, media_log_
, num_warning_logs_
,
194 kMaxSanitizerWarningLogs
)
195 << "Audio splicer inserting silence for small gap of "
196 << delta
.InMicroseconds() << "us at time "
197 << expected_timestamp
.InMicroseconds() << "us.";
198 DVLOG(1) << "Gap detected @ " << expected_timestamp
.InMicroseconds()
199 << " us: " << delta
.InMicroseconds() << " us";
201 // Create a buffer with enough silence samples to fill the gap and
202 // add it to the output buffer.
203 scoped_refptr
<AudioBuffer
> gap
=
204 AudioBuffer::CreateEmptyBuffer(input
->channel_layout(),
205 input
->channel_count(),
206 input
->sample_rate(),
209 AddOutputBuffer(gap
);
211 // Add the input buffer now that the gap has been filled.
212 AddOutputBuffer(input
);
216 // Overlapping buffers marked as splice frames are handled by AudioSplicer,
217 // but decoder and demuxer quirks may sometimes produce overlapping samples
218 // which need to be sanitized.
220 // A crossfade can't be done here because only the current buffer is available
221 // at this point, not previous buffers.
222 LIMITED_MEDIA_LOG(DEBUG
, media_log_
, num_warning_logs_
,
223 kMaxSanitizerWarningLogs
)
224 << "Audio splicer skipping frames for small overlap of "
225 << -delta
.InMicroseconds() << "us at time "
226 << expected_timestamp
.InMicroseconds() << "us.";
227 DVLOG(1) << "Overlap detected @ " << expected_timestamp
.InMicroseconds()
228 << " us: " << -delta
.InMicroseconds() << " us";
// |frames_to_fill| is negative on this path, so its negation is the number
// of overlapping frames.
230 const int frames_to_skip
= -frames_to_fill
;
// The whole buffer lies inside audio that was already queued.
231 if (input
->frame_count() <= frames_to_skip
) {
232 DVLOG(1) << "Dropping whole buffer";
236 // Copy the trailing samples that do not overlap samples already output
237 // into a new buffer. Add this new buffer to the output queue.
239 // TODO(acolwell): Implement a cross-fade here so the transition is less
// jarring.
241 AccurateTrimStart(frames_to_skip
, input
, output_timestamp_helper_
);
242 AddOutputBuffer(input
);
246 bool AudioStreamSanitizer::HasNextBuffer() const {
247 return !output_buffers_
.empty();
// Takes a reference to the buffer at the front of the output queue and then
// removes that entry from the queue. front() is used without an emptiness
// check, so the queue must be non-empty when this is called (see
// HasNextBuffer()).
// NOTE(review): the statement returning |ret| is not visible in this view
// (the embedded numbering stops at 252) — confirm against the full file.
250 scoped_refptr
<AudioBuffer
> AudioStreamSanitizer::GetNextBuffer() {
251 scoped_refptr
<AudioBuffer
> ret
= output_buffers_
.front();
252 output_buffers_
.pop_front();
256 void AudioStreamSanitizer::AddOutputBuffer(
257 const scoped_refptr
<AudioBuffer
>& buffer
) {
258 output_timestamp_helper_
.AddFrames(buffer
->frame_count());
259 output_buffers_
.push_back(buffer
);
// Adds up frame_count() over every buffer currently in |output_buffers_|.
// NOTE(review): the declaration of |frame_count| and the return statement are
// not visible in this view (the embedded numbering skips 263 and 266) —
// confirm against the full file.
262 int AudioStreamSanitizer::GetFrameCount() const {
264 for (const auto& buffer
: output_buffers_
)
265 frame_count
+= buffer
->frame_count();
// While this sanitizer still has queued buffers, removes the next one and
// hands it to |output|->AddInput(), checking the result of each call.
// NOTE(review): the return statements are not visible in this view (the
// embedded numbering stops at 271) — confirm against the full file.
269 bool AudioStreamSanitizer::DrainInto(AudioStreamSanitizer
* output
) {
270 while (HasNextBuffer()) {
271 if (!output
->AddInput(GetNextBuffer()))
// Constructs a splicer with no splice in progress: both splice timestamps
// start at kNoTimestamp(), the maximum crossfade duration is derived from
// |kCrossfadeDurationInMilliseconds|, and three AudioStreamSanitizer
// instances are created with the same |samples_per_second| and |media_log|.
// NOTE(review): the embedded numbering skips 283, so the name of the member
// initialized by the first "new AudioStreamSanitizer" (presumably
// |output_sanitizer_|) is not visible in this view — confirm.
277 AudioSplicer::AudioSplicer(int samples_per_second
,
278 const scoped_refptr
<MediaLog
>& media_log
)
279 : max_crossfade_duration_(
280 base::TimeDelta::FromMilliseconds(kCrossfadeDurationInMilliseconds
)),
281 splice_timestamp_(kNoTimestamp()),
282 max_splice_end_timestamp_(kNoTimestamp()),
284 new AudioStreamSanitizer(samples_per_second
, media_log
)),
285 pre_splice_sanitizer_(
286 new AudioStreamSanitizer(samples_per_second
, media_log
)),
287 post_splice_sanitizer_(
288 new AudioStreamSanitizer(samples_per_second
, media_log
)),
289 have_all_pre_splice_buffers_(false) {
292 AudioSplicer::~AudioSplicer() {}
294 void AudioSplicer::Reset() {
295 output_sanitizer_
->Reset();
296 pre_splice_sanitizer_
->Reset();
297 post_splice_sanitizer_
->Reset();
298 have_all_pre_splice_buffers_
= false;
299 reset_splice_timestamps();
// Routes |input| according to the current splice state:
//   - with no splice pending, straight into |output_sanitizer_|;
//   - while pre-splice buffers are still arriving, into |output_sanitizer_|
//     if the buffer ends before |splice_timestamp_|, otherwise into
//     |pre_splice_sanitizer_|;
//   - afterwards, into |post_splice_sanitizer_|, and once enough post-splice
//     data (or end of stream) has arrived, the crossfade is extracted and
//     applied and the splice timestamps are reset.
// NOTE(review): the embedded numbering shows dropped lines throughout this
// function (for example 308, 321, 343-344, 350, 360, 364-368, 373-374,
// 386-387), so closing braces and return statements are not visible here —
// confirm the exact control flow against the full file.
302 bool AudioSplicer::AddInput(const scoped_refptr
<AudioBuffer
>& input
) {
303 // If we're not processing a splice, add the input to the output queue.
304 if (splice_timestamp_
== kNoTimestamp()) {
305 DCHECK(!pre_splice_sanitizer_
->HasNextBuffer());
306 DCHECK(!post_splice_sanitizer_
->HasNextBuffer());
307 return output_sanitizer_
->AddInput(input
);
310 const AudioTimestampHelper
& output_ts_helper
=
311 output_sanitizer_
->timestamp_helper();
313 if (!have_all_pre_splice_buffers_
) {
// End of stream is not expected while pre-splice buffers are still arriving.
314 DCHECK(!input
->end_of_stream());
316 // If the provided buffer is entirely before the splice point it can also be
317 // added to the output queue.
318 if (input
->timestamp() + input
->duration() < splice_timestamp_
) {
319 DCHECK(!pre_splice_sanitizer_
->HasNextBuffer());
320 return output_sanitizer_
->AddInput(input
);
323 // If we've encountered the first pre splice buffer, reset the pre splice
324 // sanitizer based on |output_sanitizer_|. This is done so that gaps and
325 // overlaps between buffers across the sanitizers are accounted for prior
326 // to calculating crossfade.
327 if (!pre_splice_sanitizer_
->HasNextBuffer()) {
328 pre_splice_sanitizer_
->ResetTimestampState(
329 output_ts_helper
.frame_count(), output_ts_helper
.base_timestamp());
332 return pre_splice_sanitizer_
->AddInput(input
);
335 // The first post splice buffer is expected to match |splice_timestamp_|.
336 if (!post_splice_sanitizer_
->HasNextBuffer())
337 CHECK(splice_timestamp_
== input
->timestamp());
339 // At this point we have all the fade out preroll buffers from the decoder.
340 // We now need to wait until we have enough data to perform the crossfade (or
341 // we receive an end of stream).
342 if (!post_splice_sanitizer_
->AddInput(input
))
345 // Ensure |output_sanitizer_| has a valid base timestamp so we can use it for
346 // timestamp calculations.
347 if (output_ts_helper
.base_timestamp() == kNoTimestamp()) {
348 output_sanitizer_
->ResetTimestampState(
349 0, pre_splice_sanitizer_
->timestamp_helper().base_timestamp());
352 // If a splice frame was incorrectly marked due to poor demuxed timestamps, we
353 // may not actually have a splice. Here we check if any frames exist before
354 // the splice. In this case, just transfer all data to the output sanitizer.
355 const int frames_before_splice
=
356 output_ts_helper
.GetFramesToTarget(splice_timestamp_
);
357 if (frames_before_splice
< 0 ||
358 pre_splice_sanitizer_
->GetFrameCount() <= frames_before_splice
) {
359 CHECK(pre_splice_sanitizer_
->DrainInto(output_sanitizer_
.get()));
361 // If the file contains incorrectly muxed timestamps, there may be huge gaps
362 // between the demuxed and decoded timestamps.
363 if (!post_splice_sanitizer_
->DrainInto(output_sanitizer_
.get()))
366 reset_splice_timestamps();
370 // Wait until we have enough data to crossfade or end of stream.
371 if (!input
->end_of_stream() &&
372 input
->timestamp() + input
->duration() < max_splice_end_timestamp_
) {
// |crossfade_buffer| is filled in by ExtractCrossfadeFromPreSplice(); the
// returned bus holds the pre-splice frames to fade out.
376 scoped_refptr
<AudioBuffer
> crossfade_buffer
;
377 scoped_ptr
<AudioBus
> pre_splice
=
378 ExtractCrossfadeFromPreSplice(&crossfade_buffer
);
380 // Crossfade the pre splice and post splice sections and transfer all relevant
381 // buffers into |output_sanitizer_|.
382 CrossfadePostSplice(pre_splice
.Pass(), crossfade_buffer
);
384 // Clear the splice timestamp so new splices can be accepted.
385 reset_splice_timestamps();
389 bool AudioSplicer::HasNextBuffer() const {
390 return output_sanitizer_
->HasNextBuffer();
393 scoped_refptr
<AudioBuffer
> AudioSplicer::GetNextBuffer() {
394 return output_sanitizer_
->GetNextBuffer();
// Starts a splice or marks the end of its pre-splice buffers:
//   - kNoTimestamp() while a splice is pending records that all pre-splice
//     buffers have been delivered;
//   - any other value records the splice point, derives the latest crossfade
//     end as |splice_timestamp_| + |max_crossfade_duration_|, and resets the
//     pre- and post-splice sanitizers.
// NOTE(review): the embedded numbering skips 402-403 and 406-407, so the
// statements that end the first two branches (presumably early returns) are
// not visible in this view — confirm against the full file.
397 void AudioSplicer::SetSpliceTimestamp(base::TimeDelta splice_timestamp
) {
398 if (splice_timestamp
== kNoTimestamp()) {
// Only valid while a splice is pending and before the flag was already set.
399 DCHECK(splice_timestamp_
!= kNoTimestamp());
400 DCHECK(!have_all_pre_splice_buffers_
);
401 have_all_pre_splice_buffers_
= true;
// Being told about the splice that is already pending is handled separately
// from starting a new one.
405 if (splice_timestamp_
== splice_timestamp
)
408 // TODO(dalecurtis): We may need the concept of a future_splice_timestamp_ to
409 // handle cases where another splice comes in before we've received 5ms of
410 // data from the last one. Leave this as a CHECK for now to figure out if
411 // this case is possible.
412 CHECK(splice_timestamp_
== kNoTimestamp());
413 splice_timestamp_
= splice_timestamp
;
414 max_splice_end_timestamp_
= splice_timestamp_
+ max_crossfade_duration_
;
415 pre_splice_sanitizer_
->Reset();
416 post_splice_sanitizer_
->Reset();
417 have_all_pre_splice_buffers_
= false;
// Pulls buffers out of |pre_splice_sanitizer_|, forwards the frames that lie
// before the splice point to |output_sanitizer_|, and collects the frames to
// be crossfaded into the returned AudioBus. |*crossfade_buffer| receives a
// newly created planar float AudioBuffer sized to the crossfade frame count.
// NOTE(review): the embedded numbering shows dropped lines in this function
// (441-442, 450-451, 459, 470-471, 476, 486-487, 492), so the declaration of
// |frames_read|, the condition guarding the allocations, and the start of the
// call that takes (frames_to_read, frames_before_splice, frames_read,
// output_bus.get()) are not visible here — confirm against the full file.
420 scoped_ptr
<AudioBus
> AudioSplicer::ExtractCrossfadeFromPreSplice(
421 scoped_refptr
<AudioBuffer
>* crossfade_buffer
) {
422 DCHECK(crossfade_buffer
);
423 const AudioTimestampHelper
& output_ts_helper
=
424 output_sanitizer_
->timestamp_helper();
// Frames between the current output position and the splice point.
426 int frames_before_splice
=
427 output_ts_helper
.GetFramesToTarget(splice_timestamp_
);
429 // Determine crossfade frame count based on available frames in each splicer
430 // and capping to the maximum crossfade duration.
431 const int max_crossfade_frame_count
=
432 output_ts_helper
.GetFramesToTarget(max_splice_end_timestamp_
) -
433 frames_before_splice
;
434 const int frames_to_crossfade
= std::min(
435 max_crossfade_frame_count
,
436 std::min(pre_splice_sanitizer_
->GetFrameCount() - frames_before_splice
,
437 post_splice_sanitizer_
->GetFrameCount()));
438 // There must always be frames to crossfade, otherwise the splice should not
439 // have been generated.
440 DCHECK_GT(frames_to_crossfade
, 0);
443 scoped_ptr
<AudioBus
> output_bus
;
444 while (pre_splice_sanitizer_
->HasNextBuffer() &&
445 frames_read
< frames_to_crossfade
) {
446 scoped_refptr
<AudioBuffer
> preroll
= pre_splice_sanitizer_
->GetNextBuffer();
448 // We don't know the channel count until we see the first buffer, so wait
449 // until the first buffer to allocate the output AudioBus.
452 AudioBus::Create(preroll
->channel_count(), frames_to_crossfade
);
453 // Allocate output buffer for crossfade.
454 *crossfade_buffer
= AudioBuffer::CreateBuffer(kSampleFormatPlanarF32
,
455 preroll
->channel_layout(),
456 preroll
->channel_count(),
457 preroll
->sample_rate(),
458 frames_to_crossfade
);
461 // There may be enough of a gap introduced during decoding such that an
462 // entire buffer exists before the splice point.
463 if (frames_before_splice
>= preroll
->frame_count()) {
464 // Adjust the number of frames remaining before the splice. NOTE: This is
465 // safe since |pre_splice_sanitizer_| is a continuation of the timeline in
466 // |output_sanitizer_|. As such we're guaranteed there are no gaps or
467 // overlaps in the timeline between the two sanitizers.
468 frames_before_splice
-= preroll
->frame_count();
469 CHECK(output_sanitizer_
->AddInput(preroll
));
// Read no more than what remains in this buffer after the splice point and
// what is still free in |output_bus|.
473 const int frames_to_read
=
474 std::min(preroll
->frame_count() - frames_before_splice
,
475 output_bus
->frames() - frames_read
);
477 frames_to_read
, frames_before_splice
, frames_read
, output_bus
.get());
478 frames_read
+= frames_to_read
;
480 // If only part of the buffer was consumed, trim it appropriately and stick
481 // it into the output queue.
482 if (frames_before_splice
) {
483 preroll
->TrimEnd(preroll
->frame_count() - frames_before_splice
);
484 CHECK(output_sanitizer_
->AddInput(preroll
));
485 frames_before_splice
= 0;
489 // Ensure outputs were properly allocated. The method should not have been
490 // called if there is not enough data to crossfade.
491 // TODO(dalecurtis): Convert to DCHECK() once http://crbug.com/356073 fixed.
493 CHECK(crossfade_buffer
->get());
495 // All necessary buffers have been processed, it's safe to reset.
496 pre_splice_sanitizer_
->Reset();
497 DCHECK_EQ(output_bus
->frames(), frames_read
);
498 DCHECK_EQ(output_ts_helper
.GetFramesToTarget(splice_timestamp_
), 0);
499 return output_bus
.Pass();
// Fills |crossfade_buffer| with post-splice frames read from
// |post_splice_sanitizer_|, crossfades |pre_splice_bus| into it channel by
// channel, and then adds the crossfade buffer, any partially consumed
// post-splice buffer, and all remaining post-splice buffers to
// |output_sanitizer_|.
// NOTE(review): this block extends to the last line visible in this view and
// its closing lines are not shown — confirm nothing follows the final Reset().
502 void AudioSplicer::CrossfadePostSplice(
503 scoped_ptr
<AudioBus
> pre_splice_bus
,
504 const scoped_refptr
<AudioBuffer
>& crossfade_buffer
) {
505 // Use the calculated timestamp and duration to ensure there's no extra gaps
506 // or overlaps to process when adding the buffer to |output_sanitizer_|.
507 const AudioTimestampHelper
& output_ts_helper
=
508 output_sanitizer_
->timestamp_helper();
509 crossfade_buffer
->set_timestamp(output_ts_helper
.GetTimestamp());
511 // AudioBuffer::ReadFrames() only allows output into an AudioBus, so wrap
512 // our AudioBuffer in one so we can avoid extra data copies.
513 scoped_ptr
<AudioBus
> output_bus
= CreateAudioBufferWrapper(crossfade_buffer
);
515 // Extract crossfade section from the |post_splice_sanitizer_|.
516 int frames_read
= 0, frames_to_trim
= 0;
517 scoped_refptr
<AudioBuffer
> remainder
;
518 while (post_splice_sanitizer_
->HasNextBuffer() &&
519 frames_read
< output_bus
->frames()) {
520 scoped_refptr
<AudioBuffer
> postroll
=
521 post_splice_sanitizer_
->GetNextBuffer();
// Read as much of this buffer as still fits in |output_bus|.
522 const int frames_to_read
=
523 std::min(postroll
->frame_count(), output_bus
->frames() - frames_read
);
524 postroll
->ReadFrames(frames_to_read
, 0, frames_read
, output_bus
.get());
525 frames_read
+= frames_to_read
;
527 // If only part of the buffer was consumed, save it for after we've added
528 // the crossfade buffer
529 if (frames_to_read
< postroll
->frame_count()) {
530 DCHECK(!remainder
.get());
531 remainder
.swap(postroll
);
532 frames_to_trim
= frames_to_read
;
// The crossfade buffer is expected to be completely filled at this point.
536 DCHECK_EQ(output_bus
->frames(), frames_read
);
538 // Crossfade the audio into |crossfade_buffer|.
539 for (int ch
= 0; ch
< output_bus
->channels(); ++ch
) {
540 vector_math::Crossfade(pre_splice_bus
->channel(ch
),
541 pre_splice_bus
->frames(),
542 output_bus
->channel(ch
));
545 CHECK(output_sanitizer_
->AddInput(crossfade_buffer
));
546 DCHECK_EQ(crossfade_buffer
->frame_count(), output_bus
->frames());
548 if (remainder
.get()) {
549 // Trim off consumed frames.
550 AccurateTrimStart(frames_to_trim
, remainder
, output_ts_helper
);
551 CHECK(output_sanitizer_
->AddInput(remainder
));
554 // Transfer all remaining buffers out and reset once empty.
555 CHECK(post_splice_sanitizer_
->DrainInto(output_sanitizer_
.get()));
556 post_splice_sanitizer_
->Reset();