1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "media/mp4/mp4_stream_parser.h"
7 #include "base/callback.h"
8 #include "base/callback_helpers.h"
9 #include "base/logging.h"
10 #include "base/time/time.h"
11 #include "media/base/audio_decoder_config.h"
12 #include "media/base/stream_parser_buffer.h"
13 #include "media/base/video_decoder_config.h"
14 #include "media/base/video_util.h"
15 #include "media/mp4/box_definitions.h"
16 #include "media/mp4/box_reader.h"
17 #include "media/mp4/es_descriptor.h"
18 #include "media/mp4/rcheck.h"
23 // TODO(xhwang): Figure out the init data type appropriately once it's spec'ed.
// Init data type string handed to |need_key_cb_| together with the
// concatenated PSSH data in EmitNeedKeyIfNecessary().
24 static const char kMp4InitDataType
[] = "video/mp4";
// Constructs a parser that starts in the kWaitingForInit state. The set of
// accepted audio object types is copied into |audio_object_types_| and both
// per-track encryption flags start out false.
26 MP4StreamParser::MP4StreamParser(const std::set
<int>& audio_object_types
,
28 : state_(kWaitingForInit
),
35 audio_object_types_(audio_object_types
),
37 is_audio_track_encrypted_(false),
38 is_video_track_encrypted_(false) {
// Destructor; the body is empty, so no explicit teardown happens here.
41 MP4StreamParser::~MP4StreamParser() {}
// Stores the parser callbacks and moves the parser from kWaitingForInit to
// kParsingBoxes.
//
// Debug builds check that the parser is still in kWaitingForInit, that
// |init_cb_| has not been set yet, and that |init_cb|, |config_cb|,
// |new_buffers_cb|, |need_key_cb| and |end_of_segment_cb| are non-null.
// The text-buffer and add-text-track callbacks are unnamed parameters and are
// not used by this function.
43 void MP4StreamParser::Init(const InitCB
& init_cb
,
44 const NewConfigCB
& config_cb
,
45 const NewBuffersCB
& new_buffers_cb
,
46 const NewTextBuffersCB
& /* text_cb */ ,
47 const NeedKeyCB
& need_key_cb
,
48 const AddTextTrackCB
& /* add_text_track_cb */ ,
49 const NewMediaSegmentCB
& new_segment_cb
,
50 const base::Closure
& end_of_segment_cb
,
51 const LogCB
& log_cb
) {
// Preconditions: Init() has not run before.
52 DCHECK_EQ(state_
, kWaitingForInit
);
53 DCHECK(init_cb_
.is_null());
// Preconditions: the callbacks this parser relies on are bound.
54 DCHECK(!init_cb
.is_null());
55 DCHECK(!config_cb
.is_null());
56 DCHECK(!new_buffers_cb
.is_null());
57 DCHECK(!need_key_cb
.is_null());
58 DCHECK(!end_of_segment_cb
.is_null());
60 ChangeState(kParsingBoxes
);
// Keep copies of the callbacks for use while parsing.
62 config_cb_
= config_cb
;
63 new_buffers_cb_
= new_buffers_cb
;
64 need_key_cb_
= need_key_cb
;
65 new_segment_cb_
= new_segment_cb
;
66 end_of_segment_cb_
= end_of_segment_cb
;
70 void MP4StreamParser::Reset() {
// Returns the parser to the kParsingBoxes state. Must not be called before
// Init() (debug builds check that the state is not kWaitingForInit).
77 void MP4StreamParser::Flush() {
78 DCHECK_NE(state_
, kWaitingForInit
);
80 ChangeState(kParsingBoxes
);
// Appends |size| bytes from |buf| to |queue_| and then repeatedly processes
// the queued data for as long as a step reports progress and no error is
// flagged.
//
// In kParsingBoxes a step is ParseBox(); otherwise the parser is expected to
// be in kEmittingSamples and a step is EnqueueSample(), which collects
// buffers into local audio/video queues. Collected buffers are handed off
// through SendAndFlushSamples(). Must not be called before Init().
83 bool MP4StreamParser::Parse(const uint8
* buf
, int size
) {
84 DCHECK_NE(state_
, kWaitingForInit
);
89 queue_
.Push(buf
, size
);
// Buffers produced during this call accumulate here until they are sent.
91 BufferQueue audio_buffers
;
92 BufferQueue video_buffers
;
// Only |err| is initialized here; |result| is assigned by each parsing step
// below before the loop condition reads it.
94 bool result
, err
= false;
97 if (state_
== kParsingBoxes
) {
98 result
= ParseBox(&err
);
100 DCHECK_EQ(kEmittingSamples
, state_
);
101 result
= EnqueueSample(&audio_buffers
, &video_buffers
, &err
);
// Discard mdat data up to the run iterator's max clear offset, which is
// converted to a queue offset by adding |moof_head_|.
103 int64 max_clear
= runs_
->GetMaxClearOffset() + moof_head_
;
104 err
= !ReadAndDiscardMDATsUntil(max_clear
);
107 } while (result
&& !err
);
110 err
= !SendAndFlushSamples(&audio_buffers
, &video_buffers
);
113 DLOG(ERROR
) << "Error while parsing MP4";
// Parses the top-level box at the head of |queue_|.
//
// Returns false when the queue has no data or when
// BoxReader::ReadTopLevelBox() does not produce a reader. A 'moov' box is
// handed to ParseMoov() and a 'moof' box to ParseMoof(); a failure from either
// is reported through |*err|. Any other box type is logged as unrecognized.
// For 'moof', |moof_head_| and |mdat_tail_| are recorded so that later sample
// and mdat offsets can be resolved against the queue.
123 bool MP4StreamParser::ParseBox(bool* err
) {
126 queue_
.Peek(&buf
, &size
);
127 if (!size
) return false;
129 scoped_ptr
<BoxReader
> reader(
130 BoxReader::ReadTopLevelBox(buf
, size
, log_cb_
, err
));
131 if (reader
.get() == NULL
) return false;
133 if (reader
->type() == FOURCC_MOOV
) {
134 *err
= !ParseMoov(reader
.get());
135 } else if (reader
->type() == FOURCC_MOOF
) {
// Remember where this 'moof' starts in the queue.
136 moof_head_
= queue_
.head();
137 *err
= !ParseMoof(reader
.get());
139 // Set up first mdat offset for ReadAndDiscardMDATsUntil().
140 mdat_tail_
= queue_
.head() + reader
->size();
142 // Return early to avoid evicting 'moof' data from queue. Auxiliary info may
143 // be located anywhere in the file, including inside the 'moof' itself.
144 // (Since 'default-base-is-moof' is mandated, no data references can come
145 // before the head of the 'moof', so keeping this box around is sufficient.)
148 MEDIA_LOG(log_cb_
) << "Skipping unrecognized top-level box: "
149 << FourCCToString(reader
->type());
// Drop the box that was just handled from the front of the queue.
152 queue_
.Pop(reader
->size());
// Parses a 'moov' box into |moov_| and derives the stream configuration.
//
// Walks the tracks in |moov_|, building an AudioDecoderConfig from the first
// audio track and a VideoDecoderConfig from the first video track encountered
// while the respective config is still invalid. For each track the sample
// description index comes from the matching TrackExtends entry. The resulting
// configs are passed to |config_cb_|, the duration is computed and reported
// through |init_cb_| (if still set), and need-key data is emitted for the
// PSSH entries in the moov.
157 bool MP4StreamParser::ParseMoov(BoxReader
* reader
) {
158 moov_
.reset(new Movie
);
159 RCHECK(moov_
->Parse(reader
));
165 AudioDecoderConfig audio_config
;
166 VideoDecoderConfig video_config
;
168 for (std::vector
<Track
>::const_iterator track
= moov_
->tracks
.begin();
169 track
!= moov_
->tracks
.end(); ++track
) {
170 // TODO(strobe): Only the first audio and video track present in a file are
171 // used. (Track selection is better accomplished via Source IDs, though, so
172 // adding support for track selection within a stream is low-priority.)
173 const SampleDescription
& samp_descr
=
174 track
->media
.information
.sample_table
.description
;
176 // TODO(strobe): When codec reconfigurations are supported, detect and send
177 // a codec reconfiguration for fragments using a sample description index
178 // different from the previous one
// Find the TrackExtends entry whose track id matches this track and take
// its default sample description index.
180 for (size_t t
= 0; t
< moov_
->extends
.tracks
.size(); t
++) {
181 const TrackExtends
& trex
= moov_
->extends
.tracks
[t
];
182 if (trex
.track_id
== track
->header
.track_id
) {
183 desc_idx
= trex
.default_sample_description_index
;
187 RCHECK(desc_idx
> 0);
188 desc_idx
-= 1; // BMFF descriptor index is one-based
// Audio: only considered while no valid audio config has been built yet.
190 if (track
->media
.handler
.type
== kAudio
&& !audio_config
.IsValidConfig()) {
191 RCHECK(!samp_descr
.audio_entries
.empty());
193 // It is not uncommon to find otherwise-valid files with incorrect sample
194 // description indices, so we fail gracefully in that case.
195 if (desc_idx
>= samp_descr
.audio_entries
.size())
197 const AudioSampleEntry
& entry
= samp_descr
.audio_entries
[desc_idx
];
198 const AAC
& aac
= entry
.esds
.aac
;
// Accepted formats: 'mp4a', 'eac3', or 'enca' wrapping 'mp4a'.
200 if (!(entry
.format
== FOURCC_MP4A
|| entry
.format
== FOURCC_EAC3
||
201 (entry
.format
== FOURCC_ENCA
&&
202 entry
.sinf
.format
.format
== FOURCC_MP4A
))) {
203 MEDIA_LOG(log_cb_
) << "Unsupported audio format 0x"
204 << std::hex
<< entry
.format
<< " in stsd box.";
208 uint8 audio_type
= entry
.esds
.object_type
;
209 DVLOG(1) << "audio_type " << std::hex
<< audio_type
;
210 if (audio_type
== kForbidden
&& entry
.format
== FOURCC_EAC3
) {
// The object type must be one of those supplied at construction time.
213 if (audio_object_types_
.find(audio_type
) == audio_object_types_
.end()) {
214 MEDIA_LOG(log_cb_
) << "audio object type 0x" << std::hex
<< audio_type
215 << " does not match what is specified in the"
220 AudioCodec codec
= kUnknownAudioCodec
;
221 ChannelLayout channel_layout
= CHANNEL_LAYOUT_NONE
;
222 int sample_per_second
= 0;
223 std::vector
<uint8
> extra_data
;
224 // Check if it is MPEG4 AAC defined in ISO 14496 Part 3 or
225 // supported MPEG2 AAC variants.
226 if (ESDescriptor::IsAAC(audio_type
)) {
228 channel_layout
= aac
.GetChannelLayout(has_sbr_
);
229 sample_per_second
= aac
.GetOutputSamplesPerSecond(has_sbr_
);
230 #if defined(OS_ANDROID)
231 extra_data
= aac
.codec_specific_data();
233 } else if (audio_type
== kEAC3
) {
// For E-AC3 the layout and rate come from the sample entry itself.
235 channel_layout
= GuessChannelLayout(entry
.channelcount
);
236 sample_per_second
= entry
.samplerate
;
238 MEDIA_LOG(log_cb_
) << "Unsupported audio object type 0x" << std::hex
239 << audio_type
<< " in esds.";
// Map the entry's sample size (8, 16 or 32) to the matching SampleFormat.
243 SampleFormat sample_format
;
244 if (entry
.samplesize
== 8) {
245 sample_format
= kSampleFormatU8
;
246 } else if (entry
.samplesize
== 16) {
247 sample_format
= kSampleFormatS16
;
248 } else if (entry
.samplesize
== 32) {
249 sample_format
= kSampleFormatS32
;
251 LOG(ERROR
) << "Unsupported sample size.";
255 is_audio_track_encrypted_
= entry
.sinf
.info
.track_encryption
.is_encrypted
;
256 DVLOG(1) << "is_audio_track_encrypted_: " << is_audio_track_encrypted_
;
// Pass a null extra-data pointer when there is no codec-specific data.
257 audio_config
.Initialize(
258 codec
, sample_format
, channel_layout
, sample_per_second
,
259 extra_data
.size() ? &extra_data
[0] : NULL
, extra_data
.size(),
260 is_audio_track_encrypted_
, false);
262 audio_track_id_
= track
->header
.track_id
;
// Video: only considered while no valid video config has been built yet.
264 if (track
->media
.handler
.type
== kVideo
&& !video_config
.IsValidConfig()) {
265 RCHECK(!samp_descr
.video_entries
.empty());
266 if (desc_idx
>= samp_descr
.video_entries
.size())
268 const VideoSampleEntry
& entry
= samp_descr
.video_entries
[desc_idx
];
// Accepted formats: 'avc1', or 'encv' wrapping 'avc1'.
270 if (!(entry
.format
== FOURCC_AVC1
||
271 (entry
.format
== FOURCC_ENCV
&&
272 entry
.sinf
.format
.format
== FOURCC_AVC1
))) {
273 MEDIA_LOG(log_cb_
) << "Unsupported video format 0x"
274 << std::hex
<< entry
.format
<< " in stsd box.";
278 // TODO(strobe): Recover correct crop box
279 gfx::Size
coded_size(entry
.width
, entry
.height
);
280 gfx::Rect
visible_rect(coded_size
);
281 gfx::Size natural_size
= GetNaturalSize(visible_rect
.size(),
282 entry
.pixel_aspect
.h_spacing
,
283 entry
.pixel_aspect
.v_spacing
);
284 is_video_track_encrypted_
= entry
.sinf
.info
.track_encryption
.is_encrypted
;
285 DVLOG(1) << "is_video_track_encrypted_: " << is_video_track_encrypted_
;
286 video_config
.Initialize(kCodecH264
, H264PROFILE_MAIN
, VideoFrame::YV12
,
287 coded_size
, visible_rect
, natural_size
,
288 // No decoder-specific buffer needed for AVC;
289 // SPS/PPS are embedded in the video stream
290 NULL
, 0, is_video_track_encrypted_
, true);
292 video_track_id_
= track
->header
.track_id
;
296 RCHECK(config_cb_
.Run(audio_config
, video_config
));
// Duration: use moov_->extends.header.fragment_duration when it is positive;
// otherwise use moov_->header.duration when it is positive and not
// kuint64max. kInfiniteDuration() is the remaining case.
298 base::TimeDelta duration
;
299 if (moov_
->extends
.header
.fragment_duration
> 0) {
300 duration
= TimeDeltaFromRational(moov_
->extends
.header
.fragment_duration
,
301 moov_
->header
.timescale
);
302 } else if (moov_
->header
.duration
> 0 &&
303 moov_
->header
.duration
!= kuint64max
) {
304 duration
= TimeDeltaFromRational(moov_
->header
.duration
,
305 moov_
->header
.timescale
);
307 duration
= kInfiniteDuration();
// NOTE(review): base::ResetAndReturn() presumably clears |init_cb_|, so the
// init callback would fire only for the first moov - confirm.
310 if (!init_cb_
.is_null())
311 base::ResetAndReturn(&init_cb_
).Run(true, duration
);
313 EmitNeedKeyIfNecessary(moov_
->pssh
);
// Parses a 'moof' box and prepares sample iteration for the fragment.
//
// Requires that a 'moov' has already been parsed (|moov_| is set). Builds a
// fresh TrackRunIterator in |runs_| from the parsed fragment, emits need-key
// data for the fragment's PSSH entries, signals the start of a new media
// segment via |new_segment_cb_|, and switches to kEmittingSamples.
317 bool MP4StreamParser::ParseMoof(BoxReader
* reader
) {
318 RCHECK(moov_
.get()); // Must already have initialization segment
320 RCHECK(moof
.Parse(reader
));
// Replace any previous run iterator with one for this fragment.
322 runs_
.reset(new TrackRunIterator(moov_
.get(), log_cb_
));
323 RCHECK(runs_
->Init(moof
));
324 EmitNeedKeyIfNecessary(moof
.pssh
);
325 new_segment_cb_
.Run();
326 ChangeState(kEmittingSamples
);
// Concatenates the raw bytes of every entry in |headers|, in order, into a
// single buffer and passes it to |need_key_cb_| with kMp4InitDataType as the
// init data type.
//
// NOTE(review): the copy loop takes &headers[i].raw_box[0] and
// &init_data[pos], which assumes every raw_box (and the combined buffer) is
// non-empty - confirm that callers/earlier checks guarantee this.
330 void MP4StreamParser::EmitNeedKeyIfNecessary(
331 const std::vector
<ProtectionSystemSpecificHeader
>& headers
) {
332 // TODO(strobe): ensure that the value of init_data (all PSSH headers
333 // concatenated in arbitrary order) matches the EME spec.
334 // See https://www.w3.org/Bugs/Public/show_bug.cgi?id=17673.
// First pass: compute the combined size so the buffer is allocated once.
338 size_t total_size
= 0;
339 for (size_t i
= 0; i
< headers
.size(); i
++)
340 total_size
+= headers
[i
].raw_box
.size();
342 std::vector
<uint8
> init_data(total_size
);
// Second pass: copy each raw box back to back, advancing |pos| as we go.
344 for (size_t i
= 0; i
< headers
.size(); i
++) {
345 memcpy(&init_data
[pos
], &headers
[i
].raw_box
[0],
346 headers
[i
].raw_box
.size());
347 pos
+= headers
[i
].raw_box
.size();
349 need_key_cb_
.Run(kMp4InitDataType
, init_data
);
// Rewrites an AVC sample in |frame_buf| into Annex B form and keeps the
// subsample encryption map in |subsamples| consistent with the new layout.
//
// When subsample info is present, every subsample's clear byte count grows by
// (4 - avc_config.length_size), and the converted frame must have grown by
// exactly that amount per subsample. For keyframes, the parameter sets
// converted from |avc_config| are inserted at the front of the frame and
// their size is added to the first subsample's clear bytes.
352 bool MP4StreamParser::PrepareAVCBuffer(
353 const AVCDecoderConfigurationRecord
& avc_config
,
354 std::vector
<uint8
>* frame_buf
,
355 std::vector
<SubsampleEntry
>* subsamples
) const {
356 // Convert the AVC NALU length fields to Annex B headers, as expected by
357 // decoding libraries. Since this may enlarge the size of the buffer, we also
358 // update the clear byte count for each subsample if encryption is used to
359 // account for the difference in size between the length prefix and Annex B
// header (the code below treats each converted header as 4 bytes long).
361 RCHECK(AVC::ConvertFrameToAnnexB(avc_config
.length_size
, frame_buf
));
362 if (!subsamples
->empty()) {
363 const int nalu_size_diff
= 4 - avc_config
.length_size
;
// Sanity check: the frame must have grown by one |nalu_size_diff| per
// subsample relative to the original sample size.
364 size_t expected_size
= runs_
->sample_size() +
365 subsamples
->size() * nalu_size_diff
;
366 RCHECK(frame_buf
->size() == expected_size
);
367 for (size_t i
= 0; i
< subsamples
->size(); i
++)
368 (*subsamples
)[i
].clear_bytes
+= nalu_size_diff
;
371 if (runs_
->is_keyframe()) {
372 // If this is a keyframe, we (re-)inject SPS and PPS headers at the start of
373 // a frame. If subsample info is present, we also update the clear byte
374 // count for that first subsample.
375 std::vector
<uint8
> param_sets
;
// NOTE(review): the second argument below looks like a mis-decoded
// "&param_sets" (the "&para" prefix rendered as a pilcrow) - confirm
// against the original source.
376 RCHECK(AVC::ConvertConfigToAnnexB(avc_config
, ¶m_sets
));
377 frame_buf
->insert(frame_buf
->begin(),
378 param_sets
.begin(), param_sets
.end());
379 if (!subsamples
->empty())
380 (*subsamples
)[0].clear_bytes
+= param_sets
.size();
// Adds an ADTS header to the AAC sample in |frame_buf| (via
// AAC::ConvertEsdsToADTS()) and adjusts |subsamples| so the header bytes are
// counted as clear data.
//
// If |subsamples| is empty, a single entry is created covering the whole
// frame: AAC::kADTSHeaderSize clear bytes followed by the rest as cypher
// bytes. Otherwise the header size is added to the first entry's clear bytes.
385 bool MP4StreamParser::PrepareAACBuffer(
386 const AAC
& aac_config
, std::vector
<uint8
>* frame_buf
,
387 std::vector
<SubsampleEntry
>* subsamples
) const {
388 // Append an ADTS header to every audio sample.
// NOTE(review): the accounting below treats the header as leading clear
// bytes, so "append" presumably means "prepend" - confirm against
// AAC::ConvertEsdsToADTS().
389 RCHECK(aac_config
.ConvertEsdsToADTS(frame_buf
));
391 // As above, adjust subsample information to account for the headers. AAC is
392 // not required to use subsample encryption, so we may need to add an entry.
393 if (subsamples
->empty()) {
394 SubsampleEntry entry
;
395 entry
.clear_bytes
= AAC::kADTSHeaderSize
;
396 entry
.cypher_bytes
= frame_buf
->size() - AAC::kADTSHeaderSize
;
397 subsamples
->push_back(entry
);
399 (*subsamples
)[0].clear_bytes
+= AAC::kADTSHeaderSize
;
// Processes the current sample of |runs_| and, when it belongs to the
// selected audio or video track, appends it to |audio_buffers| or
// |video_buffers|.
//
// When the current run is no longer valid, pending buffers are flushed, the
// queue is trimmed to |mdat_tail_|, and the parser returns to kParsingBoxes
// and signals end of segment. Returns false when the queue does not yet hold
// enough bytes for the aux info or the sample. Video samples go through
// PrepareAVCBuffer() and AAC audio samples through PrepareAACBuffer(); a
// DecryptConfig is attached for encrypted samples, and a placeholder config
// is used for unencrypted samples on a track marked as encrypted.
404 bool MP4StreamParser::EnqueueSample(BufferQueue
* audio_buffers
,
405 BufferQueue
* video_buffers
,
407 if (!runs_
->IsRunValid()) {
408 // Flush any buffers we've gotten in this chunk so that buffers don't
409 // cross NewSegment() calls
410 *err
= !SendAndFlushSamples(audio_buffers
, video_buffers
);
414 // Remain in kEmittingSamples state, discarding data, until the end of
415 // the current 'mdat' box has been appended to the queue.
416 if (!queue_
.Trim(mdat_tail_
))
419 ChangeState(kParsingBoxes
);
420 end_of_segment_cb_
.Run();
424 if (!runs_
->IsSampleValid()) {
433 queue_
.Peek(&buf
, &buf_size
);
434 if (!buf_size
) return false;
// Classify the current run by comparing its track id with the selected
// audio and video tracks.
436 bool audio
= has_audio_
&& audio_track_id_
== runs_
->track_id();
437 bool video
= has_video_
&& video_track_id_
== runs_
->track_id();
439 // Skip this entire track if it's not one we're interested in
440 if (!audio
&& !video
)
443 // Attempt to cache the auxiliary information first. Aux info is usually
444 // placed in a contiguous block before the sample data, rather than being
445 // interleaved. If we didn't cache it, this would require that we retain the
446 // start of the segment buffer while reading samples. Aux info is typically
447 // quite small compared to sample data, so this pattern is useful on
448 // memory-constrained devices where the source buffer consumes a substantial
449 // portion of the total system memory.
450 if (runs_
->AuxInfoNeedsToBeCached()) {
// Offsets from |runs_| are made queue-relative by adding |moof_head_|.
451 queue_
.PeekAt(runs_
->aux_info_offset() + moof_head_
, &buf
, &buf_size
);
452 if (buf_size
< runs_
->aux_info_size()) return false;
453 *err
= !runs_
->CacheAuxInfo(buf
, buf_size
);
457 queue_
.PeekAt(runs_
->sample_offset() + moof_head_
, &buf
, &buf_size
);
458 if (buf_size
< runs_
->sample_size()) return false;
460 scoped_ptr
<DecryptConfig
> decrypt_config
;
461 std::vector
<SubsampleEntry
> subsamples
;
462 if (runs_
->is_encrypted()) {
463 decrypt_config
= runs_
->GetDecryptConfig();
464 if (!decrypt_config
) {
468 subsamples
= decrypt_config
->subsamples();
// Copy the sample bytes so they can be rewritten in place below.
471 std::vector
<uint8
> frame_buf(buf
, buf
+ runs_
->sample_size());
473 if (!PrepareAVCBuffer(runs_
->video_description().avcc
,
474 &frame_buf
, &subsamples
)) {
475 MEDIA_LOG(log_cb_
) << "Failed to prepare AVC sample for decode";
482 if (ESDescriptor::IsAAC(runs_
->audio_description().esds
.object_type
) &&
483 !PrepareAACBuffer(runs_
->audio_description().esds
.aac
,
484 &frame_buf
, &subsamples
)) {
485 MEDIA_LOG(log_cb_
) << "Failed to prepare AAC sample for decode";
491 if (decrypt_config
) {
492 if (!subsamples
.empty()) {
493 // Create a new config with the updated subsamples.
494 decrypt_config
.reset(new DecryptConfig(
495 decrypt_config
->key_id(),
496 decrypt_config
->iv(),
497 decrypt_config
->data_offset(),
500 // else, use the existing config.
501 } else if ((audio
&& is_audio_track_encrypted_
) ||
502 (video
&& is_video_track_encrypted_
)) {
503 // The media pipeline requires a DecryptConfig with an empty |iv|.
504 // TODO(ddorwin): Refactor so we do not need a fake key ID ("1");
505 decrypt_config
.reset(
506 new DecryptConfig("1", "", 0, std::vector
<SubsampleEntry
>()));
509 scoped_refptr
<StreamParserBuffer
> stream_buf
=
510 StreamParserBuffer::CopyFrom(&frame_buf
[0], frame_buf
.size(),
511 runs_
->is_keyframe());
514 stream_buf
->set_decrypt_config(decrypt_config
.Pass());
// Timing for the buffer comes straight from the run iterator.
516 stream_buf
->set_duration(runs_
->duration());
517 stream_buf
->set_timestamp(runs_
->cts());
518 stream_buf
->SetDecodeTimestamp(runs_
->dts());
520 DVLOG(3) << "Pushing frame: aud=" << audio
521 << ", key=" << runs_
->is_keyframe()
522 << ", dur=" << runs_
->duration().InMilliseconds()
523 << ", dts=" << runs_
->dts().InMilliseconds()
524 << ", cts=" << runs_
->cts().InMilliseconds()
525 << ", size=" << runs_
->sample_size();
528 audio_buffers
->push_back(stream_buf
);
530 video_buffers
->push_back(stream_buf
);
// Move on to the next sample in the run.
533 runs_
->AdvanceSample();
// Delivers the accumulated buffers to |new_buffers_cb_| and empties both
// queues.
//
// The case where both queues are empty is checked first. After the callback
// runs, both queues are cleared regardless of the callback's result, which is
// captured in |success|.
537 bool MP4StreamParser::SendAndFlushSamples(BufferQueue
* audio_buffers
,
538 BufferQueue
* video_buffers
) {
539 if (audio_buffers
->empty() && video_buffers
->empty())
542 bool success
= new_buffers_cb_
.Run(*audio_buffers
, *video_buffers
);
543 audio_buffers
->clear();
544 video_buffers
->clear();
// Advances |mdat_tail_| one top-level box at a time while it is below
// |offset|, then trims |queue_| up to the smaller of |mdat_tail_| and
// |offset|.
//
// Each box header is read at |mdat_tail_| with BoxReader::StartTopLevelBox().
// Boxes are expected to be 'mdat'; any other type is logged as unexpected.
548 bool MP4StreamParser::ReadAndDiscardMDATsUntil(const int64 offset
) {
550 while (mdat_tail_
< offset
) {
553 queue_
.PeekAt(mdat_tail_
, &buf
, &size
);
557 if (!BoxReader::StartTopLevelBox(buf
, size
, log_cb_
,
558 &type
, &box_sz
, &err
))
561 if (type
!= FOURCC_MDAT
) {
562 MEDIA_LOG(log_cb_
) << "Unexpected box type while parsing MDATs: "
563 << FourCCToString(type
);
// Step past this box to the start of the next one.
565 mdat_tail_
+= box_sz
;
// Never trim beyond |offset|, even if the last box extends past it.
567 queue_
.Trim(std::min(mdat_tail_
, offset
));
571 void MP4StreamParser::ChangeState(State new_state
) {
572 DVLOG(2) << "Changing state: " << new_state
;