1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "media/mp4/mp4_stream_parser.h"
7 #include "base/callback.h"
8 #include "base/callback_helpers.h"
9 #include "base/logging.h"
10 #include "base/time/time.h"
11 #include "media/base/audio_decoder_config.h"
12 #include "media/base/stream_parser_buffer.h"
13 #include "media/base/video_decoder_config.h"
14 #include "media/base/video_util.h"
15 #include "media/mp4/box_definitions.h"
16 #include "media/mp4/box_reader.h"
17 #include "media/mp4/es_descriptor.h"
18 #include "media/mp4/rcheck.h"
// Init data type string that this file passes to need_key_cb_ together with
// the concatenated PSSH data.
// TODO(xhwang): Figure out the init data type appropriately once it's spec'ed.
static const char kMp4InitDataType[] = "video/mp4";
// Constructor. The visible initializers put the parser in kWaitingForInit,
// copy |audio_object_types| into audio_object_types_, and clear both
// per-track encryption flags.
// NOTE(review): the parameter list and the initializer list look truncated in
// this listing (embedded numbering jumps 26 -> 28 -> 35 -> 37) — confirm the
// remaining parameters/members against the full file.
26 MP4StreamParser::MP4StreamParser(const std::set
<int>& audio_object_types
,
28 : state_(kWaitingForInit
),
35 audio_object_types_(audio_object_types
),
37 is_audio_track_encrypted_(false),
38 is_video_track_encrypted_(false) {
41 MP4StreamParser::~MP4StreamParser() {}
// Registers the parser callbacks and moves the parser to kParsingBoxes.
//
// Preconditions (DCHECKed): state_ is kWaitingForInit, init_cb_ has not been
// set yet, and |init_cb|, |config_cb|, |new_buffers_cb|, |need_key_cb| and
// |end_of_segment_cb| are all non-null. |new_segment_cb| and |log_cb| are not
// checked here. The text-buffer and add-text-track callbacks are unnamed and
// unused.
// NOTE(review): the assignments that store |init_cb| and |log_cb| are not
// visible in this listing (embedded numbering skips 61 and 67) — confirm
// against the full file.
43 void MP4StreamParser::Init(const InitCB
& init_cb
,
44 const NewConfigCB
& config_cb
,
45 const NewBuffersCB
& new_buffers_cb
,
46 const NewTextBuffersCB
& /* text_cb */ ,
47 const NeedKeyCB
& need_key_cb
,
48 const AddTextTrackCB
& /* add_text_track_cb */ ,
49 const NewMediaSegmentCB
& new_segment_cb
,
50 const base::Closure
& end_of_segment_cb
,
51 const LogCB
& log_cb
) {
52 DCHECK_EQ(state_
, kWaitingForInit
);
53 DCHECK(init_cb_
.is_null());
54 DCHECK(!init_cb
.is_null());
55 DCHECK(!config_cb
.is_null());
56 DCHECK(!new_buffers_cb
.is_null());
57 DCHECK(!need_key_cb
.is_null());
58 DCHECK(!end_of_segment_cb
.is_null());
60 ChangeState(kParsingBoxes
);
62 config_cb_
= config_cb
;
63 new_buffers_cb_
= new_buffers_cb
;
64 need_key_cb_
= need_key_cb
;
65 new_segment_cb_
= new_segment_cb
;
66 end_of_segment_cb_
= end_of_segment_cb
;
// NOTE(review): only the opening line of Reset() is present in this listing;
// its body is not shown, so what it clears cannot be documented from here.
70 void MP4StreamParser::Reset() {
// DCHECKs that the parser has left kWaitingForInit, then switches it to
// kParsingBoxes.
// NOTE(review): embedded line 79 is missing from this listing — confirm
// whether additional work happens between the check and the state change.
77 void MP4StreamParser::Flush() {
78 DCHECK_NE(state_
, kWaitingForInit
);
80 ChangeState(kParsingBoxes
);
// Appends |size| bytes from |buf| to queue_ and drives the parser over the
// queued data.
//
// Visible flow: while the last step reported progress (|result|) and no error
// (|err|), the loop either calls ParseBox() when in kParsingBoxes, or expects
// kEmittingSamples and calls EnqueueSample(), collecting buffers into the
// local audio/video queues. MDAT data is discarded up to
// runs_->GetMaxClearOffset() + moof_head_, the collected buffers are handed
// to SendAndFlushSamples(), and an error is logged via DLOG.
// Must not be called in kWaitingForInit (DCHECKed).
// NOTE(review): the loop opening, the else branch, the conditions guarding
// the discard/flush/log steps and the return statements are not visible in
// this listing — confirm exact control flow against the full file.
83 bool MP4StreamParser::Parse(const uint8
* buf
, int size
) {
84 DCHECK_NE(state_
, kWaitingForInit
);
89 queue_
.Push(buf
, size
);
91 BufferQueue audio_buffers
;
92 BufferQueue video_buffers
;
94 bool result
, err
= false;
97 if (state_
== kParsingBoxes
) {
98 result
= ParseBox(&err
);
100 DCHECK_EQ(kEmittingSamples
, state_
);
101 result
= EnqueueSample(&audio_buffers
, &video_buffers
, &err
);
103 int64 max_clear
= runs_
->GetMaxClearOffset() + moof_head_
;
104 err
= !ReadAndDiscardMDATsUntil(max_clear
);
107 } while (result
&& !err
);
110 err
= !SendAndFlushSamples(&audio_buffers
, &video_buffers
);
113 DLOG(ERROR
) << "Error while parsing MP4";
// Tries to read one top-level box from the head of queue_.
//
// Returns false when the queue is empty or BoxReader::ReadTopLevelBox() yields
// no reader. For a 'moov' box, *err is set from ParseMoov(). For a 'moof' box,
// the queue head is remembered in moof_head_, *err is set from ParseMoof(),
// and mdat_tail_ is set to the offset just past the box. Other box types are
// logged as skipped. The box is popped from queue_ via
// queue_.Pop(reader->size()).
// NOTE(review): the local declarations, the early return mentioned in the
// 'moof' branch, the else keyword and the final return are not visible in
// this listing — confirm against the full file.
123 bool MP4StreamParser::ParseBox(bool* err
) {
126 queue_
.Peek(&buf
, &size
);
127 if (!size
) return false;
129 scoped_ptr
<BoxReader
> reader(
130 BoxReader::ReadTopLevelBox(buf
, size
, log_cb_
, err
));
131 if (reader
.get() == NULL
) return false;
133 if (reader
->type() == FOURCC_MOOV
) {
134 *err
= !ParseMoov(reader
.get());
135 } else if (reader
->type() == FOURCC_MOOF
) {
136 moof_head_
= queue_
.head();
137 *err
= !ParseMoof(reader
.get());
139 // Set up first mdat offset for ReadAndDiscardMDATsUntil().
140 mdat_tail_
= queue_
.head() + reader
->size();
142 // Return early to avoid evicting 'moof' data from queue. Auxiliary info may
143 // be located anywhere in the file, including inside the 'moof' itself.
144 // (Since 'default-base-is-moof' is mandated, no data references can come
145 // before the head of the 'moof', so keeping this box around is sufficient.)
148 MEDIA_LOG(log_cb_
) << "Skipping unrecognized top-level box: "
149 << FourCCToString(reader
->type());
152 queue_
.Pop(reader
->size());
// Handles a 'moov' box: parses it into a fresh Movie stored in moov_, then
// walks moov_->tracks to build one audio and one video decoder config.
//
// For each track, the sample description index is taken from the matching
// TrackExtends entry (by track_id), must be > 0, and is converted from
// one-based to zero-based. A config is only filled in while the corresponding
// config is still invalid, so later tracks of the same kind are ignored.
// Audio: the entry format must be MP4A, EAC3, or ENCA wrapping MP4A, and the
// object type is looked up in audio_object_types_. Video: the entry must pass
// IsFormatValid(), and the config is initialized as H.264 main profile.
// Both configs are handed to config_cb_ (failure fails the call via RCHECK).
// The duration comes from the fragment duration or the movie header duration,
// or is kInfiniteDuration(). init_cb_ is run at most once through
// ResetAndReturn, and need-key is emitted for moov_->pssh.
// NOTE(review): many lines (has_audio_/has_video_ updates, desc_idx
// declaration, several returns/else branches and closing braces) are not
// visible in this listing — confirm details against the full file.
157 bool MP4StreamParser::ParseMoov(BoxReader
* reader
) {
158 moov_
.reset(new Movie
);
159 RCHECK(moov_
->Parse(reader
));
165 AudioDecoderConfig audio_config
;
166 VideoDecoderConfig video_config
;
168 for (std::vector
<Track
>::const_iterator track
= moov_
->tracks
.begin();
169 track
!= moov_
->tracks
.end(); ++track
) {
170 // TODO(strobe): Only the first audio and video track present in a file are
171 // used. (Track selection is better accomplished via Source IDs, though, so
172 // adding support for track selection within a stream is low-priority.)
173 const SampleDescription
& samp_descr
=
174 track
->media
.information
.sample_table
.description
;
176 // TODO(strobe): When codec reconfigurations are supported, detect and send
177 // a codec reconfiguration for fragments using a sample description index
178 // different from the previous one
180 for (size_t t
= 0; t
< moov_
->extends
.tracks
.size(); t
++) {
181 const TrackExtends
& trex
= moov_
->extends
.tracks
[t
];
182 if (trex
.track_id
== track
->header
.track_id
) {
183 desc_idx
= trex
.default_sample_description_index
;
187 RCHECK(desc_idx
> 0);
188 desc_idx
-= 1; // BMFF descriptor index is one-based
190 if (track
->media
.handler
.type
== kAudio
&& !audio_config
.IsValidConfig()) {
191 RCHECK(!samp_descr
.audio_entries
.empty());
193 // It is not uncommon to find otherwise-valid files with incorrect sample
194 // description indices, so we fail gracefully in that case.
195 if (desc_idx
>= samp_descr
.audio_entries
.size())
197 const AudioSampleEntry
& entry
= samp_descr
.audio_entries
[desc_idx
];
198 const AAC
& aac
= entry
.esds
.aac
;
200 if (!(entry
.format
== FOURCC_MP4A
|| entry
.format
== FOURCC_EAC3
||
201 (entry
.format
== FOURCC_ENCA
&&
202 entry
.sinf
.format
.format
== FOURCC_MP4A
))) {
203 MEDIA_LOG(log_cb_
) << "Unsupported audio format 0x"
204 << std::hex
<< entry
.format
<< " in stsd box.";
208 uint8 audio_type
= entry
.esds
.object_type
;
209 DVLOG(1) << "audio_type " << std::hex
<< audio_type
;
210 if (audio_type
== kForbidden
&& entry
.format
== FOURCC_EAC3
) {
213 if (audio_object_types_
.find(audio_type
) == audio_object_types_
.end()) {
214 MEDIA_LOG(log_cb_
) << "audio object type 0x" << std::hex
<< audio_type
215 << " does not match what is specified in the"
220 AudioCodec codec
= kUnknownAudioCodec
;
221 ChannelLayout channel_layout
= CHANNEL_LAYOUT_NONE
;
222 int sample_per_second
= 0;
223 std::vector
<uint8
> extra_data
;
224 // Check if it is MPEG4 AAC defined in ISO 14496 Part 3 or
225 // supported MPEG2 AAC variants.
226 if (ESDescriptor::IsAAC(audio_type
)) {
228 channel_layout
= aac
.GetChannelLayout(has_sbr_
);
229 sample_per_second
= aac
.GetOutputSamplesPerSecond(has_sbr_
);
230 #if defined(OS_ANDROID)
231 extra_data
= aac
.codec_specific_data();
233 } else if (audio_type
== kEAC3
) {
235 channel_layout
= GuessChannelLayout(entry
.channelcount
);
236 sample_per_second
= entry
.samplerate
;
238 MEDIA_LOG(log_cb_
) << "Unsupported audio object type 0x" << std::hex
239 << audio_type
<< " in esds.";
243 SampleFormat sample_format
;
244 if (entry
.samplesize
== 8) {
245 sample_format
= kSampleFormatU8
;
246 } else if (entry
.samplesize
== 16) {
247 sample_format
= kSampleFormatS16
;
248 } else if (entry
.samplesize
== 32) {
249 sample_format
= kSampleFormatS32
;
251 LOG(ERROR
) << "Unsupported sample size.";
255 is_audio_track_encrypted_
= entry
.sinf
.info
.track_encryption
.is_encrypted
;
256 DVLOG(1) << "is_audio_track_encrypted_: " << is_audio_track_encrypted_
;
257 audio_config
.Initialize(
258 codec
, sample_format
, channel_layout
, sample_per_second
,
259 extra_data
.size() ? &extra_data
[0] : NULL
, extra_data
.size(),
260 is_audio_track_encrypted_
, false, base::TimeDelta(),
263 audio_track_id_
= track
->header
.track_id
;
265 if (track
->media
.handler
.type
== kVideo
&& !video_config
.IsValidConfig()) {
266 RCHECK(!samp_descr
.video_entries
.empty());
267 if (desc_idx
>= samp_descr
.video_entries
.size())
269 const VideoSampleEntry
& entry
= samp_descr
.video_entries
[desc_idx
];
271 if (!entry
.IsFormatValid()) {
272 MEDIA_LOG(log_cb_
) << "Unsupported video format 0x"
273 << std::hex
<< entry
.format
<< " in stsd box.";
277 // TODO(strobe): Recover correct crop box
278 gfx::Size
coded_size(entry
.width
, entry
.height
);
279 gfx::Rect
visible_rect(coded_size
);
280 gfx::Size natural_size
= GetNaturalSize(visible_rect
.size(),
281 entry
.pixel_aspect
.h_spacing
,
282 entry
.pixel_aspect
.v_spacing
);
283 is_video_track_encrypted_
= entry
.sinf
.info
.track_encryption
.is_encrypted
;
284 DVLOG(1) << "is_video_track_encrypted_: " << is_video_track_encrypted_
;
285 video_config
.Initialize(kCodecH264
, H264PROFILE_MAIN
, VideoFrame::YV12
,
286 coded_size
, visible_rect
, natural_size
,
287 // No decoder-specific buffer needed for AVC;
288 // SPS/PPS are embedded in the video stream
289 NULL
, 0, is_video_track_encrypted_
, true);
291 video_track_id_
= track
->header
.track_id
;
295 RCHECK(config_cb_
.Run(audio_config
, video_config
));
297 base::TimeDelta duration
;
298 if (moov_
->extends
.header
.fragment_duration
> 0) {
299 duration
= TimeDeltaFromRational(moov_
->extends
.header
.fragment_duration
,
300 moov_
->header
.timescale
);
301 } else if (moov_
->header
.duration
> 0 &&
302 moov_
->header
.duration
!= kuint64max
) {
303 duration
= TimeDeltaFromRational(moov_
->header
.duration
,
304 moov_
->header
.timescale
);
306 duration
= kInfiniteDuration();
309 if (!init_cb_
.is_null())
310 base::ResetAndReturn(&init_cb_
).Run(true, duration
);
312 EmitNeedKeyIfNecessary(moov_
->pssh
);
// Handles a 'moof' box. Fails (RCHECK) unless a 'moov' has already been
// parsed into moov_. Parses |reader| into the local |moof|, replaces runs_
// with a new TrackRunIterator built from moov_ and initializes it from |moof|,
// emits need-key for moof.pssh, runs new_segment_cb_, and switches to
// kEmittingSamples.
// NOTE(review): the declaration of |moof| and the return statement are not
// visible in this listing.
316 bool MP4StreamParser::ParseMoof(BoxReader
* reader
) {
317 RCHECK(moov_
.get()); // Must already have initialization segment
319 RCHECK(moof
.Parse(reader
));
321 runs_
.reset(new TrackRunIterator(moov_
.get(), log_cb_
));
322 RCHECK(runs_
->Init(moof
));
323 EmitNeedKeyIfNecessary(moof
.pssh
);
324 new_segment_cb_
.Run();
325 ChangeState(kEmittingSamples
);
// Concatenates the raw_box bytes of every entry in |headers| (in vector
// order) into one buffer and passes it to need_key_cb_ together with
// kMp4InitDataType.
// NOTE(review): the "if necessary" guard (presumably an early return when
// |headers| is empty) and the declaration of |pos| are not visible in this
// listing — confirm against the full file.
329 void MP4StreamParser::EmitNeedKeyIfNecessary(
330 const std::vector
<ProtectionSystemSpecificHeader
>& headers
) {
331 // TODO(strobe): ensure that the value of init_data (all PSSH headers
332 // concatenated in arbitrary order) matches the EME spec.
333 // See https://www.w3.org/Bugs/Public/show_bug.cgi?id=17673.
337 size_t total_size
= 0;
338 for (size_t i
= 0; i
< headers
.size(); i
++)
339 total_size
+= headers
[i
].raw_box
.size();
341 std::vector
<uint8
> init_data(total_size
);
343 for (size_t i
= 0; i
< headers
.size(); i
++) {
344 memcpy(&init_data
[pos
], &headers
[i
].raw_box
[0],
345 headers
[i
].raw_box
.size());
346 pos
+= headers
[i
].raw_box
.size();
348 need_key_cb_
.Run(kMp4InitDataType
, init_data
);
// Rewrites |frame_buf| in place with AVC::ConvertFrameToAnnexB() and keeps
// |subsamples| consistent with the resized buffer.
//
// When |subsamples| is non-empty, each entry's clear_bytes grows by
// (4 - avc_config.length_size), and the converted buffer size must equal
// runs_->sample_size() plus that difference per subsample (RCHECK).
// For keyframes, the parameter sets produced by AVC::ConvertConfigToAnnexB()
// are inserted at the front of |frame_buf| and their size is added to the
// first subsample's clear_bytes.
// NOTE(review): the return statement and closing braces are not visible in
// this listing.
351 bool MP4StreamParser::PrepareAVCBuffer(
352 const AVCDecoderConfigurationRecord
& avc_config
,
353 std::vector
<uint8
>* frame_buf
,
354 std::vector
<SubsampleEntry
>* subsamples
) const {
355 // Convert the AVC NALU length fields to Annex B headers, as expected by
356 // decoding libraries. Since this may enlarge the size of the buffer, we also
357 // update the clear byte count for each subsample if encryption is used to
358 // account for the difference in size between the length prefix and Annex B
360 RCHECK(AVC::ConvertFrameToAnnexB(avc_config
.length_size
, frame_buf
));
361 if (!subsamples
->empty()) {
362 const int nalu_size_diff
= 4 - avc_config
.length_size
;
363 size_t expected_size
= runs_
->sample_size() +
364 subsamples
->size() * nalu_size_diff
;
365 RCHECK(frame_buf
->size() == expected_size
);
366 for (size_t i
= 0; i
< subsamples
->size(); i
++)
367 (*subsamples
)[i
].clear_bytes
+= nalu_size_diff
;
370 if (runs_
->is_keyframe()) {
371 // If this is a keyframe, we (re-)inject SPS and PPS headers at the start of
372 // a frame. If subsample info is present, we also update the clear byte
373 // count for that first subsample.
374 std::vector
<uint8
> param_sets
;
// NOTE(review): the second argument below looks like a mis-encoded
// "&param_sets" (address of the local declared above) — confirm and repair
// against the original file.
375 RCHECK(AVC::ConvertConfigToAnnexB(avc_config
, ¶m_sets
));
376 frame_buf
->insert(frame_buf
->begin(),
377 param_sets
.begin(), param_sets
.end());
378 if (!subsamples
->empty())
379 (*subsamples
)[0].clear_bytes
+= param_sets
.size();
// Adds an ADTS header to |frame_buf| via aac_config.ConvertEsdsToADTS() and
// accounts for it in |subsamples|: if there are no subsamples, one entry is
// added with the header as clear bytes and the rest of the buffer as cypher
// bytes; the first subsample's clear_bytes is grown by AAC::kADTSHeaderSize.
// Fails (RCHECK) if the ADTS conversion fails.
// NOTE(review): the else keyword, return statement and closing braces are
// not visible in this listing.
384 bool MP4StreamParser::PrepareAACBuffer(
385 const AAC
& aac_config
, std::vector
<uint8
>* frame_buf
,
386 std::vector
<SubsampleEntry
>* subsamples
) const {
387 // Append an ADTS header to every audio sample.
388 RCHECK(aac_config
.ConvertEsdsToADTS(frame_buf
));
390 // As above, adjust subsample information to account for the headers. AAC is
391 // not required to use subsample encryption, so we may need to add an entry.
392 if (subsamples
->empty()) {
393 SubsampleEntry entry
;
394 entry
.clear_bytes
= AAC::kADTSHeaderSize
;
395 entry
.cypher_bytes
= frame_buf
->size() - AAC::kADTSHeaderSize
;
396 subsamples
->push_back(entry
);
398 (*subsamples
)[0].clear_bytes
+= AAC::kADTSHeaderSize
;
// Emits the sample that runs_ currently points at into |audio_buffers| or
// |video_buffers|, then advances runs_.
//
// Visible flow:
//  * If the current run is invalid, pending buffers are flushed with
//    SendAndFlushSamples(), queue_ is trimmed to mdat_tail_, the parser goes
//    back to kParsingBoxes and end_of_segment_cb_ is run.
//  * Samples belonging to neither the selected audio nor video track are
//    skipped.
//  * Auxiliary info is cached first when runs_ asks for it; the function
//    returns false if not enough bytes are queued for it or for the sample.
//  * For encrypted runs the DecryptConfig and its subsamples come from runs_.
//    The sample bytes are copied, run through PrepareAVCBuffer() or
//    PrepareAACBuffer(), and the DecryptConfig is rebuilt when subsamples are
//    present. Encrypted tracks with an unencrypted sample get a placeholder
//    config.
//  * The resulting StreamParserBuffer gets duration, timestamp (cts) and
//    decode timestamp (dts) from runs_.
// NOTE(review): the |err| parameter, local declarations, several branch
// keywords, returns and closing braces are not visible in this listing —
// confirm exact control flow against the full file.
403 bool MP4StreamParser::EnqueueSample(BufferQueue
* audio_buffers
,
404 BufferQueue
* video_buffers
,
406 if (!runs_
->IsRunValid()) {
407 // Flush any buffers we've gotten in this chunk so that buffers don't
408 // cross NewSegment() calls
409 *err
= !SendAndFlushSamples(audio_buffers
, video_buffers
);
413 // Remain in kEmittingSamples state, discarding data, until the end of
414 // the current 'mdat' box has been appended to the queue.
415 if (!queue_
.Trim(mdat_tail_
))
418 ChangeState(kParsingBoxes
);
419 end_of_segment_cb_
.Run();
423 if (!runs_
->IsSampleValid()) {
432 queue_
.Peek(&buf
, &buf_size
);
433 if (!buf_size
) return false;
435 bool audio
= has_audio_
&& audio_track_id_
== runs_
->track_id();
436 bool video
= has_video_
&& video_track_id_
== runs_
->track_id();
438 // Skip this entire track if it's not one we're interested in
439 if (!audio
&& !video
)
442 // Attempt to cache the auxiliary information first. Aux info is usually
443 // placed in a contiguous block before the sample data, rather than being
444 // interleaved. If we didn't cache it, this would require that we retain the
445 // start of the segment buffer while reading samples. Aux info is typically
446 // quite small compared to sample data, so this pattern is useful on
447 // memory-constrained devices where the source buffer consumes a substantial
448 // portion of the total system memory.
449 if (runs_
->AuxInfoNeedsToBeCached()) {
450 queue_
.PeekAt(runs_
->aux_info_offset() + moof_head_
, &buf
, &buf_size
);
451 if (buf_size
< runs_
->aux_info_size()) return false;
452 *err
= !runs_
->CacheAuxInfo(buf
, buf_size
);
456 queue_
.PeekAt(runs_
->sample_offset() + moof_head_
, &buf
, &buf_size
);
457 if (buf_size
< runs_
->sample_size()) return false;
459 scoped_ptr
<DecryptConfig
> decrypt_config
;
460 std::vector
<SubsampleEntry
> subsamples
;
461 if (runs_
->is_encrypted()) {
462 decrypt_config
= runs_
->GetDecryptConfig();
463 if (!decrypt_config
) {
467 subsamples
= decrypt_config
->subsamples();
470 std::vector
<uint8
> frame_buf(buf
, buf
+ runs_
->sample_size());
472 if (!PrepareAVCBuffer(runs_
->video_description().avcc
,
473 &frame_buf
, &subsamples
)) {
474 MEDIA_LOG(log_cb_
) << "Failed to prepare AVC sample for decode";
481 if (ESDescriptor::IsAAC(runs_
->audio_description().esds
.object_type
) &&
482 !PrepareAACBuffer(runs_
->audio_description().esds
.aac
,
483 &frame_buf
, &subsamples
)) {
484 MEDIA_LOG(log_cb_
) << "Failed to prepare AAC sample for decode";
490 if (decrypt_config
) {
491 if (!subsamples
.empty()) {
492 // Create a new config with the updated subsamples.
493 decrypt_config
.reset(new DecryptConfig(
494 decrypt_config
->key_id(),
495 decrypt_config
->iv(),
496 decrypt_config
->data_offset(),
499 // else, use the existing config.
500 } else if ((audio
&& is_audio_track_encrypted_
) ||
501 (video
&& is_video_track_encrypted_
)) {
502 // The media pipeline requires a DecryptConfig with an empty |iv|.
503 // TODO(ddorwin): Refactor so we do not need a fake key ID ("1");
504 decrypt_config
.reset(
505 new DecryptConfig("1", "", 0, std::vector
<SubsampleEntry
>()));
508 scoped_refptr
<StreamParserBuffer
> stream_buf
=
509 StreamParserBuffer::CopyFrom(&frame_buf
[0], frame_buf
.size(),
510 runs_
->is_keyframe());
513 stream_buf
->set_decrypt_config(decrypt_config
.Pass());
515 stream_buf
->set_duration(runs_
->duration());
516 stream_buf
->set_timestamp(runs_
->cts());
517 stream_buf
->SetDecodeTimestamp(runs_
->dts());
519 DVLOG(3) << "Pushing frame: aud=" << audio
520 << ", key=" << runs_
->is_keyframe()
521 << ", dur=" << runs_
->duration().InMilliseconds()
522 << ", dts=" << runs_
->dts().InMilliseconds()
523 << ", cts=" << runs_
->cts().InMilliseconds()
524 << ", size=" << runs_
->sample_size();
527 audio_buffers
->push_back(stream_buf
);
529 video_buffers
->push_back(stream_buf
);
532 runs_
->AdvanceSample();
// Passes the accumulated |audio_buffers| and |video_buffers| to
// new_buffers_cb_ and then clears both queues. The callback's result is kept
// in |success|. The callback is only reached after the both-queues-empty
// check.
// NOTE(review): the statement under the empty check and the final return are
// not visible in this listing (presumably `return true;` and
// `return success;`) — confirm against the full file.
536 bool MP4StreamParser::SendAndFlushSamples(BufferQueue
* audio_buffers
,
537 BufferQueue
* video_buffers
) {
538 if (audio_buffers
->empty() && video_buffers
->empty())
541 bool success
= new_buffers_cb_
.Run(*audio_buffers
, *video_buffers
);
542 audio_buffers
->clear();
543 video_buffers
->clear();
// Advances mdat_tail_ box by box until it reaches |offset|, then trims queue_
// up to min(mdat_tail_, offset).
//
// Each iteration peeks at the data at mdat_tail_ and reads the box header with
// BoxReader::StartTopLevelBox(). A box that is not 'mdat' is logged as
// unexpected. mdat_tail_ is advanced by the box size.
// NOTE(review): local declarations, the handling of a failed header read, and
// the return statements are not visible in this listing — confirm against the
// full file.
547 bool MP4StreamParser::ReadAndDiscardMDATsUntil(const int64 offset
) {
549 while (mdat_tail_
< offset
) {
552 queue_
.PeekAt(mdat_tail_
, &buf
, &size
);
556 if (!BoxReader::StartTopLevelBox(buf
, size
, log_cb_
,
557 &type
, &box_sz
, &err
))
560 if (type
!= FOURCC_MDAT
) {
561 MEDIA_LOG(log_cb_
) << "Unexpected box type while parsing MDATs: "
562 << FourCCToString(type
);
564 mdat_tail_
+= box_sz
;
566 queue_
.Trim(std::min(mdat_tail_
, offset
));
// Logs |new_state| at verbose level 2.
// NOTE(review): the assignment of |new_state| to state_ is not visible in
// this listing — confirm against the full file.
570 void MP4StreamParser::ChangeState(State new_state
) {
571 DVLOG(2) << "Changing state: " << new_state
;