1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "media/mp4/mp4_stream_parser.h"
7 #include "base/callback.h"
8 #include "base/callback_helpers.h"
9 #include "base/logging.h"
10 #include "base/time.h"
11 #include "media/base/audio_decoder_config.h"
12 #include "media/base/stream_parser_buffer.h"
13 #include "media/base/video_decoder_config.h"
14 #include "media/base/video_util.h"
15 #include "media/mp4/box_definitions.h"
16 #include "media/mp4/box_reader.h"
17 #include "media/mp4/es_descriptor.h"
18 #include "media/mp4/rcheck.h"
// Init data type string that EmitNeedKeyIfNecessary() hands to need_key_cb_
// together with the concatenated protection system headers.
23 // TODO(xhwang): Figure out the init data type appropriately once it's spec'ed.
24 static const char kMp4InitDataType
[] = "video/mp4";
// Constructs a parser that starts in the kWaitingForInit state, initializes
// audio_object_types_ from |audio_object_types| and marks both the audio and
// the video track as unencrypted.
// NOTE(review): the parameter list and the member initializer list are only
// partially visible here (for example, has_sbr_ is read in ParseMoov() but its
// initialization is not shown) -- confirm against the complete file.
26 MP4StreamParser::MP4StreamParser(const std::set
<int>& audio_object_types
,
28 : state_(kWaitingForInit
),
35 audio_object_types_(audio_object_types
),
37 is_audio_track_encrypted_(false),
38 is_video_track_encrypted_(false) {
// Destructor; the body is intentionally empty.
41 MP4StreamParser::~MP4StreamParser() {}
// Registers the parser's callbacks and moves the parser from kWaitingForInit
// to kParsingBoxes.
//
// Preconditions (enforced with DCHECKs):
//  - the parser is still in kWaitingForInit and init_cb_ has not been set;
//  - |init_cb|, |config_cb|, |need_key_cb| and |end_of_segment_cb| are
//    non-null;
//  - at least one of |audio_cb| / |video_cb| is non-null.
//
// NOTE(review): |new_segment_cb| is not null-checked here, although
// ParseMoof() runs new_segment_cb_ without a null check -- confirm that
// callers always supply it.
// NOTE(review): the assignments for some members that other methods read
// (e.g. init_cb_, audio_cb_, video_cb_, log_cb_) are not visible in this
// listing.
43 void MP4StreamParser::Init(const InitCB
& init_cb
,
44 const NewConfigCB
& config_cb
,
45 const NewBuffersCB
& audio_cb
,
46 const NewBuffersCB
& video_cb
,
47 const NeedKeyCB
& need_key_cb
,
48 const NewMediaSegmentCB
& new_segment_cb
,
49 const base::Closure
& end_of_segment_cb
,
50 const LogCB
& log_cb
) {
51 DCHECK_EQ(state_
, kWaitingForInit
);
52 DCHECK(init_cb_
.is_null());
53 DCHECK(!init_cb
.is_null());
54 DCHECK(!config_cb
.is_null());
// Either stream may be absent, but not both.
55 DCHECK(!audio_cb
.is_null() || !video_cb
.is_null());
56 DCHECK(!need_key_cb
.is_null());
57 DCHECK(!end_of_segment_cb
.is_null());
59 ChangeState(kParsingBoxes
);
61 config_cb_
= config_cb
;
64 need_key_cb_
= need_key_cb
;
65 new_segment_cb_
= new_segment_cb
;
66 end_of_segment_cb_
= end_of_segment_cb
;
// NOTE(review): only the opening line of Reset() is visible in this listing;
// its body cannot be documented from here.
70 void MP4StreamParser::Reset() {
// Returns the parser to the kParsingBoxes state. DCHECKs that the parser has
// left kWaitingForInit, i.e. that Init() has already been called.
// NOTE(review): at least one statement between the DCHECK and ChangeState()
// is not visible in this listing.
78 void MP4StreamParser::Flush() {
79 DCHECK_NE(state_
, kWaitingForInit
);
81 ChangeState(kParsingBoxes
);
// Appends |size| bytes starting at |buf| to queue_ and then drives the state
// machine over the queued data:
//  - in kParsingBoxes, ParseBox() is called;
//  - otherwise the state must be kEmittingSamples: EnqueueSample() collects
//    buffers into the local audio/video queues, and
//    ReadAndDiscardMDATsUntil() is called with the run iterator's max clear
//    offset, rebased by moof_head_.
// The loop continues while the last step reported progress (|result|) and no
// error was flagged (|err|). The collected buffers are handed to
// SendAndFlushSamples(); an error is logged with DLOG(ERROR).
//
// NOTE(review): the loop header, the conditions guarding the
// ReadAndDiscardMDATsUntil() / SendAndFlushSamples() calls, and the return
// statements are not visible in this listing.
84 bool MP4StreamParser::Parse(const uint8
* buf
, int size
) {
85 DCHECK_NE(state_
, kWaitingForInit
);
90 queue_
.Push(buf
, size
);
92 BufferQueue audio_buffers
;
93 BufferQueue video_buffers
;
// Only |err| is initialized by this declaration; |result| is assigned by
// ParseBox() / EnqueueSample() below before the loop condition reads it.
95 bool result
, err
= false;
98 if (state_
== kParsingBoxes
) {
99 result
= ParseBox(&err
);
101 DCHECK_EQ(kEmittingSamples
, state_
);
102 result
= EnqueueSample(&audio_buffers
, &video_buffers
, &err
);
// Run offsets are relative to the start of the 'moof', hence the rebase.
104 int64 max_clear
= runs_
->GetMaxClearOffset() + moof_head_
;
105 err
= !ReadAndDiscardMDATsUntil(max_clear
);
108 } while (result
&& !err
);
111 err
= !SendAndFlushSamples(&audio_buffers
, &video_buffers
);
114 DLOG(ERROR
) << "Error while parsing MP4";
// Tries to parse one top-level box from the front of queue_.
//
// Returns false when the queue holds no data or when
// BoxReader::ReadTopLevelBox() produces no reader. |err| is passed to
// ReadTopLevelBox() and is additionally set to the negated result of
// ParseMoov() / ParseMoof().
//
// Dispatch by box type:
//  - FOURCC_MOOV: ParseMoov().
//  - FOURCC_MOOF: records the queue head as moof_head_, runs ParseMoof(), and
//    sets mdat_tail_ to the end of the 'moof' box.
//  - anything else: logged as an unrecognized top-level box.
// queue_.Pop() removes the box from the queue; per the comment below, the
// 'moof' path returns early so that its bytes stay queued.
//
// NOTE(review): the declarations of |buf| / |size| and the return statements
// are not visible in this listing.
123 bool MP4StreamParser::ParseBox(bool* err
) {
126 queue_
.Peek(&buf
, &size
);
127 if (!size
) return false;
129 scoped_ptr
<BoxReader
> reader(
130 BoxReader::ReadTopLevelBox(buf
, size
, log_cb_
, err
));
131 if (reader
.get() == NULL
) return false;
133 if (reader
->type() == FOURCC_MOOV
) {
134 *err
= !ParseMoov(reader
.get());
135 } else if (reader
->type() == FOURCC_MOOF
) {
136 moof_head_
= queue_
.head();
137 *err
= !ParseMoof(reader
.get());
139 // Set up first mdat offset for ReadAndDiscardMDATsUntil().
140 mdat_tail_
= queue_
.head() + reader
->size();
142 // Return early to avoid evicting 'moof' data from queue. Auxiliary info may
143 // be located anywhere in the file, including inside the 'moof' itself.
144 // (Since 'default-base-is-moof' is mandated, no data references can come
145 // before the head of the 'moof', so keeping this box around is sufficient.)
148 MEDIA_LOG(log_cb_
) << "Skipping unrecognized top-level box: "
149 << FourCCToString(reader
->type());
152 queue_
.Pop(reader
->size());
// Parses a 'moov' box from |reader| into moov_, creates the track run
// iterator (runs_), and builds the audio and video decoder configs from the
// movie's tracks.
//
// For each track:
//  - the sample description index is taken from the 'trex' entry whose
//    track_id matches the track; it must be > 0 and is converted from
//    one-based to zero-based;
//  - an audio track is considered only while |audio_config| is not yet valid.
//    Its entry must be FOURCC_MP4A (or FOURCC_ENCA wrapping FOURCC_MP4A), its
//    esds object type must be present in audio_object_types_ and be
//    kISO_14496_3 or kISO_13818_7_AAC_LC, and its sample size must be 8, 16
//    or 32 bits; violations are logged;
//  - a video track is considered only while |video_config| is not yet valid.
//    Its entry must be FOURCC_AVC1 (or FOURCC_ENCV wrapping FOURCC_AVC1).
//
// Afterwards the configs are passed to config_cb_, the presentation duration
// is derived (fragment duration first, then the movie header duration,
// with kInfiniteDuration() as the remaining assignment), init_cb_ is run once
// through base::ResetAndReturn() if it is still set, and
// EmitNeedKeyIfNecessary() is called with moov_->pssh.
//
// NOTE(review): RCHECK comes from media/mp4/rcheck.h; it presumably makes the
// function return false when the condition fails -- confirm there.
// NOTE(review): the declaration of |desc_idx|, the statements executed after
// each "unsupported" log message, and the final return are not visible in
// this listing.
157 bool MP4StreamParser::ParseMoov(BoxReader
* reader
) {
158 moov_
.reset(new Movie
);
159 RCHECK(moov_
->Parse(reader
));
160 runs_
.reset(new TrackRunIterator(moov_
.get(), log_cb_
));
165 AudioDecoderConfig audio_config
;
166 VideoDecoderConfig video_config
;
168 for (std::vector
<Track
>::const_iterator track
= moov_
->tracks
.begin();
169 track
!= moov_
->tracks
.end(); ++track
) {
170 // TODO(strobe): Only the first audio and video track present in a file are
171 // used. (Track selection is better accomplished via Source IDs, though, so
172 // adding support for track selection within a stream is low-priority.)
173 const SampleDescription
& samp_descr
=
174 track
->media
.information
.sample_table
.description
;
176 // TODO(strobe): When codec reconfigurations are supported, detect and send
177 // a codec reconfiguration for fragments using a sample description index
178 // different from the previous one
// Look up the default sample description index in the matching 'trex' entry.
180 for (size_t t
= 0; t
< moov_
->extends
.tracks
.size(); t
++) {
181 const TrackExtends
& trex
= moov_
->extends
.tracks
[t
];
182 if (trex
.track_id
== track
->header
.track_id
) {
183 desc_idx
= trex
.default_sample_description_index
;
187 RCHECK(desc_idx
> 0);
188 desc_idx
-= 1; // BMFF descriptor index is one-based
190 if (track
->media
.handler
.type
== kAudio
&& !audio_config
.IsValidConfig()) {
191 RCHECK(!samp_descr
.audio_entries
.empty());
193 // It is not uncommon to find otherwise-valid files with incorrect sample
194 // description indices, so we fail gracefully in that case.
195 if (desc_idx
>= samp_descr
.audio_entries
.size())
197 const AudioSampleEntry
& entry
= samp_descr
.audio_entries
[desc_idx
];
198 const AAC
& aac
= entry
.esds
.aac
;
200 if (!(entry
.format
== FOURCC_MP4A
||
201 (entry
.format
== FOURCC_ENCA
&&
202 entry
.sinf
.format
.format
== FOURCC_MP4A
))) {
203 MEDIA_LOG(log_cb_
) << "Unsupported audio format 0x"
204 << std::hex
<< entry
.format
<< " in stsd box.";
208 int audio_type
= entry
.esds
.object_type
;
209 DVLOG(1) << "audio_type " << std::hex
<< audio_type
;
210 if (audio_object_types_
.find(audio_type
) == audio_object_types_
.end()) {
211 MEDIA_LOG(log_cb_
) << "audio object type 0x" << std::hex
<< audio_type
212 << " does not match what is specified in the"
217 // Check if it is MPEG4 AAC defined in ISO 14496 Part 3 or
218 // supported MPEG2 AAC variants.
219 if (audio_type
!= kISO_14496_3
&& audio_type
!= kISO_13818_7_AAC_LC
) {
220 MEDIA_LOG(log_cb_
) << "Unsupported audio object type 0x" << std::hex
221 << audio_type
<< " in esds.";
// Map the entry's sample size in bits onto a SampleFormat.
225 SampleFormat sample_format
;
226 if (entry
.samplesize
== 8) {
227 sample_format
= kSampleFormatU8
;
228 } else if (entry
.samplesize
== 16) {
229 sample_format
= kSampleFormatS16
;
230 } else if (entry
.samplesize
== 32) {
231 sample_format
= kSampleFormatS32
;
233 LOG(ERROR
) << "Unsupported sample size.";
237 is_audio_track_encrypted_
= entry
.sinf
.info
.track_encryption
.is_encrypted
;
238 DVLOG(1) << "is_audio_track_encrypted_: " << is_audio_track_encrypted_
;
239 audio_config
.Initialize(kCodecAAC
, sample_format
,
240 aac
.GetChannelLayout(has_sbr_
),
241 aac
.GetOutputSamplesPerSecond(has_sbr_
),
242 NULL
, 0, is_audio_track_encrypted_
, false);
244 audio_track_id_
= track
->header
.track_id
;
246 if (track
->media
.handler
.type
== kVideo
&& !video_config
.IsValidConfig()) {
247 RCHECK(!samp_descr
.video_entries
.empty());
248 if (desc_idx
>= samp_descr
.video_entries
.size())
250 const VideoSampleEntry
& entry
= samp_descr
.video_entries
[desc_idx
];
252 if (!(entry
.format
== FOURCC_AVC1
||
253 (entry
.format
== FOURCC_ENCV
&&
254 entry
.sinf
.format
.format
== FOURCC_AVC1
))) {
255 MEDIA_LOG(log_cb_
) << "Unsupported video format 0x"
256 << std::hex
<< entry
.format
<< " in stsd box.";
260 // TODO(strobe): Recover correct crop box
261 gfx::Size
coded_size(entry
.width
, entry
.height
);
262 gfx::Rect
visible_rect(coded_size
);
263 gfx::Size natural_size
= GetNaturalSize(visible_rect
.size(),
264 entry
.pixel_aspect
.h_spacing
,
265 entry
.pixel_aspect
.v_spacing
);
266 is_video_track_encrypted_
= entry
.sinf
.info
.track_encryption
.is_encrypted
;
267 DVLOG(1) << "is_video_track_encrypted_: " << is_video_track_encrypted_
;
268 video_config
.Initialize(kCodecH264
, H264PROFILE_MAIN
, VideoFrame::YV12
,
269 coded_size
, visible_rect
, natural_size
,
270 // No decoder-specific buffer needed for AVC;
271 // SPS/PPS are embedded in the video stream
272 NULL
, 0, is_video_track_encrypted_
, true);
274 video_track_id_
= track
->header
.track_id
;
278 RCHECK(config_cb_
.Run(audio_config
, video_config
));
// Duration: prefer the movie-extends fragment duration, then the movie
// header duration (ignoring 0 and kuint64max).
280 base::TimeDelta duration
;
281 if (moov_
->extends
.header
.fragment_duration
> 0) {
282 duration
= TimeDeltaFromRational(moov_
->extends
.header
.fragment_duration
,
283 moov_
->header
.timescale
);
284 } else if (moov_
->header
.duration
> 0 &&
285 moov_
->header
.duration
!= kuint64max
) {
286 duration
= TimeDeltaFromRational(moov_
->header
.duration
,
287 moov_
->header
.timescale
);
289 duration
= kInfiniteDuration();
292 if (!init_cb_
.is_null())
293 base::ResetAndReturn(&init_cb_
).Run(true, duration
);
295 RCHECK(EmitNeedKeyIfNecessary(moov_
->pssh
));
// Parses a 'moof' box from |reader|. Requires that a 'moov' has already been
// parsed (moov_ is set). On the visible path it initializes runs_ with the
// parsed fragment, calls EmitNeedKeyIfNecessary() with the fragment's pssh
// headers, notifies new_segment_cb_ with runs_->GetMinDecodeTimestamp(), and
// switches to the kEmittingSamples state.
// NOTE(review): the declaration of |moof| and the final return statement are
// not visible in this listing. new_segment_cb_ is run without a null check.
299 bool MP4StreamParser::ParseMoof(BoxReader
* reader
) {
300 RCHECK(moov_
.get()); // Must already have initialization segment
302 RCHECK(moof
.Parse(reader
));
303 RCHECK(runs_
->Init(moof
));
304 RCHECK(EmitNeedKeyIfNecessary(moof
.pssh
));
305 new_segment_cb_
.Run(runs_
->GetMinDecodeTimestamp());
306 ChangeState(kEmittingSamples
);
// Concatenates the raw_box bytes of every entry in |headers| into one newly
// allocated buffer and passes it, together with kMp4InitDataType and the
// total byte count, to need_key_cb_. Returns the callback's result.
// NOTE(review): the declaration of |pos| and any early exit taken before the
// size computation are not visible in this listing.
// NOTE(review): &headers[i].raw_box[0] assumes every raw_box is non-empty --
// confirm that the box parser guarantees this.
310 bool MP4StreamParser::EmitNeedKeyIfNecessary(
311 const std::vector
<ProtectionSystemSpecificHeader
>& headers
) {
312 // TODO(strobe): ensure that the value of init_data (all PSSH headers
313 // concatenated in arbitrary order) matches the EME spec.
314 // See https://www.w3.org/Bugs/Public/show_bug.cgi?id=17673.
// First pass: compute the size of the concatenated buffer.
318 size_t total_size
= 0;
319 for (size_t i
= 0; i
< headers
.size(); i
++)
320 total_size
+= headers
[i
].raw_box
.size();
322 scoped_ptr
<uint8
[]> init_data(new uint8
[total_size
]);
// Second pass: copy each raw box back to back, advancing |pos|.
324 for (size_t i
= 0; i
< headers
.size(); i
++) {
325 memcpy(&init_data
.get()[pos
], &headers
[i
].raw_box
[0],
326 headers
[i
].raw_box
.size());
327 pos
+= headers
[i
].raw_box
.size();
329 return need_key_cb_
.Run(kMp4InitDataType
, init_data
.Pass(), total_size
);
// Prepares an AVC sample held in |frame_buf| for decoding and keeps
// |subsamples| consistent with the rewritten buffer.
//  - |avc_config| supplies the NALU length-field size and the parameter sets.
//  - |frame_buf| is rewritten by AVC::ConvertFrameToAnnexB().
//  - |subsamples|, when non-empty, has nalu_size_diff added to the
//    clear_bytes of every entry, after checking that the converted frame grew
//    by exactly subsamples->size() * nalu_size_diff bytes relative to
//    runs_->sample_size().
// For keyframes the Annex B form of the parameter sets is inserted at the
// front of |frame_buf| and its size is added to the first subsample's
// clear_bytes.
// NOTE(review): the size check presumably relies on one NALU per subsample --
// confirm. The final return statement is not visible in this listing.
332 bool MP4StreamParser::PrepareAVCBuffer(
333 const AVCDecoderConfigurationRecord
& avc_config
,
334 std::vector
<uint8
>* frame_buf
,
335 std::vector
<SubsampleEntry
>* subsamples
) const {
336 // Convert the AVC NALU length fields to Annex B headers, as expected by
337 // decoding libraries. Since this may enlarge the size of the buffer, we also
338 // update the clear byte count for each subsample if encryption is used to
339 // account for the difference in size between the length prefix and Annex B
// start code (taken to be 4 bytes by nalu_size_diff below).
341 RCHECK(AVC::ConvertFrameToAnnexB(avc_config
.length_size
, frame_buf
));
342 if (!subsamples
->empty()) {
343 const int nalu_size_diff
= 4 - avc_config
.length_size
;
344 size_t expected_size
= runs_
->sample_size() +
345 subsamples
->size() * nalu_size_diff
;
346 RCHECK(frame_buf
->size() == expected_size
);
347 for (size_t i
= 0; i
< subsamples
->size(); i
++)
348 (*subsamples
)[i
].clear_bytes
+= nalu_size_diff
;
351 if (runs_
->is_keyframe()) {
352 // If this is a keyframe, we (re-)inject SPS and PPS headers at the start of
353 // a frame. If subsample info is present, we also update the clear byte
354 // count for that first subsample.
355 std::vector
<uint8
> param_sets
;
// NOTE(review): the second argument below looks like a mis-encoded
// "address of param_sets" -- restore it from the original file.
356 RCHECK(AVC::ConvertConfigToAnnexB(avc_config
, ¶m_sets
));
357 frame_buf
->insert(frame_buf
->begin(),
358 param_sets
.begin(), param_sets
.end());
359 if (!subsamples
->empty())
360 (*subsamples
)[0].clear_bytes
+= param_sets
.size();
// Prepares an AAC sample held in |frame_buf| for decoding by running
// aac_config.ConvertEsdsToADTS() on it, then adjusts |subsamples| for the
// AAC::kADTSHeaderSize bytes of header:
//  - if |subsamples| is empty, one entry is added whose clear_bytes is the
//    header size and whose cypher_bytes is the rest of |frame_buf|;
//  - the other visible statement adds the header size to the first entry's
//    clear_bytes (presumably the else branch -- the branch keyword is not
//    visible in this listing).
// NOTE(review): the final return statement is not visible in this listing.
365 bool MP4StreamParser::PrepareAACBuffer(
366 const AAC
& aac_config
, std::vector
<uint8
>* frame_buf
,
367 std::vector
<SubsampleEntry
>* subsamples
) const {
368 // Append an ADTS header to every audio sample.
369 RCHECK(aac_config
.ConvertEsdsToADTS(frame_buf
));
371 // As above, adjust subsample information to account for the headers. AAC is
372 // not required to use subsample encryption, so we may need to add an entry.
373 if (subsamples
->empty()) {
374 SubsampleEntry entry
;
375 entry
.clear_bytes
= AAC::kADTSHeaderSize
;
376 entry
.cypher_bytes
= frame_buf
->size() - AAC::kADTSHeaderSize
;
377 subsamples
->push_back(entry
);
379 (*subsamples
)[0].clear_bytes
+= AAC::kADTSHeaderSize
;
// Emits at most one sample from the current track run into |audio_buffers|
// or |video_buffers|.
//
// Visible flow:
//  1. If the current run is no longer valid, pending buffers are flushed via
//     SendAndFlushSamples() (setting |*err| on failure), queue_ is trimmed up
//     to mdat_tail_, the parser switches back to kParsingBoxes, and
//     end_of_segment_cb_ is run.
//  2. Returns false when the queue holds no data.
//  3. Samples of tracks other than audio_track_id_ / video_track_id_ are
//     skipped.
//  4. Auxiliary info is cached through runs_->CacheAuxInfo() when the
//     iterator asks for it; returns false until enough bytes are queued.
//  5. Returns false until the whole sample (runs_->sample_size() bytes at
//     runs_->sample_offset() + moof_head_) is queued.
//  6. The sample bytes are copied into |frame_buf| and passed through
//     PrepareAVCBuffer() / PrepareAACBuffer(); a decrypt config is attached
//     for encrypted samples, or a placeholder config when the track is
//     flagged encrypted but the sample is not.
//  7. The resulting StreamParserBuffer gets duration, presentation timestamp
//     and decode timestamp from runs_, is pushed onto the matching queue, and
//     the iterator advances to the next sample.
//
// NOTE(review): the third parameter (|err|, written below and supplied by
// Parse()), several branch bodies, the declarations of |buf| / |buf_size| and
// all return statements other than the ones shown are not visible in this
// listing.
// NOTE(review): &frame_buf[0] below assumes a non-empty sample -- confirm
// that zero-sized samples cannot reach this point.
384 bool MP4StreamParser::EnqueueSample(BufferQueue
* audio_buffers
,
385 BufferQueue
* video_buffers
,
387 if (!runs_
->IsRunValid()) {
388 // Flush any buffers we've gotten in this chunk so that buffers don't
389 // cross NewSegment() calls
390 *err
= !SendAndFlushSamples(audio_buffers
, video_buffers
);
394 // Remain in kEmittingSamples state, discarding data, until the end of
395 // the current 'mdat' box has been appended to the queue.
396 if (!queue_
.Trim(mdat_tail_
))
399 ChangeState(kParsingBoxes
);
400 end_of_segment_cb_
.Run();
404 if (!runs_
->IsSampleValid()) {
413 queue_
.Peek(&buf
, &buf_size
);
414 if (!buf_size
) return false;
416 bool audio
= has_audio_
&& audio_track_id_
== runs_
->track_id();
417 bool video
= has_video_
&& video_track_id_
== runs_
->track_id();
419 // Skip this entire track if it's not one we're interested in
420 if (!audio
&& !video
)
423 // Attempt to cache the auxiliary information first. Aux info is usually
424 // placed in a contiguous block before the sample data, rather than being
425 // interleaved. If we didn't cache it, this would require that we retain the
426 // start of the segment buffer while reading samples. Aux info is typically
427 // quite small compared to sample data, so this pattern is useful on
428 // memory-constrained devices where the source buffer consumes a substantial
429 // portion of the total system memory.
430 if (runs_
->AuxInfoNeedsToBeCached()) {
431 queue_
.PeekAt(runs_
->aux_info_offset() + moof_head_
, &buf
, &buf_size
);
432 if (buf_size
< runs_
->aux_info_size()) return false;
433 *err
= !runs_
->CacheAuxInfo(buf
, buf_size
);
// Wait until the complete sample is available in the queue.
437 queue_
.PeekAt(runs_
->sample_offset() + moof_head_
, &buf
, &buf_size
);
438 if (buf_size
< runs_
->sample_size()) return false;
440 scoped_ptr
<DecryptConfig
> decrypt_config
;
441 std::vector
<SubsampleEntry
> subsamples
;
442 if (runs_
->is_encrypted()) {
443 decrypt_config
= runs_
->GetDecryptConfig();
444 subsamples
= decrypt_config
->subsamples();
// Work on a private copy of the sample so the Prepare*Buffer() helpers can
// rewrite it.
447 std::vector
<uint8
> frame_buf(buf
, buf
+ runs_
->sample_size());
449 if (!PrepareAVCBuffer(runs_
->video_description().avcc
,
450 &frame_buf
, &subsamples
)) {
451 MEDIA_LOG(log_cb_
) << "Failed to prepare AVC sample for decode";
458 if (!PrepareAACBuffer(runs_
->audio_description().esds
.aac
,
459 &frame_buf
, &subsamples
)) {
460 MEDIA_LOG(log_cb_
) << "Failed to prepare AAC sample for decode";
466 if (decrypt_config
) {
467 if (!subsamples
.empty()) {
468 // Create a new config with the updated subsamples.
469 decrypt_config
.reset(new DecryptConfig(
470 decrypt_config
->key_id(),
471 decrypt_config
->iv(),
472 decrypt_config
->data_offset(),
475 // else, use the existing config.
476 } else if ((audio
&& is_audio_track_encrypted_
) ||
477 (video
&& is_video_track_encrypted_
)) {
478 // The media pipeline requires a DecryptConfig with an empty |iv|.
479 // TODO(ddorwin): Refactor so we do not need a fake key ID ("1");
480 decrypt_config
.reset(
481 new DecryptConfig("1", "", 0, std::vector
<SubsampleEntry
>()));
484 scoped_refptr
<StreamParserBuffer
> stream_buf
=
485 StreamParserBuffer::CopyFrom(&frame_buf
[0], frame_buf
.size(),
486 runs_
->is_keyframe());
489 stream_buf
->SetDecryptConfig(decrypt_config
.Pass());
491 stream_buf
->SetDuration(runs_
->duration());
492 stream_buf
->SetTimestamp(runs_
->cts());
493 stream_buf
->SetDecodeTimestamp(runs_
->dts());
495 DVLOG(3) << "Pushing frame: aud=" << audio
496 << ", key=" << runs_
->is_keyframe()
497 << ", dur=" << runs_
->duration().InMilliseconds()
498 << ", dts=" << runs_
->dts().InMilliseconds()
499 << ", cts=" << runs_
->cts().InMilliseconds()
500 << ", size=" << runs_
->sample_size();
503 audio_buffers
->push_back(stream_buf
);
505 video_buffers
->push_back(stream_buf
);
508 runs_
->AdvanceSample();
// Delivers the accumulated buffers to the audio and video callbacks and
// clears both queues. For each non-empty queue, |err| is set when the
// corresponding callback is null or when running it returns false; the queue
// is cleared either way.
// NOTE(review): the declaration of |err| and the return statement are not
// visible in this listing; Parse() and EnqueueSample() treat a false return
// value as an error.
512 bool MP4StreamParser::SendAndFlushSamples(BufferQueue
* audio_buffers
,
513 BufferQueue
* video_buffers
) {
515 if (!audio_buffers
->empty()) {
516 err
|= (audio_cb_
.is_null() || !audio_cb_
.Run(*audio_buffers
));
517 audio_buffers
->clear();
519 if (!video_buffers
->empty()) {
520 err
|= (video_cb_
.is_null() || !video_cb_
.Run(*video_buffers
));
521 video_buffers
->clear();
// Walks the top-level boxes that start at mdat_tail_ while mdat_tail_ is
// below |offset|: each box header is read with BoxReader::StartTopLevelBox(),
// a box whose type is not FOURCC_MDAT is logged as unexpected, and mdat_tail_
// advances by the box size. queue_ is trimmed up to
// min(mdat_tail_, offset), so data past |offset| is kept.
// NOTE(review): the declarations of |buf|, |size|, |type|, |box_sz| and
// |err|, the statements taken when StartTopLevelBox() fails or a non-'mdat'
// box is found, and the return statements are not visible in this listing;
// Parse() treats a false return value as an error.
526 bool MP4StreamParser::ReadAndDiscardMDATsUntil(const int64 offset
) {
528 while (mdat_tail_
< offset
) {
531 queue_
.PeekAt(mdat_tail_
, &buf
, &size
);
535 if (!BoxReader::StartTopLevelBox(buf
, size
, log_cb_
,
536 &type
, &box_sz
, &err
))
539 if (type
!= FOURCC_MDAT
) {
540 MEDIA_LOG(log_cb_
) << "Unexpected box type while parsing MDATs: "
541 << FourCCToString(type
);
543 mdat_tail_
+= box_sz
;
545 queue_
.Trim(std::min(mdat_tail_
, offset
));
// Logs the requested transition to |new_state| at DVLOG level 2.
// NOTE(review): the statement that stores |new_state| into state_ is not
// visible in this listing -- confirm against the complete file.
549 void MP4StreamParser::ChangeState(State new_state
) {
550 DVLOG(2) << "Changing state: " << new_state
;