media/formats/mp4/mp4_stream_parser_unittest.cc

   1 // Copyright 2014 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include <algorithm>
   6 #include <string>
   7
   8 #include "base/bind.h"
   9 #include "base/bind_helpers.h"
  10 #include "base/logging.h"
  11 #include "base/memory/ref_counted.h"
  12 #include "base/time/time.h"
  13 #include "media/base/audio_decoder_config.h"
  14 #include "media/base/decoder_buffer.h"
  15 #include "media/base/mock_media_log.h"
  16 #include "media/base/stream_parser_buffer.h"
  17 #include "media/base/test_data_util.h"
  18 #include "media/base/text_track_config.h"
  19 #include "media/base/video_decoder_config.h"
  20 #include "media/formats/mp4/es_descriptor.h"
  21 #include "media/formats/mp4/mp4_stream_parser.h"
  22 #include "testing/gmock/include/gmock/gmock.h"
  23 #include "testing/gtest/include/gtest/gtest.h"
  24
  25 using ::testing::InSequence;
  26 using ::testing::StrictMock;
  27 using base::TimeDelta;
  28
  29 namespace media {
  30 namespace mp4 {
  31
  32 // Matchers for verifying common media log entry strings.
  33 MATCHER_P(VideoCodecLog, codec_string, "") {
  34   return CONTAINS_STRING(arg, "Video codec: " + std::string(codec_string));
  35 }
  36
  37 MATCHER_P(AudioCodecLog, codec_string, "") {
  38   return CONTAINS_STRING(arg, "Audio codec: " + std::string(codec_string));
  39 }
  40
  41 MATCHER(AuxInfoUnavailableLog, "") {
  42   return CONTAINS_STRING(arg, "Aux Info is not available.");
  43 }
  44
  45 class MP4StreamParserTest : public testing::Test {
  46  public:
  47   MP4StreamParserTest()
  48       : media_log_(new StrictMock<MockMediaLog>()),
  49         configs_received_(false),
  50         lower_bound_(
  51             DecodeTimestamp::FromPresentationTime(base::TimeDelta::Max())) {
  52     std::set<int> audio_object_types;
  53     audio_object_types.insert(kISO_14496_3);
  54     parser_.reset(new MP4StreamParser(audio_object_types, false));
  55   }
  56
  57  protected:
  58   scoped_refptr<StrictMock<MockMediaLog>> media_log_;
  59   scoped_ptr<MP4StreamParser> parser_;
  60   bool configs_received_;
  61   AudioDecoderConfig audio_decoder_config_;
  62   VideoDecoderConfig video_decoder_config_;
  63   DecodeTimestamp lower_bound_;
  64
  65   bool AppendData(const uint8* data, size_t length) {
  66     return parser_->Parse(data, length);
  67   }
  68
  69   bool AppendDataInPieces(const uint8* data, size_t length, size_t piece_size) {
  70     const uint8* start = data;
  71     const uint8* end = data + length;
  72     while (start < end) {
  73       size_t append_size = std::min(piece_size,
  74                                     static_cast<size_t>(end - start));
  75       if (!AppendData(start, append_size))
  76         return false;
  77       start += append_size;
  78     }
  79     return true;
  80   }
  81
  82   void InitF(DemuxerStream::Liveness expected_liveness,
  83              const StreamParser::InitParameters& params) {
  84     DVLOG(1) << "InitF: dur=" << params.duration.InMilliseconds()
  85              << ", autoTimestampOffset=" << params.auto_update_timestamp_offset;
  86     EXPECT_EQ(expected_liveness, params.liveness);
  87   }
  88
  89   bool NewConfigF(const AudioDecoderConfig& ac,
  90                   const VideoDecoderConfig& vc,
  91                   const StreamParser::TextTrackConfigMap& tc) {
  92     DVLOG(1) << "NewConfigF: audio=" << ac.IsValidConfig()
  93              << ", video=" << vc.IsValidConfig();
  94     configs_received_ = true;
  95     audio_decoder_config_ = ac;
  96     video_decoder_config_ = vc;
  97     return true;
  98   }
  99
 100   void DumpBuffers(const std::string& label,
 101                    const StreamParser::BufferQueue& buffers) {
 102     DVLOG(2) << "DumpBuffers: " << label << " size " << buffers.size();
 103     for (StreamParser::BufferQueue::const_iterator buf = buffers.begin();
 104          buf != buffers.end(); buf++) {
 105       DVLOG(3) << "  n=" << buf - buffers.begin()
 106                << ", size=" << (*buf)->data_size()
 107                << ", dur=" << (*buf)->duration().InMilliseconds();
 108     }
 109   }
 110
 111   bool NewBuffersF(const StreamParser::BufferQueue& audio_buffers,
 112                    const StreamParser::BufferQueue& video_buffers,
 113                    const StreamParser::TextBufferQueueMap& text_map) {
 114     DumpBuffers("audio_buffers", audio_buffers);
 115     DumpBuffers("video_buffers", video_buffers);
 116
 117     // TODO(wolenetz/acolwell): Add text track support to more MSE parsers. See
 118     // http://crbug.com/336926.
 119     if (!text_map.empty())
 120       return false;
 121
 122     // Find the second highest timestamp so that we know what the
 123     // timestamps on the next set of buffers must be >= than.
 124     DecodeTimestamp audio = !audio_buffers.empty() ?
 125         audio_buffers.back()->GetDecodeTimestamp() : kNoDecodeTimestamp();
 126     DecodeTimestamp video = !video_buffers.empty() ?
 127         video_buffers.back()->GetDecodeTimestamp() : kNoDecodeTimestamp();
 128     DecodeTimestamp second_highest_timestamp =
 129         (audio == kNoDecodeTimestamp() ||
 130          (video != kNoDecodeTimestamp() && audio > video)) ? video : audio;
 131
 132     DCHECK(second_highest_timestamp != kNoDecodeTimestamp());
 133
 134     if (lower_bound_ != kNoDecodeTimestamp() &&
 135         second_highest_timestamp < lower_bound_) {
 136       return false;
 137     }
 138
 139     lower_bound_ = second_highest_timestamp;
 140     return true;
 141   }
 142
 143   void KeyNeededF(EmeInitDataType type, const std::vector<uint8>& init_data) {
 144     DVLOG(1) << "KeyNeededF: " << init_data.size();
 145     EXPECT_EQ(EmeInitDataType::CENC, type);
 146     EXPECT_FALSE(init_data.empty());
 147   }
 148
 149   void NewSegmentF() {
 150     DVLOG(1) << "NewSegmentF";
 151     lower_bound_ = kNoDecodeTimestamp();
 152   }
 153
 154   void EndOfSegmentF() {
 155     DVLOG(1) << "EndOfSegmentF()";
 156     lower_bound_ =
 157         DecodeTimestamp::FromPresentationTime(base::TimeDelta::Max());
 158   }
 159
 160   void InitializeParserAndExpectLiveness(
 161       DemuxerStream::Liveness expected_liveness) {
 162     parser_->Init(
 163         base::Bind(&MP4StreamParserTest::InitF, base::Unretained(this),
 164                    expected_liveness),
 165         base::Bind(&MP4StreamParserTest::NewConfigF, base::Unretained(this)),
 166         base::Bind(&MP4StreamParserTest::NewBuffersF, base::Unretained(this)),
 167         true,
 168         base::Bind(&MP4StreamParserTest::KeyNeededF, base::Unretained(this)),
 169         base::Bind(&MP4StreamParserTest::NewSegmentF, base::Unretained(this)),
 170         base::Bind(&MP4StreamParserTest::EndOfSegmentF, base::Unretained(this)),
 171         media_log_);
 172   }
 173
 174   void InitializeParser() {
 175     // Most unencrypted test mp4 files have zero duration and are treated as
 176     // live streams.
 177     InitializeParserAndExpectLiveness(DemuxerStream::LIVENESS_LIVE);
 178   }
 179
 180   bool ParseMP4File(const std::string& filename, int append_bytes) {
 181     InitializeParser();
 182
 183     scoped_refptr<DecoderBuffer> buffer = ReadTestDataFile(filename);
 184     EXPECT_TRUE(AppendDataInPieces(buffer->data(),
 185                                    buffer->data_size(),
 186                                    append_bytes));
 187     return true;
 188   }
 189 };
 190
 191 TEST_F(MP4StreamParserTest, UnalignedAppend) {
 192   // Test small, non-segment-aligned appends (small enough to exercise
 193   // incremental append system)
 194   EXPECT_MEDIA_LOG(VideoCodecLog("avc1.6401f"));
 195   EXPECT_MEDIA_LOG(AudioCodecLog("mp4a.40.2"));
 196   ParseMP4File("bear-1280x720-av_frag.mp4", 512);
 197 }
 198
 199 TEST_F(MP4StreamParserTest, BytewiseAppend) {
 200   // Ensure no incremental errors occur when parsing
 201   EXPECT_MEDIA_LOG(VideoCodecLog("avc1.6401f"));
 202   EXPECT_MEDIA_LOG(AudioCodecLog("mp4a.40.2"));
 203   ParseMP4File("bear-1280x720-av_frag.mp4", 1);
 204 }
 205
 206 TEST_F(MP4StreamParserTest, MultiFragmentAppend) {
 207   // Large size ensures multiple fragments are appended in one call (size is
 208   // larger than this particular test file)
 209   EXPECT_MEDIA_LOG(VideoCodecLog("avc1.6401f"));
 210   EXPECT_MEDIA_LOG(AudioCodecLog("mp4a.40.2"));
 211   ParseMP4File("bear-1280x720-av_frag.mp4", 768432);
 212 }
 213
 214 TEST_F(MP4StreamParserTest, Flush) {
 215   // Flush while reading sample data, then start a new stream.
 216   EXPECT_MEDIA_LOG(VideoCodecLog("avc1.6401f")).Times(2);
 217   EXPECT_MEDIA_LOG(AudioCodecLog("mp4a.40.2")).Times(2);
 218   InitializeParser();
 219
 220   scoped_refptr<DecoderBuffer> buffer =
 221       ReadTestDataFile("bear-1280x720-av_frag.mp4");
 222   EXPECT_TRUE(AppendDataInPieces(buffer->data(), 65536, 512));
 223   parser_->Flush();
 224   EXPECT_TRUE(AppendDataInPieces(buffer->data(),
 225                                  buffer->data_size(),
 226                                  512));
 227 }
 228
 229 TEST_F(MP4StreamParserTest, Reinitialization) {
 230   EXPECT_MEDIA_LOG(VideoCodecLog("avc1.6401f")).Times(2);
 231   EXPECT_MEDIA_LOG(AudioCodecLog("mp4a.40.2")).Times(2);
 232   InitializeParser();
 233
 234   scoped_refptr<DecoderBuffer> buffer =
 235       ReadTestDataFile("bear-1280x720-av_frag.mp4");
 236   EXPECT_TRUE(AppendDataInPieces(buffer->data(),
 237                                  buffer->data_size(),
 238                                  512));
 239   EXPECT_TRUE(AppendDataInPieces(buffer->data(),
 240                                  buffer->data_size(),
 241                                  512));
 242 }
 243
 244 TEST_F(MP4StreamParserTest, MPEG2_AAC_LC) {
 245   InSequence s;
 246   std::set<int> audio_object_types;
 247   audio_object_types.insert(kISO_13818_7_AAC_LC);
 248   parser_.reset(new MP4StreamParser(audio_object_types, false));
 249   EXPECT_MEDIA_LOG(AudioCodecLog("mp4a.67"));
 250   EXPECT_MEDIA_LOG(AudioCodecLog("mp4a.40.2"));
 251   ParseMP4File("bear-mpeg2-aac-only_frag.mp4", 512);
 252 }
 253
 254 // Test that a moov box is not always required after Flush() is called.
 255 TEST_F(MP4StreamParserTest, NoMoovAfterFlush) {
 256   EXPECT_MEDIA_LOG(VideoCodecLog("avc1.6401f"));
 257   EXPECT_MEDIA_LOG(AudioCodecLog("mp4a.40.2"));
 258   InitializeParser();
 259
 260   scoped_refptr<DecoderBuffer> buffer =
 261       ReadTestDataFile("bear-1280x720-av_frag.mp4");
 262   EXPECT_TRUE(AppendDataInPieces(buffer->data(),
 263                                  buffer->data_size(),
 264                                  512));
 265   parser_->Flush();
 266
 267   const int kFirstMoofOffset = 1307;
 268   EXPECT_TRUE(AppendDataInPieces(buffer->data() + kFirstMoofOffset,
 269                                  buffer->data_size() - kFirstMoofOffset,
 270                                  512));
 271 }
 272
 273 // Test an invalid file where there are encrypted samples, but
 274 // SampleAuxiliaryInformation{Sizes|Offsets}Box (saiz|saio) are missing.
 275 // The parser should fail instead of crash. See http://crbug.com/361347
 276 TEST_F(MP4StreamParserTest, MissingSampleAuxInfo) {
 277   InSequence s;
 278
 279   // Encrypted test mp4 files have non-zero duration and are treated as
 280   // recorded streams.
 281   InitializeParserAndExpectLiveness(DemuxerStream::LIVENESS_RECORDED);
 282
 283   scoped_refptr<DecoderBuffer> buffer =
 284       ReadTestDataFile("bear-1280x720-a_frag-cenc_missing-saiz-saio.mp4");
 285   EXPECT_MEDIA_LOG(AudioCodecLog("mp4a.40.2")).Times(2);
 286   EXPECT_MEDIA_LOG(AuxInfoUnavailableLog());
 287   EXPECT_FALSE(AppendDataInPieces(buffer->data(), buffer->data_size(), 512));
 288 }
 289
 290 // Test a file where all video samples start with an Access Unit
 291 // Delimiter (AUD) NALU.
 292 TEST_F(MP4StreamParserTest, VideoSamplesStartWithAUDs) {
 293   EXPECT_MEDIA_LOG(VideoCodecLog("avc1.4d4028"));
 294   ParseMP4File("bear-1280x720-av_with-aud-nalus_frag.mp4", 512);
 295 }
 296
 297 #if defined(ENABLE_HEVC_DEMUXING)
 298 TEST_F(MP4StreamParserTest, HEVC_in_MP4_container) {
 299   InitializeParserAndExpectLiveness(DemuxerStream::LIVENESS_RECORDED);
 300   scoped_refptr<DecoderBuffer> buffer = ReadTestDataFile("bear-hevc-frag.mp4");
 301   EXPECT_MEDIA_LOG(VideoCodecLog("hevc"));
 302   EXPECT_TRUE(AppendDataInPieces(buffer->data(), buffer->data_size(), 512));
 303 }
 304 #endif
 305
 306 TEST_F(MP4StreamParserTest, CENC) {
 307   // Encrypted test mp4 files have non-zero duration and are treated as
 308   // recorded streams.
 309   InitializeParserAndExpectLiveness(DemuxerStream::LIVENESS_RECORDED);
 310
 311   scoped_refptr<DecoderBuffer> buffer =
 312       ReadTestDataFile("bear-1280x720-v_frag-cenc.mp4");
 313   EXPECT_MEDIA_LOG(VideoCodecLog("avc1.6401f"));
 314   EXPECT_TRUE(AppendDataInPieces(buffer->data(), buffer->data_size(), 512));
 315 }
 316
 317 TEST_F(MP4StreamParserTest, NaturalSizeWithoutPASP) {
 318   InitializeParserAndExpectLiveness(DemuxerStream::LIVENESS_RECORDED);
 319
 320   scoped_refptr<DecoderBuffer> buffer =
 321       ReadTestDataFile("bear-640x360-non_square_pixel-without_pasp.mp4");
 322
 323   EXPECT_MEDIA_LOG(VideoCodecLog("avc1.6401e"));
 324   EXPECT_TRUE(AppendDataInPieces(buffer->data(), buffer->data_size(), 512));
 325   EXPECT_EQ(gfx::Size(639, 360), video_decoder_config_.natural_size());
 326 }
 327
 328 TEST_F(MP4StreamParserTest, NaturalSizeWithPASP) {
 329   InitializeParserAndExpectLiveness(DemuxerStream::LIVENESS_RECORDED);
 330
 331   scoped_refptr<DecoderBuffer> buffer =
 332       ReadTestDataFile("bear-640x360-non_square_pixel-with_pasp.mp4");
 333
 334   EXPECT_MEDIA_LOG(VideoCodecLog("avc1.6401e"));
 335   EXPECT_TRUE(AppendDataInPieces(buffer->data(), buffer->data_size(), 512));
 336   EXPECT_EQ(gfx::Size(639, 360), video_decoder_config_.natural_size());
 337 }
 338
 339 }  // namespace mp4
 340 }  // namespace media