media/formats/webm/webm_cluster_parser.h

   1 // Copyright 2014 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #ifndef MEDIA_FORMATS_WEBM_WEBM_CLUSTER_PARSER_H_
   6 #define MEDIA_FORMATS_WEBM_WEBM_CLUSTER_PARSER_H_
   7
   8 #include <deque>
   9 #include <map>
  10 #include <set>
  11 #include <string>
  12
  13 #include "base/memory/scoped_ptr.h"
  14 #include "media/base/audio_decoder_config.h"
  15 #include "media/base/media_export.h"
  16 #include "media/base/media_log.h"
  17 #include "media/base/stream_parser.h"
  18 #include "media/base/stream_parser_buffer.h"
  19 #include "media/formats/webm/webm_parser.h"
  20 #include "media/formats/webm/webm_tracks_parser.h"
  21
  22 namespace media {
  23 // Parses WebM Cluster elements into per-track queues of StreamParserBuffers.
     // Feed data to Parse(), then read the results through
     // Get{Audio,Video,Text}Buffers().
  24 class MEDIA_EXPORT WebMClusterParser : public WebMParserClient {
  25  public:
       // |BufferQueue| holds ref-counted buffers for one track;
       // |TextBufferQueueMap| maps a text track's id to its queue of buffers.
  26   typedef StreamParser::TrackId TrackId;
  27   typedef std::deque<scoped_refptr<StreamParserBuffer> > BufferQueue;
  28   typedef std::map<TrackId, const BufferQueue> TextBufferQueueMap;
  29
  30   // Fallback durations, in milliseconds, used to estimate the duration of a
  31   // buffer if none is set and there is not enough information to do better.
  32   enum {
  33     // Common 1k samples @44.1kHz
  34     kDefaultAudioBufferDurationInMs = 23,
  35
  36     // Chosen to represent 16fps duration, which will prevent MSE stalls in
  37     // videos with frame-rates as low as 8fps.
  38     kDefaultVideoBufferDurationInMs = 63
  39   };
  40
  41   // Opus packets encode the duration and other parameters in the 5 most
  42   // significant bits of the first byte. That 5-bit value is the index into
  43   // this array; each entry is the per-frame duration in microseconds. See
  44   // https://tools.ietf.org/html/rfc6716#page-14
  45   static const uint16_t kOpusFrameDurationsMu[];
  46
  47  private:
  48   // Helper class that manages per-track buffer queues and duration estimates.
  49   class Track {
  50    public:
  51     Track(int track_num,
  52           bool is_video,
  53           base::TimeDelta default_duration,
  54           const scoped_refptr<MediaLog>& media_log);
  55     ~Track();
  56
  57     int track_num() const { return track_num_; }
  58
  59     // If a buffer is currently held aside pending duration calculation, returns
  60     // its decode timestamp. Otherwise, returns kInfiniteDuration().
  61     DecodeTimestamp GetReadyUpperBound();
  62
  63     // Prepares |ready_buffers_| for retrieval. Prior to calling,
  64     // |ready_buffers_| must be empty. Moves all |buffers_| with decode
  65     // timestamp before |before_timestamp| to |ready_buffers_|, preserving their
  66     // order.
  67     void ExtractReadyBuffers(const DecodeTimestamp before_timestamp);
  68
  69     const BufferQueue& ready_buffers() const { return ready_buffers_; }
  70
  71     // If |last_added_buffer_missing_duration_| is set, updates its duration
  72     // relative to |buffer|'s timestamp, and adds it to |buffers_| and unsets
  73     // |last_added_buffer_missing_duration_|. Then, if |buffer| is missing
  74     // duration, saves |buffer| into |last_added_buffer_missing_duration_|, or
  75     // otherwise adds |buffer| to |buffers_|.
         // NOTE(review): the meaning of the bool result is not stated here;
         // presumably false means QueueBuffer() rejected a buffer - confirm.
  76     bool AddBuffer(const scoped_refptr<StreamParserBuffer>& buffer);
  77
  78     // If |last_added_buffer_missing_duration_| is set, updates its duration to
  79     // be non-kNoTimestamp() value of |estimated_next_frame_duration_| or a
  80     // hard-coded default, then adds it to |buffers_| and unsets
  81     // |last_added_buffer_missing_duration_|. (This method helps stream parser
  82     // emit all buffers in a media segment before signaling end of segment.)
  83     void ApplyDurationEstimateIfNeeded();
  84
  85     // Clears |ready_buffers_| (use ExtractReadyBuffers() to fill it again).
  86     // Leaves as-is |buffers_| and any possibly held-aside buffer that is
  87     // missing duration.
  88     void ClearReadyBuffers();
  89
  90     // Clears all buffer state, including any possibly held-aside buffer that
  91     // was missing duration, and all contents of |buffers_| and
  92     // |ready_buffers_|.
  93     void Reset();
  94
  95     // Helper function used to inspect block data to determine if the
  96     // block is a keyframe.
  97     // |data| contains the bytes in the block.
  98     // |size| indicates the number of bytes in |data|.
  99     bool IsKeyframe(const uint8_t* data, int size) const;
 100
 101     base::TimeDelta default_duration() const { return default_duration_; }
 102
 103    private:
 104     // Helper that sanity-checks |buffer| duration, updates
 105     // |estimated_next_frame_duration_|, and adds |buffer| to |buffers_|.
 106     // Returns false if |buffer| failed sanity check and therefore was not added
 107     // to |buffers_|. Returns true otherwise.
 108     bool QueueBuffer(const scoped_refptr<StreamParserBuffer>& buffer);
 109
 110     // Helper that calculates the buffer duration to use in
 111     // ApplyDurationEstimateIfNeeded().
 112     base::TimeDelta GetDurationEstimate();
 113
 114     // Counts the number of estimated durations used in this track. Used to
 115     // prevent log spam for MEDIA_LOG()s about estimated duration.
 116     int num_duration_estimates_;
 117
 118     int track_num_;
 119     bool is_video_;
 120
 121     // Parsed track buffers, each with duration and in (decode) timestamp order,
 122     // that have not yet been extracted into |ready_buffers_|. Note that up to
 123     // one additional buffer missing duration may be tracked by
 124     // |last_added_buffer_missing_duration_|.
 125     BufferQueue buffers_;
 126     scoped_refptr<StreamParserBuffer> last_added_buffer_missing_duration_;
 127
 128     // Buffers in (decode) timestamp order that were previously parsed into and
 129     // extracted from |buffers_|. Buffers are moved from |buffers_| to
 130     // |ready_buffers_| by ExtractReadyBuffers() if they are below a specified
 131     // upper bound timestamp. Track users can therefore extract only those
 132     // parsed buffers which are "ready" for emission (all before some maximum
 133     // timestamp).
 134     BufferQueue ready_buffers_;
 135
 136     // If kNoTimestamp(), then |estimated_next_frame_duration_| will be used.
 137     base::TimeDelta default_duration_;
 138
 139     // If kNoTimestamp(), then a default value will be used. This estimate is
 140     // the maximum (for video), or minimum (for audio) duration seen so far for
 141     // this track, and is used only if |default_duration_| is kNoTimestamp().
 142     // TODO(chcunningham): Use maximum for audio too, adding checks to disable
 143     // splicing when these estimates are observed in SourceBufferStream.
 144     base::TimeDelta estimated_next_frame_duration_;
 145
 146     scoped_refptr<MediaLog> media_log_;
 147   };
 148
       // State for each text track. NOTE(review): the int key is presumably the
       // track number - confirm against the constructor.
 149   typedef std::map<int, Track> TextTrackMap;
 150
 151  public:
       // Creates a parser for one audio track, one video track and the given text
       // tracks; blocks of tracks in |ignored_tracks| are presumably skipped.
       // NOTE(review): parameter handling lives in the .cc file - confirm there.
 152   WebMClusterParser(int64 timecode_scale,
 153                     int audio_track_num,
 154                     base::TimeDelta audio_default_duration,
 155                     int video_track_num,
 156                     base::TimeDelta video_default_duration,
 157                     const WebMTracksParser::TextTracks& text_tracks,
 158                     const std::set<int64>& ignored_tracks,
 159                     const std::string& audio_encryption_key_id,
 160                     const std::string& video_encryption_key_id,
 161                     const AudioCodec audio_codec,
 162                     const scoped_refptr<MediaLog>& media_log);
 163   ~WebMClusterParser() override;
 164
 165   // Resets the parser state so it can accept a new cluster.
 166   void Reset();
 167
 168   // Parses a WebM cluster element in |buf|.
 169   //
 170   // Returns -1 if the parse fails.
 171   // Returns 0 if more data is needed.
 172   // Returns the number of bytes parsed on success.
 173   int Parse(const uint8_t* buf, int size);
 174
 175   base::TimeDelta cluster_start_time() const { return cluster_start_time_; }
 176
 177   // Get the current ready buffers resulting from Parse().
 178   // If the parse reached the end of cluster and the last buffer was held aside
 179   // due to missing duration, the buffer is given an estimated duration and
 180   // included in the result.
 181   // Otherwise, if there is a buffer held aside due to missing duration for
 182   // any of the tracks, no buffers with same or greater (decode) timestamp will
 183   // be included in the buffers.
 184   // The returned deques are cleared by Parse() or Reset() and updated by the
 185   // next calls to Get{Audio,Video}Buffers().
 186   // If no Parse() or Reset() has occurred since the last call to Get{Audio,
 187   // Video,Text}Buffers(), then the previous BufferQueue& is returned again
 188   // without any recalculation.
 189   const BufferQueue& GetAudioBuffers();
 190   const BufferQueue& GetVideoBuffers();
 191
 192   // Constructs and returns a subset of |text_track_map_| containing only
 193   // tracks with non-empty buffer queues produced by the last Parse() and
 194   // filtered to exclude any buffers that have (decode) timestamp same or
 195   // greater than the lowest (decode) timestamp across all tracks of any buffer
 196   // held aside due to missing duration (unless the end of cluster has been
 197   // reached).
 198   // The returned map is cleared by Parse() or Reset() and updated by the next
 199   // call to GetTextBuffers().
 200   // If no Parse() or Reset() has occurred since the last call to
 201   // GetTextBuffers(), then the previous TextBufferQueueMap& is returned again
 202   // without any recalculation.
 203   const TextBufferQueueMap& GetTextBuffers();
 204
 205   // Returns true if the last Parse() call stopped at the end of a cluster.
 206   bool cluster_ended() const { return cluster_ended_; }
 207
 208  private:
 209   // WebMParserClient methods.
 210   WebMParserClient* OnListStart(int id) override;
 211   bool OnListEnd(int id) override;
 212   bool OnUInt(int id, int64 val) override;
 213   bool OnBinary(int id, const uint8_t* data, int size) override;
 214
       // Block handling helpers. NOTE(review): presumably ParseBlock() reads the
       // block header and hands the payload to OnBlock() - confirm in the .cc.
 215   bool ParseBlock(bool is_simple_block,
 216                   const uint8_t* buf,
 217                   int size,
 218                   const uint8_t* additional,
 219                   int additional_size,
 220                   int duration,
 221                   int64 discard_padding);
 222   bool OnBlock(bool is_simple_block,
 223                int track_num,
 224                int timecode,
 225                int duration,
 226                int flags,
 227                const uint8_t* data,
 228                int size,
 229                const uint8_t* additional,
 230                int additional_size,
 231                int64 discard_padding);
 232
 233   // Resets the Track objects associated with each text track.
 234   void ResetTextTracks();
 235
 236   // Clears the ready buffers associated with each text track.
 237   void ClearTextTrackReadyBuffers();
 238
 239   // Helper method for Get{Audio,Video,Text}Buffers() that recomputes
 240   // |ready_buffer_upper_bound_| and calls ExtractReadyBuffers() on each track.
 241   // If |cluster_ended_| is true, first applies duration estimate if needed for
 242   // |audio_| and |video_| and sets |ready_buffer_upper_bound_| to
 243   // kInfiniteDuration(). Otherwise, sets |ready_buffer_upper_bound_| to the
 244   // minimum upper bound across |audio_| and |video_|. (Text tracks can have no
 245   // buffers missing duration, so they are not involved in calculating the upper
 246   // bound.)
 247   // Parse() or Reset() must be called between calls to UpdateReadyBuffers() to
 248   // clear each track's ready buffers and to reset |ready_buffer_upper_bound_|
 249   // to kNoDecodeTimestamp().
 250   void UpdateReadyBuffers();
 251
 252   // Search for the indicated track_num among the text tracks.  Returns NULL
 253   // if that track num is not a text track.
 254   Track* FindTextTrack(int track_num);
 255
 256   // Attempts to read the duration from the encoded audio data, returning as
 257   // TimeDelta or kNoTimestamp() if duration cannot be retrieved. This obviously
 258   // violates layering rules, but is useful for MSE to know duration in cases
 259   // where it isn't explicitly given and cannot be calculated for Blocks at the
 260   // end of a Cluster (the next Cluster in playback-order may not be the next
 261   // Cluster we parse, so we can't simply use the delta of the first Block in
 262   // the next Cluster). Avoid calling if encrypted; may produce unexpected
 263   // output. See implementation for supported codecs.
 264   base::TimeDelta TryGetEncodedAudioDuration(const uint8_t* data, int size);
 265
 266   // Reads Opus packet header to determine packet duration. Duration returned
 267   // as TimeDelta or kNoTimestamp() upon failure to read duration from packet.
 268   base::TimeDelta ReadOpusDuration(const uint8_t* data, int size);
 269
 270   // Tracks the number of MEDIA_LOGs made in process of reading encoded
 271   // duration. Useful to prevent log spam.
 272   int num_duration_errors_;
 273
 274   double timecode_multiplier_;  // Multiplier used to convert timecodes into
 275                                 // microseconds.
 276   std::set<int64> ignored_tracks_;
 277   std::string audio_encryption_key_id_;
 278   std::string video_encryption_key_id_;
 279   const AudioCodec audio_codec_;
 280
 281   WebMListParser parser_;
 282
 283   int64 last_block_timecode_;
 284   scoped_ptr<uint8_t[]> block_data_;
 285   int block_data_size_;
 286   int64 block_duration_;
 287   int64 block_add_id_;
 288
 289   scoped_ptr<uint8_t[]> block_additional_data_;
 290   // Must be 0 if |block_additional_data_| is null. Must be > 0 if
 291   // |block_additional_data_| is NOT null.
 292   int block_additional_data_size_;
 293
 294   int64 discard_padding_;
 295   bool discard_padding_set_;
 296
 297   int64 cluster_timecode_;
 298   base::TimeDelta cluster_start_time_;
 299   bool cluster_ended_;
 300
 301   Track audio_;
 302   Track video_;
 303   TextTrackMap text_track_map_;
 304
 305   // Subset of |text_track_map_| maintained by GetTextBuffers(), and cleared by
 306   // ClearTextTrackReadyBuffers(). Callers of GetTextBuffers() get a const-ref
 307   // to this member.
 308   TextBufferQueueMap text_buffers_map_;
 309
 310   // Limits the range of buffers returned by Get{Audio,Video,Text}Buffers() to
 311   // this exclusive upper bound. Set to kNoDecodeTimestamp(), meaning not yet
 312   // calculated, by Reset() and Parse(). If kNoDecodeTimestamp(), then
 313   // Get{Audio,Video,Text}Buffers() will calculate it to be the minimum (decode)
 314   // timestamp across all tracks' |last_added_buffer_missing_duration_|, or
 315   // kInfiniteDuration() if no buffers are currently missing duration.
 316   DecodeTimestamp ready_buffer_upper_bound_;
 317
 318   scoped_refptr<MediaLog> media_log_;
 319
 320   DISALLOW_IMPLICIT_CONSTRUCTORS(WebMClusterParser);
 321 };
 322
 323 }  // namespace media
 324
 325 #endif  // MEDIA_FORMATS_WEBM_WEBM_CLUSTER_PARSER_H_