[chromium-blink-merge.git] / media/formats/webm/webm_cluster_parser.h
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef MEDIA_FORMATS_WEBM_WEBM_CLUSTER_PARSER_H_
#define MEDIA_FORMATS_WEBM_WEBM_CLUSTER_PARSER_H_

#include <deque>
#include <map>
#include <set>
#include <string>

#include "base/memory/scoped_ptr.h"
#include "media/base/audio_decoder_config.h"
#include "media/base/media_export.h"
#include "media/base/media_log.h"
#include "media/base/stream_parser.h"
#include "media/base/stream_parser_buffer.h"
#include "media/formats/webm/webm_parser.h"
#include "media/formats/webm/webm_tracks_parser.h"

namespace media {

class MEDIA_EXPORT WebMClusterParser : public WebMParserClient {
 public:
  typedef StreamParser::TrackId TrackId;
  typedef std::deque<scoped_refptr<StreamParserBuffer> > BufferQueue;
  typedef std::map<TrackId, const BufferQueue> TextBufferQueueMap;

  // Numbers chosen to estimate the duration of a buffer if none is set and
  // there is not enough information to get a better estimate.
  enum {
    // Common 1k samples @44.1kHz
    kDefaultAudioBufferDurationInMs = 23,

    // Chosen to represent 16fps duration, which will prevent MSE stalls in
    // videos with frame-rates as low as 8fps.
    kDefaultVideoBufferDurationInMs = 63
  };
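
  // For example, 1024 samples at 44.1 kHz last 1024 / 44100 s, roughly
  // 23.2 ms, hence the 23 ms audio default above; 1 / 16 fps = 62.5 ms,
  // rounded up to the 63 ms video default.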

  // Opus packets encode the duration and other parameters in the 5 most
  // significant bits of the first byte. The index in this array corresponds
  // to the duration of each frame of the packet in microseconds. See
  // https://tools.ietf.org/html/rfc6716#page-14
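  //
  // Sketch of how the table is intended to be indexed (see ReadOpusDuration()
  // for the authoritative parsing; the snippet below is illustrative only):
  // the TOC config in the 5 most significant bits of the packet's first byte
  // selects the per-frame duration, e.g.
  //   int frame_duration_us = kOpusFrameDurationsMu[data[0] >> 3];
  // while the 2 least significant bits of the same byte carry the frame count
  // code (RFC 6716, section 3.1).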
  static const uint16_t kOpusFrameDurationsMu[];

 private:
  // Helper class that manages per-track state.
  class Track {
   public:
    Track(int track_num,
          bool is_video,
          base::TimeDelta default_duration,
          const scoped_refptr<MediaLog>& media_log);
    ~Track();

    int track_num() const { return track_num_; }

    // If a buffer is currently held aside pending duration calculation,
    // returns its decode timestamp. Otherwise, returns kInfiniteDuration().
    DecodeTimestamp GetReadyUpperBound();

    // Prepares |ready_buffers_| for retrieval. Prior to calling,
    // |ready_buffers_| must be empty. Moves all |buffers_| with decode
    // timestamp before |before_timestamp| to |ready_buffers_|, preserving
    // their order.
    void ExtractReadyBuffers(const DecodeTimestamp before_timestamp);

    const BufferQueue& ready_buffers() const { return ready_buffers_; }

    // If |last_added_buffer_missing_duration_| is set, updates its duration
    // relative to |buffer|'s timestamp, and adds it to |buffers_| and unsets
    // |last_added_buffer_missing_duration_|. Then, if |buffer| is missing
    // duration, saves |buffer| into |last_added_buffer_missing_duration_|,
    // or otherwise adds |buffer| to |buffers_|.
    bool AddBuffer(const scoped_refptr<StreamParserBuffer>& buffer);

    // If |last_added_buffer_missing_duration_| is set, updates its duration
    // to be the non-kNoTimestamp() value of |estimated_next_frame_duration_|
    // or a hard-coded default, then adds it to |buffers_| and unsets
    // |last_added_buffer_missing_duration_|. (This method helps the stream
    // parser emit all buffers in a media segment before signaling the end of
    // the segment.)
    void ApplyDurationEstimateIfNeeded();

    // Clears |ready_buffers_| (use ExtractReadyBuffers() to fill it again).
    // Leaves as-is |buffers_| and any possibly held-aside buffer that is
    // missing duration.
    void ClearReadyBuffers();

    // Clears all buffer state, including any possibly held-aside buffer that
    // was missing duration, and all contents of |buffers_| and
    // |ready_buffers_|.
    void Reset();

    // Helper function used to inspect block data to determine if the
    // block is a keyframe.
    // |data| contains the bytes in the block.
    // |size| indicates the number of bytes in |data|.
    bool IsKeyframe(const uint8_t* data, int size) const;

    base::TimeDelta default_duration() const { return default_duration_; }

   private:
    // Helper that sanity-checks |buffer| duration, updates
    // |estimated_next_frame_duration_|, and adds |buffer| to |buffers_|.
    // Returns false if |buffer| failed sanity check and therefore was not
    // added to |buffers_|. Returns true otherwise.
    bool QueueBuffer(const scoped_refptr<StreamParserBuffer>& buffer);

    // Helper that calculates the buffer duration to use in
    // ApplyDurationEstimateIfNeeded().
    base::TimeDelta GetDurationEstimate();

    // Counts the number of estimated durations used in this track. Used to
    // prevent log spam for MEDIA_LOG()s about estimated duration.
    int num_duration_estimates_ = 0;

    int track_num_;
    bool is_video_;

    // Parsed track buffers, each with duration and in (decode) timestamp
    // order, that have not yet been extracted into |ready_buffers_|. Note
    // that up to one additional buffer missing duration may be tracked by
    // |last_added_buffer_missing_duration_|.
    BufferQueue buffers_;
    scoped_refptr<StreamParserBuffer> last_added_buffer_missing_duration_;

    // Buffers in (decode) timestamp order that were previously parsed into
    // and extracted from |buffers_|. Buffers are moved from |buffers_| to
    // |ready_buffers_| by ExtractReadyBuffers() if they are below a specified
    // upper bound timestamp. Track users can therefore extract only those
    // parsed buffers which are "ready" for emission (all before some maximum
    // timestamp).
    BufferQueue ready_buffers_;

    // If kNoTimestamp(), then |estimated_next_frame_duration_| will be used.
    base::TimeDelta default_duration_;

    // If kNoTimestamp(), then a default value will be used. This estimate is
    // the maximum (for video), or minimum (for audio) duration seen so far
    // for this track, and is used only if |default_duration_| is
    // kNoTimestamp().
    // TODO(chcunningham): Use maximum for audio too, adding checks to disable
    // splicing when these estimates are observed in SourceBufferStream.
    base::TimeDelta estimated_next_frame_duration_;

    scoped_refptr<MediaLog> media_log_;
  };

  typedef std::map<int, Track> TextTrackMap;

 public:
  WebMClusterParser(int64 timecode_scale,
                    int audio_track_num,
                    base::TimeDelta audio_default_duration,
                    int video_track_num,
                    base::TimeDelta video_default_duration,
                    const WebMTracksParser::TextTracks& text_tracks,
                    const std::set<int64>& ignored_tracks,
                    const std::string& audio_encryption_key_id,
                    const std::string& video_encryption_key_id,
                    const AudioCodec audio_codec,
                    const scoped_refptr<MediaLog>& media_log);
  ~WebMClusterParser() override;

  // Resets the parser state so it can accept a new cluster.
  void Reset();

  // Parses a WebM cluster element in |buf|.
  //
  // Returns -1 if the parse fails.
  // Returns 0 if more data is needed.
  // Returns the number of bytes parsed on success.
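  //
  // Illustrative caller loop (names here are hypothetical, not part of this
  // class's API):
  //   int result = parser.Parse(buf, size);
  //   if (result < 0) {
  //     // Parse error; abort.
  //   } else if (result == 0) {
  //     // Append more data to |buf| and call Parse() again.
  //   } else {
  //     buf += result;
  //     size -= result;
  //     const BufferQueue& audio_buffers = parser.GetAudioBuffers();
  //     const BufferQueue& video_buffers = parser.GetVideoBuffers();
  //   }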
  int Parse(const uint8_t* buf, int size);

  base::TimeDelta cluster_start_time() const { return cluster_start_time_; }

  // Get the current ready buffers resulting from Parse().
  // If the parse reached the end of the cluster and the last buffer was held
  // aside due to missing duration, the buffer is given an estimated duration
  // and included in the result.
  // Otherwise, if there is a buffer held aside due to missing duration for
  // any of the tracks, no buffers with the same or greater (decode) timestamp
  // will be included in the result.
  // The returned deques are cleared by Parse() or Reset() and updated by the
  // next calls to Get{Audio,Video}Buffers().
  // If no Parse() or Reset() has occurred since the last call to Get{Audio,
  // Video,Text}Buffers(), then the previous BufferQueue& is returned again
  // without any recalculation.
  const BufferQueue& GetAudioBuffers();
  const BufferQueue& GetVideoBuffers();

  // Constructs and returns a subset of |text_track_map_| containing only
  // tracks with non-empty buffer queues produced by the last Parse() and
  // filtered to exclude any buffers whose (decode) timestamp is the same as
  // or greater than the lowest (decode) timestamp, across all tracks, of any
  // buffer held aside due to missing duration (unless the end of the cluster
  // has been reached).
  // The returned map is cleared by Parse() or Reset() and updated by the next
  // call to GetTextBuffers().
  // If no Parse() or Reset() has occurred since the last call to
  // GetTextBuffers(), then the previous TextBufferQueueMap& is returned again
  // without any recalculation.
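  //
  // Illustrative iteration over the returned map (names are hypothetical):
  //   const TextBufferQueueMap& text_map = parser.GetTextBuffers();
  //   for (TextBufferQueueMap::const_iterator it = text_map.begin();
  //        it != text_map.end(); ++it) {
  //     const TrackId text_track_id = it->first;
  //     const BufferQueue& buffers = it->second;  // Ready text buffers.
  //   }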
  const TextBufferQueueMap& GetTextBuffers();

  // Returns true if the last Parse() call stopped at the end of a cluster.
  bool cluster_ended() const { return cluster_ended_; }

 private:
  // WebMParserClient methods.
  WebMParserClient* OnListStart(int id) override;
  bool OnListEnd(int id) override;
  bool OnUInt(int id, int64 val) override;
  bool OnBinary(int id, const uint8_t* data, int size) override;

  bool ParseBlock(bool is_simple_block,
                  const uint8_t* buf,
                  int size,
                  const uint8_t* additional,
                  int additional_size,
                  int duration,
                  int64 discard_padding);
  bool OnBlock(bool is_simple_block,
               int track_num,
               int timecode,
               int duration,
               int flags,
               const uint8_t* data,
               int size,
               const uint8_t* additional,
               int additional_size,
               int64 discard_padding);

  // Resets the Track objects associated with each text track.
  void ResetTextTracks();

  // Clears the ready buffers associated with each text track.
  void ClearTextTrackReadyBuffers();

  // Helper method for Get{Audio,Video,Text}Buffers() that recomputes
  // |ready_buffer_upper_bound_| and calls ExtractReadyBuffers() on each track.
  // If |cluster_ended_| is true, first applies a duration estimate if needed
  // for |audio_| and |video_| and sets |ready_buffer_upper_bound_| to
  // kInfiniteDuration(). Otherwise, sets |ready_buffer_upper_bound_| to the
  // minimum upper bound across |audio_| and |video_|. (Text track buffers are
  // never missing duration, so text tracks are not involved in calculating
  // the upper bound.)
  // Parse() or Reset() must be called between calls to UpdateReadyBuffers()
  // to clear each track's ready buffers and to reset
  // |ready_buffer_upper_bound_| to kNoDecodeTimestamp().
  void UpdateReadyBuffers();

  // Search for the indicated track_num among the text tracks. Returns NULL
  // if that track num is not a text track.
  Track* FindTextTrack(int track_num);

  // Attempts to read the duration from the encoded audio data, returning it
  // as a TimeDelta, or kNoTimestamp() if the duration cannot be retrieved.
  // This obviously violates layering rules, but is useful for MSE to know
  // duration in cases where it isn't explicitly given and cannot be
  // calculated for Blocks at the end of a Cluster (the next Cluster in
  // playback-order may not be the next Cluster we parse, so we can't simply
  // use the delta of the first Block in the next Cluster). Avoid calling if
  // encrypted; may produce unexpected output. See implementation for
  // supported codecs.
  base::TimeDelta TryGetEncodedAudioDuration(const uint8_t* data, int size);

  // Reads the Opus packet header to determine the packet duration. The
  // duration is returned as a TimeDelta, or kNoTimestamp() upon failure to
  // read the duration from the packet.
  base::TimeDelta ReadOpusDuration(const uint8_t* data, int size);

  // Tracks the number of MEDIA_LOGs made in the process of reading encoded
  // duration. Useful to prevent log spam.
  int num_duration_errors_ = 0;

  // Multiplier used to convert timecodes into microseconds.
  double timecode_multiplier_;
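  // For example, with the WebM default TimecodeScale of 1,000,000 ns per
  // timecode tick, each block timecode unit corresponds to 1 ms, i.e. a
  // |timecode_multiplier_| of 1000 (assuming the multiplier is derived from
  // the |timecode_scale| constructor argument).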

  std::set<int64> ignored_tracks_;
  std::string audio_encryption_key_id_;
  std::string video_encryption_key_id_;
  const AudioCodec audio_codec_;

  WebMListParser parser_;

  int64 last_block_timecode_ = -1;
  scoped_ptr<uint8_t[]> block_data_;
  int block_data_size_ = -1;
  int64 block_duration_ = -1;
  int64 block_add_id_ = -1;

  scoped_ptr<uint8_t[]> block_additional_data_;
  // Must be 0 if |block_additional_data_| is null. Must be > 0 if
  // |block_additional_data_| is NOT null.
  int block_additional_data_size_ = 0;

  int64 discard_padding_ = -1;
  bool discard_padding_set_ = false;

  int64 cluster_timecode_ = -1;
  base::TimeDelta cluster_start_time_;
  bool cluster_ended_ = false;

  Track audio_;
  Track video_;
  TextTrackMap text_track_map_;

  // Subset of |text_track_map_| maintained by GetTextBuffers(), and cleared
  // by ClearTextTrackReadyBuffers(). Callers of GetTextBuffers() get a
  // const-ref to this member.
  TextBufferQueueMap text_buffers_map_;

  // Limits the range of buffers returned by Get{Audio,Video,Text}Buffers() to
  // this exclusive upper bound. Set to kNoDecodeTimestamp(), meaning not yet
  // calculated, by Reset() and Parse(). If kNoDecodeTimestamp(), then
  // Get{Audio,Video,Text}Buffers() will calculate it to be the minimum
  // (decode) timestamp across all tracks'
  // |last_added_buffer_missing_duration_|, or kInfiniteDuration() if no
  // buffers are currently missing duration.
  DecodeTimestamp ready_buffer_upper_bound_;

  scoped_refptr<MediaLog> media_log_;

  DISALLOW_IMPLICIT_CONSTRUCTORS(WebMClusterParser);
};

}  // namespace media

#endif  // MEDIA_FORMATS_WEBM_WEBM_CLUSTER_PARSER_H_