1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef MEDIA_FORMATS_WEBM_WEBM_CLUSTER_PARSER_H_
6 #define MEDIA_FORMATS_WEBM_WEBM_CLUSTER_PARSER_H_
13 #include "base/memory/scoped_ptr.h"
14 #include "media/base/audio_decoder_config.h"
15 #include "media/base/media_export.h"
16 #include "media/base/media_log.h"
17 #include "media/base/stream_parser.h"
18 #include "media/base/stream_parser_buffer.h"
19 #include "media/formats/webm/webm_parser.h"
20 #include "media/formats/webm/webm_tracks_parser.h"
24 class MEDIA_EXPORT WebMClusterParser
: public WebMParserClient
{
// Track identifier type, aliased from StreamParser::TrackId.
26 typedef StreamParser::TrackId TrackId
;
// Double-ended queue of ref-counted StreamParserBuffer handles.
27 typedef std::deque
<scoped_refptr
<StreamParserBuffer
> > BufferQueue
;
// Maps a TrackId to a (const) BufferQueue; this is the type returned by
// GetTextBuffers().
28 typedef std::map
<TrackId
, const BufferQueue
> TextBufferQueueMap
;
30 // Arbitrarily-chosen numbers to estimate the duration of a buffer if none is
31 // set and there is not enough information to get a better estimate.
33 kDefaultAudioBufferDurationInMs
= 23, // Common 1k samples @44.1kHz
34 kDefaultVideoBufferDurationInMs
= 42 // Low 24fps to reduce stalls
37 // Opus packets encode the duration and other parameters in the 5 most
38 // significant bits of the first byte. Each entry of this array is the
39 // duration, in microseconds, of each frame of the packet; the array index
40 // corresponds to that encoded value. See
// https://tools.ietf.org/html/rfc6716#page-14
// NOTE(review): presumably indexed directly by the 5-bit value — confirm
// against the out-of-line definition and ReadOpusDuration().
41 static const uint16_t kOpusFrameDurationsMu
[];
44 // Helper class that manages per-track state.
49 base::TimeDelta default_duration
,
// Returns |track_num_|.
53 int track_num() const { return track_num_
; }
55 // If a buffer is currently held aside pending duration calculation, returns
56 // its decode timestamp. Otherwise, returns kInfiniteDuration().
// NOTE(review): the declared return type is DecodeTimestamp, so
// "kInfiniteDuration()" presumably denotes the DecodeTimestamp equivalent of
// that value — confirm against the implementation.
57 DecodeTimestamp
GetReadyUpperBound();
59 // Prepares |ready_buffers_| for retrieval. Prior to calling,
60 // |ready_buffers_| must be empty. Moves all |buffers_| with decode
61 // timestamp before |before_timestamp| to |ready_buffers_|, preserving their
// relative (decode timestamp) order.
63 void ExtractReadyBuffers(const DecodeTimestamp before_timestamp
);
// Returns a const reference to |ready_buffers_|.
65 const BufferQueue
& ready_buffers() const { return ready_buffers_
; }
67 // If |last_added_buffer_missing_duration_| is set, updates its duration
68 // relative to |buffer|'s timestamp, and adds it to |buffers_| and unsets
69 // |last_added_buffer_missing_duration_|. Then, if |buffer| is missing
70 // duration, saves |buffer| into |last_added_buffer_missing_duration_|, or
71 // otherwise adds |buffer| to |buffers_|.
// NOTE(review): the meaning of the bool result is not documented here;
// presumably false when a buffer fails QueueBuffer()'s sanity check — confirm.
72 bool AddBuffer(const scoped_refptr
<StreamParserBuffer
>& buffer
);
74 // If |last_added_buffer_missing_duration_| is set, gives it a duration: the
75 // value of |estimated_next_frame_duration_| when that is not kNoTimestamp(),
76 // or an arbitrary default otherwise. Then adds it to |buffers_| and unsets
77 // |last_added_buffer_missing_duration_|. (This method helps stream parser
78 // emit all buffers in a media segment before signaling end of segment.)
// The duration applied is the one calculated by GetDurationEstimate().
79 void ApplyDurationEstimateIfNeeded();
81 // Clears |ready_buffers_| (use ExtractReadyBuffers() to fill it again).
82 // Leaves as-is |buffers_| and any possibly held-aside buffer that is
// missing duration.
84 void ClearReadyBuffers();
86 // Clears all buffer state, including any possibly held-aside buffer that
87 // was missing duration, and all contents of |buffers_| and
// |ready_buffers_|.
91 // Helper function used to inspect block data to determine if the
92 // block is a keyframe.
93 // |data| contains the bytes in the block.
94 // |size| indicates the number of bytes in |data|.
// Returns true if the block is judged to be a keyframe. Declared const.
95 bool IsKeyframe(const uint8_t* data
, int size
) const;
// Returns |default_duration_| (by value).
97 base::TimeDelta
default_duration() const { return default_duration_
; }
100 // Helper that sanity-checks |buffer| duration, updates
101 // |estimated_next_frame_duration_|, and adds |buffer| to |buffers_|.
102 // Returns false if |buffer| failed sanity check and therefore was not added
103 // to |buffers_|. Returns true otherwise.
// NOTE(review): |buffer| is presumably expected to already carry a duration
// when this is called (see AddBuffer()) — confirm.
104 bool QueueBuffer(const scoped_refptr
<StreamParserBuffer
>& buffer
);
106 // Helper that calculates the buffer duration to use in
107 // ApplyDurationEstimateIfNeeded().
// NOTE(review): presumably derived from |estimated_next_frame_duration_| or
// an arbitrary default, per that method's comment — confirm.
108 base::TimeDelta
GetDurationEstimate();
113 // Parsed track buffers, each with duration and in (decode) timestamp order,
114 // that have not yet been extracted into |ready_buffers_|. Note that up to
115 // one additional buffer missing duration may be tracked by
116 // |last_added_buffer_missing_duration_|.
117 BufferQueue buffers_
;
// The buffer, if any, that was added without a duration and is held aside
// until AddBuffer() or ApplyDurationEstimateIfNeeded() assigns one and moves
// it into |buffers_|.
118 scoped_refptr
<StreamParserBuffer
> last_added_buffer_missing_duration_
;
120 // Buffers in (decode) timestamp order that were previously parsed into and
121 // extracted from |buffers_|. Buffers are moved from |buffers_| to
122 // |ready_buffers_| by ExtractReadyBuffers() if they are below a specified
123 // upper bound timestamp. Track users can therefore extract only those
124 // parsed buffers which are "ready" for emission (all before some maximum
// timestamp).
126 BufferQueue ready_buffers_
;
// Default duration for this track, as returned by default_duration().
128 // If kNoTimestamp(), then |estimated_next_frame_duration_| will be used.
129 base::TimeDelta default_duration_
;
131 // If kNoTimestamp(), then a default value will be used. This estimate is
132 // the maximum duration seen or derived so far for this track, and is valid
133 // only if |default_duration_| is kNoTimestamp().
// Updated by QueueBuffer().
134 base::TimeDelta estimated_next_frame_duration_
;
// Maps an int key to a Track.
// NOTE(review): the key is presumably the text track number used by
// FindTextTrack() — confirm.
139 typedef std::map
<int, Track
> TextTrackMap
;
142 WebMClusterParser(int64 timecode_scale
,
144 base::TimeDelta audio_default_duration
,
146 base::TimeDelta video_default_duration
,
147 const WebMTracksParser::TextTracks
& text_tracks
,
148 const std::set
<int64
>& ignored_tracks
,
149 const std::string
& audio_encryption_key_id
,
150 const std::string
& video_encryption_key_id
,
151 const AudioCodec audio_codec_
,
152 const LogCB
& log_cb
);
// Destructor; marked override, so it overrides a virtual base destructor.
153 ~WebMClusterParser() override
;
155 // Resets the parser state so it can accept a new cluster.
158 // Parses a WebM cluster element in |buf|.
// NOTE(review): |size| is presumably the number of bytes available in |buf|
// — confirm.
160 // Returns -1 if the parse fails.
161 // Returns 0 if more data is needed.
162 // Returns the number of bytes parsed on success.
163 int Parse(const uint8_t* buf
, int size
);
// Returns |cluster_start_time_| (by value).
165 base::TimeDelta
cluster_start_time() const { return cluster_start_time_
; }
167 // Get the current ready buffers resulting from Parse().
168 // If the parse reached the end of cluster and the last buffer was held aside
169 // due to missing duration, the buffer is given an estimated duration and
170 // included in the result.
171 // Otherwise, if there is a buffer held aside due to missing duration for
172 // any of the tracks, no buffers with same or greater (decode) timestamp will
173 // be included in the buffers.
174 // The returned deques are cleared by Parse() or Reset() and updated by the
175 // next calls to Get{Audio,Video}Buffers().
176 // If no Parse() or Reset() has occurred since the last call to Get{Audio,
177 // Video,Text}Buffers(), then the previous BufferQueue& is returned again
178 // without any recalculation.
179 const BufferQueue
& GetAudioBuffers();
180 const BufferQueue
& GetVideoBuffers();
182 // Constructs and returns a subset of |text_track_map_| containing only
183 // tracks with non-empty buffer queues produced by the last Parse() and
184 // filtered to exclude any buffers that have (decode) timestamp same or
185 // greater than the lowest (decode) timestamp across all tracks of any buffer
186 // held aside due to missing duration (unless the end of cluster has been
// reached).
188 // The returned map is cleared by Parse() or Reset() and updated by the next
189 // call to GetTextBuffers().
190 // If no Parse() or Reset() has occurred since the last call to
191 // GetTextBuffers(), then the previous TextBufferQueueMap& is returned again
192 // without any recalculation.
193 const TextBufferQueueMap
& GetTextBuffers();
195 // Returns true if the last Parse() call stopped at the end of a cluster
// (i.e. the current value of |cluster_ended_|).
196 bool cluster_ended() const { return cluster_ended_
; }
199 // WebMParserClient methods. Each is declared override.
// NOTE(review): |id| is presumably the WebM element ID being reported, and
// the bool results presumably signal whether parsing should continue —
// confirm against the WebMParserClient interface.
200 WebMParserClient
* OnListStart(int id
) override
;
201 bool OnListEnd(int id
) override
;
202 bool OnUInt(int id
, int64 val
) override
;
203 bool OnBinary(int id
, const uint8_t* data
, int size
) override
;
205 bool ParseBlock(bool is_simple_block
,
208 const uint8_t* additional
,
211 int64 discard_padding
);
212 bool OnBlock(bool is_simple_block
,
219 const uint8_t* additional
,
221 int64 discard_padding
);
223 // Resets the Track objects associated with each text track
// (presumably the entries of |text_track_map_| — confirm).
224 void ResetTextTracks();
226 // Clears the ready buffers associated with each text track.
227 void ClearTextTrackReadyBuffers();
229 // Helper method for Get{Audio,Video,Text}Buffers() that recomputes
230 // |ready_buffer_upper_bound_| and calls ExtractReadyBuffers() on each track.
231 // If |cluster_ended_| is true, first applies duration estimate if needed for
232 // |audio_| and |video_| and sets |ready_buffer_upper_bound_| to
233 // kInfiniteDuration(). Otherwise, sets |ready_buffer_upper_bound_| to the
234 // minimum upper bound across |audio_| and |video_|. (Text tracks can have no
235 // buffers missing duration, so they are not involved in calculating the upper
// bound.)
237 // Parse() or Reset() must be called between calls to UpdateReadyBuffers() to
238 // clear each track's ready buffers and to reset |ready_buffer_upper_bound_|
239 // to kNoDecodeTimestamp().
240 void UpdateReadyBuffers();
242 // Search for the indicated track_num among the text tracks. Returns NULL
243 // if that track num is not a text track.
// NOTE(review): a non-NULL result presumably points at an entry owned by
// |text_track_map_| and is not owned by the caller — confirm.
244 Track
* FindTextTrack(int track_num
);
246 // Attempts to read the duration from the encoded audio data, returning as
247 // TimeDelta or kNoTimestamp() if duration cannot be retrieved. This obviously
248 // violates layering rules, but is useful for MSE to know duration in cases
249 // where it isn't explicitly given and cannot be calculated for Blocks at the
250 // end of a Cluster (the next Cluster in playback-order may not be the next
251 // Cluster we parse, so we can't simply use the delta of the first Block in
252 // the next Cluster). Avoid calling if encrypted; may produce unexpected
253 // output. See implementation for supported codecs.
// NOTE(review): |data|/|size| are presumably the encoded audio bytes and
// their count — confirm.
254 base::TimeDelta
TryGetEncodedAudioDuration(const uint8_t* data
, int size
);
256 // Reads Opus packet header to determine packet duration. Duration returned
257 // as TimeDelta or kNoTimestamp() upon failure to read duration from packet.
// NOTE(review): |data|/|size| are presumably the Opus packet bytes and their
// count; see |kOpusFrameDurationsMu| for the per-frame duration table.
258 base::TimeDelta
ReadOpusDuration(const uint8_t* data
, int size
);
260 // Tracks the number of MEDIA_LOGs made in process of reading encoded
261 // duration. Useful to prevent log spam.
// NOTE(review): presumably compared against a cap before logging — confirm.
262 int num_duration_errors_
;
264 double timecode_multiplier_
; // Multiplier used to convert timecodes into
// time values. NOTE(review): the target unit is presumably microseconds —
// confirm against the constructor's use of |timecode_scale|.
// int64 values identifying tracks to ignore. NOTE(review): presumably track
// numbers taken from the constructor's |ignored_tracks| argument — confirm.
266 std::set
<int64
> ignored_tracks_
;
// Encryption key IDs for the audio and video tracks. NOTE(review): presumably
// copied from the matching constructor arguments — confirm.
267 std::string audio_encryption_key_id_
;
268 std::string video_encryption_key_id_
;
// Audio codec; const, so it is fixed at construction.
269 const AudioCodec audio_codec_
;
// NOTE(review): presumably the list parser that drives the WebMParserClient
// callbacks on this object — confirm.
271 WebMListParser parser_
;
// State for the block being parsed. NOTE(review): presumably accumulated by
// the On*() callbacks and reset between blocks — confirm.
273 int64 last_block_timecode_
;
// Owned byte array and its size in bytes (presumably the current block's
// payload — confirm).
274 scoped_ptr
<uint8_t[]> block_data_
;
275 int block_data_size_
;
276 int64 block_duration_
;
// Owned byte array of additional block data; null when absent.
279 scoped_ptr
<uint8_t[]> block_additional_data_
;
280 // Must be 0 if |block_additional_data_| is null. Must be > 0 if
281 // |block_additional_data_| is NOT null.
282 int block_additional_data_size_
;
// NOTE(review): |discard_padding_set_| presumably records whether
// |discard_padding_| has been assigned for the current block — confirm.
284 int64 discard_padding_
;
285 bool discard_padding_set_
;
287 int64 cluster_timecode_
;
// Value returned by cluster_start_time().
288 base::TimeDelta cluster_start_time_
;
// Per-text-track state; GetTextBuffers() returns a filtered subset of this.
293 TextTrackMap text_track_map_
;
295 // Subset of |text_track_map_| maintained by GetTextBuffers(), and cleared by
296 // ClearTextTrackReadyBuffers(). Callers of GetTextBuffers() get a const-ref
// to this member.
298 TextBufferQueueMap text_buffers_map_
;
300 // Limits the range of buffers returned by Get{Audio,Video,Text}Buffers() to
301 // this exclusive upper bound. Set to kNoDecodeTimestamp(), meaning not yet
302 // calculated, by Reset() and Parse(). If kNoDecodeTimestamp(), then
303 // Get{Audio,Video,Text}Buffers() will calculate it to be the minimum (decode)
304 // timestamp across all tracks' |last_added_buffer_missing_duration_|, or
305 // kInfiniteDuration() if no buffers are currently missing duration.
306 DecodeTimestamp ready_buffer_upper_bound_
;
// Chromium base macro. NOTE(review): presumably suppresses the implicitly
// generated default/copy constructors and assignment, so instances can only
// be built through the explicit constructor above — confirm in base/macros.h.
310 DISALLOW_IMPLICIT_CONSTRUCTORS(WebMClusterParser
);
315 #endif // MEDIA_FORMATS_WEBM_WEBM_CLUSTER_PARSER_H_