1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "media/mp2t/es_parser_h264.h"
7 #include "base/basictypes.h"
8 #include "base/logging.h"
9 #include "media/base/bit_reader.h"
10 #include "media/base/buffers.h"
11 #include "media/base/stream_parser_buffer.h"
12 #include "media/base/video_frame.h"
13 #include "media/mp2t/mp2t_common.h"
14 #include "ui/gfx/rect.h"
15 #include "ui/gfx/size.h"
// Sample aspect ratio (SAR) constants used by ProcessSPS when it reads the
// aspect ratio information of the VUI section.
// kExtendedSar is the |aspect_ratio_idc| value for which ProcessSPS reads the
// SAR width and height as two explicit 16-bit fields instead of using the
// tables.
17 static const int kExtendedSar
= 255;
20 // VUI parameters: Table E-1 "Meaning of sample aspect ratio indicator"
// Number of entries in each of the two lookup tables; ProcessSPS only indexes
// them when |aspect_ratio_idc| < kSarTableSize.
21 static const int kSarTableSize
= 17;
// SAR numerators, indexed by |aspect_ratio_idc|. Entry 0 is 0.
22 static const int kTableSarWidth
[kSarTableSize
] = {
23 0, 1, 12, 10, 16, 40, 24, 20, 32, 80, 18, 15, 64, 160, 4, 3, 2
// SAR denominators, indexed by |aspect_ratio_idc|. Entry 0 is 0, so index 0
// yields a 0/0 ratio (ProcessSPS logs "Unspecified SAR not supported" when
// either SAR component is 0).
25 static const int kTableSarHeight
[kSarTableSize
] = {
26 0, 1, 11, 11, 11, 33, 11, 11, 11, 33, 11, 11, 33, 99, 3, 2, 1
29 // Remove the start code emulation prevention ( 0x000003 )
30 // and return the size of the converted buffer.
31 // Note: Size of |buf_rbsp| should be at least |size| to accommodate
// the converted bytes.
// |buf| / |size|: input bytes to convert.
// |buf_rbsp|: caller-provided output buffer (ProcessSPS allocates |size|
// bytes for it).
33 static int ConvertToRbsp(const uint8
* buf
, int size
, uint8
* buf_rbsp
) {
// Scan the input one byte at a time.
36 for (int k
= 0; k
< size
; k
++) {
// A 0x3 byte is only treated specially when |zero_count| >= 2.
// NOTE(review): presumably |zero_count| is the number of consecutive 0x00
// bytes immediately preceding position k, and the 0x3 byte is then dropped
// rather than copied -- confirm against the statements that update it.
37 if (buf
[k
] == 0x3 && zero_count
>= 2) {
// Copy the current input byte to the output and advance the output size.
45 buf_rbsp
[rbsp_size
++] = buf
[k
];
53 // ISO 14496 - Part 10: Table 7-1 "NAL unit type codes"
// NalParser dispatches on these values: a non-IDR slice clears
// |is_key_frame_| for the access unit being assembled.
55 kNalUnitTypeNonIdrSlice
= 1,
56 kNalUnitTypeIdrSlice
= 5,
// BitReader subclass that adds unsigned Exp-Golomb decoding. ProcessSPS
// constructs one over the RBSP buffer to read the SPS fields.
62 class BitReaderH264
: public BitReader
{
// Forwards |data| and |size| unchanged to the BitReader constructor.
64 BitReaderH264(const uint8
* data
, off_t size
)
65 : BitReader(data
, size
) { }
67 // Read an unsigned exp-golomb value.
68 // Return true if successful.
// The decoded value is written to |*exp_golomb_value|.
69 bool ReadBitsExpGolomb(uint32
* exp_golomb_value
);
// Decodes one unsigned Exp-Golomb code from the bit stream.
// The code is a run of |zero_count| leading zero bits followed by further
// bits; the decoded value is (2^zero_count - 1) + the |zero_count|-bit offset
// read afterwards, and it is stored in |*exp_golomb_value|.
// Codes with more than 31 leading zeros are not decoded because the result
// would not fit in a uint32.
72 bool BitReaderH264::ReadBitsExpGolomb(uint32
* exp_golomb_value
) {
73 // Get the number of leading zeros.
// Bits are read one at a time; presumably RCHECK makes this function return
// false when the underlying read fails -- confirm against the macro.
77 RCHECK(ReadBits(1, &one_bit
));
83 // If zero_count is greater than 31, the calculated value will overflow.
84 if (zero_count
> 31) {
89 // Read the actual value.
// NOTE(review): the literal 1 is a signed int and zero_count == 31 passes the
// check above, so `1 << zero_count` overflows int in that case; an unsigned
// literal (1u << zero_count) would avoid it -- confirm and fix.
90 uint32 base
= (1 << zero_count
) - 1;
// The offset is the next |zero_count| bits of the stream.
92 RCHECK(ReadBits(zero_count
, &offset
));
93 *exp_golomb_value
= base
+ offset
;
// Constructs a parser that reports new video configurations through
// |new_video_config_cb| and emits access units through |emit_buffer_cb|.
// The current NAL position and the current access unit position both start
// at -1, which the rest of this file treats as "none" (see the `>= 0` and
// `< 0` checks in ParseInternal, EmitFrameIfNeeded and DiscardEs).
98 EsParserH264::EsParserH264(
99 const NewVideoConfigCB
& new_video_config_cb
,
100 const EmitBufferCB
& emit_buffer_cb
)
101 : new_video_config_cb_(new_video_config_cb
),
102 emit_buffer_cb_(emit_buffer_cb
),
104 current_nal_pos_(-1),
105 current_access_unit_pos_(-1),
// No frame is in progress yet, so the key frame flag starts out false.
106 is_key_frame_(false) {
// Destructor.
// NOTE(review): presumably there is nothing to release explicitly here --
// confirm against the full definition.
109 EsParserH264::~EsParserH264() {
// Feeds one reassembled PES payload (|buf|, |size|) into the parser.
// Steps, in order:
//  1. Check that at least one of |dts| / |pts| is set.
//  2. Record a timing descriptor for this payload, keyed by the size reported
//     by the ES queue before the payload is appended (presumably the byte
//     offset at which the payload starts in the queue -- confirm against
//     the queue's Peek contract).
//  3. Append the payload to the ES queue and run ParseInternal over it.
//  4. Discard the bytes that are no longer needed.
112 bool EsParserH264::Parse(const uint8
* buf
, int size
,
114 base::TimeDelta dts
) {
115 // Note: Parse is invoked each time a PES packet has been reassembled.
116 // Unfortunately, a PES packet does not necessarily map
117 // to an h264 access unit, although the HLS recommendation is to use one PES
118 // for each access unit (but this is just a recommendation and some streams
119 // do not comply with this recommendation).
121 // Link position |raw_es_size| in the ES stream with a timing descriptor.
122 // HLS recommendation: "In AVC video, you should have both a DTS and a
123 // PTS in each PES header".
// A payload with neither timestamp is logged.
// NOTE(review): presumably it is also rejected at this point -- confirm.
124 if (dts
== kNoTimestamp() && pts
== kNoTimestamp()) {
125 DVLOG(1) << "A timestamp must be provided for each reassembled PES";
128 TimingDesc timing_desc
;
129 timing_desc
.pts
= pts
;
// When no DTS is provided, the PTS is used as the DTS.
130 timing_desc
.dts
= (dts
!= kNoTimestamp()) ? dts
: pts
;
// Query the queue before pushing, so |raw_es_size| reflects what was queued
// ahead of this payload.
134 es_byte_queue_
.Peek(&raw_es
, &raw_es_size
);
135 timing_desc_list_
.push_back(
136 std::pair
<int, TimingDesc
>(raw_es_size
, timing_desc
));
138 // Add the incoming bytes to the ES queue.
139 es_byte_queue_
.Push(buf
, size
);
141 // Add NALs from the incoming buffer.
142 if (!ParseInternal())
145 // Discard emitted frames
146 // or every byte that was parsed so far if there is no current frame.
// With a frame in progress, everything before its start can be dropped;
// otherwise everything up to the current parsing position |es_pos_| can.
148 (current_access_unit_pos_
>= 0) ? current_access_unit_pos_
: es_pos_
;
149 DiscardEs(skip_count
);
// Emits the access unit that is still pending, if there is one.
// The size reported by the ES queue is used as the position of the "next
// AUD", so the frame passed to EmitFrameIfNeeded extends from
// |current_access_unit_pos_| to that position. The emitted bytes are then
// discarded and the current NAL position is cleared.
154 void EsParserH264::Flush() {
// A negative access unit position means there is no frame in progress.
155 if (current_access_unit_pos_
< 0)
158 // Force emitting the last access unit.
161 es_byte_queue_
.Peek(&raw_es
, &next_aud_pos
);
// NOTE(review): the bool result of EmitFrameIfNeeded is ignored here, whereas
// ParseInternal wraps the same call in RCHECK -- confirm this is intended.
162 EmitFrameIfNeeded(next_aud_pos
);
163 current_nal_pos_
= -1;
166 // Discard the emitted frame.
167 DiscardEs(next_aud_pos
);
// Returns the parser to a state with no buffered data: the ES queue and the
// timing descriptor list are emptied, the current NAL position is cleared
// and the last known video configuration is replaced by a
// default-constructed one.
170 void EsParserH264::Reset() {
171 DVLOG(1) << "EsParserH264::Reset";
172 es_byte_queue_
.Reset();
173 timing_desc_list_
.clear();
// -1 means "no current NAL".
175 current_nal_pos_
= -1;
// Replacing the stored configuration with a default-constructed one;
// presumably this makes the next parsed SPS fail the Matches() check in
// ProcessSPS and be reported as a new configuration -- confirm.
177 last_video_decoder_config_
= VideoDecoderConfig();
// Scans the queued ES bytes for NAL start codes, resuming at |es_pos_|.
// Each time a new start code is found:
//  - the NAL that was current up to that point (if any) is handed to
//    NalParser, with a size running up to the new start code;
//  - the NAL following the new start code becomes the current one;
//  - if that NAL is an access unit delimiter, EmitFrameIfNeeded is called
//    with the position of its start code.
// Failures of the RCHECKed steps are presumably reported by returning false
// (Parse tests the result with `!ParseInternal()`) -- confirm against the
// RCHECK macro.
180 bool EsParserH264::ParseInternal() {
183 es_byte_queue_
.Peek(&raw_es
, &raw_es_size
);
// The resume position must lie inside the queued data (debug-only checks).
185 DCHECK_GE(es_pos_
, 0);
186 DCHECK_LT(es_pos_
, raw_es_size
);
188 // Resume h264 es parsing where it was left.
// The bound leaves at least 5 readable bytes from |es_pos_|, which covers
// the longest start code (4 bytes) plus the NAL header byte read below.
189 for ( ; es_pos_
< raw_es_size
- 4; es_pos_
++) {
190 // Make sure the syncword is either 00 00 00 01 or 00 00 01
191 if (raw_es
[es_pos_
+ 0] != 0 || raw_es
[es_pos_
+ 1] != 0)
193 int syncword_length
= 0;
// 4-byte form: 00 00 00 01.
194 if (raw_es
[es_pos_
+ 2] == 0 && raw_es
[es_pos_
+ 3] == 1)
// 3-byte form: 00 00 01.
196 else if (raw_es
[es_pos_
+ 2] == 1)
201 // Parse the current NAL (and the new NAL then becomes the current one).
202 if (current_nal_pos_
>= 0) {
// The previous NAL spans from its first byte up to this start code.
203 int nal_size
= es_pos_
- current_nal_pos_
;
204 DCHECK_GT(nal_size
, 0);
205 RCHECK(NalParser(&raw_es
[current_nal_pos_
], nal_size
));
// The new current NAL starts right after the start code.
207 current_nal_pos_
= es_pos_
+ syncword_length
;
209 // Retrieve the NAL type.
210 int nal_header
= raw_es
[current_nal_pos_
];
// The top bit of the header must be 0; the low 5 bits carry the type.
211 int forbidden_zero_bit
= (nal_header
>> 7) & 0x1;
212 RCHECK(forbidden_zero_bit
== 0);
213 NalUnitType nal_unit_type
= static_cast<NalUnitType
>(nal_header
& 0x1f);
214 DVLOG(LOG_LEVEL_ES
) << "nal: offset=" << es_pos_
215 << " type=" << nal_unit_type
;
217 // Emit a frame if needed.
// The frame boundary passed is the position of the AUD's start code.
218 if (nal_unit_type
== kNalUnitTypeAUD
)
219 RCHECK(EmitFrameIfNeeded(es_pos_
));
221 // Skip the syncword.
222 es_pos_
+= syncword_length
;
// Emits the access unit in progress, which ends at |next_aud_pos|, and makes
// |next_aud_pos| the start of the next one.
// If no access unit is in progress, one is simply started at |next_aud_pos|.
// Otherwise the timing descriptor that applies to the access unit is looked
// up, the bytes starting at |current_access_unit_pos_| are copied into a
// StreamParserBuffer stamped with that DTS/PTS, and the buffer is handed to
// |emit_buffer_cb_|.
228 bool EsParserH264::EmitFrameIfNeeded(int next_aud_pos
) {
229 // There is no current frame: start a new frame.
230 if (current_access_unit_pos_
< 0) {
231 StartFrame(next_aud_pos
);
235 // Get the access unit timing info.
236 TimingDesc current_timing_desc
= {kNoTimestamp(), kNoTimestamp()};
// Consume every descriptor registered at or before the start of the access
// unit; the last one consumed is the one that applies.
237 while (!timing_desc_list_
.empty() &&
238 timing_desc_list_
.front().first
<= current_access_unit_pos_
) {
239 current_timing_desc
= timing_desc_list_
.front().second
;
240 timing_desc_list_
.pop_front();
// No descriptor was found for this access unit.
// NOTE(review): the handling of this case is not shown here; presumably the
// frame cannot be emitted without a PTS -- confirm.
243 if (current_timing_desc
.pts
== kNoTimestamp())
249 es_byte_queue_
.Peek(&raw_es
, &raw_es_size
);
// The access unit runs from its start up to the next AUD position.
250 int access_unit_size
= next_aud_pos
- current_access_unit_pos_
;
251 scoped_refptr
<StreamParserBuffer
> stream_parser_buffer
=
252 StreamParserBuffer::CopyFrom(
253 &raw_es
[current_access_unit_pos_
],
256 stream_parser_buffer
->SetDecodeTimestamp(current_timing_desc
.dts
);
257 stream_parser_buffer
->set_timestamp(current_timing_desc
.pts
);
258 emit_buffer_cb_
.Run(stream_parser_buffer
);
260 // Set the current frame position to the next AUD position.
261 StartFrame(next_aud_pos
);
// Sets the start position of the current access unit to |aud_pos| and
// re-initializes the key frame flag for it.
265 void EsParserH264::StartFrame(int aud_pos
) {
267 // - if aud_pos < 0, clear the current frame and set |is_key_frame| to a
268 // default value (false).
269 // - if aud_pos >= 0, start a new frame and set |is_key_frame| to true
270 // |is_key_frame_| will be updated while parsing the NALs of that frame.
271 // If any NAL is a non IDR NAL, it will be set to false.
272 current_access_unit_pos_
= aud_pos
;
// A frame is assumed to be a key frame until NalParser sees a non-IDR slice.
273 is_key_frame_
= (aud_pos
>= 0);
// Drops the first |nbytes| bytes of the ES queue and shifts every stored
// position that refers to the queue by the same amount, so that those
// positions keep pointing at the same data afterwards.
276 void EsParserH264::DiscardEs(int nbytes
) {
277 DCHECK_GE(nbytes
, 0);
281 // Update the position of
283 // - the current NAL,
284 // - the current access unit.
// Positions that are unset (negative) are left untouched. The DCHECKs assert
// that a set position is never inside the discarded range.
289 if (current_nal_pos_
>= 0) {
290 DCHECK_GE(current_nal_pos_
, nbytes
);
291 current_nal_pos_
-= nbytes
;
293 if (current_access_unit_pos_
>= 0) {
294 DCHECK_GE(current_access_unit_pos_
, nbytes
);
295 current_access_unit_pos_
-= nbytes
;
298 // Update the timing information accordingly.
// Every remaining descriptor position is shifted by |nbytes|; no lower bound
// is enforced here, so a shifted position may become negative.
299 std::list
<std::pair
<int, TimingDesc
> >::iterator timing_it
300 = timing_desc_list_
.begin();
301 for (; timing_it
!= timing_desc_list_
.end(); ++timing_it
)
302 timing_it
->first
-= nbytes
;
304 // Discard |nbytes| of ES.
305 es_byte_queue_
.Pop(nbytes
);
// Processes one NAL unit: |buf| points at its header byte and |size| is its
// length in bytes (ParseInternal passes the bytes between two start codes).
// The header is decoded and the NAL is dispatched on its type:
//  - SPS: forwarded to ProcessSPS, whose result is returned;
//  - non-IDR slice: clears |is_key_frame_|;
//  - IDR slice, PPS, AUD and any other type: logged.
308 bool EsParserH264::NalParser(const uint8
* buf
, int size
) {
309 // Get the NAL header.
311 DVLOG(1) << "NalParser: incomplete NAL";
314 int nal_header
= buf
[0];
// Header layout: 1 bit forbidden_zero_bit, 2 bits nal_ref_idc,
// 5 bits nal_unit_type.
318 int forbidden_zero_bit
= (nal_header
>> 7) & 0x1;
319 if (forbidden_zero_bit
!= 0)
321 int nal_ref_idc
= (nal_header
>> 5) & 0x3;
322 int nal_unit_type
= nal_header
& 0x1f;
324 // Process the NAL content.
325 switch (nal_unit_type
) {
326 case kNalUnitTypeSPS
:
327 DVLOG(LOG_LEVEL_ES
) << "NAL: SPS";
328 // |nal_ref_idc| should not be 0 for a SPS.
329 if (nal_ref_idc
== 0)
// The whole NAL, header byte included, is passed on.
331 return ProcessSPS(buf
, size
);
332 case kNalUnitTypeIdrSlice
:
333 DVLOG(LOG_LEVEL_ES
) << "NAL: IDR slice";
335 case kNalUnitTypeNonIdrSlice
:
336 DVLOG(LOG_LEVEL_ES
) << "NAL: Non IDR slice";
// One non-IDR slice is enough for the frame not to be a key frame.
337 is_key_frame_
= false;
339 case kNalUnitTypePPS
:
340 DVLOG(LOG_LEVEL_ES
) << "NAL: PPS";
342 case kNalUnitTypeAUD
:
343 DVLOG(LOG_LEVEL_ES
) << "NAL: AUD";
// Any other NAL type is only logged with its numeric type.
346 DVLOG(LOG_LEVEL_ES
) << "NAL: " << nal_unit_type
;
354 bool EsParserH264::ProcessSPS(const uint8
* buf
, int size
) {
358 // Removes start code emulation prevention.
359 // TODO(damienv): refactoring in media/base
360 // so as to have a unique H264 bit reader in Chrome.
361 scoped_ptr
<uint8
[]> buf_rbsp(new uint8
[size
]);
362 int rbsp_size
= ConvertToRbsp(buf
, size
, buf_rbsp
.get());
364 BitReaderH264
bit_reader(buf_rbsp
.get(), rbsp_size
);
367 int constraint_setX_flag
;
369 uint32 seq_parameter_set_id
;
370 uint32 log2_max_frame_num_minus4
;
371 uint32 pic_order_cnt_type
;
372 RCHECK(bit_reader
.ReadBits(8, &profile_idc
));
373 RCHECK(bit_reader
.ReadBits(8, &constraint_setX_flag
));
374 RCHECK(bit_reader
.ReadBits(8, &level_idc
));
375 RCHECK(bit_reader
.ReadBitsExpGolomb(&seq_parameter_set_id
));
377 if (profile_idc
== 100 || profile_idc
== 110 ||
378 profile_idc
== 122 || profile_idc
== 244 ||
379 profile_idc
== 44 || profile_idc
== 83 ||
380 profile_idc
== 86 || profile_idc
== 118 ||
381 profile_idc
== 128) {
382 uint32 chroma_format_idc
;
383 RCHECK(bit_reader
.ReadBitsExpGolomb(&chroma_format_idc
));
384 if (chroma_format_idc
== 3) {
385 int separate_colour_plane_flag
;
386 RCHECK(bit_reader
.ReadBits(1, &separate_colour_plane_flag
));
388 uint32 bit_depth_luma_minus8
;
389 uint32 bit_depth_chroma_minus8
;
390 int qpprime_y_zero_transform_bypass_flag
;
391 int seq_scaling_matrix_present_flag
;
392 RCHECK(bit_reader
.ReadBitsExpGolomb(&bit_depth_luma_minus8
));
393 RCHECK(bit_reader
.ReadBitsExpGolomb(&bit_depth_chroma_minus8
));
394 RCHECK(bit_reader
.ReadBits(1, &qpprime_y_zero_transform_bypass_flag
));
395 RCHECK(bit_reader
.ReadBits(1, &seq_scaling_matrix_present_flag
));
396 if (seq_scaling_matrix_present_flag
) {
397 int skip_count
= (chroma_format_idc
!= 3) ? 8 : 12;
398 RCHECK(bit_reader
.SkipBits(skip_count
));
402 RCHECK(bit_reader
.ReadBitsExpGolomb(&log2_max_frame_num_minus4
));
403 RCHECK(bit_reader
.ReadBitsExpGolomb(&pic_order_cnt_type
));
405 // |pic_order_cnt_type| shall be in the range of 0 to 2.
406 RCHECK(pic_order_cnt_type
<= 2);
407 if (pic_order_cnt_type
== 0) {
408 uint32 log2_max_pic_order_cnt_lsb_minus4
;
409 RCHECK(bit_reader
.ReadBitsExpGolomb(&log2_max_pic_order_cnt_lsb_minus4
));
410 } else if (pic_order_cnt_type
== 1) {
411 // Note: |offset_for_non_ref_pic| and |offset_for_top_to_bottom_field|
412 // corresponds to their codenum not to their actual value.
413 int delta_pic_order_always_zero_flag
;
414 uint32 offset_for_non_ref_pic
;
415 uint32 offset_for_top_to_bottom_field
;
416 uint32 num_ref_frames_in_pic_order_cnt_cycle
;
417 RCHECK(bit_reader
.ReadBits(1, &delta_pic_order_always_zero_flag
));
418 RCHECK(bit_reader
.ReadBitsExpGolomb(&offset_for_non_ref_pic
));
419 RCHECK(bit_reader
.ReadBitsExpGolomb(&offset_for_top_to_bottom_field
));
421 bit_reader
.ReadBitsExpGolomb(&num_ref_frames_in_pic_order_cnt_cycle
));
422 for (uint32 i
= 0; i
< num_ref_frames_in_pic_order_cnt_cycle
; i
++) {
423 uint32 offset_for_ref_frame_codenum
;
424 RCHECK(bit_reader
.ReadBitsExpGolomb(&offset_for_ref_frame_codenum
));
428 uint32 num_ref_frames
;
429 int gaps_in_frame_num_value_allowed_flag
;
430 uint32 pic_width_in_mbs_minus1
;
431 uint32 pic_height_in_map_units_minus1
;
432 RCHECK(bit_reader
.ReadBitsExpGolomb(&num_ref_frames
));
433 RCHECK(bit_reader
.ReadBits(1, &gaps_in_frame_num_value_allowed_flag
));
434 RCHECK(bit_reader
.ReadBitsExpGolomb(&pic_width_in_mbs_minus1
));
435 RCHECK(bit_reader
.ReadBitsExpGolomb(&pic_height_in_map_units_minus1
));
437 int frame_mbs_only_flag
;
438 RCHECK(bit_reader
.ReadBits(1, &frame_mbs_only_flag
));
439 if (!frame_mbs_only_flag
) {
440 int mb_adaptive_frame_field_flag
;
441 RCHECK(bit_reader
.ReadBits(1, &mb_adaptive_frame_field_flag
));
444 int direct_8x8_inference_flag
;
445 RCHECK(bit_reader
.ReadBits(1, &direct_8x8_inference_flag
));
447 int frame_cropping_flag
;
448 uint32 frame_crop_left_offset
= 0;
449 uint32 frame_crop_right_offset
= 0;
450 uint32 frame_crop_top_offset
= 0;
451 uint32 frame_crop_bottom_offset
= 0;
452 RCHECK(bit_reader
.ReadBits(1, &frame_cropping_flag
));
453 if (frame_cropping_flag
) {
454 RCHECK(bit_reader
.ReadBitsExpGolomb(&frame_crop_left_offset
));
455 RCHECK(bit_reader
.ReadBitsExpGolomb(&frame_crop_right_offset
));
456 RCHECK(bit_reader
.ReadBitsExpGolomb(&frame_crop_top_offset
));
457 RCHECK(bit_reader
.ReadBitsExpGolomb(&frame_crop_bottom_offset
));
460 int vui_parameters_present_flag
;
461 RCHECK(bit_reader
.ReadBits(1, &vui_parameters_present_flag
));
464 if (vui_parameters_present_flag
) {
465 // Read only the aspect ratio information from the VUI section.
466 // TODO(damienv): check whether other VUI info are useful.
467 int aspect_ratio_info_present_flag
;
468 RCHECK(bit_reader
.ReadBits(1, &aspect_ratio_info_present_flag
));
469 if (aspect_ratio_info_present_flag
) {
470 int aspect_ratio_idc
;
471 RCHECK(bit_reader
.ReadBits(8, &aspect_ratio_idc
));
472 if (aspect_ratio_idc
== kExtendedSar
) {
473 RCHECK(bit_reader
.ReadBits(16, &sar_width
));
474 RCHECK(bit_reader
.ReadBits(16, &sar_height
));
475 } else if (aspect_ratio_idc
< kSarTableSize
) {
476 sar_width
= kTableSarWidth
[aspect_ratio_idc
];
477 sar_height
= kTableSarHeight
[aspect_ratio_idc
];
482 if (sar_width
== 0 || sar_height
== 0) {
483 DVLOG(1) << "Unspecified SAR not supported";
487 // TODO(damienv): a MAP unit can be either 16 or 32 pixels.
488 // although it's 16 pixels for progressive non MBAFF frames.
489 gfx::Size
coded_size((pic_width_in_mbs_minus1
+ 1) * 16,
490 (pic_height_in_map_units_minus1
+ 1) * 16);
491 gfx::Rect
visible_rect(
492 frame_crop_left_offset
,
493 frame_crop_top_offset
,
494 (coded_size
.width() - frame_crop_right_offset
) - frame_crop_left_offset
,
495 (coded_size
.height() - frame_crop_bottom_offset
) - frame_crop_top_offset
);
496 if (visible_rect
.width() <= 0 || visible_rect
.height() <= 0)
498 gfx::Size
natural_size((visible_rect
.width() * sar_width
) / sar_height
,
499 visible_rect
.height());
500 if (natural_size
.width() == 0)
504 // Assuming the SPS is used right away by the PPS
505 // and the slice headers is a strong assumption.
506 // In theory, we should process the SPS and PPS
507 // and only when one of the slice header is switching
508 // the PPS id, the video decoder config should be changed.
509 VideoDecoderConfig
video_decoder_config(
511 VIDEO_CODEC_PROFILE_UNKNOWN
, // TODO(damienv)
519 if (!video_decoder_config
.Matches(last_video_decoder_config_
)) {
520 DVLOG(1) << "Profile IDC: " << profile_idc
;
521 DVLOG(1) << "Level IDC: " << level_idc
;
522 DVLOG(1) << "Pic width: " << (pic_width_in_mbs_minus1
+ 1) * 16;
523 DVLOG(1) << "Pic height: " << (pic_height_in_map_units_minus1
+ 1) * 16;
524 DVLOG(1) << "log2_max_frame_num_minus4: " << log2_max_frame_num_minus4
;
525 DVLOG(1) << "SAR: width=" << sar_width
<< " height=" << sar_height
;
526 last_video_decoder_config_
= video_decoder_config
;
527 new_video_config_cb_
.Run(video_decoder_config
);