1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "media/ffmpeg/ffmpeg_common.h"
7 #include "base/basictypes.h"
8 #include "base/logging.h"
9 #include "media/base/decoder_buffer.h"
10 #include "media/base/video_frame.h"
11 #include "media/base/video_util.h"
15 // Why FF_INPUT_BUFFER_PADDING_SIZE? FFmpeg assumes all input buffers are
16 // padded. Check here to ensure FFmpeg only receives data padded to its
// NOTE(review): the sentence above is cut off in this view (presumably it ends
// with "specifications") -- confirm against the full file.
// Compile-time check that DecoderBuffer::kPaddingSize is at least
// FF_INPUT_BUFFER_PADDING_SIZE.
18 COMPILE_ASSERT(DecoderBuffer::kPaddingSize
>= FF_INPUT_BUFFER_PADDING_SIZE
,
19 decoder_buffer_padding_size_does_not_fit_ffmpeg_requirement
);
21 // Alignment requirement by FFmpeg for input and output buffers. This needs to
22 // be updated to match FFmpeg when it changes.
// The value is 16 when ARCH_CPU_ARM_FAMILY is defined; a second definition with
// the value 32 follows.
// NOTE(review): the preprocessor lines between and after the two definitions
// (presumably #else / #endif) are not visible in this view.
23 #if defined(ARCH_CPU_ARM_FAMILY)
24 static const int kFFmpegBufferAddressAlignment
= 16;
26 static const int kFFmpegBufferAddressAlignment
= 32;
29 // Check here to ensure FFmpeg only receives data aligned to its specifications.
// The condition below requires DecoderBuffer::kAlignmentSize to be at least
// kFFmpegBufferAddressAlignment and an exact multiple of it.
// NOTE(review): the line that opens this assertion is not visible in this view.
31 DecoderBuffer::kAlignmentSize
>= kFFmpegBufferAddressAlignment
&&
32 DecoderBuffer::kAlignmentSize
% kFFmpegBufferAddressAlignment
== 0,
33 decoder_buffer_alignment_size_does_not_fit_ffmpeg_requirement
);
35 // Allows faster SIMD YUV convert. Also, FFmpeg overreads/-writes occasionally.
36 // See video_get_buffer() in libavcodec/utils.c.
37 static const int kFFmpegOutputBufferPaddingSize
= 16;
// Compile-time check that VideoFrame::kFrameSizePadding is at least
// kFFmpegOutputBufferPaddingSize.
39 COMPILE_ASSERT(VideoFrame::kFrameSizePadding
>= kFFmpegOutputBufferPaddingSize
,
40 video_frame_padding_size_does_not_fit_ffmpeg_requirement
);
// The condition below requires VideoFrame::kFrameAddressAlignment to be at
// least kFFmpegBufferAddressAlignment and an exact multiple of it.
// NOTE(review): the line that opens this assertion is not visible in this view.
43 VideoFrame::kFrameAddressAlignment
>= kFFmpegBufferAddressAlignment
&&
44 VideoFrame::kFrameAddressAlignment
% kFFmpegBufferAddressAlignment
== 0,
45 video_frame_address_alignment_does_not_fit_ffmpeg_requirement
);
// Rational 1 / base::Time::kMicrosecondsPerSecond. Passed to av_rescale_q() by
// ConvertFromTimeBase() and ConvertToTimeBase() as the microsecond-side time
// base.
47 static const AVRational kMicrosBase
= { 1, base::Time::kMicrosecondsPerSecond
};
// Rescales |timestamp| from |time_base| to kMicrosBase with av_rescale_q() and
// returns the result as a base::TimeDelta built with FromMicroseconds().
// NOTE(review): the declaration of the |timestamp| parameter and the closing
// brace are not visible in this view.
49 base::TimeDelta
ConvertFromTimeBase(const AVRational
& time_base
,
51 int64 microseconds
= av_rescale_q(timestamp
, time_base
, kMicrosBase
);
52 return base::TimeDelta::FromMicroseconds(microseconds
);
// Rescales |timestamp|.InMicroseconds() from kMicrosBase to |time_base| with
// av_rescale_q() and returns the rescaled value.
// NOTE(review): the closing brace is not visible in this view.
55 int64
ConvertToTimeBase(const AVRational
& time_base
,
56 const base::TimeDelta
& timestamp
) {
57 return av_rescale_q(timestamp
.InMicroseconds(), kMicrosBase
, time_base
);
60 // Converts an FFmpeg audio codec ID into its corresponding supported codec id.
// Visible mappings: AV_CODEC_ID_PCM_S16BE -> kCodecPCM_S16BE,
// AV_CODEC_ID_PCM_S24BE -> kCodecPCM_S24BE and
// AV_CODEC_ID_PCM_MULAW -> kCodecPCM_MULAW. The function also logs
// "Unknown audio CodecID: " at DVLOG(1) and returns kUnknownAudioCodec.
// NOTE(review): the switch header, some case labels and the return statements
// for the remaining case labels are not visible in this view, so their targets
// cannot be confirmed from here.
61 static AudioCodec
CodecIDToAudioCodec(AVCodecID codec_id
) {
67 case AV_CODEC_ID_VORBIS
:
69 case AV_CODEC_ID_PCM_U8
:
70 case AV_CODEC_ID_PCM_S16LE
:
71 case AV_CODEC_ID_PCM_S24LE
:
72 case AV_CODEC_ID_PCM_F32LE
:
74 case AV_CODEC_ID_PCM_S16BE
:
75 return kCodecPCM_S16BE
;
76 case AV_CODEC_ID_PCM_S24BE
:
77 return kCodecPCM_S24BE
;
78 case AV_CODEC_ID_FLAC
:
80 case AV_CODEC_ID_AMR_NB
:
82 case AV_CODEC_ID_AMR_WB
:
84 case AV_CODEC_ID_GSM_MS
:
86 case AV_CODEC_ID_PCM_MULAW
:
87 return kCodecPCM_MULAW
;
88 case AV_CODEC_ID_OPUS
:
91 DVLOG(1) << "Unknown audio CodecID: " << codec_id
;
93 return kUnknownAudioCodec
;
// Converts an AudioCodec (plus a SampleFormat) into an FFmpeg AVCodecID by
// switching on |audio_codec|. One branch switches on |sample_format| to pick a
// PCM codec id; the function also logs "Unknown AudioCodec: " at DVLOG(1) and
// returns AV_CODEC_ID_NONE.
// NOTE(review): most outer case labels are not visible in this view, so the
// codec that selects each visible return cannot be confirmed from here.
96 static AVCodecID
AudioCodecToCodecID(AudioCodec audio_codec
,
97 SampleFormat sample_format
) {
98 switch (audio_codec
) {
100 return AV_CODEC_ID_AAC
;
102 return AV_CODEC_ID_MP3
;
// Nested switch: the PCM codec id is selected by sample format. Note that
// kSampleFormatS32 maps to AV_CODEC_ID_PCM_S24LE.
104 switch (sample_format
) {
105 case kSampleFormatU8
:
106 return AV_CODEC_ID_PCM_U8
;
107 case kSampleFormatS16
:
108 return AV_CODEC_ID_PCM_S16LE
;
109 case kSampleFormatS32
:
110 return AV_CODEC_ID_PCM_S24LE
;
111 case kSampleFormatF32
:
112 return AV_CODEC_ID_PCM_F32LE
;
114 DVLOG(1) << "Unsupported sample format: " << sample_format
;
117 case kCodecPCM_S16BE
:
118 return AV_CODEC_ID_PCM_S16BE
;
119 case kCodecPCM_S24BE
:
120 return AV_CODEC_ID_PCM_S24BE
;
122 return AV_CODEC_ID_VORBIS
;
124 return AV_CODEC_ID_FLAC
;
126 return AV_CODEC_ID_AMR_NB
;
128 return AV_CODEC_ID_AMR_WB
;
130 return AV_CODEC_ID_GSM_MS
;
131 case kCodecPCM_MULAW
:
132 return AV_CODEC_ID_PCM_MULAW
;
134 return AV_CODEC_ID_OPUS
;
136 DVLOG(1) << "Unknown AudioCodec: " << audio_codec
;
138 return AV_CODEC_ID_NONE
;
141 // Converts an FFmpeg video codec ID into its corresponding supported codec id.
// Case labels are visible for H264, THEORA, MPEG4, VP8 and VP9. The function
// also logs "Unknown video CodecID: " at DVLOG(1) and returns
// kUnknownVideoCodec.
// NOTE(review): the switch header and the return statement for each case label
// are not visible in this view.
142 static VideoCodec
CodecIDToVideoCodec(AVCodecID codec_id
) {
144 case AV_CODEC_ID_H264
:
146 case AV_CODEC_ID_THEORA
:
148 case AV_CODEC_ID_MPEG4
:
150 case AV_CODEC_ID_VP8
:
152 case AV_CODEC_ID_VP9
:
155 DVLOG(1) << "Unknown video CodecID: " << codec_id
;
157 return kUnknownVideoCodec
;
// Converts a VideoCodec into an FFmpeg AVCodecID by switching on |video_codec|.
// Visible results are the H264, THEORA, MPEG4, VP8 and VP9 codec ids. The
// function also logs "Unknown VideoCodec: " at DVLOG(1) and returns
// AV_CODEC_ID_NONE.
// NOTE(review): the case labels selecting each return are not visible in this
// view.
160 static AVCodecID
VideoCodecToCodecID(VideoCodec video_codec
) {
161 switch (video_codec
) {
163 return AV_CODEC_ID_H264
;
165 return AV_CODEC_ID_THEORA
;
167 return AV_CODEC_ID_MPEG4
;
169 return AV_CODEC_ID_VP8
;
171 return AV_CODEC_ID_VP9
;
173 DVLOG(1) << "Unknown VideoCodec: " << video_codec
;
175 return AV_CODEC_ID_NONE
;
// Converts an FFmpeg H.264 profile id into a VideoCodecProfile. The
// FF_PROFILE_H264_CONSTRAINED and FF_PROFILE_H264_INTRA bits are masked out of
// |profile| before the comparison. The function also logs
// "Unknown profile id: " at DVLOG(1) and returns VIDEO_CODEC_PROFILE_UNKNOWN.
// NOTE(review): the switch header is not visible in this view.
178 static VideoCodecProfile
ProfileIDToVideoCodecProfile(int profile
) {
179 // Clear out the CONSTRAINED & INTRA flags which are strict subsets of the
180 // corresponding profiles with which they're used.
181 profile
&= ~FF_PROFILE_H264_CONSTRAINED
;
182 profile
&= ~FF_PROFILE_H264_INTRA
;
184 case FF_PROFILE_H264_BASELINE
:
185 return H264PROFILE_BASELINE
;
186 case FF_PROFILE_H264_MAIN
:
187 return H264PROFILE_MAIN
;
188 case FF_PROFILE_H264_EXTENDED
:
189 return H264PROFILE_EXTENDED
;
190 case FF_PROFILE_H264_HIGH
:
191 return H264PROFILE_HIGH
;
192 case FF_PROFILE_H264_HIGH_10
:
193 return H264PROFILE_HIGH10PROFILE
;
194 case FF_PROFILE_H264_HIGH_422
:
195 return H264PROFILE_HIGH422PROFILE
;
196 case FF_PROFILE_H264_HIGH_444_PREDICTIVE
:
197 return H264PROFILE_HIGH444PREDICTIVEPROFILE
;
199 DVLOG(1) << "Unknown profile id: " << profile
;
201 return VIDEO_CODEC_PROFILE_UNKNOWN
;
// Converts an H.264 VideoCodecProfile into the matching FFmpeg
// FF_PROFILE_H264_* id. The function also logs "Unknown VideoCodecProfile: " at
// DVLOG(1) and returns FF_PROFILE_UNKNOWN.
// NOTE(review): the switch header is not visible in this view.
204 static int VideoCodecProfileToProfileID(VideoCodecProfile profile
) {
206 case H264PROFILE_BASELINE
:
207 return FF_PROFILE_H264_BASELINE
;
208 case H264PROFILE_MAIN
:
209 return FF_PROFILE_H264_MAIN
;
210 case H264PROFILE_EXTENDED
:
211 return FF_PROFILE_H264_EXTENDED
;
212 case H264PROFILE_HIGH
:
213 return FF_PROFILE_H264_HIGH
;
214 case H264PROFILE_HIGH10PROFILE
:
215 return FF_PROFILE_H264_HIGH_10
;
216 case H264PROFILE_HIGH422PROFILE
:
217 return FF_PROFILE_H264_HIGH_422
;
218 case H264PROFILE_HIGH444PREDICTIVEPROFILE
:
219 return FF_PROFILE_H264_HIGH_444_PREDICTIVE
;
221 DVLOG(1) << "Unknown VideoCodecProfile: " << profile
;
223 return FF_PROFILE_UNKNOWN
;
// Converts an FFmpeg AVSampleFormat into a SampleFormat. Handles the U8, S16,
// S32 and FLT formats plus the planar S16P and FLTP formats. The function also
// logs "Unknown AVSampleFormat: " at DVLOG(1) and returns kUnknownSampleFormat.
226 SampleFormat
AVSampleFormatToSampleFormat(AVSampleFormat sample_format
) {
227 switch (sample_format
) {
228 case AV_SAMPLE_FMT_U8
:
229 return kSampleFormatU8
;
230 case AV_SAMPLE_FMT_S16
:
231 return kSampleFormatS16
;
232 case AV_SAMPLE_FMT_S32
:
233 return kSampleFormatS32
;
234 case AV_SAMPLE_FMT_FLT
:
235 return kSampleFormatF32
;
236 case AV_SAMPLE_FMT_S16P
:
237 return kSampleFormatPlanarS16
;
238 case AV_SAMPLE_FMT_FLTP
:
239 return kSampleFormatPlanarF32
;
241 DVLOG(1) << "Unknown AVSampleFormat: " << sample_format
;
243 return kUnknownSampleFormat
;
// Converts a SampleFormat into an FFmpeg AVSampleFormat; the visible cases
// mirror AVSampleFormatToSampleFormat(). The function also logs
// "Unknown SampleFormat: " at DVLOG(1) and returns AV_SAMPLE_FMT_NONE.
246 static AVSampleFormat
SampleFormatToAVSampleFormat(SampleFormat sample_format
) {
247 switch (sample_format
) {
248 case kSampleFormatU8
:
249 return AV_SAMPLE_FMT_U8
;
250 case kSampleFormatS16
:
251 return AV_SAMPLE_FMT_S16
;
252 case kSampleFormatS32
:
253 return AV_SAMPLE_FMT_S32
;
254 case kSampleFormatF32
:
255 return AV_SAMPLE_FMT_FLT
;
256 case kSampleFormatPlanarS16
:
257 return AV_SAMPLE_FMT_S16P
;
258 case kSampleFormatPlanarF32
:
259 return AV_SAMPLE_FMT_FLTP
;
261 DVLOG(1) << "Unknown SampleFormat: " << sample_format
;
263 return AV_SAMPLE_FMT_NONE
;
// Fills |config| from an audio AVCodecContext: derives the codec, sample format
// and channel layout from |codec_context| and passes them, together with the
// sample rate and extradata, to config->Initialize().
// NOTE(review): some parameter declarations and some Initialize() arguments
// are not visible in this view.
266 static void AVCodecContextToAudioDecoderConfig(
267 const AVCodecContext
* codec_context
,
269 AudioDecoderConfig
* config
,
// Debug-only check that the context describes an audio stream.
271 DCHECK_EQ(codec_context
->codec_type
, AVMEDIA_TYPE_AUDIO
);
273 AudioCodec codec
= CodecIDToAudioCodec(codec_context
->codec_id
);
275 SampleFormat sample_format
=
276 AVSampleFormatToSampleFormat(codec_context
->sample_fmt
);
278 ChannelLayout channel_layout
= ChannelLayoutToChromeChannelLayout(
279 codec_context
->channel_layout
, codec_context
->channels
);
281 if (codec
== kCodecOpus
) {
282 // |codec_context->sample_fmt| is not set by FFmpeg because Opus decoding is
283 // not enabled in FFmpeg, so we need to manually set the sample format.
284 sample_format
= kSampleFormatS16
;
287 config
->Initialize(codec
,
290 codec_context
->sample_rate
,
291 codec_context
->extradata
,
292 codec_context
->extradata_size
,
// For non-Opus codecs, debug-check that FFmpeg's bytes-per-sample (times 8)
// agrees with the bits-per-channel that |config| reports.
295 if (codec
!= kCodecOpus
) {
296 DCHECK_EQ(av_get_bytes_per_sample(codec_context
->sample_fmt
) * 8,
297 config
->bits_per_channel());
// Fills |config| from an audio AVStream by delegating to
// AVCodecContextToAudioDecoderConfig() with stream->codec.
// NOTE(review): the declaration of |record_stats| and the lines between the
// "enc_key_id" lookup and the final call are not visible in this view;
// presumably |is_encrypted| is set when the key is found -- confirm.
301 void AVStreamToAudioDecoderConfig(
302 const AVStream
* stream
,
303 AudioDecoderConfig
* config
,
305 bool is_encrypted
= false;
// Looks up the "enc_key_id" entry in the stream metadata.
306 AVDictionaryEntry
* key
= av_dict_get(stream
->metadata
, "enc_key_id", NULL
, 0);
309 return AVCodecContextToAudioDecoderConfig(
310 stream
->codec
, is_encrypted
, config
, record_stats
);
// Populates |codec_context| from an AudioDecoderConfig: codec type, codec id,
// sample format, channel count, sample rate and extradata.
// NOTE(review): the else line and closing braces are not visible in this view.
313 void AudioDecoderConfigToAVCodecContext(const AudioDecoderConfig
& config
,
314 AVCodecContext
* codec_context
) {
315 codec_context
->codec_type
= AVMEDIA_TYPE_AUDIO
;
316 codec_context
->codec_id
= AudioCodecToCodecID(config
.codec(),
317 config
.sample_format());
318 codec_context
->sample_fmt
= SampleFormatToAVSampleFormat(
319 config
.sample_format());
321 // TODO(scherkus): should we set |channel_layout|? I'm not sure if FFmpeg uses
322 // said information to decode.
323 codec_context
->channels
=
324 ChannelLayoutToChannelCount(config
.channel_layout());
325 codec_context
->sample_rate
= config
.samples_per_second();
// When the config carries extra data, copy it into an av_malloc()'d buffer of
// extra_data_size() + FF_INPUT_BUFFER_PADDING_SIZE bytes and zero the padding.
// NOTE(review): no check of the av_malloc() result is visible before the
// memcpy()/memset() calls below.
327 if (config
.extra_data()) {
328 codec_context
->extradata_size
= config
.extra_data_size();
329 codec_context
->extradata
= reinterpret_cast<uint8_t*>(
330 av_malloc(config
.extra_data_size() + FF_INPUT_BUFFER_PADDING_SIZE
));
331 memcpy(codec_context
->extradata
, config
.extra_data(),
332 config
.extra_data_size());
333 memset(codec_context
->extradata
+ config
.extra_data_size(), '\0',
334 FF_INPUT_BUFFER_PADDING_SIZE
);
// Presumably the no-extra-data branch: clears the extradata pointer and size.
336 codec_context
->extradata
= NULL
;
337 codec_context
->extradata_size
= 0;
// Fills |config| from a video AVStream: computes the coded size, visible rect,
// aspect ratio, codec, profile, natural size and pixel format from
// stream->codec and the stream metadata, then calls config->Initialize().
// NOTE(review): some parameter declarations, some Initialize() arguments and
// several intermediate lines are not visible in this view.
341 void AVStreamToVideoDecoderConfig(
342 const AVStream
* stream
,
343 VideoDecoderConfig
* config
,
345 gfx::Size
coded_size(stream
->codec
->coded_width
, stream
->codec
->coded_height
);
347 // TODO(vrk): This assumes decoded frame data starts at (0, 0), which is true
348 // for now, but may not always be true forever. Fix this in the future.
349 gfx::Rect
visible_rect(stream
->codec
->width
, stream
->codec
->height
);
// The aspect ratio defaults to 1:1. The stream-level sample_aspect_ratio is
// used when its numerator is non-zero, otherwise the codec-level one when its
// numerator is non-zero.
351 AVRational aspect_ratio
= { 1, 1 };
352 if (stream
->sample_aspect_ratio
.num
)
353 aspect_ratio
= stream
->sample_aspect_ratio
;
354 else if (stream
->codec
->sample_aspect_ratio
.num
)
355 aspect_ratio
= stream
->codec
->sample_aspect_ratio
;
357 VideoCodec codec
= CodecIDToVideoCodec(stream
->codec
->codec_id
);
// VP8 and VP9 get fixed *_MAIN profiles; the ProfileIDToVideoCodecProfile()
// assignment below is presumably the else branch (the else line is not visible
// in this view).
359 VideoCodecProfile profile
= VIDEO_CODEC_PROFILE_UNKNOWN
;
360 if (codec
== kCodecVP8
)
361 profile
= VP8PROFILE_MAIN
;
362 else if (codec
== kCodecVP9
)
363 profile
= VP9PROFILE_MAIN
;
365 profile
= ProfileIDToVideoCodecProfile(stream
->codec
->profile
);
367 gfx::Size natural_size
= GetNaturalSize(
368 visible_rect
.size(), aspect_ratio
.num
, aspect_ratio
.den
);
370 VideoFrame::Format format
= PixelFormatToVideoFormat(stream
->codec
->pix_fmt
);
// For VP9 the format is forced to YV12 and the coded size is replaced by the
// natural size.
371 if (codec
== kCodecVP9
) {
372 // TODO(tomfinegan): libavcodec doesn't know about VP9.
373 format
= VideoFrame::YV12
;
374 coded_size
= natural_size
;
// Looks up the "enc_key_id" entry in the stream metadata. NOTE(review): the
// lines that use |key| are not visible in this view.
377 bool is_encrypted
= false;
378 AVDictionaryEntry
* key
= av_dict_get(stream
->metadata
, "enc_key_id", NULL
, 0);
// A metadata entry "alpha_mode" equal to "1" switches the format to YV12A.
382 AVDictionaryEntry
* webm_alpha
=
383 av_dict_get(stream
->metadata
, "alpha_mode", NULL
, 0);
384 if (webm_alpha
&& !strcmp(webm_alpha
->value
, "1")) {
385 format
= VideoFrame::YV12A
;
388 config
->Initialize(codec
,
391 coded_size
, visible_rect
, natural_size
,
392 stream
->codec
->extradata
, stream
->codec
->extradata_size
,
// Populates |codec_context| from a VideoDecoderConfig: codec type, codec id,
// profile, coded width/height, pixel format and extradata.
// NOTE(review): the else line and closing braces are not visible in this view.
397 void VideoDecoderConfigToAVCodecContext(
398 const VideoDecoderConfig
& config
,
399 AVCodecContext
* codec_context
) {
400 codec_context
->codec_type
= AVMEDIA_TYPE_VIDEO
;
401 codec_context
->codec_id
= VideoCodecToCodecID(config
.codec());
402 codec_context
->profile
= VideoCodecProfileToProfileID(config
.profile());
403 codec_context
->coded_width
= config
.coded_size().width();
404 codec_context
->coded_height
= config
.coded_size().height();
405 codec_context
->pix_fmt
= VideoFormatToPixelFormat(config
.format());
// When the config carries extra data, copy it into an av_malloc()'d buffer of
// extra_data_size() + FF_INPUT_BUFFER_PADDING_SIZE bytes and zero the padding.
// NOTE(review): no check of the av_malloc() result is visible before the
// memcpy()/memset() calls below.
407 if (config
.extra_data()) {
408 codec_context
->extradata_size
= config
.extra_data_size();
409 codec_context
->extradata
= reinterpret_cast<uint8_t*>(
410 av_malloc(config
.extra_data_size() + FF_INPUT_BUFFER_PADDING_SIZE
));
411 memcpy(codec_context
->extradata
, config
.extra_data(),
412 config
.extra_data_size());
413 memset(codec_context
->extradata
+ config
.extra_data_size(), '\0',
414 FF_INPUT_BUFFER_PADDING_SIZE
);
// Presumably the no-extra-data branch: clears the extradata pointer and size.
416 codec_context
->extradata
= NULL
;
417 codec_context
->extradata_size
= 0;
// Converts an FFmpeg AV_CH_LAYOUT_* value into the matching CHANNEL_LAYOUT_*
// value. When no case returns, the result comes from
// GuessChannelLayout(|channels|).
// NOTE(review): the switch header, the #endif matching the #ifdef below and the
// label preceding the GuessChannelLayout() fallback are not visible in this
// view.
421 ChannelLayout
ChannelLayoutToChromeChannelLayout(int64_t layout
, int channels
) {
423 case AV_CH_LAYOUT_MONO
:
424 return CHANNEL_LAYOUT_MONO
;
425 case AV_CH_LAYOUT_STEREO
:
426 return CHANNEL_LAYOUT_STEREO
;
427 case AV_CH_LAYOUT_2_1
:
428 return CHANNEL_LAYOUT_2_1
;
429 case AV_CH_LAYOUT_SURROUND
:
430 return CHANNEL_LAYOUT_SURROUND
;
431 case AV_CH_LAYOUT_4POINT0
:
432 return CHANNEL_LAYOUT_4_0
;
433 case AV_CH_LAYOUT_2_2
:
434 return CHANNEL_LAYOUT_2_2
;
435 case AV_CH_LAYOUT_QUAD
:
436 return CHANNEL_LAYOUT_QUAD
;
437 case AV_CH_LAYOUT_5POINT0
:
438 return CHANNEL_LAYOUT_5_0
;
439 case AV_CH_LAYOUT_5POINT1
:
440 return CHANNEL_LAYOUT_5_1
;
441 case AV_CH_LAYOUT_5POINT0_BACK
:
442 return CHANNEL_LAYOUT_5_0_BACK
;
443 case AV_CH_LAYOUT_5POINT1_BACK
:
444 return CHANNEL_LAYOUT_5_1_BACK
;
445 case AV_CH_LAYOUT_7POINT0
:
446 return CHANNEL_LAYOUT_7_0
;
447 case AV_CH_LAYOUT_7POINT1
:
448 return CHANNEL_LAYOUT_7_1
;
449 case AV_CH_LAYOUT_7POINT1_WIDE
:
450 return CHANNEL_LAYOUT_7_1_WIDE
;
451 case AV_CH_LAYOUT_STEREO_DOWNMIX
:
452 return CHANNEL_LAYOUT_STEREO_DOWNMIX
;
453 case AV_CH_LAYOUT_2POINT1
:
454 return CHANNEL_LAYOUT_2POINT1
;
455 case AV_CH_LAYOUT_3POINT1
:
456 return CHANNEL_LAYOUT_3_1
;
457 case AV_CH_LAYOUT_4POINT1
:
458 return CHANNEL_LAYOUT_4_1
;
459 case AV_CH_LAYOUT_6POINT0
:
460 return CHANNEL_LAYOUT_6_0
;
461 case AV_CH_LAYOUT_6POINT0_FRONT
:
462 return CHANNEL_LAYOUT_6_0_FRONT
;
463 case AV_CH_LAYOUT_HEXAGONAL
:
464 return CHANNEL_LAYOUT_HEXAGONAL
;
465 case AV_CH_LAYOUT_6POINT1
:
466 return CHANNEL_LAYOUT_6_1
;
467 case AV_CH_LAYOUT_6POINT1_BACK
:
468 return CHANNEL_LAYOUT_6_1_BACK
;
469 case AV_CH_LAYOUT_6POINT1_FRONT
:
470 return CHANNEL_LAYOUT_6_1_FRONT
;
471 case AV_CH_LAYOUT_7POINT0_FRONT
:
472 return CHANNEL_LAYOUT_7_0_FRONT
;
// This case is compiled only when AV_CH_LAYOUT_7POINT1_WIDE_BACK is defined.
473 #ifdef AV_CH_LAYOUT_7POINT1_WIDE_BACK
474 case AV_CH_LAYOUT_7POINT1_WIDE_BACK
:
475 return CHANNEL_LAYOUT_7_1_WIDE_BACK
;
477 case AV_CH_LAYOUT_OCTAGONAL
:
478 return CHANNEL_LAYOUT_OCTAGONAL
;
480 // FFmpeg channel_layout is 0 for .wav and .mp3. Attempt to guess layout
481 // based on the channel count.
482 return GuessChannelLayout(channels
);
// Converts an FFmpeg PixelFormat into a VideoFrame::Format. YUV422P maps to
// YV16, YUV420P and YUVJ420P both map to YV12, and YUVA420P maps to YV12A. The
// function also logs "Unsupported PixelFormat: " at DVLOG(1) and returns
// VideoFrame::INVALID.
486 VideoFrame::Format
PixelFormatToVideoFormat(PixelFormat pixel_format
) {
487 switch (pixel_format
) {
488 case PIX_FMT_YUV422P
:
489 return VideoFrame::YV16
;
490 // TODO(scherkus): We should be paying attention to the color range of each
491 // format and scaling as appropriate when rendering. Regular YUV has a range
492 // of 16-239 whereas YUVJ has a range of 0-255.
493 case PIX_FMT_YUV420P
:
494 case PIX_FMT_YUVJ420P
:
495 return VideoFrame::YV12
;
496 case PIX_FMT_YUVA420P
:
497 return VideoFrame::YV12A
;
499 DVLOG(1) << "Unsupported PixelFormat: " << pixel_format
;
501 return VideoFrame::INVALID
;
504 PixelFormat
VideoFormatToPixelFormat(VideoFrame::Format video_format
) {
505 switch (video_format
) {
506 case VideoFrame::YV16
:
507 return PIX_FMT_YUV422P
;
508 case VideoFrame::YV12
:
509 return PIX_FMT_YUV420P
;
510 case VideoFrame::YV12A
:
511 return PIX_FMT_YUVA420P
;
513 DVLOG(1) << "Unsupported VideoFrame::Format: " << video_format
;