[Android WebView] Fix webview perf bot switchover to use org.chromium.webview_shell...
[chromium-blink-merge.git] / content / common / gpu / media / vt_video_decode_accelerator.cc
blob0d0fe628948ac7d92c2e682efddd1072bbeef6cf
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include <algorithm>
7 #include <CoreVideo/CoreVideo.h>
8 #include <OpenGL/CGLIOSurface.h>
9 #include <OpenGL/gl.h>
11 #include "base/bind.h"
12 #include "base/command_line.h"
13 #include "base/logging.h"
14 #include "base/mac/mac_logging.h"
15 #include "base/metrics/histogram_macros.h"
16 #include "base/sys_byteorder.h"
17 #include "base/thread_task_runner_handle.h"
18 #include "content/common/gpu/media/vt_video_decode_accelerator.h"
19 #include "content/public/common/content_switches.h"
20 #include "media/base/limits.h"
21 #include "ui/gl/scoped_binders.h"
23 using content_common_gpu_media::kModuleVt;
24 using content_common_gpu_media::InitializeStubs;
25 using content_common_gpu_media::IsVtInitialized;
26 using content_common_gpu_media::StubPathMap;
// Logs |name| together with the OSStatus |status| at ERROR severity, then
// reports a PLATFORM_FAILURE tagged with |session_failure| via NotifyError().
// Must be expanded where NotifyError() is in scope. The do/while(0) wrapper
// makes the expansion behave as a single statement.
#define NOTIFY_STATUS(name, status, session_failure)  \
  do {                                                \
    OSSTATUS_DLOG(ERROR, status) << name;             \
    NotifyError(PLATFORM_FAILURE, session_failure);   \
  } while (0)
34 namespace content {
36 // Only H.264 with 4:2:0 chroma sampling is supported.
37 static const media::VideoCodecProfile kSupportedProfiles[] = {
38 media::H264PROFILE_BASELINE,
39 media::H264PROFILE_MAIN,
40 media::H264PROFILE_EXTENDED,
41 media::H264PROFILE_HIGH,
42 media::H264PROFILE_HIGH10PROFILE,
43 media::H264PROFILE_SCALABLEBASELINE,
44 media::H264PROFILE_SCALABLEHIGH,
45 media::H264PROFILE_STEREOHIGH,
46 media::H264PROFILE_MULTIVIEWHIGH,
49 // Size to use for NALU length headers in AVC format (can be 1, 2, or 4).
50 static const int kNALUHeaderLength = 4;
52 // We request 5 picture buffers from the client, each of which has a texture ID
53 // that we can bind decoded frames to. We need enough to satisfy preroll, and
54 // enough to avoid unnecessary stalling, but no more than that. The resource
55 // requirements are low, as we don't need the textures to be backed by storage.
56 static const int kNumPictureBuffers = media::limits::kMaxVideoFrames + 1;
58 // Maximum number of frames to queue for reordering before we stop asking for
59 // more. (NotifyEndOfBitstreamBuffer() is called when frames are moved into the
60 // reorder queue.)
61 static const int kMaxReorderQueueSize = 16;
63 // Build an |image_config| dictionary for VideoToolbox initialization.
64 static base::ScopedCFTypeRef<CFMutableDictionaryRef>
65 BuildImageConfig(CMVideoDimensions coded_dimensions) {
66 base::ScopedCFTypeRef<CFMutableDictionaryRef> image_config;
68 // TODO(sandersd): Does it save some work or memory to use 4:2:0?
69 int32_t pixel_format = kCVPixelFormatType_422YpCbCr8;
70 #define CFINT(i) CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &i)
71 base::ScopedCFTypeRef<CFNumberRef> cf_pixel_format(CFINT(pixel_format));
72 base::ScopedCFTypeRef<CFNumberRef> cf_width(CFINT(coded_dimensions.width));
73 base::ScopedCFTypeRef<CFNumberRef> cf_height(CFINT(coded_dimensions.height));
74 #undef CFINT
75 if (!cf_pixel_format.get() || !cf_width.get() || !cf_height.get())
76 return image_config;
78 image_config.reset(
79 CFDictionaryCreateMutable(
80 kCFAllocatorDefault,
81 4, // capacity
82 &kCFTypeDictionaryKeyCallBacks,
83 &kCFTypeDictionaryValueCallBacks));
84 if (!image_config.get())
85 return image_config;
87 CFDictionarySetValue(image_config, kCVPixelBufferPixelFormatTypeKey,
88 cf_pixel_format);
89 CFDictionarySetValue(image_config, kCVPixelBufferWidthKey, cf_width);
90 CFDictionarySetValue(image_config, kCVPixelBufferHeightKey, cf_height);
91 CFDictionarySetValue(image_config, kCVPixelBufferOpenGLCompatibilityKey,
92 kCFBooleanTrue);
94 return image_config;
97 // Create a VTDecompressionSession using the provided |pps| and |sps|. If
98 // |require_hardware| is true, the session must uses real hardware decoding
99 // (as opposed to software decoding inside of VideoToolbox) to be considered
100 // successful.
102 // TODO(sandersd): Merge with ConfigureDecoder(), as the code is very similar.
103 static bool CreateVideoToolboxSession(const uint8_t* sps, size_t sps_size,
104 const uint8_t* pps, size_t pps_size,
105 bool require_hardware) {
106 const uint8_t* data_ptrs[] = {sps, pps};
107 const size_t data_sizes[] = {sps_size, pps_size};
109 base::ScopedCFTypeRef<CMFormatDescriptionRef> format;
110 OSStatus status = CMVideoFormatDescriptionCreateFromH264ParameterSets(
111 kCFAllocatorDefault,
112 2, // parameter_set_count
113 data_ptrs, // &parameter_set_pointers
114 data_sizes, // &parameter_set_sizes
115 kNALUHeaderLength, // nal_unit_header_length
116 format.InitializeInto());
117 if (status) {
118 OSSTATUS_DLOG(WARNING, status)
119 << "Failed to create CMVideoFormatDescription.";
120 return false;
123 base::ScopedCFTypeRef<CFMutableDictionaryRef> decoder_config(
124 CFDictionaryCreateMutable(
125 kCFAllocatorDefault,
126 1, // capacity
127 &kCFTypeDictionaryKeyCallBacks,
128 &kCFTypeDictionaryValueCallBacks));
129 if (!decoder_config.get())
130 return false;
132 if (require_hardware) {
133 CFDictionarySetValue(
134 decoder_config,
135 // kVTVideoDecoderSpecification_RequireHardwareAcceleratedVideoDecoder
136 CFSTR("RequireHardwareAcceleratedVideoDecoder"),
137 kCFBooleanTrue);
140 base::ScopedCFTypeRef<CFMutableDictionaryRef> image_config(
141 BuildImageConfig(CMVideoFormatDescriptionGetDimensions(format)));
142 if (!image_config.get())
143 return false;
145 VTDecompressionOutputCallbackRecord callback = {0};
147 base::ScopedCFTypeRef<VTDecompressionSessionRef> session;
148 status = VTDecompressionSessionCreate(
149 kCFAllocatorDefault,
150 format, // video_format_description
151 decoder_config, // video_decoder_specification
152 image_config, // destination_image_buffer_attributes
153 &callback, // output_callback
154 session.InitializeInto());
155 if (status) {
156 OSSTATUS_DLOG(WARNING, status) << "Failed to create VTDecompressionSession";
157 return false;
160 return true;
163 // The purpose of this function is to preload the generic and hardware-specific
164 // libraries required by VideoToolbox before the GPU sandbox is enabled.
165 // VideoToolbox normally loads the hardware-specific libraries lazily, so we
166 // must actually create a decompression session. If creating a decompression
167 // session fails, hardware decoding will be disabled (Initialize() will always
168 // return false).
169 static bool InitializeVideoToolboxInternal() {
170 if (base::CommandLine::ForCurrentProcess()->HasSwitch(
171 switches::kDisableAcceleratedVideoDecode)) {
172 return false;
175 if (!IsVtInitialized()) {
176 // CoreVideo is also required, but the loader stops after the first path is
177 // loaded. Instead we rely on the transitive dependency from VideoToolbox to
178 // CoreVideo.
179 // TODO(sandersd): Fallback to PrivateFrameworks to support OS X < 10.8.
180 StubPathMap paths;
181 paths[kModuleVt].push_back(FILE_PATH_LITERAL(
182 "/System/Library/Frameworks/VideoToolbox.framework/VideoToolbox"));
183 if (!InitializeStubs(paths)) {
184 LOG(WARNING) << "Failed to initialize VideoToolbox framework. "
185 << "Hardware accelerated video decoding will be disabled.";
186 return false;
190 // Create a hardware decoding session.
191 // SPS and PPS data are taken from a 480p sample (buck2.mp4).
192 const uint8_t sps_normal[] = {0x67, 0x64, 0x00, 0x1e, 0xac, 0xd9, 0x80, 0xd4,
193 0x3d, 0xa1, 0x00, 0x00, 0x03, 0x00, 0x01, 0x00,
194 0x00, 0x03, 0x00, 0x30, 0x8f, 0x16, 0x2d, 0x9a};
195 const uint8_t pps_normal[] = {0x68, 0xe9, 0x7b, 0xcb};
196 if (!CreateVideoToolboxSession(sps_normal, arraysize(sps_normal), pps_normal,
197 arraysize(pps_normal), true)) {
198 LOG(WARNING) << "Failed to create hardware VideoToolbox session. "
199 << "Hardware accelerated video decoding will be disabled.";
200 return false;
203 // Create a software decoding session.
204 // SPS and PPS data are taken from a 18p sample (small2.mp4).
205 const uint8_t sps_small[] = {0x67, 0x64, 0x00, 0x0a, 0xac, 0xd9, 0x89, 0x7e,
206 0x22, 0x10, 0x00, 0x00, 0x3e, 0x90, 0x00, 0x0e,
207 0xa6, 0x08, 0xf1, 0x22, 0x59, 0xa0};
208 const uint8_t pps_small[] = {0x68, 0xe9, 0x79, 0x72, 0xc0};
209 if (!CreateVideoToolboxSession(sps_small, arraysize(sps_small), pps_small,
210 arraysize(pps_small), false)) {
211 LOG(WARNING) << "Failed to create software VideoToolbox session. "
212 << "Hardware accelerated video decoding will be disabled.";
213 return false;
216 return true;
219 bool InitializeVideoToolbox() {
220 // InitializeVideoToolbox() is called only from the GPU process main thread;
221 // once for sandbox warmup, and then once each time a VTVideoDecodeAccelerator
222 // is initialized.
223 static bool attempted = false;
224 static bool succeeded = false;
226 if (!attempted) {
227 attempted = true;
228 succeeded = InitializeVideoToolboxInternal();
231 return succeeded;
234 // Route decoded frame callbacks back into the VTVideoDecodeAccelerator.
235 static void OutputThunk(
236 void* decompression_output_refcon,
237 void* source_frame_refcon,
238 OSStatus status,
239 VTDecodeInfoFlags info_flags,
240 CVImageBufferRef image_buffer,
241 CMTime presentation_time_stamp,
242 CMTime presentation_duration) {
243 VTVideoDecodeAccelerator* vda =
244 reinterpret_cast<VTVideoDecodeAccelerator*>(decompression_output_refcon);
245 vda->Output(source_frame_refcon, status, image_buffer);
248 VTVideoDecodeAccelerator::Task::Task(TaskType type) : type(type) {
251 VTVideoDecodeAccelerator::Task::~Task() {
254 VTVideoDecodeAccelerator::Frame::Frame(int32_t bitstream_id)
255 : bitstream_id(bitstream_id), pic_order_cnt(0), reorder_window(0) {
258 VTVideoDecodeAccelerator::Frame::~Frame() {
261 bool VTVideoDecodeAccelerator::FrameOrder::operator()(
262 const linked_ptr<Frame>& lhs,
263 const linked_ptr<Frame>& rhs) const {
264 if (lhs->pic_order_cnt != rhs->pic_order_cnt)
265 return lhs->pic_order_cnt > rhs->pic_order_cnt;
266 // If |pic_order_cnt| is the same, fall back on using the bitstream order.
267 // TODO(sandersd): Assign a sequence number in Decode() and use that instead.
268 // TODO(sandersd): Using the sequence number, ensure that frames older than
269 // |kMaxReorderQueueSize| are ordered first, regardless of |pic_order_cnt|.
270 return lhs->bitstream_id > rhs->bitstream_id;
273 VTVideoDecodeAccelerator::VTVideoDecodeAccelerator(
274 CGLContextObj cgl_context,
275 const base::Callback<bool(void)>& make_context_current)
276 : cgl_context_(cgl_context),
277 make_context_current_(make_context_current),
278 client_(nullptr),
279 state_(STATE_DECODING),
280 format_(nullptr),
281 session_(nullptr),
282 last_sps_id_(-1),
283 last_pps_id_(-1),
284 gpu_task_runner_(base::ThreadTaskRunnerHandle::Get()),
285 decoder_thread_("VTDecoderThread"),
286 weak_this_factory_(this) {
287 DCHECK(!make_context_current_.is_null());
288 callback_.decompressionOutputCallback = OutputThunk;
289 callback_.decompressionOutputRefCon = this;
290 weak_this_ = weak_this_factory_.GetWeakPtr();
293 VTVideoDecodeAccelerator::~VTVideoDecodeAccelerator() {
296 bool VTVideoDecodeAccelerator::Initialize(
297 media::VideoCodecProfile profile,
298 Client* client) {
299 DCHECK(gpu_thread_checker_.CalledOnValidThread());
300 client_ = client;
302 if (!InitializeVideoToolbox())
303 return false;
305 bool profile_supported = false;
306 for (const auto& supported_profile : kSupportedProfiles) {
307 if (profile == supported_profile) {
308 profile_supported = true;
309 break;
312 if (!profile_supported)
313 return false;
315 // Spawn a thread to handle parsing and calling VideoToolbox.
316 if (!decoder_thread_.Start())
317 return false;
319 // Count the session as successfully initialized.
320 UMA_HISTOGRAM_ENUMERATION("Media.VTVDA.SessionFailureReason",
321 SFT_SUCCESSFULLY_INITIALIZED,
322 SFT_MAX + 1);
323 return true;
326 bool VTVideoDecodeAccelerator::FinishDelayedFrames() {
327 DCHECK(decoder_thread_.task_runner()->BelongsToCurrentThread());
328 if (session_) {
329 OSStatus status = VTDecompressionSessionWaitForAsynchronousFrames(session_);
330 if (status) {
331 NOTIFY_STATUS("VTDecompressionSessionWaitForAsynchronousFrames()",
332 status, SFT_PLATFORM_ERROR);
333 return false;
336 return true;
339 bool VTVideoDecodeAccelerator::ConfigureDecoder() {
340 DCHECK(decoder_thread_.task_runner()->BelongsToCurrentThread());
341 DCHECK(!last_sps_.empty());
342 DCHECK(!last_pps_.empty());
344 // Build the configuration records.
345 std::vector<const uint8_t*> nalu_data_ptrs;
346 std::vector<size_t> nalu_data_sizes;
347 nalu_data_ptrs.reserve(3);
348 nalu_data_sizes.reserve(3);
349 nalu_data_ptrs.push_back(&last_sps_.front());
350 nalu_data_sizes.push_back(last_sps_.size());
351 if (!last_spsext_.empty()) {
352 nalu_data_ptrs.push_back(&last_spsext_.front());
353 nalu_data_sizes.push_back(last_spsext_.size());
355 nalu_data_ptrs.push_back(&last_pps_.front());
356 nalu_data_sizes.push_back(last_pps_.size());
358 // Construct a new format description from the parameter sets.
359 // TODO(sandersd): Replace this with custom code to support OS X < 10.9.
360 format_.reset();
361 OSStatus status = CMVideoFormatDescriptionCreateFromH264ParameterSets(
362 kCFAllocatorDefault,
363 nalu_data_ptrs.size(), // parameter_set_count
364 &nalu_data_ptrs.front(), // &parameter_set_pointers
365 &nalu_data_sizes.front(), // &parameter_set_sizes
366 kNALUHeaderLength, // nal_unit_header_length
367 format_.InitializeInto());
368 if (status) {
369 NOTIFY_STATUS("CMVideoFormatDescriptionCreateFromH264ParameterSets()",
370 status, SFT_PLATFORM_ERROR);
371 return false;
374 // Store the new configuration data.
375 CMVideoDimensions coded_dimensions =
376 CMVideoFormatDescriptionGetDimensions(format_);
377 coded_size_.SetSize(coded_dimensions.width, coded_dimensions.height);
379 // If the session is compatible, there's nothing else to do.
380 if (session_ &&
381 VTDecompressionSessionCanAcceptFormatDescription(session_, format_)) {
382 return true;
385 // Prepare VideoToolbox configuration dictionaries.
386 base::ScopedCFTypeRef<CFMutableDictionaryRef> decoder_config(
387 CFDictionaryCreateMutable(
388 kCFAllocatorDefault,
389 1, // capacity
390 &kCFTypeDictionaryKeyCallBacks,
391 &kCFTypeDictionaryValueCallBacks));
392 if (!decoder_config.get()) {
393 DLOG(ERROR) << "Failed to create CFMutableDictionary.";
394 NotifyError(PLATFORM_FAILURE, SFT_PLATFORM_ERROR);
395 return false;
398 CFDictionarySetValue(
399 decoder_config,
400 // kVTVideoDecoderSpecification_EnableHardwareAcceleratedVideoDecoder
401 CFSTR("EnableHardwareAcceleratedVideoDecoder"),
402 kCFBooleanTrue);
404 base::ScopedCFTypeRef<CFMutableDictionaryRef> image_config(
405 BuildImageConfig(coded_dimensions));
406 if (!image_config.get()) {
407 DLOG(ERROR) << "Failed to create decoder image configuration.";
408 NotifyError(PLATFORM_FAILURE, SFT_PLATFORM_ERROR);
409 return false;
412 // Ensure that the old decoder emits all frames before the new decoder can
413 // emit any.
414 if (!FinishDelayedFrames())
415 return false;
417 session_.reset();
418 status = VTDecompressionSessionCreate(
419 kCFAllocatorDefault,
420 format_, // video_format_description
421 decoder_config, // video_decoder_specification
422 image_config, // destination_image_buffer_attributes
423 &callback_, // output_callback
424 session_.InitializeInto());
425 if (status) {
426 NOTIFY_STATUS("VTDecompressionSessionCreate()", status,
427 SFT_UNSUPPORTED_STREAM_PARAMETERS);
428 return false;
431 // Report whether hardware decode is being used.
432 bool using_hardware = false;
433 base::ScopedCFTypeRef<CFBooleanRef> cf_using_hardware;
434 if (VTSessionCopyProperty(
435 session_,
436 // kVTDecompressionPropertyKey_UsingHardwareAcceleratedVideoDecoder
437 CFSTR("UsingHardwareAcceleratedVideoDecoder"),
438 kCFAllocatorDefault,
439 cf_using_hardware.InitializeInto()) == 0) {
440 using_hardware = CFBooleanGetValue(cf_using_hardware);
442 UMA_HISTOGRAM_BOOLEAN("Media.VTVDA.HardwareAccelerated", using_hardware);
444 return true;
447 void VTVideoDecodeAccelerator::DecodeTask(
448 const media::BitstreamBuffer& bitstream,
449 Frame* frame) {
450 DCHECK(decoder_thread_.task_runner()->BelongsToCurrentThread());
452 // Map the bitstream buffer.
453 base::SharedMemory memory(bitstream.handle(), true);
454 size_t size = bitstream.size();
455 if (!memory.Map(size)) {
456 DLOG(ERROR) << "Failed to map bitstream buffer";
457 NotifyError(PLATFORM_FAILURE, SFT_PLATFORM_ERROR);
458 return;
460 const uint8_t* buf = static_cast<uint8_t*>(memory.memory());
462 // NALUs are stored with Annex B format in the bitstream buffer (start codes),
463 // but VideoToolbox expects AVC format (length headers), so we must rewrite
464 // the data.
466 // Locate relevant NALUs and compute the size of the rewritten data. Also
467 // record any parameter sets for VideoToolbox initialization.
468 bool config_changed = false;
469 bool has_slice = false;
470 size_t data_size = 0;
471 std::vector<media::H264NALU> nalus;
472 parser_.SetStream(buf, size);
473 media::H264NALU nalu;
474 while (true) {
475 media::H264Parser::Result result = parser_.AdvanceToNextNALU(&nalu);
476 if (result == media::H264Parser::kEOStream)
477 break;
478 if (result == media::H264Parser::kUnsupportedStream) {
479 DLOG(ERROR) << "Unsupported H.264 stream";
480 NotifyError(PLATFORM_FAILURE, SFT_UNSUPPORTED_STREAM);
481 return;
483 if (result != media::H264Parser::kOk) {
484 DLOG(ERROR) << "Failed to parse H.264 stream";
485 NotifyError(UNREADABLE_INPUT, SFT_INVALID_STREAM);
486 return;
488 switch (nalu.nal_unit_type) {
489 case media::H264NALU::kSPS:
490 last_sps_.assign(nalu.data, nalu.data + nalu.size);
491 last_spsext_.clear();
492 config_changed = true;
493 result = parser_.ParseSPS(&last_sps_id_);
494 if (result == media::H264Parser::kUnsupportedStream) {
495 DLOG(ERROR) << "Unsupported SPS";
496 NotifyError(PLATFORM_FAILURE, SFT_UNSUPPORTED_STREAM);
497 return;
499 if (result != media::H264Parser::kOk) {
500 DLOG(ERROR) << "Could not parse SPS";
501 NotifyError(UNREADABLE_INPUT, SFT_INVALID_STREAM);
502 return;
504 break;
506 case media::H264NALU::kSPSExt:
507 // TODO(sandersd): Check that the previous NALU was an SPS.
508 last_spsext_.assign(nalu.data, nalu.data + nalu.size);
509 config_changed = true;
510 break;
512 case media::H264NALU::kPPS:
513 last_pps_.assign(nalu.data, nalu.data + nalu.size);
514 config_changed = true;
515 result = parser_.ParsePPS(&last_pps_id_);
516 if (result == media::H264Parser::kUnsupportedStream) {
517 DLOG(ERROR) << "Unsupported PPS";
518 NotifyError(PLATFORM_FAILURE, SFT_UNSUPPORTED_STREAM);
519 return;
521 if (result != media::H264Parser::kOk) {
522 DLOG(ERROR) << "Could not parse PPS";
523 NotifyError(UNREADABLE_INPUT, SFT_INVALID_STREAM);
524 return;
526 break;
528 case media::H264NALU::kSliceDataA:
529 case media::H264NALU::kSliceDataB:
530 case media::H264NALU::kSliceDataC:
531 case media::H264NALU::kNonIDRSlice:
532 // TODO(sandersd): Check that there has been an IDR slice since the
533 // last reset.
534 case media::H264NALU::kIDRSlice:
535 // Compute the |pic_order_cnt| for the picture from the first slice.
536 // TODO(sandersd): Make sure that any further slices are part of the
537 // same picture or a redundant coded picture.
538 if (!has_slice) {
539 media::H264SliceHeader slice_hdr;
540 result = parser_.ParseSliceHeader(nalu, &slice_hdr);
541 if (result == media::H264Parser::kUnsupportedStream) {
542 DLOG(ERROR) << "Unsupported slice header";
543 NotifyError(PLATFORM_FAILURE, SFT_UNSUPPORTED_STREAM);
544 return;
546 if (result != media::H264Parser::kOk) {
547 DLOG(ERROR) << "Could not parse slice header";
548 NotifyError(UNREADABLE_INPUT, SFT_INVALID_STREAM);
549 return;
552 // TODO(sandersd): Maintain a cache of configurations and reconfigure
553 // only when a slice references a new config.
554 DCHECK_EQ(slice_hdr.pic_parameter_set_id, last_pps_id_);
555 const media::H264PPS* pps =
556 parser_.GetPPS(slice_hdr.pic_parameter_set_id);
557 if (!pps) {
558 DLOG(ERROR) << "Mising PPS referenced by slice";
559 NotifyError(UNREADABLE_INPUT, SFT_INVALID_STREAM);
560 return;
563 DCHECK_EQ(pps->seq_parameter_set_id, last_sps_id_);
564 const media::H264SPS* sps = parser_.GetSPS(pps->seq_parameter_set_id);
565 if (!sps) {
566 DLOG(ERROR) << "Mising SPS referenced by PPS";
567 NotifyError(UNREADABLE_INPUT, SFT_INVALID_STREAM);
568 return;
571 if (!poc_.ComputePicOrderCnt(sps, slice_hdr, &frame->pic_order_cnt)) {
572 DLOG(ERROR) << "Unable to compute POC";
573 NotifyError(UNREADABLE_INPUT, SFT_INVALID_STREAM);
574 return;
577 if (sps->vui_parameters_present_flag &&
578 sps->bitstream_restriction_flag) {
579 frame->reorder_window = std::min(sps->max_num_reorder_frames,
580 kMaxReorderQueueSize - 1);
583 has_slice = true;
584 default:
585 nalus.push_back(nalu);
586 data_size += kNALUHeaderLength + nalu.size;
587 break;
591 // Initialize VideoToolbox.
592 // TODO(sandersd): Instead of assuming that the last SPS and PPS units are
593 // always the correct ones, maintain a cache of recent SPS and PPS units and
594 // select from them using the slice header.
595 if (config_changed) {
596 if (last_sps_.size() == 0 || last_pps_.size() == 0) {
597 DLOG(ERROR) << "Invalid configuration data";
598 NotifyError(INVALID_ARGUMENT, SFT_INVALID_STREAM);
599 return;
601 if (!ConfigureDecoder())
602 return;
605 // If there are no image slices, drop the bitstream buffer by returning an
606 // empty frame.
607 if (!has_slice) {
608 if (!FinishDelayedFrames())
609 return;
610 gpu_task_runner_->PostTask(FROM_HERE, base::Bind(
611 &VTVideoDecodeAccelerator::DecodeDone, weak_this_, frame));
612 return;
615 // If the session is not configured by this point, fail.
616 if (!session_) {
617 DLOG(ERROR) << "Configuration data missing";
618 NotifyError(INVALID_ARGUMENT, SFT_INVALID_STREAM);
619 return;
622 // Update the frame metadata with configuration data.
623 frame->coded_size = coded_size_;
625 // Create a memory-backed CMBlockBuffer for the translated data.
626 // TODO(sandersd): Pool of memory blocks.
627 base::ScopedCFTypeRef<CMBlockBufferRef> data;
628 OSStatus status = CMBlockBufferCreateWithMemoryBlock(
629 kCFAllocatorDefault,
630 nullptr, // &memory_block
631 data_size, // block_length
632 kCFAllocatorDefault, // block_allocator
633 nullptr, // &custom_block_source
634 0, // offset_to_data
635 data_size, // data_length
636 0, // flags
637 data.InitializeInto());
638 if (status) {
639 NOTIFY_STATUS("CMBlockBufferCreateWithMemoryBlock()", status,
640 SFT_PLATFORM_ERROR);
641 return;
644 // Make sure that the memory is actually allocated.
645 // CMBlockBufferReplaceDataBytes() is documented to do this, but prints a
646 // message each time starting in Mac OS X 10.10.
647 status = CMBlockBufferAssureBlockMemory(data);
648 if (status) {
649 NOTIFY_STATUS("CMBlockBufferAssureBlockMemory()", status,
650 SFT_PLATFORM_ERROR);
651 return;
654 // Copy NALU data into the CMBlockBuffer, inserting length headers.
655 size_t offset = 0;
656 for (size_t i = 0; i < nalus.size(); i++) {
657 media::H264NALU& nalu = nalus[i];
658 uint32_t header = base::HostToNet32(static_cast<uint32_t>(nalu.size));
659 status = CMBlockBufferReplaceDataBytes(
660 &header, data, offset, kNALUHeaderLength);
661 if (status) {
662 NOTIFY_STATUS("CMBlockBufferReplaceDataBytes()", status,
663 SFT_PLATFORM_ERROR);
664 return;
666 offset += kNALUHeaderLength;
667 status = CMBlockBufferReplaceDataBytes(nalu.data, data, offset, nalu.size);
668 if (status) {
669 NOTIFY_STATUS("CMBlockBufferReplaceDataBytes()", status,
670 SFT_PLATFORM_ERROR);
671 return;
673 offset += nalu.size;
676 // Package the data in a CMSampleBuffer.
677 base::ScopedCFTypeRef<CMSampleBufferRef> sample;
678 status = CMSampleBufferCreate(
679 kCFAllocatorDefault,
680 data, // data_buffer
681 true, // data_ready
682 nullptr, // make_data_ready_callback
683 nullptr, // make_data_ready_refcon
684 format_, // format_description
685 1, // num_samples
686 0, // num_sample_timing_entries
687 nullptr, // &sample_timing_array
688 1, // num_sample_size_entries
689 &data_size, // &sample_size_array
690 sample.InitializeInto());
691 if (status) {
692 NOTIFY_STATUS("CMSampleBufferCreate()", status, SFT_PLATFORM_ERROR);
693 return;
696 // Send the frame for decoding.
697 // Asynchronous Decompression allows for parallel submission of frames
698 // (without it, DecodeFrame() does not return until the frame has been
699 // decoded). We don't enable Temporal Processing so that frames are always
700 // returned in decode order; this makes it easier to avoid deadlock.
701 VTDecodeFrameFlags decode_flags =
702 kVTDecodeFrame_EnableAsynchronousDecompression;
703 status = VTDecompressionSessionDecodeFrame(
704 session_,
705 sample, // sample_buffer
706 decode_flags, // decode_flags
707 reinterpret_cast<void*>(frame), // source_frame_refcon
708 nullptr); // &info_flags_out
709 if (status) {
710 NOTIFY_STATUS("VTDecompressionSessionDecodeFrame()", status,
711 SFT_DECODE_ERROR);
712 return;
716 // This method may be called on any VideoToolbox thread.
717 void VTVideoDecodeAccelerator::Output(
718 void* source_frame_refcon,
719 OSStatus status,
720 CVImageBufferRef image_buffer) {
721 if (status) {
722 NOTIFY_STATUS("Decoding", status, SFT_DECODE_ERROR);
723 return;
726 // The type of |image_buffer| is CVImageBuffer, but we only handle
727 // CVPixelBuffers. This should be guaranteed as we set
728 // kCVPixelBufferOpenGLCompatibilityKey in |image_config|.
730 // Sometimes, for unknown reasons (http://crbug.com/453050), |image_buffer| is
731 // NULL, which causes CFGetTypeID() to crash. While the rest of the code would
732 // smoothly handle NULL as a dropped frame, we choose to fail permanantly here
733 // until the issue is better understood.
734 if (!image_buffer || CFGetTypeID(image_buffer) != CVPixelBufferGetTypeID()) {
735 DLOG(ERROR) << "Decoded frame is not a CVPixelBuffer";
736 NotifyError(PLATFORM_FAILURE, SFT_DECODE_ERROR);
737 return;
740 Frame* frame = reinterpret_cast<Frame*>(source_frame_refcon);
741 frame->image.reset(image_buffer, base::scoped_policy::RETAIN);
742 gpu_task_runner_->PostTask(FROM_HERE, base::Bind(
743 &VTVideoDecodeAccelerator::DecodeDone, weak_this_, frame));
746 void VTVideoDecodeAccelerator::DecodeDone(Frame* frame) {
747 DCHECK(gpu_thread_checker_.CalledOnValidThread());
748 DCHECK_EQ(1u, pending_frames_.count(frame->bitstream_id));
749 Task task(TASK_FRAME);
750 task.frame = pending_frames_[frame->bitstream_id];
751 pending_frames_.erase(frame->bitstream_id);
752 task_queue_.push(task);
753 ProcessWorkQueues();
756 void VTVideoDecodeAccelerator::FlushTask(TaskType type) {
757 DCHECK(decoder_thread_.task_runner()->BelongsToCurrentThread());
758 FinishDelayedFrames();
760 // Always queue a task, even if FinishDelayedFrames() fails, so that
761 // destruction always completes.
762 gpu_task_runner_->PostTask(FROM_HERE, base::Bind(
763 &VTVideoDecodeAccelerator::FlushDone, weak_this_, type));
766 void VTVideoDecodeAccelerator::FlushDone(TaskType type) {
767 DCHECK(gpu_thread_checker_.CalledOnValidThread());
768 task_queue_.push(Task(type));
769 ProcessWorkQueues();
772 void VTVideoDecodeAccelerator::Decode(const media::BitstreamBuffer& bitstream) {
773 DCHECK(gpu_thread_checker_.CalledOnValidThread());
774 DCHECK_EQ(0u, assigned_bitstream_ids_.count(bitstream.id()));
775 assigned_bitstream_ids_.insert(bitstream.id());
776 Frame* frame = new Frame(bitstream.id());
777 pending_frames_[frame->bitstream_id] = make_linked_ptr(frame);
778 decoder_thread_.task_runner()->PostTask(
779 FROM_HERE, base::Bind(&VTVideoDecodeAccelerator::DecodeTask,
780 base::Unretained(this), bitstream, frame));
783 void VTVideoDecodeAccelerator::AssignPictureBuffers(
784 const std::vector<media::PictureBuffer>& pictures) {
785 DCHECK(gpu_thread_checker_.CalledOnValidThread());
787 for (const media::PictureBuffer& picture : pictures) {
788 DCHECK(!texture_ids_.count(picture.id()));
789 assigned_picture_ids_.insert(picture.id());
790 available_picture_ids_.push_back(picture.id());
791 texture_ids_[picture.id()] = picture.texture_id();
794 // Pictures are not marked as uncleared until after this method returns, and
795 // they will be broken if they are used before that happens. So, schedule
796 // future work after that happens.
797 gpu_task_runner_->PostTask(FROM_HERE, base::Bind(
798 &VTVideoDecodeAccelerator::ProcessWorkQueues, weak_this_));
801 void VTVideoDecodeAccelerator::ReusePictureBuffer(int32_t picture_id) {
802 DCHECK(gpu_thread_checker_.CalledOnValidThread());
803 DCHECK_EQ(CFGetRetainCount(picture_bindings_[picture_id]), 1);
804 picture_bindings_.erase(picture_id);
805 if (assigned_picture_ids_.count(picture_id) != 0) {
806 available_picture_ids_.push_back(picture_id);
807 ProcessWorkQueues();
808 } else {
809 client_->DismissPictureBuffer(picture_id);
813 void VTVideoDecodeAccelerator::ProcessWorkQueues() {
814 DCHECK(gpu_thread_checker_.CalledOnValidThread());
815 switch (state_) {
816 case STATE_DECODING:
817 // TODO(sandersd): Batch where possible.
818 while (state_ == STATE_DECODING) {
819 if (!ProcessReorderQueue() && !ProcessTaskQueue())
820 break;
822 return;
824 case STATE_ERROR:
825 // Do nothing until Destroy() is called.
826 return;
828 case STATE_DESTROYING:
829 // Drop tasks until we are ready to destruct.
830 while (!task_queue_.empty()) {
831 if (task_queue_.front().type == TASK_DESTROY) {
832 delete this;
833 return;
835 task_queue_.pop();
837 return;
841 bool VTVideoDecodeAccelerator::ProcessTaskQueue() {
842 DCHECK(gpu_thread_checker_.CalledOnValidThread());
843 DCHECK_EQ(state_, STATE_DECODING);
845 if (task_queue_.empty())
846 return false;
848 const Task& task = task_queue_.front();
849 switch (task.type) {
850 case TASK_FRAME:
851 // TODO(sandersd): Signal IDR explicitly (not using pic_order_cnt == 0).
852 if (reorder_queue_.size() < kMaxReorderQueueSize &&
853 (task.frame->pic_order_cnt != 0 || reorder_queue_.empty())) {
854 assigned_bitstream_ids_.erase(task.frame->bitstream_id);
855 client_->NotifyEndOfBitstreamBuffer(task.frame->bitstream_id);
856 reorder_queue_.push(task.frame);
857 task_queue_.pop();
858 return true;
860 return false;
862 case TASK_FLUSH:
863 DCHECK_EQ(task.type, pending_flush_tasks_.front());
864 if (reorder_queue_.size() == 0) {
865 pending_flush_tasks_.pop();
866 client_->NotifyFlushDone();
867 task_queue_.pop();
868 return true;
870 return false;
872 case TASK_RESET:
873 DCHECK_EQ(task.type, pending_flush_tasks_.front());
874 if (reorder_queue_.size() == 0) {
875 last_sps_id_ = -1;
876 last_pps_id_ = -1;
877 last_sps_.clear();
878 last_spsext_.clear();
879 last_pps_.clear();
880 poc_.Reset();
881 pending_flush_tasks_.pop();
882 client_->NotifyResetDone();
883 task_queue_.pop();
884 return true;
886 return false;
888 case TASK_DESTROY:
889 NOTREACHED() << "Can't destroy while in STATE_DECODING.";
890 NotifyError(ILLEGAL_STATE, SFT_PLATFORM_ERROR);
891 return false;
895 bool VTVideoDecodeAccelerator::ProcessReorderQueue() {
896 DCHECK(gpu_thread_checker_.CalledOnValidThread());
897 DCHECK_EQ(state_, STATE_DECODING);
899 if (reorder_queue_.empty())
900 return false;
902 // If the next task is a flush (because there is a pending flush or becuase
903 // the next frame is an IDR), then we don't need a full reorder buffer to send
904 // the next frame.
905 bool flushing = !task_queue_.empty() &&
906 (task_queue_.front().type != TASK_FRAME ||
907 task_queue_.front().frame->pic_order_cnt == 0);
909 size_t reorder_window = std::max(0, reorder_queue_.top()->reorder_window);
910 if (flushing || reorder_queue_.size() > reorder_window) {
911 if (ProcessFrame(*reorder_queue_.top())) {
912 reorder_queue_.pop();
913 return true;
917 return false;
920 bool VTVideoDecodeAccelerator::ProcessFrame(const Frame& frame) {
921 DCHECK(gpu_thread_checker_.CalledOnValidThread());
922 DCHECK_EQ(state_, STATE_DECODING);
924 // If the next pending flush is for a reset, then the frame will be dropped.
925 bool resetting = !pending_flush_tasks_.empty() &&
926 pending_flush_tasks_.front() == TASK_RESET;
928 if (!resetting && frame.image.get()) {
929 // If the |coded_size| has changed, request new picture buffers and then
930 // wait for them.
931 // TODO(sandersd): If GpuVideoDecoder didn't specifically check the size of
932 // textures, this would be unnecessary, as the size is actually a property
933 // of the texture binding, not the texture. We rebind every frame, so the
934 // size passed to ProvidePictureBuffers() is meaningless.
935 if (picture_size_ != frame.coded_size) {
936 // Dismiss current pictures.
937 for (int32_t picture_id : assigned_picture_ids_)
938 client_->DismissPictureBuffer(picture_id);
939 assigned_picture_ids_.clear();
940 available_picture_ids_.clear();
942 // Request new pictures.
943 picture_size_ = frame.coded_size;
944 client_->ProvidePictureBuffers(
945 kNumPictureBuffers, coded_size_, GL_TEXTURE_RECTANGLE_ARB);
946 return false;
948 if (!SendFrame(frame))
949 return false;
952 return true;
955 bool VTVideoDecodeAccelerator::SendFrame(const Frame& frame) {
956 DCHECK(gpu_thread_checker_.CalledOnValidThread());
957 DCHECK_EQ(state_, STATE_DECODING);
959 if (available_picture_ids_.empty())
960 return false;
962 int32_t picture_id = available_picture_ids_.back();
963 IOSurfaceRef surface = CVPixelBufferGetIOSurface(frame.image.get());
965 if (!make_context_current_.Run()) {
966 DLOG(ERROR) << "Failed to make GL context current";
967 NotifyError(PLATFORM_FAILURE, SFT_PLATFORM_ERROR);
968 return false;
971 glEnable(GL_TEXTURE_RECTANGLE_ARB);
972 gfx::ScopedTextureBinder
973 texture_binder(GL_TEXTURE_RECTANGLE_ARB, texture_ids_[picture_id]);
974 CGLError status = CGLTexImageIOSurface2D(
975 cgl_context_, // ctx
976 GL_TEXTURE_RECTANGLE_ARB, // target
977 GL_RGB, // internal_format
978 frame.coded_size.width(), // width
979 frame.coded_size.height(), // height
980 GL_YCBCR_422_APPLE, // format
981 GL_UNSIGNED_SHORT_8_8_APPLE, // type
982 surface, // io_surface
983 0); // plane
984 if (status != kCGLNoError) {
985 NOTIFY_STATUS("CGLTexImageIOSurface2D()", status, SFT_PLATFORM_ERROR);
986 return false;
988 glDisable(GL_TEXTURE_RECTANGLE_ARB);
990 available_picture_ids_.pop_back();
991 picture_bindings_[picture_id] = frame.image;
992 client_->PictureReady(media::Picture(picture_id, frame.bitstream_id,
993 gfx::Rect(frame.coded_size), false));
994 return true;
997 void VTVideoDecodeAccelerator::NotifyError(
998 Error vda_error_type,
999 VTVDASessionFailureType session_failure_type) {
1000 DCHECK_LT(session_failure_type, SFT_MAX + 1);
1001 if (!gpu_thread_checker_.CalledOnValidThread()) {
1002 gpu_task_runner_->PostTask(FROM_HERE, base::Bind(
1003 &VTVideoDecodeAccelerator::NotifyError, weak_this_, vda_error_type,
1004 session_failure_type));
1005 } else if (state_ == STATE_DECODING) {
1006 state_ = STATE_ERROR;
1007 UMA_HISTOGRAM_ENUMERATION("Media.VTVDA.SessionFailureReason",
1008 session_failure_type,
1009 SFT_MAX + 1);
1010 client_->NotifyError(vda_error_type);
1014 void VTVideoDecodeAccelerator::QueueFlush(TaskType type) {
1015 DCHECK(gpu_thread_checker_.CalledOnValidThread());
1016 pending_flush_tasks_.push(type);
1017 decoder_thread_.task_runner()->PostTask(
1018 FROM_HERE, base::Bind(&VTVideoDecodeAccelerator::FlushTask,
1019 base::Unretained(this), type));
1021 // If this is a new flush request, see if we can make progress.
1022 if (pending_flush_tasks_.size() == 1)
1023 ProcessWorkQueues();
1026 void VTVideoDecodeAccelerator::Flush() {
1027 DCHECK(gpu_thread_checker_.CalledOnValidThread());
1028 QueueFlush(TASK_FLUSH);
1031 void VTVideoDecodeAccelerator::Reset() {
1032 DCHECK(gpu_thread_checker_.CalledOnValidThread());
1033 QueueFlush(TASK_RESET);
1036 void VTVideoDecodeAccelerator::Destroy() {
1037 DCHECK(gpu_thread_checker_.CalledOnValidThread());
1039 // In a forceful shutdown, the decoder thread may be dead already.
1040 if (!decoder_thread_.IsRunning()) {
1041 delete this;
1042 return;
1045 // For a graceful shutdown, return assigned buffers and flush before
1046 // destructing |this|.
1047 // TODO(sandersd): Make sure the decoder won't try to read the buffers again
1048 // before discarding them.
1049 for (int32_t bitstream_id : assigned_bitstream_ids_)
1050 client_->NotifyEndOfBitstreamBuffer(bitstream_id);
1051 assigned_bitstream_ids_.clear();
1052 state_ = STATE_DESTROYING;
1053 QueueFlush(TASK_DESTROY);
1056 bool VTVideoDecodeAccelerator::CanDecodeOnIOThread() {
1057 return false;
1060 // static
1061 media::VideoDecodeAccelerator::SupportedProfiles
1062 VTVideoDecodeAccelerator::GetSupportedProfiles() {
1063 SupportedProfiles profiles;
1064 for (const auto& supported_profile : kSupportedProfiles) {
1065 SupportedProfile profile;
1066 profile.profile = supported_profile;
1067 profile.min_resolution.SetSize(16, 16);
1068 profile.max_resolution.SetSize(4096, 2160);
1069 profiles.push_back(profile);
1071 return profiles;
1074 } // namespace content