content/common/gpu/media/vt_video_decode_accelerator.cc

   1 // Copyright 2014 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include <CoreVideo/CoreVideo.h>
   6 #include <OpenGL/CGLIOSurface.h>
   7 #include <OpenGL/gl.h>
   8
   9 #include "base/bind.h"
  10 #include "base/command_line.h"
  11 #include "base/sys_byteorder.h"
  12 #include "base/thread_task_runner_handle.h"
  13 #include "content/common/gpu/media/vt_video_decode_accelerator.h"
  14 #include "content/public/common/content_switches.h"
  15 #include "media/filters/h264_parser.h"
  16 #include "ui/gl/scoped_binders.h"
  17 #include "ui/gl/scoped_cgl.h"
  18
  19 using content_common_gpu_media::kModuleVt;
  20 using content_common_gpu_media::InitializeStubs;
  21 using content_common_gpu_media::IsVtInitialized;
  22 using content_common_gpu_media::StubPathMap;
  23
  24 namespace content {
  25
  26 // Size of NALU length headers in AVCC/MPEG-4 format (can be 1, 2, or 4).
  27 static const int kNALUHeaderLength = 4;
  28
  29 // We only request 5 picture buffers from the client which are used to hold the
  30 // decoded samples. These buffers are then reused when the client tells us that
  31 // it is done with the buffer.
  32 static const int kNumPictureBuffers = 5;
  33
  34 // Route decoded frame callbacks back into the VTVideoDecodeAccelerator.
  35 static void OutputThunk(
  36     void* decompression_output_refcon,
  37     void* source_frame_refcon,
  38     OSStatus status,
  39     VTDecodeInfoFlags info_flags,
  40     CVImageBufferRef image_buffer,
  41     CMTime presentation_time_stamp,
  42     CMTime presentation_duration) {
  43   // TODO(sandersd): Implement flush-before-delete to guarantee validity.
  44   VTVideoDecodeAccelerator* vda =
  45       reinterpret_cast<VTVideoDecodeAccelerator*>(decompression_output_refcon);
  46   int32_t bitstream_id = reinterpret_cast<intptr_t>(source_frame_refcon);
  47   vda->Output(bitstream_id, status, image_buffer);
  48 }
  49
  50 VTVideoDecodeAccelerator::DecodedFrame::DecodedFrame(
  51     int32_t bitstream_id,
  52     CVImageBufferRef image_buffer)
  53     : bitstream_id(bitstream_id),
  54       image_buffer(image_buffer) {
  55 }
  56
  57 VTVideoDecodeAccelerator::DecodedFrame::~DecodedFrame() {
  58 }
  59
  60 VTVideoDecodeAccelerator::VTVideoDecodeAccelerator(CGLContextObj cgl_context)
  61     : cgl_context_(cgl_context),
  62       client_(NULL),
  63       format_(NULL),
  64       session_(NULL),
  65       gpu_task_runner_(base::ThreadTaskRunnerHandle::Get()),
  66       weak_this_factory_(this),
  67       decoder_thread_("VTDecoderThread") {
  68   callback_.decompressionOutputCallback = OutputThunk;
  69   callback_.decompressionOutputRefCon = this;
  70 }
  71
  72 VTVideoDecodeAccelerator::~VTVideoDecodeAccelerator() {
  73 }
  74
  75 bool VTVideoDecodeAccelerator::Initialize(
  76     media::VideoCodecProfile profile,
  77     Client* client) {
  78   DCHECK(CalledOnValidThread());
  79   client_ = client;
  80
  81   // Only H.264 is supported.
  82   if (profile < media::H264PROFILE_MIN || profile > media::H264PROFILE_MAX)
  83     return false;
  84
  85   // Require --no-sandbox until VideoToolbox library loading is part of sandbox
  86   // startup (and this VDA is ready for regular users).
  87   if (!base::CommandLine::ForCurrentProcess()->HasSwitch(switches::kNoSandbox))
  88     return false;
  89
  90   if (!IsVtInitialized()) {
  91     // CoreVideo is also required, but the loader stops after the first
  92     // path is loaded. Instead we rely on the transitive dependency from
  93     // VideoToolbox to CoreVideo.
  94     // TODO(sandersd): Fallback to PrivateFrameworks for VideoToolbox.
  95     StubPathMap paths;
  96     paths[kModuleVt].push_back(FILE_PATH_LITERAL(
  97         "/System/Library/Frameworks/VideoToolbox.framework/VideoToolbox"));
  98     if (!InitializeStubs(paths))
  99       return false;
 100   }
 101
 102   // Spawn a thread to handle parsing and calling VideoToolbox.
 103   if (!decoder_thread_.Start())
 104     return false;
 105
 106   return true;
 107 }
 108
 109 // TODO(sandersd): Proper error reporting instead of CHECKs.
 110 void VTVideoDecodeAccelerator::ConfigureDecoder(
 111     const std::vector<const uint8_t*>& nalu_data_ptrs,
 112     const std::vector<size_t>& nalu_data_sizes) {
 113   DCHECK(decoder_thread_.message_loop_proxy()->BelongsToCurrentThread());
 114   // Construct a new format description from the parameter sets.
 115   // TODO(sandersd): Replace this with custom code to support OS X < 10.9.
 116   format_.reset();
 117   CHECK(!CMVideoFormatDescriptionCreateFromH264ParameterSets(
 118       kCFAllocatorDefault,
 119       nalu_data_ptrs.size(),      // parameter_set_count
 120       &nalu_data_ptrs.front(),    // &parameter_set_pointers
 121       &nalu_data_sizes.front(),   // &parameter_set_sizes
 122       kNALUHeaderLength,          // nal_unit_header_length
 123       format_.InitializeInto()));
 124   CMVideoDimensions coded_dimensions =
 125       CMVideoFormatDescriptionGetDimensions(format_);
 126
 127   // Prepare VideoToolbox configuration dictionaries.
 128   base::ScopedCFTypeRef<CFMutableDictionaryRef> decoder_config(
 129       CFDictionaryCreateMutable(
 130           kCFAllocatorDefault,
 131           1,  // capacity
 132           &kCFTypeDictionaryKeyCallBacks,
 133           &kCFTypeDictionaryValueCallBacks));
 134
 135   CFDictionarySetValue(
 136       decoder_config,
 137       // kVTVideoDecoderSpecification_EnableHardwareAcceleratedVideoDecoder
 138       CFSTR("EnableHardwareAcceleratedVideoDecoder"),
 139       kCFBooleanTrue);
 140
 141   base::ScopedCFTypeRef<CFMutableDictionaryRef> image_config(
 142       CFDictionaryCreateMutable(
 143           kCFAllocatorDefault,
 144           4,  // capacity
 145           &kCFTypeDictionaryKeyCallBacks,
 146           &kCFTypeDictionaryValueCallBacks));
 147
 148 #define CFINT(i) CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &i)
 149   // TODO(sandersd): RGBA option for 4:4:4 video.
 150   int32_t pixel_format = kCVPixelFormatType_422YpCbCr8;
 151   base::ScopedCFTypeRef<CFNumberRef> cf_pixel_format(CFINT(pixel_format));
 152   base::ScopedCFTypeRef<CFNumberRef> cf_width(CFINT(coded_dimensions.width));
 153   base::ScopedCFTypeRef<CFNumberRef> cf_height(CFINT(coded_dimensions.height));
 154 #undef CFINT
 155   CFDictionarySetValue(
 156       image_config, kCVPixelBufferPixelFormatTypeKey, cf_pixel_format);
 157   CFDictionarySetValue(image_config, kCVPixelBufferWidthKey, cf_width);
 158   CFDictionarySetValue(image_config, kCVPixelBufferHeightKey, cf_height);
 159   CFDictionarySetValue(
 160       image_config, kCVPixelBufferOpenGLCompatibilityKey, kCFBooleanTrue);
 161
 162   // TODO(sandersd): Check if the session is already compatible.
 163   // TODO(sandersd): Flush.
 164   session_.reset();
 165   CHECK(!VTDecompressionSessionCreate(
 166       kCFAllocatorDefault,
 167       format_,              // video_format_description
 168       decoder_config,       // video_decoder_specification
 169       image_config,         // destination_image_buffer_attributes
 170       &callback_,           // output_callback
 171       session_.InitializeInto()));
 172
 173   // If the size has changed, trigger a request for new picture buffers.
 174   gfx::Size new_coded_size(coded_dimensions.width, coded_dimensions.height);
 175   if (coded_size_ != new_coded_size) {
 176     coded_size_ = new_coded_size;
 177     gpu_task_runner_->PostTask(FROM_HERE, base::Bind(
 178         &VTVideoDecodeAccelerator::SizeChangedTask,
 179         weak_this_factory_.GetWeakPtr(),
 180         coded_size_));;
 181   }
 182 }
 183
 184 void VTVideoDecodeAccelerator::Decode(const media::BitstreamBuffer& bitstream) {
 185   DCHECK(CalledOnValidThread());
 186   // TODO(sandersd): Test what happens if bitstream buffers are passed to VT out
 187   // of order.
 188   decoder_thread_.message_loop_proxy()->PostTask(FROM_HERE, base::Bind(
 189       &VTVideoDecodeAccelerator::DecodeTask, base::Unretained(this),
 190       bitstream));
 191 }
 192
 193 // TODO(sandersd): Proper error reporting instead of CHECKs.
 194 void VTVideoDecodeAccelerator::DecodeTask(
 195     const media::BitstreamBuffer bitstream) {
 196   DCHECK(decoder_thread_.message_loop_proxy()->BelongsToCurrentThread());
 197
 198   // Map the bitstream buffer.
 199   base::SharedMemory memory(bitstream.handle(), true);
 200   size_t size = bitstream.size();
 201   CHECK(memory.Map(size));
 202   const uint8_t* buf = static_cast<uint8_t*>(memory.memory());
 203
 204   // NALUs are stored with Annex B format in the bitstream buffer (start codes),
 205   // but VideoToolbox expects AVCC/MPEG-4 format (length headers), so we must
 206   // rewrite the data.
 207   //
 208   // 1. Locate relevant NALUs and compute the size of the translated data.
 209   //    Also record any parameter sets for VideoToolbox initialization.
 210   size_t data_size = 0;
 211   std::vector<media::H264NALU> nalus;
 212   std::vector<const uint8_t*> config_nalu_data_ptrs;
 213   std::vector<size_t> config_nalu_data_sizes;
 214   parser_.SetStream(buf, size);
 215   media::H264NALU nalu;
 216   while (true) {
 217     media::H264Parser::Result result = parser_.AdvanceToNextNALU(&nalu);
 218     if (result == media::H264Parser::kEOStream)
 219       break;
 220     CHECK_EQ(result, media::H264Parser::kOk);
 221     // TODO(sandersd): Check that these are only at the start.
 222     if (nalu.nal_unit_type == media::H264NALU::kSPS ||
 223         nalu.nal_unit_type == media::H264NALU::kPPS ||
 224         nalu.nal_unit_type == media::H264NALU::kSPSExt) {
 225       DVLOG(2) << "Parameter set " << nalu.nal_unit_type;
 226       config_nalu_data_ptrs.push_back(nalu.data);
 227       config_nalu_data_sizes.push_back(nalu.size);
 228     } else {
 229       nalus.push_back(nalu);
 230       data_size += kNALUHeaderLength + nalu.size;
 231     }
 232   }
 233
 234   // 2. Initialize VideoToolbox.
 235   // TODO(sandersd): Reinitialize when there are new parameter sets.
 236   if (!session_)
 237     ConfigureDecoder(config_nalu_data_ptrs, config_nalu_data_sizes);
 238
 239   // 3. Allocate a memory-backed CMBlockBuffer for the translated data.
 240   base::ScopedCFTypeRef<CMBlockBufferRef> data;
 241   CHECK(!CMBlockBufferCreateWithMemoryBlock(
 242       kCFAllocatorDefault,
 243       NULL,                 // &memory_block
 244       data_size,            // block_length
 245       kCFAllocatorDefault,  // block_allocator
 246       NULL,                 // &custom_block_source
 247       0,                    // offset_to_data
 248       data_size,            // data_length
 249       0,                    // flags
 250       data.InitializeInto()));
 251
 252   // 4. Copy NALU data, inserting length headers.
 253   size_t offset = 0;
 254   for (size_t i = 0; i < nalus.size(); i++) {
 255     media::H264NALU& nalu = nalus[i];
 256     uint32_t header = base::HostToNet32(static_cast<uint32_t>(nalu.size));
 257     CHECK(!CMBlockBufferReplaceDataBytes(
 258         &header, data, offset, kNALUHeaderLength));
 259     offset += kNALUHeaderLength;
 260     CHECK(!CMBlockBufferReplaceDataBytes(nalu.data, data, offset, nalu.size));
 261     offset += nalu.size;
 262   }
 263
 264   // 5. Package the data for VideoToolbox and request decoding.
 265   base::ScopedCFTypeRef<CMSampleBufferRef> frame;
 266   CHECK(!CMSampleBufferCreate(
 267       kCFAllocatorDefault,
 268       data,                 // data_buffer
 269       true,                 // data_ready
 270       NULL,                 // make_data_ready_callback
 271       NULL,                 // make_data_ready_refcon
 272       format_,              // format_description
 273       1,                    // num_samples
 274       0,                    // num_sample_timing_entries
 275       NULL,                 // &sample_timing_array
 276       0,                    // num_sample_size_entries
 277       NULL,                 // &sample_size_array
 278       frame.InitializeInto()));
 279
 280   // Asynchronous Decompression allows for parallel submission of frames
 281   // (without it, DecodeFrame() does not return until the frame has been
 282   // decoded). We don't enable Temporal Processing so that frames are always
 283   // returned in decode order; this makes it easier to avoid deadlock.
 284   VTDecodeFrameFlags decode_flags =
 285       kVTDecodeFrame_EnableAsynchronousDecompression;
 286
 287   intptr_t bitstream_id = bitstream.id();
 288   CHECK(!VTDecompressionSessionDecodeFrame(
 289       session_,
 290       frame,                                  // sample_buffer
 291       decode_flags,                           // decode_flags
 292       reinterpret_cast<void*>(bitstream_id),  // source_frame_refcon
 293       NULL));                                 // &info_flags_out
 294 }
 295
 296 // This method may be called on any VideoToolbox thread.
 297 // TODO(sandersd): Proper error reporting instead of CHECKs.
 298 void VTVideoDecodeAccelerator::Output(
 299     int32_t bitstream_id,
 300     OSStatus status,
 301     CVImageBufferRef image_buffer) {
 302   CHECK(!status);
 303   CHECK_EQ(CFGetTypeID(image_buffer), CVPixelBufferGetTypeID());
 304   CFRetain(image_buffer);
 305   gpu_task_runner_->PostTask(FROM_HERE, base::Bind(
 306       &VTVideoDecodeAccelerator::OutputTask,
 307       weak_this_factory_.GetWeakPtr(),
 308       DecodedFrame(bitstream_id, image_buffer)));
 309 }
 310
 311 void VTVideoDecodeAccelerator::OutputTask(DecodedFrame frame) {
 312   DCHECK(CalledOnValidThread());
 313   decoded_frames_.push(frame);
 314   SendPictures();
 315 }
 316
 317 void VTVideoDecodeAccelerator::SizeChangedTask(gfx::Size coded_size) {
 318   DCHECK(CalledOnValidThread());
 319   texture_size_ = coded_size;
 320   // TODO(sandersd): Dismiss existing picture buffers.
 321   client_->ProvidePictureBuffers(
 322       kNumPictureBuffers, texture_size_, GL_TEXTURE_RECTANGLE_ARB);
 323 }
 324
 325 void VTVideoDecodeAccelerator::AssignPictureBuffers(
 326     const std::vector<media::PictureBuffer>& pictures) {
 327   DCHECK(CalledOnValidThread());
 328
 329   for (size_t i = 0; i < pictures.size(); i++) {
 330     CHECK(!texture_ids_.count(pictures[i].id()));
 331     available_picture_ids_.push(pictures[i].id());
 332     texture_ids_[pictures[i].id()] = pictures[i].texture_id();
 333   }
 334
 335   // Pictures are not marked as uncleared until this method returns. They will
 336   // become broken if they are used before that happens.
 337   gpu_task_runner_->PostTask(FROM_HERE, base::Bind(
 338       &VTVideoDecodeAccelerator::SendPictures,
 339       weak_this_factory_.GetWeakPtr()));
 340 }
 341
 342 void VTVideoDecodeAccelerator::ReusePictureBuffer(int32_t picture_id) {
 343   DCHECK(CalledOnValidThread());
 344   DCHECK_EQ(CFGetRetainCount(picture_bindings_[picture_id]), 1);
 345   picture_bindings_.erase(picture_id);
 346   available_picture_ids_.push(picture_id);
 347   SendPictures();
 348 }
 349
 350 // TODO(sandersd): Proper error reporting instead of CHECKs.
 351 void VTVideoDecodeAccelerator::SendPictures() {
 352   DCHECK(CalledOnValidThread());
 353   if (available_picture_ids_.empty() || decoded_frames_.empty())
 354     return;
 355
 356   gfx::ScopedCGLSetCurrentContext scoped_set_current_context(cgl_context_);
 357   glEnable(GL_TEXTURE_RECTANGLE_ARB);
 358
 359   while (!available_picture_ids_.empty() && !decoded_frames_.empty()) {
 360     int32_t picture_id = available_picture_ids_.front();
 361     available_picture_ids_.pop();
 362     DecodedFrame frame = decoded_frames_.front();
 363     decoded_frames_.pop();
 364     IOSurfaceRef surface = CVPixelBufferGetIOSurface(frame.image_buffer);
 365
 366     gfx::ScopedTextureBinder
 367         texture_binder(GL_TEXTURE_RECTANGLE_ARB, texture_ids_[picture_id]);
 368     CHECK(!CGLTexImageIOSurface2D(
 369         cgl_context_,                 // ctx
 370         GL_TEXTURE_RECTANGLE_ARB,     // target
 371         GL_RGB,                       // internal_format
 372         texture_size_.width(),        // width
 373         texture_size_.height(),       // height
 374         GL_YCBCR_422_APPLE,           // format
 375         GL_UNSIGNED_SHORT_8_8_APPLE,  // type
 376         surface,                      // io_surface
 377         0));                          // plane
 378
 379     picture_bindings_[picture_id] = frame.image_buffer;
 380     client_->PictureReady(media::Picture(
 381         picture_id, frame.bitstream_id, gfx::Rect(texture_size_)));
 382     client_->NotifyEndOfBitstreamBuffer(frame.bitstream_id);
 383   }
 384
 385   glDisable(GL_TEXTURE_RECTANGLE_ARB);
 386 }
 387
 388 void VTVideoDecodeAccelerator::Flush() {
 389   DCHECK(CalledOnValidThread());
 390   // TODO(sandersd): Trigger flush, sending frames.
 391 }
 392
 393 void VTVideoDecodeAccelerator::Reset() {
 394   DCHECK(CalledOnValidThread());
 395   // TODO(sandersd): Trigger flush, discarding frames.
 396 }
 397
 398 void VTVideoDecodeAccelerator::Destroy() {
 399   DCHECK(CalledOnValidThread());
 400   // TODO(sandersd): Trigger flush, discarding frames, and wait for them.
 401   delete this;
 402 }
 403
 404 bool VTVideoDecodeAccelerator::CanDecodeOnIOThread() {
 405   return false;
 406 }
 407
 408 }  // namespace content