1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // This file contains an implementation of VideoDecodeAccelerator
6 // that utilizes the hardware video decoder present on the Exynos SoC.
8 #ifndef CONTENT_COMMON_GPU_MEDIA_EXYNOS_VIDEO_DECODE_ACCELERATOR_H_
9 #define CONTENT_COMMON_GPU_MEDIA_EXYNOS_VIDEO_DECODE_ACCELERATOR_H_
14 #include "base/callback_forward.h"
15 #include "base/memory/linked_ptr.h"
16 #include "base/memory/scoped_ptr.h"
17 #include "base/threading/thread.h"
18 #include "content/common/content_export.h"
19 #include "content/common/gpu/media/video_decode_accelerator_impl.h"
20 #include "media/base/video_decoder_config.h"
21 #include "ui/gfx/size.h"
22 #include "ui/gl/gl_bindings.h"
25 class MessageLoopProxy
;
31 // This class handles Exynos video acceleration directly through the V4L2
32 // devices exported by the Multi Format Codec and GScaler hardware blocks.
34 // The threading model of this class is driven by the fact that it needs to
35 // interface two fundamentally different event queues -- the one Chromium
36 // provides through MessageLoop, and the one driven by the V4L2 devices which
37 // is waited on with epoll(). There are three threads involved in this class:
39 // * The child thread, which is the main GPU process thread which calls the
40 // media::VideoDecodeAccelerator entry points. Calls from this thread
41 // generally do not block (with the exception of Initialize() and Destroy()).
42 // They post tasks to the decoder_thread_, which actually services the task
43 // and calls back when complete through the
44 // media::VideoDecodeAccelerator::Client interface.
45 // * The decoder_thread_, owned by this class. It services API tasks, through
46 // the *Task() routines, as well as V4L2 device events, through
47 // ServiceDeviceTask(). Almost all state modification is done on this thread.
48 // * The device_poll_thread_, owned by this class. All it does is epoll() on
49 // the V4L2 devices in DevicePollTask() and schedule a ServiceDeviceTask() on
50 // the decoder_thread_ when something interesting happens.
51 // TODO(sheu): replace this thread with a TYPE_IO decoder_thread_.
53 // Note that this class has no locks! Everything's serviced on the
54 // decoder_thread_, so there are no synchronization issues.
55 // ... well, there are, but it's a matter of getting messages posted in the
56 // right order, not fiddling with locks.
57 class CONTENT_EXPORT ExynosVideoDecodeAccelerator
58 : public VideoDecodeAcceleratorImpl
{
60 ExynosVideoDecodeAccelerator(
61 EGLDisplay egl_display
,
62 EGLContext egl_context
,
64 const base::WeakPtr
<Client
>& io_client_
,
65 const base::Callback
<bool(void)>& make_context_current
,
66 const scoped_refptr
<base::MessageLoopProxy
>& io_message_loop_proxy
);
67 virtual ~ExynosVideoDecodeAccelerator();
69 // media::VideoDecodeAccelerator implementation.
70 // Note: Initialize() and Destroy() are synchronous.
71 virtual bool Initialize(media::VideoCodecProfile profile
) OVERRIDE
;
72 virtual void Decode(const media::BitstreamBuffer
& bitstream_buffer
) OVERRIDE
;
73 virtual void AssignPictureBuffers(
74 const std::vector
<media::PictureBuffer
>& buffers
) OVERRIDE
;
75 virtual void ReusePictureBuffer(int32 picture_buffer_id
) OVERRIDE
;
76 virtual void Flush() OVERRIDE
;
77 virtual void Reset() OVERRIDE
;
78 virtual void Destroy() OVERRIDE
;
80 // VideoDecodeAcceleratorImpl implementation.
81 virtual bool CanDecodeOnIOThread() OVERRIDE
;
83 // Do any necessary initialization before the sandbox is enabled.
84 static void PreSandboxInitialization();
86 // Lazily initialize static data after sandbox is enabled. Return false on failure.
88 static bool PostSandboxInitialization();
91 // These are rather subjectively tuned.
93 kMfcInputBufferCount
= 8,
94 // TODO(posciak): determine MFC input buffer size based on level limits.
95 // See http://crbug.com/255116.
96 kMfcInputBufferMaxSize
= 1024 * 1024,
97 kGscInputBufferCount
= 4,
98 // Number of output buffers to use for each VDA stage above what's required
99 // by the decoder (e.g. DPB size, in H264).
100 kDpbOutputBufferExtraCount
= 3,
103 // Internal state of the decoder.
105 kUninitialized
, // Initialize() not yet called.
106 kInitialized
, // Initialize() returned true; ready to start decoding.
107 kDecoding
, // DecodeBufferInitial() successful; decoding frames.
108 kResetting
, // Presently resetting.
109 kAfterReset
, // After Reset(), ready to start decoding again.
110 kChangingResolution
, // Performing resolution change, all remaining
111 // pre-change frames decoded and processed.
112 kError
, // Error in kDecoding state.
116 kFlushBufferId
= -2 // Buffer id for flush buffer, queued by FlushTask().
119 // File descriptors we need to poll.
125 // Auto-destruction reference for BitstreamBuffer, for message-passing from
126 // Decode() to DecodeTask().
127 struct BitstreamBufferRef
;
129 // Auto-destruction reference for an array of PictureBuffer, for
130 // message-passing from AssignPictureBuffers() to AssignPictureBuffersTask().
131 struct PictureBufferArrayRef
;
133 // Auto-destruction reference for EGLSync (for message-passing).
134 struct EGLSyncKHRRef
;
136 // Record for MFC input buffers.
137 struct MfcInputRecord
{
140 bool at_device
; // held by device.
141 void* address
; // mmap() address.
142 size_t length
; // mmap() length.
143 off_t bytes_used
; // bytes filled in the mmap() segment.
144 int32 input_id
; // triggering input_id as given to Decode().
147 // Record for MFC output buffers.
148 struct MfcOutputRecord
{
151 bool at_device
; // held by device.
152 size_t bytes_used
[2]; // bytes used in each dmabuf.
153 void* address
[2]; // mmap() address for each plane.
154 size_t length
[2]; // mmap() length for each plane.
155 int32 input_id
; // triggering input_id as given to Decode().
158 // Record for GSC input buffers.
159 struct GscInputRecord
{
162 bool at_device
; // held by device.
163 int mfc_output
; // MFC output buffer index to recycle when this input
167 // Record for GSC output buffers.
168 struct GscOutputRecord
{
171 bool at_device
; // held by device.
172 bool at_client
; // held by client.
173 int fd
; // file descriptor from backing EGLImage.
174 EGLImageKHR egl_image
; // backing EGLImage.
175 EGLSyncKHR egl_sync
; // sync the compositor's use of the EGLImage.
176 int32 picture_id
; // picture buffer id as returned to PictureReady().
180 // Decoding tasks, to be run on decoder_thread_.
183 // Enqueue a BitstreamBuffer to decode. This will enqueue a buffer to the
184 // decoder_input_queue_, then queue a DecodeBufferTask() to actually decode it.
186 void DecodeTask(const media::BitstreamBuffer
& bitstream_buffer
);
188 // Decode from the buffers queued in decoder_input_queue_. Calls
189 // DecodeBufferInitial() or DecodeBufferContinue() as appropriate.
190 void DecodeBufferTask();
191 // Advance to the next fragment that begins a frame.
192 bool AdvanceFrameFragment(const uint8
* data
, size_t size
, size_t* endpos
);
193 // Schedule another DecodeBufferTask() if we're behind.
194 void ScheduleDecodeBufferTaskIfNeeded();
196 // Return true if we should continue to schedule DecodeBufferTask()s after
197 // completion. Store the amount of input actually consumed in |endpos|.
198 bool DecodeBufferInitial(const void* data
, size_t size
, size_t* endpos
);
199 bool DecodeBufferContinue(const void* data
, size_t size
);
201 // Accumulate data for the next frame to decode. May return false in
202 // non-error conditions; for example when pipeline is full and should be retried later.
204 bool AppendToInputFrame(const void* data
, size_t size
);
205 // Flush data for one decoded frame.
206 bool FlushInputFrame();
208 // Process an AssignPictureBuffers() API call. After this, the
209 // device_poll_thread_ can be started safely, since we have all our buffers.
211 void AssignPictureBuffersTask(scoped_ptr
<PictureBufferArrayRef
> pic_buffers
);
213 // Service I/O on the V4L2 devices. This task should only be scheduled from
214 // DevicePollTask(). If |mfc_event_pending| is true, one or more events
215 // on MFC file descriptor are pending.
216 void ServiceDeviceTask(bool mfc_event_pending
);
217 // Handle the various device queues.
222 // Handle incoming MFC events.
223 void DequeueMfcEvents();
224 // Enqueue a buffer on the corresponding queue.
225 bool EnqueueMfcInputRecord();
226 bool EnqueueMfcOutputRecord();
227 bool EnqueueGscInputRecord();
228 bool EnqueueGscOutputRecord();
230 // Process a ReusePictureBuffer() API call. The API call creates an EGLSync
231 // object on the main (GPU process) thread; we will record this object so we
232 // can wait on it before reusing the buffer.
233 void ReusePictureBufferTask(int32 picture_buffer_id
,
234 scoped_ptr
<EGLSyncKHRRef
> egl_sync_ref
);
236 // Flush() task. Child thread should not submit any more buffers until it
237 // receives the NotifyFlushDone callback. This task will schedule an empty
238 // BitstreamBufferRef (with input_id == kFlushBufferId) to perform the flush.
240 // Notify the client of a flush completion, if required. This should be
241 // called any time a relevant queue could potentially be emptied: see
242 // function definition.
243 void NotifyFlushDoneIfNeeded();
245 // Reset() task. This task will schedule a ResetDoneTask() that will send
246 // the NotifyResetDone callback, then set the decoder state to kResetting so
247 // that all intervening tasks will drain.
249 // ResetDoneTask() will set the decoder state back to kAfterReset, so
250 // subsequent decoding can continue.
251 void ResetDoneTask();
253 // Device destruction task.
256 // Attempt to start/stop device_poll_thread_.
257 bool StartDevicePoll();
258 // If |keep_mfc_input_state| is true, don't reset MFC input state; used during
259 // resolution change.
260 bool StopDevicePoll(bool keep_mfc_input_state
);
261 // Set/clear the device poll interrupt (using device_poll_interrupt_fd_).
262 bool SetDevicePollInterrupt();
263 bool ClearDevicePollInterrupt();
265 void StartResolutionChangeIfNeeded();
266 void FinishResolutionChange();
267 void ResumeAfterResolutionChange();
269 // Try to get output format from MFC, detected after parsing the beginning
270 // of the stream. Sets |again| to true if more parsing is needed.
271 bool GetFormatInfo(struct v4l2_format
* format
, bool* again
);
272 // Create MFC output and GSC input and output buffers for the given |format|.
273 bool CreateBuffersForFormat(const struct v4l2_format
& format
);
276 // Device tasks, to be run on device_poll_thread_.
280 void DevicePollTask(unsigned int poll_fds
);
283 // Safe from any thread.
286 // Error notification (using PostTask() to child thread, if necessary).
287 void NotifyError(Error error
);
289 // Set the decoder_thread_ state (using PostTask to decoder thread, if necessary).
291 void SetDecoderState(State state
);
294 // Other utility functions. Called on decoder_thread_, unless
295 // decoder_thread_ is not yet started, in which case the child thread can call
296 // these (e.g. in Initialize() or Destroy()).
299 // Create the buffers we need.
300 bool CreateMfcInputBuffers();
301 bool CreateMfcOutputBuffers();
302 bool CreateGscInputBuffers();
303 bool CreateGscOutputBuffers();
306 // Methods run on child thread.
310 void DestroyMfcInputBuffers();
311 void DestroyMfcOutputBuffers();
312 void DestroyGscInputBuffers();
313 void DestroyGscOutputBuffers();
314 void ResolutionChangeDestroyBuffers();
316 // Our original calling message loop for the child thread.
317 scoped_refptr
<base::MessageLoopProxy
> child_message_loop_proxy_
;
319 // Message loop of the IO thread.
320 scoped_refptr
<base::MessageLoopProxy
> io_message_loop_proxy_
;
322 // WeakPtr<> pointing to |this| for use in posting tasks from the decoder or
323 // device worker threads back to the child thread. Because the worker threads
324 // are members of this class, any task running on those threads is guaranteed
325 // that this object is still alive. As a result, tasks posted from the child
326 // thread to the decoder or device thread should use base::Unretained(this),
327 // and tasks posted the other way should use |weak_this_|.
328 base::WeakPtr
<ExynosVideoDecodeAccelerator
> weak_this_
;
330 // To expose client callbacks from VideoDecodeAccelerator.
331 // NOTE: all calls to these objects *MUST* be executed on
332 // child_message_loop_proxy_.
333 base::WeakPtrFactory
<Client
> client_ptr_factory_
;
334 base::WeakPtr
<Client
> client_
;
335 // Callbacks to |io_client_| must be executed on |io_message_loop_proxy_|.
336 base::WeakPtr
<Client
> io_client_
;
339 // Decoder state, owned and operated by decoder_thread_.
340 // Before decoder_thread_ has started, the decoder state is managed by
341 // the child (main) thread. After decoder_thread_ has started, the decoder
342 // thread should be the only one managing these.
345 // This thread services tasks posted from the VDA API entry points by the
346 // child thread and device service callbacks posted from the device thread.
347 base::Thread decoder_thread_
;
348 // Decoder state machine state.
349 State decoder_state_
;
350 // BitstreamBuffer we're presently reading.
351 scoped_ptr
<BitstreamBufferRef
> decoder_current_bitstream_buffer_
;
352 // FlushTask() and ResetTask() should not affect buffers that have been
353 // queued afterwards. For flushing or resetting the pipeline then, we will
354 // delay these buffers until after the flush or reset completes.
355 int decoder_delay_bitstream_buffer_id_
;
356 // MFC input buffer we're presently filling.
357 int decoder_current_input_buffer_
;
358 // We track the number of buffer decode tasks we have scheduled, since each
359 // task execution should complete one buffer. If we fall behind (due to
360 // resource backpressure, etc.), we'll have to schedule more to catch up.
361 int decoder_decode_buffer_tasks_scheduled_
;
362 // Picture buffers held by the client.
363 int decoder_frames_at_client_
;
365 bool decoder_flushing_
;
366 // Got a notification from driver that it reached resolution change point
368 bool resolution_change_pending_
;
369 // Got a reset request while we were performing resolution change.
370 bool resolution_change_reset_pending_
;
371 // Input queue for decoder_thread_: BitstreamBuffers in.
372 std::list
<linked_ptr
<BitstreamBufferRef
> > decoder_input_queue_
;
373 // For H264 decode, hardware requires that we send it frame-sized chunks.
374 // We'll need to parse the stream.
375 scoped_ptr
<content::H264Parser
> decoder_h264_parser_
;
376 // Set if the decoder has a pending incomplete frame in an input buffer.
377 bool decoder_partial_frame_pending_
;
380 // Hardware state and associated queues. Since decoder_thread_ services
381 // the hardware, decoder_thread_ owns these too.
384 // Completed decode buffers, waiting for MFC.
385 std::list
<int> mfc_input_ready_queue_
;
387 // MFC decode device.
390 // MFC input buffer state.
391 bool mfc_input_streamon_
;
392 // MFC input buffers enqueued to device.
393 int mfc_input_buffer_queued_count_
;
394 // Input buffers ready to use, as a LIFO since we don't care about ordering.
395 std::vector
<int> mfc_free_input_buffers_
;
396 // Mapping of int index to MFC input buffer record.
397 std::vector
<MfcInputRecord
> mfc_input_buffer_map_
;
399 // MFC output buffer state.
400 bool mfc_output_streamon_
;
401 // MFC output buffers enqueued to device.
402 int mfc_output_buffer_queued_count_
;
403 // Output buffers ready to use, as a LIFO since we don't care about ordering.
404 std::vector
<int> mfc_free_output_buffers_
;
405 // Mapping of int index to MFC output buffer record.
406 std::vector
<MfcOutputRecord
> mfc_output_buffer_map_
;
407 // Required size of MFC output buffers. Two sizes for two planes.
408 size_t mfc_output_buffer_size_
[2];
409 uint32 mfc_output_buffer_pixelformat_
;
410 // Required size of DPB for decoding.
411 int mfc_output_dpb_size_
;
413 // Completed MFC outputs, waiting for GSC.
414 std::list
<int> mfc_output_gsc_input_queue_
;
416 // GSC decode device.
419 // GSC input buffer state.
420 bool gsc_input_streamon_
;
421 // GSC input buffers enqueued to device.
422 int gsc_input_buffer_queued_count_
;
423 // Input buffers ready to use, as a LIFO since we don't care about ordering.
424 std::vector
<int> gsc_free_input_buffers_
;
425 // Mapping of int index to GSC input buffer record.
426 std::vector
<GscInputRecord
> gsc_input_buffer_map_
;
428 // GSC output buffer state.
429 bool gsc_output_streamon_
;
430 // GSC output buffers enqueued to device.
431 int gsc_output_buffer_queued_count_
;
432 // Output buffers ready to use. We need a FIFO here.
433 std::list
<int> gsc_free_output_buffers_
;
434 // Mapping of int index to GSC output buffer record.
435 std::vector
<GscOutputRecord
> gsc_output_buffer_map_
;
437 // Output picture size.
438 gfx::Size frame_buffer_size_
;
441 // The device polling thread handles notifications of V4L2 device changes.
445 base::Thread device_poll_thread_
;
446 // eventfd fd to signal device poll thread when its poll() should be interrupted.
448 int device_poll_interrupt_fd_
;
451 // Other state, held by the child (main) thread.
454 // Make our context current before running any EGL entry points.
455 base::Callback
<bool(void)> make_context_current_
;
458 EGLDisplay egl_display_
;
459 EGLContext egl_context_
;
461 // The codec we'll be decoding for.
462 media::VideoCodecProfile video_profile_
;
464 DISALLOW_COPY_AND_ASSIGN(ExynosVideoDecodeAccelerator
);
467 } // namespace content
469 #endif // CONTENT_COMMON_GPU_MEDIA_EXYNOS_VIDEO_DECODE_ACCELERATOR_H_