1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef CONTENT_COMMON_GPU_MEDIA_V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_
6 #define CONTENT_COMMON_GPU_MEDIA_V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_
8 #include <linux/videodev2.h>
12 #include "base/memory/linked_ptr.h"
13 #include "base/memory/ref_counted.h"
14 #include "base/memory/scoped_ptr.h"
15 #include "base/memory/weak_ptr.h"
16 #include "base/synchronization/waitable_event.h"
17 #include "base/threading/thread.h"
18 #include "content/common/content_export.h"
19 #include "content/common/gpu/media/h264_decoder.h"
20 #include "content/common/gpu/media/v4l2_device.h"
21 #include "content/common/gpu/media/vp8_decoder.h"
22 #include "media/video/video_decode_accelerator.h"
// An implementation of VideoDecodeAccelerator that utilizes the V4L2 slice
// level codec API for decoding. The slice level API provides only low-level
// decoding functionality and requires userspace to provide support for parsing
// the input stream and managing decoder state across frames.
30 class CONTENT_EXPORT V4L2SliceVideoDecodeAccelerator
31 : public media::VideoDecodeAccelerator
{
33 class V4L2DecodeSurface
;
35 V4L2SliceVideoDecodeAccelerator(
36 const scoped_refptr
<V4L2Device
>& device
,
37 EGLDisplay egl_display
,
38 EGLContext egl_context
,
39 const base::WeakPtr
<Client
>& io_client_
,
40 const base::Callback
<bool(void)>& make_context_current
,
41 const scoped_refptr
<base::SingleThreadTaskRunner
>& io_task_runner
);
42 ~V4L2SliceVideoDecodeAccelerator() override
;
44 // media::VideoDecodeAccelerator implementation.
45 bool Initialize(media::VideoCodecProfile profile
,
46 VideoDecodeAccelerator::Client
* client
) override
;
47 void Decode(const media::BitstreamBuffer
& bitstream_buffer
) override
;
48 void AssignPictureBuffers(
49 const std::vector
<media::PictureBuffer
>& buffers
) override
;
50 void ReusePictureBuffer(int32 picture_buffer_id
) override
;
51 void Flush() override
;
52 void Reset() override
;
53 void Destroy() override
;
54 bool CanDecodeOnIOThread() override
;
56 static media::VideoDecodeAccelerator::SupportedProfiles
57 GetSupportedProfiles();
60 class V4L2H264Accelerator
;
61 class V4L2VP8Accelerator
;
63 // Record for input buffers.
73 // Record for output buffers.
79 EGLImageKHR egl_image
;
// Input buffer sizing. See http://crbug.com/255116.

// Size of an input bitstream buffer for streams of up to 1080p.
const size_t kInputBufferMaxSizeFor1080p = 1024 * 1024;

// Size of an input bitstream buffer for streams of up to 4k; four times the
// 1080p size.
const size_t kInputBufferMaxSizeFor4k = 4 * kInputBufferMaxSizeFor1080p;

// Number of input buffers (by name; the original carries no description).
const size_t kNumInputBuffers = 16;
92 // Below methods are used by accelerator implementations.
94 // Append slice data in |data| of size |size| to pending hardware
95 // input buffer with |index|. This buffer will be submitted for decode
96 // on the next DecodeSurface(). Return true on success.
97 bool SubmitSlice(int index
, const uint8_t* data
, size_t size
);
99 // Submit controls in |ext_ctrls| to hardware. Return true on success.
100 bool SubmitExtControls(struct v4l2_ext_controls
* ext_ctrls
);
102 // Decode of |dec_surface| is ready to be submitted and all codec-specific
103 // settings are set in hardware.
104 void DecodeSurface(const scoped_refptr
<V4L2DecodeSurface
>& dec_surface
);
106 // |dec_surface| is ready to be outputted once decode is finished.
107 // This can be called before decode is actually done in hardware, and this
108 // method is responsible for maintaining the ordering, i.e. the surfaces will
109 // be outputted in the same order as SurfaceReady calls. To do so, the
110 // surfaces are put on decoder_display_queue_ and sent to output in that
111 // order once all preceding surfaces are sent.
112 void SurfaceReady(const scoped_refptr
<V4L2DecodeSurface
>& dec_surface
);
115 // Internal methods of this class.
117 // Recycle a V4L2 input buffer with |index| after dequeuing from device.
118 void ReuseInputBuffer(int index
);
120 // Recycle V4L2 output buffer with |index|. Used as surface release callback.
121 void ReuseOutputBuffer(int index
);
123 // Queue a |dec_surface| to device for decoding.
124 void Enqueue(const scoped_refptr
<V4L2DecodeSurface
>& dec_surface
);
126 // Dequeue any V4L2 buffers available and process.
129 // V4L2 QBUF helpers.
130 bool EnqueueInputRecord(int index
, uint32_t config_store
);
131 bool EnqueueOutputRecord(int index
);
133 // Set input and output formats in hardware.
136 // Create input and output buffers.
137 bool CreateInputBuffers();
138 bool CreateOutputBuffers();
140 // Destroy input buffers.
141 void DestroyInputBuffers();
143 // Destroy output buffers and release associated resources (textures,
144 // EGLImages). If |dismiss| is true, also dismissing the associated
146 bool DestroyOutputs(bool dismiss
);
148 // Used by DestroyOutputs.
149 bool DestroyOutputBuffers();
151 // Dismiss all |picture_buffer_ids| via Client::DismissPictureBuffer()
152 // and signal |done| after finishing.
153 void DismissPictures(std::vector
<int32
> picture_buffer_ids
,
154 base::WaitableEvent
* done
);
156 // Task to finish initialization on decoder_thread_.
157 void InitializeTask();
159 // Surface set change (resolution change) flow.
160 // If we have no surfaces allocated, just allocate them and return.
161 // Otherwise mark us as pending for surface set change.
162 void InitiateSurfaceSetChange();
163 // If a surface set change is pending and we are ready, stop the device,
164 // destroy outputs, releasing resources and dismissing pictures as required,
165 // followed by allocating a new set for the new resolution/DPB size
166 // as provided by decoder. Finally, try to resume decoding.
167 void FinishSurfaceSetChangeIfNeeded();
169 void NotifyError(Error error
);
172 // Sets the state to kError and notifies client if needed.
173 void SetErrorState(Error error
);
// ---- Flush flow, requested by the client ----
// Flush() posts a FlushTask, which checks the input queue. When nothing is
// pending for decode on decoder_input_queue_, InitiateFlush() is called
// directly. Otherwise a dummy BitstreamBufferRef is pushed onto
// decoder_input_queue_ to schedule the flush, and InitiateFlush() is called
// when that entry is reached later on, so the flush happens at the correct
// time.
// NOTE(review): the FlushTask declaration itself is not visible in this
// excerpt.

// Tells the decoder to flush all frames, resets it and marks us as scheduled
// for flush, so the flush can be finished once every pending decode is done.
void InitiateFlush();

// Performs the flush if we are waiting for one and all pending frames have
// been decoded. Sends all pending pictures to the client, notifies the client
// that the flush is complete and leaves us in a state ready to resume.
void FinishFlushIfNeeded();

// ---- Reset flow, requested by the client ----
// Drop all inputs and reset the decoder and mark us as pending for reset.
// NOTE(review): the declaration this comment belongs to is not visible in
// this excerpt.

// Performs the reset if we are waiting for one and all pending frames have
// been decoded. Drops all pending outputs (the client is no longer interested
// in them), notifies the client that we are done and leaves us in a state
// ready to resume.
void FinishResetIfNeeded();
// Processes pending events, if there are any.
void ProcessPendingEventsIfNeeded();

// Runs on decoder_thread_ as a consequence of poll() returning an event.
// NOTE(review): the original comment places that poll() on decoder_thread_,
// while DevicePollTask() is documented as running on device_poll_thread_ --
// confirm which thread actually polls.
void ServiceDeviceTask();

// Schedules a poll if any buffers are queued and the poll thread has not been
// stopped (which happens on surface set change).
void SchedulePollIfNeeded();

// Attempts to start, respectively stop, device_poll_thread_.
bool StartDevicePoll();
bool StopDevicePoll(bool keep_input_state);

// Runs on device_poll_thread_ and waits for device events.
void DevicePollTask(bool poll_device);
217 // We are in this state until Initialize() returns successfully.
218 // We can't post errors to the client in this state yet.
220 // Initialize() returned successfully.
222 // This state allows making progress decoding more input stream.
224 // Transitional state when we are not decoding any more stream, but are
225 // performing flush, reset, resolution change or are destroying ourselves.
227 // Error state, set when sending NotifyError to client.
// Id given to the flush buffer that FlushTask() queues.
const int kFlushBufferId = -2;
234 // Handler for Decode() on decoder_thread_.
235 void DecodeTask(const media::BitstreamBuffer
& bitstream_buffer
);
237 // Schedule a new DecodeBufferTask if we are decoding.
238 void ScheduleDecodeBufferTaskIfNeeded();
240 // Main decoder loop. Keep decoding the current buffer in decoder_, asking
241 // for more stream via TrySetNewBistreamBuffer() if decoder_ requests so,
242 // and handle other returns from it appropriately.
243 void DecodeBufferTask();
245 // Check decoder_input_queue_ for any available buffers to decode and
246 // set the decoder_current_bitstream_buffer_ to the next buffer if one is
247 // available, taking it off the queue. Also set the current stream pointer
248 // in decoder_, and return true.
249 // Return false if no buffers are pending on decoder_input_queue_.
250 bool TrySetNewBistreamBuffer();
252 // Auto-destruction reference for EGLSync (for message-passing).
253 struct EGLSyncKHRRef
;
254 void ReusePictureBufferTask(int32 picture_buffer_id
,
255 scoped_ptr
<EGLSyncKHRRef
> egl_sync_ref
);
257 // Called to actually send |dec_surface| to the client, after it is decoded
258 // preserving the order in which it was scheduled via SurfaceReady().
259 void OutputSurface(const scoped_refptr
<V4L2DecodeSurface
>& dec_surface
);
261 // Goes over the |decoder_display_queue_| and sends all buffers from the
262 // front of the queue that are already decoded to the client, in order.
263 void TryOutputSurfaces();
265 // Creates a new decode surface or returns nullptr if one is not available.
266 scoped_refptr
<V4L2DecodeSurface
> CreateSurface();
268 // Send decoded pictures to PictureReady.
269 void SendPictureReady();
271 // Callback that indicates a picture has been cleared.
272 void PictureCleared();
274 size_t input_planes_count_
;
275 size_t output_planes_count_
;
277 // GPU Child thread task runner.
278 const scoped_refptr
<base::SingleThreadTaskRunner
> child_task_runner_
;
280 // IO thread task runner.
281 scoped_refptr
<base::SingleThreadTaskRunner
> io_task_runner_
;
283 // WeakPtr<> pointing to |this| for use in posting tasks from the decoder or
284 // device worker threads back to the child thread.
285 base::WeakPtr
<V4L2SliceVideoDecodeAccelerator
> weak_this_
;
287 // To expose client callbacks from VideoDecodeAccelerator.
288 // NOTE: all calls to these objects *MUST* be executed on
289 // child_task_runner_.
// NOTE(review): the scoped_ptr<base::WeakPtrFactory<...>> declaration below
// has no declarator in this excerpt (the member name and terminating ';'
// appear to be missing) -- restore it from the original header before use.
290 scoped_ptr
<base::WeakPtrFactory
<VideoDecodeAccelerator::Client
>>
// Weak pointer to the client; per the note above it must only be used on
// child_task_runner_.
292 base::WeakPtr
<VideoDecodeAccelerator::Client
> client_
;
293 // Callbacks to |io_client_| must be executed on |io_task_runner_|.
294 base::WeakPtr
<Client
> io_client_
;
296 // V4L2 device in use.
297 scoped_refptr
<V4L2Device
> device_
;
299 // Thread to communicate with the device on.
300 base::Thread decoder_thread_
;
301 scoped_refptr
<base::SingleThreadTaskRunner
> decoder_thread_task_runner_
;
303 // Thread used to poll the device for events.
304 base::Thread device_poll_thread_
;
306 // Input queue state.
307 bool input_streamon_
;
308 // Number of input buffers enqueued to the device.
309 int input_buffer_queued_count_
;
310 // Input buffers ready to use; LIFO since we don't care about ordering.
311 std::list
<int> free_input_buffers_
;
312 // Mapping of int index to an input buffer record.
313 std::vector
<InputRecord
> input_buffer_map_
;
315 // Output queue state.
316 bool output_streamon_
;
317 // Number of output buffers enqueued to the device.
318 int output_buffer_queued_count_
;
319 // Output buffers ready to use.
320 std::list
<int> free_output_buffers_
;
321 // Mapping of int index to an output buffer record.
322 std::vector
<OutputRecord
> output_buffer_map_
;
324 media::VideoCodecProfile video_profile_
;
325 uint32_t output_format_fourcc_
;
326 gfx::Size visible_size_
;
327 gfx::Size coded_size_
;
329 struct BitstreamBufferRef
;
330 // Input queue of stream buffers coming from the client.
331 std::queue
<linked_ptr
<BitstreamBufferRef
>> decoder_input_queue_
;
332 // BitstreamBuffer currently being processed.
333 scoped_ptr
<BitstreamBufferRef
> decoder_current_bitstream_buffer_
;
335 // Queue storing decode surfaces ready to be output as soon as they are
336 // decoded. The surfaces must be output in order they are queued.
337 std::queue
<scoped_refptr
<V4L2DecodeSurface
>> decoder_display_queue_
;
342 // If any of these are true, we are waiting for the device to finish decoding
343 // all previously-queued frames, so we can finish the flush/reset/surface
344 // change flows. These can stack.
345 bool decoder_flushing_
;
346 bool decoder_resetting_
;
347 bool surface_set_change_pending_
;
349 // Hardware accelerators.
350 // TODO(posciak): Try to have a superclass here if possible.
351 scoped_ptr
<V4L2H264Accelerator
> h264_accelerator_
;
352 scoped_ptr
<V4L2VP8Accelerator
> vp8_accelerator_
;
354 // Codec-specific software decoder in use.
355 scoped_ptr
<AcceleratedVideoDecoder
> decoder_
;
357 // Surfaces queued to device to keep references to them while decoded.
358 using V4L2DecodeSurfaceByOutputId
=
359 std::map
<int, scoped_refptr
<V4L2DecodeSurface
>>;
360 V4L2DecodeSurfaceByOutputId surfaces_at_device_
;
362 // Surfaces sent to client to keep references to them while displayed.
363 using V4L2DecodeSurfaceByPictureBufferId
=
364 std::map
<int32
, scoped_refptr
<V4L2DecodeSurface
>>;
365 V4L2DecodeSurfaceByPictureBufferId surfaces_at_display_
;
367 // Record for decoded pictures that can be sent to PictureReady.
368 struct PictureRecord
;
369 // Pictures that are ready but not sent to PictureReady yet.
370 std::queue
<PictureRecord
> pending_picture_ready_
;
372 // The number of pictures that are sent to PictureReady and will be cleared.
373 int picture_clearing_count_
;
375 // Used by the decoder thread to wait for AssignPictureBuffers to arrive
376 // to avoid races with potential Reset requests.
377 base::WaitableEvent pictures_assigned_
;
379 // Make the GL context current callback.
380 base::Callback
<bool(void)> make_context_current_
;
383 EGLDisplay egl_display_
;
384 EGLContext egl_context_
;
386 // The WeakPtrFactory for |weak_this_|.
387 base::WeakPtrFactory
<V4L2SliceVideoDecodeAccelerator
> weak_this_factory_
;
389 DISALLOW_COPY_AND_ASSIGN(V4L2SliceVideoDecodeAccelerator
);
// Codec-specific picture classes; forward-declared only.
class V4L2H264Picture;
class V4L2VP8Picture;
395 } // namespace content
397 #endif // CONTENT_COMMON_GPU_MEDIA_V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_