[Android WebView] Fix webview perf bot switchover to use org.chromium.webview_shell...
[chromium-blink-merge.git] / content / common / gpu / media / v4l2_slice_video_decode_accelerator.h
blobdd12487bb2ce214991d6c2f57940b4e80790e629
1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef CONTENT_COMMON_GPU_MEDIA_V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_
6 #define CONTENT_COMMON_GPU_MEDIA_V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_
8 #include <linux/videodev2.h>
9 #include <queue>
10 #include <vector>
12 #include "base/memory/linked_ptr.h"
13 #include "base/memory/ref_counted.h"
14 #include "base/memory/scoped_ptr.h"
15 #include "base/memory/weak_ptr.h"
16 #include "base/synchronization/waitable_event.h"
17 #include "base/threading/thread.h"
18 #include "content/common/content_export.h"
19 #include "content/common/gpu/media/h264_decoder.h"
20 #include "content/common/gpu/media/v4l2_device.h"
21 #include "content/common/gpu/media/vp8_decoder.h"
22 #include "media/video/video_decode_accelerator.h"
24 namespace content {
26 // An implementation of VideoDecodeAccelerator that utilizes the V4L2 slice
27 // level codec API for decoding. The slice level API provides only low-level
28 // decoding functionality and requires userspace to provide support for parsing
29 // the input stream and managing decoder state across frames.
//
// The member comments below refer to several execution contexts: the GPU child
// thread (child_task_runner_), the IO thread (io_task_runner_),
// decoder_thread_ and device_poll_thread_.
// NOTE(review): which method runs on which of these is only partly stated in
// this header -- confirm against the implementation file before relying on it.
30 class CONTENT_EXPORT V4L2SliceVideoDecodeAccelerator
31 : public media::VideoDecodeAccelerator {
32 public:
// Forward declaration of the decode surface type; the methods below pass it
// around as scoped_refptr<V4L2DecodeSurface>.
33 class V4L2DecodeSurface;
// NOTE(review): the parameter |io_client_| carries a trailing underscore, a
// suffix this class otherwise uses for data members (a member named
// io_client_ is declared further down). Consider renaming the parameter.
35 V4L2SliceVideoDecodeAccelerator(
36 const scoped_refptr<V4L2Device>& device,
37 EGLDisplay egl_display,
38 EGLContext egl_context,
39 const base::WeakPtr<Client>& io_client_,
40 const base::Callback<bool(void)>& make_context_current,
41 const scoped_refptr<base::SingleThreadTaskRunner>& io_task_runner);
42 ~V4L2SliceVideoDecodeAccelerator() override;
44 // media::VideoDecodeAccelerator implementation.
45 bool Initialize(media::VideoCodecProfile profile,
46 VideoDecodeAccelerator::Client* client) override;
47 void Decode(const media::BitstreamBuffer& bitstream_buffer) override;
48 void AssignPictureBuffers(
49 const std::vector<media::PictureBuffer>& buffers) override;
50 void ReusePictureBuffer(int32 picture_buffer_id) override;
51 void Flush() override;
52 void Reset() override;
53 void Destroy() override;
54 bool CanDecodeOnIOThread() override;
// Static query that does not need an instance; returns a SupportedProfiles
// value as declared by media::VideoDecodeAccelerator.
56 static media::VideoDecodeAccelerator::SupportedProfiles
57 GetSupportedProfiles();
59 private:
// Codec-specific accelerator classes, held below as h264_accelerator_ and
// vp8_accelerator_. Only declared here.
60 class V4L2H264Accelerator;
61 class V4L2VP8Accelerator;
63 // Record for input buffers. Instances are stored in input_buffer_map_,
// indexed by an int buffer index.
// NOTE(review): individual field meanings are not documented in this header;
// the names suggest a mapped buffer (address/length/bytes_used) and whether it
// is currently queued to the device (at_device) -- confirm in the .cc file.
64 struct InputRecord {
65 InputRecord();
66 int32 input_id;
67 void* address;
68 size_t length;
69 size_t bytes_used;
70 bool at_device;
73 // Record for output buffers. Instances are stored in output_buffer_map_,
// indexed by an int buffer index.
// NOTE(review): field meanings are inferred from names only (ownership flags
// at_device/at_client, the associated picture id and EGL handles) -- confirm.
74 struct OutputRecord {
75 OutputRecord();
76 bool at_device;
77 bool at_client;
78 int32 picture_id;
79 EGLImageKHR egl_image;
80 EGLSyncKHR egl_sync;
81 bool cleared;
84 // See http://crbug.com/255116.
// NOTE(review): the three k* constants below are declared without `static`,
// so they appear to be per-instance const data members initialized in-class.
// If one shared value is intended, consider making them static -- confirm
// how the .cc file uses them first.
85 // Input bitstream buffer size for up to 1080p streams.
86 const size_t kInputBufferMaxSizeFor1080p = 1024 * 1024;
87 // Input bitstream buffer size for up to 4k streams.
88 const size_t kInputBufferMaxSizeFor4k = 4 * kInputBufferMaxSizeFor1080p;
// Number of input buffers (the comment on the two constants above does not
// cover this one). NOTE(review): presumably the count requested from the
// device in CreateInputBuffers() -- confirm.
89 const size_t kNumInputBuffers = 16;
92 // Below methods are used by accelerator implementations.
94 // Append slice data in |data| of size |size| to pending hardware
95 // input buffer with |index|. This buffer will be submitted for decode
96 // on the next DecodeSurface(). Return true on success.
97 bool SubmitSlice(int index, const uint8_t* data, size_t size);
99 // Submit controls in |ext_ctrls| to hardware. Return true on success.
100 bool SubmitExtControls(struct v4l2_ext_controls* ext_ctrls);
102 // Decode of |dec_surface| is ready to be submitted and all codec-specific
103 // settings are set in hardware.
104 void DecodeSurface(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
106 // |dec_surface| is ready to be outputted once decode is finished.
107 // This can be called before decode is actually done in hardware, and this
108 // method is responsible for maintaining the ordering, i.e. the surfaces will
109 // be outputted in the same order as SurfaceReady calls. To do so, the
110 // surfaces are put on decoder_display_queue_ and sent to output in that
111 // order once all preceding surfaces are sent.
112 void SurfaceReady(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
115 // Internal methods of this class.
117 // Recycle a V4L2 input buffer with |index| after dequeuing from device.
118 void ReuseInputBuffer(int index);
120 // Recycle V4L2 output buffer with |index|. Used as surface release callback.
121 void ReuseOutputBuffer(int index);
123 // Queue a |dec_surface| to device for decoding.
124 void Enqueue(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
126 // Dequeue any V4L2 buffers available and process.
127 void Dequeue();
129 // V4L2 QBUF helpers. Both take the int index of the record to enqueue and
// return a bool (NOTE(review): presumably true on success -- confirm).
130 bool EnqueueInputRecord(int index, uint32_t config_store);
131 bool EnqueueOutputRecord(int index);
133 // Set input and output formats in hardware.
134 bool SetupFormats();
136 // Create input and output buffers.
137 bool CreateInputBuffers();
138 bool CreateOutputBuffers();
140 // Destroy input buffers.
141 void DestroyInputBuffers();
143 // Destroy output buffers and release associated resources (textures,
144 // EGLImages). If |dismiss| is true, also dismiss the associated
145 // PictureBuffers.
146 bool DestroyOutputs(bool dismiss);
148 // Used by DestroyOutputs.
149 bool DestroyOutputBuffers();
151 // Dismiss all |picture_buffer_ids| via Client::DismissPictureBuffer()
152 // and signal |done| after finishing.
153 void DismissPictures(std::vector<int32> picture_buffer_ids,
154 base::WaitableEvent* done);
156 // Task to finish initialization on decoder_thread_.
157 void InitializeTask();
159 // Surface set change (resolution change) flow.
160 // If we have no surfaces allocated, just allocate them and return.
161 // Otherwise mark us as pending for surface set change.
162 void InitiateSurfaceSetChange();
163 // If a surface set change is pending and we are ready, stop the device,
164 // destroy outputs, releasing resources and dismissing pictures as required,
165 // followed by allocating a new set for the new resolution/DPB size
166 // as provided by decoder. Finally, try to resume decoding.
167 void FinishSurfaceSetChangeIfNeeded();
// NOTE(review): NotifyError() and DestroyTask() are undocumented here.
// Judging by their names and by SetErrorState() below, NotifyError()
// presumably reports |error| to the client and DestroyTask() presumably is
// the task posted by Destroy() -- confirm in the .cc file.
169 void NotifyError(Error error);
170 void DestroyTask();
172 // Sets the state to kError and notifies client if needed.
173 void SetErrorState(Error error);
175 // Flush flow when requested by client.
176 // When Flush() is called, it posts a FlushTask, which checks the input queue.
177 // If nothing is pending for decode on decoder_input_queue_, we call
178 // InitiateFlush() directly. Otherwise, we push a dummy BitstreamBufferRef
179 // onto the decoder_input_queue_ to schedule a flush. When we reach it later
180 // on, we call InitiateFlush() to perform it at the correct time.
181 void FlushTask();
182 // Tell the decoder to flush all frames, reset it and mark us as scheduled
183 // for flush, so that we can finish it once all pending decodes are finished.
184 void InitiateFlush();
185 // If all pending frames are decoded and we are waiting to flush, perform it.
186 // This will send all pending pictures to client and notify the client that
187 // flush is complete and puts us in a state ready to resume.
188 void FinishFlushIfNeeded();
190 // Reset flow when requested by client.
191 // Drop all inputs and reset the decoder and mark us as pending for reset.
192 void ResetTask();
193 // If all pending frames are decoded and we are waiting to reset, perform it.
194 // This drops all pending outputs (client is not interested anymore),
195 // notifies the client we are done and puts us in a state ready to resume.
196 void FinishResetIfNeeded();
198 // Process pending events if any.
199 void ProcessPendingEventsIfNeeded();
201 // Performed on decoder_thread_ as a consequence of poll() on decoder_thread_
202 // returning an event.
// NOTE(review): DevicePollTask() below is documented as running on
// device_poll_thread_ to wait for device events, so the second
// "decoder_thread_" above may be meant to read "device_poll_thread_" --
// confirm.
203 void ServiceDeviceTask();
205 // Schedule poll if we have any buffers queued and the poll thread
206 // is not stopped (on surface set change).
207 void SchedulePollIfNeeded();
209 // Attempt to start/stop device_poll_thread_.
210 bool StartDevicePoll();
211 bool StopDevicePoll(bool keep_input_state);
213 // Runs on device_poll_thread_ to wait for device events.
214 void DevicePollTask(bool poll_device);
// Decoder state machine values; the current value is kept in state_.
216 enum State {
217 // We are in this state until Initialize() returns successfully.
218 // We can't post errors to the client in this state yet.
219 kUninitialized,
220 // Initialize() returned successfully.
221 kInitialized,
222 // This state allows making progress decoding more input stream.
223 kDecoding,
224 // Transitional state when we are not decoding any more stream, but are
225 // performing flush, reset, resolution change or are destroying ourselves.
226 kIdle,
227 // Error state, set when sending NotifyError to client.
228 kError,
231 // Buffer id for flush buffer, queued by FlushTask().
// (This is the id given to the dummy BitstreamBufferRef described in the
// flush flow comment above. Like the k* sizes, it is declared without
// `static`.)
232 const int kFlushBufferId = -2;
234 // Handler for Decode() on decoder_thread_.
235 void DecodeTask(const media::BitstreamBuffer& bitstream_buffer);
237 // Schedule a new DecodeBufferTask if we are decoding.
238 void ScheduleDecodeBufferTaskIfNeeded();
240 // Main decoder loop. Keep decoding the current buffer in decoder_, asking
241 // for more stream via TrySetNewBistreamBuffer() if decoder_ requests so,
242 // and handle other returns from it appropriately.
243 void DecodeBufferTask();
245 // Check decoder_input_queue_ for any available buffers to decode and
246 // set the decoder_current_bitstream_buffer_ to the next buffer if one is
247 // available, taking it off the queue. Also set the current stream pointer
248 // in decoder_, and return true.
249 // Return false if no buffers are pending on decoder_input_queue_.
// NOTE(review): "Bistream" in the method name looks like a misspelling of
// "Bitstream"; renaming would have to touch the definition and all callers.
250 bool TrySetNewBistreamBuffer();
252 // Auto-destruction reference for EGLSync (for message-passing).
253 struct EGLSyncKHRRef;
// Takes ownership of |egl_sync_ref| (it is passed as a scoped_ptr by value).
// NOTE(review): presumably the decoder-thread half of ReusePictureBuffer() --
// confirm.
254 void ReusePictureBufferTask(int32 picture_buffer_id,
255 scoped_ptr<EGLSyncKHRRef> egl_sync_ref);
257 // Called to actually send |dec_surface| to the client, after it is decoded
258 // preserving the order in which it was scheduled via SurfaceReady().
259 void OutputSurface(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
261 // Goes over the |decoder_display_queue_| and sends all buffers from the
262 // front of the queue that are already decoded to the client, in order.
263 void TryOutputSurfaces();
265 // Creates a new decode surface or returns nullptr if one is not available.
266 scoped_refptr<V4L2DecodeSurface> CreateSurface();
268 // Send decoded pictures to PictureReady.
269 void SendPictureReady();
271 // Callback that indicates a picture has been cleared.
272 void PictureCleared();
// NOTE(review): presumably the number of V4L2 planes per input/output
// buffer -- not stated in this header, confirm.
274 size_t input_planes_count_;
275 size_t output_planes_count_;
277 // GPU Child thread task runner.
278 const scoped_refptr<base::SingleThreadTaskRunner> child_task_runner_;
280 // IO thread task runner.
281 scoped_refptr<base::SingleThreadTaskRunner> io_task_runner_;
283 // WeakPtr<> pointing to |this| for use in posting tasks from the decoder or
284 // device worker threads back to the child thread.
285 base::WeakPtr<V4L2SliceVideoDecodeAccelerator> weak_this_;
287 // To expose client callbacks from VideoDecodeAccelerator.
288 // NOTE: all calls to these objects *MUST* be executed on
289 // child_task_runner_.
290 scoped_ptr<base::WeakPtrFactory<VideoDecodeAccelerator::Client>>
291 client_ptr_factory_;
292 base::WeakPtr<VideoDecodeAccelerator::Client> client_;
293 // Callbacks to |io_client_| must be executed on |io_task_runner_|.
294 base::WeakPtr<Client> io_client_;
296 // V4L2 device in use.
297 scoped_refptr<V4L2Device> device_;
299 // Thread to communicate with the device on.
300 base::Thread decoder_thread_;
301 scoped_refptr<base::SingleThreadTaskRunner> decoder_thread_task_runner_;
303 // Thread used to poll the device for events.
304 base::Thread device_poll_thread_;
306 // Input queue state.
307 bool input_streamon_;
308 // Number of input buffers enqueued to the device.
309 int input_buffer_queued_count_;
310 // Input buffers ready to use; LIFO since we don't care about ordering.
// NOTE(review): std::list is used here and for free_output_buffers_, but
// <list> is not among the headers this file includes directly (only <queue>
// and <vector> are); it presumably arrives transitively. Consider including
// <list> explicitly.
311 std::list<int> free_input_buffers_;
312 // Mapping of int index to an input buffer record.
313 std::vector<InputRecord> input_buffer_map_;
315 // Output queue state.
316 bool output_streamon_;
317 // Number of output buffers enqueued to the device.
318 int output_buffer_queued_count_;
319 // Output buffers ready to use.
320 std::list<int> free_output_buffers_;
321 // Mapping of int index to an output buffer record.
322 std::vector<OutputRecord> output_buffer_map_;
// Stream parameters. NOTE(review): presumably the profile passed to
// Initialize(), the negotiated output pixel format, and the visible/coded
// frame sizes -- not documented in this header, confirm.
324 media::VideoCodecProfile video_profile_;
325 uint32_t output_format_fourcc_;
326 gfx::Size visible_size_;
327 gfx::Size coded_size_;
// Wrapper for client bitstream buffers; only declared in this header.
329 struct BitstreamBufferRef;
330 // Input queue of stream buffers coming from the client.
331 std::queue<linked_ptr<BitstreamBufferRef>> decoder_input_queue_;
332 // BitstreamBuffer currently being processed.
333 scoped_ptr<BitstreamBufferRef> decoder_current_bitstream_buffer_;
335 // Queue storing decode surfaces ready to be output as soon as they are
336 // decoded. The surfaces must be output in order they are queued.
337 std::queue<scoped_refptr<V4L2DecodeSurface>> decoder_display_queue_;
339 // Decoder state.
340 State state_;
342 // If any of these are true, we are waiting for the device to finish decoding
343 // all previously-queued frames, so we can finish the flush/reset/surface
344 // change flows. These can stack.
345 bool decoder_flushing_;
346 bool decoder_resetting_;
347 bool surface_set_change_pending_;
349 // Hardware accelerators.
350 // TODO(posciak): Try to have a superclass here if possible.
351 scoped_ptr<V4L2H264Accelerator> h264_accelerator_;
352 scoped_ptr<V4L2VP8Accelerator> vp8_accelerator_;
354 // Codec-specific software decoder in use.
355 scoped_ptr<AcceleratedVideoDecoder> decoder_;
357 // Surfaces queued to device to keep references to them while decoded.
// Keyed by an int (an output id, per the alias name).
// NOTE(review): std::map is used here and below, but <map> is not among the
// headers this file includes directly; consider including it explicitly.
358 using V4L2DecodeSurfaceByOutputId =
359 std::map<int, scoped_refptr<V4L2DecodeSurface>>;
360 V4L2DecodeSurfaceByOutputId surfaces_at_device_;
362 // Surfaces sent to client to keep references to them while displayed.
// Keyed by an int32 (a picture buffer id, per the alias name).
363 using V4L2DecodeSurfaceByPictureBufferId =
364 std::map<int32, scoped_refptr<V4L2DecodeSurface>>;
365 V4L2DecodeSurfaceByPictureBufferId surfaces_at_display_;
367 // Record for decoded pictures that can be sent to PictureReady.
368 struct PictureRecord;
369 // Pictures that are ready but not sent to PictureReady yet.
370 std::queue<PictureRecord> pending_picture_ready_;
372 // The number of pictures that are sent to PictureReady and will be cleared.
373 int picture_clearing_count_;
375 // Used by the decoder thread to wait for AssignPictureBuffers to arrive
376 // to avoid races with potential Reset requests.
377 base::WaitableEvent pictures_assigned_;
379 // Make the GL context current callback.
380 base::Callback<bool(void)> make_context_current_;
382 // EGL state
383 EGLDisplay egl_display_;
384 EGLContext egl_context_;
386 // The WeakPtrFactory for |weak_this_|.
387 base::WeakPtrFactory<V4L2SliceVideoDecodeAccelerator> weak_this_factory_;
389 DISALLOW_COPY_AND_ASSIGN(V4L2SliceVideoDecodeAccelerator);
// Forward declarations only; no definition of either class is visible in
// this header.
// NOTE(review): presumably the codec-specific picture types used by the
// H264 and VP8 accelerators -- confirm in the implementation file.
392 class V4L2H264Picture;
393 class V4L2VP8Picture;
395 } // namespace content
397 #endif // CONTENT_COMMON_GPU_MEDIA_V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_