/*
 * Copyright © 2014-2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * DOC: Render command list generation
 *
 * In the VC4 driver, render command list generation is performed by the
 * kernel instead of userspace.  We do this because validating a
 * user-submitted command list is hard to get right and has high CPU overhead,
 * while the number of valid configurations for render command lists is
 * actually fairly low.
 */

34 #include "uapi/drm/vc4_drm.h"
36 #include "vc4_packet.h"
38 struct vc4_rcl_setup
{
39 struct drm_gem_cma_object
*color_read
;
40 struct drm_gem_cma_object
*color_write
;
41 struct drm_gem_cma_object
*zs_read
;
42 struct drm_gem_cma_object
*zs_write
;
43 struct drm_gem_cma_object
*msaa_color_write
;
44 struct drm_gem_cma_object
*msaa_zs_write
;
46 struct drm_gem_cma_object
*rcl
;
49 u32 next_write_bo_index
;
52 static inline void rcl_u8(struct vc4_rcl_setup
*setup
, u8 val
)
54 *(u8
*)(setup
->rcl
->vaddr
+ setup
->next_offset
) = val
;
55 setup
->next_offset
+= 1;
58 static inline void rcl_u16(struct vc4_rcl_setup
*setup
, u16 val
)
60 *(u16
*)(setup
->rcl
->vaddr
+ setup
->next_offset
) = val
;
61 setup
->next_offset
+= 2;
64 static inline void rcl_u32(struct vc4_rcl_setup
*setup
, u32 val
)
66 *(u32
*)(setup
->rcl
->vaddr
+ setup
->next_offset
) = val
;
67 setup
->next_offset
+= 4;
71 * Emits a no-op STORE_TILE_BUFFER_GENERAL.
73 * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of
74 * some sort before another load is triggered.
76 static void vc4_store_before_load(struct vc4_rcl_setup
*setup
)
78 rcl_u8(setup
, VC4_PACKET_STORE_TILE_BUFFER_GENERAL
);
80 VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_NONE
,
81 VC4_LOADSTORE_TILE_BUFFER_BUFFER
) |
82 VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR
|
83 VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR
|
84 VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR
);
85 rcl_u32(setup
, 0); /* no address, since we're in None mode */
89 * Calculates the physical address of the start of a tile in a RCL surface.
91 * Unlike the other load/store packets,
92 * VC4_PACKET_LOAD/STORE_FULL_RES_TILE_BUFFER don't look at the tile
93 * coordinates packet, and instead just store to the address given.
95 static uint32_t vc4_full_res_offset(struct vc4_exec_info
*exec
,
96 struct drm_gem_cma_object
*bo
,
97 struct drm_vc4_submit_rcl_surface
*surf
,
100 return bo
->paddr
+ surf
->offset
+ VC4_TILE_BUFFER_SIZE
*
101 (DIV_ROUND_UP(exec
->args
->width
, 32) * y
+ x
);
105 * Emits a PACKET_TILE_COORDINATES if one isn't already pending.
107 * The tile coordinates packet triggers a pending load if there is one, are
108 * used for clipping during rendering, and determine where loads/stores happen
109 * relative to their base address.
111 static void vc4_tile_coordinates(struct vc4_rcl_setup
*setup
,
112 uint32_t x
, uint32_t y
)
114 rcl_u8(setup
, VC4_PACKET_TILE_COORDINATES
);
119 static void emit_tile(struct vc4_exec_info
*exec
,
120 struct vc4_rcl_setup
*setup
,
121 uint8_t x
, uint8_t y
, bool first
, bool last
)
123 struct drm_vc4_submit_cl
*args
= exec
->args
;
124 bool has_bin
= args
->bin_cl_size
!= 0;
126 /* Note that the load doesn't actually occur until the
127 * tile coords packet is processed, and only one load
128 * may be outstanding at a time.
130 if (setup
->color_read
) {
131 if (args
->color_read
.flags
&
132 VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES
) {
133 rcl_u8(setup
, VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER
);
135 vc4_full_res_offset(exec
, setup
->color_read
,
136 &args
->color_read
, x
, y
) |
137 VC4_LOADSTORE_FULL_RES_DISABLE_ZS
);
139 rcl_u8(setup
, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
);
140 rcl_u16(setup
, args
->color_read
.bits
);
141 rcl_u32(setup
, setup
->color_read
->paddr
+
142 args
->color_read
.offset
);
146 if (setup
->zs_read
) {
147 if (args
->zs_read
.flags
&
148 VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES
) {
149 rcl_u8(setup
, VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER
);
151 vc4_full_res_offset(exec
, setup
->zs_read
,
152 &args
->zs_read
, x
, y
) |
153 VC4_LOADSTORE_FULL_RES_DISABLE_COLOR
);
155 if (setup
->color_read
) {
156 /* Exec previous load. */
157 vc4_tile_coordinates(setup
, x
, y
);
158 vc4_store_before_load(setup
);
161 rcl_u8(setup
, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
);
162 rcl_u16(setup
, args
->zs_read
.bits
);
163 rcl_u32(setup
, setup
->zs_read
->paddr
+
164 args
->zs_read
.offset
);
168 /* Clipping depends on tile coordinates having been
169 * emitted, so we always need one here.
171 vc4_tile_coordinates(setup
, x
, y
);
173 /* Wait for the binner before jumping to the first
176 if (first
&& has_bin
)
177 rcl_u8(setup
, VC4_PACKET_WAIT_ON_SEMAPHORE
);
180 rcl_u8(setup
, VC4_PACKET_BRANCH_TO_SUB_LIST
);
181 rcl_u32(setup
, (exec
->tile_bo
->paddr
+
182 exec
->tile_alloc_offset
+
183 (y
* exec
->bin_tiles_x
+ x
) * 32));
186 if (setup
->msaa_color_write
) {
187 bool last_tile_write
= (!setup
->msaa_zs_write
&&
189 !setup
->color_write
);
190 uint32_t bits
= VC4_LOADSTORE_FULL_RES_DISABLE_ZS
;
192 if (!last_tile_write
)
193 bits
|= VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL
;
195 bits
|= VC4_LOADSTORE_FULL_RES_EOF
;
196 rcl_u8(setup
, VC4_PACKET_STORE_FULL_RES_TILE_BUFFER
);
198 vc4_full_res_offset(exec
, setup
->msaa_color_write
,
199 &args
->msaa_color_write
, x
, y
) |
203 if (setup
->msaa_zs_write
) {
204 bool last_tile_write
= (!setup
->zs_write
&&
205 !setup
->color_write
);
206 uint32_t bits
= VC4_LOADSTORE_FULL_RES_DISABLE_COLOR
;
208 if (setup
->msaa_color_write
)
209 vc4_tile_coordinates(setup
, x
, y
);
210 if (!last_tile_write
)
211 bits
|= VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL
;
213 bits
|= VC4_LOADSTORE_FULL_RES_EOF
;
214 rcl_u8(setup
, VC4_PACKET_STORE_FULL_RES_TILE_BUFFER
);
216 vc4_full_res_offset(exec
, setup
->msaa_zs_write
,
217 &args
->msaa_zs_write
, x
, y
) |
221 if (setup
->zs_write
) {
222 bool last_tile_write
= !setup
->color_write
;
224 if (setup
->msaa_color_write
|| setup
->msaa_zs_write
)
225 vc4_tile_coordinates(setup
, x
, y
);
227 rcl_u8(setup
, VC4_PACKET_STORE_TILE_BUFFER_GENERAL
);
228 rcl_u16(setup
, args
->zs_write
.bits
|
230 0 : VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR
));
232 (setup
->zs_write
->paddr
+ args
->zs_write
.offset
) |
233 ((last
&& last_tile_write
) ?
234 VC4_LOADSTORE_TILE_BUFFER_EOF
: 0));
237 if (setup
->color_write
) {
238 if (setup
->msaa_color_write
|| setup
->msaa_zs_write
||
240 vc4_tile_coordinates(setup
, x
, y
);
244 rcl_u8(setup
, VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF
);
246 rcl_u8(setup
, VC4_PACKET_STORE_MS_TILE_BUFFER
);
250 static int vc4_create_rcl_bo(struct drm_device
*dev
, struct vc4_exec_info
*exec
,
251 struct vc4_rcl_setup
*setup
)
253 struct drm_vc4_submit_cl
*args
= exec
->args
;
254 bool has_bin
= args
->bin_cl_size
!= 0;
255 uint8_t min_x_tile
= args
->min_x_tile
;
256 uint8_t min_y_tile
= args
->min_y_tile
;
257 uint8_t max_x_tile
= args
->max_x_tile
;
258 uint8_t max_y_tile
= args
->max_y_tile
;
259 uint8_t xtiles
= max_x_tile
- min_x_tile
+ 1;
260 uint8_t ytiles
= max_y_tile
- min_y_tile
+ 1;
262 uint32_t size
, loop_body_size
;
264 size
= VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE
;
265 loop_body_size
= VC4_PACKET_TILE_COORDINATES_SIZE
;
267 if (args
->flags
& VC4_SUBMIT_CL_USE_CLEAR_COLOR
) {
268 size
+= VC4_PACKET_CLEAR_COLORS_SIZE
+
269 VC4_PACKET_TILE_COORDINATES_SIZE
+
270 VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE
;
273 if (setup
->color_read
) {
274 if (args
->color_read
.flags
&
275 VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES
) {
276 loop_body_size
+= VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE
;
278 loop_body_size
+= VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE
;
281 if (setup
->zs_read
) {
282 if (args
->zs_read
.flags
&
283 VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES
) {
284 loop_body_size
+= VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE
;
286 if (setup
->color_read
&&
287 !(args
->color_read
.flags
&
288 VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES
)) {
289 loop_body_size
+= VC4_PACKET_TILE_COORDINATES_SIZE
;
290 loop_body_size
+= VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE
;
292 loop_body_size
+= VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE
;
297 size
+= VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE
;
298 loop_body_size
+= VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE
;
301 if (setup
->msaa_color_write
)
302 loop_body_size
+= VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE
;
303 if (setup
->msaa_zs_write
)
304 loop_body_size
+= VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE
;
307 loop_body_size
+= VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE
;
308 if (setup
->color_write
)
309 loop_body_size
+= VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE
;
311 /* We need a VC4_PACKET_TILE_COORDINATES in between each store. */
312 loop_body_size
+= VC4_PACKET_TILE_COORDINATES_SIZE
*
313 ((setup
->msaa_color_write
!= NULL
) +
314 (setup
->msaa_zs_write
!= NULL
) +
315 (setup
->color_write
!= NULL
) +
316 (setup
->zs_write
!= NULL
) - 1);
318 size
+= xtiles
* ytiles
* loop_body_size
;
320 setup
->rcl
= &vc4_bo_create(dev
, size
, true)->base
;
321 if (IS_ERR(setup
->rcl
))
322 return PTR_ERR(setup
->rcl
);
323 list_add_tail(&to_vc4_bo(&setup
->rcl
->base
)->unref_head
,
326 /* The tile buffer gets cleared when the previous tile is stored. If
327 * the clear values changed between frames, then the tile buffer has
328 * stale clear values in it, so we have to do a store in None mode (no
329 * writes) so that we trigger the tile buffer clear.
331 if (args
->flags
& VC4_SUBMIT_CL_USE_CLEAR_COLOR
) {
332 rcl_u8(setup
, VC4_PACKET_CLEAR_COLORS
);
333 rcl_u32(setup
, args
->clear_color
[0]);
334 rcl_u32(setup
, args
->clear_color
[1]);
335 rcl_u32(setup
, args
->clear_z
);
336 rcl_u8(setup
, args
->clear_s
);
338 vc4_tile_coordinates(setup
, 0, 0);
340 rcl_u8(setup
, VC4_PACKET_STORE_TILE_BUFFER_GENERAL
);
341 rcl_u16(setup
, VC4_LOADSTORE_TILE_BUFFER_NONE
);
342 rcl_u32(setup
, 0); /* no address, since we're in None mode */
345 rcl_u8(setup
, VC4_PACKET_TILE_RENDERING_MODE_CONFIG
);
347 (setup
->color_write
? (setup
->color_write
->paddr
+
348 args
->color_write
.offset
) :
350 rcl_u16(setup
, args
->width
);
351 rcl_u16(setup
, args
->height
);
352 rcl_u16(setup
, args
->color_write
.bits
);
354 for (y
= min_y_tile
; y
<= max_y_tile
; y
++) {
355 for (x
= min_x_tile
; x
<= max_x_tile
; x
++) {
356 bool first
= (x
== min_x_tile
&& y
== min_y_tile
);
357 bool last
= (x
== max_x_tile
&& y
== max_y_tile
);
359 emit_tile(exec
, setup
, x
, y
, first
, last
);
363 BUG_ON(setup
->next_offset
!= size
);
364 exec
->ct1ca
= setup
->rcl
->paddr
;
365 exec
->ct1ea
= setup
->rcl
->paddr
+ setup
->next_offset
;
370 static int vc4_full_res_bounds_check(struct vc4_exec_info
*exec
,
371 struct drm_gem_cma_object
*obj
,
372 struct drm_vc4_submit_rcl_surface
*surf
)
374 struct drm_vc4_submit_cl
*args
= exec
->args
;
375 u32 render_tiles_stride
= DIV_ROUND_UP(exec
->args
->width
, 32);
377 if (surf
->offset
> obj
->base
.size
) {
378 DRM_ERROR("surface offset %d > BO size %zd\n",
379 surf
->offset
, obj
->base
.size
);
383 if ((obj
->base
.size
- surf
->offset
) / VC4_TILE_BUFFER_SIZE
<
384 render_tiles_stride
* args
->max_y_tile
+ args
->max_x_tile
) {
385 DRM_ERROR("MSAA tile %d, %d out of bounds "
386 "(bo size %zd, offset %d).\n",
387 args
->max_x_tile
, args
->max_y_tile
,
396 static int vc4_rcl_msaa_surface_setup(struct vc4_exec_info
*exec
,
397 struct drm_gem_cma_object
**obj
,
398 struct drm_vc4_submit_rcl_surface
*surf
)
400 if (surf
->flags
!= 0 || surf
->bits
!= 0) {
401 DRM_ERROR("MSAA surface had nonzero flags/bits\n");
405 if (surf
->hindex
== ~0)
408 *obj
= vc4_use_bo(exec
, surf
->hindex
);
412 exec
->rcl_write_bo
[exec
->rcl_write_bo_count
++] = *obj
;
414 if (surf
->offset
& 0xf) {
415 DRM_ERROR("MSAA write must be 16b aligned.\n");
419 return vc4_full_res_bounds_check(exec
, *obj
, surf
);
422 static int vc4_rcl_surface_setup(struct vc4_exec_info
*exec
,
423 struct drm_gem_cma_object
**obj
,
424 struct drm_vc4_submit_rcl_surface
*surf
,
427 uint8_t tiling
= VC4_GET_FIELD(surf
->bits
,
428 VC4_LOADSTORE_TILE_BUFFER_TILING
);
429 uint8_t buffer
= VC4_GET_FIELD(surf
->bits
,
430 VC4_LOADSTORE_TILE_BUFFER_BUFFER
);
431 uint8_t format
= VC4_GET_FIELD(surf
->bits
,
432 VC4_LOADSTORE_TILE_BUFFER_FORMAT
);
436 if (surf
->flags
& ~VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES
) {
437 DRM_ERROR("Extra flags set\n");
441 if (surf
->hindex
== ~0)
444 *obj
= vc4_use_bo(exec
, surf
->hindex
);
449 exec
->rcl_write_bo
[exec
->rcl_write_bo_count
++] = *obj
;
451 if (surf
->flags
& VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES
) {
452 if (surf
== &exec
->args
->zs_write
) {
453 DRM_ERROR("general zs write may not be a full-res.\n");
457 if (surf
->bits
!= 0) {
458 DRM_ERROR("load/store general bits set with "
459 "full res load/store.\n");
463 ret
= vc4_full_res_bounds_check(exec
, *obj
, surf
);
470 if (surf
->bits
& ~(VC4_LOADSTORE_TILE_BUFFER_TILING_MASK
|
471 VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK
|
472 VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK
)) {
473 DRM_ERROR("Unknown bits in load/store: 0x%04x\n",
478 if (tiling
> VC4_TILING_FORMAT_LT
) {
479 DRM_ERROR("Bad tiling format\n");
483 if (buffer
== VC4_LOADSTORE_TILE_BUFFER_ZS
) {
485 DRM_ERROR("No color format should be set for ZS\n");
489 } else if (buffer
== VC4_LOADSTORE_TILE_BUFFER_COLOR
) {
491 case VC4_LOADSTORE_TILE_BUFFER_BGR565
:
492 case VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER
:
495 case VC4_LOADSTORE_TILE_BUFFER_RGBA8888
:
499 DRM_ERROR("Bad tile buffer format\n");
503 DRM_ERROR("Bad load/store buffer %d.\n", buffer
);
507 if (surf
->offset
& 0xf) {
508 DRM_ERROR("load/store buffer must be 16b aligned.\n");
512 if (!vc4_check_tex_size(exec
, *obj
, surf
->offset
, tiling
,
513 exec
->args
->width
, exec
->args
->height
, cpp
)) {
521 vc4_rcl_render_config_surface_setup(struct vc4_exec_info
*exec
,
522 struct vc4_rcl_setup
*setup
,
523 struct drm_gem_cma_object
**obj
,
524 struct drm_vc4_submit_rcl_surface
*surf
)
526 uint8_t tiling
= VC4_GET_FIELD(surf
->bits
,
527 VC4_RENDER_CONFIG_MEMORY_FORMAT
);
528 uint8_t format
= VC4_GET_FIELD(surf
->bits
,
529 VC4_RENDER_CONFIG_FORMAT
);
532 if (surf
->flags
!= 0) {
533 DRM_ERROR("No flags supported on render config.\n");
537 if (surf
->bits
& ~(VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK
|
538 VC4_RENDER_CONFIG_FORMAT_MASK
|
539 VC4_RENDER_CONFIG_MS_MODE_4X
|
540 VC4_RENDER_CONFIG_DECIMATE_MODE_4X
)) {
541 DRM_ERROR("Unknown bits in render config: 0x%04x\n",
546 if (surf
->hindex
== ~0)
549 *obj
= vc4_use_bo(exec
, surf
->hindex
);
553 exec
->rcl_write_bo
[exec
->rcl_write_bo_count
++] = *obj
;
555 if (tiling
> VC4_TILING_FORMAT_LT
) {
556 DRM_ERROR("Bad tiling format\n");
561 case VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED
:
562 case VC4_RENDER_CONFIG_FORMAT_BGR565
:
565 case VC4_RENDER_CONFIG_FORMAT_RGBA8888
:
569 DRM_ERROR("Bad tile buffer format\n");
573 if (!vc4_check_tex_size(exec
, *obj
, surf
->offset
, tiling
,
574 exec
->args
->width
, exec
->args
->height
, cpp
)) {
581 int vc4_get_rcl(struct drm_device
*dev
, struct vc4_exec_info
*exec
)
583 struct vc4_rcl_setup setup
= {0};
584 struct drm_vc4_submit_cl
*args
= exec
->args
;
585 bool has_bin
= args
->bin_cl_size
!= 0;
588 if (args
->min_x_tile
> args
->max_x_tile
||
589 args
->min_y_tile
> args
->max_y_tile
) {
590 DRM_ERROR("Bad render tile set (%d,%d)-(%d,%d)\n",
591 args
->min_x_tile
, args
->min_y_tile
,
592 args
->max_x_tile
, args
->max_y_tile
);
597 (args
->max_x_tile
> exec
->bin_tiles_x
||
598 args
->max_y_tile
> exec
->bin_tiles_y
)) {
599 DRM_ERROR("Render tiles (%d,%d) outside of bin config "
601 args
->max_x_tile
, args
->max_y_tile
,
602 exec
->bin_tiles_x
, exec
->bin_tiles_y
);
606 ret
= vc4_rcl_render_config_surface_setup(exec
, &setup
,
612 ret
= vc4_rcl_surface_setup(exec
, &setup
.color_read
, &args
->color_read
,
617 ret
= vc4_rcl_surface_setup(exec
, &setup
.zs_read
, &args
->zs_read
,
622 ret
= vc4_rcl_surface_setup(exec
, &setup
.zs_write
, &args
->zs_write
,
627 ret
= vc4_rcl_msaa_surface_setup(exec
, &setup
.msaa_color_write
,
628 &args
->msaa_color_write
);
632 ret
= vc4_rcl_msaa_surface_setup(exec
, &setup
.msaa_zs_write
,
633 &args
->msaa_zs_write
);
637 /* We shouldn't even have the job submitted to us if there's no
638 * surface to write out.
640 if (!setup
.color_write
&& !setup
.zs_write
&&
641 !setup
.msaa_color_write
&& !setup
.msaa_zs_write
) {
642 DRM_ERROR("RCL requires color or Z/S write\n");
646 return vc4_create_rcl_bo(dev
, exec
, &setup
);