/*
 * Copyright © 2014-2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
/**
 * DOC: Render command list generation
 *
 * In the VC4 driver, render command list generation is performed by the
 * kernel instead of userspace.  We do this because validating a
 * user-submitted command list is hard to get right and has high CPU overhead,
 * while the number of valid configurations for render command lists is
 * actually fairly low.
 */
#include "uapi/drm/vc4_drm.h"
#include "vc4_drv.h"
#include "vc4_packet.h"
38 struct vc4_rcl_setup
{
39 struct drm_gem_cma_object
*color_read
;
40 struct drm_gem_cma_object
*color_write
;
41 struct drm_gem_cma_object
*zs_read
;
42 struct drm_gem_cma_object
*zs_write
;
43 struct drm_gem_cma_object
*msaa_color_write
;
44 struct drm_gem_cma_object
*msaa_zs_write
;
46 struct drm_gem_cma_object
*rcl
;
50 static inline void rcl_u8(struct vc4_rcl_setup
*setup
, u8 val
)
52 *(u8
*)(setup
->rcl
->vaddr
+ setup
->next_offset
) = val
;
53 setup
->next_offset
+= 1;
56 static inline void rcl_u16(struct vc4_rcl_setup
*setup
, u16 val
)
58 *(u16
*)(setup
->rcl
->vaddr
+ setup
->next_offset
) = val
;
59 setup
->next_offset
+= 2;
62 static inline void rcl_u32(struct vc4_rcl_setup
*setup
, u32 val
)
64 *(u32
*)(setup
->rcl
->vaddr
+ setup
->next_offset
) = val
;
65 setup
->next_offset
+= 4;
69 * Emits a no-op STORE_TILE_BUFFER_GENERAL.
71 * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of
72 * some sort before another load is triggered.
74 static void vc4_store_before_load(struct vc4_rcl_setup
*setup
)
76 rcl_u8(setup
, VC4_PACKET_STORE_TILE_BUFFER_GENERAL
);
78 VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_NONE
,
79 VC4_LOADSTORE_TILE_BUFFER_BUFFER
) |
80 VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR
|
81 VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR
|
82 VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR
);
83 rcl_u32(setup
, 0); /* no address, since we're in None mode */
87 * Calculates the physical address of the start of a tile in a RCL surface.
89 * Unlike the other load/store packets,
90 * VC4_PACKET_LOAD/STORE_FULL_RES_TILE_BUFFER don't look at the tile
91 * coordinates packet, and instead just store to the address given.
93 static uint32_t vc4_full_res_offset(struct vc4_exec_info
*exec
,
94 struct drm_gem_cma_object
*bo
,
95 struct drm_vc4_submit_rcl_surface
*surf
,
98 return bo
->paddr
+ surf
->offset
+ VC4_TILE_BUFFER_SIZE
*
99 (DIV_ROUND_UP(exec
->args
->width
, 32) * y
+ x
);
103 * Emits a PACKET_TILE_COORDINATES if one isn't already pending.
105 * The tile coordinates packet triggers a pending load if there is one, are
106 * used for clipping during rendering, and determine where loads/stores happen
107 * relative to their base address.
109 static void vc4_tile_coordinates(struct vc4_rcl_setup
*setup
,
110 uint32_t x
, uint32_t y
)
112 rcl_u8(setup
, VC4_PACKET_TILE_COORDINATES
);
117 static void emit_tile(struct vc4_exec_info
*exec
,
118 struct vc4_rcl_setup
*setup
,
119 uint8_t x
, uint8_t y
, bool first
, bool last
)
121 struct drm_vc4_submit_cl
*args
= exec
->args
;
122 bool has_bin
= args
->bin_cl_size
!= 0;
124 /* Note that the load doesn't actually occur until the
125 * tile coords packet is processed, and only one load
126 * may be outstanding at a time.
128 if (setup
->color_read
) {
129 if (args
->color_read
.flags
&
130 VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES
) {
131 rcl_u8(setup
, VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER
);
133 vc4_full_res_offset(exec
, setup
->color_read
,
134 &args
->color_read
, x
, y
) |
135 VC4_LOADSTORE_FULL_RES_DISABLE_ZS
);
137 rcl_u8(setup
, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
);
138 rcl_u16(setup
, args
->color_read
.bits
);
139 rcl_u32(setup
, setup
->color_read
->paddr
+
140 args
->color_read
.offset
);
144 if (setup
->zs_read
) {
145 if (args
->zs_read
.flags
&
146 VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES
) {
147 rcl_u8(setup
, VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER
);
149 vc4_full_res_offset(exec
, setup
->zs_read
,
150 &args
->zs_read
, x
, y
) |
151 VC4_LOADSTORE_FULL_RES_DISABLE_COLOR
);
153 if (setup
->color_read
) {
154 /* Exec previous load. */
155 vc4_tile_coordinates(setup
, x
, y
);
156 vc4_store_before_load(setup
);
159 rcl_u8(setup
, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
);
160 rcl_u16(setup
, args
->zs_read
.bits
);
161 rcl_u32(setup
, setup
->zs_read
->paddr
+
162 args
->zs_read
.offset
);
166 /* Clipping depends on tile coordinates having been
167 * emitted, so we always need one here.
169 vc4_tile_coordinates(setup
, x
, y
);
171 /* Wait for the binner before jumping to the first
174 if (first
&& has_bin
)
175 rcl_u8(setup
, VC4_PACKET_WAIT_ON_SEMAPHORE
);
178 rcl_u8(setup
, VC4_PACKET_BRANCH_TO_SUB_LIST
);
179 rcl_u32(setup
, (exec
->tile_bo
->paddr
+
180 exec
->tile_alloc_offset
+
181 (y
* exec
->bin_tiles_x
+ x
) * 32));
184 if (setup
->msaa_color_write
) {
185 bool last_tile_write
= (!setup
->msaa_zs_write
&&
187 !setup
->color_write
);
188 uint32_t bits
= VC4_LOADSTORE_FULL_RES_DISABLE_ZS
;
190 if (!last_tile_write
)
191 bits
|= VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL
;
193 bits
|= VC4_LOADSTORE_FULL_RES_EOF
;
194 rcl_u8(setup
, VC4_PACKET_STORE_FULL_RES_TILE_BUFFER
);
196 vc4_full_res_offset(exec
, setup
->msaa_color_write
,
197 &args
->msaa_color_write
, x
, y
) |
201 if (setup
->msaa_zs_write
) {
202 bool last_tile_write
= (!setup
->zs_write
&&
203 !setup
->color_write
);
204 uint32_t bits
= VC4_LOADSTORE_FULL_RES_DISABLE_COLOR
;
206 if (setup
->msaa_color_write
)
207 vc4_tile_coordinates(setup
, x
, y
);
208 if (!last_tile_write
)
209 bits
|= VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL
;
211 bits
|= VC4_LOADSTORE_FULL_RES_EOF
;
212 rcl_u8(setup
, VC4_PACKET_STORE_FULL_RES_TILE_BUFFER
);
214 vc4_full_res_offset(exec
, setup
->msaa_zs_write
,
215 &args
->msaa_zs_write
, x
, y
) |
219 if (setup
->zs_write
) {
220 bool last_tile_write
= !setup
->color_write
;
222 if (setup
->msaa_color_write
|| setup
->msaa_zs_write
)
223 vc4_tile_coordinates(setup
, x
, y
);
225 rcl_u8(setup
, VC4_PACKET_STORE_TILE_BUFFER_GENERAL
);
226 rcl_u16(setup
, args
->zs_write
.bits
|
228 0 : VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR
));
230 (setup
->zs_write
->paddr
+ args
->zs_write
.offset
) |
231 ((last
&& last_tile_write
) ?
232 VC4_LOADSTORE_TILE_BUFFER_EOF
: 0));
235 if (setup
->color_write
) {
236 if (setup
->msaa_color_write
|| setup
->msaa_zs_write
||
238 vc4_tile_coordinates(setup
, x
, y
);
242 rcl_u8(setup
, VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF
);
244 rcl_u8(setup
, VC4_PACKET_STORE_MS_TILE_BUFFER
);
248 static int vc4_create_rcl_bo(struct drm_device
*dev
, struct vc4_exec_info
*exec
,
249 struct vc4_rcl_setup
*setup
)
251 struct drm_vc4_submit_cl
*args
= exec
->args
;
252 bool has_bin
= args
->bin_cl_size
!= 0;
253 uint8_t min_x_tile
= args
->min_x_tile
;
254 uint8_t min_y_tile
= args
->min_y_tile
;
255 uint8_t max_x_tile
= args
->max_x_tile
;
256 uint8_t max_y_tile
= args
->max_y_tile
;
257 uint8_t xtiles
= max_x_tile
- min_x_tile
+ 1;
258 uint8_t ytiles
= max_y_tile
- min_y_tile
+ 1;
260 uint32_t size
, loop_body_size
;
262 size
= VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE
;
263 loop_body_size
= VC4_PACKET_TILE_COORDINATES_SIZE
;
265 if (args
->flags
& VC4_SUBMIT_CL_USE_CLEAR_COLOR
) {
266 size
+= VC4_PACKET_CLEAR_COLORS_SIZE
+
267 VC4_PACKET_TILE_COORDINATES_SIZE
+
268 VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE
;
271 if (setup
->color_read
) {
272 if (args
->color_read
.flags
&
273 VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES
) {
274 loop_body_size
+= VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE
;
276 loop_body_size
+= VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE
;
279 if (setup
->zs_read
) {
280 if (args
->zs_read
.flags
&
281 VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES
) {
282 loop_body_size
+= VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE
;
284 if (setup
->color_read
&&
285 !(args
->color_read
.flags
&
286 VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES
)) {
287 loop_body_size
+= VC4_PACKET_TILE_COORDINATES_SIZE
;
288 loop_body_size
+= VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE
;
290 loop_body_size
+= VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE
;
295 size
+= VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE
;
296 loop_body_size
+= VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE
;
299 if (setup
->msaa_color_write
)
300 loop_body_size
+= VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE
;
301 if (setup
->msaa_zs_write
)
302 loop_body_size
+= VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE
;
305 loop_body_size
+= VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE
;
306 if (setup
->color_write
)
307 loop_body_size
+= VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE
;
309 /* We need a VC4_PACKET_TILE_COORDINATES in between each store. */
310 loop_body_size
+= VC4_PACKET_TILE_COORDINATES_SIZE
*
311 ((setup
->msaa_color_write
!= NULL
) +
312 (setup
->msaa_zs_write
!= NULL
) +
313 (setup
->color_write
!= NULL
) +
314 (setup
->zs_write
!= NULL
) - 1);
316 size
+= xtiles
* ytiles
* loop_body_size
;
318 setup
->rcl
= &vc4_bo_create(dev
, size
, true)->base
;
319 if (IS_ERR(setup
->rcl
))
320 return PTR_ERR(setup
->rcl
);
321 list_add_tail(&to_vc4_bo(&setup
->rcl
->base
)->unref_head
,
324 /* The tile buffer gets cleared when the previous tile is stored. If
325 * the clear values changed between frames, then the tile buffer has
326 * stale clear values in it, so we have to do a store in None mode (no
327 * writes) so that we trigger the tile buffer clear.
329 if (args
->flags
& VC4_SUBMIT_CL_USE_CLEAR_COLOR
) {
330 rcl_u8(setup
, VC4_PACKET_CLEAR_COLORS
);
331 rcl_u32(setup
, args
->clear_color
[0]);
332 rcl_u32(setup
, args
->clear_color
[1]);
333 rcl_u32(setup
, args
->clear_z
);
334 rcl_u8(setup
, args
->clear_s
);
336 vc4_tile_coordinates(setup
, 0, 0);
338 rcl_u8(setup
, VC4_PACKET_STORE_TILE_BUFFER_GENERAL
);
339 rcl_u16(setup
, VC4_LOADSTORE_TILE_BUFFER_NONE
);
340 rcl_u32(setup
, 0); /* no address, since we're in None mode */
343 rcl_u8(setup
, VC4_PACKET_TILE_RENDERING_MODE_CONFIG
);
345 (setup
->color_write
? (setup
->color_write
->paddr
+
346 args
->color_write
.offset
) :
348 rcl_u16(setup
, args
->width
);
349 rcl_u16(setup
, args
->height
);
350 rcl_u16(setup
, args
->color_write
.bits
);
352 for (y
= min_y_tile
; y
<= max_y_tile
; y
++) {
353 for (x
= min_x_tile
; x
<= max_x_tile
; x
++) {
354 bool first
= (x
== min_x_tile
&& y
== min_y_tile
);
355 bool last
= (x
== max_x_tile
&& y
== max_y_tile
);
357 emit_tile(exec
, setup
, x
, y
, first
, last
);
361 BUG_ON(setup
->next_offset
!= size
);
362 exec
->ct1ca
= setup
->rcl
->paddr
;
363 exec
->ct1ea
= setup
->rcl
->paddr
+ setup
->next_offset
;
368 static int vc4_full_res_bounds_check(struct vc4_exec_info
*exec
,
369 struct drm_gem_cma_object
*obj
,
370 struct drm_vc4_submit_rcl_surface
*surf
)
372 struct drm_vc4_submit_cl
*args
= exec
->args
;
373 u32 render_tiles_stride
= DIV_ROUND_UP(exec
->args
->width
, 32);
375 if (surf
->offset
> obj
->base
.size
) {
376 DRM_ERROR("surface offset %d > BO size %zd\n",
377 surf
->offset
, obj
->base
.size
);
381 if ((obj
->base
.size
- surf
->offset
) / VC4_TILE_BUFFER_SIZE
<
382 render_tiles_stride
* args
->max_y_tile
+ args
->max_x_tile
) {
383 DRM_ERROR("MSAA tile %d, %d out of bounds "
384 "(bo size %zd, offset %d).\n",
385 args
->max_x_tile
, args
->max_y_tile
,
394 static int vc4_rcl_msaa_surface_setup(struct vc4_exec_info
*exec
,
395 struct drm_gem_cma_object
**obj
,
396 struct drm_vc4_submit_rcl_surface
*surf
)
398 if (surf
->flags
!= 0 || surf
->bits
!= 0) {
399 DRM_ERROR("MSAA surface had nonzero flags/bits\n");
403 if (surf
->hindex
== ~0)
406 *obj
= vc4_use_bo(exec
, surf
->hindex
);
410 if (surf
->offset
& 0xf) {
411 DRM_ERROR("MSAA write must be 16b aligned.\n");
415 return vc4_full_res_bounds_check(exec
, *obj
, surf
);
418 static int vc4_rcl_surface_setup(struct vc4_exec_info
*exec
,
419 struct drm_gem_cma_object
**obj
,
420 struct drm_vc4_submit_rcl_surface
*surf
)
422 uint8_t tiling
= VC4_GET_FIELD(surf
->bits
,
423 VC4_LOADSTORE_TILE_BUFFER_TILING
);
424 uint8_t buffer
= VC4_GET_FIELD(surf
->bits
,
425 VC4_LOADSTORE_TILE_BUFFER_BUFFER
);
426 uint8_t format
= VC4_GET_FIELD(surf
->bits
,
427 VC4_LOADSTORE_TILE_BUFFER_FORMAT
);
431 if (surf
->flags
& ~VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES
) {
432 DRM_ERROR("Extra flags set\n");
436 if (surf
->hindex
== ~0)
439 *obj
= vc4_use_bo(exec
, surf
->hindex
);
443 if (surf
->flags
& VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES
) {
444 if (surf
== &exec
->args
->zs_write
) {
445 DRM_ERROR("general zs write may not be a full-res.\n");
449 if (surf
->bits
!= 0) {
450 DRM_ERROR("load/store general bits set with "
451 "full res load/store.\n");
455 ret
= vc4_full_res_bounds_check(exec
, *obj
, surf
);
462 if (surf
->bits
& ~(VC4_LOADSTORE_TILE_BUFFER_TILING_MASK
|
463 VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK
|
464 VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK
)) {
465 DRM_ERROR("Unknown bits in load/store: 0x%04x\n",
470 if (tiling
> VC4_TILING_FORMAT_LT
) {
471 DRM_ERROR("Bad tiling format\n");
475 if (buffer
== VC4_LOADSTORE_TILE_BUFFER_ZS
) {
477 DRM_ERROR("No color format should be set for ZS\n");
481 } else if (buffer
== VC4_LOADSTORE_TILE_BUFFER_COLOR
) {
483 case VC4_LOADSTORE_TILE_BUFFER_BGR565
:
484 case VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER
:
487 case VC4_LOADSTORE_TILE_BUFFER_RGBA8888
:
491 DRM_ERROR("Bad tile buffer format\n");
495 DRM_ERROR("Bad load/store buffer %d.\n", buffer
);
499 if (surf
->offset
& 0xf) {
500 DRM_ERROR("load/store buffer must be 16b aligned.\n");
504 if (!vc4_check_tex_size(exec
, *obj
, surf
->offset
, tiling
,
505 exec
->args
->width
, exec
->args
->height
, cpp
)) {
513 vc4_rcl_render_config_surface_setup(struct vc4_exec_info
*exec
,
514 struct vc4_rcl_setup
*setup
,
515 struct drm_gem_cma_object
**obj
,
516 struct drm_vc4_submit_rcl_surface
*surf
)
518 uint8_t tiling
= VC4_GET_FIELD(surf
->bits
,
519 VC4_RENDER_CONFIG_MEMORY_FORMAT
);
520 uint8_t format
= VC4_GET_FIELD(surf
->bits
,
521 VC4_RENDER_CONFIG_FORMAT
);
524 if (surf
->flags
!= 0) {
525 DRM_ERROR("No flags supported on render config.\n");
529 if (surf
->bits
& ~(VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK
|
530 VC4_RENDER_CONFIG_FORMAT_MASK
|
531 VC4_RENDER_CONFIG_MS_MODE_4X
|
532 VC4_RENDER_CONFIG_DECIMATE_MODE_4X
)) {
533 DRM_ERROR("Unknown bits in render config: 0x%04x\n",
538 if (surf
->hindex
== ~0)
541 *obj
= vc4_use_bo(exec
, surf
->hindex
);
545 if (tiling
> VC4_TILING_FORMAT_LT
) {
546 DRM_ERROR("Bad tiling format\n");
551 case VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED
:
552 case VC4_RENDER_CONFIG_FORMAT_BGR565
:
555 case VC4_RENDER_CONFIG_FORMAT_RGBA8888
:
559 DRM_ERROR("Bad tile buffer format\n");
563 if (!vc4_check_tex_size(exec
, *obj
, surf
->offset
, tiling
,
564 exec
->args
->width
, exec
->args
->height
, cpp
)) {
571 int vc4_get_rcl(struct drm_device
*dev
, struct vc4_exec_info
*exec
)
573 struct vc4_rcl_setup setup
= {0};
574 struct drm_vc4_submit_cl
*args
= exec
->args
;
575 bool has_bin
= args
->bin_cl_size
!= 0;
578 if (args
->min_x_tile
> args
->max_x_tile
||
579 args
->min_y_tile
> args
->max_y_tile
) {
580 DRM_ERROR("Bad render tile set (%d,%d)-(%d,%d)\n",
581 args
->min_x_tile
, args
->min_y_tile
,
582 args
->max_x_tile
, args
->max_y_tile
);
587 (args
->max_x_tile
> exec
->bin_tiles_x
||
588 args
->max_y_tile
> exec
->bin_tiles_y
)) {
589 DRM_ERROR("Render tiles (%d,%d) outside of bin config "
591 args
->max_x_tile
, args
->max_y_tile
,
592 exec
->bin_tiles_x
, exec
->bin_tiles_y
);
596 ret
= vc4_rcl_render_config_surface_setup(exec
, &setup
,
602 ret
= vc4_rcl_surface_setup(exec
, &setup
.color_read
, &args
->color_read
);
606 ret
= vc4_rcl_surface_setup(exec
, &setup
.zs_read
, &args
->zs_read
);
610 ret
= vc4_rcl_surface_setup(exec
, &setup
.zs_write
, &args
->zs_write
);
614 ret
= vc4_rcl_msaa_surface_setup(exec
, &setup
.msaa_color_write
,
615 &args
->msaa_color_write
);
619 ret
= vc4_rcl_msaa_surface_setup(exec
, &setup
.msaa_zs_write
,
620 &args
->msaa_zs_write
);
624 /* We shouldn't even have the job submitted to us if there's no
625 * surface to write out.
627 if (!setup
.color_write
&& !setup
.zs_write
&&
628 !setup
.msaa_color_write
&& !setup
.msaa_zs_write
) {
629 DRM_ERROR("RCL requires color or Z/S write\n");
633 return vc4_create_rcl_bo(dev
, exec
, &setup
);