drivers/gpu/drm/vc4/vc4_validate.c

   1 /*
   2  * Copyright © 2014 Broadcom
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 /**
  25  * DOC: Command list validator for VC4.
  26  *
  27  * Since the VC4 has no IOMMU between it and system memory, a user
  28  * with access to execute command lists could escalate privilege by
  29  * overwriting system memory (drawing to it as a framebuffer) or
  30  * reading system memory it shouldn't (reading it as a vertex buffer
  31  * or index buffer)
  32  *
  33  * We validate binner command lists to ensure that all accesses are
  34  * within the bounds of the GEM objects referenced by the submitted
  35  * job.  It explicitly whitelists packets, and looks at the offsets in
  36  * any address fields to make sure they're contained within the BOs
  37  * they reference.
  38  *
  39  * Note that because CL validation is already reading the
  40  * user-submitted CL and writing the validated copy out to the memory
  41  * that the GPU will actually read, this is also where GEM relocation
  42  * processing (turning BO references into actual addresses for the GPU
  43  * to use) happens.
  44  */
  45
  46 #include "uapi/drm/vc4_drm.h"
  47 #include "vc4_drv.h"
  48 #include "vc4_packet.h"
  49
  50 #define VALIDATE_ARGS \
  51         struct vc4_exec_info *exec,                     \
  52         void *validated,                                \
  53         void *untrusted
  54
  55 /** Return the width in pixels of a 64-byte microtile. */
  56 static uint32_t
  57 utile_width(int cpp)
  58 {
  59         switch (cpp) {
  60         case 1:
  61         case 2:
  62                 return 8;
  63         case 4:
  64                 return 4;
  65         case 8:
  66                 return 2;
  67         default:
  68                 DRM_ERROR("unknown cpp: %d\n", cpp);
  69                 return 1;
  70         }
  71 }
  72
  73 /** Return the height in pixels of a 64-byte microtile. */
  74 static uint32_t
  75 utile_height(int cpp)
  76 {
  77         switch (cpp) {
  78         case 1:
  79                 return 8;
  80         case 2:
  81         case 4:
  82         case 8:
  83                 return 4;
  84         default:
  85                 DRM_ERROR("unknown cpp: %d\n", cpp);
  86                 return 1;
  87         }
  88 }
  89
  90 /**
  91  * size_is_lt() - Returns whether a miplevel of the given size will
  92  * use the lineartile (LT) tiling layout rather than the normal T
  93  * tiling layout.
  94  * @width: Width in pixels of the miplevel
  95  * @height: Height in pixels of the miplevel
  96  * @cpp: Bytes per pixel of the pixel format
  97  */
  98 static bool
  99 size_is_lt(uint32_t width, uint32_t height, int cpp)
 100 {
 101         return (width <= 4 * utile_width(cpp) ||
 102                 height <= 4 * utile_height(cpp));
 103 }
 104
 105 struct drm_gem_cma_object *
 106 vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex)
 107 {
 108         struct drm_gem_cma_object *obj;
 109         struct vc4_bo *bo;
 110
 111         if (hindex >= exec->bo_count) {
 112                 DRM_DEBUG("BO index %d greater than BO count %d\n",
 113                           hindex, exec->bo_count);
 114                 return NULL;
 115         }
 116         obj = exec->bo[hindex];
 117         bo = to_vc4_bo(&obj->base);
 118
 119         if (bo->validated_shader) {
 120                 DRM_DEBUG("Trying to use shader BO as something other than "
 121                           "a shader\n");
 122                 return NULL;
 123         }
 124
 125         return obj;
 126 }
 127
 128 static struct drm_gem_cma_object *
 129 vc4_use_handle(struct vc4_exec_info *exec, uint32_t gem_handles_packet_index)
 130 {
 131         return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index]);
 132 }
 133
 134 static bool
 135 validate_bin_pos(struct vc4_exec_info *exec, void *untrusted, uint32_t pos)
 136 {
 137         /* Note that the untrusted pointer passed to these functions is
 138          * incremented past the packet byte.
 139          */
 140         return (untrusted - 1 == exec->bin_u + pos);
 141 }
 142
 143 static uint32_t
 144 gl_shader_rec_size(uint32_t pointer_bits)
 145 {
 146         uint32_t attribute_count = pointer_bits & 7;
 147         bool extended = pointer_bits & 8;
 148
 149         if (attribute_count == 0)
 150                 attribute_count = 8;
 151
 152         if (extended)
 153                 return 100 + attribute_count * 4;
 154         else
 155                 return 36 + attribute_count * 8;
 156 }
 157
 158 bool
 159 vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
 160                    uint32_t offset, uint8_t tiling_format,
 161                    uint32_t width, uint32_t height, uint8_t cpp)
 162 {
 163         uint32_t aligned_width, aligned_height, stride, size;
 164         uint32_t utile_w = utile_width(cpp);
 165         uint32_t utile_h = utile_height(cpp);
 166
 167         /* The shaded vertex format stores signed 12.4 fixed point
 168          * (-2048,2047) offsets from the viewport center, so we should
 169          * never have a render target larger than 4096.  The texture
 170          * unit can only sample from 2048x2048, so it's even more
 171          * restricted.  This lets us avoid worrying about overflow in
 172          * our math.
 173          */
 174         if (width > 4096 || height > 4096) {
 175                 DRM_DEBUG("Surface dimensions (%d,%d) too large",
 176                           width, height);
 177                 return false;
 178         }
 179
 180         switch (tiling_format) {
 181         case VC4_TILING_FORMAT_LINEAR:
 182                 aligned_width = round_up(width, utile_w);
 183                 aligned_height = height;
 184                 break;
 185         case VC4_TILING_FORMAT_T:
 186                 aligned_width = round_up(width, utile_w * 8);
 187                 aligned_height = round_up(height, utile_h * 8);
 188                 break;
 189         case VC4_TILING_FORMAT_LT:
 190                 aligned_width = round_up(width, utile_w);
 191                 aligned_height = round_up(height, utile_h);
 192                 break;
 193         default:
 194                 DRM_DEBUG("buffer tiling %d unsupported\n", tiling_format);
 195                 return false;
 196         }
 197
 198         stride = aligned_width * cpp;
 199         size = stride * aligned_height;
 200
 201         if (size + offset < size ||
 202             size + offset > fbo->base.size) {
 203                 DRM_DEBUG("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %zd)\n",
 204                           width, height,
 205                           aligned_width, aligned_height,
 206                           size, offset, fbo->base.size);
 207                 return false;
 208         }
 209
 210         return true;
 211 }
 212
 213 static int
 214 validate_flush(VALIDATE_ARGS)
 215 {
 216         if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 1)) {
 217                 DRM_DEBUG("Bin CL must end with VC4_PACKET_FLUSH\n");
 218                 return -EINVAL;
 219         }
 220         exec->found_flush = true;
 221
 222         return 0;
 223 }
 224
 225 static int
 226 validate_start_tile_binning(VALIDATE_ARGS)
 227 {
 228         if (exec->found_start_tile_binning_packet) {
 229                 DRM_DEBUG("Duplicate VC4_PACKET_START_TILE_BINNING\n");
 230                 return -EINVAL;
 231         }
 232         exec->found_start_tile_binning_packet = true;
 233
 234         if (!exec->found_tile_binning_mode_config_packet) {
 235                 DRM_DEBUG("missing VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
 236                 return -EINVAL;
 237         }
 238
 239         return 0;
 240 }
 241
 242 static int
 243 validate_increment_semaphore(VALIDATE_ARGS)
 244 {
 245         if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 2)) {
 246                 DRM_DEBUG("Bin CL must end with "
 247                           "VC4_PACKET_INCREMENT_SEMAPHORE\n");
 248                 return -EINVAL;
 249         }
 250         exec->found_increment_semaphore_packet = true;
 251
 252         return 0;
 253 }
 254
 255 static int
 256 validate_indexed_prim_list(VALIDATE_ARGS)
 257 {
 258         struct drm_gem_cma_object *ib;
 259         uint32_t length = *(uint32_t *)(untrusted + 1);
 260         uint32_t offset = *(uint32_t *)(untrusted + 5);
 261         uint32_t max_index = *(uint32_t *)(untrusted + 9);
 262         uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 2 : 1;
 263         struct vc4_shader_state *shader_state;
 264
 265         /* Check overflow condition */
 266         if (exec->shader_state_count == 0) {
 267                 DRM_DEBUG("shader state must precede primitives\n");
 268                 return -EINVAL;
 269         }
 270         shader_state = &exec->shader_state[exec->shader_state_count - 1];
 271
 272         if (max_index > shader_state->max_index)
 273                 shader_state->max_index = max_index;
 274
 275         ib = vc4_use_handle(exec, 0);
 276         if (!ib)
 277                 return -EINVAL;
 278
 279         exec->bin_dep_seqno = max(exec->bin_dep_seqno,
 280                                   to_vc4_bo(&ib->base)->write_seqno);
 281
 282         if (offset > ib->base.size ||
 283             (ib->base.size - offset) / index_size < length) {
 284                 DRM_DEBUG("IB access overflow (%d + %d*%d > %zd)\n",
 285                           offset, length, index_size, ib->base.size);
 286                 return -EINVAL;
 287         }
 288
 289         *(uint32_t *)(validated + 5) = ib->paddr + offset;
 290
 291         return 0;
 292 }
 293
 294 static int
 295 validate_gl_array_primitive(VALIDATE_ARGS)
 296 {
 297         uint32_t length = *(uint32_t *)(untrusted + 1);
 298         uint32_t base_index = *(uint32_t *)(untrusted + 5);
 299         uint32_t max_index;
 300         struct vc4_shader_state *shader_state;
 301
 302         /* Check overflow condition */
 303         if (exec->shader_state_count == 0) {
 304                 DRM_DEBUG("shader state must precede primitives\n");
 305                 return -EINVAL;
 306         }
 307         shader_state = &exec->shader_state[exec->shader_state_count - 1];
 308
 309         if (length + base_index < length) {
 310                 DRM_DEBUG("primitive vertex count overflow\n");
 311                 return -EINVAL;
 312         }
 313         max_index = length + base_index - 1;
 314
 315         if (max_index > shader_state->max_index)
 316                 shader_state->max_index = max_index;
 317
 318         return 0;
 319 }
 320
 321 static int
 322 validate_gl_shader_state(VALIDATE_ARGS)
 323 {
 324         uint32_t i = exec->shader_state_count++;
 325
 326         if (i >= exec->shader_state_size) {
 327                 DRM_DEBUG("More requests for shader states than declared\n");
 328                 return -EINVAL;
 329         }
 330
 331         exec->shader_state[i].addr = *(uint32_t *)untrusted;
 332         exec->shader_state[i].max_index = 0;
 333
 334         if (exec->shader_state[i].addr & ~0xf) {
 335                 DRM_DEBUG("high bits set in GL shader rec reference\n");
 336                 return -EINVAL;
 337         }
 338
 339         *(uint32_t *)validated = (exec->shader_rec_p +
 340                                   exec->shader_state[i].addr);
 341
 342         exec->shader_rec_p +=
 343                 roundup(gl_shader_rec_size(exec->shader_state[i].addr), 16);
 344
 345         return 0;
 346 }
 347
 348 static int
 349 validate_tile_binning_config(VALIDATE_ARGS)
 350 {
 351         struct drm_device *dev = exec->exec_bo->base.dev;
 352         struct vc4_dev *vc4 = to_vc4_dev(dev);
 353         uint8_t flags;
 354         uint32_t tile_state_size;
 355         uint32_t tile_count, bin_addr;
 356         int bin_slot;
 357
 358         if (exec->found_tile_binning_mode_config_packet) {
 359                 DRM_DEBUG("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
 360                 return -EINVAL;
 361         }
 362         exec->found_tile_binning_mode_config_packet = true;
 363
 364         exec->bin_tiles_x = *(uint8_t *)(untrusted + 12);
 365         exec->bin_tiles_y = *(uint8_t *)(untrusted + 13);
 366         tile_count = exec->bin_tiles_x * exec->bin_tiles_y;
 367         flags = *(uint8_t *)(untrusted + 14);
 368
 369         if (exec->bin_tiles_x == 0 ||
 370             exec->bin_tiles_y == 0) {
 371                 DRM_DEBUG("Tile binning config of %dx%d too small\n",
 372                           exec->bin_tiles_x, exec->bin_tiles_y);
 373                 return -EINVAL;
 374         }
 375
 376         if (flags & (VC4_BIN_CONFIG_DB_NON_MS |
 377                      VC4_BIN_CONFIG_TILE_BUFFER_64BIT)) {
 378                 DRM_DEBUG("unsupported binning config flags 0x%02x\n", flags);
 379                 return -EINVAL;
 380         }
 381
 382         bin_slot = vc4_v3d_get_bin_slot(vc4);
 383         if (bin_slot < 0) {
 384                 if (bin_slot != -EINTR && bin_slot != -ERESTARTSYS) {
 385                         DRM_ERROR("Failed to allocate binner memory: %d\n",
 386                                   bin_slot);
 387                 }
 388                 return bin_slot;
 389         }
 390
 391         /* The slot we allocated will only be used by this job, and is
 392          * free when the job completes rendering.
 393          */
 394         exec->bin_slots |= BIT(bin_slot);
 395         bin_addr = vc4->bin_bo->base.paddr + bin_slot * vc4->bin_alloc_size;
 396
 397         /* The tile state data array is 48 bytes per tile, and we put it at
 398          * the start of a BO containing both it and the tile alloc.
 399          */
 400         tile_state_size = 48 * tile_count;
 401
 402         /* Since the tile alloc array will follow us, align. */
 403         exec->tile_alloc_offset = bin_addr + roundup(tile_state_size, 4096);
 404
 405         *(uint8_t *)(validated + 14) =
 406                 ((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK |
 407                             VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK)) |
 408                  VC4_BIN_CONFIG_AUTO_INIT_TSDA |
 409                  VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32,
 410                                VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE) |
 411                  VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128,
 412                                VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE));
 413
 414         /* tile alloc address. */
 415         *(uint32_t *)(validated + 0) = exec->tile_alloc_offset;
 416         /* tile alloc size. */
 417         *(uint32_t *)(validated + 4) = (bin_addr + vc4->bin_alloc_size -
 418                                         exec->tile_alloc_offset);
 419         /* tile state address. */
 420         *(uint32_t *)(validated + 8) = bin_addr;
 421
 422         return 0;
 423 }
 424
 425 static int
 426 validate_gem_handles(VALIDATE_ARGS)
 427 {
 428         memcpy(exec->bo_index, untrusted, sizeof(exec->bo_index));
 429         return 0;
 430 }
 431
 432 #define VC4_DEFINE_PACKET(packet, func) \
 433         [packet] = { packet ## _SIZE, #packet, func }
 434
 435 static const struct cmd_info {
 436         uint16_t len;
 437         const char *name;
 438         int (*func)(struct vc4_exec_info *exec, void *validated,
 439                     void *untrusted);
 440 } cmd_info[] = {
 441         VC4_DEFINE_PACKET(VC4_PACKET_HALT, NULL),
 442         VC4_DEFINE_PACKET(VC4_PACKET_NOP, NULL),
 443         VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, validate_flush),
 444         VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, NULL),
 445         VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING,
 446                           validate_start_tile_binning),
 447         VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE,
 448                           validate_increment_semaphore),
 449
 450         VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE,
 451                           validate_indexed_prim_list),
 452         VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE,
 453                           validate_gl_array_primitive),
 454
 455         VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, NULL),
 456
 457         VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, validate_gl_shader_state),
 458
 459         VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, NULL),
 460         VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, NULL),
 461         VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, NULL),
 462         VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, NULL),
 463         VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, NULL),
 464         VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, NULL),
 465         VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, NULL),
 466         VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, NULL),
 467         VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, NULL),
 468         /* Note: The docs say this was also 105, but it was 106 in the
 469          * initial userland code drop.
 470          */
 471         VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, NULL),
 472
 473         VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG,
 474                           validate_tile_binning_config),
 475
 476         VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, validate_gem_handles),
 477 };
 478
 479 int
 480 vc4_validate_bin_cl(struct drm_device *dev,
 481                     void *validated,
 482                     void *unvalidated,
 483                     struct vc4_exec_info *exec)
 484 {
 485         uint32_t len = exec->args->bin_cl_size;
 486         uint32_t dst_offset = 0;
 487         uint32_t src_offset = 0;
 488
 489         while (src_offset < len) {
 490                 void *dst_pkt = validated + dst_offset;
 491                 void *src_pkt = unvalidated + src_offset;
 492                 u8 cmd = *(uint8_t *)src_pkt;
 493                 const struct cmd_info *info;
 494
 495                 if (cmd >= ARRAY_SIZE(cmd_info)) {
 496                         DRM_DEBUG("0x%08x: packet %d out of bounds\n",
 497                                   src_offset, cmd);
 498                         return -EINVAL;
 499                 }
 500
 501                 info = &cmd_info[cmd];
 502                 if (!info->name) {
 503                         DRM_DEBUG("0x%08x: packet %d invalid\n",
 504                                   src_offset, cmd);
 505                         return -EINVAL;
 506                 }
 507
 508                 if (src_offset + info->len > len) {
 509                         DRM_DEBUG("0x%08x: packet %d (%s) length 0x%08x "
 510                                   "exceeds bounds (0x%08x)\n",
 511                                   src_offset, cmd, info->name, info->len,
 512                                   src_offset + len);
 513                         return -EINVAL;
 514                 }
 515
 516                 if (cmd != VC4_PACKET_GEM_HANDLES)
 517                         memcpy(dst_pkt, src_pkt, info->len);
 518
 519                 if (info->func && info->func(exec,
 520                                              dst_pkt + 1,
 521                                              src_pkt + 1)) {
 522                         DRM_DEBUG("0x%08x: packet %d (%s) failed to validate\n",
 523                                   src_offset, cmd, info->name);
 524                         return -EINVAL;
 525                 }
 526
 527                 src_offset += info->len;
 528                 /* GEM handle loading doesn't produce HW packets. */
 529                 if (cmd != VC4_PACKET_GEM_HANDLES)
 530                         dst_offset += info->len;
 531
 532                 /* When the CL hits halt, it'll stop reading anything else. */
 533                 if (cmd == VC4_PACKET_HALT)
 534                         break;
 535         }
 536
 537         exec->ct0ea = exec->ct0ca + dst_offset;
 538
 539         if (!exec->found_start_tile_binning_packet) {
 540                 DRM_DEBUG("Bin CL missing VC4_PACKET_START_TILE_BINNING\n");
 541                 return -EINVAL;
 542         }
 543
 544         /* The bin CL must be ended with INCREMENT_SEMAPHORE and FLUSH.  The
 545          * semaphore is used to trigger the render CL to start up, and the
 546          * FLUSH is what caps the bin lists with
 547          * VC4_PACKET_RETURN_FROM_SUB_LIST (so they jump back to the main
 548          * render CL when they get called to) and actually triggers the queued
 549          * semaphore increment.
 550          */
 551         if (!exec->found_increment_semaphore_packet || !exec->found_flush) {
 552                 DRM_DEBUG("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE + "
 553                           "VC4_PACKET_FLUSH\n");
 554                 return -EINVAL;
 555         }
 556
 557         return 0;
 558 }
 559
 560 static bool
 561 reloc_tex(struct vc4_exec_info *exec,
 562           void *uniform_data_u,
 563           struct vc4_texture_sample_info *sample,
 564           uint32_t texture_handle_index, bool is_cs)
 565 {
 566         struct drm_gem_cma_object *tex;
 567         uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]);
 568         uint32_t p1 = *(uint32_t *)(uniform_data_u + sample->p_offset[1]);
 569         uint32_t p2 = (sample->p_offset[2] != ~0 ?
 570                        *(uint32_t *)(uniform_data_u + sample->p_offset[2]) : 0);
 571         uint32_t p3 = (sample->p_offset[3] != ~0 ?
 572                        *(uint32_t *)(uniform_data_u + sample->p_offset[3]) : 0);
 573         uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0];
 574         uint32_t offset = p0 & VC4_TEX_P0_OFFSET_MASK;
 575         uint32_t miplevels = VC4_GET_FIELD(p0, VC4_TEX_P0_MIPLVLS);
 576         uint32_t width = VC4_GET_FIELD(p1, VC4_TEX_P1_WIDTH);
 577         uint32_t height = VC4_GET_FIELD(p1, VC4_TEX_P1_HEIGHT);
 578         uint32_t cpp, tiling_format, utile_w, utile_h;
 579         uint32_t i;
 580         uint32_t cube_map_stride = 0;
 581         enum vc4_texture_data_type type;
 582
 583         tex = vc4_use_bo(exec, texture_handle_index);
 584         if (!tex)
 585                 return false;
 586
 587         if (sample->is_direct) {
 588                 uint32_t remaining_size = tex->base.size - p0;
 589
 590                 if (p0 > tex->base.size - 4) {
 591                         DRM_DEBUG("UBO offset greater than UBO size\n");
 592                         goto fail;
 593                 }
 594                 if (p1 > remaining_size - 4) {
 595                         DRM_DEBUG("UBO clamp would allow reads "
 596                                   "outside of UBO\n");
 597                         goto fail;
 598                 }
 599                 *validated_p0 = tex->paddr + p0;
 600                 return true;
 601         }
 602
 603         if (width == 0)
 604                 width = 2048;
 605         if (height == 0)
 606                 height = 2048;
 607
 608         if (p0 & VC4_TEX_P0_CMMODE_MASK) {
 609                 if (VC4_GET_FIELD(p2, VC4_TEX_P2_PTYPE) ==
 610                     VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE)
 611                         cube_map_stride = p2 & VC4_TEX_P2_CMST_MASK;
 612                 if (VC4_GET_FIELD(p3, VC4_TEX_P2_PTYPE) ==
 613                     VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) {
 614                         if (cube_map_stride) {
 615                                 DRM_DEBUG("Cube map stride set twice\n");
 616                                 goto fail;
 617                         }
 618
 619                         cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK;
 620                 }
 621                 if (!cube_map_stride) {
 622                         DRM_DEBUG("Cube map stride not set\n");
 623                         goto fail;
 624                 }
 625         }
 626
 627         type = (VC4_GET_FIELD(p0, VC4_TEX_P0_TYPE) |
 628                 (VC4_GET_FIELD(p1, VC4_TEX_P1_TYPE4) << 4));
 629
 630         switch (type) {
 631         case VC4_TEXTURE_TYPE_RGBA8888:
 632         case VC4_TEXTURE_TYPE_RGBX8888:
 633         case VC4_TEXTURE_TYPE_RGBA32R:
 634                 cpp = 4;
 635                 break;
 636         case VC4_TEXTURE_TYPE_RGBA4444:
 637         case VC4_TEXTURE_TYPE_RGBA5551:
 638         case VC4_TEXTURE_TYPE_RGB565:
 639         case VC4_TEXTURE_TYPE_LUMALPHA:
 640         case VC4_TEXTURE_TYPE_S16F:
 641         case VC4_TEXTURE_TYPE_S16:
 642                 cpp = 2;
 643                 break;
 644         case VC4_TEXTURE_TYPE_LUMINANCE:
 645         case VC4_TEXTURE_TYPE_ALPHA:
 646         case VC4_TEXTURE_TYPE_S8:
 647                 cpp = 1;
 648                 break;
 649         case VC4_TEXTURE_TYPE_ETC1:
 650                 /* ETC1 is arranged as 64-bit blocks, where each block is 4x4
 651                  * pixels.
 652                  */
 653                 cpp = 8;
 654                 width = (width + 3) >> 2;
 655                 height = (height + 3) >> 2;
 656                 break;
 657         case VC4_TEXTURE_TYPE_BW1:
 658         case VC4_TEXTURE_TYPE_A4:
 659         case VC4_TEXTURE_TYPE_A1:
 660         case VC4_TEXTURE_TYPE_RGBA64:
 661         case VC4_TEXTURE_TYPE_YUV422R:
 662         default:
 663                 DRM_DEBUG("Texture format %d unsupported\n", type);
 664                 goto fail;
 665         }
 666         utile_w = utile_width(cpp);
 667         utile_h = utile_height(cpp);
 668
 669         if (type == VC4_TEXTURE_TYPE_RGBA32R) {
 670                 tiling_format = VC4_TILING_FORMAT_LINEAR;
 671         } else {
 672                 if (size_is_lt(width, height, cpp))
 673                         tiling_format = VC4_TILING_FORMAT_LT;
 674                 else
 675                         tiling_format = VC4_TILING_FORMAT_T;
 676         }
 677
 678         if (!vc4_check_tex_size(exec, tex, offset + cube_map_stride * 5,
 679                                 tiling_format, width, height, cpp)) {
 680                 goto fail;
 681         }
 682
 683         /* The mipmap levels are stored before the base of the texture.  Make
 684          * sure there is actually space in the BO.
 685          */
 686         for (i = 1; i <= miplevels; i++) {
 687                 uint32_t level_width = max(width >> i, 1u);
 688                 uint32_t level_height = max(height >> i, 1u);
 689                 uint32_t aligned_width, aligned_height;
 690                 uint32_t level_size;
 691
 692                 /* Once the levels get small enough, they drop from T to LT. */
 693                 if (tiling_format == VC4_TILING_FORMAT_T &&
 694                     size_is_lt(level_width, level_height, cpp)) {
 695                         tiling_format = VC4_TILING_FORMAT_LT;
 696                 }
 697
 698                 switch (tiling_format) {
 699                 case VC4_TILING_FORMAT_T:
 700                         aligned_width = round_up(level_width, utile_w * 8);
 701                         aligned_height = round_up(level_height, utile_h * 8);
 702                         break;
 703                 case VC4_TILING_FORMAT_LT:
 704                         aligned_width = round_up(level_width, utile_w);
 705                         aligned_height = round_up(level_height, utile_h);
 706                         break;
 707                 default:
 708                         aligned_width = round_up(level_width, utile_w);
 709                         aligned_height = level_height;
 710                         break;
 711                 }
 712
 713                 level_size = aligned_width * cpp * aligned_height;
 714
 715                 if (offset < level_size) {
 716                         DRM_DEBUG("Level %d (%dx%d -> %dx%d) size %db "
 717                                   "overflowed buffer bounds (offset %d)\n",
 718                                   i, level_width, level_height,
 719                                   aligned_width, aligned_height,
 720                                   level_size, offset);
 721                         goto fail;
 722                 }
 723
 724                 offset -= level_size;
 725         }
 726
 727         *validated_p0 = tex->paddr + p0;
 728
 729         if (is_cs) {
 730                 exec->bin_dep_seqno = max(exec->bin_dep_seqno,
 731                                           to_vc4_bo(&tex->base)->write_seqno);
 732         }
 733
 734         return true;
 735  fail:
 736         DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0);
 737         DRM_INFO("Texture p1 at %d: 0x%08x\n", sample->p_offset[1], p1);
 738         DRM_INFO("Texture p2 at %d: 0x%08x\n", sample->p_offset[2], p2);
 739         DRM_INFO("Texture p3 at %d: 0x%08x\n", sample->p_offset[3], p3);
 740         return false;
 741 }
 742
 743 static int
 744 validate_gl_shader_rec(struct drm_device *dev,
 745                        struct vc4_exec_info *exec,
 746                        struct vc4_shader_state *state)
 747 {
 748         uint32_t *src_handles;
 749         void *pkt_u, *pkt_v;
 750         static const uint32_t shader_reloc_offsets[] = {
 751                 4, /* fs */
 752                 16, /* vs */
 753                 28, /* cs */
 754         };
 755         uint32_t shader_reloc_count = ARRAY_SIZE(shader_reloc_offsets);
 756         struct drm_gem_cma_object *bo[ARRAY_SIZE(shader_reloc_offsets) + 8];
 757         uint32_t nr_attributes, nr_relocs, packet_size;
 758         int i;
 759
 760         nr_attributes = state->addr & 0x7;
 761         if (nr_attributes == 0)
 762                 nr_attributes = 8;
 763         packet_size = gl_shader_rec_size(state->addr);
 764
 765         nr_relocs = ARRAY_SIZE(shader_reloc_offsets) + nr_attributes;
 766         if (nr_relocs * 4 > exec->shader_rec_size) {
 767                 DRM_DEBUG("overflowed shader recs reading %d handles "
 768                           "from %d bytes left\n",
 769                           nr_relocs, exec->shader_rec_size);
 770                 return -EINVAL;
 771         }
 772         src_handles = exec->shader_rec_u;
 773         exec->shader_rec_u += nr_relocs * 4;
 774         exec->shader_rec_size -= nr_relocs * 4;
 775
 776         if (packet_size > exec->shader_rec_size) {
 777                 DRM_DEBUG("overflowed shader recs copying %db packet "
 778                           "from %d bytes left\n",
 779                           packet_size, exec->shader_rec_size);
 780                 return -EINVAL;
 781         }
 782         pkt_u = exec->shader_rec_u;
 783         pkt_v = exec->shader_rec_v;
 784         memcpy(pkt_v, pkt_u, packet_size);
 785         exec->shader_rec_u += packet_size;
 786         /* Shader recs have to be aligned to 16 bytes (due to the attribute
 787          * flags being in the low bytes), so round the next validated shader
 788          * rec address up.  This should be safe, since we've got so many
 789          * relocations in a shader rec packet.
 790          */
 791         BUG_ON(roundup(packet_size, 16) - packet_size > nr_relocs * 4);
 792         exec->shader_rec_v += roundup(packet_size, 16);
 793         exec->shader_rec_size -= packet_size;
 794
 795         for (i = 0; i < shader_reloc_count; i++) {
 796                 if (src_handles[i] > exec->bo_count) {
 797                         DRM_DEBUG("Shader handle %d too big\n", src_handles[i]);
 798                         return -EINVAL;
 799                 }
 800
 801                 bo[i] = exec->bo[src_handles[i]];
 802                 if (!bo[i])
 803                         return -EINVAL;
 804         }
 805         for (i = shader_reloc_count; i < nr_relocs; i++) {
 806                 bo[i] = vc4_use_bo(exec, src_handles[i]);
 807                 if (!bo[i])
 808                         return -EINVAL;
 809         }
 810
 811         if (((*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD) == 0) !=
 812             to_vc4_bo(&bo[0]->base)->validated_shader->is_threaded) {
 813                 DRM_DEBUG("Thread mode of CL and FS do not match\n");
 814                 return -EINVAL;
 815         }
 816
 817         if (to_vc4_bo(&bo[1]->base)->validated_shader->is_threaded ||
 818             to_vc4_bo(&bo[2]->base)->validated_shader->is_threaded) {
 819                 DRM_DEBUG("cs and vs cannot be threaded\n");
 820                 return -EINVAL;
 821         }
 822
 823         for (i = 0; i < shader_reloc_count; i++) {
 824                 struct vc4_validated_shader_info *validated_shader;
 825                 uint32_t o = shader_reloc_offsets[i];
 826                 uint32_t src_offset = *(uint32_t *)(pkt_u + o);
 827                 uint32_t *texture_handles_u;
 828                 void *uniform_data_u;
 829                 uint32_t tex, uni;
 830
 831                 *(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset;
 832
 833                 if (src_offset != 0) {
 834                         DRM_DEBUG("Shaders must be at offset 0 of "
 835                                   "the BO.\n");
 836                         return -EINVAL;
 837                 }
 838
 839                 validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader;
 840                 if (!validated_shader)
 841                         return -EINVAL;
 842
 843                 if (validated_shader->uniforms_src_size >
 844                     exec->uniforms_size) {
 845                         DRM_DEBUG("Uniforms src buffer overflow\n");
 846                         return -EINVAL;
 847                 }
 848
 849                 texture_handles_u = exec->uniforms_u;
 850                 uniform_data_u = (texture_handles_u +
 851                                   validated_shader->num_texture_samples);
 852
 853                 memcpy(exec->uniforms_v, uniform_data_u,
 854                        validated_shader->uniforms_size);
 855
 856                 for (tex = 0;
 857                      tex < validated_shader->num_texture_samples;
 858                      tex++) {
 859                         if (!reloc_tex(exec,
 860                                        uniform_data_u,
 861                                        &validated_shader->texture_samples[tex],
 862                                        texture_handles_u[tex],
 863                                        i == 2)) {
 864                                 return -EINVAL;
 865                         }
 866                 }
 867
 868                 /* Fill in the uniform slots that need this shader's
 869                  * start-of-uniforms address (used for resetting the uniform
 870                  * stream in the presence of control flow).
 871                  */
 872                 for (uni = 0;
 873                      uni < validated_shader->num_uniform_addr_offsets;
 874                      uni++) {
 875                         uint32_t o = validated_shader->uniform_addr_offsets[uni];
 876                         ((uint32_t *)exec->uniforms_v)[o] = exec->uniforms_p;
 877                 }
 878
 879                 *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
 880
 881                 exec->uniforms_u += validated_shader->uniforms_src_size;
 882                 exec->uniforms_v += validated_shader->uniforms_size;
 883                 exec->uniforms_p += validated_shader->uniforms_size;
 884         }
 885
 886         for (i = 0; i < nr_attributes; i++) {
 887                 struct drm_gem_cma_object *vbo =
 888                         bo[ARRAY_SIZE(shader_reloc_offsets) + i];
 889                 uint32_t o = 36 + i * 8;
 890                 uint32_t offset = *(uint32_t *)(pkt_u + o + 0);
 891                 uint32_t attr_size = *(uint8_t *)(pkt_u + o + 4) + 1;
 892                 uint32_t stride = *(uint8_t *)(pkt_u + o + 5);
 893                 uint32_t max_index;
 894
 895                 exec->bin_dep_seqno = max(exec->bin_dep_seqno,
 896                                           to_vc4_bo(&vbo->base)->write_seqno);
 897
 898                 if (state->addr & 0x8)
 899                         stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff;
 900
 901                 if (vbo->base.size < offset ||
 902                     vbo->base.size - offset < attr_size) {
 903                         DRM_DEBUG("BO offset overflow (%d + %d > %zu)\n",
 904                                   offset, attr_size, vbo->base.size);
 905                         return -EINVAL;
 906                 }
 907
 908                 if (stride != 0) {
 909                         max_index = ((vbo->base.size - offset - attr_size) /
 910                                      stride);
 911                         if (state->max_index > max_index) {
 912                                 DRM_DEBUG("primitives use index %d out of "
 913                                           "supplied %d\n",
 914                                           state->max_index, max_index);
 915                                 return -EINVAL;
 916                         }
 917                 }
 918
 919                 *(uint32_t *)(pkt_v + o) = vbo->paddr + offset;
 920         }
 921
 922         return 0;
 923 }
 924
 925 int
 926 vc4_validate_shader_recs(struct drm_device *dev,
 927                          struct vc4_exec_info *exec)
 928 {
 929         uint32_t i;
 930         int ret = 0;
 931
 932         for (i = 0; i < exec->shader_state_count; i++) {
 933                 ret = validate_gl_shader_rec(dev, exec, &exec->shader_state[i]);
 934                 if (ret)
 935                         return ret;
 936         }
 937
 938         return ret;
 939 }