drivers/gpu/drm/vc4/vc4_gem.c

/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/device.h>
#include <linux/io.h>

#include "uapi/drm/vc4_drm.h"
#include "vc4_drv.h"
#include "vc4_regs.h"
#include "vc4_trace.h"

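/* (Re)arms the hang-check timer.  If the timer fires without the
 * control lists having made progress, the GPU is assumed to be hung
 * and gets reset; see vc4_hangcheck_elapsed().
 */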
static void
vc4_queue_hangcheck(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	mod_timer(&vc4->hangcheck.timer,
		  round_jiffies_up(jiffies + msecs_to_jiffies(100)));
}

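/* Snapshot of GPU state taken when a hang was detected, held until
 * userspace fetches it through vc4_get_hang_state_ioctl().
 */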
struct vc4_hang_state {
	struct drm_vc4_get_hang_state user_state;

	u32 bo_count;
	struct drm_gem_object **bo;
};

static void
vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
{
	unsigned int i;

	for (i = 0; i < state->user_state.bo_count; i++)
		drm_gem_object_unreference_unlocked(state->bo[i]);

	kfree(state);
}

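/* Returns the saved hang state to userspace: the register snapshot is
 * copied out, and each captured BO gets a fresh GEM handle in the
 * caller's file.  The kernel's copy is freed once it has been
 * delivered.
 */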
int
vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv)
{
	struct drm_vc4_get_hang_state *get_state = data;
	struct drm_vc4_get_hang_state_bo *bo_state;
	struct vc4_hang_state *kernel_state;
	struct drm_vc4_get_hang_state *state;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	unsigned long irqflags;
	u32 i;
	int ret = 0;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	kernel_state = vc4->hang_state;
	if (!kernel_state) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return -ENOENT;
	}
	state = &kernel_state->user_state;

	/* If the user's array isn't big enough, just return the
	 * required array size.
	 */
	if (get_state->bo_count < state->bo_count) {
		get_state->bo_count = state->bo_count;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return 0;
	}

	vc4->hang_state = NULL;
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	/* Save the user's BO pointer, so we don't stomp it with the memcpy. */
	state->bo = get_state->bo;
	memcpy(get_state, state, sizeof(*state));

	bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL);
	if (!bo_state) {
		ret = -ENOMEM;
		goto err_free;
	}

	for (i = 0; i < state->bo_count; i++) {
		struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
		u32 handle;

		ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
					    &handle);

		if (ret) {
			state->bo_count = i;
			goto err_delete_handle;
		}
		bo_state[i].handle = handle;
		bo_state[i].paddr = vc4_bo->base.paddr;
		bo_state[i].size = vc4_bo->base.base.size;
	}

	if (copy_to_user((void __user *)(uintptr_t)get_state->bo,
			 bo_state,
			 state->bo_count * sizeof(*bo_state)))
		ret = -EFAULT;

err_delete_handle:
	if (ret) {
		for (i = 0; i < state->bo_count; i++)
			drm_gem_handle_delete(file_priv, bo_state[i].handle);
	}

err_free:
	vc4_free_hang_state(dev, kernel_state);
	kfree(bo_state);

	return ret;
}

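/* Called from the reset worker to record the hanging bin/render jobs:
 * takes a reference on every BO they use and snapshots the V3D
 * registers into vc4->hang_state.
 */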
static void
vc4_save_hang_state(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_vc4_get_hang_state *state;
	struct vc4_hang_state *kernel_state;
	struct vc4_exec_info *exec[2];
	struct vc4_bo *bo;
	unsigned long irqflags;
	unsigned int i, j, unref_list_count, prev_idx;

	kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL);
	if (!kernel_state)
		return;

	state = &kernel_state->user_state;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	exec[0] = vc4_first_bin_job(vc4);
	exec[1] = vc4_first_render_job(vc4);
	if (!exec[0] && !exec[1]) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		kfree(kernel_state);	/* don't leak the snapshot on the idle path */
		return;
	}

	/* Get the bos from both binner and renderer into hang state. */
	state->bo_count = 0;
	for (i = 0; i < 2; i++) {
		if (!exec[i])
			continue;

		unref_list_count = 0;
		list_for_each_entry(bo, &exec[i]->unref_list, unref_head)
			unref_list_count++;
		state->bo_count += exec[i]->bo_count + unref_list_count;
	}

	kernel_state->bo = kcalloc(state->bo_count,
				   sizeof(*kernel_state->bo), GFP_ATOMIC);

	if (!kernel_state->bo) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		kfree(kernel_state);	/* don't leak the snapshot on allocation failure */
		return;
	}

	prev_idx = 0;
	for (i = 0; i < 2; i++) {
		if (!exec[i])
			continue;

		for (j = 0; j < exec[i]->bo_count; j++) {
			drm_gem_object_reference(&exec[i]->bo[j]->base);
			kernel_state->bo[j + prev_idx] = &exec[i]->bo[j]->base;
		}

		list_for_each_entry(bo, &exec[i]->unref_list, unref_head) {
			drm_gem_object_reference(&bo->base.base);
			kernel_state->bo[j + prev_idx] = &bo->base.base;
			j++;
		}
		/* j entries were stored this pass; the next job's BOs
		 * start right after them.
		 */
		prev_idx = j;
	}

	if (exec[0])
		state->start_bin = exec[0]->ct0ca;
	if (exec[1])
		state->start_render = exec[1]->ct1ca;

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	state->ct0ca = V3D_READ(V3D_CTNCA(0));
	state->ct0ea = V3D_READ(V3D_CTNEA(0));

	state->ct1ca = V3D_READ(V3D_CTNCA(1));
	state->ct1ea = V3D_READ(V3D_CTNEA(1));

	state->ct0cs = V3D_READ(V3D_CTNCS(0));
	state->ct1cs = V3D_READ(V3D_CTNCS(1));

	state->ct0ra0 = V3D_READ(V3D_CT00RA0);
	state->ct1ra0 = V3D_READ(V3D_CT01RA0);

	state->bpca = V3D_READ(V3D_BPCA);
	state->bpcs = V3D_READ(V3D_BPCS);
	state->bpoa = V3D_READ(V3D_BPOA);
	state->bpos = V3D_READ(V3D_BPOS);

	state->vpmbase = V3D_READ(V3D_VPMBASE);

	state->dbge = V3D_READ(V3D_DBGE);
	state->fdbgo = V3D_READ(V3D_FDBGO);
	state->fdbgb = V3D_READ(V3D_FDBGB);
	state->fdbgr = V3D_READ(V3D_FDBGR);
	state->fdbgs = V3D_READ(V3D_FDBGS);
	state->errstat = V3D_READ(V3D_ERRSTAT);

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	if (vc4->hang_state) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_free_hang_state(dev, kernel_state);
	} else {
		vc4->hang_state = kernel_state;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
	}
}

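/* Resets the GPU by power-cycling it through runtime PM, then lets
 * vc4_irq_reset() restart any remaining queued jobs.
 */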
static void
vc4_reset(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	DRM_INFO("Resetting GPU.\n");

	mutex_lock(&vc4->power_lock);
	if (vc4->power_refcount) {
		/* Power the device off and back on by dropping and
		 * reacquiring the runtime PM reference.
		 */
		pm_runtime_put_sync_suspend(&vc4->v3d->pdev->dev);
		pm_runtime_get_sync(&vc4->v3d->pdev->dev);
	}
	mutex_unlock(&vc4->power_lock);

	vc4_irq_reset(dev);

	/* Rearm the hangcheck -- another job might have been waiting
	 * for our hung one to get kicked off, and vc4_irq_reset()
	 * would have started it.
	 */
	vc4_queue_hangcheck(dev);
}

static void
vc4_reset_work(struct work_struct *work)
{
	struct vc4_dev *vc4 =
		container_of(work, struct vc4_dev, hangcheck.reset_work);

	vc4_save_hang_state(vc4->dev);

	vc4_reset(vc4->dev);
}

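/* Timer callback for hang detection.  If either control list has
 * advanced since the last check, the timer is simply rearmed;
 * otherwise the reset work is scheduled, since resetting can sleep
 * and therefore can't be done here.
 */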
static void
vc4_hangcheck_elapsed(unsigned long data)
{
	struct drm_device *dev = (struct drm_device *)data;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	uint32_t ct0ca, ct1ca;
	unsigned long irqflags;
	struct vc4_exec_info *bin_exec, *render_exec;

	spin_lock_irqsave(&vc4->job_lock, irqflags);

	bin_exec = vc4_first_bin_job(vc4);
	render_exec = vc4_first_render_job(vc4);

	/* If idle, we can stop watching for hangs. */
	if (!bin_exec && !render_exec) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return;
	}

	ct0ca = V3D_READ(V3D_CTNCA(0));
	ct1ca = V3D_READ(V3D_CTNCA(1));

	/* If we've made any progress in execution, rearm the timer
	 * and wait.
	 */
	if ((bin_exec && ct0ca != bin_exec->last_ct0ca) ||
	    (render_exec && ct1ca != render_exec->last_ct1ca)) {
		if (bin_exec)
			bin_exec->last_ct0ca = ct0ca;
		if (render_exec)
			render_exec->last_ct1ca = ct1ca;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_queue_hangcheck(dev);
		return;
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	/* We've gone too long with no progress, reset.  This has to
	 * be done from a work struct, since resetting can sleep and
	 * this timer hook isn't allowed to.
	 */
	schedule_work(&vc4->hangcheck.reset_work);
}

static void
submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Set the current and end address of the control list.
	 * Writing the end register is what starts the job.
	 */
	V3D_WRITE(V3D_CTNCA(thread), start);
	V3D_WRITE(V3D_CTNEA(thread), end);
}

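/* Sleeps until vc4->finished_seqno reaches @seqno.  Returns -ETIME on
 * timeout (a @timeout_ns of ~0ull never times out) and -ERESTARTSYS
 * if an @interruptible wait is broken by a signal.
 */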
int
vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns,
		   bool interruptible)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	int ret = 0;
	unsigned long timeout_expire;
	DEFINE_WAIT(wait);

	if (vc4->finished_seqno >= seqno)
		return 0;

	if (timeout_ns == 0)
		return -ETIME;

	timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns);

	trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
	for (;;) {
		prepare_to_wait(&vc4->job_wait_queue, &wait,
				interruptible ? TASK_INTERRUPTIBLE :
				TASK_UNINTERRUPTIBLE);

		if (interruptible && signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		if (vc4->finished_seqno >= seqno)
			break;

		if (timeout_ns != ~0ull) {
			if (time_after_eq(jiffies, timeout_expire)) {
				ret = -ETIME;
				break;
			}
			schedule_timeout(timeout_expire - jiffies);
		} else {
			schedule();
		}
	}
	finish_wait(&vc4->job_wait_queue, &wait);
	trace_vc4_wait_for_seqno_end(dev, seqno);

	return ret;
}

static void
vc4_flush_caches(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Flush the GPU L2 caches.  These caches sit on top of system
	 * L3 (the 128kb or so shared with the CPU), and are
	 * non-allocating in the L3.
	 */
	V3D_WRITE(V3D_L2CACTL,
		  V3D_L2CACTL_L2CCLR);

	V3D_WRITE(V3D_SLCACTL,
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC));
}

/* Sets the registers for the next job to be actually executed in
 * the hardware.
 *
 * The job_lock should be held during this.
 */
void
vc4_submit_next_bin_job(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_exec_info *exec;

again:
	exec = vc4_first_bin_job(vc4);
	if (!exec)
		return;

	vc4_flush_caches(dev);

	/* Either put the job in the binner if it uses the binner, or
	 * immediately move it to the to-be-rendered queue.
	 */
	if (exec->ct0ca != exec->ct0ea) {
		submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
	} else {
		vc4_move_job_to_render(dev, exec);
		goto again;
	}
}

void
vc4_submit_next_render_job(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_exec_info *exec = vc4_first_render_job(vc4);

	if (!exec)
		return;

	submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
}

void
vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	bool was_empty = list_empty(&vc4->render_job_list);

	list_move_tail(&exec->head, &vc4->render_job_list);
	if (was_empty)
		vc4_submit_next_render_job(dev);
}

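/* Stamps all BOs referenced by a job with the job's seqno, so waiters
 * such as vc4_wait_bo_ioctl() can tell when the hardware is done with
 * them.  BOs written by the RCL additionally get write_seqno, used
 * for binner dependency tracking.
 */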
static void
vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
{
	struct vc4_bo *bo;
	unsigned i;

	for (i = 0; i < exec->bo_count; i++) {
		bo = to_vc4_bo(&exec->bo[i]->base);
		bo->seqno = seqno;
	}

	list_for_each_entry(bo, &exec->unref_list, unref_head) {
		bo->seqno = seqno;
	}

	for (i = 0; i < exec->rcl_write_bo_count; i++) {
		bo = to_vc4_bo(&exec->rcl_write_bo[i]->base);
		bo->write_seqno = seqno;
	}
}

/* Queues a struct vc4_exec_info for execution.  If no job is
 * currently executing, then submits it.
 *
 * Unlike most GPUs, our hardware only handles one command list at a
 * time.  To queue multiple jobs at once, we'd need to edit the
 * previous command list to have a jump to the new one at the end, and
 * then bump the end address.  That's a change for a later date,
 * though.
 */
static void
vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	uint64_t seqno;
	unsigned long irqflags;

	spin_lock_irqsave(&vc4->job_lock, irqflags);

	seqno = ++vc4->emit_seqno;
	exec->seqno = seqno;
	vc4_update_bo_seqnos(exec, seqno);

	list_add_tail(&exec->head, &vc4->bin_job_list);

	/* If no job was executing, kick ours off.  Otherwise, it'll
	 * get started when the previous job's flush done interrupt
	 * occurs.
	 */
	if (vc4_first_bin_job(vc4) == exec) {
		vc4_submit_next_bin_job(dev);
		vc4_queue_hangcheck(dev);
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
}

/**
 * Looks up a bunch of GEM handles for BOs and stores the array for
 * use in the command validator that actually writes relocated
 * addresses pointing to them.
 */
static int
vc4_cl_lookup_bos(struct drm_device *dev,
		  struct drm_file *file_priv,
		  struct vc4_exec_info *exec)
{
	struct drm_vc4_submit_cl *args = exec->args;
	uint32_t *handles;
	int ret = 0;
	int i;

	exec->bo_count = args->bo_handle_count;

	if (!exec->bo_count) {
		/* See comment on bo_index for why we have to check
		 * this.
		 */
		DRM_ERROR("Rendering requires BOs to validate\n");
		return -EINVAL;
	}

	exec->bo = drm_calloc_large(exec->bo_count,
				    sizeof(struct drm_gem_cma_object *));
	if (!exec->bo) {
		DRM_ERROR("Failed to allocate validated BO pointers\n");
		return -ENOMEM;
	}

	handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t));
	if (!handles) {
		ret = -ENOMEM;
		DRM_ERROR("Failed to allocate incoming GEM handles\n");
		goto fail;
	}

	if (copy_from_user(handles,
			   (void __user *)(uintptr_t)args->bo_handles,
			   exec->bo_count * sizeof(uint32_t))) {
		ret = -EFAULT;
		DRM_ERROR("Failed to copy in GEM handles\n");
		goto fail;
	}

	spin_lock(&file_priv->table_lock);
	for (i = 0; i < exec->bo_count; i++) {
		struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
						     handles[i]);
		if (!bo) {
			DRM_ERROR("Failed to look up GEM BO %d: %d\n",
				  i, handles[i]);
			ret = -EINVAL;
			spin_unlock(&file_priv->table_lock);
			goto fail;
		}
		drm_gem_object_reference(bo);
		exec->bo[i] = (struct drm_gem_cma_object *)bo;
	}
	spin_unlock(&file_priv->table_lock);

fail:
	drm_free_large(handles);
	return ret;
}

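/* Copies the binner command list, shader records, and uniforms in
 * from userspace, validates them, and stores the validated copies in
 * a new BO that the hardware will actually execute from.
 */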
static int
vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct drm_vc4_submit_cl *args = exec->args;
	void *temp = NULL;
	void *bin;
	int ret = 0;
	uint32_t bin_offset = 0;
	uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size,
					     16);
	uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size;
	uint32_t exec_size = uniforms_offset + args->uniforms_size;
	uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
					  args->shader_rec_count);
	struct vc4_bo *bo;

	if (shader_rec_offset < args->bin_cl_size ||
	    uniforms_offset < shader_rec_offset ||
	    exec_size < uniforms_offset ||
	    args->shader_rec_count >= (UINT_MAX /
				       sizeof(struct vc4_shader_state)) ||
	    temp_size < exec_size) {
		DRM_ERROR("overflow in exec arguments\n");
		ret = -EINVAL;
		goto fail;
	}

	/* Allocate space where we'll store the copied in user command lists
	 * and shader records.
	 *
	 * We don't just copy directly into the BOs because we need to
	 * read the contents back for validation, and I think the
	 * bo->vaddr is uncached access.
	 */
	temp = drm_malloc_ab(temp_size, 1);
	if (!temp) {
		DRM_ERROR("Failed to allocate storage for copying "
			  "in bin/render CLs.\n");
		ret = -ENOMEM;
		goto fail;
	}
	bin = temp + bin_offset;
	exec->shader_rec_u = temp + shader_rec_offset;
	exec->uniforms_u = temp + uniforms_offset;
	exec->shader_state = temp + exec_size;
	exec->shader_state_size = args->shader_rec_count;

	if (copy_from_user(bin,
			   (void __user *)(uintptr_t)args->bin_cl,
			   args->bin_cl_size)) {
		ret = -EFAULT;
		goto fail;
	}

	if (copy_from_user(exec->shader_rec_u,
			   (void __user *)(uintptr_t)args->shader_rec,
			   args->shader_rec_size)) {
		ret = -EFAULT;
		goto fail;
	}

	if (copy_from_user(exec->uniforms_u,
			   (void __user *)(uintptr_t)args->uniforms,
			   args->uniforms_size)) {
		ret = -EFAULT;
		goto fail;
	}

	bo = vc4_bo_create(dev, exec_size, true);
	if (IS_ERR(bo)) {
		DRM_ERROR("Couldn't allocate BO for binning\n");
		ret = PTR_ERR(bo);
		goto fail;
	}
	exec->exec_bo = &bo->base;

	list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
		      &exec->unref_list);

	exec->ct0ca = exec->exec_bo->paddr + bin_offset;

	exec->bin_u = bin;

	exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
	exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
	exec->shader_rec_size = args->shader_rec_size;

	exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
	exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
	exec->uniforms_size = args->uniforms_size;

	ret = vc4_validate_bin_cl(dev,
				  exec->exec_bo->vaddr + bin_offset,
				  bin,
				  exec);
	if (ret)
		goto fail;

	ret = vc4_validate_shader_recs(dev, exec);
	if (ret)
		goto fail;

	/* Block waiting on any previous rendering into the CS's VBO,
	 * IB, or textures, so that pixels are actually written by the
	 * time we try to read them.
	 */
	ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true);

fail:
	drm_free_large(temp);
	return ret;
}

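/* Tears down a finished (or failed) job: drops the BO references, the
 * runtime-PM reference taken at submit time, and the exec struct
 * itself.
 */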
static void
vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	unsigned i;

	if (exec->bo) {
		for (i = 0; i < exec->bo_count; i++)
			drm_gem_object_unreference_unlocked(&exec->bo[i]->base);
		drm_free_large(exec->bo);
	}

	while (!list_empty(&exec->unref_list)) {
		struct vc4_bo *bo = list_first_entry(&exec->unref_list,
						     struct vc4_bo, unref_head);
		list_del(&bo->unref_head);
		drm_gem_object_unreference_unlocked(&bo->base.base);
	}

	mutex_lock(&vc4->power_lock);
	if (--vc4->power_refcount == 0) {
		pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
		pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev);
	}
	mutex_unlock(&vc4->power_lock);

	kfree(exec);
}

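/* Reaps jobs off job_done_list and runs any seqno callbacks that have
 * now been satisfied.  job_lock is dropped around vc4_complete_exec(),
 * which can sleep.
 */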
void
vc4_job_handle_completed(struct vc4_dev *vc4)
{
	unsigned long irqflags;
	struct vc4_seqno_cb *cb, *cb_temp;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	while (!list_empty(&vc4->job_done_list)) {
		struct vc4_exec_info *exec =
			list_first_entry(&vc4->job_done_list,
					 struct vc4_exec_info, head);
		list_del(&exec->head);

		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_complete_exec(vc4->dev, exec);
		spin_lock_irqsave(&vc4->job_lock, irqflags);
	}

	list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) {
		if (cb->seqno <= vc4->finished_seqno) {
			list_del_init(&cb->work.entry);
			schedule_work(&cb->work);
		}
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
}

static void vc4_seqno_cb_work(struct work_struct *work)
{
	struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work);

	cb->func(cb);
}

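/* Registers @cb to be run from a workqueue once @seqno has been
 * finished; if it already has, the callback is scheduled immediately.
 */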
int vc4_queue_seqno_cb(struct drm_device *dev,
		       struct vc4_seqno_cb *cb, uint64_t seqno,
		       void (*func)(struct vc4_seqno_cb *cb))
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	int ret = 0;
	unsigned long irqflags;

	cb->func = func;
	INIT_WORK(&cb->work, vc4_seqno_cb_work);

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	if (seqno > vc4->finished_seqno) {
		cb->seqno = seqno;
		list_add_tail(&cb->work.entry, &vc4->seqno_cb_list);
	} else {
		schedule_work(&cb->work);
	}
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	return ret;
}

/* Scheduled when any job has been completed, this walks the list of
 * jobs that had completed and unrefs their BOs and frees their exec
 * structs.
 */
static void
vc4_job_done_work(struct work_struct *work)
{
	struct vc4_dev *vc4 =
		container_of(work, struct vc4_dev, job_done_work);

	vc4_job_handle_completed(vc4);
}

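/* Common helper for the WAIT_SEQNO and WAIT_BO ioctls.  When the wait
 * is interrupted, the remaining timeout is written back so that a
 * restarted ioctl doesn't wait longer than originally requested.
 */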
static int
vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev,
				uint64_t seqno,
				uint64_t *timeout_ns)
{
	unsigned long start = jiffies;
	int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true);

	if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) {
		uint64_t delta = jiffies_to_nsecs(jiffies - start);

		if (*timeout_ns >= delta)
			*timeout_ns -= delta;
	}

	return ret;
}

int
vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct drm_vc4_wait_seqno *args = data;

	return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno,
					       &args->timeout_ns);
}

int
vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
		  struct drm_file *file_priv)
{
	int ret;
	struct drm_vc4_wait_bo *args = data;
	struct drm_gem_object *gem_obj;
	struct vc4_bo *bo;

	if (args->pad != 0)
		return -EINVAL;

	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
	if (!gem_obj) {
		DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
		return -EINVAL;
	}
	bo = to_vc4_bo(gem_obj);

	ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
					      &args->timeout_ns);

	drm_gem_object_unreference_unlocked(gem_obj);
	return ret;
}

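/* A rough sketch of the userspace side of the submit/wait ioctls, for
 * orientation only (the exact wrapper code depends on the libdrm or
 * Mesa winsys in use):
 *
 *	struct drm_vc4_submit_cl submit = {
 *		// bin/render CLs, shader recs, uniforms, BO handles...
 *	};
 *	drmIoctl(fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
 *
 *	// submit.seqno now names the job; block until it finishes.
 *	struct drm_vc4_wait_seqno wait = {
 *		.seqno = submit.seqno,
 *		.timeout_ns = ~0ull,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_VC4_WAIT_SEQNO, &wait);
 */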
/**
 * Submits a command list to the VC4.
 *
 * This is what is called batchbuffer emitting on other hardware.
 */
int
vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_vc4_submit_cl *args = data;
	struct vc4_exec_info *exec;
	int ret = 0;

	if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
		DRM_ERROR("Unknown flags: 0x%02x\n", args->flags);
		return -EINVAL;
	}

	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
	if (!exec) {
		DRM_ERROR("malloc failure on exec struct\n");
		return -ENOMEM;
	}

	mutex_lock(&vc4->power_lock);
	if (vc4->power_refcount++ == 0)
		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
	mutex_unlock(&vc4->power_lock);
	if (ret < 0) {
		kfree(exec);
		return ret;
	}

	exec->args = args;
	INIT_LIST_HEAD(&exec->unref_list);

	ret = vc4_cl_lookup_bos(dev, file_priv, exec);
	if (ret)
		goto fail;

	if (exec->args->bin_cl_size != 0) {
		ret = vc4_get_bcl(dev, exec);
		if (ret)
			goto fail;
	} else {
		exec->ct0ca = 0;
		exec->ct0ea = 0;
	}

	ret = vc4_get_rcl(dev, exec);
	if (ret)
		goto fail;

	/* Clear this out of the struct we'll be putting in the queue,
	 * since it's part of our stack.
	 */
	exec->args = NULL;

	vc4_queue_submit(dev, exec);

	/* Return the seqno for our job. */
	args->seqno = vc4->emit_seqno;

	return 0;

fail:
	vc4_complete_exec(vc4->dev, exec);

	return ret;
}

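/* One-time setup at driver load: the job lists and locks, the
 * hang-check timer, and the work items that the IRQ handler and timer
 * defer to.
 */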
void
vc4_gem_init(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	INIT_LIST_HEAD(&vc4->bin_job_list);
	INIT_LIST_HEAD(&vc4->render_job_list);
	INIT_LIST_HEAD(&vc4->job_done_list);
	INIT_LIST_HEAD(&vc4->seqno_cb_list);
	spin_lock_init(&vc4->job_lock);

	INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
	setup_timer(&vc4->hangcheck.timer,
		    vc4_hangcheck_elapsed,
		    (unsigned long)dev);

	INIT_WORK(&vc4->job_done_work, vc4_job_done_work);

	mutex_init(&vc4->power_lock);
}

void
vc4_gem_destroy(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Waiting for exec to finish would need to be done before
	 * unregistering V3D.
	 */
	WARN_ON(vc4->emit_seqno != vc4->finished_seqno);

	/* V3D should already have disabled its interrupt and cleared
	 * the overflow allocation registers.  Now free the object.
	 */
	if (vc4->overflow_mem) {
		drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
		vc4->overflow_mem = NULL;
	}

	if (vc4->hang_state)
		vc4_free_hang_state(dev, vc4->hang_state);

	vc4_bo_cache_destroy(dev);
}