/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
#include <linux/list_sort.h>
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_trace.h"

#define RADEON_CS_MAX_PRIORITY		32u
#define RADEON_CS_NUM_BUCKETS		(RADEON_CS_MAX_PRIORITY + 1)
/* This is based on the bucket sort with O(n) time complexity.
 * An item with priority "i" is added to bucket[i]. The lists are then
 * concatenated in descending order.
 */
struct radeon_cs_buckets {
	struct list_head bucket[RADEON_CS_NUM_BUCKETS];
};
static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
{
	unsigned i;

	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
		INIT_LIST_HEAD(&b->bucket[i]);
}
static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
				  struct list_head *item, unsigned priority)
{
	/* Since buffers which appear sooner in the relocation list are
	 * likely to be used more often than buffers which appear later
	 * in the list, the sort mustn't change the ordering of buffers
	 * with the same priority, i.e. it must be stable.
	 */
	list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
}
static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
				       struct list_head *out_list)
{
	unsigned i;

	/* Connect the sorted buckets in the output list. */
	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
		list_splice(&b->bucket[i], out_list);
	}
}
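
/* Look up every buffer object referenced by the relocation chunk, pick its
 * preferred/allowed domains, sort the BOs by priority with the buckets above
 * and validate the resulting list.
 */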
static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
{
	struct drm_device *ddev = p->rdev->ddev;
	struct radeon_cs_chunk *chunk;
	struct radeon_cs_buckets buckets;
	unsigned i;
	bool need_mmap_lock = false;
	int r;

	if (p->chunk_relocs == NULL) {
		return 0;
	}
	chunk = p->chunk_relocs;
	/* FIXME: we assume that each relocs use 4 dwords */
	p->nrelocs = chunk->length_dw / 4;
	p->relocs = drm_calloc_large(p->nrelocs, sizeof(struct radeon_bo_list));
	if (p->relocs == NULL) {
		return -ENOMEM;
	}

	radeon_cs_buckets_init(&buckets);
	for (i = 0; i < p->nrelocs; i++) {
		struct drm_radeon_cs_reloc *r;
		struct drm_gem_object *gobj;
		unsigned priority;

		r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
		gobj = drm_gem_object_lookup(ddev, p->filp, r->handle);
		if (gobj == NULL) {
			DRM_ERROR("gem object lookup failed 0x%x\n",
				  r->handle);
			return -ENOENT;
		}
		p->relocs[i].robj = gem_to_radeon_bo(gobj);
		/* The userspace buffer priorities are from 0 to 15. A higher
		 * number means the buffer is more important.
		 * Also, the buffers used for write have a higher priority than
		 * the buffers used for read only, which doubles the range
		 * to 0 to 31. 32 is reserved for the kernel driver.
		 */
		priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2
			   + !!r->write_domain;

		/* the first reloc of an UVD job is the msg and that must be in
		   VRAM, also put everything into VRAM on AGP cards and older
		   IGP chips to avoid image corruptions */
		if (p->ring == R600_RING_TYPE_UVD_INDEX &&
		    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev) ||
		     p->rdev->family == CHIP_RS780 ||
		     p->rdev->family == CHIP_RS880)) {

			/* TODO: is this still needed for NI+ ? */
			p->relocs[i].prefered_domains =
				RADEON_GEM_DOMAIN_VRAM;

			p->relocs[i].allowed_domains =
				RADEON_GEM_DOMAIN_VRAM;

			/* prioritize this over any other relocation */
			priority = RADEON_CS_MAX_PRIORITY;
		} else {
			uint32_t domain = r->write_domain ?
				r->write_domain : r->read_domains;

			if (domain & RADEON_GEM_DOMAIN_CPU) {
				DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid "
					  "for command submission\n");
				return -EINVAL;
			}

			p->relocs[i].prefered_domains = domain;
			if (domain == RADEON_GEM_DOMAIN_VRAM)
				domain |= RADEON_GEM_DOMAIN_GTT;
			p->relocs[i].allowed_domains = domain;
		}
		if (radeon_ttm_tt_has_userptr(p->relocs[i].robj->tbo.ttm)) {
			uint32_t domain = p->relocs[i].prefered_domains;

			if (!(domain & RADEON_GEM_DOMAIN_GTT)) {
				DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is "
					  "allowed for userptr BOs\n");
				return -EINVAL;
			}
			need_mmap_lock = true;
			domain = RADEON_GEM_DOMAIN_GTT;
			p->relocs[i].prefered_domains = domain;
			p->relocs[i].allowed_domains = domain;
		}
		p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
		p->relocs[i].tv.shared = !r->write_domain;

		radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
				      priority);
	}

	radeon_cs_buckets_get_list(&buckets, &p->validated);
	if (p->cs_flags & RADEON_CS_USE_VM)
		p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
					      &p->validated);
	if (need_mmap_lock)
		down_read(&current->mm->mmap_sem);

	r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);

	if (need_mmap_lock)
		up_read(&current->mm->mmap_sem);

	return r;
}
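
/* Map the ring id and priority requested by userspace to the kernel ring
 * index used for this submission.
 */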
static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
{
	p->priority = priority;

	switch (ring) {
	default:
		DRM_ERROR("unknown ring id: %d\n", ring);
		return -EINVAL;
	case RADEON_CS_RING_GFX:
		p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_COMPUTE:
		if (p->rdev->family >= CHIP_TAHITI) {
			if (p->priority > 0)
				p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
		} else
			p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_DMA:
		if (p->rdev->family >= CHIP_CAYMAN) {
			if (p->priority > 0)
				p->ring = R600_RING_TYPE_DMA_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
		} else if (p->rdev->family >= CHIP_RV770) {
			p->ring = R600_RING_TYPE_DMA_INDEX;
		} else {
			return -EINVAL;
		}
		break;
	case RADEON_CS_RING_UVD:
		p->ring = R600_RING_TYPE_UVD_INDEX;
		break;
	case RADEON_CS_RING_VCE:
		/* TODO: only use the low priority ring for now */
		p->ring = TN_RING_TYPE_VCE1_INDEX;
		break;
	}
	return 0;
}
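
/* Make this submission wait for the fences of all validated BOs, so work
 * still pending on other rings finishes before the IB touches the buffers.
 */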
static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
{
	struct radeon_bo_list *reloc;
	int r;

	list_for_each_entry(reloc, &p->validated, tv.head) {
		struct reservation_object *resv;

		resv = reloc->robj->tbo.resv;
		r = radeon_sync_resv(p->rdev, &p->ib.sync, resv,
				     reloc->tv.shared);
		if (r)
			return r;
	}
	return 0;
}
/* XXX: note that this is called from the legacy UMS CS ioctl as well */
int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
{
	struct drm_radeon_cs *cs = data;
	uint64_t *chunk_array_ptr;
	unsigned size, i;
	u32 ring = RADEON_CS_RING_GFX;
	s32 priority = 0;

	INIT_LIST_HEAD(&p->validated);

	if (!cs->num_chunks) {
		return 0;
	}

	/* get chunks */
	p->idx = 0;
	p->ib.sa_bo = NULL;
	p->const_ib.sa_bo = NULL;
	p->chunk_ib = NULL;
	p->chunk_relocs = NULL;
	p->chunk_flags = NULL;
	p->chunk_const_ib = NULL;
	p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (p->chunks_array == NULL) {
		return -ENOMEM;
	}
	chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
	if (copy_from_user(p->chunks_array, chunk_array_ptr,
			   sizeof(uint64_t)*cs->num_chunks)) {
		return -EFAULT;
	}
	p->cs_flags = 0;
	p->nchunks = cs->num_chunks;
	p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
	if (p->chunks == NULL) {
		return -ENOMEM;
	}
	for (i = 0; i < p->nchunks; i++) {
		struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
		struct drm_radeon_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = (void __user *)(unsigned long)p->chunks_array[i];
		if (copy_from_user(&user_chunk, chunk_ptr,
				   sizeof(struct drm_radeon_cs_chunk))) {
			return -EFAULT;
		}
		p->chunks[i].length_dw = user_chunk.length_dw;
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_RELOCS) {
			p->chunk_relocs = &p->chunks[i];
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
			p->chunk_ib = &p->chunks[i];
			/* zero length IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB) {
			p->chunk_const_ib = &p->chunks[i];
			/* zero length CONST IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
			p->chunk_flags = &p->chunks[i];
			/* zero length flags aren't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		size = p->chunks[i].length_dw;
		cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
		p->chunks[i].user_ptr = cdata;
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB)
			continue;

		if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
			if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
				continue;
		}

		p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
		size *= sizeof(uint32_t);
		if (p->chunks[i].kdata == NULL) {
			return -ENOMEM;
		}
		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
			return -EFAULT;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
			p->cs_flags = p->chunks[i].kdata[0];
			if (p->chunks[i].length_dw > 1)
				ring = p->chunks[i].kdata[1];
			if (p->chunks[i].length_dw > 2)
				priority = (s32)p->chunks[i].kdata[2];
		}
	}
	/* these are KMS only */
	if (p->rdev) {
		if ((p->cs_flags & RADEON_CS_USE_VM) &&
		    !p->rdev->vm_manager.enabled) {
			DRM_ERROR("VM not active on asic!\n");
			return -EINVAL;
		}

		if (radeon_cs_get_ring(p, ring, priority))
			return -EINVAL;

		/* we only support VM on some SI+ rings */
		if ((p->cs_flags & RADEON_CS_USE_VM) == 0) {
			if (p->rdev->asic->ring[p->ring]->cs_parse == NULL) {
				DRM_ERROR("Ring %d requires VM!\n", p->ring);
				return -EINVAL;
			}
		} else {
			if (p->rdev->asic->ring[p->ring]->ib_parse == NULL) {
				DRM_ERROR("VM not supported on ring %d!\n",
					  p->ring);
				return -EINVAL;
			}
		}
	}

	return 0;
}
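
/* list_sort() comparison callback used by radeon_cs_parser_fini() to order
 * BOs from the smallest to the largest buffer.
 */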
static int cmp_size_smaller_first(void *priv, struct list_head *a,
				  struct list_head *b)
{
	struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, tv.head);
	struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, tv.head);

	/* Sort A before B if A is smaller. */
	return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
}
/**
 * radeon_cs_parser_fini() - clean parser states
 * @parser:	parser structure holding parsing context.
 * @error:	error number
 * @backoff:	indicator to back off the reservation
 *
 * If error is set, then unvalidate buffer, otherwise just free memory
 * used by parsing context.
 **/
static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff)
{
	unsigned i;

	if (!error) {
		/* Sort the buffer list from the smallest to largest buffer,
		 * which affects the order of buffers in the LRU list.
		 * This assures that the smallest buffers are added first
		 * to the LRU list, so they are likely to be later evicted
		 * first, instead of large buffers whose eviction is more
		 * expensive.
		 *
		 * This slightly lowers the number of bytes moved by TTM
		 * per frame under memory pressure.
		 */
		list_sort(NULL, &parser->validated, cmp_size_smaller_first);

		ttm_eu_fence_buffer_objects(&parser->ticket,
					    &parser->validated,
					    &parser->ib.fence->base);
	} else if (backoff) {
		ttm_eu_backoff_reservation(&parser->ticket,
					   &parser->validated);
	}
	if (parser->relocs != NULL) {
		for (i = 0; i < parser->nrelocs; i++) {
			struct radeon_bo *bo = parser->relocs[i].robj;

			if (bo == NULL)
				continue;

			drm_gem_object_unreference_unlocked(&bo->gem_base);
		}
	}
	kfree(parser->track);
	drm_free_large(parser->relocs);
	drm_free_large(parser->vm_bos);
	for (i = 0; i < parser->nchunks; i++)
		drm_free_large(parser->chunks[i].kdata);
	kfree(parser->chunks);
	kfree(parser->chunks_array);
	radeon_ib_free(parser->rdev, &parser->ib);
	radeon_ib_free(parser->rdev, &parser->const_ib);
}
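
/* Submission path for command streams that don't use a VM: run the per-ring
 * CS checker and schedule the IB directly.
 */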
static int radeon_cs_ib_chunk(struct radeon_device *rdev,
			      struct radeon_cs_parser *parser)
{
	int r;

	if (parser->chunk_ib == NULL)
		return 0;

	if (parser->cs_flags & RADEON_CS_USE_VM)
		return 0;

	r = radeon_cs_parse(rdev, parser->ring, parser);
	if (r || parser->parser_error) {
		DRM_ERROR("Invalid command stream !\n");
		return r;
	}

	r = radeon_cs_sync_rings(parser);
	if (r) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("Failed to sync rings: %i\n", r);
		return r;
	}

	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
		radeon_uvd_note_usage(rdev);
	else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) ||
		 (parser->ring == TN_RING_TYPE_VCE2_INDEX))
		radeon_vce_note_usage(rdev);

	r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
	if (r) {
		DRM_ERROR("Failed to schedule IB !\n");
	}
	return r;
}
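
/* Bring the VM page tables up to date for every BO referenced by this
 * command stream before the IB is executed.
 */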
static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
				   struct radeon_vm *vm)
{
	struct radeon_device *rdev = p->rdev;
	struct radeon_bo_va *bo_va;
	int i, r;

	r = radeon_vm_update_page_directory(rdev, vm);
	if (r)
		return r;

	r = radeon_vm_clear_freed(rdev, vm);
	if (r)
		return r;

	if (vm->ib_bo_va == NULL) {
		DRM_ERROR("Tmp BO not in VM!\n");
		return -EINVAL;
	}

	r = radeon_vm_bo_update(rdev, vm->ib_bo_va,
				&rdev->ring_tmp_bo.bo->tbo.mem);
	if (r)
		return r;

	for (i = 0; i < p->nrelocs; i++) {
		struct radeon_bo *bo;

		bo = p->relocs[i].robj;
		bo_va = radeon_vm_bo_find(vm, bo);
		if (bo_va == NULL) {
			dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
			return -EINVAL;
		}

		r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
		if (r)
			return r;

		radeon_sync_fence(&p->ib.sync, bo_va->last_pt_update);
	}

	return radeon_vm_clear_invalids(rdev, vm);
}
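
/* Submission path for command streams that use a per-file VM: parse the IBs,
 * update the page tables under the VM mutex and schedule the IB (plus the
 * optional const IB on SI and newer chips).
 */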
static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
				 struct radeon_cs_parser *parser)
{
	struct radeon_fpriv *fpriv = parser->filp->driver_priv;
	struct radeon_vm *vm = &fpriv->vm;
	int r;

	if (parser->chunk_ib == NULL)
		return 0;
	if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
		return 0;

	if (parser->const_ib.length_dw) {
		r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
		if (r) {
			return r;
		}
	}

	r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
	if (r) {
		return r;
	}

	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
		radeon_uvd_note_usage(rdev);

	mutex_lock(&vm->mutex);
	r = radeon_bo_vm_update_pte(parser, vm);
	if (r)
		goto out;

	r = radeon_cs_sync_rings(parser);
	if (r) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("Failed to sync rings: %i\n", r);
		goto out;
	}

	if ((rdev->family >= CHIP_TAHITI) &&
	    (parser->chunk_const_ib != NULL)) {
		r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
	} else {
		r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
	}

out:
	mutex_unlock(&vm->mutex);
	return r;
}
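
/* If the submission failed because of a GPU lockup, try a reset and ask
 * userspace to resubmit by returning -EAGAIN.
 */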
static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
{
	if (r == -EDEADLK) {
		r = radeon_gpu_reset(rdev);
		if (!r)
			r = -EAGAIN;
	}
	return r;
}
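
/* Allocate the kernel IB (and the const IB for VM submissions on SI+) and
 * copy the userspace command stream into it.
 */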
static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser *parser)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_vm *vm = NULL;
	int r;

	if (parser->chunk_ib == NULL)
		return 0;

	if (parser->cs_flags & RADEON_CS_USE_VM) {
		struct radeon_fpriv *fpriv = parser->filp->driver_priv;
		vm = &fpriv->vm;

		if ((rdev->family >= CHIP_TAHITI) &&
		    (parser->chunk_const_ib != NULL)) {
			ib_chunk = parser->chunk_const_ib;
			if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
				DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
				return -EINVAL;
			}
			r = radeon_ib_get(rdev, parser->ring, &parser->const_ib,
					  vm, ib_chunk->length_dw * 4);
			if (r) {
				DRM_ERROR("Failed to get const ib !\n");
				return r;
			}
			parser->const_ib.is_const_ib = true;
			parser->const_ib.length_dw = ib_chunk->length_dw;
			if (copy_from_user(parser->const_ib.ptr,
					   ib_chunk->user_ptr,
					   ib_chunk->length_dw * 4))
				return -EFAULT;
		}

		ib_chunk = parser->chunk_ib;
		if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
			DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
			return -EINVAL;
		}
	}
	ib_chunk = parser->chunk_ib;

	r = radeon_ib_get(rdev, parser->ring, &parser->ib,
			  vm, ib_chunk->length_dw * 4);
	if (r) {
		DRM_ERROR("Failed to get ib !\n");
		return r;
	}
	parser->ib.length_dw = ib_chunk->length_dw;
	if (ib_chunk->kdata)
		memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4);
	else if (copy_from_user(parser->ib.ptr, ib_chunk->user_ptr, ib_chunk->length_dw * 4))
		return -EFAULT;
	return 0;
}
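
/* Main CS ioctl entry point: initialize the parser, validate relocations and
 * hand the IB to the VM or non-VM submission path.
 */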
int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct radeon_device *rdev = dev->dev_private;
	struct radeon_cs_parser parser;
	int r;

	down_read(&rdev->exclusive_lock);
	if (!rdev->accel_working) {
		up_read(&rdev->exclusive_lock);
		return -EBUSY;
	}
	if (rdev->in_reset) {
		up_read(&rdev->exclusive_lock);
		r = radeon_gpu_reset(rdev);
		if (!r)
			r = -EAGAIN;
		return r;
	}
	/* initialize parser */
	memset(&parser, 0, sizeof(struct radeon_cs_parser));
	parser.filp = filp;
	parser.rdev = rdev;
	parser.dev = rdev->dev;
	parser.family = rdev->family;
	r = radeon_cs_parser_init(&parser, data);
	if (r) {
		DRM_ERROR("Failed to initialize parser !\n");
		radeon_cs_parser_fini(&parser, r, false);
		up_read(&rdev->exclusive_lock);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}

	r = radeon_cs_ib_fill(rdev, &parser);
	if (!r) {
		r = radeon_cs_parser_relocs(&parser);
		if (r && r != -ERESTARTSYS)
			DRM_ERROR("Failed to parse relocation %d!\n", r);
	}

	if (r) {
		radeon_cs_parser_fini(&parser, r, false);
		up_read(&rdev->exclusive_lock);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}

	trace_radeon_cs(&parser);

	r = radeon_cs_ib_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
	r = radeon_cs_ib_vm_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
out:
	radeon_cs_parser_fini(&parser, r, true);
	up_read(&rdev->exclusive_lock);
	r = radeon_cs_handle_lockup(rdev, r);
	return r;
}
/**
 * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @p:		parser structure holding parsing context.
 * @pkt:	where to store packet information
 * @idx:	index of the packet header in the ib chunk
 *
 * Assume that chunk_ib_index is properly set. Will return -EINVAL
 * if packet is bigger than remaining ib size, or if packet is unknown.
 **/
int radeon_cs_packet_parse(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt,
			   unsigned idx)
{
	struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
	struct radeon_device *rdev = p->rdev;
	uint32_t header;
	int ret = 0, i;

	if (idx >= ib_chunk->length_dw) {
		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
			  idx, ib_chunk->length_dw);
		return -EINVAL;
	}
	header = radeon_get_ib_value(p, idx);
	pkt->idx = idx;
	pkt->type = RADEON_CP_PACKET_GET_TYPE(header);
	pkt->count = RADEON_CP_PACKET_GET_COUNT(header);
	pkt->one_reg_wr = 0;
	switch (pkt->type) {
	case RADEON_PACKET_TYPE0:
		if (rdev->family < CHIP_R600) {
			pkt->reg = R100_CP_PACKET0_GET_REG(header);
			pkt->one_reg_wr =
				RADEON_CP_PACKET0_GET_ONE_REG_WR(header);
		} else
			pkt->reg = R600_CP_PACKET0_GET_REG(header);
		break;
	case RADEON_PACKET_TYPE3:
		pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header);
		break;
	case RADEON_PACKET_TYPE2:
		pkt->count = -1;
		break;
	default:
		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
		ret = -EINVAL;
		goto dump_ib;
	}
	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
		ret = -EINVAL;
		goto dump_ib;
	}
	return 0;

dump_ib:
	for (i = 0; i < ib_chunk->length_dw; i++) {
		if (i == idx)
			printk("\t0x%08x <---\n", radeon_get_ib_value(p, i));
		else
			printk("\t0x%08x\n", radeon_get_ib_value(p, i));
	}
	return ret;
}
/**
 * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP
 * @p:		structure holding the parser context.
 *
 * Check if the next packet is NOP relocation packet3.
 **/
bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet p3reloc;
	int r;

	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return false;
	if (p3reloc.type != RADEON_PACKET_TYPE3)
		return false;
	if (p3reloc.opcode != RADEON_PACKET3_NOP)
		return false;
	return true;
}
/**
 * radeon_cs_dump_packet() - dump raw packet context
 * @p:		structure holding the parser context.
 * @pkt:	structure holding the packet.
 *
 * Used mostly for debugging and error reporting.
 **/
void radeon_cs_dump_packet(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt)
{
	volatile uint32_t *ib;
	unsigned i;
	unsigned idx;

	ib = p->ib.ptr;
	idx = pkt->idx;
	for (i = 0; i <= (pkt->count + 1); i++, idx++)
		DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
}
/**
 * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
 * @p:		parser structure holding parsing context.
 * @cs_reloc:	reloc information
 * @nomm:	no memory management for debugging
 *
 * Check if next packet is relocation packet3, do bo validation and compute
 * GPU offset using the provided start.
 **/
int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
				struct radeon_bo_list **cs_reloc,
				int nomm)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_packet p3reloc;
	unsigned idx;
	int r;

	if (p->chunk_relocs == NULL) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}
	*cs_reloc = NULL;
	relocs_chunk = p->chunk_relocs;
	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return r;
	p->idx += p3reloc.count + 2;
	if (p3reloc.type != RADEON_PACKET_TYPE3 ||
	    p3reloc.opcode != RADEON_PACKET3_NOP) {
		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
			  p3reloc.idx);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	/* FIXME: we assume reloc size is 4 dwords */
	if (nomm) {
		*cs_reloc = p->relocs;
		(*cs_reloc)->gpu_offset =
			(u64)relocs_chunk->kdata[idx + 3] << 32;
		(*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
	} else
		*cs_reloc = &p->relocs[(idx / 4)];
	return 0;
}