/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */

#include <linux/list_sort.h>
#include <linux/pci.h>
#include <linux/uaccess.h>

#include <drm/drm_device.h>
#include <drm/drm_file.h>
#include <drm/radeon_drm.h>

#include "radeon.h"
#include "radeon_reg.h"
#include "radeon_trace.h"

/* Highest relocation priority and therefore the index of the last bucket.
 * Priorities handed to radeon_cs_buckets_add() are clamped to this value.
 */
#define RADEON_CS_MAX_PRIORITY		32u
/* One bucket per priority value in the range [0, RADEON_CS_MAX_PRIORITY]. */
#define RADEON_CS_NUM_BUCKETS		(RADEON_CS_MAX_PRIORITY + 1)

43 /* This is based on the bucket sort with O(n) time complexity.
44 * An item with priority "i" is added to bucket[i]. The lists are then
45 * concatenated in descending order.
47 struct radeon_cs_buckets
{
48 struct list_head bucket
[RADEON_CS_NUM_BUCKETS
];
51 static void radeon_cs_buckets_init(struct radeon_cs_buckets
*b
)
55 for (i
= 0; i
< RADEON_CS_NUM_BUCKETS
; i
++)
56 INIT_LIST_HEAD(&b
->bucket
[i
]);
59 static void radeon_cs_buckets_add(struct radeon_cs_buckets
*b
,
60 struct list_head
*item
, unsigned priority
)
62 /* Since buffers which appear sooner in the relocation list are
63 * likely to be used more often than buffers which appear later
64 * in the list, the sort mustn't change the ordering of buffers
65 * with the same priority, i.e. it must be stable.
67 list_add_tail(item
, &b
->bucket
[min(priority
, RADEON_CS_MAX_PRIORITY
)]);
70 static void radeon_cs_buckets_get_list(struct radeon_cs_buckets
*b
,
71 struct list_head
*out_list
)
75 /* Connect the sorted buckets in the output list. */
76 for (i
= 0; i
< RADEON_CS_NUM_BUCKETS
; i
++) {
77 list_splice(&b
->bucket
[i
], out_list
);
81 static int radeon_cs_parser_relocs(struct radeon_cs_parser
*p
)
83 struct radeon_cs_chunk
*chunk
;
84 struct radeon_cs_buckets buckets
;
86 bool need_mmap_lock
= false;
89 if (p
->chunk_relocs
== NULL
) {
92 chunk
= p
->chunk_relocs
;
94 /* FIXME: we assume that each relocs use 4 dwords */
95 p
->nrelocs
= chunk
->length_dw
/ 4;
96 p
->relocs
= kvmalloc_array(p
->nrelocs
, sizeof(struct radeon_bo_list
),
97 GFP_KERNEL
| __GFP_ZERO
);
98 if (p
->relocs
== NULL
) {
102 radeon_cs_buckets_init(&buckets
);
104 for (i
= 0; i
< p
->nrelocs
; i
++) {
105 struct drm_radeon_cs_reloc
*r
;
106 struct drm_gem_object
*gobj
;
109 r
= (struct drm_radeon_cs_reloc
*)&chunk
->kdata
[i
*4];
110 gobj
= drm_gem_object_lookup(p
->filp
, r
->handle
);
112 DRM_ERROR("gem object lookup failed 0x%x\n",
116 p
->relocs
[i
].robj
= gem_to_radeon_bo(gobj
);
118 /* The userspace buffer priorities are from 0 to 15. A higher
119 * number means the buffer is more important.
120 * Also, the buffers used for write have a higher priority than
121 * the buffers used for read only, which doubles the range
122 * to 0 to 31. 32 is reserved for the kernel driver.
124 priority
= (r
->flags
& RADEON_RELOC_PRIO_MASK
) * 2
127 /* The first reloc of an UVD job is the msg and that must be in
128 * VRAM, the second reloc is the DPB and for WMV that must be in
129 * VRAM as well. Also put everything into VRAM on AGP cards and older
130 * IGP chips to avoid image corruptions
132 if (p
->ring
== R600_RING_TYPE_UVD_INDEX
&&
133 (i
<= 0 || pci_find_capability(p
->rdev
->ddev
->pdev
,
135 p
->rdev
->family
== CHIP_RS780
||
136 p
->rdev
->family
== CHIP_RS880
)) {
138 /* TODO: is this still needed for NI+ ? */
139 p
->relocs
[i
].preferred_domains
=
140 RADEON_GEM_DOMAIN_VRAM
;
142 p
->relocs
[i
].allowed_domains
=
143 RADEON_GEM_DOMAIN_VRAM
;
145 /* prioritize this over any other relocation */
146 priority
= RADEON_CS_MAX_PRIORITY
;
148 uint32_t domain
= r
->write_domain
?
149 r
->write_domain
: r
->read_domains
;
151 if (domain
& RADEON_GEM_DOMAIN_CPU
) {
152 DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid "
153 "for command submission\n");
157 p
->relocs
[i
].preferred_domains
= domain
;
158 if (domain
== RADEON_GEM_DOMAIN_VRAM
)
159 domain
|= RADEON_GEM_DOMAIN_GTT
;
160 p
->relocs
[i
].allowed_domains
= domain
;
163 if (radeon_ttm_tt_has_userptr(p
->relocs
[i
].robj
->tbo
.ttm
)) {
164 uint32_t domain
= p
->relocs
[i
].preferred_domains
;
165 if (!(domain
& RADEON_GEM_DOMAIN_GTT
)) {
166 DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is "
167 "allowed for userptr BOs\n");
170 need_mmap_lock
= true;
171 domain
= RADEON_GEM_DOMAIN_GTT
;
172 p
->relocs
[i
].preferred_domains
= domain
;
173 p
->relocs
[i
].allowed_domains
= domain
;
176 /* Objects shared as dma-bufs cannot be moved to VRAM */
177 if (p
->relocs
[i
].robj
->prime_shared_count
) {
178 p
->relocs
[i
].allowed_domains
&= ~RADEON_GEM_DOMAIN_VRAM
;
179 if (!p
->relocs
[i
].allowed_domains
) {
180 DRM_ERROR("BO associated with dma-buf cannot "
181 "be moved to VRAM\n");
186 p
->relocs
[i
].tv
.bo
= &p
->relocs
[i
].robj
->tbo
;
187 p
->relocs
[i
].tv
.num_shared
= !r
->write_domain
;
189 radeon_cs_buckets_add(&buckets
, &p
->relocs
[i
].tv
.head
,
193 radeon_cs_buckets_get_list(&buckets
, &p
->validated
);
195 if (p
->cs_flags
& RADEON_CS_USE_VM
)
196 p
->vm_bos
= radeon_vm_get_bos(p
->rdev
, p
->ib
.vm
,
199 down_read(¤t
->mm
->mmap_sem
);
201 r
= radeon_bo_list_validate(p
->rdev
, &p
->ticket
, &p
->validated
, p
->ring
);
204 up_read(¤t
->mm
->mmap_sem
);
209 static int radeon_cs_get_ring(struct radeon_cs_parser
*p
, u32 ring
, s32 priority
)
211 p
->priority
= priority
;
215 DRM_ERROR("unknown ring id: %d\n", ring
);
217 case RADEON_CS_RING_GFX
:
218 p
->ring
= RADEON_RING_TYPE_GFX_INDEX
;
220 case RADEON_CS_RING_COMPUTE
:
221 if (p
->rdev
->family
>= CHIP_TAHITI
) {
223 p
->ring
= CAYMAN_RING_TYPE_CP1_INDEX
;
225 p
->ring
= CAYMAN_RING_TYPE_CP2_INDEX
;
227 p
->ring
= RADEON_RING_TYPE_GFX_INDEX
;
229 case RADEON_CS_RING_DMA
:
230 if (p
->rdev
->family
>= CHIP_CAYMAN
) {
232 p
->ring
= R600_RING_TYPE_DMA_INDEX
;
234 p
->ring
= CAYMAN_RING_TYPE_DMA1_INDEX
;
235 } else if (p
->rdev
->family
>= CHIP_RV770
) {
236 p
->ring
= R600_RING_TYPE_DMA_INDEX
;
241 case RADEON_CS_RING_UVD
:
242 p
->ring
= R600_RING_TYPE_UVD_INDEX
;
244 case RADEON_CS_RING_VCE
:
245 /* TODO: only use the low priority ring for now */
246 p
->ring
= TN_RING_TYPE_VCE1_INDEX
;
252 static int radeon_cs_sync_rings(struct radeon_cs_parser
*p
)
254 struct radeon_bo_list
*reloc
;
257 list_for_each_entry(reloc
, &p
->validated
, tv
.head
) {
258 struct dma_resv
*resv
;
260 resv
= reloc
->robj
->tbo
.base
.resv
;
261 r
= radeon_sync_resv(p
->rdev
, &p
->ib
.sync
, resv
,
262 reloc
->tv
.num_shared
);
269 /* XXX: note that this is called from the legacy UMS CS ioctl as well */
270 int radeon_cs_parser_init(struct radeon_cs_parser
*p
, void *data
)
272 struct drm_radeon_cs
*cs
= data
;
273 uint64_t *chunk_array_ptr
;
275 u32 ring
= RADEON_CS_RING_GFX
;
278 INIT_LIST_HEAD(&p
->validated
);
280 if (!cs
->num_chunks
) {
287 p
->const_ib
.sa_bo
= NULL
;
289 p
->chunk_relocs
= NULL
;
290 p
->chunk_flags
= NULL
;
291 p
->chunk_const_ib
= NULL
;
292 p
->chunks_array
= kcalloc(cs
->num_chunks
, sizeof(uint64_t), GFP_KERNEL
);
293 if (p
->chunks_array
== NULL
) {
296 chunk_array_ptr
= (uint64_t *)(unsigned long)(cs
->chunks
);
297 if (copy_from_user(p
->chunks_array
, chunk_array_ptr
,
298 sizeof(uint64_t)*cs
->num_chunks
)) {
302 p
->nchunks
= cs
->num_chunks
;
303 p
->chunks
= kcalloc(p
->nchunks
, sizeof(struct radeon_cs_chunk
), GFP_KERNEL
);
304 if (p
->chunks
== NULL
) {
307 for (i
= 0; i
< p
->nchunks
; i
++) {
308 struct drm_radeon_cs_chunk __user
**chunk_ptr
= NULL
;
309 struct drm_radeon_cs_chunk user_chunk
;
310 uint32_t __user
*cdata
;
312 chunk_ptr
= (void __user
*)(unsigned long)p
->chunks_array
[i
];
313 if (copy_from_user(&user_chunk
, chunk_ptr
,
314 sizeof(struct drm_radeon_cs_chunk
))) {
317 p
->chunks
[i
].length_dw
= user_chunk
.length_dw
;
318 if (user_chunk
.chunk_id
== RADEON_CHUNK_ID_RELOCS
) {
319 p
->chunk_relocs
= &p
->chunks
[i
];
321 if (user_chunk
.chunk_id
== RADEON_CHUNK_ID_IB
) {
322 p
->chunk_ib
= &p
->chunks
[i
];
323 /* zero length IB isn't useful */
324 if (p
->chunks
[i
].length_dw
== 0)
327 if (user_chunk
.chunk_id
== RADEON_CHUNK_ID_CONST_IB
) {
328 p
->chunk_const_ib
= &p
->chunks
[i
];
329 /* zero length CONST IB isn't useful */
330 if (p
->chunks
[i
].length_dw
== 0)
333 if (user_chunk
.chunk_id
== RADEON_CHUNK_ID_FLAGS
) {
334 p
->chunk_flags
= &p
->chunks
[i
];
335 /* zero length flags aren't useful */
336 if (p
->chunks
[i
].length_dw
== 0)
340 size
= p
->chunks
[i
].length_dw
;
341 cdata
= (void __user
*)(unsigned long)user_chunk
.chunk_data
;
342 p
->chunks
[i
].user_ptr
= cdata
;
343 if (user_chunk
.chunk_id
== RADEON_CHUNK_ID_CONST_IB
)
346 if (user_chunk
.chunk_id
== RADEON_CHUNK_ID_IB
) {
347 if (!p
->rdev
|| !(p
->rdev
->flags
& RADEON_IS_AGP
))
351 p
->chunks
[i
].kdata
= kvmalloc_array(size
, sizeof(uint32_t), GFP_KERNEL
);
352 size
*= sizeof(uint32_t);
353 if (p
->chunks
[i
].kdata
== NULL
) {
356 if (copy_from_user(p
->chunks
[i
].kdata
, cdata
, size
)) {
359 if (user_chunk
.chunk_id
== RADEON_CHUNK_ID_FLAGS
) {
360 p
->cs_flags
= p
->chunks
[i
].kdata
[0];
361 if (p
->chunks
[i
].length_dw
> 1)
362 ring
= p
->chunks
[i
].kdata
[1];
363 if (p
->chunks
[i
].length_dw
> 2)
364 priority
= (s32
)p
->chunks
[i
].kdata
[2];
368 /* these are KMS only */
370 if ((p
->cs_flags
& RADEON_CS_USE_VM
) &&
371 !p
->rdev
->vm_manager
.enabled
) {
372 DRM_ERROR("VM not active on asic!\n");
376 if (radeon_cs_get_ring(p
, ring
, priority
))
379 /* we only support VM on some SI+ rings */
380 if ((p
->cs_flags
& RADEON_CS_USE_VM
) == 0) {
381 if (p
->rdev
->asic
->ring
[p
->ring
]->cs_parse
== NULL
) {
382 DRM_ERROR("Ring %d requires VM!\n", p
->ring
);
386 if (p
->rdev
->asic
->ring
[p
->ring
]->ib_parse
== NULL
) {
387 DRM_ERROR("VM not supported on ring %d!\n",
397 static int cmp_size_smaller_first(void *priv
, struct list_head
*a
,
400 struct radeon_bo_list
*la
= list_entry(a
, struct radeon_bo_list
, tv
.head
);
401 struct radeon_bo_list
*lb
= list_entry(b
, struct radeon_bo_list
, tv
.head
);
403 /* Sort A before B if A is smaller. */
404 return (int)la
->robj
->tbo
.num_pages
- (int)lb
->robj
->tbo
.num_pages
;
408 * cs_parser_fini() - clean parser states
409 * @parser: parser structure holding parsing context.
410 * @error: error number
412 * If error is set than unvalidate buffer, otherwise just free memory
413 * used by parsing context.
415 static void radeon_cs_parser_fini(struct radeon_cs_parser
*parser
, int error
, bool backoff
)
420 /* Sort the buffer list from the smallest to largest buffer,
421 * which affects the order of buffers in the LRU list.
422 * This assures that the smallest buffers are added first
423 * to the LRU list, so they are likely to be later evicted
424 * first, instead of large buffers whose eviction is more
427 * This slightly lowers the number of bytes moved by TTM
428 * per frame under memory pressure.
430 list_sort(NULL
, &parser
->validated
, cmp_size_smaller_first
);
432 ttm_eu_fence_buffer_objects(&parser
->ticket
,
434 &parser
->ib
.fence
->base
);
435 } else if (backoff
) {
436 ttm_eu_backoff_reservation(&parser
->ticket
,
440 if (parser
->relocs
!= NULL
) {
441 for (i
= 0; i
< parser
->nrelocs
; i
++) {
442 struct radeon_bo
*bo
= parser
->relocs
[i
].robj
;
446 drm_gem_object_put_unlocked(&bo
->tbo
.base
);
449 kfree(parser
->track
);
450 kvfree(parser
->relocs
);
451 kvfree(parser
->vm_bos
);
452 for (i
= 0; i
< parser
->nchunks
; i
++)
453 kvfree(parser
->chunks
[i
].kdata
);
454 kfree(parser
->chunks
);
455 kfree(parser
->chunks_array
);
456 radeon_ib_free(parser
->rdev
, &parser
->ib
);
457 radeon_ib_free(parser
->rdev
, &parser
->const_ib
);
460 static int radeon_cs_ib_chunk(struct radeon_device
*rdev
,
461 struct radeon_cs_parser
*parser
)
465 if (parser
->chunk_ib
== NULL
)
468 if (parser
->cs_flags
& RADEON_CS_USE_VM
)
471 r
= radeon_cs_parse(rdev
, parser
->ring
, parser
);
472 if (r
|| parser
->parser_error
) {
473 DRM_ERROR("Invalid command stream !\n");
477 r
= radeon_cs_sync_rings(parser
);
479 if (r
!= -ERESTARTSYS
)
480 DRM_ERROR("Failed to sync rings: %i\n", r
);
484 if (parser
->ring
== R600_RING_TYPE_UVD_INDEX
)
485 radeon_uvd_note_usage(rdev
);
486 else if ((parser
->ring
== TN_RING_TYPE_VCE1_INDEX
) ||
487 (parser
->ring
== TN_RING_TYPE_VCE2_INDEX
))
488 radeon_vce_note_usage(rdev
);
490 r
= radeon_ib_schedule(rdev
, &parser
->ib
, NULL
, true);
492 DRM_ERROR("Failed to schedule IB !\n");
497 static int radeon_bo_vm_update_pte(struct radeon_cs_parser
*p
,
498 struct radeon_vm
*vm
)
500 struct radeon_device
*rdev
= p
->rdev
;
501 struct radeon_bo_va
*bo_va
;
504 r
= radeon_vm_update_page_directory(rdev
, vm
);
508 r
= radeon_vm_clear_freed(rdev
, vm
);
512 if (vm
->ib_bo_va
== NULL
) {
513 DRM_ERROR("Tmp BO not in VM!\n");
517 r
= radeon_vm_bo_update(rdev
, vm
->ib_bo_va
,
518 &rdev
->ring_tmp_bo
.bo
->tbo
.mem
);
522 for (i
= 0; i
< p
->nrelocs
; i
++) {
523 struct radeon_bo
*bo
;
525 bo
= p
->relocs
[i
].robj
;
526 bo_va
= radeon_vm_bo_find(vm
, bo
);
528 dev_err(rdev
->dev
, "bo %p not in vm %p\n", bo
, vm
);
532 r
= radeon_vm_bo_update(rdev
, bo_va
, &bo
->tbo
.mem
);
536 radeon_sync_fence(&p
->ib
.sync
, bo_va
->last_pt_update
);
539 return radeon_vm_clear_invalids(rdev
, vm
);
542 static int radeon_cs_ib_vm_chunk(struct radeon_device
*rdev
,
543 struct radeon_cs_parser
*parser
)
545 struct radeon_fpriv
*fpriv
= parser
->filp
->driver_priv
;
546 struct radeon_vm
*vm
= &fpriv
->vm
;
549 if (parser
->chunk_ib
== NULL
)
551 if ((parser
->cs_flags
& RADEON_CS_USE_VM
) == 0)
554 if (parser
->const_ib
.length_dw
) {
555 r
= radeon_ring_ib_parse(rdev
, parser
->ring
, &parser
->const_ib
);
561 r
= radeon_ring_ib_parse(rdev
, parser
->ring
, &parser
->ib
);
566 if (parser
->ring
== R600_RING_TYPE_UVD_INDEX
)
567 radeon_uvd_note_usage(rdev
);
569 mutex_lock(&vm
->mutex
);
570 r
= radeon_bo_vm_update_pte(parser
, vm
);
575 r
= radeon_cs_sync_rings(parser
);
577 if (r
!= -ERESTARTSYS
)
578 DRM_ERROR("Failed to sync rings: %i\n", r
);
582 if ((rdev
->family
>= CHIP_TAHITI
) &&
583 (parser
->chunk_const_ib
!= NULL
)) {
584 r
= radeon_ib_schedule(rdev
, &parser
->ib
, &parser
->const_ib
, true);
586 r
= radeon_ib_schedule(rdev
, &parser
->ib
, NULL
, true);
590 mutex_unlock(&vm
->mutex
);
594 static int radeon_cs_handle_lockup(struct radeon_device
*rdev
, int r
)
597 r
= radeon_gpu_reset(rdev
);
604 static int radeon_cs_ib_fill(struct radeon_device
*rdev
, struct radeon_cs_parser
*parser
)
606 struct radeon_cs_chunk
*ib_chunk
;
607 struct radeon_vm
*vm
= NULL
;
610 if (parser
->chunk_ib
== NULL
)
613 if (parser
->cs_flags
& RADEON_CS_USE_VM
) {
614 struct radeon_fpriv
*fpriv
= parser
->filp
->driver_priv
;
617 if ((rdev
->family
>= CHIP_TAHITI
) &&
618 (parser
->chunk_const_ib
!= NULL
)) {
619 ib_chunk
= parser
->chunk_const_ib
;
620 if (ib_chunk
->length_dw
> RADEON_IB_VM_MAX_SIZE
) {
621 DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk
->length_dw
);
624 r
= radeon_ib_get(rdev
, parser
->ring
, &parser
->const_ib
,
625 vm
, ib_chunk
->length_dw
* 4);
627 DRM_ERROR("Failed to get const ib !\n");
630 parser
->const_ib
.is_const_ib
= true;
631 parser
->const_ib
.length_dw
= ib_chunk
->length_dw
;
632 if (copy_from_user(parser
->const_ib
.ptr
,
634 ib_chunk
->length_dw
* 4))
638 ib_chunk
= parser
->chunk_ib
;
639 if (ib_chunk
->length_dw
> RADEON_IB_VM_MAX_SIZE
) {
640 DRM_ERROR("cs IB too big: %d\n", ib_chunk
->length_dw
);
644 ib_chunk
= parser
->chunk_ib
;
646 r
= radeon_ib_get(rdev
, parser
->ring
, &parser
->ib
,
647 vm
, ib_chunk
->length_dw
* 4);
649 DRM_ERROR("Failed to get ib !\n");
652 parser
->ib
.length_dw
= ib_chunk
->length_dw
;
654 memcpy(parser
->ib
.ptr
, ib_chunk
->kdata
, ib_chunk
->length_dw
* 4);
655 else if (copy_from_user(parser
->ib
.ptr
, ib_chunk
->user_ptr
, ib_chunk
->length_dw
* 4))
660 int radeon_cs_ioctl(struct drm_device
*dev
, void *data
, struct drm_file
*filp
)
662 struct radeon_device
*rdev
= dev
->dev_private
;
663 struct radeon_cs_parser parser
;
666 down_read(&rdev
->exclusive_lock
);
667 if (!rdev
->accel_working
) {
668 up_read(&rdev
->exclusive_lock
);
671 if (rdev
->in_reset
) {
672 up_read(&rdev
->exclusive_lock
);
673 r
= radeon_gpu_reset(rdev
);
678 /* initialize parser */
679 memset(&parser
, 0, sizeof(struct radeon_cs_parser
));
682 parser
.dev
= rdev
->dev
;
683 parser
.family
= rdev
->family
;
684 r
= radeon_cs_parser_init(&parser
, data
);
686 DRM_ERROR("Failed to initialize parser !\n");
687 radeon_cs_parser_fini(&parser
, r
, false);
688 up_read(&rdev
->exclusive_lock
);
689 r
= radeon_cs_handle_lockup(rdev
, r
);
693 r
= radeon_cs_ib_fill(rdev
, &parser
);
695 r
= radeon_cs_parser_relocs(&parser
);
696 if (r
&& r
!= -ERESTARTSYS
)
697 DRM_ERROR("Failed to parse relocation %d!\n", r
);
701 radeon_cs_parser_fini(&parser
, r
, false);
702 up_read(&rdev
->exclusive_lock
);
703 r
= radeon_cs_handle_lockup(rdev
, r
);
707 trace_radeon_cs(&parser
);
709 r
= radeon_cs_ib_chunk(rdev
, &parser
);
713 r
= radeon_cs_ib_vm_chunk(rdev
, &parser
);
718 radeon_cs_parser_fini(&parser
, r
, true);
719 up_read(&rdev
->exclusive_lock
);
720 r
= radeon_cs_handle_lockup(rdev
, r
);
725 * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
726 * @parser: parser structure holding parsing context.
727 * @pkt: where to store packet information
729 * Assume that chunk_ib_index is properly set. Will return -EINVAL
730 * if packet is bigger than remaining ib size. or if packets is unknown.
732 int radeon_cs_packet_parse(struct radeon_cs_parser
*p
,
733 struct radeon_cs_packet
*pkt
,
736 struct radeon_cs_chunk
*ib_chunk
= p
->chunk_ib
;
737 struct radeon_device
*rdev
= p
->rdev
;
741 if (idx
>= ib_chunk
->length_dw
) {
742 DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
743 idx
, ib_chunk
->length_dw
);
746 header
= radeon_get_ib_value(p
, idx
);
748 pkt
->type
= RADEON_CP_PACKET_GET_TYPE(header
);
749 pkt
->count
= RADEON_CP_PACKET_GET_COUNT(header
);
752 case RADEON_PACKET_TYPE0
:
753 if (rdev
->family
< CHIP_R600
) {
754 pkt
->reg
= R100_CP_PACKET0_GET_REG(header
);
756 RADEON_CP_PACKET0_GET_ONE_REG_WR(header
);
758 pkt
->reg
= R600_CP_PACKET0_GET_REG(header
);
760 case RADEON_PACKET_TYPE3
:
761 pkt
->opcode
= RADEON_CP_PACKET3_GET_OPCODE(header
);
763 case RADEON_PACKET_TYPE2
:
767 DRM_ERROR("Unknown packet type %d at %d !\n", pkt
->type
, idx
);
771 if ((pkt
->count
+ 1 + pkt
->idx
) >= ib_chunk
->length_dw
) {
772 DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
773 pkt
->idx
, pkt
->type
, pkt
->count
, ib_chunk
->length_dw
);
780 for (i
= 0; i
< ib_chunk
->length_dw
; i
++) {
782 printk("\t0x%08x <---\n", radeon_get_ib_value(p
, i
));
784 printk("\t0x%08x\n", radeon_get_ib_value(p
, i
));
790 * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP
791 * @p: structure holding the parser context.
793 * Check if the next packet is NOP relocation packet3.
795 bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser
*p
)
797 struct radeon_cs_packet p3reloc
;
800 r
= radeon_cs_packet_parse(p
, &p3reloc
, p
->idx
);
803 if (p3reloc
.type
!= RADEON_PACKET_TYPE3
)
805 if (p3reloc
.opcode
!= RADEON_PACKET3_NOP
)
811 * radeon_cs_dump_packet() - dump raw packet context
812 * @p: structure holding the parser context.
813 * @pkt: structure holding the packet.
815 * Used mostly for debugging and error reporting.
817 void radeon_cs_dump_packet(struct radeon_cs_parser
*p
,
818 struct radeon_cs_packet
*pkt
)
820 volatile uint32_t *ib
;
826 for (i
= 0; i
<= (pkt
->count
+ 1); i
++, idx
++)
827 DRM_INFO("ib[%d]=0x%08X\n", idx
, ib
[idx
]);
831 * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
832 * @parser: parser structure holding parsing context.
833 * @data: pointer to relocation data
834 * @offset_start: starting offset
835 * @offset_mask: offset mask (to align start offset on)
836 * @reloc: reloc informations
838 * Check if next packet is relocation packet3, do bo validation and compute
839 * GPU offset using the provided start.
841 int radeon_cs_packet_next_reloc(struct radeon_cs_parser
*p
,
842 struct radeon_bo_list
**cs_reloc
,
845 struct radeon_cs_chunk
*relocs_chunk
;
846 struct radeon_cs_packet p3reloc
;
850 if (p
->chunk_relocs
== NULL
) {
851 DRM_ERROR("No relocation chunk !\n");
855 relocs_chunk
= p
->chunk_relocs
;
856 r
= radeon_cs_packet_parse(p
, &p3reloc
, p
->idx
);
859 p
->idx
+= p3reloc
.count
+ 2;
860 if (p3reloc
.type
!= RADEON_PACKET_TYPE3
||
861 p3reloc
.opcode
!= RADEON_PACKET3_NOP
) {
862 DRM_ERROR("No packet3 for relocation for packet at %d.\n",
864 radeon_cs_dump_packet(p
, &p3reloc
);
867 idx
= radeon_get_ib_value(p
, p3reloc
.idx
+ 1);
868 if (idx
>= relocs_chunk
->length_dw
) {
869 DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
870 idx
, relocs_chunk
->length_dw
);
871 radeon_cs_dump_packet(p
, &p3reloc
);
874 /* FIXME: we assume reloc size is 4 dwords */
876 *cs_reloc
= p
->relocs
;
877 (*cs_reloc
)->gpu_offset
=
878 (u64
)relocs_chunk
->kdata
[idx
+ 3] << 32;
879 (*cs_reloc
)->gpu_offset
|= relocs_chunk
->kdata
[idx
+ 0];
881 *cs_reloc
= &p
->relocs
[(idx
/ 4)];