/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */

#include <linux/list_sort.h>
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_trace.h"

#define RADEON_CS_MAX_PRIORITY          32u
#define RADEON_CS_NUM_BUCKETS           (RADEON_CS_MAX_PRIORITY + 1)

/* This is based on the bucket sort with O(n) time complexity.
 * An item with priority "i" is added to bucket[i]. The lists are then
 * concatenated in descending order.
 */
struct radeon_cs_buckets {
        struct list_head bucket[RADEON_CS_NUM_BUCKETS];
};

static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
{
        unsigned i;

        for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
                INIT_LIST_HEAD(&b->bucket[i]);
}

static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
                                  struct list_head *item, unsigned priority)
{
        /* Since buffers which appear sooner in the relocation list are
         * likely to be used more often than buffers which appear later
         * in the list, the sort mustn't change the ordering of buffers
         * with the same priority, i.e. it must be stable.
         */
        list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
}

static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
                                       struct list_head *out_list)
{
        unsigned i;

        /* Connect the sorted buckets in the output list. */
        for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
                list_splice(&b->bucket[i], out_list);
        }
}

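/* Gather the buffer objects referenced by the relocation chunk: look up
 * each GEM handle, work out the preferred/allowed domains and priority
 * of every BO, then sort and validate the resulting list (taking mmap_sem
 * when userptr BOs are involved).
 */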
static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
{
        struct radeon_cs_chunk *chunk;
        struct radeon_cs_buckets buckets;
        unsigned i;
        bool need_mmap_lock = false;
        int r;

        if (p->chunk_relocs == NULL) {
                return 0;
        }
        chunk = p->chunk_relocs;
        /* FIXME: we assume that each relocs use 4 dwords */
        p->nrelocs = chunk->length_dw / 4;
        p->relocs = drm_calloc_large(p->nrelocs, sizeof(struct radeon_bo_list));
        if (p->relocs == NULL) {
                return -ENOMEM;
        }

        radeon_cs_buckets_init(&buckets);

        for (i = 0; i < p->nrelocs; i++) {
                struct drm_radeon_cs_reloc *r;
                struct drm_gem_object *gobj;
                unsigned priority;

                r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
                gobj = drm_gem_object_lookup(p->filp, r->handle);
                if (gobj == NULL) {
                        DRM_ERROR("gem object lookup failed 0x%x\n",
                                  r->handle);
                        return -ENOENT;
                }
                p->relocs[i].robj = gem_to_radeon_bo(gobj);

                /* The userspace buffer priorities are from 0 to 15. A higher
                 * number means the buffer is more important.
                 * Also, the buffers used for write have a higher priority than
                 * the buffers used for read only, which doubles the range
                 * to 0 to 31. 32 is reserved for the kernel driver.
                 */
                priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2
                           + !!r->write_domain;

                /* The first reloc of an UVD job is the msg and that must be in
                 * VRAM, the second reloc is the DPB and for WMV that must be in
                 * VRAM as well. Also put everything into VRAM on AGP cards and older
                 * IGP chips to avoid image corruptions.
                 */
                if (p->ring == R600_RING_TYPE_UVD_INDEX &&
                    (i <= 0 || pci_find_capability(p->rdev->ddev->pdev,
                                                   PCI_CAP_ID_AGP) ||
                     p->rdev->family == CHIP_RS780 ||
                     p->rdev->family == CHIP_RS880)) {

                        /* TODO: is this still needed for NI+ ? */
                        p->relocs[i].prefered_domains =
                                RADEON_GEM_DOMAIN_VRAM;

                        p->relocs[i].allowed_domains =
                                RADEON_GEM_DOMAIN_VRAM;

                        /* prioritize this over any other relocation */
                        priority = RADEON_CS_MAX_PRIORITY;
                } else {
                        uint32_t domain = r->write_domain ?
                                r->write_domain : r->read_domains;

                        if (domain & RADEON_GEM_DOMAIN_CPU) {
                                DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid "
                                          "for command submission\n");
                                return -EINVAL;
                        }

                        p->relocs[i].prefered_domains = domain;
                        if (domain == RADEON_GEM_DOMAIN_VRAM)
                                domain |= RADEON_GEM_DOMAIN_GTT;
                        p->relocs[i].allowed_domains = domain;
                }

                if (radeon_ttm_tt_has_userptr(p->relocs[i].robj->tbo.ttm)) {
                        uint32_t domain = p->relocs[i].prefered_domains;
                        if (!(domain & RADEON_GEM_DOMAIN_GTT)) {
                                DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is "
                                          "allowed for userptr BOs\n");
                                return -EINVAL;
                        }
                        need_mmap_lock = true;
                        domain = RADEON_GEM_DOMAIN_GTT;
                        p->relocs[i].prefered_domains = domain;
                        p->relocs[i].allowed_domains = domain;
                }

                /* Objects shared as dma-bufs cannot be moved to VRAM */
                if (p->relocs[i].robj->prime_shared_count) {
                        p->relocs[i].allowed_domains &= ~RADEON_GEM_DOMAIN_VRAM;
                        if (!p->relocs[i].allowed_domains) {
                                DRM_ERROR("BO associated with dma-buf cannot "
                                          "be moved to VRAM\n");
                                return -EINVAL;
                        }
                }

                p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
                p->relocs[i].tv.shared = !r->write_domain;

                radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
                                      priority);
        }

        radeon_cs_buckets_get_list(&buckets, &p->validated);

        if (p->cs_flags & RADEON_CS_USE_VM)
                p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
                                              &p->validated);
        if (need_mmap_lock)
                down_read(&current->mm->mmap_sem);

        r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);

        if (need_mmap_lock)
                up_read(&current->mm->mmap_sem);

        return r;
}

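/* Translate the userspace RADEON_CS_RING_* id and submission priority
 * from the flags chunk into a kernel ring index for this ASIC.
 */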
static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
{
        p->priority = priority;

        switch (ring) {
        default:
                DRM_ERROR("unknown ring id: %d\n", ring);
                return -EINVAL;
        case RADEON_CS_RING_GFX:
                p->ring = RADEON_RING_TYPE_GFX_INDEX;
                break;
        case RADEON_CS_RING_COMPUTE:
                if (p->rdev->family >= CHIP_TAHITI) {
                        if (p->priority > 0)
                                p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
                        else
                                p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
                } else
                        p->ring = RADEON_RING_TYPE_GFX_INDEX;
                break;
        case RADEON_CS_RING_DMA:
                if (p->rdev->family >= CHIP_CAYMAN) {
                        if (p->priority > 0)
                                p->ring = R600_RING_TYPE_DMA_INDEX;
                        else
                                p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
                } else if (p->rdev->family >= CHIP_RV770) {
                        p->ring = R600_RING_TYPE_DMA_INDEX;
                } else {
                        return -EINVAL;
                }
                break;
        case RADEON_CS_RING_UVD:
                p->ring = R600_RING_TYPE_UVD_INDEX;
                break;
        case RADEON_CS_RING_VCE:
                /* TODO: only use the low priority ring for now */
                p->ring = TN_RING_TYPE_VCE1_INDEX;
                break;
        }
        return 0;
}

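/* Collect the fences of all validated BOs into the IB's sync object so
 * that the submission waits for prior work on other rings to finish.
 */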
static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
{
        struct radeon_bo_list *reloc;
        int r;

        list_for_each_entry(reloc, &p->validated, tv.head) {
                struct reservation_object *resv;

                resv = reloc->robj->tbo.resv;
                r = radeon_sync_resv(p->rdev, &p->ib.sync, resv,
                                     reloc->tv.shared);
                if (r)
                        return r;
        }
        return 0;
}

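/* Copy the chunk headers in from userspace and set up the parser state:
 * locate the IB, CONST_IB, relocation and flags chunks, read the CS
 * flags, ring id and priority, and pick the target ring.
 */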
/* XXX: note that this is called from the legacy UMS CS ioctl as well */
int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
{
        struct drm_radeon_cs *cs = data;
        uint64_t *chunk_array_ptr;
        unsigned size, i;
        u32 ring = RADEON_CS_RING_GFX;
        s32 priority = 0;

        INIT_LIST_HEAD(&p->validated);

        if (!cs->num_chunks) {
                return 0;
        }

        /* get chunks */
        p->idx = 0;
        p->ib.sa_bo = NULL;
        p->const_ib.sa_bo = NULL;
        p->chunk_ib = NULL;
        p->chunk_relocs = NULL;
        p->chunk_flags = NULL;
        p->chunk_const_ib = NULL;
        p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
        if (p->chunks_array == NULL) {
                return -ENOMEM;
        }
        chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
        if (copy_from_user(p->chunks_array, chunk_array_ptr,
                           sizeof(uint64_t)*cs->num_chunks)) {
                return -EFAULT;
        }
        p->cs_flags = 0;
        p->nchunks = cs->num_chunks;
        p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
        if (p->chunks == NULL) {
                return -ENOMEM;
        }
        for (i = 0; i < p->nchunks; i++) {
                struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
                struct drm_radeon_cs_chunk user_chunk;
                uint32_t __user *cdata;

                chunk_ptr = (void __user *)(unsigned long)p->chunks_array[i];
                if (copy_from_user(&user_chunk, chunk_ptr,
                                   sizeof(struct drm_radeon_cs_chunk))) {
                        return -EFAULT;
                }
                p->chunks[i].length_dw = user_chunk.length_dw;
                if (user_chunk.chunk_id == RADEON_CHUNK_ID_RELOCS) {
                        p->chunk_relocs = &p->chunks[i];
                }
                if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
                        p->chunk_ib = &p->chunks[i];
                        /* zero length IB isn't useful */
                        if (p->chunks[i].length_dw == 0)
                                return -EINVAL;
                }
                if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB) {
                        p->chunk_const_ib = &p->chunks[i];
                        /* zero length CONST IB isn't useful */
                        if (p->chunks[i].length_dw == 0)
                                return -EINVAL;
                }
                if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
                        p->chunk_flags = &p->chunks[i];
                        /* zero length flags aren't useful */
                        if (p->chunks[i].length_dw == 0)
                                return -EINVAL;
                }

                size = p->chunks[i].length_dw;
                cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
                p->chunks[i].user_ptr = cdata;
                if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB)
                        continue;

                if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
                        if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
                                continue;
                }

                p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
                size *= sizeof(uint32_t);
                if (p->chunks[i].kdata == NULL) {
                        return -ENOMEM;
                }
                if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
                        return -EFAULT;
                }
                if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
                        p->cs_flags = p->chunks[i].kdata[0];
                        if (p->chunks[i].length_dw > 1)
                                ring = p->chunks[i].kdata[1];
                        if (p->chunks[i].length_dw > 2)
                                priority = (s32)p->chunks[i].kdata[2];
                }
        }

        /* these are KMS only */
        if (p->rdev) {
                if ((p->cs_flags & RADEON_CS_USE_VM) &&
                    !p->rdev->vm_manager.enabled) {
                        DRM_ERROR("VM not active on asic!\n");
                        return -EINVAL;
                }

                if (radeon_cs_get_ring(p, ring, priority))
                        return -EINVAL;

                /* we only support VM on some SI+ rings */
                if ((p->cs_flags & RADEON_CS_USE_VM) == 0) {
                        if (p->rdev->asic->ring[p->ring]->cs_parse == NULL) {
                                DRM_ERROR("Ring %d requires VM!\n", p->ring);
                                return -EINVAL;
                        }
                } else {
                        if (p->rdev->asic->ring[p->ring]->ib_parse == NULL) {
                                DRM_ERROR("VM not supported on ring %d!\n",
                                          p->ring);
                                return -EINVAL;
                        }
                }
        }

        return 0;
}

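/* list_sort() comparison callback used when the buffer list is released,
 * see the LRU comment in radeon_cs_parser_fini() below.
 */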
static int cmp_size_smaller_first(void *priv, struct list_head *a,
                                  struct list_head *b)
{
        struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, tv.head);
        struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, tv.head);

        /* Sort A before B if A is smaller. */
        return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
}

/**
 * cs_parser_fini() - clean parser states
 * @parser:	parser structure holding parsing context.
 * @error:	error number
 * @backoff:	whether to back off the reservation on error
 *
 * If error is set, then unvalidate the buffers, otherwise just free the
 * memory used by the parsing context.
 **/
static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff)
{
        unsigned i;

        if (!error) {
                /* Sort the buffer list from the smallest to largest buffer,
                 * which affects the order of buffers in the LRU list.
                 * This assures that the smallest buffers are added first
                 * to the LRU list, so they are likely to be later evicted
                 * first, instead of large buffers whose eviction is more
                 * expensive.
                 *
                 * This slightly lowers the number of bytes moved by TTM
                 * per frame under memory pressure.
                 */
                list_sort(NULL, &parser->validated, cmp_size_smaller_first);

                ttm_eu_fence_buffer_objects(&parser->ticket,
                                            &parser->validated,
                                            &parser->ib.fence->base);
        } else if (backoff) {
                ttm_eu_backoff_reservation(&parser->ticket,
                                           &parser->validated);
        }

        if (parser->relocs != NULL) {
                for (i = 0; i < parser->nrelocs; i++) {
                        struct radeon_bo *bo = parser->relocs[i].robj;
                        if (bo == NULL)
                                continue;

                        drm_gem_object_unreference_unlocked(&bo->gem_base);
                }
        }
        kfree(parser->track);
        drm_free_large(parser->relocs);
        drm_free_large(parser->vm_bos);
        for (i = 0; i < parser->nchunks; i++)
                drm_free_large(parser->chunks[i].kdata);
        kfree(parser->chunks);
        kfree(parser->chunks_array);
        radeon_ib_free(parser->rdev, &parser->ib);
        radeon_ib_free(parser->rdev, &parser->const_ib);
}

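/* Submission path for non-VM IBs: run the per-ASIC command stream
 * checker, sync to the validated BOs and schedule the IB.
 */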
static int radeon_cs_ib_chunk(struct radeon_device *rdev,
                              struct radeon_cs_parser *parser)
{
        int r;

        if (parser->chunk_ib == NULL)
                return 0;

        if (parser->cs_flags & RADEON_CS_USE_VM)
                return 0;

        r = radeon_cs_parse(rdev, parser->ring, parser);
        if (r || parser->parser_error) {
                DRM_ERROR("Invalid command stream !\n");
                return r;
        }

        r = radeon_cs_sync_rings(parser);
        if (r) {
                if (r != -ERESTARTSYS)
                        DRM_ERROR("Failed to sync rings: %i\n", r);
                return r;
        }

        if (parser->ring == R600_RING_TYPE_UVD_INDEX)
                radeon_uvd_note_usage(rdev);
        else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) ||
                 (parser->ring == TN_RING_TYPE_VCE2_INDEX))
                radeon_vce_note_usage(rdev);

        r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
        if (r) {
                DRM_ERROR("Failed to schedule IB !\n");
        }
        return r;
}

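/* Bring the VM page tables up to date for this submission: update the
 * page directory, the temporary ring BO mapping and every relocated BO,
 * and make the IB wait for the page table updates to finish.
 */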
static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
                                   struct radeon_vm *vm)
{
        struct radeon_device *rdev = p->rdev;
        struct radeon_bo_va *bo_va;
        int i, r;

        r = radeon_vm_update_page_directory(rdev, vm);
        if (r)
                return r;

        r = radeon_vm_clear_freed(rdev, vm);
        if (r)
                return r;

        if (vm->ib_bo_va == NULL) {
                DRM_ERROR("Tmp BO not in VM!\n");
                return -EINVAL;
        }

        r = radeon_vm_bo_update(rdev, vm->ib_bo_va,
                                &rdev->ring_tmp_bo.bo->tbo.mem);
        if (r)
                return r;

        for (i = 0; i < p->nrelocs; i++) {
                struct radeon_bo *bo;

                bo = p->relocs[i].robj;
                bo_va = radeon_vm_bo_find(vm, bo);
                if (bo_va == NULL) {
                        dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
                        return -EINVAL;
                }

                r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
                if (r)
                        return r;

                radeon_sync_fence(&p->ib.sync, bo_va->last_pt_update);
        }

        return radeon_vm_clear_invalids(rdev, vm);
}

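/* Submission path for VM IBs: parse the IBs, update the page tables
 * under the VM mutex, sync the rings and schedule the IB (together with
 * the CONST IB on SI and newer).
 */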
static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
                                 struct radeon_cs_parser *parser)
{
        struct radeon_fpriv *fpriv = parser->filp->driver_priv;
        struct radeon_vm *vm = &fpriv->vm;
        int r;

        if (parser->chunk_ib == NULL)
                return 0;
        if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
                return 0;

        if (parser->const_ib.length_dw) {
                r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
                if (r) {
                        return r;
                }
        }

        r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
        if (r) {
                return r;
        }

        if (parser->ring == R600_RING_TYPE_UVD_INDEX)
                radeon_uvd_note_usage(rdev);

        mutex_lock(&vm->mutex);
        r = radeon_bo_vm_update_pte(parser, vm);
        if (r) {
                goto out;
        }

        r = radeon_cs_sync_rings(parser);
        if (r) {
                if (r != -ERESTARTSYS)
                        DRM_ERROR("Failed to sync rings: %i\n", r);
                goto out;
        }

        if ((rdev->family >= CHIP_TAHITI) &&
            (parser->chunk_const_ib != NULL)) {
                r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
        } else {
                r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
        }

out:
        mutex_unlock(&vm->mutex);
        return r;
}

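/* -EDEADLK from the submission path means the GPU is locked up: try a
 * GPU reset and tell userspace to resubmit with -EAGAIN.
 */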
static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
{
        if (r == -EDEADLK) {
                r = radeon_gpu_reset(rdev);
                if (!r)
                        r = -EAGAIN;
        }
        return r;
}

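/* Allocate the kernel IB (and the CONST IB for VM submissions on SI and
 * newer) and copy the command stream from the user chunks into it.
 */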
static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser *parser)
{
        struct radeon_cs_chunk *ib_chunk;
        struct radeon_vm *vm = NULL;
        int r;

        if (parser->chunk_ib == NULL)
                return 0;

        if (parser->cs_flags & RADEON_CS_USE_VM) {
                struct radeon_fpriv *fpriv = parser->filp->driver_priv;
                vm = &fpriv->vm;

                if ((rdev->family >= CHIP_TAHITI) &&
                    (parser->chunk_const_ib != NULL)) {
                        ib_chunk = parser->chunk_const_ib;
                        if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
                                DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
                                return -EINVAL;
                        }
                        r = radeon_ib_get(rdev, parser->ring, &parser->const_ib,
                                          vm, ib_chunk->length_dw * 4);
                        if (r) {
                                DRM_ERROR("Failed to get const ib !\n");
                                return r;
                        }
                        parser->const_ib.is_const_ib = true;
                        parser->const_ib.length_dw = ib_chunk->length_dw;
                        if (copy_from_user(parser->const_ib.ptr,
                                           ib_chunk->user_ptr,
                                           ib_chunk->length_dw * 4))
                                return -EFAULT;
                }

                ib_chunk = parser->chunk_ib;
                if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
                        DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
                        return -EINVAL;
                }
        }
        ib_chunk = parser->chunk_ib;

        r = radeon_ib_get(rdev, parser->ring, &parser->ib,
                          vm, ib_chunk->length_dw * 4);
        if (r) {
                DRM_ERROR("Failed to get ib !\n");
                return r;
        }
        parser->ib.length_dw = ib_chunk->length_dw;
        if (ib_chunk->kdata)
                memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4);
        else if (copy_from_user(parser->ib.ptr, ib_chunk->user_ptr, ib_chunk->length_dw * 4))
                return -EFAULT;
        return 0;
}

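/* Main CS ioctl entry point: initialize the parser, gather and validate
 * the relocations, then hand the IB to the VM or non-VM submission path.
 */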
int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
        struct radeon_device *rdev = dev->dev_private;
        struct radeon_cs_parser parser;
        int r;

        down_read(&rdev->exclusive_lock);
        if (!rdev->accel_working) {
                up_read(&rdev->exclusive_lock);
                return -EBUSY;
        }
        if (rdev->in_reset) {
                up_read(&rdev->exclusive_lock);
                r = radeon_gpu_reset(rdev);
                if (!r)
                        r = -EAGAIN;
                return r;
        }
        /* initialize parser */
        memset(&parser, 0, sizeof(struct radeon_cs_parser));
        parser.filp = filp;
        parser.rdev = rdev;
        parser.dev = rdev->dev;
        parser.family = rdev->family;
        r = radeon_cs_parser_init(&parser, data);
        if (r) {
                DRM_ERROR("Failed to initialize parser !\n");
                radeon_cs_parser_fini(&parser, r, false);
                up_read(&rdev->exclusive_lock);
                r = radeon_cs_handle_lockup(rdev, r);
                return r;
        }

        r = radeon_cs_ib_fill(rdev, &parser);
        if (!r) {
                r = radeon_cs_parser_relocs(&parser);
                if (r && r != -ERESTARTSYS)
                        DRM_ERROR("Failed to parse relocation %d!\n", r);
        }

        if (r) {
                radeon_cs_parser_fini(&parser, r, false);
                up_read(&rdev->exclusive_lock);
                r = radeon_cs_handle_lockup(rdev, r);
                return r;
        }

        trace_radeon_cs(&parser);

        r = radeon_cs_ib_chunk(rdev, &parser);
        if (r) {
                goto out;
        }
        r = radeon_cs_ib_vm_chunk(rdev, &parser);
        if (r) {
                goto out;
        }
out:
        radeon_cs_parser_fini(&parser, r, true);
        up_read(&rdev->exclusive_lock);
        r = radeon_cs_handle_lockup(rdev, r);
        return r;
}

/**
 * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @p:		parser structure holding parsing context.
 * @pkt:	where to store packet information
 * @idx:	packet index
 *
 * Assumes that chunk_ib_index is properly set. Returns -EINVAL if the
 * packet is bigger than the remaining ib size, or if the packet is unknown.
 **/
int radeon_cs_packet_parse(struct radeon_cs_parser *p,
                           struct radeon_cs_packet *pkt,
                           unsigned idx)
{
        struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
        struct radeon_device *rdev = p->rdev;
        uint32_t header;
        int ret = 0, i;

        if (idx >= ib_chunk->length_dw) {
                DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
                          idx, ib_chunk->length_dw);
                return -EINVAL;
        }
        header = radeon_get_ib_value(p, idx);
        pkt->idx = idx;
        pkt->type = RADEON_CP_PACKET_GET_TYPE(header);
        pkt->count = RADEON_CP_PACKET_GET_COUNT(header);
        pkt->one_reg_wr = 0;
        switch (pkt->type) {
        case RADEON_PACKET_TYPE0:
                if (rdev->family < CHIP_R600) {
                        pkt->reg = R100_CP_PACKET0_GET_REG(header);
                        pkt->one_reg_wr =
                                RADEON_CP_PACKET0_GET_ONE_REG_WR(header);
                } else
                        pkt->reg = R600_CP_PACKET0_GET_REG(header);
                break;
        case RADEON_PACKET_TYPE3:
                pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header);
                break;
        case RADEON_PACKET_TYPE2:
                pkt->count = -1;
                break;
        default:
                DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
                ret = -EINVAL;
                goto dump_ib;
        }
        if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
                DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
                          pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
                ret = -EINVAL;
                goto dump_ib;
        }
        return 0;

dump_ib:
        for (i = 0; i < ib_chunk->length_dw; i++) {
                if (i == idx)
                        printk("\t0x%08x <---\n", radeon_get_ib_value(p, i));
                else
                        printk("\t0x%08x\n", radeon_get_ib_value(p, i));
        }
        return ret;
}

/**
 * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP
 * @p:		structure holding the parser context.
 *
 * Check if the next packet is a NOP relocation packet3.
 **/
bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
{
        struct radeon_cs_packet p3reloc;
        int r;

        r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
        if (r)
                return false;
        if (p3reloc.type != RADEON_PACKET_TYPE3)
                return false;
        if (p3reloc.opcode != RADEON_PACKET3_NOP)
                return false;
        return true;
}

/**
 * radeon_cs_dump_packet() - dump raw packet context
 * @p:		structure holding the parser context.
 * @pkt:	structure holding the packet.
 *
 * Used mostly for debugging and error reporting.
 **/
void radeon_cs_dump_packet(struct radeon_cs_parser *p,
                           struct radeon_cs_packet *pkt)
{
        volatile uint32_t *ib;
        unsigned i;
        unsigned idx;

        ib = p->ib.ptr;
        idx = pkt->idx;
        for (i = 0; i <= (pkt->count + 1); i++, idx++)
                DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
}

/**
 * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
 * @p:		parser structure holding parsing context.
 * @cs_reloc:	where to store the resulting reloc/BO list entry
 * @nomm:	set on the legacy (non-MM) path, where the GPU offset is
 *		taken directly from the relocation chunk
 *
 * Check if the next packet is a relocation packet3 and return the
 * corresponding reloc entry.
 **/
int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
                                struct radeon_bo_list **cs_reloc,
                                int nomm)
{
        struct radeon_cs_chunk *relocs_chunk;
        struct radeon_cs_packet p3reloc;
        unsigned idx;
        int r;

        if (p->chunk_relocs == NULL) {
                DRM_ERROR("No relocation chunk !\n");
                return -EINVAL;
        }
        *cs_reloc = NULL;
        relocs_chunk = p->chunk_relocs;
        r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
        if (r)
                return r;
        p->idx += p3reloc.count + 2;
        if (p3reloc.type != RADEON_PACKET_TYPE3 ||
            p3reloc.opcode != RADEON_PACKET3_NOP) {
                DRM_ERROR("No packet3 for relocation for packet at %d.\n",
                          p3reloc.idx);
                radeon_cs_dump_packet(p, &p3reloc);
                return -EINVAL;
        }
        idx = radeon_get_ib_value(p, p3reloc.idx + 1);
        if (idx >= relocs_chunk->length_dw) {
                DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
                          idx, relocs_chunk->length_dw);
                radeon_cs_dump_packet(p, &p3reloc);
                return -EINVAL;
        }
        /* FIXME: we assume reloc size is 4 dwords */
        if (nomm) {
                *cs_reloc = p->relocs;
                (*cs_reloc)->gpu_offset =
                        (u64)relocs_chunk->kdata[idx + 3] << 32;
                (*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
        } else
                *cs_reloc = &p->relocs[(idx / 4)];