/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */

#include "drmP.h"
#include "radeon_drm.h"
#include "radeon.h"
#include "radeon_reg.h"

/*
 * Common GART table functions.
 */
int radeon_gart_table_ram_alloc(struct radeon_device *rdev)
{
	void *ptr;

	ptr = pci_alloc_consistent(rdev->pdev, rdev->gart.table_size,
				   &rdev->gart.table_addr);
	if (ptr == NULL) {
		return -ENOMEM;
	}
#ifdef CONFIG_X86
	if (rdev->family == CHIP_RS400 || rdev->family == CHIP_RS480 ||
	    rdev->family == CHIP_RS690 || rdev->family == CHIP_RS740) {
		set_memory_uc((unsigned long)ptr,
			      rdev->gart.table_size >> PAGE_SHIFT);
	}
#endif
	rdev->gart.ptr = ptr;
	memset((void *)rdev->gart.ptr, 0, rdev->gart.table_size);
	return 0;
}

void radeon_gart_table_ram_free(struct radeon_device *rdev)
{
	if (rdev->gart.ptr == NULL) {
		return;
	}
#ifdef CONFIG_X86
	if (rdev->family == CHIP_RS400 || rdev->family == CHIP_RS480 ||
	    rdev->family == CHIP_RS690 || rdev->family == CHIP_RS740) {
		set_memory_wb((unsigned long)rdev->gart.ptr,
			      rdev->gart.table_size >> PAGE_SHIFT);
	}
#endif
	pci_free_consistent(rdev->pdev, rdev->gart.table_size,
			    (void *)rdev->gart.ptr,
			    rdev->gart.table_addr);
	rdev->gart.ptr = NULL;
	rdev->gart.table_addr = 0;
}

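/*
 * Editorial note (assumption, not enforced in this file): the GART table
 * can live either in cacheable system RAM (the *_ram_* helpers above,
 * used by the IGP chips checked for set_memory_uc/wb) or in a VRAM buffer
 * object (the *_vram_* helpers below); the ASIC-specific code picks which
 * pair of helpers to call.
 */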
int radeon_gart_table_vram_alloc(struct radeon_device *rdev)
{
	int r;

	if (rdev->gart.robj == NULL) {
		r = radeon_bo_create(rdev, rdev->gart.table_size,
				     PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
				     &rdev->gart.robj);
		if (r) {
			return r;
		}
	}
	return 0;
}

int radeon_gart_table_vram_pin(struct radeon_device *rdev)
{
	uint64_t gpu_addr;
	int r;

	r = radeon_bo_reserve(rdev->gart.robj, false);
	if (unlikely(r != 0))
		return r;
	r = radeon_bo_pin(rdev->gart.robj,
			  RADEON_GEM_DOMAIN_VRAM, &gpu_addr);
	if (r) {
		radeon_bo_unreserve(rdev->gart.robj);
		return r;
	}
	r = radeon_bo_kmap(rdev->gart.robj, &rdev->gart.ptr);
	if (r)
		radeon_bo_unpin(rdev->gart.robj);
	radeon_bo_unreserve(rdev->gart.robj);
	rdev->gart.table_addr = gpu_addr;
	return r;
}

void radeon_gart_table_vram_unpin(struct radeon_device *rdev)
{
	int r;

	if (rdev->gart.robj == NULL) {
		return;
	}
	r = radeon_bo_reserve(rdev->gart.robj, false);
	if (likely(r == 0)) {
		radeon_bo_kunmap(rdev->gart.robj);
		radeon_bo_unpin(rdev->gart.robj);
		radeon_bo_unreserve(rdev->gart.robj);
		rdev->gart.ptr = NULL;
	}
}

void radeon_gart_table_vram_free(struct radeon_device *rdev)
{
	if (rdev->gart.robj == NULL) {
		return;
	}
	radeon_gart_table_vram_unpin(rdev);
	radeon_bo_unref(&rdev->gart.robj);
}

/*
 * Common gart functions.
 */
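/*
 * PAGE_SIZE (the CPU page size) can be larger than RADEON_GPU_PAGE_SIZE,
 * so the bind/unbind helpers below walk two indices: 'p' counts CPU pages
 * in the pages[]/pages_addr[] arrays while 't' counts GPU page table
 * entries, writing PAGE_SIZE / RADEON_GPU_PAGE_SIZE entries per CPU page.
 */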
void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
			int pages)
{
	unsigned t;
	unsigned p;
	int i, j;
	u64 page_base;

	if (!rdev->gart.ready) {
		WARN(1, "trying to unbind memory from uninitialized GART !\n");
		return;
	}
	t = offset / RADEON_GPU_PAGE_SIZE;
	p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
	for (i = 0; i < pages; i++, p++) {
		if (rdev->gart.pages[p]) {
			rdev->gart.pages[p] = NULL;
			rdev->gart.pages_addr[p] = rdev->dummy_page.addr;
			page_base = rdev->gart.pages_addr[p];
			for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) {
				if (rdev->gart.ptr) {
					radeon_gart_set_page(rdev, t, page_base);
				}
				page_base += RADEON_GPU_PAGE_SIZE;
			}
		}
	}
	mb();
	radeon_gart_tlb_flush(rdev);
}

int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
		     int pages, struct page **pagelist, dma_addr_t *dma_addr)
{
	unsigned t;
	unsigned p;
	uint64_t page_base;
	int i, j;

	if (!rdev->gart.ready) {
		WARN(1, "trying to bind memory to uninitialized GART !\n");
		return -EINVAL;
	}
	t = offset / RADEON_GPU_PAGE_SIZE;
	p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);

	for (i = 0; i < pages; i++, p++) {
		rdev->gart.pages_addr[p] = dma_addr[i];
		rdev->gart.pages[p] = pagelist[i];
		if (rdev->gart.ptr) {
			page_base = rdev->gart.pages_addr[p];
			for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) {
				radeon_gart_set_page(rdev, t, page_base);
				page_base += RADEON_GPU_PAGE_SIZE;
			}
		}
	}
	mb();
	radeon_gart_tlb_flush(rdev);
	return 0;
}

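/*
 * radeon_gart_restore() below rewrites every GART entry from the saved
 * pages_addr[] array (entries with no backing page still point at the
 * dummy page set up in radeon_gart_init()); it is presumably used when
 * the hardware table has to be repopulated, e.g. after the table memory
 * was re-pinned.
 */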
void radeon_gart_restore(struct radeon_device *rdev)
{
	int i, j, t;
	u64 page_base;

	if (!rdev->gart.ptr) {
		return;
	}
	for (i = 0, t = 0; i < rdev->gart.num_cpu_pages; i++) {
		page_base = rdev->gart.pages_addr[i];
		for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) {
			radeon_gart_set_page(rdev, t, page_base);
			page_base += RADEON_GPU_PAGE_SIZE;
		}
	}
	mb();
	radeon_gart_tlb_flush(rdev);
}

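/*
 * radeon_gart_init() only allocates the CPU-side bookkeeping arrays
 * (pages[] and pages_addr[]); the hardware table itself comes from
 * radeon_gart_table_ram_alloc() or radeon_gart_table_vram_alloc() above.
 */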
int radeon_gart_init(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.pages) {
		return 0;
	}
	/* We need PAGE_SIZE >= RADEON_GPU_PAGE_SIZE */
	if (PAGE_SIZE < RADEON_GPU_PAGE_SIZE) {
		DRM_ERROR("Page size is smaller than GPU page size!\n");
		return -EINVAL;
	}
	r = radeon_dummy_page_init(rdev);
	if (r)
		return r;
	/* Compute table size */
	rdev->gart.num_cpu_pages = rdev->mc.gtt_size / PAGE_SIZE;
	rdev->gart.num_gpu_pages = rdev->mc.gtt_size / RADEON_GPU_PAGE_SIZE;
	DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n",
		 rdev->gart.num_cpu_pages, rdev->gart.num_gpu_pages);
	/* Allocate pages table */
	rdev->gart.pages = kzalloc(sizeof(void *) * rdev->gart.num_cpu_pages,
				   GFP_KERNEL);
	if (rdev->gart.pages == NULL) {
		radeon_gart_fini(rdev);
		return -ENOMEM;
	}
	rdev->gart.pages_addr = kzalloc(sizeof(dma_addr_t) *
					rdev->gart.num_cpu_pages, GFP_KERNEL);
	if (rdev->gart.pages_addr == NULL) {
		radeon_gart_fini(rdev);
		return -ENOMEM;
	}
	/* set GART entry to point to the dummy page by default */
	for (i = 0; i < rdev->gart.num_cpu_pages; i++) {
		rdev->gart.pages_addr[i] = rdev->dummy_page.addr;
	}
	return 0;
}

void radeon_gart_fini(struct radeon_device *rdev)
{
	if (rdev->gart.pages && rdev->gart.pages_addr && rdev->gart.ready) {
		/* unbind pages */
		radeon_gart_unbind(rdev, 0, rdev->gart.num_cpu_pages);
	}
	rdev->gart.ready = false;
	kfree(rdev->gart.pages);
	kfree(rdev->gart.pages_addr);
	rdev->gart.pages = NULL;
	rdev->gart.pages_addr = NULL;

	radeon_dummy_page_fini(rdev);
}

/*
 * vm helpers
 *
 * TODO bind a default page at vm initialization for default address
 */
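/*
 * All per-VM page tables are suballocated from a single SA manager buffer
 * of max_pfn * 8 bytes, i.e. 8 bytes per page table entry (see the
 * sa_bo.offset >> 3 arithmetic in radeon_vm_bind()).
 */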
int radeon_vm_manager_init(struct radeon_device *rdev)
{
	int r;

	rdev->vm_manager.enabled = false;

	/* mark first vm as always in use, it's the system one */
	r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
				      rdev->vm_manager.max_pfn * 8,
				      RADEON_GEM_DOMAIN_VRAM);
	if (r) {
		dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n",
			(rdev->vm_manager.max_pfn * 8) >> 10);
		return r;
	}

	r = rdev->vm_manager.funcs->init(rdev);
	if (r == 0)
		rdev->vm_manager.enabled = true;

	return r;
}

/* cs mutex must be locked */
static void radeon_vm_unbind_locked(struct radeon_device *rdev,
				    struct radeon_vm *vm)
{
	struct radeon_bo_va *bo_va;

	if (vm->id == -1) {
		return;
	}

	/* wait for vm use to end */
	if (vm->fence) {
		radeon_fence_wait(vm->fence, false);
		radeon_fence_unref(&vm->fence);
	}

	/* hw unbind */
	rdev->vm_manager.funcs->unbind(rdev, vm);
	rdev->vm_manager.use_bitmap &= ~(1 << vm->id);
	list_del_init(&vm->list);
	vm->id = -1;
	radeon_sa_bo_free(rdev, &vm->sa_bo);
	vm->pt = NULL;

	list_for_each_entry(bo_va, &vm->va, vm_list) {
		bo_va->valid = false;
	}
}

void radeon_vm_manager_fini(struct radeon_device *rdev)
{
	if (rdev->vm_manager.sa_manager.bo == NULL)
		return;
	radeon_vm_manager_suspend(rdev);
	rdev->vm_manager.funcs->fini(rdev);
	radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager);
	rdev->vm_manager.enabled = false;
}

int radeon_vm_manager_start(struct radeon_device *rdev)
{
	if (rdev->vm_manager.sa_manager.bo == NULL) {
		return -EINVAL;
	}
	return radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager);
}

int radeon_vm_manager_suspend(struct radeon_device *rdev)
{
	struct radeon_vm *vm, *tmp;

	radeon_mutex_lock(&rdev->cs_mutex);
	/* unbind all active vm */
	list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) {
		radeon_vm_unbind_locked(rdev, vm);
	}
	rdev->vm_manager.funcs->fini(rdev);
	radeon_mutex_unlock(&rdev->cs_mutex);
	return radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager);
}

/* cs mutex must be locked */
void radeon_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm)
{
	mutex_lock(&vm->mutex);
	radeon_vm_unbind_locked(rdev, vm);
	mutex_unlock(&vm->mutex);
}

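/*
 * radeon_vm_bind() below works by eviction: if the page table
 * suballocation fails, or no hardware VM id is free in use_bitmap, the
 * least recently used VM on lru_vm is unbound and the step is retried.
 */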
/* cs mutex and vm mutex must be locked */
int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm)
{
	struct radeon_vm *vm_evict;
	unsigned i;
	int id = -1, r;

	if (vm == NULL) {
		return -EINVAL;
	}

	if (vm->id != -1) {
		/* update lru */
		list_del_init(&vm->list);
		list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
		return 0;
	}

retry:
	r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, &vm->sa_bo,
			     RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8),
			     RADEON_GPU_PAGE_SIZE);
	if (r) {
		if (list_empty(&rdev->vm_manager.lru_vm)) {
			return r;
		}
		vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, struct radeon_vm, list);
		radeon_vm_unbind(rdev, vm_evict);
		goto retry;
	}
	vm->pt = rdev->vm_manager.sa_manager.cpu_ptr;
	vm->pt += (vm->sa_bo.offset >> 3);
	vm->pt_gpu_addr = rdev->vm_manager.sa_manager.gpu_addr;
	vm->pt_gpu_addr += vm->sa_bo.offset;
	memset(vm->pt, 0, RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8));

retry_id:
	/* search for free vm */
	for (i = 0; i < rdev->vm_manager.nvm; i++) {
		if (!(rdev->vm_manager.use_bitmap & (1 << i))) {
			id = i;
			break;
		}
	}
	/* evict vm if necessary */
	if (id == -1) {
		vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, struct radeon_vm, list);
		radeon_vm_unbind(rdev, vm_evict);
		goto retry_id;
	}

	/* do hw bind */
	r = rdev->vm_manager.funcs->bind(rdev, vm, id);
	if (r) {
		radeon_sa_bo_free(rdev, &vm->sa_bo);
		return r;
	}
	rdev->vm_manager.use_bitmap |= 1 << id;
	vm->id = id;
	list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
	return radeon_vm_bo_update_pte(rdev, vm, rdev->ib_pool.sa_manager.bo,
				       &rdev->ib_pool.sa_manager.bo->tbo.mem);
}

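/*
 * Per-VM virtual address ranges are kept on vm->va sorted by offset;
 * radeon_vm_bo_add() below scans that list to find the insertion point
 * and rejects ranges that overlap an existing bo_va.
 */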
/* object has to be reserved */
int radeon_vm_bo_add(struct radeon_device *rdev,
		     struct radeon_vm *vm,
		     struct radeon_bo *bo,
		     uint64_t offset,
		     uint32_t flags)
{
	struct radeon_bo_va *bo_va, *tmp;
	struct list_head *head;
	uint64_t size = radeon_bo_size(bo), last_offset = 0;
	unsigned last_pfn;

	bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
	if (bo_va == NULL) {
		return -ENOMEM;
	}
	bo_va->vm = vm;
	bo_va->bo = bo;
	bo_va->soffset = offset;
	bo_va->eoffset = offset + size;
	bo_va->flags = flags;
	bo_va->valid = false;
	INIT_LIST_HEAD(&bo_va->bo_list);
	INIT_LIST_HEAD(&bo_va->vm_list);
	/* make sure object fit at this offset */
	if (bo_va->soffset >= bo_va->eoffset) {
		kfree(bo_va);
		return -EINVAL;
	}

	last_pfn = bo_va->eoffset / RADEON_GPU_PAGE_SIZE;
	if (last_pfn > rdev->vm_manager.max_pfn) {
		kfree(bo_va);
		dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n",
			last_pfn, rdev->vm_manager.max_pfn);
		return -EINVAL;
	}

	mutex_lock(&vm->mutex);
	if (last_pfn > vm->last_pfn) {
		/* grow va space 32M by 32M */
		unsigned align = ((32 << 20) >> 12) - 1;
		radeon_mutex_lock(&rdev->cs_mutex);
		radeon_vm_unbind_locked(rdev, vm);
		radeon_mutex_unlock(&rdev->cs_mutex);
		vm->last_pfn = (last_pfn + align) & ~align;
	}
	head = &vm->va;
	last_offset = 0;
	list_for_each_entry(tmp, &vm->va, vm_list) {
		if (bo_va->soffset >= last_offset && bo_va->eoffset < tmp->soffset) {
			/* bo can be added before this one */
			break;
		}
		if (bo_va->soffset >= tmp->soffset && bo_va->soffset < tmp->eoffset) {
			/* bo and tmp overlap, invalid offset */
			dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n",
				bo, (unsigned)bo_va->soffset, tmp->bo,
				(unsigned)tmp->soffset, (unsigned)tmp->eoffset);
			kfree(bo_va);
			mutex_unlock(&vm->mutex);
			return -EINVAL;
		}
		last_offset = tmp->eoffset;
		head = &tmp->vm_list;
	}
	list_add(&bo_va->vm_list, head);
	list_add_tail(&bo_va->bo_list, &bo->va);
	mutex_unlock(&vm->mutex);
	return 0;
}

static u64 radeon_vm_get_addr(struct radeon_device *rdev,
			      struct ttm_mem_reg *mem,
			      unsigned pfn)
{
	u64 addr = 0;

	switch (mem->mem_type) {
	case TTM_PL_VRAM:
		addr = (mem->start << PAGE_SHIFT);
		addr += pfn * RADEON_GPU_PAGE_SIZE;
		addr += rdev->vm_manager.vram_base_offset;
		break;
	case TTM_PL_TT:
		/* offset inside page table */
		addr = mem->start << PAGE_SHIFT;
		addr += pfn * RADEON_GPU_PAGE_SIZE;
		addr = addr >> PAGE_SHIFT;
		/* page table offset */
		addr = rdev->gart.pages_addr[addr];
		/* in case cpu page size != gpu page size */
		addr += (pfn * RADEON_GPU_PAGE_SIZE) & (~PAGE_MASK);
		break;
	default:
		break;
	}
	return addr;
}

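/*
 * radeon_vm_bo_update_pte() below is also the teardown path: when called
 * with mem == NULL (see radeon_vm_bo_rmv()) the range is rewritten with a
 * zero address and without RADEON_VM_PAGE_VALID.
 */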
/* object has to be reserved, cs mutex and vm mutex must be locked */
int radeon_vm_bo_update_pte(struct radeon_device *rdev,
			    struct radeon_vm *vm,
			    struct radeon_bo *bo,
			    struct ttm_mem_reg *mem)
{
	struct radeon_bo_va *bo_va;
	unsigned ngpu_pages, i;
	uint64_t addr = 0, pfn;
	uint32_t flags;

	/* nothing to do if vm isn't bound */
	if (vm->id == -1)
		return 0;

	bo_va = radeon_bo_va(bo, vm);
	if (bo_va == NULL) {
		dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
		return -EINVAL;
	}

	if (bo_va->valid && mem)
		return 0;

	ngpu_pages = radeon_bo_ngpu_pages(bo);
	bo_va->flags &= ~RADEON_VM_PAGE_VALID;
	bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
	if (mem) {
		if (mem->mem_type != TTM_PL_SYSTEM) {
			bo_va->flags |= RADEON_VM_PAGE_VALID;
			bo_va->valid = true;
		}
		if (mem->mem_type == TTM_PL_TT) {
			bo_va->flags |= RADEON_VM_PAGE_SYSTEM;
		}
	}
	pfn = bo_va->soffset / RADEON_GPU_PAGE_SIZE;
	flags = rdev->vm_manager.funcs->page_flags(rdev, bo_va->vm, bo_va->flags);
	for (i = 0, addr = 0; i < ngpu_pages; i++) {
		if (mem && bo_va->valid) {
			addr = radeon_vm_get_addr(rdev, mem, i);
		}
		rdev->vm_manager.funcs->set_page(rdev, bo_va->vm, i + pfn, addr, flags);
	}
	rdev->vm_manager.funcs->tlb_flush(rdev, bo_va->vm);
	return 0;
}

/* object has to be reserved */
int radeon_vm_bo_rmv(struct radeon_device *rdev,
		     struct radeon_vm *vm,
		     struct radeon_bo *bo)
{
	struct radeon_bo_va *bo_va;

	bo_va = radeon_bo_va(bo, vm);
	if (bo_va == NULL)
		return 0;

	mutex_lock(&vm->mutex);
	radeon_mutex_lock(&rdev->cs_mutex);
	radeon_vm_bo_update_pte(rdev, vm, bo, NULL);
	radeon_mutex_unlock(&rdev->cs_mutex);
	list_del(&bo_va->vm_list);
	mutex_unlock(&vm->mutex);
	list_del(&bo_va->bo_list);

	kfree(bo_va);
	return 0;
}

void radeon_vm_bo_invalidate(struct radeon_device *rdev,
			     struct radeon_bo *bo)
{
	struct radeon_bo_va *bo_va;

	BUG_ON(!atomic_read(&bo->tbo.reserved));
	list_for_each_entry(bo_va, &bo->va, bo_list) {
		bo_va->valid = false;
	}
}

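/*
 * radeon_vm_init() below maps the ib pool buffer at virtual address 0 in
 * every VM (read only, snooped); which ASIC-specific consumer relies on
 * that mapping is an assumption not spelled out in this file.
 */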
int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
{
	int r;

	vm->id = -1;
	vm->fence = NULL;
	mutex_init(&vm->mutex);
	INIT_LIST_HEAD(&vm->list);
	INIT_LIST_HEAD(&vm->va);
	vm->last_pfn = 0;
	/* map the ib pool buffer at 0 in virtual address space, set
	 * read only
	 */
	r = radeon_vm_bo_add(rdev, vm, rdev->ib_pool.sa_manager.bo, 0,
			     RADEON_VM_PAGE_READABLE | RADEON_VM_PAGE_SNOOPED);
	return r;
}

void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
{
	struct radeon_bo_va *bo_va, *tmp;
	int r;

	mutex_lock(&vm->mutex);

	radeon_mutex_lock(&rdev->cs_mutex);
	radeon_vm_unbind_locked(rdev, vm);
	radeon_mutex_unlock(&rdev->cs_mutex);

	/* remove all bo */
	r = radeon_bo_reserve(rdev->ib_pool.sa_manager.bo, false);
	if (!r) {
		bo_va = radeon_bo_va(rdev->ib_pool.sa_manager.bo, vm);
		list_del_init(&bo_va->bo_list);
		list_del_init(&bo_va->vm_list);
		radeon_bo_unreserve(rdev->ib_pool.sa_manager.bo);
		kfree(bo_va);
	}
	if (!list_empty(&vm->va)) {
		dev_err(rdev->dev, "still active bo inside vm\n");
	}
	list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) {
		list_del_init(&bo_va->vm_list);
		r = radeon_bo_reserve(bo_va->bo, false);
		if (!r) {
			list_del_init(&bo_va->bo_list);
			radeon_bo_unreserve(bo_va->bo);
			kfree(bo_va);
		}
	}
	mutex_unlock(&vm->mutex);
}