/*
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
 */
#include <drm/ttm/ttm_bo_api.h>
#include <drm/ttm/ttm_bo_driver.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_module.h>
#include <drm/ttm/ttm_page_alloc.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/swiotlb.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/debugfs.h>
#include <linux/iommu.h>
#include "amdgpu.h"
#include "amdgpu_object.h"
#include "amdgpu_trace.h"
#include "bif/bif_4_1_d.h"

#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
			     struct ttm_mem_reg *mem, unsigned num_pages,
			     uint64_t offset, unsigned window,
			     struct amdgpu_ring *ring,
			     uint64_t *addr);

static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev);
/*
 * Global memory.
 */
static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref)
{
	return ttm_mem_global_init(ref->object);
}

static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
{
	ttm_mem_global_release(ref->object);
}
static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
{
	struct drm_global_reference *global_ref;
	struct amdgpu_ring *ring;
	struct drm_sched_rq *rq;
	int r;

	adev->mman.mem_global_referenced = false;
	global_ref = &adev->mman.mem_global_ref;
	global_ref->global_type = DRM_GLOBAL_TTM_MEM;
	global_ref->size = sizeof(struct ttm_mem_global);
	global_ref->init = &amdgpu_ttm_mem_global_init;
	global_ref->release = &amdgpu_ttm_mem_global_release;
	r = drm_global_item_ref(global_ref);
	if (r) {
		DRM_ERROR("Failed setting up TTM memory accounting "
			  "subsystem.\n");
		goto error_mem;
	}

	adev->mman.bo_global_ref.mem_glob =
		adev->mman.mem_global_ref.object;
	global_ref = &adev->mman.bo_global_ref.ref;
	global_ref->global_type = DRM_GLOBAL_TTM_BO;
	global_ref->size = sizeof(struct ttm_bo_global);
	global_ref->init = &ttm_bo_global_init;
	global_ref->release = &ttm_bo_global_release;
	r = drm_global_item_ref(global_ref);
	if (r) {
		DRM_ERROR("Failed setting up TTM BO subsystem.\n");
		goto error_bo;
	}

	mutex_init(&adev->mman.gtt_window_lock);

	ring = adev->mman.buffer_funcs_ring;
	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
	r = drm_sched_entity_init(&ring->sched, &adev->mman.entity,
				  rq, amdgpu_sched_jobs, NULL);
	if (r) {
		DRM_ERROR("Failed setting up TTM BO move run queue.\n");
		goto error_entity;
	}

	adev->mman.mem_global_referenced = true;

	return 0;

error_entity:
	drm_global_item_unref(&adev->mman.bo_global_ref.ref);
error_bo:
	drm_global_item_unref(&adev->mman.mem_global_ref);
error_mem:
	return r;
}
static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
{
	if (adev->mman.mem_global_referenced) {
		drm_sched_entity_fini(adev->mman.entity.sched,
				      &adev->mman.entity);
		mutex_destroy(&adev->mman.gtt_window_lock);
		drm_global_item_unref(&adev->mman.bo_global_ref.ref);
		drm_global_item_unref(&adev->mman.mem_global_ref);
		adev->mman.mem_global_referenced = false;
	}
}
static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
{
	return 0;
}
static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
				struct ttm_mem_type_manager *man)
{
	struct amdgpu_device *adev;

	adev = amdgpu_ttm_adev(bdev);

	switch (type) {
	case TTM_PL_SYSTEM:
		/* System memory */
		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
		man->available_caching = TTM_PL_MASK_CACHING;
		man->default_caching = TTM_PL_FLAG_CACHED;
		break;
	case TTM_PL_TT:
		man->func = &amdgpu_gtt_mgr_func;
		man->gpu_offset = adev->mc.gart_start;
		man->available_caching = TTM_PL_MASK_CACHING;
		man->default_caching = TTM_PL_FLAG_CACHED;
		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE | TTM_MEMTYPE_FLAG_CMA;
		break;
	case TTM_PL_VRAM:
		/* "On-card" video ram */
		man->func = &amdgpu_vram_mgr_func;
		man->gpu_offset = adev->mc.vram_start;
		man->flags = TTM_MEMTYPE_FLAG_FIXED |
			     TTM_MEMTYPE_FLAG_MAPPABLE;
		man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
		man->default_caching = TTM_PL_FLAG_WC;
		break;
	case AMDGPU_PL_GDS:
	case AMDGPU_PL_GWS:
	case AMDGPU_PL_OA:
		/* On-chip GDS memory*/
		man->func = &ttm_bo_manager_func;
		man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_CMA;
		man->available_caching = TTM_PL_FLAG_UNCACHED;
		man->default_caching = TTM_PL_FLAG_UNCACHED;
		break;
	default:
		DRM_ERROR("Unsupported memory type %u\n", (unsigned)type);
		return -EINVAL;
	}
	return 0;
}
static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
			       struct ttm_placement *placement)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct amdgpu_bo *abo;
	static const struct ttm_place placements = {
		.fpfn = 0,
		.lpfn = 0,
		.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
	};

	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) {
		placement->placement = &placements;
		placement->busy_placement = &placements;
		placement->num_placement = 1;
		placement->num_busy_placement = 1;
		return;
	}

	abo = ttm_to_amdgpu_bo(bo);
	switch (bo->mem.mem_type) {
	case TTM_PL_VRAM:
		if (adev->mman.buffer_funcs &&
		    adev->mman.buffer_funcs_ring &&
		    adev->mman.buffer_funcs_ring->ready == false) {
			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
		} else if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
			   !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
			unsigned fpfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
			struct drm_mm_node *node = bo->mem.mm_node;
			unsigned long pages_left;

			for (pages_left = bo->mem.num_pages;
			     pages_left;
			     pages_left -= node->size, node++) {
				if (node->start < fpfn)
					break;
			}

			if (!pages_left)
				goto gtt;

			/* Try evicting to the CPU inaccessible part of VRAM
			 * first, but only set GTT as busy placement, so this
			 * BO will be evicted to GTT rather than causing other
			 * BOs to be evicted from VRAM
			 */
			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
							 AMDGPU_GEM_DOMAIN_GTT);
			abo->placements[0].fpfn = fpfn;
			abo->placements[0].lpfn = 0;
			abo->placement.busy_placement = &abo->placements[1];
			abo->placement.num_busy_placement = 1;
		} else {
gtt:
			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
		}
		break;
	case TTM_PL_TT:
	default:
		amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
	}
	*placement = abo->placement;
}
static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
{
	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);

	if (amdgpu_ttm_tt_get_usermm(bo->ttm))
		return -EPERM;
	return drm_vma_node_verify_access(&abo->gem_base.vma_node,
					  filp->private_data);
}
static void amdgpu_move_null(struct ttm_buffer_object *bo,
			     struct ttm_mem_reg *new_mem)
{
	struct ttm_mem_reg *old_mem = &bo->mem;

	BUG_ON(old_mem->mm_node != NULL);
	*old_mem = *new_mem;
	new_mem->mm_node = NULL;
}
static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
				    struct drm_mm_node *mm_node,
				    struct ttm_mem_reg *mem)
{
	uint64_t addr = 0;

	if (mem->mem_type != TTM_PL_TT || amdgpu_gtt_mgr_has_gart_addr(mem)) {
		addr = mm_node->start << PAGE_SHIFT;
		addr += bo->bdev->man[mem->mem_type].gpu_offset;
	}
	return addr;
}
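
/*
 * Illustrative sketch (not part of the driver; the numbers are invented):
 * for a node starting at page 0x100 in a domain whose manager reports
 * gpu_offset 0x8000000000, the helper above yields
 *
 *	addr = (0x100 << PAGE_SHIFT) + 0x8000000000;
 *
 * i.e. the node's start converted to a byte offset plus the base GPU
 * address of the placement domain.
 */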
/**
 * amdgpu_find_mm_node - Helper function finds the drm_mm_node
 *  corresponding to @offset. It also modifies the offset to be
 *  within the drm_mm_node returned
 */
static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
					       unsigned long *offset)
{
	struct drm_mm_node *mm_node = mem->mm_node;

	while (*offset >= (mm_node->size << PAGE_SHIFT)) {
		*offset -= (mm_node->size << PAGE_SHIFT);
		++mm_node;
	}
	return mm_node;
}
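
/*
 * Illustrative sketch (not part of the driver; bo is an assumed BO backed
 * by two 4-page nodes):
 *
 *	unsigned long off = 5 * PAGE_SIZE;
 *	struct drm_mm_node *node = amdgpu_find_mm_node(&bo->mem, &off);
 *
 * The walk above skips the first 4-page node, returns the second one and
 * leaves off == 1 * PAGE_SIZE, so callers always get a node-relative offset.
 */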
/**
 * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
 *
 * The function copies @size bytes from {src->mem + src->offset} to
 * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
 * move and different for a BO to BO copy.
 *
 * @f: Returns the last fence if multiple jobs are submitted.
 */
int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
			       struct amdgpu_copy_mem *src,
			       struct amdgpu_copy_mem *dst,
			       uint64_t size,
			       struct reservation_object *resv,
			       struct dma_fence **f)
{
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
	struct drm_mm_node *src_mm, *dst_mm;
	uint64_t src_node_start, dst_node_start, src_node_size,
		 dst_node_size, src_page_offset, dst_page_offset;
	struct dma_fence *fence = NULL;
	int r = 0;
	const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
					AMDGPU_GPU_PAGE_SIZE);

	if (!ring->ready) {
		DRM_ERROR("Trying to move memory with ring turned off.\n");
		return -EINVAL;
	}

	src_mm = amdgpu_find_mm_node(src->mem, &src->offset);
	src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
			 src->offset;
	src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
	src_page_offset = src_node_start & (PAGE_SIZE - 1);

	dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset);
	dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
			 dst->offset;
	dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
	dst_page_offset = dst_node_start & (PAGE_SIZE - 1);

	mutex_lock(&adev->mman.gtt_window_lock);

	while (size) {
		unsigned long cur_size;
		uint64_t from = src_node_start, to = dst_node_start;
		struct dma_fence *next;

		/* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
		 * begins at an offset, then adjust the size accordingly
		 */
		cur_size = min3(min(src_node_size, dst_node_size), size,
				GTT_MAX_BYTES);
		if (cur_size + src_page_offset > GTT_MAX_BYTES ||
		    cur_size + dst_page_offset > GTT_MAX_BYTES)
			cur_size -= max(src_page_offset, dst_page_offset);

		/* Map only what needs to be accessed. Map src to window 0 and
		 * dst to window 1
		 */
		if (src->mem->mem_type == TTM_PL_TT &&
		    !amdgpu_gtt_mgr_has_gart_addr(src->mem)) {
			r = amdgpu_map_buffer(src->bo, src->mem,
					      PFN_UP(cur_size + src_page_offset),
					      src_node_start, 0, ring,
					      &from);
			if (r)
				goto error;
			/* Adjust the offset because amdgpu_map_buffer returns
			 * start of mapped page
			 */
			from += src_page_offset;
		}

		if (dst->mem->mem_type == TTM_PL_TT &&
		    !amdgpu_gtt_mgr_has_gart_addr(dst->mem)) {
			r = amdgpu_map_buffer(dst->bo, dst->mem,
					      PFN_UP(cur_size + dst_page_offset),
					      dst_node_start, 1, ring,
					      &to);
			if (r)
				goto error;
			to += dst_page_offset;
		}

		r = amdgpu_copy_buffer(ring, from, to, cur_size,
				       resv, &next, false, true);
		if (r)
			goto error;

		dma_fence_put(fence);
		fence = next;

		size -= cur_size;
		if (!size)
			break;

		src_node_size -= cur_size;
		if (!src_node_size) {
			src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
							     src->mem);
			src_node_size = (src_mm->size << PAGE_SHIFT);
		} else {
			src_node_start += cur_size;
			src_page_offset = src_node_start & (PAGE_SIZE - 1);
		}
		dst_node_size -= cur_size;
		if (!dst_node_size) {
			dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
							     dst->mem);
			dst_node_size = (dst_mm->size << PAGE_SHIFT);
		} else {
			dst_node_start += cur_size;
			dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
		}
	}
error:
	mutex_unlock(&adev->mman.gtt_window_lock);
	if (f)
		*f = dma_fence_get(fence);
	dma_fence_put(fence);
	return r;
}
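
/*
 * Usage sketch (illustrative only, not part of the driver): copying @size
 * bytes from one BO into another; sbo, dbo and size are assumptions made
 * for the example.
 *
 *	struct amdgpu_copy_mem src = { .bo = &sbo->tbo, .mem = &sbo->tbo.mem, .offset = 0 };
 *	struct amdgpu_copy_mem dst = { .bo = &dbo->tbo, .mem = &dbo->tbo.mem, .offset = 0 };
 *	struct dma_fence *fence = NULL;
 *	int r;
 *
 *	r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, size,
 *				       sbo->tbo.resv, &fence);
 *	if (!r && fence)
 *		dma_fence_wait(fence, false);
 *	dma_fence_put(fence);
 */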
static int amdgpu_move_blit(struct ttm_buffer_object *bo,
			    bool evict, bool no_wait_gpu,
			    struct ttm_mem_reg *new_mem,
			    struct ttm_mem_reg *old_mem)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct amdgpu_copy_mem src, dst;
	struct dma_fence *fence = NULL;
	int r;

	src.bo = bo;
	dst.bo = bo;
	src.mem = old_mem;
	dst.mem = new_mem;
	src.offset = 0;
	dst.offset = 0;

	r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
				       new_mem->num_pages << PAGE_SHIFT,
				       bo->resv, &fence);
	if (r)
		goto error;

	r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
	dma_fence_put(fence);
	return r;

error:
	if (fence)
		dma_fence_wait(fence, false);
	dma_fence_put(fence);
	return r;
}
static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
				struct ttm_operation_ctx *ctx,
				struct ttm_mem_reg *new_mem)
{
	struct amdgpu_device *adev;
	struct ttm_mem_reg *old_mem = &bo->mem;
	struct ttm_mem_reg tmp_mem;
	struct ttm_place placements;
	struct ttm_placement placement;
	int r;

	adev = amdgpu_ttm_adev(bo->bdev);
	tmp_mem = *new_mem;
	tmp_mem.mm_node = NULL;
	placement.num_placement = 1;
	placement.placement = &placements;
	placement.num_busy_placement = 1;
	placement.busy_placement = &placements;
	placements.fpfn = 0;
	placements.lpfn = 0;
	placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
	r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
	if (unlikely(r))
		return r;

	r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement);
	if (unlikely(r))
		goto out_cleanup;

	r = ttm_tt_bind(bo->ttm, &tmp_mem, ctx);
	if (unlikely(r))
		goto out_cleanup;

	r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, &tmp_mem, old_mem);
	if (unlikely(r))
		goto out_cleanup;

	r = ttm_bo_move_ttm(bo, ctx, new_mem);
out_cleanup:
	ttm_bo_mem_put(bo, &tmp_mem);
	return r;
}
static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
				struct ttm_operation_ctx *ctx,
				struct ttm_mem_reg *new_mem)
{
	struct amdgpu_device *adev;
	struct ttm_mem_reg *old_mem = &bo->mem;
	struct ttm_mem_reg tmp_mem;
	struct ttm_placement placement;
	struct ttm_place placements;
	int r;

	adev = amdgpu_ttm_adev(bo->bdev);
	tmp_mem = *new_mem;
	tmp_mem.mm_node = NULL;
	placement.num_placement = 1;
	placement.placement = &placements;
	placement.num_busy_placement = 1;
	placement.busy_placement = &placements;
	placements.fpfn = 0;
	placements.lpfn = 0;
	placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
	r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
	if (unlikely(r))
		return r;

	r = ttm_bo_move_ttm(bo, ctx, &tmp_mem);
	if (unlikely(r))
		goto out_cleanup;

	r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, new_mem, old_mem);
	if (unlikely(r))
		goto out_cleanup;

out_cleanup:
	ttm_bo_mem_put(bo, &tmp_mem);
	return r;
}
static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
			  struct ttm_operation_ctx *ctx,
			  struct ttm_mem_reg *new_mem)
{
	struct amdgpu_device *adev;
	struct amdgpu_bo *abo;
	struct ttm_mem_reg *old_mem = &bo->mem;
	int r;

	/* Can't move a pinned BO */
	abo = ttm_to_amdgpu_bo(bo);
	if (WARN_ON_ONCE(abo->pin_count > 0))
		return -EINVAL;

	adev = amdgpu_ttm_adev(bo->bdev);

	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
		amdgpu_move_null(bo, new_mem);
		return 0;
	}
	if ((old_mem->mem_type == TTM_PL_TT &&
	     new_mem->mem_type == TTM_PL_SYSTEM) ||
	    (old_mem->mem_type == TTM_PL_SYSTEM &&
	     new_mem->mem_type == TTM_PL_TT)) {
		/* bind is enough */
		amdgpu_move_null(bo, new_mem);
		return 0;
	}
	if (adev->mman.buffer_funcs == NULL ||
	    adev->mman.buffer_funcs_ring == NULL ||
	    !adev->mman.buffer_funcs_ring->ready) {
		/* use memcpy */
		goto memcpy;
	}

	if (old_mem->mem_type == TTM_PL_VRAM &&
	    new_mem->mem_type == TTM_PL_SYSTEM) {
		r = amdgpu_move_vram_ram(bo, evict, ctx, new_mem);
	} else if (old_mem->mem_type == TTM_PL_SYSTEM &&
		   new_mem->mem_type == TTM_PL_VRAM) {
		r = amdgpu_move_ram_vram(bo, evict, ctx, new_mem);
	} else {
		r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu,
				     new_mem, old_mem);
	}

	if (r) {
memcpy:
		r = ttm_bo_move_memcpy(bo, ctx, new_mem);
		if (r)
			return r;
	}

	if (bo->type == ttm_bo_type_device &&
	    new_mem->mem_type == TTM_PL_VRAM &&
	    old_mem->mem_type != TTM_PL_VRAM) {
		/* amdgpu_bo_fault_reserve_notify will re-set this if the CPU
		 * accesses the BO after it's moved.
		 */
		abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	}

	/* update statistics */
	atomic64_add((u64)bo->num_pages << PAGE_SHIFT, &adev->num_bytes_moved);
	return 0;
}
static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
{
	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);

	mem->bus.addr = NULL;
	mem->bus.offset = 0;
	mem->bus.size = mem->num_pages << PAGE_SHIFT;
	mem->bus.base = 0;
	mem->bus.is_iomem = false;
	if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
		return -EINVAL;
	switch (mem->mem_type) {
	case TTM_PL_SYSTEM:
		/* system memory */
		return 0;
	case TTM_PL_TT:
		break;
	case TTM_PL_VRAM:
		mem->bus.offset = mem->start << PAGE_SHIFT;
		/* check if it's visible */
		if ((mem->bus.offset + mem->bus.size) > adev->mc.visible_vram_size)
			return -EINVAL;
		mem->bus.base = adev->mc.aper_base;
		mem->bus.is_iomem = true;
		break;
	default:
		return -EINVAL;
	}
	return 0;
}
static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
{
}
static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
					   unsigned long page_offset)
{
	struct drm_mm_node *mm;
	unsigned long offset = (page_offset << PAGE_SHIFT);

	mm = amdgpu_find_mm_node(&bo->mem, &offset);
	return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start +
		(offset >> PAGE_SHIFT);
}
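
/*
 * Illustrative sketch (numbers invented): with an aperture base of
 * 0xe0000000, a node starting at page 0x20 and a fault at the third page
 * inside that node, the helper above returns
 *
 *	(0xe0000000 >> PAGE_SHIFT) + 0x20 + 2
 *
 * i.e. a CPU-visible page frame number rather than a GPU address.
 */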
/*
 * TTM backend functions.
 */
struct amdgpu_ttm_gup_task_list {
	struct list_head	list;
	struct task_struct	*task;
};

struct amdgpu_ttm_tt {
	struct ttm_dma_tt	ttm;
	struct amdgpu_device	*adev;
	u64			offset;
	uint64_t		userptr;
	struct mm_struct	*usermm;
	uint32_t		userflags;
	spinlock_t		guptasklock;
	struct list_head	guptasks;
	atomic_t		mmu_invalidations;
	uint32_t		last_set_pages;
};
int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	unsigned int flags = 0;
	unsigned pinned = 0;
	int r;

	if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
		flags |= FOLL_WRITE;

	down_read(&current->mm->mmap_sem);

	if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
		/* check that we only use anonymous memory
		   to prevent problems with writeback */
		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
		struct vm_area_struct *vma;

		vma = find_vma(gtt->usermm, gtt->userptr);
		if (!vma || vma->vm_file || vma->vm_end < end) {
			up_read(&current->mm->mmap_sem);
			return -EPERM;
		}
	}

	do {
		unsigned num_pages = ttm->num_pages - pinned;
		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
		struct page **p = pages + pinned;
		struct amdgpu_ttm_gup_task_list guptask;

		guptask.task = current;
		spin_lock(&gtt->guptasklock);
		list_add(&guptask.list, &gtt->guptasks);
		spin_unlock(&gtt->guptasklock);

		r = get_user_pages(userptr, num_pages, flags, p, NULL);

		spin_lock(&gtt->guptasklock);
		list_del(&guptask.list);
		spin_unlock(&gtt->guptasklock);

		if (r < 0)
			goto release_pages;

		pinned += r;

	} while (pinned < ttm->num_pages);

	up_read(&current->mm->mmap_sem);
	return 0;

release_pages:
	release_pages(pages, pinned);
	up_read(&current->mm->mmap_sem);
	return r;
}
void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	unsigned i;

	gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);
	for (i = 0; i < ttm->num_pages; ++i) {
		if (ttm->pages[i])
			put_page(ttm->pages[i]);

		ttm->pages[i] = pages ? pages[i] : NULL;
	}
}
void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	unsigned i;

	for (i = 0; i < ttm->num_pages; ++i) {
		struct page *page = ttm->pages[i];

		if (!page)
			continue;

		if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
			set_page_dirty(page);

		mark_page_accessed(page);
	}
}
/* prepare the sg table with the user pages */
static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	unsigned nents;
	int r;

	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
	enum dma_data_direction direction = write ?
		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;

	r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
				      ttm->num_pages << PAGE_SHIFT,
				      GFP_KERNEL);
	if (r)
		goto release_sg;

	r = -ENOMEM;
	nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
	if (nents != ttm->sg->nents)
		goto release_sg;

	drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
					 gtt->ttm.dma_address, ttm->num_pages);

	return 0;

release_sg:
	kfree(ttm->sg);
	return r;
}
static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
	enum dma_data_direction direction = write ?
		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;

	/* double check that we don't free the table twice */
	if (!ttm->sg->sgl)
		return;

	/* free the sg table and pages again */
	dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);

	amdgpu_ttm_tt_mark_user_pages(ttm);

	sg_free_table(ttm->sg);
}
static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
				   struct ttm_mem_reg *bo_mem)
{
	struct amdgpu_ttm_tt *gtt = (void*)ttm;
	uint64_t flags;
	int r = 0;

	if (gtt->userptr) {
		r = amdgpu_ttm_tt_pin_userptr(ttm);
		if (r) {
			DRM_ERROR("failed to pin userptr\n");
			return r;
		}
	}
	if (!ttm->num_pages) {
		WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
		     ttm->num_pages, bo_mem, ttm);
	}

	if (bo_mem->mem_type == AMDGPU_PL_GDS ||
	    bo_mem->mem_type == AMDGPU_PL_GWS ||
	    bo_mem->mem_type == AMDGPU_PL_OA)
		return -EINVAL;

	if (!amdgpu_gtt_mgr_has_gart_addr(bo_mem)) {
		gtt->offset = AMDGPU_BO_INVALID_OFFSET;
		return 0;
	}

	flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
	gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
	r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
			     ttm->pages, gtt->ttm.dma_address, flags);

	if (r)
		DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
			  ttm->num_pages, gtt->offset);
	return r;
}
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct ttm_operation_ctx ctx = { false, false };
	struct amdgpu_ttm_tt *gtt = (void*)bo->ttm;
	struct ttm_mem_reg tmp;
	struct ttm_placement placement;
	struct ttm_place placements;
	uint64_t flags;
	int r;

	if (bo->mem.mem_type != TTM_PL_TT ||
	    amdgpu_gtt_mgr_has_gart_addr(&bo->mem))
		return 0;

	/* allocate GART space */
	tmp = bo->mem;
	tmp.mm_node = NULL;
	placement.num_placement = 1;
	placement.placement = &placements;
	placement.num_busy_placement = 1;
	placement.busy_placement = &placements;
	placements.fpfn = 0;
	placements.lpfn = adev->mc.gart_size >> PAGE_SHIFT;
	placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) |
		TTM_PL_FLAG_TT;

	r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx);
	if (unlikely(r))
		return r;

	/* compute PTE flags for this buffer object and bind it */
	flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp);
	gtt->offset = (u64)tmp.start << PAGE_SHIFT;
	r = amdgpu_gart_bind(adev, gtt->offset, bo->ttm->num_pages,
			     bo->ttm->pages, gtt->ttm.dma_address, flags);
	if (unlikely(r)) {
		ttm_bo_mem_put(bo, &tmp);
		return r;
	}

	ttm_bo_mem_put(bo, &bo->mem);
	bo->mem = tmp;
	bo->offset = (bo->mem.start << PAGE_SHIFT) +
		bo->bdev->man[bo->mem.mem_type].gpu_offset;

	return 0;
}
int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
	struct amdgpu_ttm_tt *gtt = (void *)tbo->ttm;
	uint64_t flags;
	int r;

	if (!gtt)
		return 0;

	flags = amdgpu_ttm_tt_pte_flags(adev, &gtt->ttm.ttm, &tbo->mem);
	r = amdgpu_gart_bind(adev, gtt->offset, gtt->ttm.ttm.num_pages,
			     gtt->ttm.ttm.pages, gtt->ttm.dma_address, flags);
	if (r)
		DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
			  gtt->ttm.ttm.num_pages, gtt->offset);
	return r;
}
static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	int r;

	if (gtt->userptr)
		amdgpu_ttm_tt_unpin_userptr(ttm);

	if (gtt->offset == AMDGPU_BO_INVALID_OFFSET)
		return 0;

	/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
	r = amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
	if (r)
		DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n",
			  gtt->ttm.ttm.num_pages, gtt->offset);
	return r;
}
static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

	ttm_dma_tt_fini(&gtt->ttm);
	kfree(gtt);
}

static struct ttm_backend_func amdgpu_backend_func = {
	.bind = &amdgpu_ttm_backend_bind,
	.unbind = &amdgpu_ttm_backend_unbind,
	.destroy = &amdgpu_ttm_backend_destroy,
};
static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev,
					   unsigned long size, uint32_t page_flags,
					   struct page *dummy_read_page)
{
	struct amdgpu_device *adev;
	struct amdgpu_ttm_tt *gtt;

	adev = amdgpu_ttm_adev(bdev);

	gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
	if (gtt == NULL)
		return NULL;

	gtt->ttm.ttm.func = &amdgpu_backend_func;
	gtt->adev = adev;
	if (ttm_dma_tt_init(&gtt->ttm, bdev, size, page_flags, dummy_read_page)) {
		kfree(gtt);
		return NULL;
	}
	return &gtt->ttm.ttm;
}
static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
				  struct ttm_operation_ctx *ctx)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);

	if (ttm->state != tt_unpopulated)
		return 0;

	if (gtt && gtt->userptr) {
		ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
		if (!ttm->sg)
			return -ENOMEM;

		ttm->page_flags |= TTM_PAGE_FLAG_SG;
		ttm->state = tt_unbound;
		return 0;
	}

	if (slave && ttm->sg) {
		drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
						 gtt->ttm.dma_address, ttm->num_pages);
		ttm->state = tt_unbound;
		return 0;
	}

#ifdef CONFIG_SWIOTLB
	if (swiotlb_nr_tbl()) {
		return ttm_dma_populate(&gtt->ttm, adev->dev, ctx);
	}
#endif

	return ttm_populate_and_map_pages(adev->dev, &gtt->ttm, ctx);
}
static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
{
	struct amdgpu_device *adev;
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);

	if (gtt && gtt->userptr) {
		amdgpu_ttm_tt_set_user_pages(ttm, NULL);
		kfree(ttm->sg);
		ttm->page_flags &= ~TTM_PAGE_FLAG_SG;
		return;
	}

	if (slave)
		return;

	adev = amdgpu_ttm_adev(ttm->bdev);

#ifdef CONFIG_SWIOTLB
	if (swiotlb_nr_tbl()) {
		ttm_dma_unpopulate(&gtt->ttm, adev->dev);
		return;
	}
#endif

	ttm_unmap_and_unpopulate_pages(adev->dev, &gtt->ttm);
}
int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
			      uint32_t flags)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

	if (gtt == NULL)
		return -EINVAL;

	gtt->userptr = addr;
	gtt->usermm = current->mm;
	gtt->userflags = flags;
	spin_lock_init(&gtt->guptasklock);
	INIT_LIST_HEAD(&gtt->guptasks);
	atomic_set(&gtt->mmu_invalidations, 0);
	gtt->last_set_pages = 0;

	return 0;
}
struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

	if (gtt == NULL)
		return NULL;

	return gtt->usermm;
}
bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
				  unsigned long end)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	struct amdgpu_ttm_gup_task_list *entry;
	unsigned long size;

	if (gtt == NULL || !gtt->userptr)
		return false;

	size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE;
	if (gtt->userptr > end || gtt->userptr + size <= start)
		return false;

	spin_lock(&gtt->guptasklock);
	list_for_each_entry(entry, &gtt->guptasks, list) {
		if (entry->task == current) {
			spin_unlock(&gtt->guptasklock);
			return false;
		}
	}
	spin_unlock(&gtt->guptasklock);

	atomic_inc(&gtt->mmu_invalidations);

	return true;
}
bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
				       int *last_invalidated)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	int prev_invalidated = *last_invalidated;

	*last_invalidated = atomic_read(&gtt->mmu_invalidations);
	return prev_invalidated != *last_invalidated;
}
bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

	if (gtt == NULL || !gtt->userptr)
		return false;

	return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
}
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

	if (gtt == NULL)
		return false;

	return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
}
uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
				 struct ttm_mem_reg *mem)
{
	uint64_t flags = 0;

	if (mem && mem->mem_type != TTM_PL_SYSTEM)
		flags |= AMDGPU_PTE_VALID;

	if (mem && mem->mem_type == TTM_PL_TT) {
		flags |= AMDGPU_PTE_SYSTEM;

		if (ttm->caching_state == tt_cached)
			flags |= AMDGPU_PTE_SNOOPED;
	}

	flags |= adev->gart.gart_pte_flags;
	flags |= AMDGPU_PTE_READABLE;

	if (!amdgpu_ttm_tt_is_readonly(ttm))
		flags |= AMDGPU_PTE_WRITEABLE;

	return flags;
}
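
/*
 * Illustrative sketch (not part of the driver): a writable, cached GTT
 * placement ends up with roughly
 *
 *	AMDGPU_PTE_VALID | AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED |
 *	adev->gart.gart_pte_flags | AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE
 *
 * while a system-domain placement only picks up the readable/writeable bits
 * plus the per-ASIC gart_pte_flags.
 */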
static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
					    const struct ttm_place *place)
{
	unsigned long num_pages = bo->mem.num_pages;
	struct drm_mm_node *node = bo->mem.mm_node;

	switch (bo->mem.mem_type) {
	case TTM_PL_TT:
		return true;

	case TTM_PL_VRAM:
		/* Check each drm MM node individually */
		while (num_pages) {
			if (place->fpfn < (node->start + node->size) &&
			    !(place->lpfn && place->lpfn <= node->start))
				return true;

			num_pages -= node->size;
			++node;
		}
		break;

	default:
		break;
	}

	return ttm_bo_eviction_valuable(bo, place);
}
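
/*
 * Illustrative sketch (page numbers invented): for an eviction request with
 * place->fpfn = 0x100 and place->lpfn = 0x200, a BO backed by a single node
 * at [0x300, 0x340) fails the check above (place->lpfn <= node->start) and
 * is skipped, while a node at [0x180, 0x1c0) overlaps the range and makes
 * the BO a valuable eviction candidate.
 */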
static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
				    unsigned long offset,
				    void *buf, int len, int write)
{
	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
	struct drm_mm_node *nodes;
	uint32_t value = 0;
	int ret = 0;
	uint64_t pos;
	unsigned long flags;

	if (bo->mem.mem_type != TTM_PL_VRAM)
		return -EIO;

	nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
	pos = (nodes->start << PAGE_SHIFT) + offset;

	while (len && pos < adev->mc.mc_vram_size) {
		uint64_t aligned_pos = pos & ~(uint64_t)3;
		uint32_t bytes = 4 - (pos & 3);
		uint32_t shift = (pos & 3) * 8;
		uint32_t mask = 0xffffffff << shift;

		if (len < bytes) {
			mask &= 0xffffffff >> (bytes - len) * 8;
			bytes = len;
		}

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
		WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
		if (!write || mask != 0xffffffff)
			value = RREG32_NO_KIQ(mmMM_DATA);
		if (write) {
			value &= ~mask;
			value |= (*(uint32_t *)buf << shift) & mask;
			WREG32_NO_KIQ(mmMM_DATA, value);
		}
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
		if (!write) {
			value = (value & mask) >> shift;
			memcpy(buf, &value, bytes);
		}

		ret += bytes;
		buf = (uint8_t *)buf + bytes;
		pos += bytes;
		len -= bytes;
		if (pos >= (nodes->start + nodes->size) << PAGE_SHIFT) {
			++nodes;
			pos = (nodes->start << PAGE_SHIFT);
		}
	}

	return ret;
}
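
/*
 * Illustrative sketch (values invented): accessing a single byte at
 * pos = 0x1002 gives aligned_pos = 0x1000, bytes = 2, shift = 16 and
 * mask = 0xffff0000; with len = 1 the mask is narrowed to 0x00ff0000, so
 * only that byte of the MM_DATA dword is read or written through the
 * MM_INDEX/MM_DATA window.
 */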
static struct ttm_bo_driver amdgpu_bo_driver = {
	.ttm_tt_create = &amdgpu_ttm_tt_create,
	.ttm_tt_populate = &amdgpu_ttm_tt_populate,
	.ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate,
	.invalidate_caches = &amdgpu_invalidate_caches,
	.init_mem_type = &amdgpu_init_mem_type,
	.eviction_valuable = amdgpu_ttm_bo_eviction_valuable,
	.evict_flags = &amdgpu_evict_flags,
	.move = &amdgpu_bo_move,
	.verify_access = &amdgpu_verify_access,
	.move_notify = &amdgpu_bo_move_notify,
	.fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,
	.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
	.io_mem_free = &amdgpu_ttm_io_mem_free,
	.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
	.access_memory = &amdgpu_ttm_access_memory
};
/*
 * Firmware Reservation functions
 */
/**
 * amdgpu_ttm_fw_reserve_vram_fini - free fw reserved vram
 *
 * @adev: amdgpu_device pointer
 *
 * free fw reserved vram if it has been reserved.
 */
static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->fw_vram_usage.reserved_bo,
			      NULL, &adev->fw_vram_usage.va);
}
/**
 * amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw
 *
 * @adev: amdgpu_device pointer
 *
 * create bo vram reservation from fw.
 */
static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
{
	struct ttm_operation_ctx ctx = { false, false };
	int r = 0;
	int i;
	u64 vram_size = adev->mc.visible_vram_size;
	u64 offset = adev->fw_vram_usage.start_offset;
	u64 size = adev->fw_vram_usage.size;
	struct amdgpu_bo *bo;

	adev->fw_vram_usage.va = NULL;
	adev->fw_vram_usage.reserved_bo = NULL;

	if (adev->fw_vram_usage.size > 0 &&
	    adev->fw_vram_usage.size <= vram_size) {

		r = amdgpu_bo_create(adev, adev->fw_vram_usage.size,
				     PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM,
				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
				     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, NULL, NULL, 0,
				     &adev->fw_vram_usage.reserved_bo);
		if (r)
			goto error_create;

		r = amdgpu_bo_reserve(adev->fw_vram_usage.reserved_bo, false);
		if (r)
			goto error_reserve;

		/* remove the original mem node and create a new one at the
		 * request position
		 */
		bo = adev->fw_vram_usage.reserved_bo;
		offset = ALIGN(offset, PAGE_SIZE);
		for (i = 0; i < bo->placement.num_placement; ++i) {
			bo->placements[i].fpfn = offset >> PAGE_SHIFT;
			bo->placements[i].lpfn = (offset + size) >> PAGE_SHIFT;
		}

		ttm_bo_mem_put(&bo->tbo, &bo->tbo.mem);
		r = ttm_bo_mem_space(&bo->tbo, &bo->placement,
				     &bo->tbo.mem, &ctx);
		if (r)
			goto error_pin;

		r = amdgpu_bo_pin_restricted(adev->fw_vram_usage.reserved_bo,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     adev->fw_vram_usage.start_offset,
					     (adev->fw_vram_usage.start_offset +
					      adev->fw_vram_usage.size), NULL);
		if (r)
			goto error_pin;
		r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo,
				   &adev->fw_vram_usage.va);
		if (r)
			goto error_kmap;

		amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo);
	}
	return r;

error_kmap:
	amdgpu_bo_unpin(adev->fw_vram_usage.reserved_bo);
error_pin:
	amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo);
error_reserve:
	amdgpu_bo_unref(&adev->fw_vram_usage.reserved_bo);
error_create:
	adev->fw_vram_usage.va = NULL;
	adev->fw_vram_usage.reserved_bo = NULL;
	return r;
}
int amdgpu_ttm_init(struct amdgpu_device *adev)
{
	uint64_t gtt_size;
	u64 vis_vram_limit;
	int r;

	r = amdgpu_ttm_global_init(adev);
	if (r)
		return r;

	/* No others user of address space so set it to 0 */
	r = ttm_bo_device_init(&adev->mman.bdev,
			       adev->mman.bo_global_ref.ref.object,
			       &amdgpu_bo_driver,
			       adev->ddev->anon_inode->i_mapping,
			       DRM_FILE_PAGE_OFFSET,
			       adev->need_dma32);
	if (r) {
		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
		return r;
	}
	adev->mman.initialized = true;
	r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
			   adev->mc.real_vram_size >> PAGE_SHIFT);
	if (r) {
		DRM_ERROR("Failed initializing VRAM heap.\n");
		return r;
	}

	/* Reduce size of CPU-visible VRAM if requested */
	vis_vram_limit = (u64)amdgpu_vis_vram_limit * 1024 * 1024;
	if (amdgpu_vis_vram_limit > 0 &&
	    vis_vram_limit <= adev->mc.visible_vram_size)
		adev->mc.visible_vram_size = vis_vram_limit;

	/* Change the size here instead of the init above so only lpfn is affected */
	amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);

	/*
	 * The reserved vram for firmware must be pinned to the specified
	 * place on the VRAM, so reserve it early.
	 */
	r = amdgpu_ttm_fw_reserve_vram_init(adev);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->mc.stolen_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_VRAM,
				    &adev->stolen_vga_memory,
				    NULL, NULL);
	if (r)
		return r;
	DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
		 (unsigned) (adev->mc.real_vram_size / (1024 * 1024)));

	if (amdgpu_gtt_size == -1) {
		struct sysinfo si;

		si_meminfo(&si);
		gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
				   adev->mc.mc_vram_size),
			       ((uint64_t)si.totalram * si.mem_unit * 3/4));
	} else
		gtt_size = (uint64_t)amdgpu_gtt_size << 20;
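
	/*
	 * Worked example for the sizing heuristic above (illustrative only):
	 * with 2 GiB of VRAM and 16 GiB of system memory the default GTT size
	 * is max(AMDGPU_DEFAULT_GTT_SIZE_MB << 20, 2 GiB) capped at 12 GiB
	 * (three quarters of system memory); passing amdgpu.gtt_size=4096 on
	 * the kernel command line would force a 4 GiB GTT domain instead.
	 */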
	r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT);
	if (r) {
		DRM_ERROR("Failed initializing GTT heap.\n");
		return r;
	}
	DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
		 (unsigned)(gtt_size / (1024 * 1024)));

	adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT;
	adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT;
	adev->gds.mem.cs_partition_size = adev->gds.mem.cs_partition_size << AMDGPU_GDS_SHIFT;
	adev->gds.gws.total_size = adev->gds.gws.total_size << AMDGPU_GWS_SHIFT;
	adev->gds.gws.gfx_partition_size = adev->gds.gws.gfx_partition_size << AMDGPU_GWS_SHIFT;
	adev->gds.gws.cs_partition_size = adev->gds.gws.cs_partition_size << AMDGPU_GWS_SHIFT;
	adev->gds.oa.total_size = adev->gds.oa.total_size << AMDGPU_OA_SHIFT;
	adev->gds.oa.gfx_partition_size = adev->gds.oa.gfx_partition_size << AMDGPU_OA_SHIFT;
	adev->gds.oa.cs_partition_size = adev->gds.oa.cs_partition_size << AMDGPU_OA_SHIFT;

	/* GDS Memory */
	if (adev->gds.mem.total_size) {
		r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
				   adev->gds.mem.total_size >> PAGE_SHIFT);
		if (r) {
			DRM_ERROR("Failed initializing GDS heap.\n");
			return r;
		}
	}

	/* GWS */
	if (adev->gds.gws.total_size) {
		r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
				   adev->gds.gws.total_size >> PAGE_SHIFT);
		if (r) {
			DRM_ERROR("Failed initializing gws heap.\n");
			return r;
		}
	}

	/* OA */
	if (adev->gds.oa.total_size) {
		r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
				   adev->gds.oa.total_size >> PAGE_SHIFT);
		if (r) {
			DRM_ERROR("Failed initializing oa heap.\n");
			return r;
		}
	}

	r = amdgpu_ttm_debugfs_init(adev);
	if (r) {
		DRM_ERROR("Failed to init debugfs\n");
		return r;
	}
	return 0;
}
void amdgpu_ttm_fini(struct amdgpu_device *adev)
{
	if (!adev->mman.initialized)
		return;

	amdgpu_ttm_debugfs_fini(adev);
	amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
	amdgpu_ttm_fw_reserve_vram_fini(adev);

	ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM);
	ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT);
	if (adev->gds.mem.total_size)
		ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS);
	if (adev->gds.gws.total_size)
		ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
	if (adev->gds.oa.total_size)
		ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
	ttm_bo_device_release(&adev->mman.bdev);
	amdgpu_ttm_global_fini(adev);
	adev->mman.initialized = false;
	DRM_INFO("amdgpu: ttm finalized\n");
}
/* this should only be called at bootup or when userspace
 * isn't running */
void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size)
{
	struct ttm_mem_type_manager *man;

	if (!adev->mman.initialized)
		return;

	man = &adev->mman.bdev.man[TTM_PL_VRAM];
	/* this just adjusts TTM size idea, which sets lpfn to the correct value */
	man->size = size >> PAGE_SHIFT;
}
int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct drm_file *file_priv;
	struct amdgpu_device *adev;

	if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET))
		return -EINVAL;

	file_priv = filp->private_data;
	adev = file_priv->minor->dev->dev_private;
	if (adev == NULL)
		return -EINVAL;

	return ttm_bo_mmap(filp, vma, &adev->mman.bdev);
}
static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
			     struct ttm_mem_reg *mem, unsigned num_pages,
			     uint64_t offset, unsigned window,
			     struct amdgpu_ring *ring,
			     uint64_t *addr)
{
	struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;
	struct amdgpu_device *adev = ring->adev;
	struct ttm_tt *ttm = bo->ttm;
	struct amdgpu_job *job;
	unsigned num_dw, num_bytes;
	dma_addr_t *dma_address;
	struct dma_fence *fence;
	uint64_t src_addr, dst_addr;
	uint64_t flags;
	int r;

	BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
	       AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);

	*addr = adev->mc.gart_start;
	*addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
		AMDGPU_GPU_PAGE_SIZE;

	num_dw = adev->mman.buffer_funcs->copy_num_dw;
	while (num_dw & 0x7)
		num_dw++;

	num_bytes = num_pages * 8;

	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, &job);
	if (r)
		return r;

	src_addr = num_dw * 4;
	src_addr += job->ibs[0].gpu_addr;

	dst_addr = adev->gart.table_addr;
	dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
				dst_addr, num_bytes);

	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
	WARN_ON(job->ibs[0].length_dw > num_dw);

	dma_address = &gtt->ttm.dma_address[offset >> PAGE_SHIFT];
	flags = amdgpu_ttm_tt_pte_flags(adev, ttm, mem);
	r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags,
			    &job->ibs[0].ptr[num_dw]);
	if (r)
		goto error_free;

	r = amdgpu_job_submit(job, ring, &adev->mman.entity,
			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
	if (r)
		goto error_free;

	dma_fence_put(fence);

	return r;

error_free:
	amdgpu_job_free(job);
	return r;
}
int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
		       uint64_t dst_offset, uint32_t byte_count,
		       struct reservation_object *resv,
		       struct dma_fence **fence, bool direct_submit,
		       bool vm_needs_flush)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_job *job;

	uint32_t max_bytes;
	unsigned num_loops, num_dw;
	unsigned i;
	int r;

	max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
	num_loops = DIV_ROUND_UP(byte_count, max_bytes);
	num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw;

	/* for IB padding */
	while (num_dw & 0x7)
		num_dw++;

	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
	if (r)
		return r;

	job->vm_needs_flush = vm_needs_flush;
	if (resv) {
		r = amdgpu_sync_resv(adev, &job->sync, resv,
				     AMDGPU_FENCE_OWNER_UNDEFINED,
				     false);
		if (r) {
			DRM_ERROR("sync failed (%d).\n", r);
			goto error_free;
		}
	}

	for (i = 0; i < num_loops; i++) {
		uint32_t cur_size_in_bytes = min(byte_count, max_bytes);

		amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
					dst_offset, cur_size_in_bytes);

		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
		byte_count -= cur_size_in_bytes;
	}

	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
	WARN_ON(job->ibs[0].length_dw > num_dw);
	if (direct_submit) {
		r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs,
				       NULL, fence);
		job->fence = dma_fence_get(*fence);
		if (r)
			DRM_ERROR("Error scheduling IBs (%d)\n", r);
		amdgpu_job_free(job);
	} else {
		r = amdgpu_job_submit(job, ring, &adev->mman.entity,
				      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
		if (r)
			goto error_free;
	}

	return r;

error_free:
	amdgpu_job_free(job);
	return r;
}
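
/*
 * Usage sketch (illustrative only, not part of the driver): a simple fenced
 * copy between two GART addresses; src, dst and size are assumptions.
 *
 *	struct dma_fence *fence = NULL;
 *	int r;
 *
 *	r = amdgpu_copy_buffer(adev->mman.buffer_funcs_ring, src, dst, size,
 *			       NULL, &fence, false, false);
 *	if (!r) {
 *		dma_fence_wait(fence, false);
 *		dma_fence_put(fence);
 *	}
 */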
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
		       uint64_t src_data,
		       struct reservation_object *resv,
		       struct dma_fence **fence)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	uint32_t max_bytes = 8 *
			adev->vm_manager.vm_pte_funcs->set_max_nums_pte_pde;
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;

	struct drm_mm_node *mm_node;
	unsigned long num_pages;
	unsigned int num_loops, num_dw;

	struct amdgpu_job *job;
	int r;

	if (!ring->ready) {
		DRM_ERROR("Trying to clear memory with ring turned off.\n");
		return -EINVAL;
	}

	if (bo->tbo.mem.mem_type == TTM_PL_TT) {
		r = amdgpu_ttm_alloc_gart(&bo->tbo);
		if (r)
			return r;
	}

	num_pages = bo->tbo.num_pages;
	mm_node = bo->tbo.mem.mm_node;
	num_loops = 0;
	while (num_pages) {
		uint32_t byte_count = mm_node->size << PAGE_SHIFT;

		num_loops += DIV_ROUND_UP(byte_count, max_bytes);
		num_pages -= mm_node->size;
		++mm_node;
	}

	/* num of dwords for each SDMA_OP_PTEPDE cmd */
	num_dw = num_loops * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw;

	/* for IB padding */
	num_dw += 64;

	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
	if (r)
		return r;

	if (resv) {
		r = amdgpu_sync_resv(adev, &job->sync, resv,
				     AMDGPU_FENCE_OWNER_UNDEFINED, false);
		if (r) {
			DRM_ERROR("sync failed (%d).\n", r);
			goto error_free;
		}
	}

	num_pages = bo->tbo.num_pages;
	mm_node = bo->tbo.mem.mm_node;

	while (num_pages) {
		uint32_t byte_count = mm_node->size << PAGE_SHIFT;
		uint64_t dst_addr;

		WARN_ONCE(byte_count & 0x7, "size should be a multiple of 8");

		dst_addr = amdgpu_mm_node_addr(&bo->tbo, mm_node, &bo->tbo.mem);
		while (byte_count) {
			uint32_t cur_size_in_bytes = min(byte_count, max_bytes);

			amdgpu_vm_set_pte_pde(adev, &job->ibs[0],
					      dst_addr, 0,
					      cur_size_in_bytes >> 3, 0,
					      src_data);

			dst_addr += cur_size_in_bytes;
			byte_count -= cur_size_in_bytes;
		}

		num_pages -= mm_node->size;
		++mm_node;
	}

	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
	WARN_ON(job->ibs[0].length_dw > num_dw);
	r = amdgpu_job_submit(job, ring, &adev->mman.entity,
			      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
	if (r)
		goto error_free;

	return 0;

error_free:
	amdgpu_job_free(job);
	return r;
}
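
/*
 * Illustrative sketch (numbers invented): clearing a BO backed by a single
 * 1 MiB node with max_bytes = 256 KiB needs DIV_ROUND_UP(1M, 256K) = 4
 * SDMA_OP_PTEPDE commands, so num_dw above becomes 4 * set_pte_pde_num_dw
 * plus the padding added before the IB is allocated.
 */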
#if defined(CONFIG_DEBUG_FS)

static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *)m->private;
	unsigned ttm_pl = *(int *)node->info_ent->data;
	struct drm_device *dev = node->minor->dev;
	struct amdgpu_device *adev = dev->dev_private;
	struct ttm_mem_type_manager *man = &adev->mman.bdev.man[ttm_pl];
	struct drm_printer p = drm_seq_file_printer(m);

	man->func->debug(man, &p);
	return 0;
}

static int ttm_pl_vram = TTM_PL_VRAM;
static int ttm_pl_tt = TTM_PL_TT;

static const struct drm_info_list amdgpu_ttm_debugfs_list[] = {
	{"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, &ttm_pl_vram},
	{"amdgpu_gtt_mm", amdgpu_mm_dump_table, 0, &ttm_pl_tt},
	{"ttm_page_pool", ttm_page_alloc_debugfs, 0, NULL},
#ifdef CONFIG_SWIOTLB
	{"ttm_dma_page_pool", ttm_dma_page_alloc_debugfs, 0, NULL}
#endif
};
static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
				    size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	ssize_t result = 0;
	int r;

	if (size & 0x3 || *pos & 0x3)
		return -EINVAL;

	if (*pos >= adev->mc.mc_vram_size)
		return -ENXIO;

	while (size) {
		unsigned long flags;
		uint32_t value;

		if (*pos >= adev->mc.mc_vram_size)
			return result;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)*pos) | 0x80000000);
		WREG32_NO_KIQ(mmMM_INDEX_HI, *pos >> 31);
		value = RREG32_NO_KIQ(mmMM_DATA);
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);

		r = put_user(value, (uint32_t *)buf);
		if (r)
			return r;

		result += 4;
		buf += 4;
		*pos += 4;
		size -= 4;
	}

	return result;
}

static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
				     size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	ssize_t result = 0;
	int r;

	if (size & 0x3 || *pos & 0x3)
		return -EINVAL;

	if (*pos >= adev->mc.mc_vram_size)
		return -ENXIO;

	while (size) {
		unsigned long flags;
		uint32_t value;

		if (*pos >= adev->mc.mc_vram_size)
			return result;

		r = get_user(value, (uint32_t *)buf);
		if (r)
			return r;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)*pos) | 0x80000000);
		WREG32_NO_KIQ(mmMM_INDEX_HI, *pos >> 31);
		WREG32_NO_KIQ(mmMM_DATA, value);
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);

		result += 4;
		buf += 4;
		*pos += 4;
		size -= 4;
	}

	return result;
}

static const struct file_operations amdgpu_ttm_vram_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_ttm_vram_read,
	.write = amdgpu_ttm_vram_write,
	.llseek = default_llseek,
};
#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS

static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf,
				   size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	ssize_t result = 0;
	int r;

	while (size) {
		loff_t p = *pos / PAGE_SIZE;
		unsigned off = *pos & ~PAGE_MASK;
		size_t cur_size = min_t(size_t, size, PAGE_SIZE - off);
		struct page *page;
		void *ptr;

		if (p >= adev->gart.num_cpu_pages)
			return result;

		page = adev->gart.pages[p];
		if (page) {
			ptr = kmap(page);
			ptr += off;

			r = copy_to_user(buf, ptr, cur_size);
			kunmap(adev->gart.pages[p]);
		} else
			r = clear_user(buf, cur_size);

		if (r)
			return -EFAULT;

		result += cur_size;
		buf += cur_size;
		*pos += cur_size;
		size -= cur_size;
	}

	return result;
}

static const struct file_operations amdgpu_ttm_gtt_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_ttm_gtt_read,
	.llseek = default_llseek
};

#endif
static ssize_t amdgpu_iova_to_phys_read(struct file *f, char __user *buf,
					size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	int r;
	uint64_t phys;
	struct iommu_domain *dom;

	// always return 8 bytes
	if (size != 8)
		return -EINVAL;

	// only accept page addresses
	if (*pos & 0xFFF)
		return -EINVAL;

	dom = iommu_get_domain_for_dev(adev->dev);
	if (dom)
		phys = iommu_iova_to_phys(dom, *pos);
	else
		phys = *pos;

	r = copy_to_user(buf, &phys, 8);
	if (r)
		return -EFAULT;

	return 8;
}

static const struct file_operations amdgpu_ttm_iova_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_iova_to_phys_read,
	.llseek = default_llseek
};
static const struct {
	char *name;
	const struct file_operations *fops;
	int domain;
} ttm_debugfs_entries[] = {
	{ "amdgpu_vram", &amdgpu_ttm_vram_fops, TTM_PL_VRAM },
#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
	{ "amdgpu_gtt", &amdgpu_ttm_gtt_fops, TTM_PL_TT },
#endif
	{ "amdgpu_iova", &amdgpu_ttm_iova_fops, TTM_PL_SYSTEM },
};

#endif
static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
	unsigned count;

	struct drm_minor *minor = adev->ddev->primary;
	struct dentry *ent, *root = minor->debugfs_root;

	for (count = 0; count < ARRAY_SIZE(ttm_debugfs_entries); count++) {
		ent = debugfs_create_file(
				ttm_debugfs_entries[count].name,
				S_IFREG | S_IRUGO, root,
				adev,
				ttm_debugfs_entries[count].fops);
		if (IS_ERR(ent))
			return PTR_ERR(ent);
		if (ttm_debugfs_entries[count].domain == TTM_PL_VRAM)
			i_size_write(ent->d_inode, adev->mc.mc_vram_size);
		else if (ttm_debugfs_entries[count].domain == TTM_PL_TT)
			i_size_write(ent->d_inode, adev->mc.gart_size);
		adev->mman.debugfs_entries[count] = ent;
	}

	count = ARRAY_SIZE(amdgpu_ttm_debugfs_list);

#ifdef CONFIG_SWIOTLB
	if (!swiotlb_nr_tbl())
		--count;
#endif

	return amdgpu_debugfs_add_files(adev, amdgpu_ttm_debugfs_list, count);
#else
	return 0;
#endif
}
static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
	unsigned i;

	for (i = 0; i < ARRAY_SIZE(ttm_debugfs_entries); i++)
		debugfs_remove(adev->mman.debugfs_entries[i]);
#endif
}