/*
 * Copyright 2018 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include "nouveau_dmem.h"
#include "nouveau_drv.h"
#include "nouveau_chan.h"
#include "nouveau_dma.h"
#include "nouveau_mem.h"
#include "nouveau_bo.h"

#include <nvif/class.h>
#include <nvif/object.h>
#include <nvif/if500b.h>
#include <nvif/if900b.h>

#include <linux/sched/mm.h>
#include <linux/hmm.h>

/*
 * FIXME: this is ugly. Right now we are using TTM to allocate VRAM and we
 * pin it in VRAM while it is in use. We likely want to overhaul memory
 * management for nouveau to be more page-like (not necessarily with the
 * system page size, but a bigger page size) at the lowest level, and have
 * some shim layer on top that provides the same functionality as TTM.
 */
#define DMEM_CHUNK_SIZE (2UL << 20)
#define DMEM_CHUNK_NPAGES (DMEM_CHUNK_SIZE >> PAGE_SHIFT)

enum nouveau_aper {
        NOUVEAU_APER_VIRT,
        NOUVEAU_APER_VRAM,
        NOUVEAU_APER_HOST,
};

typedef int (*nouveau_migrate_copy_t)(struct nouveau_drm *drm, u64 npages,
                                      enum nouveau_aper, u64 dst_addr,
                                      enum nouveau_aper, u64 src_addr);

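/*
 * A chunk is DMEM_CHUNK_SIZE (2MB) of VRAM backed by a pinned TTM buffer
 * object. Device pages are handed out from a chunk through its allocation
 * bitmap, and callocated counts how many of its pages are currently in use.
 */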
struct nouveau_dmem_chunk {
        struct list_head list;
        struct nouveau_bo *bo;
        struct nouveau_drm *drm;
        unsigned long pfn_first;
        unsigned long callocated;
        unsigned long bitmap[BITS_TO_LONGS(DMEM_CHUNK_NPAGES)];
        spinlock_t lock;
};

struct nouveau_dmem_migrate {
        nouveau_migrate_copy_t copy_func;
        struct nouveau_channel *chan;
};

struct nouveau_dmem {
        struct nouveau_drm *drm;
        struct dev_pagemap pagemap;
        struct nouveau_dmem_migrate migrate;
        struct list_head chunk_free;
        struct list_head chunk_full;
        struct list_head chunk_empty;
        struct mutex mutex;
};

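/* Map a ZONE_DEVICE page back to the nouveau_dmem owning its pagemap. */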
static inline struct nouveau_dmem *page_to_dmem(struct page *page)
{
        return container_of(page->pgmap, struct nouveau_dmem, pagemap);
}

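/*
 * VRAM address of a device page: the page's index within its chunk plus the
 * chunk's buffer object offset.
 */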
static unsigned long nouveau_dmem_page_addr(struct page *page)
{
        struct nouveau_dmem_chunk *chunk = page->zone_device_data;
        unsigned long idx = page_to_pfn(page) - chunk->pfn_first;

        return (idx << PAGE_SHIFT) + chunk->bo->bo.offset;
}

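/*
 * dev_pagemap page_free callback: return the page to its chunk by clearing
 * its bit in the allocation bitmap and dropping the allocated-page count.
 */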
static void nouveau_dmem_page_free(struct page *page)
{
        struct nouveau_dmem_chunk *chunk = page->zone_device_data;
        unsigned long idx = page_to_pfn(page) - chunk->pfn_first;

        /*
         * FIXME:
         *
         * This is really a bad example, we need to overhaul nouveau memory
         * management to be more page focused and allow a lighter locking
         * scheme to be used in the process.
         */
        spin_lock(&chunk->lock);
        clear_bit(idx, chunk->bitmap);
        WARN_ON(!chunk->callocated);
        chunk->callocated--;
        /*
         * FIXME when chunk->callocated reaches 0 we should add the chunk to
         * a reclaim list so that it can be freed in case of memory pressure.
         */
        spin_unlock(&chunk->lock);
}

static void nouveau_dmem_fence_done(struct nouveau_fence **fence)
{
        if (fence) {
                nouveau_fence_wait(*fence, true, false);
                nouveau_fence_unref(fence);
        } else {
                /*
                 * FIXME wait for channel to be IDLE before finalizing
                 * the hmem object.
                 */
        }
}

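/*
 * Copy one faulting device page back to system memory: allocate a system
 * page, DMA-map it, and have the copy engine write the VRAM contents to it
 * (NOUVEAU_APER_VRAM -> NOUVEAU_APER_HOST).
 */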
static vm_fault_t nouveau_dmem_fault_copy_one(struct nouveau_drm *drm,
                struct vm_fault *vmf, struct migrate_vma *args,
                dma_addr_t *dma_addr)
{
        struct device *dev = drm->dev->dev;
        struct page *dpage, *spage;

        spage = migrate_pfn_to_page(args->src[0]);
        if (!spage || !(args->src[0] & MIGRATE_PFN_MIGRATE))
                return 0;

        dpage = alloc_page_vma(GFP_HIGHUSER, vmf->vma, vmf->address);
        if (!dpage)
                return VM_FAULT_SIGBUS;
        lock_page(dpage);

        *dma_addr = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
        if (dma_mapping_error(dev, *dma_addr))
                goto error_free_page;

        if (drm->dmem->migrate.copy_func(drm, 1, NOUVEAU_APER_HOST, *dma_addr,
                        NOUVEAU_APER_VRAM, nouveau_dmem_page_addr(spage)))
                goto error_dma_unmap;

        args->dst[0] = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
        return 0;

error_dma_unmap:
        dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
error_free_page:
        __free_page(dpage);
        return VM_FAULT_SIGBUS;
}

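/*
 * dev_pagemap migrate_to_ram callback, invoked when the CPU faults on a
 * device-private page: migrate the faulting page back to system memory and
 * wait for the copy to complete before finalizing the migration.
 */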
static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
{
        struct nouveau_dmem *dmem = page_to_dmem(vmf->page);
        struct nouveau_drm *drm = dmem->drm;
        struct nouveau_fence *fence;
        unsigned long src = 0, dst = 0;
        dma_addr_t dma_addr = 0;
        vm_fault_t ret;
        struct migrate_vma args = {
                .vma            = vmf->vma,
                .start          = vmf->address,
                .end            = vmf->address + PAGE_SIZE,
                .src            = &src,
                .dst            = &dst,
        };

        /*
         * FIXME what we really want is to find some heuristic to migrate more
         * than just one page on CPU fault. When such a fault happens it is
         * very likely that more surrounding pages will CPU fault too.
         */
        if (migrate_vma_setup(&args) < 0)
                return VM_FAULT_SIGBUS;
        if (!args.cpages)
                return 0;

        ret = nouveau_dmem_fault_copy_one(drm, vmf, &args, &dma_addr);
        if (ret || dst == 0)
                goto done;

        nouveau_fence_new(dmem->migrate.chan, false, &fence);
        migrate_vma_pages(&args);
        nouveau_dmem_fence_done(&fence);
        dma_unmap_page(drm->dev->dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
done:
        migrate_vma_finalize(&args);
        return ret;
}

static const struct dev_pagemap_ops nouveau_dmem_pagemap_ops = {
        .page_free              = nouveau_dmem_page_free,
        .migrate_to_ram         = nouveau_dmem_migrate_to_ram,
};

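/*
 * Back one chunk with DMEM_CHUNK_SIZE of VRAM: allocate and pin a TTM
 * buffer object for it and reset its allocation bitmap.
 */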
static int
nouveau_dmem_chunk_alloc(struct nouveau_drm *drm)
{
        struct nouveau_dmem_chunk *chunk;
        int ret;

        if (drm->dmem == NULL)
                return -EINVAL;

        mutex_lock(&drm->dmem->mutex);
        chunk = list_first_entry_or_null(&drm->dmem->chunk_empty,
                                         struct nouveau_dmem_chunk,
                                         list);
        if (chunk == NULL) {
                mutex_unlock(&drm->dmem->mutex);
                return -ENOMEM;
        }

        list_del(&chunk->list);
        mutex_unlock(&drm->dmem->mutex);

        ret = nouveau_bo_new(&drm->client, DMEM_CHUNK_SIZE, 0,
                             TTM_PL_FLAG_VRAM, 0, 0, NULL, NULL,
                             &chunk->bo);
        if (ret)
                goto out;

        ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false);
        if (ret) {
                nouveau_bo_ref(NULL, &chunk->bo);
                goto out;
        }

        bitmap_zero(chunk->bitmap, DMEM_CHUNK_NPAGES);
        spin_lock_init(&chunk->lock);

out:
        mutex_lock(&drm->dmem->mutex);
        if (chunk->bo)
                list_add(&chunk->list, &drm->dmem->chunk_empty);
        else
                list_add_tail(&chunk->list, &drm->dmem->chunk_empty);
        mutex_unlock(&drm->dmem->mutex);

        return ret;
}

static struct nouveau_dmem_chunk *
nouveau_dmem_chunk_first_free_locked(struct nouveau_drm *drm)
{
        struct nouveau_dmem_chunk *chunk;

        chunk = list_first_entry_or_null(&drm->dmem->chunk_free,
                                         struct nouveau_dmem_chunk,
                                         list);
        if (chunk)
                return chunk;

        chunk = list_first_entry_or_null(&drm->dmem->chunk_empty,
                                         struct nouveau_dmem_chunk,
                                         list);
        if (chunk && chunk->bo)
                return chunk;

        return NULL;
}

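/*
 * Allocate npages device pages and fill pages[] with their pfns. Scans the
 * partially-free and empty chunks and backs additional chunks with VRAM on
 * demand.
 */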
static int
nouveau_dmem_pages_alloc(struct nouveau_drm *drm,
                         unsigned long npages,
                         unsigned long *pages)
{
        struct nouveau_dmem_chunk *chunk;
        unsigned long c;
        int ret;

        memset(pages, 0xff, npages * sizeof(*pages));

        mutex_lock(&drm->dmem->mutex);
        for (c = 0; c < npages;) {
                unsigned long i;

                chunk = nouveau_dmem_chunk_first_free_locked(drm);
                if (chunk == NULL) {
                        mutex_unlock(&drm->dmem->mutex);
                        ret = nouveau_dmem_chunk_alloc(drm);
                        if (ret) {
                                if (c)
                                        return 0;
                                return ret;
                        }
                        mutex_lock(&drm->dmem->mutex);
                        continue;
                }

                spin_lock(&chunk->lock);
                i = find_first_zero_bit(chunk->bitmap, DMEM_CHUNK_NPAGES);
                while (i < DMEM_CHUNK_NPAGES && c < npages) {
                        pages[c] = chunk->pfn_first + i;
                        set_bit(i, chunk->bitmap);
                        chunk->callocated++;
                        c++;

                        i = find_next_zero_bit(chunk->bitmap,
                                        DMEM_CHUNK_NPAGES, i);
                }
                spin_unlock(&chunk->lock);
        }
        mutex_unlock(&drm->dmem->mutex);

        return 0;
}

static struct page *
nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm)
{
        unsigned long pfns[1];
        struct page *page;
        int ret;

        /* FIXME stop all the mismatched APIs ... */
        ret = nouveau_dmem_pages_alloc(drm, 1, pfns);
        if (ret)
                return NULL;

        page = pfn_to_page(pfns[0]);
        get_page(page);
        lock_page(page);
        return page;
}

static void
nouveau_dmem_page_free_locked(struct nouveau_drm *drm, struct page *page)
{
        unlock_page(page);
        put_page(page);
}

void
nouveau_dmem_resume(struct nouveau_drm *drm)
{
        struct nouveau_dmem_chunk *chunk;
        int ret;

        if (drm->dmem == NULL)
                return;

        mutex_lock(&drm->dmem->mutex);
        list_for_each_entry (chunk, &drm->dmem->chunk_free, list) {
                ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false);
                /* FIXME handle pin failure */
                WARN_ON(ret);
        }
        list_for_each_entry (chunk, &drm->dmem->chunk_full, list) {
                ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false);
                /* FIXME handle pin failure */
                WARN_ON(ret);
        }
        mutex_unlock(&drm->dmem->mutex);
}

void
nouveau_dmem_suspend(struct nouveau_drm *drm)
{
        struct nouveau_dmem_chunk *chunk;

        if (drm->dmem == NULL)
                return;

        mutex_lock(&drm->dmem->mutex);
        list_for_each_entry (chunk, &drm->dmem->chunk_free, list) {
                nouveau_bo_unpin(chunk->bo);
        }
        list_for_each_entry (chunk, &drm->dmem->chunk_full, list) {
                nouveau_bo_unpin(chunk->bo);
        }
        mutex_unlock(&drm->dmem->mutex);
}

void
nouveau_dmem_fini(struct nouveau_drm *drm)
{
        struct nouveau_dmem_chunk *chunk, *tmp;

        if (drm->dmem == NULL)
                return;

        mutex_lock(&drm->dmem->mutex);

        WARN_ON(!list_empty(&drm->dmem->chunk_free));
        WARN_ON(!list_empty(&drm->dmem->chunk_full));

        list_for_each_entry_safe (chunk, tmp, &drm->dmem->chunk_empty, list) {
                if (chunk->bo) {
                        nouveau_bo_unpin(chunk->bo);
                        nouveau_bo_ref(NULL, &chunk->bo);
                }
                list_del(&chunk->list);
                kfree(chunk);
        }

        mutex_unlock(&drm->dmem->mutex);
}

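/*
 * Program the GPU copy engine to copy npages PAGE_SIZE lines between two
 * apertures (VRAM and/or host memory). The method stream is shared by the
 * Pascal, Volta and Turing DMA copy classes selected in
 * nouveau_dmem_migrate_init().
 */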
static int
nvc0b5_migrate_copy(struct nouveau_drm *drm, u64 npages,
                    enum nouveau_aper dst_aper, u64 dst_addr,
                    enum nouveau_aper src_aper, u64 src_addr)
{
        struct nouveau_channel *chan = drm->dmem->migrate.chan;
        u32 launch_dma = (1 << 9) /* MULTI_LINE_ENABLE. */ |
                         (1 << 8) /* DST_MEMORY_LAYOUT_PITCH. */ |
                         (1 << 7) /* SRC_MEMORY_LAYOUT_PITCH. */ |
                         (1 << 2) /* FLUSH_ENABLE_TRUE. */ |
                         (2 << 0) /* DATA_TRANSFER_TYPE_NON_PIPELINED. */;
        int ret;

        ret = RING_SPACE(chan, 13);
        if (ret)
                return ret;

        if (src_aper != NOUVEAU_APER_VIRT) {
                switch (src_aper) {
                case NOUVEAU_APER_VRAM:
                        BEGIN_IMC0(chan, NvSubCopy, 0x0260, 0);
                        break;
                case NOUVEAU_APER_HOST:
                        BEGIN_IMC0(chan, NvSubCopy, 0x0260, 1);
                        break;
                default:
                        return -EINVAL;
                }
                launch_dma |= 0x00001000; /* SRC_TYPE_PHYSICAL. */
        }

        if (dst_aper != NOUVEAU_APER_VIRT) {
                switch (dst_aper) {
                case NOUVEAU_APER_VRAM:
                        BEGIN_IMC0(chan, NvSubCopy, 0x0264, 0);
                        break;
                case NOUVEAU_APER_HOST:
                        BEGIN_IMC0(chan, NvSubCopy, 0x0264, 1);
                        break;
                default:
                        return -EINVAL;
                }
                launch_dma |= 0x00002000; /* DST_TYPE_PHYSICAL. */
        }

        BEGIN_NVC0(chan, NvSubCopy, 0x0400, 8);
        OUT_RING  (chan, upper_32_bits(src_addr));
        OUT_RING  (chan, lower_32_bits(src_addr));
        OUT_RING  (chan, upper_32_bits(dst_addr));
        OUT_RING  (chan, lower_32_bits(dst_addr));
        OUT_RING  (chan, PAGE_SIZE);
        OUT_RING  (chan, PAGE_SIZE);
        OUT_RING  (chan, PAGE_SIZE);
        OUT_RING  (chan, npages);
        BEGIN_NVC0(chan, NvSubCopy, 0x0300, 1);
        OUT_RING  (chan, launch_dma);
        return 0;
}

static int
nouveau_dmem_migrate_init(struct nouveau_drm *drm)
{
        switch (drm->ttm.copy.oclass) {
        case PASCAL_DMA_COPY_A:
        case PASCAL_DMA_COPY_B:
        case VOLTA_DMA_COPY_A:
        case TURING_DMA_COPY_A:
                drm->dmem->migrate.copy_func = nvc0b5_migrate_copy;
                drm->dmem->migrate.chan = drm->ttm.chan;
                return 0;
        default:
                break;
        }
        return -ENODEV;
}

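/*
 * Register the device's VRAM as MEMORY_DEVICE_PRIVATE with HMM and carve it
 * into DMEM_CHUNK_SIZE chunks on the empty list. Chunks are only backed by
 * pinned VRAM buffer objects once pages are allocated from them.
 */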
void
nouveau_dmem_init(struct nouveau_drm *drm)
{
        struct device *device = drm->dev->dev;
        struct resource *res;
        unsigned long i, size, pfn_first;
        int ret;

        /* This only makes sense on PASCAL or newer */
        if (drm->client.device.info.family < NV_DEVICE_INFO_V0_PASCAL)
                return;

        if (!(drm->dmem = kzalloc(sizeof(*drm->dmem), GFP_KERNEL)))
                return;

        drm->dmem->drm = drm;
        mutex_init(&drm->dmem->mutex);
        INIT_LIST_HEAD(&drm->dmem->chunk_free);
        INIT_LIST_HEAD(&drm->dmem->chunk_full);
        INIT_LIST_HEAD(&drm->dmem->chunk_empty);

        size = ALIGN(drm->client.device.info.ram_user, DMEM_CHUNK_SIZE);

        /* Initialize migration dma helpers before registering memory */
        ret = nouveau_dmem_migrate_init(drm);
        if (ret)
                goto out_free;

        /*
         * FIXME we need some kind of policy to decide how much VRAM we
         * want to register with HMM. For now just register everything
         * and later, if we want to do things like overcommit, we could
         * revisit this.
         */
        res = devm_request_free_mem_region(device, &iomem_resource, size);
        if (IS_ERR(res))
                goto out_free;
        drm->dmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
        drm->dmem->pagemap.res = *res;
        drm->dmem->pagemap.ops = &nouveau_dmem_pagemap_ops;
        if (IS_ERR(devm_memremap_pages(device, &drm->dmem->pagemap)))
                goto out_free;

        pfn_first = res->start >> PAGE_SHIFT;
        for (i = 0; i < (size / DMEM_CHUNK_SIZE); ++i) {
                struct nouveau_dmem_chunk *chunk;
                struct page *page;
                unsigned long j;

                chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
                if (chunk == NULL) {
                        nouveau_dmem_fini(drm);
                        return;
                }

                chunk->drm = drm;
                chunk->pfn_first = pfn_first + (i * DMEM_CHUNK_NPAGES);
                list_add_tail(&chunk->list, &drm->dmem->chunk_empty);

                page = pfn_to_page(chunk->pfn_first);
                for (j = 0; j < DMEM_CHUNK_NPAGES; ++j, ++page)
                        page->zone_device_data = chunk;
        }

        NV_INFO(drm, "DMEM: registered %ldMB of device memory\n", size >> 20);
        return;
out_free:
        kfree(drm->dmem);
        drm->dmem = NULL;
}

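/*
 * Copy one system page into freshly allocated VRAM. Returns the destination
 * device page as a migrate pfn on success, or 0 on failure so that the page
 * is simply left where it is.
 */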
static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm,
                unsigned long src, dma_addr_t *dma_addr)
{
        struct device *dev = drm->dev->dev;
        struct page *dpage, *spage;

        spage = migrate_pfn_to_page(src);
        if (!spage || !(src & MIGRATE_PFN_MIGRATE))
                goto out;

        dpage = nouveau_dmem_page_alloc_locked(drm);
        if (!dpage)
                return 0;

        *dma_addr = dma_map_page(dev, spage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
        if (dma_mapping_error(dev, *dma_addr))
                goto out_free_page;

        if (drm->dmem->migrate.copy_func(drm, 1, NOUVEAU_APER_VRAM,
                        nouveau_dmem_page_addr(dpage), NOUVEAU_APER_HOST,
                        *dma_addr))
                goto out_dma_unmap;

        return migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;

out_dma_unmap:
        dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
out_free_page:
        nouveau_dmem_page_free_locked(drm, dpage);
out:
        return 0;
}

static void nouveau_dmem_migrate_chunk(struct nouveau_drm *drm,
                struct migrate_vma *args, dma_addr_t *dma_addrs)
{
        struct nouveau_fence *fence;
        unsigned long addr = args->start, nr_dma = 0, i;

        for (i = 0; addr < args->end; i++) {
                args->dst[i] = nouveau_dmem_migrate_copy_one(drm, args->src[i],
                                dma_addrs + nr_dma);
                if (args->dst[i])
                        nr_dma++;
                addr += PAGE_SIZE;
        }

        nouveau_fence_new(drm->dmem->migrate.chan, false, &fence);
        migrate_vma_pages(args);
        nouveau_dmem_fence_done(&fence);

        while (nr_dma--) {
                dma_unmap_page(drm->dev->dev, dma_addrs[nr_dma], PAGE_SIZE,
                                DMA_BIDIRECTIONAL);
        }
        /*
         * FIXME optimization: update GPU page table to point to newly
         * migrated memory.
         */
        migrate_vma_finalize(args);
}

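/*
 * Migrate an address range of a process into VRAM, in windows of at most
 * SG_MAX_SINGLE_ALLOC pages per migrate_vma_setup()/migrate_vma_finalize()
 * cycle. Typically called from nouveau's SVM code on behalf of userspace.
 */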
int
nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
                         struct vm_area_struct *vma,
                         unsigned long start,
                         unsigned long end)
{
        unsigned long npages = (end - start) >> PAGE_SHIFT;
        unsigned long max = min(SG_MAX_SINGLE_ALLOC, npages);
        dma_addr_t *dma_addrs;
        struct migrate_vma args = {
                .vma            = vma,
                .start          = start,
        };
        unsigned long c, i;
        int ret = -ENOMEM;

        args.src = kcalloc(max, sizeof(*args.src), GFP_KERNEL);
        if (!args.src)
                goto out;
        args.dst = kcalloc(max, sizeof(*args.dst), GFP_KERNEL);
        if (!args.dst)
                goto out_free_src;

        dma_addrs = kmalloc_array(max, sizeof(*dma_addrs), GFP_KERNEL);
        if (!dma_addrs)
                goto out_free_dst;

        for (i = 0; i < npages; i += c) {
                c = min(SG_MAX_SINGLE_ALLOC, npages - i);
                args.end = args.start + (c << PAGE_SHIFT);
                ret = migrate_vma_setup(&args);
                if (ret)
                        goto out_free_dma;

                if (args.cpages)
                        nouveau_dmem_migrate_chunk(drm, &args, dma_addrs);
                args.start = args.end;
        }

        ret = 0;
out_free_dma:
        kfree(dma_addrs);
out_free_dst:
        kfree(args.dst);
out_free_src:
        kfree(args.src);
out:
        return ret;
}

static inline bool
nouveau_dmem_page(struct nouveau_drm *drm, struct page *page)
{
        return is_device_private_page(page) && drm->dmem == page_to_dmem(page);
}

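/*
 * Rewrite the HMM pfn array in place: for entries that reference our
 * device-private pages, substitute the page's VRAM address so the GPU page
 * tables can map the memory directly.
 */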
void
nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
                         struct hmm_range *range)
{
        unsigned long i, npages;

        npages = (range->end - range->start) >> PAGE_SHIFT;
        for (i = 0; i < npages; ++i) {
                struct page *page;
                uint64_t addr;

                page = hmm_device_entry_to_page(range, range->pfns[i]);
                if (page == NULL)
                        continue;

                if (!(range->pfns[i] & range->flags[HMM_PFN_DEVICE_PRIVATE])) {
                        continue;
                }

                if (!nouveau_dmem_page(drm, page)) {
                        WARN(1, "Some unknown device memory!\n");
                        range->pfns[i] = 0;
                        continue;
                }

                addr = nouveau_dmem_page_addr(page);
                range->pfns[i] &= ((1UL << range->pfn_shift) - 1);
                range->pfns[i] |= (addr >> PAGE_SHIFT) << range->pfn_shift;
        }
}