// SPDX-License-Identifier: GPL-2.0
/*
 * This is a module to test the HMM (Heterogeneous Memory Management)
 * mirror and zone device private memory migration APIs of the kernel.
 * Userspace programs can register with the driver to mirror their own address
 * space and can use the device to read/write any valid virtual address.
 */
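/*
 * Rough userspace flow (an illustrative sketch, not part of this module; the
 * authoritative uapi layout is in test_hmm_uapi.h and the device node path is
 * an assumption, created by the selftest scripts for the char devices
 * registered below):
 *
 *	struct hmm_dmirror_cmd cmd = {
 *		.addr   = (uintptr_t)buf,	// page-aligned address to mirror
 *		.ptr    = (uintptr_t)out,	// user buffer for data/permissions
 *		.npages = n,
 *	};
 *	fd = open("/dev/hmm_dmirror0", O_RDWR);
 *	ioctl(fd, HMM_DMIRROR_SNAPSHOT, &cmd);	// or READ/WRITE/MIGRATE
 *	// cmd.cpages reports how many pages were handled.
 */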
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/delay.h>
#include <linux/pagemap.h>
#include <linux/hmm.h>
#include <linux/vmalloc.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/sched/mm.h>
#include <linux/platform_device.h>

#include "test_hmm_uapi.h"
#define DMIRROR_NDEVICES		2
#define DMIRROR_RANGE_FAULT_TIMEOUT	1000
#define DEVMEM_CHUNK_SIZE		(256 * 1024 * 1024U)
#define DEVMEM_CHUNKS_RESERVE		16
static const struct dev_pagemap_ops dmirror_devmem_ops;
static const struct mmu_interval_notifier_ops dmirror_min_ops;
static dev_t dmirror_dev;

struct dmirror_device;

struct dmirror_bounce {
	void			*ptr;
	unsigned long		size;
	unsigned long		addr;
	unsigned long		cpages;
};

#define DPT_XA_TAG_WRITE 3UL
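/*
 * Mirror page table entries are tagged XArray pointers: DPT_XA_TAG_WRITE
 * marks a mirrored page as writable, so dmirror_do_write() can check the
 * tag with xa_pointer_tag() before copying data into the page.
 */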
/*
 * Data structure to track address ranges and register for mmu interval
 * notifier updates.
 */
struct dmirror_interval {
	struct mmu_interval_notifier	notifier;
	struct dmirror			*dmirror;
};
/*
 * Data attached to the open device file.
 * Note that it might be shared after a fork().
 */
struct dmirror {
	struct dmirror_device	*mdevice;
	struct xarray		pt;
	struct mmu_interval_notifier	notifier;
	struct mutex		mutex;
};
/*
 * ZONE_DEVICE pages for migration and simulating device memory.
 */
struct dmirror_chunk {
	struct dev_pagemap	pagemap;
	struct dmirror_device	*mdevice;
};
/*
 * Per device data.
 */
struct dmirror_device {
	struct cdev		cdevice;
	struct hmm_devmem	*devmem;

	unsigned int		devmem_capacity;
	unsigned int		devmem_count;
	struct dmirror_chunk	**devmem_chunks;
	struct mutex		devmem_lock;	/* protects the above */

	struct page		*free_pages;
	spinlock_t		lock;		/* protects the above */
};
static struct dmirror_device dmirror_devices[DMIRROR_NDEVICES];
static int dmirror_bounce_init(struct dmirror_bounce *bounce,
			       unsigned long addr,
			       unsigned long size)
{
	bounce->addr = addr;
	bounce->size = size;
	bounce->cpages = 0;
	bounce->ptr = vmalloc(size);
	if (!bounce->ptr)
		return -ENOMEM;
	return 0;
}
static void dmirror_bounce_fini(struct dmirror_bounce *bounce)
{
	vfree(bounce->ptr);
}
static int dmirror_fops_open(struct inode *inode, struct file *filp)
{
	struct cdev *cdev = inode->i_cdev;
	struct dmirror *dmirror;
	int ret;

	/* Mirror this process address space */
	dmirror = kzalloc(sizeof(*dmirror), GFP_KERNEL);
	if (dmirror == NULL)
		return -ENOMEM;

	dmirror->mdevice = container_of(cdev, struct dmirror_device, cdevice);
	mutex_init(&dmirror->mutex);
	xa_init(&dmirror->pt);

	ret = mmu_interval_notifier_insert(&dmirror->notifier, current->mm,
				0, ULONG_MAX & PAGE_MASK, &dmirror_min_ops);
	if (ret) {
		kfree(dmirror);
		return ret;
	}

	filp->private_data = dmirror;
	return 0;
}
static int dmirror_fops_release(struct inode *inode, struct file *filp)
{
	struct dmirror *dmirror = filp->private_data;

	mmu_interval_notifier_remove(&dmirror->notifier);
	xa_destroy(&dmirror->pt);
	kfree(dmirror);
	return 0;
}
static struct dmirror_device *dmirror_page_to_device(struct page *page)
{
	return container_of(page->pgmap, struct dmirror_chunk,
			    pagemap)->mdevice;
}
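/*
 * dmirror->pt is the simulated device page table: an XArray indexed by page
 * frame number whose entries point at the backing struct page, optionally
 * tagged DPT_XA_TAG_WRITE. dmirror_do_fault() fills it from hmm_range_fault()
 * results and the interval notifier below clears entries on invalidation.
 */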
static int dmirror_do_fault(struct dmirror *dmirror, struct hmm_range *range)
{
	unsigned long *pfns = range->hmm_pfns;
	unsigned long pfn;

	for (pfn = (range->start >> PAGE_SHIFT);
	     pfn < (range->end >> PAGE_SHIFT);
	     pfn++, pfns++) {
		struct page *page;
		void *entry;

		/*
		 * Since we asked for hmm_range_fault() to populate pages,
		 * it shouldn't return an error entry on success.
		 */
		WARN_ON(*pfns & HMM_PFN_ERROR);
		WARN_ON(!(*pfns & HMM_PFN_VALID));

		page = hmm_pfn_to_page(*pfns);
		WARN_ON(!page);

		entry = page;
		if (*pfns & HMM_PFN_WRITE)
			entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
		else if (WARN_ON(range->default_flags & HMM_PFN_WRITE))
			return -EFAULT;
		entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
		if (xa_is_err(entry))
			return xa_err(entry);
	}

	return 0;
}
static void dmirror_do_update(struct dmirror *dmirror, unsigned long start,
			      unsigned long end)
{
	unsigned long pfn;
	void *entry;

	/*
	 * The XArray doesn't hold references to pages since it relies on
	 * the mmu notifier to clear page pointers when they become stale.
	 * Therefore, it is OK to just clear the entry.
	 */
	xa_for_each_range(&dmirror->pt, pfn, entry, start >> PAGE_SHIFT,
			  end >> PAGE_SHIFT)
		xa_erase(&dmirror->pt, pfn);
}
static bool dmirror_interval_invalidate(struct mmu_interval_notifier *mni,
				const struct mmu_notifier_range *range,
				unsigned long cur_seq)
{
	struct dmirror *dmirror = container_of(mni, struct dmirror, notifier);

	/*
	 * Ignore invalidation callbacks for device private pages since
	 * the invalidation is handled as part of the migration process.
	 */
	if (range->event == MMU_NOTIFY_MIGRATE &&
	    range->migrate_pgmap_owner == dmirror->mdevice)
		return true;

	if (mmu_notifier_range_blockable(range))
		mutex_lock(&dmirror->mutex);
	else if (!mutex_trylock(&dmirror->mutex))
		return false;

	mmu_interval_set_seq(mni, cur_seq);
	dmirror_do_update(dmirror, range->start, range->end);

	mutex_unlock(&dmirror->mutex);
	return true;
}
static const struct mmu_interval_notifier_ops dmirror_min_ops = {
	.invalidate = dmirror_interval_invalidate,
};
static int dmirror_range_fault(struct dmirror *dmirror,
			       struct hmm_range *range)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	unsigned long timeout =
		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
	int ret;

	while (true) {
		if (time_after(jiffies, timeout)) {
			ret = -EBUSY;
			goto out;
		}

		range->notifier_seq = mmu_interval_read_begin(range->notifier);
		mmap_read_lock(mm);
		ret = hmm_range_fault(range);
		mmap_read_unlock(mm);
		if (ret) {
			if (ret == -EBUSY)
				continue;
			goto out;
		}

		mutex_lock(&dmirror->mutex);
		if (mmu_interval_read_retry(range->notifier,
					    range->notifier_seq)) {
			mutex_unlock(&dmirror->mutex);
			continue;
		}
		break;
	}

	ret = dmirror_do_fault(dmirror, range);

	mutex_unlock(&dmirror->mutex);
out:
	return ret;
}
static int dmirror_fault(struct dmirror *dmirror, unsigned long start,
			 unsigned long end, bool write)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	unsigned long addr;
	unsigned long pfns[64];
	struct hmm_range range = {
		.notifier = &dmirror->notifier,
		.hmm_pfns = pfns,
		.pfn_flags_mask = 0,
		.default_flags =
			HMM_PFN_REQ_FAULT | (write ? HMM_PFN_REQ_WRITE : 0),
		.dev_private_owner = dmirror->mdevice,
	};
	int ret = 0;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return 0;

	for (addr = start; addr < end; addr = range.end) {
		range.start = addr;
		range.end = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end);

		ret = dmirror_range_fault(dmirror, &range);
		if (ret)
			break;
	}

	mmput(mm);
	return ret;
}
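/*
 * dmirror_fault() above, like dmirror_migrate() and dmirror_snapshot() below,
 * works on fixed 64-entry pfn arrays, so a large command is processed as a
 * series of at most 64-page windows.
 */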
static int dmirror_do_read(struct dmirror *dmirror, unsigned long start,
			   unsigned long end, struct dmirror_bounce *bounce)
{
	unsigned long pfn;
	void *ptr;

	ptr = bounce->ptr + ((start - bounce->addr) & PAGE_MASK);

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
		void *entry;
		struct page *page;
		void *tmp;

		entry = xa_load(&dmirror->pt, pfn);
		page = xa_untag_pointer(entry);
		if (!page)
			return -ENOENT;

		tmp = kmap(page);
		memcpy(ptr, tmp, PAGE_SIZE);
		kunmap(page);

		ptr += PAGE_SIZE;
		bounce->cpages++;
	}

	return 0;
}
static int dmirror_read(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd)
{
	struct dmirror_bounce bounce;
	unsigned long start, end;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;

	while (1) {
		mutex_lock(&dmirror->mutex);
		ret = dmirror_do_read(dmirror, start, end, &bounce);
		mutex_unlock(&dmirror->mutex);
		if (ret != -ENOENT)
			break;

		start = cmd->addr + (bounce.cpages << PAGE_SHIFT);
		ret = dmirror_fault(dmirror, start, end, false);
		if (ret)
			break;
		cmd->faults++;
	}

	if (ret == 0) {
		if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
				 bounce.size))
			ret = -EFAULT;
	}
	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;
}
static int dmirror_do_write(struct dmirror *dmirror, unsigned long start,
			    unsigned long end, struct dmirror_bounce *bounce)
{
	unsigned long pfn;
	void *ptr;

	ptr = bounce->ptr + ((start - bounce->addr) & PAGE_MASK);

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
		void *entry;
		struct page *page;
		void *tmp;

		entry = xa_load(&dmirror->pt, pfn);
		page = xa_untag_pointer(entry);
		if (!page || xa_pointer_tag(entry) != DPT_XA_TAG_WRITE)
			return -ENOENT;

		tmp = kmap(page);
		memcpy(tmp, ptr, PAGE_SIZE);
		kunmap(page);

		ptr += PAGE_SIZE;
		bounce->cpages++;
	}

	return 0;
}
static int dmirror_write(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd)
{
	struct dmirror_bounce bounce;
	unsigned long start, end;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;
	if (copy_from_user(bounce.ptr, u64_to_user_ptr(cmd->ptr),
			   bounce.size)) {
		ret = -EFAULT;
		goto fini;
	}

	while (1) {
		mutex_lock(&dmirror->mutex);
		ret = dmirror_do_write(dmirror, start, end, &bounce);
		mutex_unlock(&dmirror->mutex);
		if (ret != -ENOENT)
			break;

		start = cmd->addr + (bounce.cpages << PAGE_SHIFT);
		ret = dmirror_fault(dmirror, start, end, true);
		if (ret)
			break;
		cmd->faults++;
	}

fini:
	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;
}
static bool dmirror_allocate_chunk(struct dmirror_device *mdevice,
				   struct page **ppage)
{
	struct dmirror_chunk *devmem;
	struct resource *res;
	unsigned long pfn;
	unsigned long pfn_first;
	unsigned long pfn_last;
	void *ptr;

	devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);
	if (!devmem)
		return false;

	res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE,
				      "hmm_dmirror");
	if (IS_ERR(res))
		goto err_devmem;

	devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
	devmem->pagemap.range.start = res->start;
	devmem->pagemap.range.end = res->end;
	devmem->pagemap.nr_range = 1;
	devmem->pagemap.ops = &dmirror_devmem_ops;
	devmem->pagemap.owner = mdevice;

	mutex_lock(&mdevice->devmem_lock);

	if (mdevice->devmem_count == mdevice->devmem_capacity) {
		struct dmirror_chunk **new_chunks;
		unsigned int new_capacity;

		new_capacity = mdevice->devmem_capacity +
				DEVMEM_CHUNKS_RESERVE;
		new_chunks = krealloc(mdevice->devmem_chunks,
				sizeof(new_chunks[0]) * new_capacity,
				GFP_KERNEL);
		if (!new_chunks)
			goto err_release;
		mdevice->devmem_capacity = new_capacity;
		mdevice->devmem_chunks = new_chunks;
	}

	ptr = memremap_pages(&devmem->pagemap, numa_node_id());
	if (IS_ERR(ptr))
		goto err_release;

	devmem->mdevice = mdevice;
	pfn_first = devmem->pagemap.range.start >> PAGE_SHIFT;
	pfn_last = pfn_first + (range_len(&devmem->pagemap.range) >> PAGE_SHIFT);
	mdevice->devmem_chunks[mdevice->devmem_count++] = devmem;

	mutex_unlock(&mdevice->devmem_lock);

	pr_info("added new %u MB chunk (total %u chunks, %u MB) PFNs [0x%lx 0x%lx)\n",
		DEVMEM_CHUNK_SIZE / (1024 * 1024),
		mdevice->devmem_count,
		mdevice->devmem_count * (DEVMEM_CHUNK_SIZE / (1024 * 1024)),
		pfn_first, pfn_last);

	spin_lock(&mdevice->lock);
	for (pfn = pfn_first; pfn < pfn_last; pfn++) {
		struct page *page = pfn_to_page(pfn);

		page->zone_device_data = mdevice->free_pages;
		mdevice->free_pages = page;
	}
	if (ppage) {
		*ppage = mdevice->free_pages;
		mdevice->free_pages = (*ppage)->zone_device_data;
	}
	spin_unlock(&mdevice->lock);

	return true;

err_release:
	mutex_unlock(&mdevice->devmem_lock);
	release_mem_region(devmem->pagemap.range.start,
			   range_len(&devmem->pagemap.range));
err_devmem:
	kfree(devmem);

	return false;
}
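/*
 * Free ZONE_DEVICE pages are kept on a simple singly linked list threaded
 * through page->zone_device_data (see the loop above); the allocator below
 * pops from that list and falls back to carving out a new chunk when empty.
 */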
static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
{
	struct page *dpage = NULL;
	struct page *rpage;

	/*
	 * This is a fake device so we alloc real system memory to store
	 * our device memory.
	 */
	rpage = alloc_page(GFP_HIGHUSER);
	if (!rpage)
		return NULL;

	spin_lock(&mdevice->lock);

	if (mdevice->free_pages) {
		dpage = mdevice->free_pages;
		mdevice->free_pages = dpage->zone_device_data;
		spin_unlock(&mdevice->lock);
	} else {
		spin_unlock(&mdevice->lock);
		if (!dmirror_allocate_chunk(mdevice, &dpage))
			goto error;
	}

	dpage->zone_device_data = rpage;
	get_page(dpage);
	lock_page(dpage);
	return dpage;

error:
	__free_page(rpage);
	return NULL;
}
static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
					   struct dmirror *dmirror)
{
	struct dmirror_device *mdevice = dmirror->mdevice;
	const unsigned long *src = args->src;
	unsigned long *dst = args->dst;
	unsigned long addr;

	for (addr = args->start; addr < args->end; addr += PAGE_SIZE,
						   src++, dst++) {
		struct page *spage;
		struct page *dpage;
		struct page *rpage;

		if (!(*src & MIGRATE_PFN_MIGRATE))
			continue;

		/*
		 * Note that spage might be NULL which is OK since it is an
		 * unallocated pte_none() or read-only zero page.
		 */
		spage = migrate_pfn_to_page(*src);

		dpage = dmirror_devmem_alloc_page(mdevice);
		if (!dpage)
			continue;

		rpage = dpage->zone_device_data;
		if (spage)
			copy_highpage(rpage, spage);
		else
			clear_highpage(rpage);

		/*
		 * Normally, a device would use the page->zone_device_data to
		 * point to the mirror but here we use it to hold the page for
		 * the simulated device memory and that page holds the pointer
		 * to the mirror.
		 */
		rpage->zone_device_data = dmirror;

		*dst = migrate_pfn(page_to_pfn(dpage)) |
			    MIGRATE_PFN_LOCKED;
		if ((*src & MIGRATE_PFN_WRITE) ||
		    (!spage && args->vma->vm_flags & VM_WRITE))
			*dst |= MIGRATE_PFN_WRITE;
	}
}
static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,
					    struct dmirror *dmirror)
{
	unsigned long start = args->start;
	unsigned long end = args->end;
	const unsigned long *src = args->src;
	const unsigned long *dst = args->dst;
	unsigned long pfn;

	/* Map the migrated pages into the device's page tables. */
	mutex_lock(&dmirror->mutex);

	for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++,
								src++, dst++) {
		struct page *dpage;
		void *entry;

		if (!(*src & MIGRATE_PFN_MIGRATE))
			continue;

		dpage = migrate_pfn_to_page(*dst);
		if (!dpage)
			continue;

		/*
		 * Store the page that holds the data so the page table
		 * doesn't have to deal with ZONE_DEVICE private pages.
		 */
		entry = dpage->zone_device_data;
		if (*dst & MIGRATE_PFN_WRITE)
			entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
		entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
		if (xa_is_err(entry)) {
			mutex_unlock(&dmirror->mutex);
			return xa_err(entry);
		}
	}

	mutex_unlock(&dmirror->mutex);
	return 0;
}
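/*
 * dmirror_migrate() below drives the standard migrate_vma sequence:
 * migrate_vma_setup() collects and isolates the source pages,
 * dmirror_migrate_alloc_and_copy() allocates device private pages and copies
 * the data, migrate_vma_pages() switches the CPU page tables over, the helper
 * above maps the result into the mirror, and migrate_vma_finalize() releases
 * everything.
 */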
static int dmirror_migrate(struct dmirror *dmirror,
			   struct hmm_dmirror_cmd *cmd)
{
	unsigned long start, end, addr;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	struct mm_struct *mm = dmirror->notifier.mm;
	struct vm_area_struct *vma;
	unsigned long src_pfns[64];
	unsigned long dst_pfns[64];
	struct dmirror_bounce bounce;
	struct migrate_vma args;
	unsigned long next;
	int ret;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return -EINVAL;

	mmap_read_lock(mm);
	for (addr = start; addr < end; addr = next) {
		vma = find_vma(mm, addr);
		if (!vma || addr < vma->vm_start ||
		    !(vma->vm_flags & VM_READ)) {
			ret = -EINVAL;
			goto out;
		}
		next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
		if (next > vma->vm_end)
			next = vma->vm_end;

		args.vma = vma;
		args.src = src_pfns;
		args.dst = dst_pfns;
		args.start = addr;
		args.end = next;
		args.pgmap_owner = dmirror->mdevice;
		args.flags = MIGRATE_VMA_SELECT_SYSTEM;
		ret = migrate_vma_setup(&args);
		if (ret)
			goto out;

		dmirror_migrate_alloc_and_copy(&args, dmirror);
		migrate_vma_pages(&args);
		dmirror_migrate_finalize_and_map(&args, dmirror);
		migrate_vma_finalize(&args);
	}
	mmap_read_unlock(mm);
	mmput(mm);

	/* Return the migrated data for verification. */
	ret = dmirror_bounce_init(&bounce, start, size);
	if (ret)
		return ret;
	mutex_lock(&dmirror->mutex);
	ret = dmirror_do_read(dmirror, start, end, &bounce);
	mutex_unlock(&dmirror->mutex);
	if (ret == 0) {
		if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
				 bounce.size))
			ret = -EFAULT;
	}
	cmd->cpages = bounce.cpages;
	dmirror_bounce_fini(&bounce);
	return ret;

out:
	mmap_read_unlock(mm);
	mmput(mm);
	return ret;
}
static void dmirror_mkentry(struct dmirror *dmirror, struct hmm_range *range,
			    unsigned char *perm, unsigned long entry)
{
	struct page *page;

	if (entry & HMM_PFN_ERROR) {
		*perm = HMM_DMIRROR_PROT_ERROR;
		return;
	}
	if (!(entry & HMM_PFN_VALID)) {
		*perm = HMM_DMIRROR_PROT_NONE;
		return;
	}

	page = hmm_pfn_to_page(entry);
	if (is_device_private_page(page)) {
		/* Is the page migrated to this device or some other? */
		if (dmirror->mdevice == dmirror_page_to_device(page))
			*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL;
		else
			*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE;
	} else if (is_zero_pfn(page_to_pfn(page)))
		*perm = HMM_DMIRROR_PROT_ZERO;
	else
		*perm = HMM_DMIRROR_PROT_NONE;
	if (entry & HMM_PFN_WRITE)
		*perm |= HMM_DMIRROR_PROT_WRITE;
	else
		*perm |= HMM_DMIRROR_PROT_READ;
	if (hmm_pfn_to_map_order(entry) + PAGE_SHIFT == PMD_SHIFT)
		*perm |= HMM_DMIRROR_PROT_PMD;
	else if (hmm_pfn_to_map_order(entry) + PAGE_SHIFT == PUD_SHIFT)
		*perm |= HMM_DMIRROR_PROT_PUD;
}
static bool dmirror_snapshot_invalidate(struct mmu_interval_notifier *mni,
				const struct mmu_notifier_range *range,
				unsigned long cur_seq)
{
	struct dmirror_interval *dmi =
		container_of(mni, struct dmirror_interval, notifier);
	struct dmirror *dmirror = dmi->dmirror;

	if (mmu_notifier_range_blockable(range))
		mutex_lock(&dmirror->mutex);
	else if (!mutex_trylock(&dmirror->mutex))
		return false;

	/*
	 * Snapshots only need to set the sequence number since any
	 * invalidation in the interval invalidates the whole snapshot.
	 */
	mmu_interval_set_seq(mni, cur_seq);

	mutex_unlock(&dmirror->mutex);
	return true;
}
static const struct mmu_interval_notifier_ops dmirror_mrn_ops = {
	.invalidate = dmirror_snapshot_invalidate,
};
static int dmirror_range_snapshot(struct dmirror *dmirror,
				  struct hmm_range *range,
				  unsigned char *perm)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	struct dmirror_interval notifier;
	unsigned long timeout =
		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
	unsigned long i;
	unsigned long n;
	int ret = 0;

	notifier.dmirror = dmirror;
	range->notifier = &notifier.notifier;

	ret = mmu_interval_notifier_insert(range->notifier, mm,
			range->start, range->end - range->start,
			&dmirror_mrn_ops);
	if (ret)
		return ret;

	while (true) {
		if (time_after(jiffies, timeout)) {
			ret = -EBUSY;
			goto out;
		}

		range->notifier_seq = mmu_interval_read_begin(range->notifier);

		mmap_read_lock(mm);
		ret = hmm_range_fault(range);
		mmap_read_unlock(mm);
		if (ret) {
			if (ret == -EBUSY)
				continue;
			goto out;
		}

		mutex_lock(&dmirror->mutex);
		if (mmu_interval_read_retry(range->notifier,
					    range->notifier_seq)) {
			mutex_unlock(&dmirror->mutex);
			continue;
		}
		break;
	}

	n = (range->end - range->start) >> PAGE_SHIFT;
	for (i = 0; i < n; i++)
		dmirror_mkentry(dmirror, range, perm + i, range->hmm_pfns[i]);

	mutex_unlock(&dmirror->mutex);
out:
	mmu_interval_notifier_remove(range->notifier);
	return ret;
}
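/*
 * dmirror_snapshot() reports one HMM_DMIRROR_PROT_* byte per page in the
 * user-supplied buffer, built by dmirror_mkentry() from the hmm_range_fault()
 * pfn flags, again in windows of up to 64 pages.
 */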
static int dmirror_snapshot(struct dmirror *dmirror,
			    struct hmm_dmirror_cmd *cmd)
{
	struct mm_struct *mm = dmirror->notifier.mm;
	unsigned long start, end;
	unsigned long size = cmd->npages << PAGE_SHIFT;
	unsigned long addr;
	unsigned long next;
	unsigned long pfns[64];
	unsigned char perm[64];
	char __user *uptr;
	struct hmm_range range = {
		.hmm_pfns = pfns,
		.dev_private_owner = dmirror->mdevice,
	};
	int ret = 0;

	start = cmd->addr;
	end = start + size;
	if (end < start)
		return -EINVAL;

	/* Since the mm is for the mirrored process, get a reference first. */
	if (!mmget_not_zero(mm))
		return -EINVAL;

	/*
	 * Register a temporary notifier to detect invalidations even if it
	 * overlaps with other mmu_interval_notifiers.
	 */
	uptr = u64_to_user_ptr(cmd->ptr);
	for (addr = start; addr < end; addr = next) {
		unsigned long n;

		next = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end);
		range.start = addr;
		range.end = next;

		ret = dmirror_range_snapshot(dmirror, &range, perm);
		if (ret)
			break;

		n = (range.end - range.start) >> PAGE_SHIFT;
		if (copy_to_user(uptr, perm, n)) {
			ret = -EFAULT;
			break;
		}

		cmd->cpages += n;
		uptr += n;
	}
	mmput(mm);

	return ret;
}
static long dmirror_fops_unlocked_ioctl(struct file *filp,
					unsigned int command,
					unsigned long arg)
{
	void __user *uarg = (void __user *)arg;
	struct hmm_dmirror_cmd cmd;
	struct dmirror *dmirror;
	int ret;

	dmirror = filp->private_data;
	if (!dmirror)
		return -EINVAL;

	if (copy_from_user(&cmd, uarg, sizeof(cmd)))
		return -EFAULT;

	if (cmd.addr & ~PAGE_MASK)
		return -EINVAL;
	if (cmd.addr >= (cmd.addr + (cmd.npages << PAGE_SHIFT)))
		return -EINVAL;

	cmd.cpages = 0;
	cmd.faults = 0;

	switch (command) {
	case HMM_DMIRROR_READ:
		ret = dmirror_read(dmirror, &cmd);
		break;

	case HMM_DMIRROR_WRITE:
		ret = dmirror_write(dmirror, &cmd);
		break;

	case HMM_DMIRROR_MIGRATE:
		ret = dmirror_migrate(dmirror, &cmd);
		break;

	case HMM_DMIRROR_SNAPSHOT:
		ret = dmirror_snapshot(dmirror, &cmd);
		break;

	default:
		return -EINVAL;
	}
	if (ret)
		return ret;

	if (copy_to_user(uarg, &cmd, sizeof(cmd)))
		return -EFAULT;

	return 0;
}
static const struct file_operations dmirror_fops = {
	.open		= dmirror_fops_open,
	.release	= dmirror_fops_release,
	.unlocked_ioctl = dmirror_fops_unlocked_ioctl,
	.llseek		= default_llseek,
	.owner		= THIS_MODULE,
};
static void dmirror_devmem_free(struct page *page)
{
	struct page *rpage = page->zone_device_data;
	struct dmirror_device *mdevice;

	if (rpage)
		__free_page(rpage);

	mdevice = dmirror_page_to_device(page);

	spin_lock(&mdevice->lock);
	page->zone_device_data = mdevice->free_pages;
	mdevice->free_pages = page;
	spin_unlock(&mdevice->lock);
}
static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
						      struct dmirror *dmirror)
{
	const unsigned long *src = args->src;
	unsigned long *dst = args->dst;
	unsigned long start = args->start;
	unsigned long end = args->end;
	unsigned long addr;

	for (addr = start; addr < end; addr += PAGE_SIZE,
				       src++, dst++) {
		struct page *dpage, *spage;

		spage = migrate_pfn_to_page(*src);
		if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
			continue;
		spage = spage->zone_device_data;

		dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
		if (!dpage)
			continue;

		lock_page(dpage);
		xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
		copy_highpage(dpage, spage);
		*dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
		if (*src & MIGRATE_PFN_WRITE)
			*dst |= MIGRATE_PFN_WRITE;
	}
	return 0;
}
static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
{
	struct migrate_vma args;
	unsigned long src_pfns;
	unsigned long dst_pfns;
	struct page *rpage;
	struct dmirror *dmirror;
	vm_fault_t ret;

	/*
	 * Normally, a device would use the page->zone_device_data to point to
	 * the mirror but here we use it to hold the page for the simulated
	 * device memory and that page holds the pointer to the mirror.
	 */
	rpage = vmf->page->zone_device_data;
	dmirror = rpage->zone_device_data;

	/* FIXME demonstrate how we can adjust migrate range */
	args.vma = vmf->vma;
	args.start = vmf->address;
	args.end = args.start + PAGE_SIZE;
	args.src = &src_pfns;
	args.dst = &dst_pfns;
	args.pgmap_owner = dmirror->mdevice;
	args.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;

	if (migrate_vma_setup(&args))
		return VM_FAULT_SIGBUS;

	ret = dmirror_devmem_fault_alloc_and_copy(&args, dmirror);
	if (ret)
		return ret;
	migrate_vma_pages(&args);
	/*
	 * No device finalize step is needed since
	 * dmirror_devmem_fault_alloc_and_copy() will have already
	 * invalidated the device page table.
	 */
	migrate_vma_finalize(&args);
	return 0;
}
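/*
 * The pagemap ops below tie the chunks allocated in dmirror_allocate_chunk()
 * back into the core mm: page_free returns a device private page to the free
 * list, and migrate_to_ram is the callback the CPU fault path uses to pull
 * data back out of the simulated device memory.
 */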
static const struct dev_pagemap_ops dmirror_devmem_ops = {
	.page_free	= dmirror_devmem_free,
	.migrate_to_ram	= dmirror_devmem_fault,
};
static int dmirror_device_init(struct dmirror_device *mdevice, int id)
{
	dev_t dev;
	int ret;

	dev = MKDEV(MAJOR(dmirror_dev), id);
	mutex_init(&mdevice->devmem_lock);
	spin_lock_init(&mdevice->lock);

	cdev_init(&mdevice->cdevice, &dmirror_fops);
	mdevice->cdevice.owner = THIS_MODULE;
	ret = cdev_add(&mdevice->cdevice, dev, 1);
	if (ret)
		return ret;

	/* Build a list of free ZONE_DEVICE private struct pages */
	dmirror_allocate_chunk(mdevice, NULL);

	return 0;
}
static void dmirror_device_remove(struct dmirror_device *mdevice)
{
	unsigned int i;

	if (mdevice->devmem_chunks) {
		for (i = 0; i < mdevice->devmem_count; i++) {
			struct dmirror_chunk *devmem =
				mdevice->devmem_chunks[i];

			memunmap_pages(&devmem->pagemap);
			release_mem_region(devmem->pagemap.range.start,
					   range_len(&devmem->pagemap.range));
			kfree(devmem);
		}
		kfree(mdevice->devmem_chunks);
	}

	cdev_del(&mdevice->cdevice);
}
static int __init hmm_dmirror_init(void)
{
	int ret;
	int id;

	ret = alloc_chrdev_region(&dmirror_dev, 0, DMIRROR_NDEVICES,
				  "HMM_DMIRROR");
	if (ret)
		goto err_unreg;

	for (id = 0; id < DMIRROR_NDEVICES; id++) {
		ret = dmirror_device_init(dmirror_devices + id, id);
		if (ret)
			goto err_chrdev;
	}

	pr_info("HMM test module loaded. This is only for testing HMM.\n");
	return 0;

err_chrdev:
	while (--id >= 0)
		dmirror_device_remove(dmirror_devices + id);
	unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES);
err_unreg:
	return ret;
}
static void __exit hmm_dmirror_exit(void)
{
	int id;

	for (id = 0; id < DMIRROR_NDEVICES; id++)
		dmirror_device_remove(dmirror_devices + id);
	unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES);
}
module_init(hmm_dmirror_init);
module_exit(hmm_dmirror_exit);
MODULE_LICENSE("GPL");