// SPDX-License-Identifier: GPL-2.0
/*
 * KVM guest address space mapping code
 *
 * Copyright IBM Corp. 2007, 2016
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/ksm.h>
#include <linux/mman.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/gmap.h>
#include <asm/tlb.h>

#define GMAP_SHADOW_FAKE_TABLE 1ULL

/**
 * gmap_alloc - allocate and initialize a guest address space
 * @limit: maximum address of the gmap address space
 *
 * Returns a guest address space structure.
 */
static struct gmap *gmap_alloc(unsigned long limit)
{
        struct gmap *gmap;
        struct page *page;
        unsigned long *table;
        unsigned long etype, atype;

        if (limit < _REGION3_SIZE) {
                limit = _REGION3_SIZE - 1;
                atype = _ASCE_TYPE_SEGMENT;
                etype = _SEGMENT_ENTRY_EMPTY;
        } else if (limit < _REGION2_SIZE) {
                limit = _REGION2_SIZE - 1;
                atype = _ASCE_TYPE_REGION3;
                etype = _REGION3_ENTRY_EMPTY;
        } else if (limit < _REGION1_SIZE) {
                limit = _REGION1_SIZE - 1;
                atype = _ASCE_TYPE_REGION2;
                etype = _REGION2_ENTRY_EMPTY;
        } else {
                limit = -1UL;
                atype = _ASCE_TYPE_REGION1;
                etype = _REGION1_ENTRY_EMPTY;
        }
        gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
        if (!gmap)
                goto out;
        INIT_LIST_HEAD(&gmap->crst_list);
        INIT_LIST_HEAD(&gmap->children);
        INIT_LIST_HEAD(&gmap->pt_list);
        INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL);
        INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
        INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC);
        spin_lock_init(&gmap->guest_table_lock);
        spin_lock_init(&gmap->shadow_lock);
        atomic_set(&gmap->ref_count, 1);
        page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
        if (!page)
                goto out_free;
        page->index = 0;
        list_add(&page->lru, &gmap->crst_list);
        table = (unsigned long *) page_to_phys(page);
        crst_table_init(table, etype);
        gmap->table = table;
        gmap->asce = atype | _ASCE_TABLE_LENGTH |
                _ASCE_USER_BITS | __pa(table);
        gmap->asce_end = limit;
        return gmap;

out_free:
        kfree(gmap);
out:
        return NULL;
}

/**
 * gmap_create - create a guest address space
 * @mm: pointer to the parent mm_struct
 * @limit: maximum size of the gmap address space
 *
 * Returns a guest address space structure.
 */
struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit)
{
        struct gmap *gmap;
        unsigned long gmap_asce;

        gmap = gmap_alloc(limit);
        if (!gmap)
                return NULL;
        gmap->mm = mm;
        spin_lock(&mm->context.lock);
        list_add_rcu(&gmap->list, &mm->context.gmap_list);
        if (list_is_singular(&mm->context.gmap_list))
                gmap_asce = gmap->asce;
        else
                gmap_asce = -1UL;
        WRITE_ONCE(mm->context.gmap_asce, gmap_asce);
        spin_unlock(&mm->context.lock);
        return gmap;
}
EXPORT_SYMBOL_GPL(gmap_create);

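/*
 * Illustrative sketch (not part of the original file): the typical pairing of
 * gmap_create() with gmap_remove(), roughly as a KVM-like user of this API
 * would do it. The struct my_vm wrapper and the 44-bit limit are assumptions
 * made only for this example.
 */
#if 0
struct my_vm {
        struct gmap *gmap;
};

static int my_vm_init(struct my_vm *vm)
{
        /* One gmap per virtual machine, backed by the current process. */
        vm->gmap = gmap_create(current->mm, (1UL << 44) - 1);
        if (!vm->gmap)
                return -ENOMEM;
        return 0;
}

static void my_vm_destroy(struct my_vm *vm)
{
        /* Unlinks the gmap from the mm and drops the initial reference. */
        gmap_remove(vm->gmap);
}
#endif
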
static void gmap_flush_tlb(struct gmap *gmap)
{
        if (MACHINE_HAS_IDTE)
                __tlb_flush_idte(gmap->asce);
        else
                __tlb_flush_global();
}

static void gmap_radix_tree_free(struct radix_tree_root *root)
{
        struct radix_tree_iter iter;
        unsigned long indices[16];
        unsigned long index;
        void __rcu **slot;
        int i, nr;

        /* A radix tree is freed by deleting all of its entries */
        index = 0;
        do {
                nr = 0;
                radix_tree_for_each_slot(slot, root, &iter, index) {
                        indices[nr] = iter.index;
                        if (++nr == 16)
                                break;
                }
                for (i = 0; i < nr; i++) {
                        index = indices[i];
                        radix_tree_delete(root, index);
                }
        } while (nr > 0);
}

static void gmap_rmap_radix_tree_free(struct radix_tree_root *root)
{
        struct gmap_rmap *rmap, *rnext, *head;
        struct radix_tree_iter iter;
        unsigned long indices[16];
        unsigned long index;
        void __rcu **slot;
        int i, nr;

        /* A radix tree is freed by deleting all of its entries */
        index = 0;
        do {
                nr = 0;
                radix_tree_for_each_slot(slot, root, &iter, index) {
                        indices[nr] = iter.index;
                        if (++nr == 16)
                                break;
                }
                for (i = 0; i < nr; i++) {
                        index = indices[i];
                        head = radix_tree_delete(root, index);
                        gmap_for_each_rmap_safe(rmap, rnext, head)
                                kfree(rmap);
                }
        } while (nr > 0);
}

/**
 * gmap_free - free a guest address space
 * @gmap: pointer to the guest address space structure
 *
 * No locks required. There are no references to this gmap anymore.
 */
static void gmap_free(struct gmap *gmap)
{
        struct page *page, *next;

        /* Flush tlb of all gmaps (if not already done for shadows) */
        if (!(gmap_is_shadow(gmap) && gmap->removed))
                gmap_flush_tlb(gmap);
        /* Free all segment & region tables. */
        list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
                __free_pages(page, CRST_ALLOC_ORDER);
        gmap_radix_tree_free(&gmap->guest_to_host);
        gmap_radix_tree_free(&gmap->host_to_guest);

        /* Free additional data for a shadow gmap */
        if (gmap_is_shadow(gmap)) {
                /* Free all page tables. */
                list_for_each_entry_safe(page, next, &gmap->pt_list, lru)
                        page_table_free_pgste(page);
                gmap_rmap_radix_tree_free(&gmap->host_to_rmap);
                /* Release reference to the parent */
                gmap_put(gmap->parent);
        }

        kfree(gmap);
}

/**
 * gmap_get - increase reference counter for guest address space
 * @gmap: pointer to the guest address space structure
 *
 * Returns the gmap pointer
 */
struct gmap *gmap_get(struct gmap *gmap)
{
        atomic_inc(&gmap->ref_count);
        return gmap;
}
EXPORT_SYMBOL_GPL(gmap_get);

/**
 * gmap_put - decrease reference counter for guest address space
 * @gmap: pointer to the guest address space structure
 *
 * If the reference counter reaches zero the guest address space is freed.
 */
void gmap_put(struct gmap *gmap)
{
        if (atomic_dec_return(&gmap->ref_count) == 0)
                gmap_free(gmap);
}
EXPORT_SYMBOL_GPL(gmap_put);

/**
 * gmap_remove - remove a guest address space but do not free it yet
 * @gmap: pointer to the guest address space structure
 */
void gmap_remove(struct gmap *gmap)
{
        struct gmap *sg, *next;
        unsigned long gmap_asce;

        /* Remove all shadow gmaps linked to this gmap */
        if (!list_empty(&gmap->children)) {
                spin_lock(&gmap->shadow_lock);
                list_for_each_entry_safe(sg, next, &gmap->children, list) {
                        list_del(&sg->list);
                        gmap_put(sg);
                }
                spin_unlock(&gmap->shadow_lock);
        }
        /* Remove gmap from the per-mm list */
        spin_lock(&gmap->mm->context.lock);
        list_del_rcu(&gmap->list);
        if (list_empty(&gmap->mm->context.gmap_list))
                gmap_asce = 0;
        else if (list_is_singular(&gmap->mm->context.gmap_list))
                gmap_asce = list_first_entry(&gmap->mm->context.gmap_list,
                                             struct gmap, list)->asce;
        else
                gmap_asce = -1UL;
        WRITE_ONCE(gmap->mm->context.gmap_asce, gmap_asce);
        spin_unlock(&gmap->mm->context.lock);
        synchronize_rcu();
        /* Put reference */
        gmap_put(gmap);
}
EXPORT_SYMBOL_GPL(gmap_remove);

/**
 * gmap_enable - switch primary space to the guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_enable(struct gmap *gmap)
{
        S390_lowcore.gmap = (unsigned long) gmap;
}
EXPORT_SYMBOL_GPL(gmap_enable);

/**
 * gmap_disable - switch back to the standard primary address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_disable(struct gmap *gmap)
{
        S390_lowcore.gmap = 0UL;
}
EXPORT_SYMBOL_GPL(gmap_disable);

/**
 * gmap_get_enabled - get a pointer to the currently enabled gmap
 *
 * Returns a pointer to the currently enabled gmap. 0 if none is enabled.
 */
struct gmap *gmap_get_enabled(void)
{
        return (struct gmap *) S390_lowcore.gmap;
}
EXPORT_SYMBOL_GPL(gmap_get_enabled);

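/*
 * Illustrative sketch (not part of the original file): how the enable/disable
 * pair brackets guest execution on a CPU. The my_run_guest_cpu() helper is an
 * assumption standing in for the actual interception loop of a hypervisor.
 */
#if 0
static void my_run_guest_cpu(struct gmap *gmap)
{
        /* Make this gmap the current guest address space for this CPU. */
        gmap_enable(gmap);
        /* ... enter SIE / run the guest here ... */
        /* Switch back before doing regular kernel work again. */
        gmap_disable(gmap);
}
#endif
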
/*
 * gmap_alloc_table is assumed to be called with mmap_sem held
 */
static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
                            unsigned long init, unsigned long gaddr)
{
        struct page *page;
        unsigned long *new;

        /* since we don't free the gmap table until gmap_free we can unlock */
        page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
        if (!page)
                return -ENOMEM;
        new = (unsigned long *) page_to_phys(page);
        crst_table_init(new, init);
        spin_lock(&gmap->guest_table_lock);
        if (*table & _REGION_ENTRY_INVALID) {
                list_add(&page->lru, &gmap->crst_list);
                *table = (unsigned long) new | _REGION_ENTRY_LENGTH |
                        (*table & _REGION_ENTRY_TYPE_MASK);
                page->index = gaddr;
                page = NULL;
        }
        spin_unlock(&gmap->guest_table_lock);
        if (page)
                __free_pages(page, CRST_ALLOC_ORDER);
        return 0;
}

/**
 * __gmap_segment_gaddr - find virtual address from segment pointer
 * @entry: pointer to a segment table entry in the guest address space
 *
 * Returns the virtual address in the guest address space for the segment
 */
static unsigned long __gmap_segment_gaddr(unsigned long *entry)
{
        struct page *page;
        unsigned long offset, mask;

        offset = (unsigned long) entry / sizeof(unsigned long);
        offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
        mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
        page = virt_to_page((void *)((unsigned long) entry & mask));
        return page->index + offset;
}

/**
 * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
 * @gmap: pointer to the guest address space structure
 * @vmaddr: address in the host process address space
 *
 * Returns 1 if a TLB flush is required
 */
static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
{
        unsigned long *entry;
        int flush = 0;

        BUG_ON(gmap_is_shadow(gmap));
        spin_lock(&gmap->guest_table_lock);
        entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
        if (entry) {
                flush = (*entry != _SEGMENT_ENTRY_EMPTY);
                *entry = _SEGMENT_ENTRY_EMPTY;
        }
        spin_unlock(&gmap->guest_table_lock);
        return flush;
}

/**
 * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
 * @gmap: pointer to the guest address space structure
 * @gaddr: address in the guest address space
 *
 * Returns 1 if a TLB flush is required
 */
static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
{
        unsigned long vmaddr;

        vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
                                                   gaddr >> PMD_SHIFT);
        return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
}

/**
 * gmap_unmap_segment - unmap segment from the guest address space
 * @gmap: pointer to the guest address space structure
 * @to: address in the guest address space
 * @len: length of the memory area to unmap
 *
 * Returns 0 if the unmap succeeded, -EINVAL if not.
 */
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
{
        unsigned long off;
        int flush;

        BUG_ON(gmap_is_shadow(gmap));
        if ((to | len) & (PMD_SIZE - 1))
                return -EINVAL;
        if (len == 0 || to + len < to)
                return -EINVAL;

        flush = 0;
        down_write(&gmap->mm->mmap_sem);
        for (off = 0; off < len; off += PMD_SIZE)
                flush |= __gmap_unmap_by_gaddr(gmap, to + off);
        up_write(&gmap->mm->mmap_sem);
        if (flush)
                gmap_flush_tlb(gmap);
        return 0;
}
EXPORT_SYMBOL_GPL(gmap_unmap_segment);

/**
 * gmap_map_segment - map a segment to the guest address space
 * @gmap: pointer to the guest address space structure
 * @from: source address in the parent address space
 * @to: target address in the guest address space
 * @len: length of the memory area to map
 *
 * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
 */
int gmap_map_segment(struct gmap *gmap, unsigned long from,
                     unsigned long to, unsigned long len)
{
        unsigned long off;
        int flush;

        BUG_ON(gmap_is_shadow(gmap));
        if ((from | to | len) & (PMD_SIZE - 1))
                return -EINVAL;
        if (len == 0 || from + len < from || to + len < to ||
            from + len - 1 > TASK_SIZE_MAX || to + len - 1 > gmap->asce_end)
                return -EINVAL;

        flush = 0;
        down_write(&gmap->mm->mmap_sem);
        for (off = 0; off < len; off += PMD_SIZE) {
                /* Remove old translation */
                flush |= __gmap_unmap_by_gaddr(gmap, to + off);
                /* Store new translation */
                if (radix_tree_insert(&gmap->guest_to_host,
                                      (to + off) >> PMD_SHIFT,
                                      (void *) from + off))
                        break;
        }
        up_write(&gmap->mm->mmap_sem);
        if (flush)
                gmap_flush_tlb(gmap);
        if (off >= len)
                return 0;
        gmap_unmap_segment(gmap, to, len);
        return -ENOMEM;
}
EXPORT_SYMBOL_GPL(gmap_map_segment);

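/*
 * Illustrative sketch (not part of the original file): populating the
 * guest_to_host radix tree for one guest memory slot. All values must be
 * 1 MB segment aligned; the addresses and size below are made up for the
 * example only.
 */
#if 0
static int my_map_slot(struct gmap *gmap)
{
        unsigned long userspace_addr = 0x20000000UL;    /* host (vm) address */
        unsigned long guest_phys_addr = 0x0UL;          /* guest address */
        unsigned long size = 256UL << 20;               /* 256 MB */

        /* Both addresses and the length are multiples of PMD_SIZE (1 MB). */
        return gmap_map_segment(gmap, userspace_addr, guest_phys_addr, size);
}
#endif
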
/**
 * __gmap_translate - translate a guest address to a user space address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 * The mmap_sem of the mm that belongs to the address space must be held
 * when this function gets called.
 *
 * Note: Can also be called for shadow gmaps.
 */
unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
        unsigned long vmaddr;

        vmaddr = (unsigned long)
                radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
        /* Note: guest_to_host is empty for a shadow gmap */
        return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
}
EXPORT_SYMBOL_GPL(__gmap_translate);

/**
 * gmap_translate - translate a guest address to a user space address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 */
unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
        unsigned long rc;

        down_read(&gmap->mm->mmap_sem);
        rc = __gmap_translate(gmap, gaddr);
        up_read(&gmap->mm->mmap_sem);
        return rc;
}
EXPORT_SYMBOL_GPL(gmap_translate);

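/*
 * Illustrative sketch (not part of the original file): the translation works
 * at 1 MB segment granularity, so the low PMD bits of the guest address are
 * carried over unchanged into the returned user space address. The helper
 * name is an assumption for the example.
 */
#if 0
static int my_guest_to_user(struct gmap *gmap, unsigned long gaddr,
                            void __user **uaddr)
{
        unsigned long vmaddr;

        vmaddr = gmap_translate(gmap, gaddr);
        if (IS_ERR_VALUE(vmaddr))
                return (int) vmaddr;    /* -EFAULT: no segment mapped */
        *uaddr = (void __user *) vmaddr;
        return 0;
}
#endif
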
/**
 * gmap_unlink - disconnect a page table from the gmap shadow tables
 * @mm: pointer to the parent mm_struct
 * @table: pointer to the host page table
 * @vmaddr: vm address associated with the host page table
 */
void gmap_unlink(struct mm_struct *mm, unsigned long *table,
                 unsigned long vmaddr)
{
        struct gmap *gmap;
        int flush;

        rcu_read_lock();
        list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
                flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
                if (flush)
                        gmap_flush_tlb(gmap);
        }
        rcu_read_unlock();
}

static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *old, pmd_t new,
                           unsigned long gaddr);

/**
 * __gmap_link - set up shadow page tables to connect a host to a guest address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 * @vmaddr: vm address
 *
 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
 * if the vm address is already mapped to a different guest segment.
 * The mmap_sem of the mm that belongs to the address space must be held
 * when this function gets called.
 */
int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
{
        struct mm_struct *mm;
        unsigned long *table;
        spinlock_t *ptl;
        pgd_t *pgd;
        p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        u64 unprot;
        int rc;

        BUG_ON(gmap_is_shadow(gmap));
        /* Create higher level tables in the gmap page table */
        table = gmap->table;
        if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
                table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
                if ((*table & _REGION_ENTRY_INVALID) &&
                    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
                                     gaddr & _REGION1_MASK))
                        return -ENOMEM;
                table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
        }
        if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
                table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
                if ((*table & _REGION_ENTRY_INVALID) &&
                    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
                                     gaddr & _REGION2_MASK))
                        return -ENOMEM;
                table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
        }
        if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
                table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
                if ((*table & _REGION_ENTRY_INVALID) &&
                    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
                                     gaddr & _REGION3_MASK))
                        return -ENOMEM;
                table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
        }
        table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
        /* Walk the parent mm page table */
        mm = gmap->mm;
        pgd = pgd_offset(mm, vmaddr);
        VM_BUG_ON(pgd_none(*pgd));
        p4d = p4d_offset(pgd, vmaddr);
        VM_BUG_ON(p4d_none(*p4d));
        pud = pud_offset(p4d, vmaddr);
        VM_BUG_ON(pud_none(*pud));
        /* large puds cannot yet be handled */
        if (pud_large(*pud))
                return -EFAULT;
        pmd = pmd_offset(pud, vmaddr);
        VM_BUG_ON(pmd_none(*pmd));
        /* large pmds cannot yet be handled */
        if (pmd_large(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m)
                return -EFAULT;
        /* Link gmap segment table entry location to page table. */
        rc = radix_tree_preload(GFP_KERNEL);
        if (rc)
                return rc;
        ptl = pmd_lock(mm, pmd);
        spin_lock(&gmap->guest_table_lock);
        if (*table == _SEGMENT_ENTRY_EMPTY) {
                rc = radix_tree_insert(&gmap->host_to_guest,
                                       vmaddr >> PMD_SHIFT, table);
                if (!rc) {
                        if (pmd_large(*pmd)) {
                                *table = (pmd_val(*pmd) &
                                          _SEGMENT_ENTRY_HARDWARE_BITS_LARGE)
                                        | _SEGMENT_ENTRY_GMAP_UC;
                        } else
                                *table = pmd_val(*pmd) &
                                        _SEGMENT_ENTRY_HARDWARE_BITS;
                }
        } else if (*table & _SEGMENT_ENTRY_PROTECT &&
                   !(pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT)) {
                unprot = (u64)*table;
                unprot &= ~_SEGMENT_ENTRY_PROTECT;
                unprot |= _SEGMENT_ENTRY_GMAP_UC;
                gmap_pmdp_xchg(gmap, (pmd_t *)table, __pmd(unprot), gaddr);
        }
        spin_unlock(&gmap->guest_table_lock);
        spin_unlock(ptl);
        radix_tree_preload_end();
        return rc;
}

/**
 * gmap_fault - resolve a fault on a guest address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 * @fault_flags: flags to pass down to handle_mm_fault()
 *
 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
 * if the vm address is already mapped to a different guest segment.
 */
int gmap_fault(struct gmap *gmap, unsigned long gaddr,
               unsigned int fault_flags)
{
        unsigned long vmaddr;
        int rc;
        bool unlocked;

        down_read(&gmap->mm->mmap_sem);

retry:
        unlocked = false;
        vmaddr = __gmap_translate(gmap, gaddr);
        if (IS_ERR_VALUE(vmaddr)) {
                rc = vmaddr;
                goto out_up;
        }
        if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags,
                             &unlocked)) {
                rc = -EFAULT;
                goto out_up;
        }
        /*
         * In the case that fixup_user_fault unlocked the mmap_sem during
         * faulting, redo __gmap_translate to not race with a map/unmap_segment.
         */
        if (unlocked)
                goto retry;

        rc = __gmap_link(gmap, gaddr, vmaddr);
out_up:
        up_read(&gmap->mm->mmap_sem);
        return rc;
}
EXPORT_SYMBOL_GPL(gmap_fault);

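/*
 * Illustrative sketch (not part of the original file): resolving a guest
 * write fault reported by the hardware, e.g. from a program interrupt
 * handler. FAULT_FLAG_WRITE is the flag gmap_fault() forwards to
 * handle_mm_fault() via fixup_user_fault(); the helper name is an assumption.
 */
#if 0
static int my_handle_guest_fault(struct gmap *gmap, unsigned long gaddr)
{
        int rc;

        rc = gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE);
        if (rc == -EFAULT)
                /* address not backed by a gmap_map_segment() mapping */
                return rc;
        return rc;      /* 0 on success, -ENOMEM on allocation failure */
}
#endif
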
/*
 * this function is assumed to be called with mmap_sem held
 */
void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
{
        unsigned long vmaddr;
        spinlock_t *ptl;
        pte_t *ptep;

        /* Find the vm address for the guest address */
        vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
                                                   gaddr >> PMD_SHIFT);
        if (vmaddr) {
                vmaddr |= gaddr & ~PMD_MASK;
                /* Get pointer to the page table entry */
                ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
                if (likely(ptep))
                        ptep_zap_unused(gmap->mm, vmaddr, ptep, 0);
                pte_unmap_unlock(ptep, ptl);
        }
}
EXPORT_SYMBOL_GPL(__gmap_zap);

void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
{
        unsigned long gaddr, vmaddr, size;
        struct vm_area_struct *vma;

        down_read(&gmap->mm->mmap_sem);
        for (gaddr = from; gaddr < to;
             gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
                /* Find the vm address for the guest address */
                vmaddr = (unsigned long)
                        radix_tree_lookup(&gmap->guest_to_host,
                                          gaddr >> PMD_SHIFT);
                if (!vmaddr)
                        continue;
                vmaddr |= gaddr & ~PMD_MASK;
                /* Find vma in the parent mm */
                vma = find_vma(gmap->mm, vmaddr);
                if (!vma)
                        continue;
                size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
                zap_page_range(vma, vmaddr, size);
        }
        up_read(&gmap->mm->mmap_sem);
}
EXPORT_SYMBOL_GPL(gmap_discard);

static LIST_HEAD(gmap_notifier_list);
static DEFINE_SPINLOCK(gmap_notifier_lock);

/**
 * gmap_register_pte_notifier - register a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_register_pte_notifier(struct gmap_notifier *nb)
{
        spin_lock(&gmap_notifier_lock);
        list_add_rcu(&nb->list, &gmap_notifier_list);
        spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_register_pte_notifier);

/**
 * gmap_unregister_pte_notifier - remove a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_unregister_pte_notifier(struct gmap_notifier *nb)
{
        spin_lock(&gmap_notifier_lock);
        list_del_rcu(&nb->list);
        spin_unlock(&gmap_notifier_lock);
        synchronize_rcu();
}
EXPORT_SYMBOL_GPL(gmap_unregister_pte_notifier);

/**
 * gmap_call_notifier - call all registered invalidation callbacks
 * @gmap: pointer to guest mapping meta data structure
 * @start: start virtual address in the guest address space
 * @end: end virtual address in the guest address space
 */
static void gmap_call_notifier(struct gmap *gmap, unsigned long start,
                               unsigned long end)
{
        struct gmap_notifier *nb;

        list_for_each_entry(nb, &gmap_notifier_list, list)
                nb->notifier_call(gmap, start, end);
}

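/*
 * Illustrative sketch (not part of the original file): arming a pte
 * invalidation callback. The callback signature matches the
 * nb->notifier_call(gmap, start, end) invocation above; my_pte_notifier and
 * my_gmap_notifier are assumptions made for the example.
 */
#if 0
static void my_pte_notifier(struct gmap *gmap, unsigned long start,
                            unsigned long end)
{
        /* invalidate any derived state for [start, end] of this gmap */
}

static struct gmap_notifier my_gmap_notifier = {
        .notifier_call = my_pte_notifier,
};

static void my_register(void)
{
        gmap_register_pte_notifier(&my_gmap_notifier);
}
#endif
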
/**
 * gmap_table_walk - walk the gmap page tables
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @level: page table level to stop at
 *
 * Returns a table entry pointer for the given guest address and @level
 * @level=0 : returns a pointer to a page table entry (or NULL)
 * @level=1 : returns a pointer to a segment table entry (or NULL)
 * @level=2 : returns a pointer to a region-3 table entry (or NULL)
 * @level=3 : returns a pointer to a region-2 table entry (or NULL)
 * @level=4 : returns a pointer to a region-1 table entry (or NULL)
 *
 * Returns NULL if the gmap page tables could not be walked to the
 * requested level.
 *
 * Note: Can also be called for shadow gmaps.
 */
static inline unsigned long *gmap_table_walk(struct gmap *gmap,
                                             unsigned long gaddr, int level)
{
        unsigned long *table;

        if ((gmap->asce & _ASCE_TYPE_MASK) + 4 < (level * 4))
                return NULL;
        if (gmap_is_shadow(gmap) && gmap->removed)
                return NULL;
        if (gaddr & (-1UL << (31 + ((gmap->asce & _ASCE_TYPE_MASK) >> 2)*11)))
                return NULL;
        table = gmap->table;
        switch (gmap->asce & _ASCE_TYPE_MASK) {
        case _ASCE_TYPE_REGION1:
                table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
                if (level == 4)
                        break;
                if (*table & _REGION_ENTRY_INVALID)
                        return NULL;
                table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
                /* Fallthrough */
        case _ASCE_TYPE_REGION2:
                table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
                if (level == 3)
                        break;
                if (*table & _REGION_ENTRY_INVALID)
                        return NULL;
                table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
                /* Fallthrough */
        case _ASCE_TYPE_REGION3:
                table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
                if (level == 2)
                        break;
                if (*table & _REGION_ENTRY_INVALID)
                        return NULL;
                table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
                /* Fallthrough */
        case _ASCE_TYPE_SEGMENT:
                table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
                if (level == 1)
                        break;
                if (*table & _REGION_ENTRY_INVALID)
                        return NULL;
                table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
                table += (gaddr & _PAGE_INDEX) >> _PAGE_SHIFT;
        }
        return table;
}

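/*
 * Illustrative sketch (not part of the original file): the @level argument
 * selects where the walk stops. Level 1 yields the segment table entry that
 * most callers in this file treat as a pmd; the helper name is an assumption.
 */
#if 0
static bool my_segment_is_valid(struct gmap *gmap, unsigned long gaddr)
{
        unsigned long *entry;

        entry = gmap_table_walk(gmap, gaddr, 1);        /* segment table entry */
        return entry && !(*entry & _SEGMENT_ENTRY_INVALID);
}
#endif
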
/**
 * gmap_pte_op_walk - walk the gmap page table, get the page table lock
 *                    and return the pte pointer
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @ptl: pointer to the spinlock pointer
 *
 * Returns a pointer to the locked pte for a guest address, or NULL
 */
static pte_t *gmap_pte_op_walk(struct gmap *gmap, unsigned long gaddr,
                               spinlock_t **ptl)
{
        unsigned long *table;

        BUG_ON(gmap_is_shadow(gmap));
        /* Walk the gmap page table, lock and get pte pointer */
        table = gmap_table_walk(gmap, gaddr, 1); /* get segment pointer */
        if (!table || *table & _SEGMENT_ENTRY_INVALID)
                return NULL;
        return pte_alloc_map_lock(gmap->mm, (pmd_t *) table, gaddr, ptl);
}

/**
 * gmap_pte_op_fixup - force a page in and connect the gmap page table
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @vmaddr: address in the host process address space
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 *
 * Returns 0 if the caller can retry __gmap_translate (might fail again),
 * -ENOMEM if out of memory and -EFAULT if anything goes wrong while fixing
 * up or connecting the gmap page table.
 */
static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr,
                             unsigned long vmaddr, int prot)
{
        struct mm_struct *mm = gmap->mm;
        unsigned int fault_flags;
        bool unlocked = false;

        BUG_ON(gmap_is_shadow(gmap));
        fault_flags = (prot == PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
        if (fixup_user_fault(current, mm, vmaddr, fault_flags, &unlocked))
                return -EFAULT;
        if (unlocked)
                /* lost mmap_sem, caller has to retry __gmap_translate */
                return 0;
        /* Connect the page tables */
        return __gmap_link(gmap, gaddr, vmaddr);
}

/**
 * gmap_pte_op_end - release the page table lock
 * @ptl: pointer to the spinlock pointer
 */
static void gmap_pte_op_end(spinlock_t *ptl)
{
        if (ptl)
                spin_unlock(ptl);
}

/**
 * gmap_pmd_op_walk - walk the gmap tables, get the guest table lock
 *                    and return the pmd pointer
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 *
 * Returns a pointer to the pmd for a guest address, or NULL
 */
static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr)
{
        pmd_t *pmdp;

        BUG_ON(gmap_is_shadow(gmap));
        spin_lock(&gmap->guest_table_lock);
        pmdp = (pmd_t *) gmap_table_walk(gmap, gaddr, 1);

        if (!pmdp || pmd_none(*pmdp)) {
                spin_unlock(&gmap->guest_table_lock);
                return NULL;
        }

        /* 4k page table entries are locked via the pte (pte_alloc_map_lock). */
        if (!pmd_large(*pmdp))
                spin_unlock(&gmap->guest_table_lock);
        return pmdp;
}

/**
 * gmap_pmd_op_end - release the guest_table_lock if needed
 * @gmap: pointer to the guest mapping meta data structure
 * @pmdp: pointer to the pmd
 */
static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp)
{
        if (pmd_large(*pmdp))
                spin_unlock(&gmap->guest_table_lock);
}

/**
 * gmap_protect_pmd - remove access rights to memory and set pmd notification bits
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @pmdp: pointer to the pmd to be protected
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bits: notification bits to set
 *
 * Returns:
 * 0 if successfully protected
 * -EAGAIN if a fixup is needed
 * -EINVAL if unsupported notifier bits have been specified
 *
 * Expected to be called with sg->mm->mmap_sem in read and
 * guest_table_lock held.
 */
static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr,
                            pmd_t *pmdp, int prot, unsigned long bits)
{
        int pmd_i = pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID;
        int pmd_p = pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT;
        pmd_t new = *pmdp;

        /* Fixup needed */
        if ((pmd_i && (prot != PROT_NONE)) || (pmd_p && (prot == PROT_WRITE)))
                return -EAGAIN;

        if (prot == PROT_NONE && !pmd_i) {
                pmd_val(new) |= _SEGMENT_ENTRY_INVALID;
                gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
        }

        if (prot == PROT_READ && !pmd_p) {
                pmd_val(new) &= ~_SEGMENT_ENTRY_INVALID;
                pmd_val(new) |= _SEGMENT_ENTRY_PROTECT;
                gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
        }

        if (bits & GMAP_NOTIFY_MPROT)
                pmd_val(*pmdp) |= _SEGMENT_ENTRY_GMAP_IN;

        /* Shadow GMAP protection needs split PMDs */
        if (bits & GMAP_NOTIFY_SHADOW)
                return -EINVAL;

        return 0;
}

/**
 * gmap_protect_pte - remove access rights to memory and set pgste bits
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @pmdp: pointer to the pmd associated with the pte
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bits: notification bits to set
 *
 * Returns 0 if successfully protected, -ENOMEM if out of memory and
 * -EAGAIN if a fixup is needed.
 *
 * Expected to be called with sg->mm->mmap_sem in read
 */
static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
                            pmd_t *pmdp, int prot, unsigned long bits)
{
        int rc;
        pte_t *ptep;
        spinlock_t *ptl = NULL;
        unsigned long pbits = 0;

        if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
                return -EAGAIN;

        ptep = pte_alloc_map_lock(gmap->mm, pmdp, gaddr, &ptl);
        if (!ptep)
                return -ENOMEM;

        pbits |= (bits & GMAP_NOTIFY_MPROT) ? PGSTE_IN_BIT : 0;
        pbits |= (bits & GMAP_NOTIFY_SHADOW) ? PGSTE_VSIE_BIT : 0;
        /* Protect and unlock. */
        rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, pbits);
        gmap_pte_op_end(ptl);
        return rc;
}

/**
 * gmap_protect_range - remove access rights to memory and set pgste bits
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @len: size of area
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bits: pgste notification bits to set
 *
 * Returns 0 if successfully protected, -ENOMEM if out of memory and
 * -EFAULT if gaddr is invalid (or mapping for shadows is missing).
 *
 * Called with sg->mm->mmap_sem in read.
 */
static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
                              unsigned long len, int prot, unsigned long bits)
{
        unsigned long vmaddr, dist;
        pmd_t *pmdp;
        int rc;

        BUG_ON(gmap_is_shadow(gmap));
        while (len) {
                rc = -EAGAIN;
                pmdp = gmap_pmd_op_walk(gmap, gaddr);
                if (pmdp) {
                        if (!pmd_large(*pmdp)) {
                                rc = gmap_protect_pte(gmap, gaddr, pmdp, prot,
                                                      bits);
                                if (!rc) {
                                        len -= PAGE_SIZE;
                                        gaddr += PAGE_SIZE;
                                }
                        } else {
                                rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot,
                                                      bits);
                                if (!rc) {
                                        dist = HPAGE_SIZE - (gaddr & ~HPAGE_MASK);
                                        len = len < dist ? 0 : len - dist;
                                        gaddr = (gaddr & HPAGE_MASK) + HPAGE_SIZE;
                                }
                        }
                        gmap_pmd_op_end(gmap, pmdp);
                }
                if (rc) {
                        if (rc == -EINVAL)
                                return rc;

                        /* -EAGAIN, fixup of userspace mm and gmap */
                        vmaddr = __gmap_translate(gmap, gaddr);
                        if (IS_ERR_VALUE(vmaddr))
                                return vmaddr;
                        rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot);
                        if (rc)
                                return rc;
                }
        }
        return 0;
}

/**
 * gmap_mprotect_notify - change access rights for a range of ptes and
 *                        call the notifier if any pte changes again
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @len: size of area
 * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
 *
 * Returns 0 if for each page in the given range a gmap mapping exists,
 * the new access rights could be set and the notifier could be armed.
 * If the gmap mapping is missing for one or more pages -EFAULT is
 * returned. If no memory could be allocated -ENOMEM is returned.
 * This function establishes missing page table entries.
 */
int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr,
                         unsigned long len, int prot)
{
        int rc;

        if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK) || gmap_is_shadow(gmap))
                return -EINVAL;
        if (!MACHINE_HAS_ESOP && prot == PROT_READ)
                return -EINVAL;
        down_read(&gmap->mm->mmap_sem);
        rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT);
        up_read(&gmap->mm->mmap_sem);
        return rc;
}
EXPORT_SYMBOL_GPL(gmap_mprotect_notify);

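/*
 * Illustrative sketch (not part of the original file): write-protecting one
 * guest page so that the registered pte notifier fires on the next guest
 * write, as a dirty-tracking style user of this interface might do. The
 * helper name is an assumption.
 */
#if 0
static int my_track_page(struct gmap *gmap, unsigned long gaddr)
{
        /* gaddr and len must be page aligned */
        return gmap_mprotect_notify(gmap, gaddr & PAGE_MASK, PAGE_SIZE,
                                    PROT_READ);
}
#endif
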
/**
 * gmap_read_table - get an unsigned long value from a guest page table using
 *                   absolute addressing, without marking the page referenced.
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @val: pointer to the unsigned long value to return
 *
 * Returns 0 if the value was read, -ENOMEM if out of memory and -EFAULT
 * if reading using the virtual address failed. -EINVAL if called on a gmap
 * shadow.
 *
 * Called with gmap->mm->mmap_sem in read.
 */
int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)
{
        unsigned long address, vmaddr;
        spinlock_t *ptl;
        pte_t *ptep, pte;
        int rc;

        if (gmap_is_shadow(gmap))
                return -EINVAL;

        while (1) {
                rc = -EAGAIN;
                ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
                if (ptep) {
                        pte = *ptep;
                        if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) {
                                address = pte_val(pte) & PAGE_MASK;
                                address += gaddr & ~PAGE_MASK;
                                *val = *(unsigned long *) address;
                                pte_val(*ptep) |= _PAGE_YOUNG;
                                /* Do *NOT* clear the _PAGE_INVALID bit! */
                                rc = 0;
                        }
                        gmap_pte_op_end(ptl);
                }
                if (!rc)
                        break;
                vmaddr = __gmap_translate(gmap, gaddr);
                if (IS_ERR_VALUE(vmaddr)) {
                        rc = vmaddr;
                        break;
                }
                rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, PROT_READ);
                if (rc)
                        break;
        }
        return rc;
}
EXPORT_SYMBOL_GPL(gmap_read_table);

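/*
 * Illustrative sketch (not part of the original file): fetching one 8-byte
 * entry of a guest DAT table without marking the backing page referenced,
 * which is what the VSIE shadowing code needs. The helper name is an
 * assumption; the caller has to hold mmap_sem for read, as the kernel-doc
 * above requires.
 */
#if 0
static int my_peek_guest_word(struct gmap *gmap, unsigned long gaddr,
                              unsigned long *word)
{
        int rc;

        down_read(&gmap->mm->mmap_sem);
        rc = gmap_read_table(gmap, gaddr, word);
        up_read(&gmap->mm->mmap_sem);
        return rc;
}
#endif
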
/**
 * gmap_insert_rmap - add a rmap to the host_to_rmap radix tree
 * @sg: pointer to the shadow guest address space structure
 * @vmaddr: vm address associated with the rmap
 * @rmap: pointer to the rmap structure
 *
 * Called with the sg->guest_table_lock
 */
static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
                                    struct gmap_rmap *rmap)
{
        void __rcu **slot;

        BUG_ON(!gmap_is_shadow(sg));
        slot = radix_tree_lookup_slot(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
        if (slot) {
                rmap->next = radix_tree_deref_slot_protected(slot,
                                                        &sg->guest_table_lock);
                radix_tree_replace_slot(&sg->host_to_rmap, slot, rmap);
        } else {
                rmap->next = NULL;
                radix_tree_insert(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT,
                                  rmap);
        }
}

/**
 * gmap_protect_rmap - restrict access rights to memory (RO) and create an rmap
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow gmap
 * @paddr: address in the parent guest address space
 * @len: length of the memory area to protect
 *
 * Returns 0 if successfully protected and the rmap was created, -ENOMEM
 * if out of memory and -EFAULT if paddr is invalid.
 */
static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
                             unsigned long paddr, unsigned long len)
{
        struct gmap *parent;
        struct gmap_rmap *rmap;
        unsigned long vmaddr;
        spinlock_t *ptl;
        pte_t *ptep;
        int rc;

        BUG_ON(!gmap_is_shadow(sg));
        parent = sg->parent;
        while (len) {
                vmaddr = __gmap_translate(parent, paddr);
                if (IS_ERR_VALUE(vmaddr))
                        return vmaddr;
                rmap = kzalloc(sizeof(*rmap), GFP_KERNEL);
                if (!rmap)
                        return -ENOMEM;
                rmap->raddr = raddr;
                rc = radix_tree_preload(GFP_KERNEL);
                if (rc) {
                        kfree(rmap);
                        return rc;
                }
                rc = -EAGAIN;
                ptep = gmap_pte_op_walk(parent, paddr, &ptl);
                if (ptep) {
                        spin_lock(&sg->guest_table_lock);
                        rc = ptep_force_prot(parent->mm, paddr, ptep, PROT_READ,
                                             PGSTE_VSIE_BIT);
                        if (!rc)
                                gmap_insert_rmap(sg, vmaddr, rmap);
                        spin_unlock(&sg->guest_table_lock);
                        gmap_pte_op_end(ptl);
                }
                radix_tree_preload_end();
                if (rc) {
                        kfree(rmap);
                        rc = gmap_pte_op_fixup(parent, paddr, vmaddr, PROT_READ);
                        if (rc)
                                return rc;
                        continue;
                }
                paddr += PAGE_SIZE;
                len -= PAGE_SIZE;
        }
        return 0;
}

#define _SHADOW_RMAP_MASK	0x7
#define _SHADOW_RMAP_REGION1	0x5
#define _SHADOW_RMAP_REGION2	0x4
#define _SHADOW_RMAP_REGION3	0x3
#define _SHADOW_RMAP_SEGMENT	0x2
#define _SHADOW_RMAP_PGTABLE	0x1

/**
 * gmap_idte_one - invalidate a single region or segment table entry
 * @asce: region or segment table *origin* + table-type bits
 * @vaddr: virtual address to identify the table entry to flush
 *
 * The invalid bit of a single region or segment table entry is set
 * and the associated TLB entries depending on the entry are flushed.
 * The table-type of the @asce identifies the portion of the @vaddr
 * that is used as the invalidation index.
 */
static inline void gmap_idte_one(unsigned long asce, unsigned long vaddr)
{
        asm volatile(
                "	.insn	rrf,0xb98e0000,%0,%1,0,0"
                : : "a" (asce), "a" (vaddr) : "cc", "memory");
}

/**
 * gmap_unshadow_page - remove a page from a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_page(struct gmap *sg, unsigned long raddr)
{
        unsigned long *table;

        BUG_ON(!gmap_is_shadow(sg));
        table = gmap_table_walk(sg, raddr, 0); /* get page table pointer */
        if (!table || *table & _PAGE_INVALID)
                return;
        gmap_call_notifier(sg, raddr, raddr + _PAGE_SIZE - 1);
        ptep_unshadow_pte(sg->mm, raddr, (pte_t *) table);
}

/**
 * __gmap_unshadow_pgt - remove all entries from a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @pgt: pointer to the start of a shadow page table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr,
                                unsigned long *pgt)
{
        int i;

        BUG_ON(!gmap_is_shadow(sg));
        for (i = 0; i < _PAGE_ENTRIES; i++, raddr += _PAGE_SIZE)
                pgt[i] = _PAGE_INVALID;
}

/**
 * gmap_unshadow_pgt - remove a shadow page table from a segment entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
{
        unsigned long sto, *ste, *pgt;
        struct page *page;

        BUG_ON(!gmap_is_shadow(sg));
        ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */
        if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN))
                return;
        gmap_call_notifier(sg, raddr, raddr + _SEGMENT_SIZE - 1);
        sto = (unsigned long) (ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT));
        gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr);
        pgt = (unsigned long *)(*ste & _SEGMENT_ENTRY_ORIGIN);
        *ste = _SEGMENT_ENTRY_EMPTY;
        __gmap_unshadow_pgt(sg, raddr, pgt);
        /* Free page table */
        page = pfn_to_page(__pa(pgt) >> PAGE_SHIFT);
        list_del(&page->lru);
        page_table_free_pgste(page);
}

/**
 * __gmap_unshadow_sgt - remove all entries from a shadow segment table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @sgt: pointer to the start of a shadow segment table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
                                unsigned long *sgt)
{
        unsigned long *pgt;
        struct page *page;
        int i;

        BUG_ON(!gmap_is_shadow(sg));
        for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) {
                if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN))
                        continue;
                pgt = (unsigned long *)(sgt[i] & _REGION_ENTRY_ORIGIN);
                sgt[i] = _SEGMENT_ENTRY_EMPTY;
                __gmap_unshadow_pgt(sg, raddr, pgt);
                /* Free page table */
                page = pfn_to_page(__pa(pgt) >> PAGE_SHIFT);
                list_del(&page->lru);
                page_table_free_pgste(page);
        }
}

/**
 * gmap_unshadow_sgt - remove a shadow segment table from a region-3 entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the shadow->guest_table_lock
 */
static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
{
        unsigned long r3o, *r3e, *sgt;
        struct page *page;

        BUG_ON(!gmap_is_shadow(sg));
        r3e = gmap_table_walk(sg, raddr, 2); /* get region-3 pointer */
        if (!r3e || !(*r3e & _REGION_ENTRY_ORIGIN))
                return;
        gmap_call_notifier(sg, raddr, raddr + _REGION3_SIZE - 1);
        r3o = (unsigned long) (r3e - ((raddr & _REGION3_INDEX) >> _REGION3_SHIFT));
        gmap_idte_one(r3o | _ASCE_TYPE_REGION3, raddr);
        sgt = (unsigned long *)(*r3e & _REGION_ENTRY_ORIGIN);
        *r3e = _REGION3_ENTRY_EMPTY;
        __gmap_unshadow_sgt(sg, raddr, sgt);
        /* Free segment table */
        page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT);
        list_del(&page->lru);
        __free_pages(page, CRST_ALLOC_ORDER);
}

/**
 * __gmap_unshadow_r3t - remove all entries from a shadow region-3 table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: address in the shadow guest address space
 * @r3t: pointer to the start of a shadow region-3 table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
                                unsigned long *r3t)
{
        unsigned long *sgt;
        struct page *page;
        int i;

        BUG_ON(!gmap_is_shadow(sg));
        for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) {
                if (!(r3t[i] & _REGION_ENTRY_ORIGIN))
                        continue;
                sgt = (unsigned long *)(r3t[i] & _REGION_ENTRY_ORIGIN);
                r3t[i] = _REGION3_ENTRY_EMPTY;
                __gmap_unshadow_sgt(sg, raddr, sgt);
                /* Free segment table */
                page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT);
                list_del(&page->lru);
                __free_pages(page, CRST_ALLOC_ORDER);
        }
}

/**
 * gmap_unshadow_r3t - remove a shadow region-3 table from a region-2 entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
{
        unsigned long r2o, *r2e, *r3t;
        struct page *page;

        BUG_ON(!gmap_is_shadow(sg));
        r2e = gmap_table_walk(sg, raddr, 3); /* get region-2 pointer */
        if (!r2e || !(*r2e & _REGION_ENTRY_ORIGIN))
                return;
        gmap_call_notifier(sg, raddr, raddr + _REGION2_SIZE - 1);
        r2o = (unsigned long) (r2e - ((raddr & _REGION2_INDEX) >> _REGION2_SHIFT));
        gmap_idte_one(r2o | _ASCE_TYPE_REGION2, raddr);
        r3t = (unsigned long *)(*r2e & _REGION_ENTRY_ORIGIN);
        *r2e = _REGION2_ENTRY_EMPTY;
        __gmap_unshadow_r3t(sg, raddr, r3t);
        /* Free region 3 table */
        page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT);
        list_del(&page->lru);
        __free_pages(page, CRST_ALLOC_ORDER);
}

/**
 * __gmap_unshadow_r2t - remove all entries from a shadow region-2 table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @r2t: pointer to the start of a shadow region-2 table
 *
 * Called with the sg->guest_table_lock
 */
static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
                                unsigned long *r2t)
{
        unsigned long *r3t;
        struct page *page;
        int i;

        BUG_ON(!gmap_is_shadow(sg));
        for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) {
                if (!(r2t[i] & _REGION_ENTRY_ORIGIN))
                        continue;
                r3t = (unsigned long *)(r2t[i] & _REGION_ENTRY_ORIGIN);
                r2t[i] = _REGION2_ENTRY_EMPTY;
                __gmap_unshadow_r3t(sg, raddr, r3t);
                /* Free region 3 table */
                page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT);
                list_del(&page->lru);
                __free_pages(page, CRST_ALLOC_ORDER);
        }
}

/**
 * gmap_unshadow_r2t - remove a shadow region-2 table from a region-1 entry
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 *
 * Called with the sg->guest_table_lock
 */
static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
{
        unsigned long r1o, *r1e, *r2t;
        struct page *page;

        BUG_ON(!gmap_is_shadow(sg));
        r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */
        if (!r1e || !(*r1e & _REGION_ENTRY_ORIGIN))
                return;
        gmap_call_notifier(sg, raddr, raddr + _REGION1_SIZE - 1);
        r1o = (unsigned long) (r1e - ((raddr & _REGION1_INDEX) >> _REGION1_SHIFT));
        gmap_idte_one(r1o | _ASCE_TYPE_REGION1, raddr);
        r2t = (unsigned long *)(*r1e & _REGION_ENTRY_ORIGIN);
        *r1e = _REGION1_ENTRY_EMPTY;
        __gmap_unshadow_r2t(sg, raddr, r2t);
        /* Free region 2 table */
        page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT);
        list_del(&page->lru);
        __free_pages(page, CRST_ALLOC_ORDER);
}

/**
 * __gmap_unshadow_r1t - remove all entries from a shadow region-1 table
 * @sg: pointer to the shadow guest address space structure
 * @raddr: rmap address in the shadow guest address space
 * @r1t: pointer to the start of a shadow region-1 table
 *
 * Called with the shadow->guest_table_lock
 */
static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
                                unsigned long *r1t)
{
        unsigned long asce, *r2t;
        struct page *page;
        int i;

        BUG_ON(!gmap_is_shadow(sg));
        asce = (unsigned long) r1t | _ASCE_TYPE_REGION1;
        for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION1_SIZE) {
                if (!(r1t[i] & _REGION_ENTRY_ORIGIN))
                        continue;
                r2t = (unsigned long *)(r1t[i] & _REGION_ENTRY_ORIGIN);
                __gmap_unshadow_r2t(sg, raddr, r2t);
                /* Clear entry and flush translation r1t -> r2t */
                gmap_idte_one(asce, raddr);
                r1t[i] = _REGION1_ENTRY_EMPTY;
                /* Free region 2 table */
                page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT);
                list_del(&page->lru);
                __free_pages(page, CRST_ALLOC_ORDER);
        }
}

/**
 * gmap_unshadow - remove a shadow page table completely
 * @sg: pointer to the shadow guest address space structure
 *
 * Called with sg->guest_table_lock
 */
static void gmap_unshadow(struct gmap *sg)
{
        unsigned long *table;

        BUG_ON(!gmap_is_shadow(sg));
        if (sg->removed)
                return;
        sg->removed = 1;
        gmap_call_notifier(sg, 0, -1UL);
        gmap_flush_tlb(sg);
        table = (unsigned long *)(sg->asce & _ASCE_ORIGIN);
        switch (sg->asce & _ASCE_TYPE_MASK) {
        case _ASCE_TYPE_REGION1:
                __gmap_unshadow_r1t(sg, 0, table);
                break;
        case _ASCE_TYPE_REGION2:
                __gmap_unshadow_r2t(sg, 0, table);
                break;
        case _ASCE_TYPE_REGION3:
                __gmap_unshadow_r3t(sg, 0, table);
                break;
        case _ASCE_TYPE_SEGMENT:
                __gmap_unshadow_sgt(sg, 0, table);
                break;
        }
}

/**
 * gmap_find_shadow - find a specific asce in the list of shadow tables
 * @parent: pointer to the parent gmap
 * @asce: ASCE for which the shadow table is created
 * @edat_level: edat level to be used for the shadow translation
 *
 * Returns the pointer to a gmap if a shadow table with the given asce is
 * already available, ERR_PTR(-EAGAIN) if another one is just being created,
 * otherwise NULL
 */
static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce,
                                     int edat_level)
{
        struct gmap *sg;

        list_for_each_entry(sg, &parent->children, list) {
                if (sg->orig_asce != asce || sg->edat_level != edat_level ||
                    sg->removed)
                        continue;
                if (!sg->initialized)
                        return ERR_PTR(-EAGAIN);
                atomic_inc(&sg->ref_count);
                return sg;
        }
        return NULL;
}

/**
 * gmap_shadow_valid - check if a shadow guest address space matches the
 *                     given properties and is still valid
 * @sg: pointer to the shadow guest address space structure
 * @asce: ASCE for which the shadow table is requested
 * @edat_level: edat level to be used for the shadow translation
 *
 * Returns 1 if the gmap shadow is still valid and matches the given
 * properties, the caller can continue using it. Returns 0 otherwise, the
 * caller has to request a new shadow gmap in this case.
 */
int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
{
        if (sg->removed)
                return 0;
        return sg->orig_asce == asce && sg->edat_level == edat_level;
}
EXPORT_SYMBOL_GPL(gmap_shadow_valid);

/**
 * gmap_shadow - create/find a shadow guest address space
 * @parent: pointer to the parent gmap
 * @asce: ASCE for which the shadow table is created
 * @edat_level: edat level to be used for the shadow translation
 *
 * The pages of the top level page table referred by the asce parameter
 * will be set to read-only and marked in the PGSTEs of the kvm process.
 * The shadow table will be removed automatically on any change to the
 * PTE mapping for the source table.
 *
 * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory,
 * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the
 * parent gmap table could not be protected.
 */
struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
                         int edat_level)
{
        struct gmap *sg, *new;
        unsigned long limit;
        int rc;

        BUG_ON(gmap_is_shadow(parent));
        spin_lock(&parent->shadow_lock);
        sg = gmap_find_shadow(parent, asce, edat_level);
        spin_unlock(&parent->shadow_lock);
        if (sg)
                return sg;
        /* Create a new shadow gmap */
        limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11));
        if (asce & _ASCE_REAL_SPACE)
                limit = -1UL;
        new = gmap_alloc(limit);
        if (!new)
                return ERR_PTR(-ENOMEM);
        new->mm = parent->mm;
        new->parent = gmap_get(parent);
        new->orig_asce = asce;
        new->edat_level = edat_level;
        new->initialized = false;
        spin_lock(&parent->shadow_lock);
        /* Recheck if another CPU created the same shadow */
        sg = gmap_find_shadow(parent, asce, edat_level);
        if (sg) {
                spin_unlock(&parent->shadow_lock);
                gmap_free(new);
                return sg;
        }
        if (asce & _ASCE_REAL_SPACE) {
                /* only allow one real-space gmap shadow */
                list_for_each_entry(sg, &parent->children, list) {
                        if (sg->orig_asce & _ASCE_REAL_SPACE) {
                                spin_lock(&sg->guest_table_lock);
                                gmap_unshadow(sg);
                                spin_unlock(&sg->guest_table_lock);
                                list_del(&sg->list);
                                gmap_put(sg);
                                break;
                        }
                }
        }
        atomic_set(&new->ref_count, 2);
        list_add(&new->list, &parent->children);
        if (asce & _ASCE_REAL_SPACE) {
                /* nothing to protect, return right away */
                new->initialized = true;
                spin_unlock(&parent->shadow_lock);
                return new;
        }
        spin_unlock(&parent->shadow_lock);
        /* protect after insertion, so it will get properly invalidated */
        down_read(&parent->mm->mmap_sem);
        rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN,
                                ((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE,
                                PROT_READ, GMAP_NOTIFY_SHADOW);
        up_read(&parent->mm->mmap_sem);
        spin_lock(&parent->shadow_lock);
        new->initialized = true;
        if (rc) {
                list_del(&new->list);
                gmap_free(new);
                new = ERR_PTR(rc);
        }
        spin_unlock(&parent->shadow_lock);
        return new;
}
EXPORT_SYMBOL_GPL(gmap_shadow);

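/*
 * Illustrative sketch (not part of the original file): obtaining a shadow
 * gmap for a nested guest, retrying while another CPU is still initializing
 * the same shadow. In a real caller the asce and edat level would come from
 * the nested guest's state; the helper name is an assumption.
 */
#if 0
static struct gmap *my_get_shadow(struct gmap *parent, unsigned long asce,
                                  int edat_level)
{
        struct gmap *sg;

        do {
                sg = gmap_shadow(parent, asce, edat_level);
        } while (sg == ERR_PTR(-EAGAIN));
        return sg;      /* valid pointer, ERR_PTR(-ENOMEM) or ERR_PTR(-EFAULT) */
}
#endif
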
/**
 * gmap_shadow_r2t - create an empty shadow region 2 table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @r2t: parent gmap address of the region 2 table to get shadowed
 * @fake: r2t references contiguous guest memory block, not a r2t
 *
 * The r2t parameter specifies the address of the source table. The
 * four pages of the source table are made read-only in the parent gmap
 * address space. A write to the source table area @r2t will automatically
 * remove the shadow r2 table and all of its descendants.
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_sem in read.
 */
int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
                    int fake)
{
        unsigned long raddr, origin, offset, len;
        unsigned long *s_r2t, *table;
        struct page *page;
        int rc;

        BUG_ON(!gmap_is_shadow(sg));
        /* Allocate a shadow region second table */
        page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
        if (!page)
                return -ENOMEM;
        page->index = r2t & _REGION_ENTRY_ORIGIN;
        if (fake)
                page->index |= GMAP_SHADOW_FAKE_TABLE;
        s_r2t = (unsigned long *) page_to_phys(page);
        /* Install shadow region second table */
        spin_lock(&sg->guest_table_lock);
        table = gmap_table_walk(sg, saddr, 4); /* get region-1 pointer */
        if (!table) {
                rc = -EAGAIN;           /* Race with unshadow */
                goto out_free;
        }
        if (!(*table & _REGION_ENTRY_INVALID)) {
                rc = 0;                 /* Already established */
                goto out_free;
        } else if (*table & _REGION_ENTRY_ORIGIN) {
                rc = -EAGAIN;           /* Race with shadow */
                goto out_free;
        }
        crst_table_init(s_r2t, _REGION2_ENTRY_EMPTY);
        /* mark as invalid as long as the parent table is not protected */
        *table = (unsigned long) s_r2t | _REGION_ENTRY_LENGTH |
                 _REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID;
        if (sg->edat_level >= 1)
                *table |= (r2t & _REGION_ENTRY_PROTECT);
        list_add(&page->lru, &sg->crst_list);
        if (fake) {
                /* nothing to protect for fake tables */
                *table &= ~_REGION_ENTRY_INVALID;
                spin_unlock(&sg->guest_table_lock);
                return 0;
        }
        spin_unlock(&sg->guest_table_lock);
        /* Make r2t read-only in parent gmap page table */
        raddr = (saddr & _REGION1_MASK) | _SHADOW_RMAP_REGION1;
        origin = r2t & _REGION_ENTRY_ORIGIN;
        offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
        len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
        rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
        spin_lock(&sg->guest_table_lock);
        if (!rc) {
                table = gmap_table_walk(sg, saddr, 4);
                if (!table || (*table & _REGION_ENTRY_ORIGIN) !=
                    (unsigned long) s_r2t)
                        rc = -EAGAIN;           /* Race with unshadow */
                else
                        *table &= ~_REGION_ENTRY_INVALID;
        } else {
                gmap_unshadow_r2t(sg, raddr);
        }
        spin_unlock(&sg->guest_table_lock);
        return rc;
out_free:
        spin_unlock(&sg->guest_table_lock);
        __free_pages(page, CRST_ALLOC_ORDER);
        return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_r2t);

/**
 * gmap_shadow_r3t - create a shadow region 3 table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @r3t: parent gmap address of the region 3 table to get shadowed
 * @fake: r3t references contiguous guest memory block, not a r3t
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_sem in read.
 */
int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
                    int fake)
{
        unsigned long raddr, origin, offset, len;
        unsigned long *s_r3t, *table;
        struct page *page;
        int rc;

        BUG_ON(!gmap_is_shadow(sg));
        /* Allocate a shadow region third table */
        page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
        if (!page)
                return -ENOMEM;
        page->index = r3t & _REGION_ENTRY_ORIGIN;
        if (fake)
                page->index |= GMAP_SHADOW_FAKE_TABLE;
        s_r3t = (unsigned long *) page_to_phys(page);
        /* Install shadow region third table */
        spin_lock(&sg->guest_table_lock);
        table = gmap_table_walk(sg, saddr, 3); /* get region-2 pointer */
        if (!table) {
                rc = -EAGAIN;           /* Race with unshadow */
                goto out_free;
        }
        if (!(*table & _REGION_ENTRY_INVALID)) {
                rc = 0;                 /* Already established */
                goto out_free;
        } else if (*table & _REGION_ENTRY_ORIGIN) {
                rc = -EAGAIN;           /* Race with shadow */
                goto out_free;
        }
        crst_table_init(s_r3t, _REGION3_ENTRY_EMPTY);
        /* mark as invalid as long as the parent table is not protected */
        *table = (unsigned long) s_r3t | _REGION_ENTRY_LENGTH |
                 _REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID;
        if (sg->edat_level >= 1)
                *table |= (r3t & _REGION_ENTRY_PROTECT);
        list_add(&page->lru, &sg->crst_list);
        if (fake) {
                /* nothing to protect for fake tables */
                *table &= ~_REGION_ENTRY_INVALID;
                spin_unlock(&sg->guest_table_lock);
                return 0;
        }
        spin_unlock(&sg->guest_table_lock);
        /* Make r3t read-only in parent gmap page table */
        raddr = (saddr & _REGION2_MASK) | _SHADOW_RMAP_REGION2;
        origin = r3t & _REGION_ENTRY_ORIGIN;
        offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
        len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
        rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
        spin_lock(&sg->guest_table_lock);
        if (!rc) {
                table = gmap_table_walk(sg, saddr, 3);
                if (!table || (*table & _REGION_ENTRY_ORIGIN) !=
                    (unsigned long) s_r3t)
                        rc = -EAGAIN;           /* Race with unshadow */
                else
                        *table &= ~_REGION_ENTRY_INVALID;
        } else {
                gmap_unshadow_r3t(sg, raddr);
        }
        spin_unlock(&sg->guest_table_lock);
        return rc;
out_free:
        spin_unlock(&sg->guest_table_lock);
        __free_pages(page, CRST_ALLOC_ORDER);
        return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_r3t);

/**
 * gmap_shadow_sgt - create a shadow segment table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @sgt: parent gmap address of the segment table to get shadowed
 * @fake: sgt references contiguous guest memory block, not a sgt
 *
 * Returns: 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_sem in read.
 */
int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
                    int fake)
{
        unsigned long raddr, origin, offset, len;
        unsigned long *s_sgt, *table;
        struct page *page;
        int rc;

        BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE));
        /* Allocate a shadow segment table */
        page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
        if (!page)
                return -ENOMEM;
        page->index = sgt & _REGION_ENTRY_ORIGIN;
        if (fake)
                page->index |= GMAP_SHADOW_FAKE_TABLE;
        s_sgt = (unsigned long *) page_to_phys(page);
        /* Install shadow segment table */
        spin_lock(&sg->guest_table_lock);
        table = gmap_table_walk(sg, saddr, 2); /* get region-3 pointer */
        if (!table) {
                rc = -EAGAIN;           /* Race with unshadow */
                goto out_free;
        }
        if (!(*table & _REGION_ENTRY_INVALID)) {
                rc = 0;                 /* Already established */
                goto out_free;
        } else if (*table & _REGION_ENTRY_ORIGIN) {
                rc = -EAGAIN;           /* Race with shadow */
                goto out_free;
        }
        crst_table_init(s_sgt, _SEGMENT_ENTRY_EMPTY);
        /* mark as invalid as long as the parent table is not protected */
        *table = (unsigned long) s_sgt | _REGION_ENTRY_LENGTH |
                 _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID;
        if (sg->edat_level >= 1)
                *table |= sgt & _REGION_ENTRY_PROTECT;
        list_add(&page->lru, &sg->crst_list);
        if (fake) {
                /* nothing to protect for fake tables */
                *table &= ~_REGION_ENTRY_INVALID;
                spin_unlock(&sg->guest_table_lock);
                return 0;
        }
        spin_unlock(&sg->guest_table_lock);
        /* Make sgt read-only in parent gmap page table */
        raddr = (saddr & _REGION3_MASK) | _SHADOW_RMAP_REGION3;
        origin = sgt & _REGION_ENTRY_ORIGIN;
        offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
        len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
        rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
        spin_lock(&sg->guest_table_lock);
        if (!rc) {
                table = gmap_table_walk(sg, saddr, 2);
                if (!table || (*table & _REGION_ENTRY_ORIGIN) !=
                    (unsigned long) s_sgt)
                        rc = -EAGAIN;           /* Race with unshadow */
                else
                        *table &= ~_REGION_ENTRY_INVALID;
        } else {
                gmap_unshadow_sgt(sg, raddr);
        }
        spin_unlock(&sg->guest_table_lock);
        return rc;
out_free:
        spin_unlock(&sg->guest_table_lock);
        __free_pages(page, CRST_ALLOC_ORDER);
        return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_sgt);

/**
 * gmap_shadow_pgt_lookup - find a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: the address in the shadow guest address space
 * @pgt: parent gmap address of the page table to get shadowed
 * @dat_protection: if the pgtable is marked as protected by dat
 * @fake: pgt references contiguous guest memory block, not a pgtable
 *
 * Returns 0 if the shadow page table was found and -EAGAIN if the page
 * table was not found.
 *
 * Called with sg->mm->mmap_sem in read.
 */
int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr,
                           unsigned long *pgt, int *dat_protection,
                           int *fake)
{
        unsigned long *table;
        struct page *page;
        int rc;

        BUG_ON(!gmap_is_shadow(sg));
        spin_lock(&sg->guest_table_lock);
        table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
        if (table && !(*table & _SEGMENT_ENTRY_INVALID)) {
                /* Shadow page tables are full pages (pte+pgste) */
                page = pfn_to_page(*table >> PAGE_SHIFT);
                *pgt = page->index & ~GMAP_SHADOW_FAKE_TABLE;
                *dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT);
                *fake = !!(page->index & GMAP_SHADOW_FAKE_TABLE);
                rc = 0;
        } else {
                rc = -EAGAIN;
        }
        spin_unlock(&sg->guest_table_lock);
        return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_pgt_lookup);

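/*
 * Illustrative sketch (not part of the original file): the lookup/instantiate
 * pattern a shadow-gmap fault handler might follow. -EAGAIN from the lookup
 * means the shadow page table does not exist yet and has to be created with
 * gmap_shadow_pgt(); the guest page table origin used below is a placeholder
 * assumption, in a real caller it would be read from the guest's DAT tables.
 */
#if 0
static int my_resolve_pgt(struct gmap *sg, unsigned long saddr)
{
        unsigned long pgt;
        int dat_protection, fake;
        int rc;

        rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
        if (rc == -EAGAIN) {
                unsigned long guest_pgt_origin = 0;     /* assumption */

                rc = gmap_shadow_pgt(sg, saddr, guest_pgt_origin, 0);
        }
        return rc;
}
#endif
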
/**
 * gmap_shadow_pgt - instantiate a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @pgt: parent gmap address of the page table to get shadowed
 * @fake: pgt references contiguous guest memory block, not a pgtable
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with gmap->mm->mmap_sem in read
 */
int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
                    int fake)
{
        unsigned long raddr, origin;
        unsigned long *s_pgt, *table;
        struct page *page;
        int rc;

        BUG_ON(!gmap_is_shadow(sg) || (pgt & _SEGMENT_ENTRY_LARGE));
        /* Allocate a shadow page table */
        page = page_table_alloc_pgste(sg->mm);
        if (!page)
                return -ENOMEM;
        page->index = pgt & _SEGMENT_ENTRY_ORIGIN;
        if (fake)
                page->index |= GMAP_SHADOW_FAKE_TABLE;
        s_pgt = (unsigned long *) page_to_phys(page);
        /* Install shadow page table */
        spin_lock(&sg->guest_table_lock);
        table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
        if (!table) {
                rc = -EAGAIN;           /* Race with unshadow */
                goto out_free;
        }
        if (!(*table & _SEGMENT_ENTRY_INVALID)) {
                rc = 0;                 /* Already established */
                goto out_free;
        } else if (*table & _SEGMENT_ENTRY_ORIGIN) {
                rc = -EAGAIN;           /* Race with shadow */
                goto out_free;
        }
        /* mark as invalid as long as the parent table is not protected */
        *table = (unsigned long) s_pgt | _SEGMENT_ENTRY |
                 (pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID;
        list_add(&page->lru, &sg->pt_list);
        if (fake) {
                /* nothing to protect for fake tables */
                *table &= ~_SEGMENT_ENTRY_INVALID;
                spin_unlock(&sg->guest_table_lock);
                return 0;
        }
        spin_unlock(&sg->guest_table_lock);
        /* Make pgt read-only in parent gmap page table (not the pgste) */
        raddr = (saddr & _SEGMENT_MASK) | _SHADOW_RMAP_SEGMENT;
        origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK;
        rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE);
        spin_lock(&sg->guest_table_lock);
        if (!rc) {
                table = gmap_table_walk(sg, saddr, 1);
                if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) !=
                    (unsigned long) s_pgt)
                        rc = -EAGAIN;           /* Race with unshadow */
                else
                        *table &= ~_SEGMENT_ENTRY_INVALID;
        } else {
                gmap_unshadow_pgt(sg, raddr);
        }
        spin_unlock(&sg->guest_table_lock);
        return rc;
out_free:
        spin_unlock(&sg->guest_table_lock);
        page_table_free_pgste(page);
        return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_pgt);

/**
 * gmap_shadow_page - create a shadow page mapping
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @pte: pte in parent gmap address space to get shadowed
 *
 * Returns 0 if successfully shadowed or already shadowed, -EAGAIN if the
 * shadow table structure is incomplete, -ENOMEM if out of memory and
 * -EFAULT if an address in the parent gmap could not be resolved.
 *
 * Called with sg->mm->mmap_sem in read.
 */
int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
{
	struct gmap *parent;
	struct gmap_rmap *rmap;
	unsigned long vmaddr, paddr;
	spinlock_t *ptl;
	pte_t *sptep, *tptep;
	int prot;
	int rc;

	BUG_ON(!gmap_is_shadow(sg));
	parent = sg->parent;
	prot = (pte_val(pte) & _PAGE_PROTECT) ? PROT_READ : PROT_WRITE;

	rmap = kzalloc(sizeof(*rmap), GFP_KERNEL);
	if (!rmap)
		return -ENOMEM;
	rmap->raddr = (saddr & PAGE_MASK) | _SHADOW_RMAP_PGTABLE;

	while (1) {
		paddr = pte_val(pte) & PAGE_MASK;
		vmaddr = __gmap_translate(parent, paddr);
		if (IS_ERR_VALUE(vmaddr)) {
			rc = vmaddr;
			break;
		}
		rc = radix_tree_preload(GFP_KERNEL);
		if (rc)
			break;
		rc = -EAGAIN;
		sptep = gmap_pte_op_walk(parent, paddr, &ptl);
		if (sptep) {
			spin_lock(&sg->guest_table_lock);
			/* Get page table pointer */
			tptep = (pte_t *) gmap_table_walk(sg, saddr, 0);
			if (!tptep) {
				spin_unlock(&sg->guest_table_lock);
				gmap_pte_op_end(ptl);
				radix_tree_preload_end();
				break;
			}
			rc = ptep_shadow_pte(sg->mm, saddr, sptep, tptep, pte);
			if (rc > 0) {
				/* Success and a new mapping */
				gmap_insert_rmap(sg, vmaddr, rmap);
				rmap = NULL;
				rc = 0;
			}
			gmap_pte_op_end(ptl);
			spin_unlock(&sg->guest_table_lock);
		}
		radix_tree_preload_end();
		if (!rc)
			break;
		rc = gmap_pte_op_fixup(parent, paddr, vmaddr, prot);
		if (rc)
			break;
	}
	kfree(rmap);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_shadow_page);

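/*
 * Illustrative sketch, not part of the original gmap code: the gmap_shadow_*()
 * functions return -EAGAIN when they race with concurrent (un)shadowing, so a
 * caller typically re-resolves the parent pte and retries.  The wrapper below
 * is a hypothetical caller skeleton; @parent_pte stands in for a pte the
 * caller has already read from the parent gmap.
 */
static int __maybe_unused example_shadow_one_page(struct gmap *sg,
						  unsigned long saddr,
						  pte_t parent_pte)
{
	int rc;

	/* called with sg->mm->mmap_sem held for reading, like the real users */
	rc = gmap_shadow_page(sg, saddr, parent_pte);
	if (rc == -EAGAIN) {
		/* raced with an invalidation; re-walk the parent and retry */
	}
	return rc;
}
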
/**
 * gmap_shadow_notify - handle notifications for shadow gmap
 *
 * Called with sg->parent->shadow_lock.
 */
static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
			       unsigned long gaddr)
{
	struct gmap_rmap *rmap, *rnext, *head;
	unsigned long start, end, bits, raddr;

	BUG_ON(!gmap_is_shadow(sg));

	spin_lock(&sg->guest_table_lock);
	if (sg->removed) {
		spin_unlock(&sg->guest_table_lock);
		return;
	}
	/* Check for top level table */
	start = sg->orig_asce & _ASCE_ORIGIN;
	end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE;
	if (!(sg->orig_asce & _ASCE_REAL_SPACE) && gaddr >= start &&
	    gaddr < end) {
		/* The complete shadow table has to go */
		gmap_unshadow(sg);
		spin_unlock(&sg->guest_table_lock);
		list_del(&sg->list);
		gmap_put(sg);
		return;
	}
	/* Remove the page table tree for one specific entry */
	head = radix_tree_delete(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT);
	gmap_for_each_rmap_safe(rmap, rnext, head) {
		bits = rmap->raddr & _SHADOW_RMAP_MASK;
		raddr = rmap->raddr ^ bits;
		switch (bits) {
		case _SHADOW_RMAP_REGION1:
			gmap_unshadow_r2t(sg, raddr);
			break;
		case _SHADOW_RMAP_REGION2:
			gmap_unshadow_r3t(sg, raddr);
			break;
		case _SHADOW_RMAP_REGION3:
			gmap_unshadow_sgt(sg, raddr);
			break;
		case _SHADOW_RMAP_SEGMENT:
			gmap_unshadow_pgt(sg, raddr);
			break;
		case _SHADOW_RMAP_PGTABLE:
			gmap_unshadow_page(sg, raddr);
			break;
		}
		kfree(rmap);
	}
	spin_unlock(&sg->guest_table_lock);
}

/**
 * ptep_notify - call all invalidation callbacks for a specific pte.
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 * @pte: pointer to the page table entry
 * @bits: bits from the pgste that caused the notify call
 *
 * This function is assumed to be called with the page table lock held
 * for the pte to notify.
 */
void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
		 pte_t *pte, unsigned long bits)
{
	unsigned long offset, gaddr = 0;
	unsigned long *table;
	struct gmap *gmap, *sg, *next;

	offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
	offset = offset * (PAGE_SIZE / sizeof(pte_t));
	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		spin_lock(&gmap->guest_table_lock);
		table = radix_tree_lookup(&gmap->host_to_guest,
					  vmaddr >> PMD_SHIFT);
		if (table)
			gaddr = __gmap_segment_gaddr(table) + offset;
		spin_unlock(&gmap->guest_table_lock);
		if (!table)
			continue;

		if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) {
			spin_lock(&gmap->shadow_lock);
			list_for_each_entry_safe(sg, next,
						 &gmap->children, list)
				gmap_shadow_notify(sg, vmaddr, gaddr);
			spin_unlock(&gmap->shadow_lock);
		}
		if (bits & PGSTE_IN_BIT)
			gmap_call_notifier(gmap, gaddr, gaddr + PAGE_SIZE - 1);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(ptep_notify);

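/*
 * Illustrative sketch, not part of the original gmap code: the offset
 * arithmetic in ptep_notify() above.  A 2K page table holds 256 ptes of
 * 8 bytes each, so masking the pte pointer with (255 * sizeof(pte_t)) yields
 * the byte offset of the entry within its table; multiplying by
 * PAGE_SIZE / sizeof(pte_t) turns that into the byte offset of the mapped
 * page within the 1M segment.  The helper name is hypothetical.
 */
static unsigned long __maybe_unused example_pte_to_segment_offset(pte_t *pte)
{
	unsigned long byte_off = ((unsigned long) pte) & (255 * sizeof(pte_t));

	/* e.g. the third pte (byte_off == 16) maps the page at segment + 0x2000 */
	return byte_off * (PAGE_SIZE / sizeof(pte_t));
}
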
static void pmdp_notify_gmap(struct gmap *gmap, pmd_t *pmdp,
			     unsigned long gaddr)
{
	pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_IN;
	gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1);
}

/**
 * gmap_pmdp_xchg - exchange a gmap pmd with another
 * @gmap: pointer to the guest address space structure
 * @pmdp: pointer to the pmd entry
 * @new: replacement entry
 * @gaddr: the affected guest address
 *
 * This function is assumed to be called with the guest_table_lock
 * held.
 */
static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new,
			   unsigned long gaddr)
{
	gaddr &= HPAGE_MASK;
	pmdp_notify_gmap(gmap, pmdp, gaddr);
	pmd_val(new) &= ~_SEGMENT_ENTRY_GMAP_IN;
	if (MACHINE_HAS_TLB_GUEST)
		__pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce,
			    IDTE_GLOBAL);
	else if (MACHINE_HAS_IDTE)
		__pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL);
	else
		__pmdp_csp(pmdp);
	*pmdp = new;
}

static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
			    int purge)
{
	pmd_t *pmdp;
	struct gmap *gmap;
	unsigned long gaddr;

	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		spin_lock(&gmap->guest_table_lock);
		pmdp = (pmd_t *)radix_tree_delete(&gmap->host_to_guest,
						  vmaddr >> PMD_SHIFT);
		if (pmdp) {
			gaddr = __gmap_segment_gaddr((unsigned long *)pmdp);
			pmdp_notify_gmap(gmap, pmdp, gaddr);
			WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
						   _SEGMENT_ENTRY_GMAP_UC));
			if (purge)
				__pmdp_csp(pmdp);
			pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
		}
		spin_unlock(&gmap->guest_table_lock);
	}
	rcu_read_unlock();
}

/**
 * gmap_pmdp_invalidate - invalidate all affected guest pmd entries without
 *                        flushing
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr)
{
	gmap_pmdp_clear(mm, vmaddr, 0);
}
EXPORT_SYMBOL_GPL(gmap_pmdp_invalidate);

/**
 * gmap_pmdp_csp - csp all affected guest pmd entries
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr)
{
	gmap_pmdp_clear(mm, vmaddr, 1);
}
EXPORT_SYMBOL_GPL(gmap_pmdp_csp);

/**
 * gmap_pmdp_idte_local - invalidate and clear a guest pmd entry
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr)
{
	unsigned long *entry, gaddr;
	struct gmap *gmap;
	pmd_t *pmdp;

	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		spin_lock(&gmap->guest_table_lock);
		entry = radix_tree_delete(&gmap->host_to_guest,
					  vmaddr >> PMD_SHIFT);
		if (entry) {
			pmdp = (pmd_t *)entry;
			gaddr = __gmap_segment_gaddr(entry);
			pmdp_notify_gmap(gmap, pmdp, gaddr);
			WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
					   _SEGMENT_ENTRY_GMAP_UC));
			if (MACHINE_HAS_TLB_GUEST)
				__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
					    gmap->asce, IDTE_LOCAL);
			else if (MACHINE_HAS_IDTE)
				__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL);
			*entry = _SEGMENT_ENTRY_EMPTY;
		}
		spin_unlock(&gmap->guest_table_lock);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local);

/**
 * gmap_pmdp_idte_global - invalidate and clear a guest pmd entry
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 */
void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
{
	unsigned long *entry, gaddr;
	struct gmap *gmap;
	pmd_t *pmdp;

	rcu_read_lock();
	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
		spin_lock(&gmap->guest_table_lock);
		entry = radix_tree_delete(&gmap->host_to_guest,
					  vmaddr >> PMD_SHIFT);
		if (entry) {
			pmdp = (pmd_t *)entry;
			gaddr = __gmap_segment_gaddr(entry);
			pmdp_notify_gmap(gmap, pmdp, gaddr);
			WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
					   _SEGMENT_ENTRY_GMAP_UC));
			if (MACHINE_HAS_TLB_GUEST)
				__pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
					    gmap->asce, IDTE_GLOBAL);
			else if (MACHINE_HAS_IDTE)
				__pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL);
			else
				__pmdp_csp(pmdp);
			*entry = _SEGMENT_ENTRY_EMPTY;
		}
		spin_unlock(&gmap->guest_table_lock);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(gmap_pmdp_idte_global);

/**
 * gmap_test_and_clear_dirty_pmd - test and reset segment dirty status
 * @gmap: pointer to guest address space
 * @pmdp: pointer to the pmd to be tested
 * @gaddr: virtual address in the guest address space
 *
 * This function is assumed to be called with the guest_table_lock
 * held.
 */
bool gmap_test_and_clear_dirty_pmd(struct gmap *gmap, pmd_t *pmdp,
				   unsigned long gaddr)
{
	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
		return false;

	/* Already protected memory, which did not change is clean */
	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT &&
	    !(pmd_val(*pmdp) & _SEGMENT_ENTRY_GMAP_UC))
		return false;

	/* Clear UC indication and reset protection */
	pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_UC;
	gmap_protect_pmd(gmap, gaddr, pmdp, PROT_READ, 0);
	return true;
}

/**
 * gmap_sync_dirty_log_pmd - set bitmap based on dirty status of segment
 * @gmap: pointer to guest address space
 * @bitmap: dirty bitmap for this pmd
 * @gaddr: virtual address in the guest address space
 * @vmaddr: virtual address in the host address space
 *
 * This function is assumed to be called with the guest_table_lock
 * held.
 */
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],
			     unsigned long gaddr, unsigned long vmaddr)
{
	int i;
	pmd_t *pmdp;
	pte_t *ptep;
	spinlock_t *ptl;

	pmdp = gmap_pmd_op_walk(gmap, gaddr);
	if (!pmdp)
		return;

	if (pmd_large(*pmdp)) {
		if (gmap_test_and_clear_dirty_pmd(gmap, pmdp, gaddr))
			bitmap_fill(bitmap, _PAGE_ENTRIES);
	} else {
		for (i = 0; i < _PAGE_ENTRIES; i++, vmaddr += PAGE_SIZE) {
			ptep = pte_alloc_map_lock(gmap->mm, pmdp, vmaddr, &ptl);
			if (!ptep)
				continue;
			if (ptep_test_and_clear_uc(gmap->mm, vmaddr, ptep))
				set_bit(i, bitmap);
			pte_unmap_unlock(ptep, ptl);
		}
	}
	gmap_pmd_op_end(gmap, pmdp);
}
EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd);

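/*
 * Illustrative sketch, not part of the original gmap code: a dirty-log
 * consumer calls gmap_sync_dirty_log_pmd() once per segment.  The bitmap
 * covers _PAGE_ENTRIES (256) pages, i.e. four unsigned longs on 64 bit.
 * The function below is hypothetical; the real consumer is KVM's dirty-log
 * sync path.
 */
static void __maybe_unused example_collect_segment_dirty_bits(struct gmap *gmap,
							      unsigned long gaddr,
							      unsigned long vmaddr)
{
	unsigned long bitmap[4] = { 0 };
	int i;

	gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
	for (i = 0; i < _PAGE_ENTRIES; i++) {
		if (test_bit(i, bitmap)) {
			/* page at gaddr + i * PAGE_SIZE was written since the last sync */
		}
	}
}
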
static inline void thp_split_mm(struct mm_struct *mm)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	struct vm_area_struct *vma;
	unsigned long addr;

	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
		for (addr = vma->vm_start;
		     addr < vma->vm_end;
		     addr += PAGE_SIZE)
			follow_page(vma, addr, FOLL_SPLIT);
		vma->vm_flags &= ~VM_HUGEPAGE;
		vma->vm_flags |= VM_NOHUGEPAGE;
	}
	mm->def_flags |= VM_NOHUGEPAGE;
#endif
}

/*
 * Remove all empty zero pages from the mapping for lazy refaulting
 * - This must be called after mm->context.has_pgste is set, to avoid
 *   future creation of zero pages
 * - This must be called after THP was enabled
 */
static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
			    unsigned long end, struct mm_walk *walk)
{
	unsigned long addr;

	for (addr = start; addr != end; addr += PAGE_SIZE) {
		pte_t *ptep;
		spinlock_t *ptl;

		ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
		if (is_zero_pfn(pte_pfn(*ptep)))
			ptep_xchg_direct(walk->mm, addr, ptep, __pte(_PAGE_INVALID));
		pte_unmap_unlock(ptep, ptl);
	}
	return 0;
}

static inline void zap_zero_pages(struct mm_struct *mm)
{
	struct mm_walk walk = { .pmd_entry = __zap_zero_pages };

	walk.mm = mm;
	walk_page_range(0, TASK_SIZE, &walk);
}

/*
 * switch on pgstes for the current userspace process (for kvm)
 */
int s390_enable_sie(void)
{
	struct mm_struct *mm = current->mm;

	/* Do we have pgstes? if yes, we are done */
	if (mm_has_pgste(mm))
		return 0;
	/* Fail if the page tables are 2K */
	if (!mm_alloc_pgste(mm))
		return -EINVAL;
	down_write(&mm->mmap_sem);
	mm->context.has_pgste = 1;
	/* split thp mappings and disable thp for future mappings */
	thp_split_mm(mm);
	zap_zero_pages(mm);
	up_write(&mm->mmap_sem);
	return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);

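/*
 * Illustrative sketch, not part of the original gmap code: s390_enable_sie()
 * is idempotent, so a VM-creation path can simply call it before setting up
 * any gmap and propagate the error.  The wrapper name is hypothetical.
 */
static int __maybe_unused example_prepare_mm_for_sie(void)
{
	int rc;

	/* returns 0 immediately if current->mm already has pgstes */
	rc = s390_enable_sie();
	if (rc)
		return rc;
	/* current->mm now uses 4K page tables with pgstes and THP disabled */
	return 0;
}
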
/*
 * Enable storage key handling from now on and initialize the storage
 * keys with the default key.
 */
static int __s390_enable_skey_pte(pte_t *pte, unsigned long addr,
				  unsigned long next, struct mm_walk *walk)
{
	/* Clear storage key */
	ptep_zap_key(walk->mm, addr, pte);
	return 0;
}

static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
				      unsigned long hmask, unsigned long next,
				      struct mm_walk *walk)
{
	pmd_t *pmd = (pmd_t *)pte;
	unsigned long start, end;
	struct page *page = pmd_page(*pmd);

	/*
	 * The write check makes sure we do not set a key on shared
	 * memory. This is needed as the walker does not differentiate
	 * between actual guest memory and the process executable or
	 * shared libraries.
	 */
	if (pmd_val(*pmd) & _SEGMENT_ENTRY_INVALID ||
	    !(pmd_val(*pmd) & _SEGMENT_ENTRY_WRITE))
		return 0;

	start = pmd_val(*pmd) & HPAGE_MASK;
	end = start + HPAGE_SIZE - 1;
	__storage_key_init_range(start, end);
	set_bit(PG_arch_1, &page->flags);
	return 0;
}

int s390_enable_skey(void)
{
	struct mm_walk walk = {
		.hugetlb_entry = __s390_enable_skey_hugetlb,
		.pte_entry = __s390_enable_skey_pte,
	};
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	int rc = 0;

	down_write(&mm->mmap_sem);
	if (mm_uses_skeys(mm))
		goto out_up;

	mm->context.uses_skeys = 1;
	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (ksm_madvise(vma, vma->vm_start, vma->vm_end,
				MADV_UNMERGEABLE, &vma->vm_flags)) {
			mm->context.uses_skeys = 0;
			rc = -ENOMEM;
			goto out_up;
		}
	}
	mm->def_flags &= ~VM_MERGEABLE;

	walk.mm = mm;
	walk_page_range(0, TASK_SIZE, &walk);

out_up:
	up_write(&mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(s390_enable_skey);

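/*
 * Illustrative sketch, not part of the original gmap code: s390_enable_skey()
 * operates on current->mm and is a no-op once storage keys are in use, so it
 * can be called lazily, e.g. the first time a guest touches storage keys.
 * The wrapper name is hypothetical.
 */
static int __maybe_unused example_lazy_enable_skeys(void)
{
	/*
	 * Unmerges KSM pages and clears the storage keys of current->mm;
	 * returns -ENOMEM if unmerging fails, 0 otherwise.
	 */
	return s390_enable_skey();
}
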
/*
 * Reset CMMA state, make all pages stable again.
 */
static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
			     unsigned long next, struct mm_walk *walk)
{
	ptep_zap_unused(walk->mm, addr, pte, 1);
	return 0;
}

void s390_reset_cmma(struct mm_struct *mm)
{
	struct mm_walk walk = { .pte_entry = __s390_reset_cmma };

	down_write(&mm->mmap_sem);
	walk.mm = mm;
	walk_page_range(0, TASK_SIZE, &walk);
	up_write(&mm->mmap_sem);
}
EXPORT_SYMBOL_GPL(s390_reset_cmma);