/*
 *    Copyright IBM Corp. 2007,2011
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

#ifndef CONFIG_64BIT
#define ALLOC_ORDER	1
#define FRAG_MASK	0x0f
#else
#define ALLOC_ORDER	2
#define FRAG_MASK	0x03
#endif
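
/*
 * A crst (region or segment) table spans 1 << ALLOC_ORDER pages and holds
 * 2048 eight-byte entries on 64 bit.  Page tables, by contrast, only need
 * a fraction of a 4K page; FRAG_MASK is the bitmap of fragments that fit
 * into one page (four 1K fragments on 31 bit, two 2K fragments on 64 bit)
 * and is mirrored in page->_mapcount by the page_table_alloc/free code
 * below.
 */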
unsigned long *crst_table_alloc(struct mm_struct *mm)
{
	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);

	if (!page)
		return NULL;
	return (unsigned long *) page_to_phys(page);
}

void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
	free_pages((unsigned long) table, ALLOC_ORDER);
}
#ifdef CONFIG_64BIT
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
	unsigned long *table, *pgd;
	unsigned long entry;

	BUG_ON(limit > (1UL << 53));
repeat:
	table = crst_table_alloc(mm);
	if (!table)
		return -ENOMEM;
	spin_lock_bh(&mm->page_table_lock);
	if (mm->context.asce_limit < limit) {
		pgd = (unsigned long *) mm->pgd;
		if (mm->context.asce_limit <= (1UL << 31)) {
			entry = _REGION3_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
		} else {
			entry = _REGION2_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 53;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION2;
		}
		crst_table_init(table, entry);
		pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
		mm->pgd = (pgd_t *) table;
		mm->task_size = mm->context.asce_limit;
		table = NULL;
	}
	spin_unlock_bh(&mm->page_table_lock);
	if (table)
		crst_table_free(mm, table);
	if (mm->context.asce_limit < limit)
		goto repeat;
	update_mm(mm, current);
	return 0;
}
void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
	pgd_t *pgd;

	if (mm->context.asce_limit <= limit)
		return;
	__tlb_flush_mm(mm);
	while (mm->context.asce_limit > limit) {
		pgd = mm->pgd;
		switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
		case _REGION_ENTRY_TYPE_R2:
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
			break;
		case _REGION_ENTRY_TYPE_R3:
			mm->context.asce_limit = 1UL << 31;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_SEGMENT;
			break;
		default:
			BUG();
		}
		mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
		mm->task_size = mm->context.asce_limit;
		crst_table_free(mm, (unsigned long *) pgd);
	}
	update_mm(mm, current);
}
#endif
#ifdef CONFIG_PGSTE

/**
 * gmap_alloc - allocate a guest address space
 * @mm: pointer to the parent mm_struct
 *
 * Returns a guest address space structure.
 */
struct gmap *gmap_alloc(struct mm_struct *mm)
{
	struct gmap *gmap;
	struct page *page;
	unsigned long *table;

	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
	if (!gmap)
		goto out;
	INIT_LIST_HEAD(&gmap->crst_list);
	gmap->mm = mm;
	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
	if (!page)
		goto out_free;
	list_add(&page->lru, &gmap->crst_list);
	table = (unsigned long *) page_to_phys(page);
	crst_table_init(table, _REGION1_ENTRY_EMPTY);
	gmap->table = table;
	gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH |
		     _ASCE_USER_BITS | __pa(table);
	list_add(&gmap->list, &mm->context.gmap_list);
	return gmap;

out_free:
	kfree(gmap);
out:
	return NULL;
}
EXPORT_SYMBOL_GPL(gmap_alloc);
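
/*
 * Typical gmap life cycle, e.g. in the s390 kvm code (a rough sketch,
 * error handling omitted): gmap_alloc() creates the guest address space,
 * gmap_map_segment() backs pieces of it with the parent address space,
 * gmap_enable() announces it to the low level fault handling code before
 * SIE is entered, and gmap_disable()/gmap_free() tear it down again.
 */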
static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
{
	struct gmap_pgtable *mp;
	struct gmap_rmap *rmap;
	struct page *page;

	if (*table & _SEGMENT_ENTRY_INV)
		return 0;
	page = pfn_to_page(*table >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	list_for_each_entry(rmap, &mp->mapper, list) {
		if (rmap->entry != table)
			continue;
		list_del(&rmap->list);
		kfree(rmap);
		break;
	}
	*table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
	return 1;
}
static void gmap_flush_tlb(struct gmap *gmap)
{
	if (MACHINE_HAS_IDTE)
		__tlb_flush_idte((unsigned long) gmap->table |
				 _ASCE_TYPE_REGION1);
	else
		__tlb_flush_global();
}
/**
 * gmap_free - free a guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_free(struct gmap *gmap)
{
	struct page *page, *next;
	unsigned long *table;
	int i;

	/* Flush tlb. */
	if (MACHINE_HAS_IDTE)
		__tlb_flush_idte((unsigned long) gmap->table |
				 _ASCE_TYPE_REGION1);
	else
		__tlb_flush_global();

	/* Free all segment & region tables. */
	down_read(&gmap->mm->mmap_sem);
	spin_lock(&gmap->mm->page_table_lock);
	list_for_each_entry_safe(page, next, &gmap->crst_list, lru) {
		table = (unsigned long *) page_to_phys(page);
		if ((*table & _REGION_ENTRY_TYPE_MASK) == 0)
			/* Remove gmap rmap structures for segment table. */
			for (i = 0; i < PTRS_PER_PMD; i++, table++)
				gmap_unlink_segment(gmap, table);
		__free_pages(page, ALLOC_ORDER);
	}
	spin_unlock(&gmap->mm->page_table_lock);
	up_read(&gmap->mm->mmap_sem);
	list_del(&gmap->list);
	kfree(gmap);
}
EXPORT_SYMBOL_GPL(gmap_free);
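
/*
 * Design note: every region and segment table page hooked into a gmap is
 * also kept on gmap->crst_list, which is why gmap_free() can release the
 * whole hierarchy with a single list walk; only segment tables (entry
 * type 0) need the extra pass that detaches their rmap structures from
 * the parent's page tables.
 */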
/**
 * gmap_enable - switch primary space to the guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_enable(struct gmap *gmap)
{
	S390_lowcore.gmap = (unsigned long) gmap;
}
EXPORT_SYMBOL_GPL(gmap_enable);
/**
 * gmap_disable - switch back to the standard primary address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_disable(struct gmap *gmap)
{
	S390_lowcore.gmap = 0UL;
}
EXPORT_SYMBOL_GPL(gmap_disable);
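
/*
 * gmap_enable() and gmap_disable() only record the active gmap in the
 * cpu's lowcore; the program check handler is expected to look at
 * S390_lowcore.gmap and translate a guest address through __gmap_fault()
 * before falling back to the normal page fault path.
 */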
/*
 * gmap_alloc_table is assumed to be called with mmap_sem held
 */
static int gmap_alloc_table(struct gmap *gmap,
			    unsigned long *table, unsigned long init)
{
	struct page *page;
	unsigned long *new;

	/* since we don't free the gmap table until gmap_free we can unlock */
	spin_unlock(&gmap->mm->page_table_lock);
	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
	spin_lock(&gmap->mm->page_table_lock);
	if (!page)
		return -ENOMEM;
	new = (unsigned long *) page_to_phys(page);
	crst_table_init(new, init);
	if (*table & _REGION_ENTRY_INV) {
		list_add(&page->lru, &gmap->crst_list);
		*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
			(*table & _REGION_ENTRY_TYPE_MASK);
	} else
		__free_pages(page, ALLOC_ORDER);
	return 0;
}
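
/*
 * Note that gmap_alloc_table() re-checks the region table entry after it
 * has re-acquired page_table_lock: another cpu may have installed a
 * lower level table while the allocation ran unlocked, in which case the
 * freshly allocated crst page is simply released again.
 */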
/**
 * gmap_unmap_segment - unmap segment from the guest address space
 * @gmap: pointer to the guest address space structure
 * @to: address in the guest address space
 * @len: length of the memory area to unmap
 *
 * Returns 0 if the unmap succeeded, -EINVAL if not.
 */
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
{
	unsigned long *table;
	unsigned long off;
	int flush;

	if ((to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || to + len < to)
		return -EINVAL;

	flush = 0;
	down_read(&gmap->mm->mmap_sem);
	spin_lock(&gmap->mm->page_table_lock);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Walk the guest addr space page table */
		table = gmap->table + (((to + off) >> 53) & 0x7ff);
		if (*table & _REGION_ENTRY_INV)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 42) & 0x7ff);
		if (*table & _REGION_ENTRY_INV)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 31) & 0x7ff);
		if (*table & _REGION_ENTRY_INV)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 20) & 0x7ff);

		/* Clear segment table entry in guest address space. */
		flush |= gmap_unlink_segment(gmap, table);
		*table = _SEGMENT_ENTRY_INV;
	}
out:
	spin_unlock(&gmap->mm->page_table_lock);
	up_read(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;
}
EXPORT_SYMBOL_GPL(gmap_unmap_segment);
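
/*
 * The guest address space is a full 4-level tree: bits 53, 42, 31 and 20
 * of a guest address select the region-first, region-second, region-third
 * and segment table index respectively, each index 11 bits wide (0x7ff,
 * i.e. 2048 entries per table).  gmap_unmap_segment() therefore only has
 * to walk down to the segment level and invalidate the 1 MB entries there.
 */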
/**
 * gmap_map_segment - map a segment to the guest address space
 * @gmap: pointer to the guest address space structure
 * @from: source address in the parent address space
 * @to: target address in the guest address space
 * @len: length of the memory area to map
 *
 * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
 */
int gmap_map_segment(struct gmap *gmap, unsigned long from,
		     unsigned long to, unsigned long len)
{
	unsigned long *table;
	unsigned long off;
	int flush;

	if ((from | to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || from + len > PGDIR_SIZE ||
	    from + len < from || to + len < to)
		return -EINVAL;

	flush = 0;
	down_read(&gmap->mm->mmap_sem);
	spin_lock(&gmap->mm->page_table_lock);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Walk the gmap address space page table */
		table = gmap->table + (((to + off) >> 53) & 0x7ff);
		if ((*table & _REGION_ENTRY_INV) &&
		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 42) & 0x7ff);
		if ((*table & _REGION_ENTRY_INV) &&
		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 31) & 0x7ff);
		if ((*table & _REGION_ENTRY_INV) &&
		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 20) & 0x7ff);

		/* Store 'from' address in an invalid segment table entry. */
		flush |= gmap_unlink_segment(gmap, table);
		*table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off);
	}
	spin_unlock(&gmap->mm->page_table_lock);
	up_read(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;

out_unmap:
	spin_unlock(&gmap->mm->page_table_lock);
	up_read(&gmap->mm->mmap_sem);
	gmap_unmap_segment(gmap, to, len);
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(gmap_map_segment);
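
/*
 * A mapping is established lazily: gmap_map_segment() only records the
 * parent ('from') address in an invalid, read-only segment table entry;
 * the real page table is hooked up later by __gmap_fault() when the
 * guest first touches that segment.  A caller backing guest memory at
 * guest real address 0 with a 1 MB aligned userspace buffer would do
 * something like (illustrative only, buffer and size are made-up names):
 *
 *	gmap_map_segment(gmap, (unsigned long) buffer, 0UL, size);
 */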
/*
 * this function is assumed to be called with mmap_sem held
 */
unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
{
	unsigned long *table, vmaddr, segment;
	struct mm_struct *mm;
	struct gmap_pgtable *mp;
	struct gmap_rmap *rmap;
	struct vm_area_struct *vma;
	struct page *page;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	current->thread.gmap_addr = address;
	mm = gmap->mm;
	/* Walk the gmap address space page table */
	table = gmap->table + ((address >> 53) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INV))
		return -EFAULT;
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((address >> 42) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INV))
		return -EFAULT;
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((address >> 31) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INV))
		return -EFAULT;
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((address >> 20) & 0x7ff);

	/* Convert the gmap address to an mm address. */
	segment = *table;
	if (likely(!(segment & _SEGMENT_ENTRY_INV))) {
		page = pfn_to_page(segment >> PAGE_SHIFT);
		mp = (struct gmap_pgtable *) page->index;
		return mp->vmaddr | (address & ~PMD_MASK);
	} else if (segment & _SEGMENT_ENTRY_RO) {
		vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
		vma = find_vma(mm, vmaddr);
		if (!vma || vma->vm_start > vmaddr)
			return -EFAULT;

		/* Walk the parent mm page table */
		pgd = pgd_offset(mm, vmaddr);
		pud = pud_alloc(mm, pgd, vmaddr);
		if (!pud)
			return -EFAULT;
		pmd = pmd_alloc(mm, pud, vmaddr);
		if (!pmd)
			return -EFAULT;
		if (!pmd_present(*pmd) &&
		    __pte_alloc(mm, vma, pmd, vmaddr))
			return -EFAULT;
		/* pmd now points to a valid segment table entry. */
		rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
		if (!rmap)
			return -ENOMEM;
		/* Link gmap segment table entry location to page table. */
		page = pmd_page(*pmd);
		mp = (struct gmap_pgtable *) page->index;
		rmap->entry = table;
		spin_lock(&mm->page_table_lock);
		list_add(&rmap->list, &mp->mapper);
		spin_unlock(&mm->page_table_lock);
		/* Set gmap segment table entry to page table. */
		*table = pmd_val(*pmd) & PAGE_MASK;
		return vmaddr | (address & ~PMD_MASK);
	}
	return -EFAULT;
}
unsigned long gmap_fault(unsigned long address, struct gmap *gmap)
{
	unsigned long rc;

	down_read(&gmap->mm->mmap_sem);
	rc = __gmap_fault(address, gmap);
	up_read(&gmap->mm->mmap_sem);

	return rc;
}
EXPORT_SYMBOL_GPL(gmap_fault);
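
/*
 * __gmap_fault() resolves a guest address in two steps: the walk through
 * the gmap tables either finds a valid segment entry, in which case the
 * parent address is simply mp->vmaddr plus the offset within the 1 MB
 * segment, or it finds the invalid entry left behind by
 * gmap_map_segment().  In the latter case the parent page table is
 * allocated (pud/pmd/pte), an rmap entry is recorded so the notifier can
 * detach the segment again, and the gmap segment entry is pointed at the
 * parent page table.
 */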
void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap)
{
	unsigned long *table, address, size;
	struct vm_area_struct *vma;
	struct gmap_pgtable *mp;
	struct page *page;

	down_read(&gmap->mm->mmap_sem);
	address = from;
	while (address < to) {
		/* Walk the gmap address space page table */
		table = gmap->table + ((address >> 53) & 0x7ff);
		if (unlikely(*table & _REGION_ENTRY_INV)) {
			address = (address + PMD_SIZE) & PMD_MASK;
			continue;
		}
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + ((address >> 42) & 0x7ff);
		if (unlikely(*table & _REGION_ENTRY_INV)) {
			address = (address + PMD_SIZE) & PMD_MASK;
			continue;
		}
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + ((address >> 31) & 0x7ff);
		if (unlikely(*table & _REGION_ENTRY_INV)) {
			address = (address + PMD_SIZE) & PMD_MASK;
			continue;
		}
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + ((address >> 20) & 0x7ff);
		if (unlikely(*table & _SEGMENT_ENTRY_INV)) {
			address = (address + PMD_SIZE) & PMD_MASK;
			continue;
		}
		page = pfn_to_page(*table >> PAGE_SHIFT);
		mp = (struct gmap_pgtable *) page->index;
		vma = find_vma(gmap->mm, mp->vmaddr);
		size = min(to - address, PMD_SIZE - (address & ~PMD_MASK));
		zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK),
			       size, NULL);
		address = (address + PMD_SIZE) & PMD_MASK;
	}
	up_read(&gmap->mm->mmap_sem);
}
EXPORT_SYMBOL_GPL(gmap_discard);
void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table)
{
	struct gmap_rmap *rmap, *next;
	struct gmap_pgtable *mp;
	struct page *page;
	int flush;

	flush = 0;
	spin_lock(&mm->page_table_lock);
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
		*rmap->entry =
			_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
		list_del(&rmap->list);
		kfree(rmap);
		flush = 1;
	}
	spin_unlock(&mm->page_table_lock);
	if (flush)
		__tlb_flush_global();
}
static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
						    unsigned long vmaddr)
{
	struct page *page;
	unsigned long *table;
	struct gmap_pgtable *mp;

	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
	if (!page)
		return NULL;
	mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT);
	if (!mp) {
		__free_page(page);
		return NULL;
	}
	pgtable_page_ctor(page);
	mp->vmaddr = vmaddr & PMD_MASK;
	INIT_LIST_HEAD(&mp->mapper);
	page->index = (unsigned long) mp;
	atomic_set(&page->_mapcount, 3);
	table = (unsigned long *) page_to_phys(page);
	/* ptes in the lower half of the page, pgstes in the upper half */
	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
	clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
	return table;
}
static inline void page_table_free_pgste(unsigned long *table)
{
	struct page *page;
	struct gmap_pgtable *mp;

	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	BUG_ON(!list_empty(&mp->mapper));
	pgtable_page_dtor(page);
	atomic_set(&page->_mapcount, -1);
	kfree(mp);
	__free_page(page);
}
#else /* CONFIG_PGSTE */

static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
						    unsigned long vmaddr)
{
	return NULL;
}

static inline void page_table_free_pgste(unsigned long *table)
{
}

static inline void gmap_unmap_notifier(struct mm_struct *mm,
					unsigned long *table)
{
}

#endif /* CONFIG_PGSTE */
static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
{
	unsigned int old, new;

	do {
		old = atomic_read(v);
		new = old ^ bits;
	} while (atomic_cmpxchg(v, old, new) != old);
	return new;
}
/*
 * page table entry allocation/free routines.
 */
unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr)
{
	struct page *page;
	unsigned long *table;
	unsigned int mask, bit;

	if (mm_has_pgste(mm))
		return page_table_alloc_pgste(mm, vmaddr);
	/* Allocate fragments of a 4K page as 1K/2K page table */
	spin_lock_bh(&mm->context.list_lock);
	mask = FRAG_MASK;
	if (!list_empty(&mm->context.pgtable_list)) {
		page = list_first_entry(&mm->context.pgtable_list,
					struct page, lru);
		table = (unsigned long *) page_to_phys(page);
		mask = atomic_read(&page->_mapcount);
		mask = mask | (mask >> 4);
	}
	if ((mask & FRAG_MASK) == FRAG_MASK) {
		spin_unlock_bh(&mm->context.list_lock);
		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
		if (!page)
			return NULL;
		pgtable_page_ctor(page);
		atomic_set(&page->_mapcount, 1);
		table = (unsigned long *) page_to_phys(page);
		clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
		spin_lock_bh(&mm->context.list_lock);
		list_add(&page->lru, &mm->context.pgtable_list);
	} else {
		for (bit = 1; mask & bit; bit <<= 1)
			table += PTRS_PER_PTE;
		mask = atomic_xor_bits(&page->_mapcount, bit);
		if ((mask & FRAG_MASK) == FRAG_MASK)
			list_del(&page->lru);
	}
	spin_unlock_bh(&mm->context.list_lock);
	return table;
}
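
/*
 * page->_mapcount of a page on mm->context.pgtable_list doubles as the
 * fragment bitmap: the low FRAG_MASK bits mark 1K/2K fragments handed out
 * as page tables, the same bits shifted left by four mark fragments whose
 * free is still waiting for an RCU grace period (see page_table_free_rcu()
 * below).  page_table_alloc() treats both sets of bits as busy; a fully
 * used page is taken off the list and put back by page_table_free() once
 * one of its fragments is released, while a page whose last fragment is
 * freed is released completely.
 */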
void page_table_free(struct mm_struct *mm, unsigned long *table)
{
	struct page *page;
	unsigned int bit, mask;

	if (mm_has_pgste(mm)) {
		gmap_unmap_notifier(mm, table);
		return page_table_free_pgste(table);
	}
	/* Free 1K/2K page table fragment of a 4K page */
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
	spin_lock_bh(&mm->context.list_lock);
	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
		list_del(&page->lru);
	mask = atomic_xor_bits(&page->_mapcount, bit);
	if (mask & FRAG_MASK)
		list_add(&page->lru, &mm->context.pgtable_list);
	spin_unlock_bh(&mm->context.list_lock);
	if (mask == 0) {
		pgtable_page_dtor(page);
		atomic_set(&page->_mapcount, -1);
		__free_page(page);
	}
}
static void __page_table_free_rcu(void *table, unsigned bit)
{
	struct page *page;

	if (bit == FRAG_MASK)
		return page_table_free_pgste(table);
	/* Free 1K/2K page table fragment of a 4K page */
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
		pgtable_page_dtor(page);
		atomic_set(&page->_mapcount, -1);
		__free_page(page);
	}
}
void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
{
	struct mm_struct *mm;
	struct page *page;
	unsigned int bit, mask;

	mm = tlb->mm;
	if (mm_has_pgste(mm)) {
		gmap_unmap_notifier(mm, table);
		table = (unsigned long *) (__pa(table) | FRAG_MASK);
		tlb_remove_table(tlb, table);
		return;
	}
	bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	spin_lock_bh(&mm->context.list_lock);
	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
		list_del(&page->lru);
	mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
	if (mask & FRAG_MASK)
		list_add_tail(&page->lru, &mm->context.pgtable_list);
	spin_unlock_bh(&mm->context.list_lock);
	table = (unsigned long *) (__pa(table) | (bit << 4));
	tlb_remove_table(tlb, table);
}
void __tlb_remove_table(void *_table)
{
	const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK;
	void *table = (void *)((unsigned long) _table & ~mask);
	unsigned type = (unsigned long) _table & mask;

	if (type)
		__page_table_free_rcu(table, type);
	else
		free_pages((unsigned long) table, ALLOC_ORDER);
}
static void tlb_remove_table_smp_sync(void *arg)
{
	/* Simply deliver the interrupt */
}
static void tlb_remove_table_one(void *table)
{
	/*
	 * This isn't an RCU grace period and hence the page-tables cannot be
	 * assumed to be actually RCU-freed.
	 *
	 * It is however sufficient for software page-table walkers that rely
	 * on IRQ disabling. See the comment near struct mmu_table_batch.
	 */
	smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
	__tlb_remove_table(table);
}
static void tlb_remove_table_rcu(struct rcu_head *head)
{
	struct mmu_table_batch *batch;
	int i;

	batch = container_of(head, struct mmu_table_batch, rcu);

	for (i = 0; i < batch->nr; i++)
		__tlb_remove_table(batch->tables[i]);

	free_page((unsigned long)batch);
}
void tlb_table_flush(struct mmu_gather *tlb)
{
	struct mmu_table_batch **batch = &tlb->batch;

	if (*batch) {
		__tlb_flush_mm(tlb->mm);
		call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
		*batch = NULL;
	}
}
void tlb_remove_table(struct mmu_gather *tlb, void *table)
{
	struct mmu_table_batch **batch = &tlb->batch;

	if (*batch == NULL) {
		*batch = (struct mmu_table_batch *)
			__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
		if (*batch == NULL) {
			__tlb_flush_mm(tlb->mm);
			tlb_remove_table_one(table);
			return;
		}
		(*batch)->nr = 0;
	}
	(*batch)->tables[(*batch)->nr++] = table;
	if ((*batch)->nr == MAX_TABLE_BATCH)
		tlb_table_flush(tlb);
}
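
/*
 * page_table_free_rcu() smuggles the fragment/type information into the
 * low bits of the table address before it hands the pointer to
 * tlb_remove_table(): FRAG_MASK marks a pgste page table, bit << 4 marks
 * a 1K/2K fragment, and 0 means a full crst table.  __tlb_remove_table()
 * decodes these bits after the grace period and picks the matching free
 * routine.
 */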
/*
 * switch on pgstes for its userspace process (for kvm)
 */
int s390_enable_sie(void)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm, *old_mm;

	/* Do we have the switched amode? If not, we cannot do sie */
	if (user_mode == HOME_SPACE_MODE)
		return -EINVAL;

	/* Do we have pgstes? If yes, we are done */
	if (mm_has_pgste(tsk->mm))
		return 0;

	/* Let's check if we are allowed to replace the mm */
	task_lock(tsk);
	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
#ifdef CONFIG_AIO
	    !hlist_empty(&tsk->mm->ioctx_list) ||
#endif
	    tsk->mm != tsk->active_mm) {
		task_unlock(tsk);
		return -EINVAL;
	}
	task_unlock(tsk);

	/* We copy the mm and let dup_mm create the page tables with pgstes */
	tsk->mm->context.alloc_pgste = 1;
	mm = dup_mm(tsk);
	tsk->mm->context.alloc_pgste = 0;
	if (!mm)
		return -ENOMEM;

	/* Now let's check again if something happened */
	task_lock(tsk);
	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
#ifdef CONFIG_AIO
	    !hlist_empty(&tsk->mm->ioctx_list) ||
#endif
	    tsk->mm != tsk->active_mm) {
		mmput(mm);
		task_unlock(tsk);
		return -EINVAL;
	}

	/* ok, we are alone. No ptrace, no threads, etc. */
	old_mm = tsk->mm;
	tsk->mm = tsk->active_mm = mm;
	preempt_disable();
	update_mm(mm, tsk);
	atomic_inc(&mm->context.attach_count);
	atomic_dec(&old_mm->context.attach_count);
	cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
	preempt_enable();
	task_unlock(tsk);
	mmput(old_mm);
	return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
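
/*
 * s390_enable_sie() has to run before the process creates additional
 * threads or registers AIO contexts: the existing mm is duplicated with
 * context.alloc_pgste set so that every page table is reallocated with
 * the extra pgste half needed by the SIE instruction, and the new mm is
 * swapped in underneath the (single threaded) caller.  The kvm code
 * typically calls this while the virtual machine is being created.
 */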
#if defined(CONFIG_DEBUG_PAGEALLOC) && defined(CONFIG_HIBERNATION)
bool kernel_page_present(struct page *page)
{
	unsigned long addr;
	int cc;

	addr = page_to_phys(page);
	asm volatile(
		"	lra	%1,0(%1)\n"
		"	ipm	%0\n"
		"	srl	%0,28"
		: "=d" (cc), "+a" (addr) : : "cc");
	/* lra sets a non-zero condition code if the page is not accessible */
	return cc == 0;
}
#endif /* CONFIG_HIBERNATION && CONFIG_DEBUG_PAGEALLOC */