/*
 *  arch/s390/mm/pgtable.c
 *
 *    Copyright IBM Corp. 2007
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>

#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

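/*
 * Background for the constants below: an s390 page table has 256
 * entries, i.e. it occupies 1KB on 31-bit and 2KB on 64-bit kernels,
 * so a 4KB page holds TABLES_PER_PAGE table fragments. FRAG_MASK
 * carries one allocation bit per fragment in page->flags, and
 * SECOND_HALVES marks the second fragment of each pair, used for the
 * shadow (noexec) or pgste half when tables are allocated in pairs.
 */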
#ifndef CONFIG_64BIT
#define ALLOC_ORDER	1
#define TABLES_PER_PAGE	4
#define FRAG_MASK	15UL
#define SECOND_HALVES	10UL

void clear_table_pgstes(unsigned long *table)
{
	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
	memset(table + 256, 0, PAGE_SIZE/4);
	clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
	memset(table + 768, 0, PAGE_SIZE/4);
}

#else
#define ALLOC_ORDER	2
#define TABLES_PER_PAGE	2
#define FRAG_MASK	3UL
#define SECOND_HALVES	2UL

void clear_table_pgstes(unsigned long *table)
{
	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
	memset(table + 256, 0, PAGE_SIZE/2);
}

#endif

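/*
 * CRST (region/segment) tables span 1UL << ALLOC_ORDER pages: four
 * pages on 64-bit, two on 31-bit. For a noexec address space a shadow
 * table is allocated alongside; its physical address is parked in
 * page->index of the primary table's struct page.
 */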
unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
{
	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);

	if (!page)
		return NULL;
	page->index = 0;
	if (noexec) {
		struct page *shadow = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
		if (!shadow) {
			__free_pages(page, ALLOC_ORDER);
			return NULL;
		}
		page->index = page_to_phys(shadow);
	}
	spin_lock(&mm->page_table_lock);
	list_add(&page->lru, &mm->context.crst_list);
	spin_unlock(&mm->page_table_lock);
	return (unsigned long *) page_to_phys(page);
}

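/*
 * Counterpart to crst_table_alloc(): drops the table from the mm's
 * crst_list and frees the shadow table first, if get_shadow_table()
 * finds one attached.
 */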
void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
	unsigned long *shadow = get_shadow_table(table);
	struct page *page = virt_to_page(table);

	spin_lock(&mm->page_table_lock);
	list_del(&page->lru);
	spin_unlock(&mm->page_table_lock);
	if (shadow)
		free_pages((unsigned long) shadow, ALLOC_ORDER);
	free_pages((unsigned long) table, ALLOC_ORDER);
}

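/*
 * 64-bit only: grow the address space by pushing new region tables on
 * top of the current pgd, raising asce_limit from 2GB (1UL << 31) to
 * 4TB (1UL << 42) and then to 8PB (1UL << 53). The new table is
 * allocated without the page table lock held, so the limit is
 * re-checked under the lock; since each pass adds only one table
 * level, the allocation is repeated until the limit is reached.
 */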
#ifdef CONFIG_64BIT
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
	unsigned long *table, *pgd;
	unsigned long entry;

	BUG_ON(limit > (1UL << 53));
repeat:
	table = crst_table_alloc(mm, mm->context.noexec);
	if (!table)
		return -ENOMEM;
	spin_lock(&mm->page_table_lock);
	if (mm->context.asce_limit < limit) {
		pgd = (unsigned long *) mm->pgd;
		if (mm->context.asce_limit <= (1UL << 31)) {
			entry = _REGION3_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
		} else {
			entry = _REGION2_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 53;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION2;
		}
		crst_table_init(table, entry);
		pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
		mm->pgd = (pgd_t *) table;
		table = NULL;
	}
	spin_unlock(&mm->page_table_lock);
	if (table)
		crst_table_free(mm, table);
	if (mm->context.asce_limit < limit)
		goto repeat;
	update_mm(mm, current);
	return 0;
}

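/*
 * The inverse of crst_table_upgrade(): pop region tables off the top
 * until asce_limit fits, e.g. when a 31-bit compat binary only needs
 * a segment table. The origin of the next lower table is read from
 * the first entry of the table being removed.
 */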
void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
	pgd_t *pgd;

	if (mm->context.asce_limit <= limit)
		return;
	__tlb_flush_mm(mm);
	while (mm->context.asce_limit > limit) {
		pgd = mm->pgd;
		switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
		case _REGION_ENTRY_TYPE_R2:
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
			break;
		case _REGION_ENTRY_TYPE_R3:
			mm->context.asce_limit = 1UL << 31;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_SEGMENT;
			break;
		default:
			BUG();
		}
		mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
		crst_table_free(mm, (unsigned long *) pgd);
	}
	update_mm(mm, current);
}
#endif

/*
 * page table entry allocation/free routines.
 */
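/*
 * page_table_alloc() hands out 1KB/2KB fragments of a 4KB page and
 * tracks them in the low bits of page->flags: a clear FRAG_MASK bit
 * means the fragment is free. With noexec or pgstes enabled, bits is
 * 3UL, so each request claims a fragment pair (the table plus its
 * shadow or pgste half). Partially used pages stay at the head of
 * mm->context.pgtable_list; fully used ones move to the tail.
 */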
unsigned long *page_table_alloc(struct mm_struct *mm)
{
	struct page *page;
	unsigned long *table;
	unsigned long bits;

	bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
	spin_lock(&mm->page_table_lock);
	page = NULL;
	if (!list_empty(&mm->context.pgtable_list)) {
		page = list_first_entry(&mm->context.pgtable_list,
					struct page, lru);
		if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
			page = NULL;
	}
	if (!page) {
		spin_unlock(&mm->page_table_lock);
		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
		if (!page)
			return NULL;
		pgtable_page_ctor(page);
		page->flags &= ~FRAG_MASK;
		table = (unsigned long *) page_to_phys(page);
		if (mm->context.pgstes)
			clear_table_pgstes(table);
		else
			clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
		spin_lock(&mm->page_table_lock);
		list_add(&page->lru, &mm->context.pgtable_list);
	}
	table = (unsigned long *) page_to_phys(page);
	while (page->flags & bits) {
		table += 256;
		bits <<= 1;
	}
	page->flags |= bits;
	if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
		list_move_tail(&page->lru, &mm->context.pgtable_list);
	spin_unlock(&mm->page_table_lock);
	return table;
}

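/*
 * page_table_free() recomputes the fragment bits from the table's
 * offset within its page and toggles them off with xor; only when no
 * FRAG_MASK bit remains set is the page itself given back to the
 * allocator.
 */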
void page_table_free(struct mm_struct *mm, unsigned long *table)
{
	struct page *page;
	unsigned long bits;

	bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
	bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	spin_lock(&mm->page_table_lock);
	page->flags ^= bits;
	if (page->flags & FRAG_MASK) {
		/* Page now has some free pgtable fragments. */
		list_move(&page->lru, &mm->context.pgtable_list);
		page = NULL;
	} else
		/* All fragments of the 4K page have been freed. */
		list_del(&page->lru);
	spin_unlock(&mm->page_table_lock);
	if (page) {
		pgtable_page_dtor(page);
		__free_page(page);
	}
}

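/*
 * disable_noexec() tears down the shadow tables once the secondary
 * space is no longer needed: shadow CRST pages are freed outright,
 * while page-table second halves are only marked free again by
 * clearing their SECOND_HALVES bits in page->flags.
 */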
void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
{
	struct page *page;

	spin_lock(&mm->page_table_lock);
	/* Free shadow region and segment tables. */
	list_for_each_entry(page, &mm->context.crst_list, lru)
		if (page->index) {
			free_pages((unsigned long) page->index, ALLOC_ORDER);
			page->index = 0;
		}
	/* "Free" second halves of page tables. */
	list_for_each_entry(page, &mm->context.pgtable_list, lru)
		page->flags &= ~SECOND_HALVES;
	spin_unlock(&mm->page_table_lock);
	mm->context.noexec = 0;
	update_mm(mm, tsk);
}

/*
 * Switch on pgstes for the current userspace process (for KVM).
 */
int s390_enable_sie(void)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm, *old_mm;

	/* Do we already have pgstes? If yes, we are done. */
	if (tsk->mm->context.pgstes)
		return 0;

	/* Check whether we are allowed to replace the mm. */
	task_lock(tsk);
	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
	    tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) {
		task_unlock(tsk);
		return -EINVAL;
	}
	task_unlock(tsk);

	/* We copy the mm with pgstes enabled. */
	tsk->mm->context.pgstes = 1;
	mm = dup_mm(tsk);
	tsk->mm->context.pgstes = 0;
	if (!mm)
		return -ENOMEM;

	/* Now check again whether somebody attached ptrace etc. */
	task_lock(tsk);
	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
	    tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) {
		mmput(mm);
		task_unlock(tsk);
		return -EINVAL;
	}

	/* OK, we are alone. No ptrace, no threads, etc. */
	old_mm = tsk->mm;
	tsk->mm = tsk->active_mm = mm;
	preempt_disable();
	update_mm(mm, tsk);
	cpu_set(smp_processor_id(), mm->cpu_vm_mask);
	preempt_enable();
	task_unlock(tsk);
	mmput(old_mm);
	return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
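
/*
 * Hypothetical usage sketch (not part of this file): a KVM-style
 * caller would invoke this early, while the process is still
 * single-threaded, since the mm can only be replaced while it has no
 * other users:
 *
 *	int rc = s390_enable_sie();
 *	if (rc)
 *		return rc;
 */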