/*
 *    Copyright IBM Corp. 2007,2009
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>
#include <linux/rcupdate.h>

#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#ifndef CONFIG_64BIT
#define ALLOC_ORDER	1
#define FRAG_MASK	0x0f
#else
#define ALLOC_ORDER	2
#define FRAG_MASK	0x03
#endif
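/*
 * A crst (region/segment) table spans 1 << ALLOC_ORDER pages, while pte
 * tables are smaller and share one 4K page.  FRAG_MASK is the bitmap of
 * fragments in such a shared page: four 1K page tables on 31 bit, two 2K
 * page tables on 64 bit.  The per-fragment state is kept in the low bits
 * of page->_mapcount (see page_table_alloc/page_table_free below).
 */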
unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE;
EXPORT_SYMBOL(VMALLOC_START);

static int __init parse_vmalloc(char *arg)
{
	if (!arg)
		return -EINVAL;
	VMALLOC_START = (VMALLOC_END - memparse(arg, &arg)) & PAGE_MASK;
	return 0;
}
early_param("vmalloc", parse_vmalloc);
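/*
 * Example: booting with "vmalloc=512M" reserves a 512 MB vmalloc area
 * ending at VMALLOC_END; memparse() accepts the usual K/M/G suffixes and
 * the resulting start address is rounded down to a page boundary.
 */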
unsigned long *crst_table_alloc(struct mm_struct *mm)
{
	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);

	if (!page)
		return NULL;
	return (unsigned long *) page_to_phys(page);
}

void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
	free_pages((unsigned long) table, ALLOC_ORDER);
}
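/*
 * crst (region/segment) tables are allocated as a block of 1 << ALLOC_ORDER
 * pages and referenced by their physical address.  With ALLOC_ORDER as
 * defined above that should be a 16K table of 2048 8-byte entries on 64 bit
 * and an 8K table of 2048 4-byte entries on 31 bit.
 */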
#ifdef CONFIG_64BIT
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
	unsigned long *table, *pgd;
	unsigned long entry;

	BUG_ON(limit > (1UL << 53));
repeat:
	table = crst_table_alloc(mm);
	if (!table)
		return -ENOMEM;
	spin_lock_bh(&mm->page_table_lock);
	if (mm->context.asce_limit < limit) {
		pgd = (unsigned long *) mm->pgd;
		if (mm->context.asce_limit <= (1UL << 31)) {
			entry = _REGION3_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
		} else {
			entry = _REGION2_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 53;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION2;
		}
		crst_table_init(table, entry);
		pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
		mm->pgd = (pgd_t *) table;
		mm->task_size = mm->context.asce_limit;
		table = NULL;
	}
	spin_unlock_bh(&mm->page_table_lock);
	if (table)
		crst_table_free(mm, table);
	if (mm->context.asce_limit < limit)
		goto repeat;
	update_mm(mm, current);
	return 0;
}
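/*
 * The downgrade path is the inverse of crst_table_upgrade: as long as the
 * current asce_limit is above the requested limit, the top-level table is
 * unlinked (its first entry points to the next lower table) and freed.
 * The caller (e.g. the 31 bit compat exec path) shrinks the address space
 * this way.
 */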
void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
	pgd_t *pgd;

	if (mm->context.asce_limit <= limit)
		return;

	while (mm->context.asce_limit > limit) {
		pgd = mm->pgd;
		switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
		case _REGION_ENTRY_TYPE_R2:
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
			break;
		case _REGION_ENTRY_TYPE_R3:
			mm->context.asce_limit = 1UL << 31;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_SEGMENT;
			break;
		default:
			BUG();
		}
		mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
		mm->task_size = mm->context.asce_limit;
		crst_table_free(mm, (unsigned long *) pgd);
	}
	update_mm(mm, current);
}
#endif
static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
{
	unsigned int old, new;

	do {
		old = atomic_read(v);
		new = old ^ bits;
	} while (atomic_cmpxchg(v, old, new) != old);
	return new;
}
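/*
 * Page table fragment bookkeeping: page->_mapcount of a pgtable page is
 * (ab)used as a bit mask.  The low FRAG_MASK bits say which fragments of
 * the 4K page are in use; under RCU table freeing the same bits shifted
 * left by 4 mark fragments whose free is still pending.  atomic_xor_bits()
 * flips bits in that mask without taking a lock.
 */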
/*
 * page table entry allocation/free routines.
 */
#ifdef CONFIG_PGSTE
static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
{
	struct page *page;
	unsigned long *table;

	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
	if (!page)
		return NULL;
	pgtable_page_ctor(page);
	atomic_set(&page->_mapcount, 3);
	table = (unsigned long *) page_to_phys(page);
	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
	clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
	return table;
}
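/*
 * With pgstes (used by KVM) every page table gets a full 4K page: the lower
 * 2K hold the pte entries, the upper 2K the pgste area.  Setting _mapcount
 * to 3 marks all fragment bits as used, so such a page is never handed out
 * as individual fragments.
 */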
static inline void page_table_free_pgste(unsigned long *table)
{
	struct page *page;

	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	pgtable_page_dtor(page);
	atomic_set(&page->_mapcount, -1);
	__free_page(page);
}
#endif
unsigned long *page_table_alloc(struct mm_struct *mm)
{
	struct page *page;
	unsigned long *table;
	unsigned int mask, bit;

#ifdef CONFIG_PGSTE
	if (mm_has_pgste(mm))
		return page_table_alloc_pgste(mm);
#endif
	/* Allocate fragments of a 4K page as 1K/2K page table */
	spin_lock_bh(&mm->context.list_lock);
	mask = FRAG_MASK;
	if (!list_empty(&mm->context.pgtable_list)) {
		page = list_first_entry(&mm->context.pgtable_list,
					struct page, lru);
		table = (unsigned long *) page_to_phys(page);
		mask = atomic_read(&page->_mapcount);
		mask = mask | (mask >> 4);
	}
	if ((mask & FRAG_MASK) == FRAG_MASK) {
		spin_unlock_bh(&mm->context.list_lock);
		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
		if (!page)
			return NULL;
		pgtable_page_ctor(page);
		atomic_set(&page->_mapcount, 1);
		table = (unsigned long *) page_to_phys(page);
		clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
		spin_lock_bh(&mm->context.list_lock);
		list_add(&page->lru, &mm->context.pgtable_list);
	} else {
		for (bit = 1; mask & bit; bit <<= 1)
			table += PTRS_PER_PTE;
		mask = atomic_xor_bits(&page->_mapcount, bit);
		if ((mask & FRAG_MASK) == FRAG_MASK)
			list_del(&page->lru);
	}
	spin_unlock_bh(&mm->context.list_lock);
	return table;
}
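/*
 * page_table_free() mirrors the allocation above: the fragment's bit is
 * cleared in _mapcount, the page is put back on (or taken off) the
 * pgtable_list depending on whether free fragments remain, and the page
 * itself is only released once the mask drops to zero.
 */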
void page_table_free(struct mm_struct *mm, unsigned long *table)
{
	struct page *page;
	unsigned int bit, mask;

#ifdef CONFIG_PGSTE
	if (mm_has_pgste(mm))
		return page_table_free_pgste(table);
#endif
	/* Free 1K/2K page table fragment of a 4K page */
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
	spin_lock_bh(&mm->context.list_lock);
	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
		list_del(&page->lru);
	mask = atomic_xor_bits(&page->_mapcount, bit);
	if (mask & FRAG_MASK)
		list_add(&page->lru, &mm->context.pgtable_list);
	spin_unlock_bh(&mm->context.list_lock);
	if (mask == 0) {
		pgtable_page_dtor(page);
		atomic_set(&page->_mapcount, -1);
		__free_page(page);
	}
}
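/*
 * RCU based table freeing: instead of releasing a fragment immediately,
 * page_table_free_rcu() encodes the fragment bit (shifted into the upper
 * nibble) or FRAG_MASK for a pgste page into the low bits of the table
 * address and hands it to tlb_remove_table().  __tlb_remove_table() later
 * decodes that value and performs the real free once it is safe against
 * concurrent lockless page table walkers.
 */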
#ifdef CONFIG_HAVE_RCU_TABLE_FREE

static void __page_table_free_rcu(void *table, unsigned bit)
{
	struct page *page;

#ifdef CONFIG_PGSTE
	if (bit == FRAG_MASK)
		return page_table_free_pgste(table);
#endif
	/* Free 1K/2K page table fragment of a 4K page */
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
		pgtable_page_dtor(page);
		atomic_set(&page->_mapcount, -1);
		__free_page(page);
	}
}
void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
{
	struct mm_struct *mm;
	struct page *page;
	unsigned int bit, mask;

	mm = tlb->mm;
#ifdef CONFIG_PGSTE
	if (mm_has_pgste(mm)) {
		table = (unsigned long *) (__pa(table) | FRAG_MASK);
		tlb_remove_table(tlb, table);
		return;
	}
#endif
	bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	spin_lock_bh(&mm->context.list_lock);
	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
		list_del(&page->lru);
	mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
	if (mask & FRAG_MASK)
		list_add_tail(&page->lru, &mm->context.pgtable_list);
	spin_unlock_bh(&mm->context.list_lock);
	table = (unsigned long *) (__pa(table) | (bit << 4));
	tlb_remove_table(tlb, table);
}
void __tlb_remove_table(void *_table)
{
	void *table = (void *)((unsigned long) _table & PAGE_MASK);
	unsigned type = (unsigned long) _table & ~PAGE_MASK;

	if (type)
		__page_table_free_rcu(table, type);
	else
		free_pages((unsigned long) table, ALLOC_ORDER);
}

#endif
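/*
 * pgstes (page table entry extensions) carry the per-page guest state
 * needed by the SIE instruction.  s390_enable_sie() below swaps the current
 * mm for a copy created with alloc_pgste set, so that all page tables of
 * the process are reallocated with the extra pgste area.
 */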
/*
 * switch on pgstes for the userspace process (for kvm)
 */
int s390_enable_sie(void)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm, *old_mm;

	/* Is the switched amode in use? If not, we cannot do sie */
	if (user_mode == HOME_SPACE_MODE)
		return -EINVAL;

	/* Do we have pgstes? If yes, we are done */
	if (mm_has_pgste(tsk->mm))
		return 0;

	/* let's check if we are allowed to replace the mm */
	task_lock(tsk);
	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
#ifdef CONFIG_AIO
	    !hlist_empty(&tsk->mm->ioctx_list) ||
#endif
	    tsk->mm != tsk->active_mm) {
		task_unlock(tsk);
		return -EINVAL;
	}
	task_unlock(tsk);

	/* we copy the mm and let dup_mm create the page tables with pgstes */
	tsk->mm->context.alloc_pgste = 1;
	mm = dup_mm(tsk);
	tsk->mm->context.alloc_pgste = 0;
	if (!mm)
		return -ENOMEM;

	/* Now let's check again if something happened */
	task_lock(tsk);
	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
#ifdef CONFIG_AIO
	    !hlist_empty(&tsk->mm->ioctx_list) ||
#endif
	    tsk->mm != tsk->active_mm) {
		mmput(mm);
		task_unlock(tsk);
		return -EINVAL;
	}

	/* ok, we are alone. No ptrace, no threads, etc. */
	old_mm = tsk->mm;
	tsk->mm = tsk->active_mm = mm;
	preempt_disable();
	update_mm(mm, tsk);
	atomic_inc(&mm->context.attach_count);
	atomic_dec(&old_mm->context.attach_count);
	cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
	preempt_enable();
	task_unlock(tsk);
	mmput(old_mm);
	return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
#if defined(CONFIG_DEBUG_PAGEALLOC) && defined(CONFIG_HIBERNATION)
bool kernel_page_present(struct page *page)
{
	unsigned long addr;
	int cc;

	addr = page_to_phys(page);
	asm volatile(
		"	lra	%1,0(%1)\n"
		"	ipm	%0\n"
		"	srl	%0,28"
		: "=d" (cc), "+a" (addr) : : "cc");
	return cc == 0;
}
#endif /* CONFIG_HIBERNATION && CONFIG_DEBUG_PAGEALLOC */