/*
 *  arch/s390/mm/pgtable.c
 *
 *    Copyright IBM Corp. 2007,2009
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>
#include <linux/rcupdate.h>

#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

#ifndef CONFIG_64BIT
#define ALLOC_ORDER	1
#define FRAG_MASK	0x0f
#else
#define ALLOC_ORDER	2
#define FRAG_MASK	0x03
#endif
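
/*
 * A crst (region/segment) table is allocated as a block of
 * 2^ALLOC_ORDER pages.  Page tables are smaller than a 4K page
 * (2K on 64-bit, 1K on 31-bit), so a page is carved into fragments;
 * FRAG_MASK has one bit per fragment and the low bits of
 * page->_mapcount serve as the allocation bitmap.
 */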

unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE;
EXPORT_SYMBOL(VMALLOC_START);

static int __init parse_vmalloc(char *arg)
{
        if (!arg)
                return -EINVAL;
        VMALLOC_START = (VMALLOC_END - memparse(arg, &arg)) & PAGE_MASK;
        return 0;
}
early_param("vmalloc", parse_vmalloc);
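
/*
 * Allocate/free the 2^ALLOC_ORDER pages backing a region or segment
 * table.  The table is returned by its physical address; the kernel
 * address space is mapped 1:1, so the value doubles as a usable
 * pointer.
 */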
unsigned long *crst_table_alloc(struct mm_struct *mm)
{
        struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);

        if (!page)
                return NULL;
        return (unsigned long *) page_to_phys(page);
}

void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
        free_pages((unsigned long) table, ALLOC_ORDER);
}

#ifdef CONFIG_64BIT
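/*
 * crst_table_upgrade - grow an address space by stacking an additional
 * region table on top of the current top-level table.  Each pass
 * raises the ASCE limit by one level, from 2G to 4T (region-third
 * table) or from 4T to 8P (region-second table), until 'limit' is
 * covered.  The old top level is hung off the first entry of the new
 * table and the ASCE bits are updated to match.  If a racing thread
 * performed the upgrade first, the freshly allocated table is freed
 * and the check is repeated.
 */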
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
        unsigned long *table, *pgd;
        unsigned long entry;

        BUG_ON(limit > (1UL << 53));
repeat:
        table = crst_table_alloc(mm);
        if (!table)
                return -ENOMEM;
        spin_lock_bh(&mm->page_table_lock);
        if (mm->context.asce_limit < limit) {
                pgd = (unsigned long *) mm->pgd;
                if (mm->context.asce_limit <= (1UL << 31)) {
                        entry = _REGION3_ENTRY_EMPTY;
                        mm->context.asce_limit = 1UL << 42;
                        mm->context.asce_bits = _ASCE_TABLE_LENGTH |
                                                _ASCE_USER_BITS |
                                                _ASCE_TYPE_REGION3;
                } else {
                        entry = _REGION2_ENTRY_EMPTY;
                        mm->context.asce_limit = 1UL << 53;
                        mm->context.asce_bits = _ASCE_TABLE_LENGTH |
                                                _ASCE_USER_BITS |
                                                _ASCE_TYPE_REGION2;
                }
                crst_table_init(table, entry);
                pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
                mm->pgd = (pgd_t *) table;
                mm->task_size = mm->context.asce_limit;
                table = NULL;
        }
        spin_unlock_bh(&mm->page_table_lock);
        if (table)
                crst_table_free(mm, table);
        if (mm->context.asce_limit < limit)
                goto repeat;
        update_mm(mm, current);
        return 0;
}
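
/*
 * crst_table_downgrade - the inverse of crst_table_upgrade: strip
 * top-level region tables one at a time until the ASCE limit no longer
 * exceeds 'limit'.  Each iteration takes the table referenced by the
 * first entry of the current top level as the new pgd and frees the
 * old top level.  The mm is flushed from the TLB first and the new
 * ASCE is loaded via update_mm() at the end.
 */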
void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
        pgd_t *pgd;

        if (mm->context.asce_limit <= limit)
                return;
        __tlb_flush_mm(mm);
        while (mm->context.asce_limit > limit) {
                pgd = mm->pgd;
                switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
                case _REGION_ENTRY_TYPE_R2:
                        mm->context.asce_limit = 1UL << 42;
                        mm->context.asce_bits = _ASCE_TABLE_LENGTH |
                                                _ASCE_USER_BITS |
                                                _ASCE_TYPE_REGION3;
                        break;
                case _REGION_ENTRY_TYPE_R3:
                        mm->context.asce_limit = 1UL << 31;
                        mm->context.asce_bits = _ASCE_TABLE_LENGTH |
                                                _ASCE_USER_BITS |
                                                _ASCE_TYPE_SEGMENT;
                        break;
                default:
                        BUG();
                }
                mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
                mm->task_size = mm->context.asce_limit;
                crst_table_free(mm, (unsigned long *) pgd);
        }
        update_mm(mm, current);
}
#endif
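
/*
 * Atomically toggle the given bits in *v and return the new value.
 * Used to flip the fragment state bits kept in page->_mapcount.
 */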
static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
{
        unsigned int old, new;

        do {
                old = atomic_read(v);
                new = old ^ bits;
        } while (atomic_cmpxchg(v, old, new) != old);
        return new;
}

/*
 * page table entry allocation/free routines.
 */
#ifdef CONFIG_PGSTE
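/*
 * With pgstes (page table extensions used by KVM) each page table gets
 * a full 4K page: the lower half holds the ptes, the upper half the
 * matching pgstes.  Such pages never go on the mm's fragment list and
 * have page->_mapcount preset to 3.
 */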
static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
{
        struct page *page;
        unsigned long *table;

        page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
        if (!page)
                return NULL;
        pgtable_page_ctor(page);
        atomic_set(&page->_mapcount, 3);
        table = (unsigned long *) page_to_phys(page);
        clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
        clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
        return table;
}

static inline void page_table_free_pgste(unsigned long *table)
{
        struct page *page;

        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
        pgtable_page_dtor(page);
        atomic_set(&page->_mapcount, -1);
        __free_page(page);
}
#endif
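
/*
 * page_table_alloc - allocate one page table.  The mm keeps a list of
 * 4K pages that still contain free fragments (context.pgtable_list).
 * If such a page exists, the first free fragment is claimed by setting
 * its bit in page->_mapcount; fragments whose delayed (RCU) free is
 * still pending count as busy as well, which is what folding in the
 * high nibble (mask >> 4) achieves.  Once all fragments are in use the
 * page is taken off the list.  If no page with a free fragment exists,
 * a fresh page is allocated, its first fragment handed out and the
 * page added to the list.
 */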
unsigned long *page_table_alloc(struct mm_struct *mm)
{
        struct page *page;
        unsigned long *table;
        unsigned int mask, bit;

#ifdef CONFIG_PGSTE
        if (mm_has_pgste(mm))
                return page_table_alloc_pgste(mm);
#endif
        /* Allocate fragments of a 4K page as 1K/2K page table */
        spin_lock_bh(&mm->context.list_lock);
        mask = FRAG_MASK;
        if (!list_empty(&mm->context.pgtable_list)) {
                page = list_first_entry(&mm->context.pgtable_list,
                                        struct page, lru);
                table = (unsigned long *) page_to_phys(page);
                mask = atomic_read(&page->_mapcount);
                mask = mask | (mask >> 4);
        }
        if ((mask & FRAG_MASK) == FRAG_MASK) {
                spin_unlock_bh(&mm->context.list_lock);
                page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
                if (!page)
                        return NULL;
                pgtable_page_ctor(page);
                atomic_set(&page->_mapcount, 1);
                table = (unsigned long *) page_to_phys(page);
                clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
                spin_lock_bh(&mm->context.list_lock);
                list_add(&page->lru, &mm->context.pgtable_list);
        } else {
                for (bit = 1; mask & bit; bit <<= 1)
                        table += PTRS_PER_PTE;
                mask = atomic_xor_bits(&page->_mapcount, bit);
                if ((mask & FRAG_MASK) == FRAG_MASK)
                        list_del(&page->lru);
        }
        spin_unlock_bh(&mm->context.list_lock);
        return table;
}
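
/*
 * page_table_free - release one page table fragment immediately.  The
 * fragment's bit in page->_mapcount is cleared and the page is requeued
 * on the fragment list; once no bits remain set the whole 4K page is
 * freed.
 */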
void page_table_free(struct mm_struct *mm, unsigned long *table)
{
        struct page *page;
        unsigned int bit, mask;

#ifdef CONFIG_PGSTE
        if (mm_has_pgste(mm))
                return page_table_free_pgste(table);
#endif
        /* Free 1K/2K page table fragment of a 4K page */
        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
        bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
        spin_lock_bh(&mm->context.list_lock);
        if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
                list_del(&page->lru);
        mask = atomic_xor_bits(&page->_mapcount, bit);
        if (mask & FRAG_MASK)
                list_add(&page->lru, &mm->context.pgtable_list);
        spin_unlock_bh(&mm->context.list_lock);
        if (mask == 0) {
                pgtable_page_dtor(page);
                atomic_set(&page->_mapcount, -1);
                __free_page(page);
        }
}

#ifdef CONFIG_HAVE_RCU_TABLE_FREE
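
/*
 * RCU variant of the page table freeing code.  Tables are page
 * aligned, so the low bits of the pointer passed to tlb_remove_table()
 * are free to encode what is being freed: 0 for a full crst table, the
 * fragment bit shifted left by four for a 1K/2K page table fragment,
 * and FRAG_MASK for a pgste page.
 */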
static void __page_table_free_rcu(void *table, unsigned bit)
{
        struct page *page;

#ifdef CONFIG_PGSTE
        if (bit == FRAG_MASK)
                return page_table_free_pgste(table);
#endif
        /* Free 1K/2K page table fragment of a 4K page */
        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
        if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
                pgtable_page_dtor(page);
                atomic_set(&page->_mapcount, -1);
                __free_page(page);
        }
}
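
/*
 * page_table_free_rcu - delayed counterpart of page_table_free.  The
 * fragment's allocation bit is cleared and the matching bit in the
 * high nibble of page->_mapcount is set to mark the free as pending.
 * The encoded pointer is then handed to tlb_remove_table(), which
 * calls __tlb_remove_table() once no other CPU can still be walking
 * the old tables.
 */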
void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
{
        struct mm_struct *mm;
        struct page *page;
        unsigned int bit, mask;

        mm = tlb->mm;
#ifdef CONFIG_PGSTE
        if (mm_has_pgste(mm)) {
                table = (unsigned long *) (__pa(table) | FRAG_MASK);
                tlb_remove_table(tlb, table);
                return;
        }
#endif
        bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
        spin_lock_bh(&mm->context.list_lock);
        if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
                list_del(&page->lru);
        mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
        if (mask & FRAG_MASK)
                list_add_tail(&page->lru, &mm->context.pgtable_list);
        spin_unlock_bh(&mm->context.list_lock);
        table = (unsigned long *) (__pa(table) | (bit << 4));
        tlb_remove_table(tlb, table);
}
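
/*
 * Called by the generic mmu_gather code once the table is no longer
 * reachable.  The type encoded in the low pointer bits decides whether
 * this is a page table fragment (or pgste page) to be finished by
 * __page_table_free_rcu() or a full crst table that can simply be
 * freed.
 */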
void __tlb_remove_table(void *_table)
{
        void *table = (void *)((unsigned long) _table & PAGE_MASK);
        unsigned type = (unsigned long) _table & ~PAGE_MASK;

        if (type)
                __page_table_free_rcu(table, type);
        else
                free_pages((unsigned long) table, ALLOC_ORDER);
}

#endif

/*
 * switch on pgstes for the current userspace process (for kvm)
 */
int s390_enable_sie(void)
{
        struct task_struct *tsk = current;
        struct mm_struct *mm, *old_mm;

        /* Do we have switched amode? If not, we cannot do sie */
        if (user_mode == HOME_SPACE_MODE)
                return -EINVAL;

        /* Do we have pgstes? If yes, we are done */
        if (mm_has_pgste(tsk->mm))
                return 0;

        /* let's check if we are allowed to replace the mm */
        task_lock(tsk);
        if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
#ifdef CONFIG_AIO
            !hlist_empty(&tsk->mm->ioctx_list) ||
#endif
            tsk->mm != tsk->active_mm) {
                task_unlock(tsk);
                return -EINVAL;
        }
        task_unlock(tsk);

        /* we copy the mm and let dup_mm create the page tables with pgstes */
        tsk->mm->context.alloc_pgste = 1;
        mm = dup_mm(tsk);
        tsk->mm->context.alloc_pgste = 0;
        if (!mm)
                return -ENOMEM;

        /* Now let's check again if something happened */
        task_lock(tsk);
        if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
#ifdef CONFIG_AIO
            !hlist_empty(&tsk->mm->ioctx_list) ||
#endif
            tsk->mm != tsk->active_mm) {
                mmput(mm);
                task_unlock(tsk);
                return -EINVAL;
        }

        /* ok, we are alone. No ptrace, no threads, etc. */
        old_mm = tsk->mm;
        tsk->mm = tsk->active_mm = mm;
        preempt_disable();
        update_mm(mm, tsk);
        atomic_inc(&mm->context.attach_count);
        atomic_dec(&old_mm->context.attach_count);
        cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
        preempt_enable();
        task_unlock(tsk);
        mmput(old_mm);
        return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);

#if defined(CONFIG_DEBUG_PAGEALLOC) && defined(CONFIG_HIBERNATION)
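/*
 * Check whether a kernel page is currently mapped.  With
 * CONFIG_DEBUG_PAGEALLOC unused kernel pages are unmapped, and the
 * hibernation code must not touch them.  The lra (load real address)
 * instruction sets condition code 0 only if the address can be
 * translated.
 */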
bool kernel_page_present(struct page *page)
{
        unsigned long addr;
        int cc;

        addr = page_to_phys(page);
        asm volatile(
                "       lra     %1,0(%1)\n"
                "       ipm     %0\n"
                "       srl     %0,28"
                : "=d" (cc), "+a" (addr) : : "cc");
        return cc == 0;
}
#endif /* CONFIG_HIBERNATION && CONFIG_DEBUG_PAGEALLOC */