x86: unify PAE/non-PAE pgd_ctor
arch/x86/mm/pgtable_32.c
/*
 *  linux/arch/i386/mm/pgtable.c
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/nmi.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>

#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>

void show_mem(void)
{
        int total = 0, reserved = 0;
        int shared = 0, cached = 0;
        int highmem = 0;
        struct page *page;
        pg_data_t *pgdat;
        unsigned long i;
        unsigned long flags;

        printk(KERN_INFO "Mem-info:\n");
        show_free_areas();
        printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
        for_each_online_pgdat(pgdat) {
                pgdat_resize_lock(pgdat, &flags);
                for (i = 0; i < pgdat->node_spanned_pages; ++i) {
                        if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
                                touch_nmi_watchdog();
                        page = pgdat_page_nr(pgdat, i);
                        total++;
                        if (PageHighMem(page))
                                highmem++;
                        if (PageReserved(page))
                                reserved++;
                        else if (PageSwapCache(page))
                                cached++;
                        else if (page_count(page))
                                shared += page_count(page) - 1;
                }
                pgdat_resize_unlock(pgdat, &flags);
        }
        printk(KERN_INFO "%d pages of RAM\n", total);
        printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
        printk(KERN_INFO "%d reserved pages\n", reserved);
        printk(KERN_INFO "%d pages shared\n", shared);
        printk(KERN_INFO "%d pages swap cached\n", cached);

        printk(KERN_INFO "%lu pages dirty\n", global_page_state(NR_FILE_DIRTY));
        printk(KERN_INFO "%lu pages writeback\n",
               global_page_state(NR_WRITEBACK));
        printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
        printk(KERN_INFO "%lu pages slab\n",
               global_page_state(NR_SLAB_RECLAIMABLE) +
               global_page_state(NR_SLAB_UNRECLAIMABLE));
        printk(KERN_INFO "%lu pages pagetables\n",
               global_page_state(NR_PAGETABLE));
}

/*
 * Associate a virtual page frame with a given physical page frame
 * and protection flags for that frame.
 */
static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        pgd = swapper_pg_dir + pgd_index(vaddr);
        if (pgd_none(*pgd)) {
                BUG();
                return;
        }
        pud = pud_offset(pgd, vaddr);
        if (pud_none(*pud)) {
                BUG();
                return;
        }
        pmd = pmd_offset(pud, vaddr);
        if (pmd_none(*pmd)) {
                BUG();
                return;
        }
        pte = pte_offset_kernel(pmd, vaddr);
        if (pgprot_val(flags))
                set_pte_present(&init_mm, vaddr, pte, pfn_pte(pfn, flags));
        else
                pte_clear(&init_mm, vaddr, pte);

        /*
         * It's enough to flush this one mapping.
         * (PGE mappings get flushed as well)
         */
        __flush_tlb_one(vaddr);
}

/*
 * Associate a large virtual page frame with a given physical page frame
 * and protection flags for that frame. pfn is for the base of the page,
 * vaddr is what the page gets mapped to - both must be properly aligned.
 * The pmd must already be instantiated. Assumes PAE mode.
 */
void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;

        if (vaddr & (PMD_SIZE-1)) {             /* vaddr is misaligned */
                printk(KERN_WARNING "set_pmd_pfn: vaddr misaligned\n");
                return; /* BUG(); */
        }
        if (pfn & (PTRS_PER_PTE-1)) {           /* pfn is misaligned */
                printk(KERN_WARNING "set_pmd_pfn: pfn misaligned\n");
                return; /* BUG(); */
        }
        pgd = swapper_pg_dir + pgd_index(vaddr);
        if (pgd_none(*pgd)) {
                printk(KERN_WARNING "set_pmd_pfn: pgd_none\n");
                return; /* BUG(); */
        }
        pud = pud_offset(pgd, vaddr);
        pmd = pmd_offset(pud, vaddr);
        set_pmd(pmd, pfn_pmd(pfn, flags));
        /*
         * It's enough to flush this one mapping.
         * (PGE mappings get flushed as well)
         */
        __flush_tlb_one(vaddr);
}
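
/*
 * Illustrative usage sketch, not part of the original file: mapping one
 * PMD-sized block of physical memory through set_pmd_pfn().  The virtual
 * address, the pfn and the choice of a large-page kernel protection such
 * as PAGE_KERNEL_LARGE are assumptions made for the example; the caller
 * must keep vaddr and pfn aligned as checked above, and the pmd covering
 * vaddr must already be instantiated.
 */
static void __maybe_unused example_set_pmd_pfn(void)
{
        unsigned long vaddr = 0xffc00000UL;     /* PMD_SIZE aligned (example) */
        unsigned long pfn = 0x10000UL;          /* PTRS_PER_PTE aligned (example) */

        set_pmd_pfn(vaddr, pfn, PAGE_KERNEL_LARGE);
}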

static int fixmaps;
unsigned long __FIXADDR_TOP = 0xfffff000;
EXPORT_SYMBOL(__FIXADDR_TOP);

void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
{
        unsigned long address = __fix_to_virt(idx);

        if (idx >= __end_of_fixed_addresses) {
                BUG();
                return;
        }
        set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
        fixmaps++;
}

/**
 * reserve_top_address - reserves a hole in the top of kernel address space
 * @reserve - size of hole to reserve
 *
 * Can be used to relocate the fixmap area and poke a hole in the top
 * of kernel address space to make room for a hypervisor.
 */
void reserve_top_address(unsigned long reserve)
{
        BUG_ON(fixmaps > 0);
        printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
               (int)-reserve);
        __FIXADDR_TOP = -reserve - PAGE_SIZE;
        __VMALLOC_RESERVE += reserve;
}
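
/*
 * Illustrative sketch, not part of the original file: a hypervisor port
 * would call reserve_top_address() very early, before any fixmap entry is
 * established, to keep a chunk of the top of the virtual address space for
 * itself.  The 64MB figure below is purely an example value.
 */
static void __maybe_unused example_reserve_hypervisor_hole(void)
{
        reserve_top_address(64 * 1024 * 1024);  /* hypothetical 64MB hole */
}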

pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
        return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
}

struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
        struct page *pte;

#ifdef CONFIG_HIGHPTE
        pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
#else
        pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
#endif
        return pte;
}

/*
 * List of all pgd's needed for non-PAE so it can invalidate entries
 * in both cached and uncached pgd's; not needed for PAE since the
 * kernel pmd is shared. If PAE were not to share the pmd a similar
 * tactic would be needed. This is essentially codepath-based locking
 * against pageattr.c; it is the unique case in which a valid change
 * of kernel pagetables can't be lazily synchronized by vmalloc faults.
 * vmalloc faults work because attached pagetables are never freed.
 * -- wli
 */
static inline void pgd_list_add(pgd_t *pgd)
{
        struct page *page = virt_to_page(pgd);

        list_add(&page->lru, &pgd_list);
}

static inline void pgd_list_del(pgd_t *pgd)
{
        struct page *page = virt_to_page(pgd);

        list_del(&page->lru);
}
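
/*
 * Illustrative sketch, not part of the original file: what the pgd_list
 * above is used for.  When a kernel pagetable entry must change in a way
 * that vmalloc faults cannot lazily fix up, every pgd on the list has to
 * be updated under pgd_lock, roughly in the spirit of pageattr.c.  This
 * helper and its arguments are hypothetical.
 */
static void __maybe_unused example_sync_kernel_pgd(unsigned long address,
                                                   pgd_t entry)
{
        struct page *page;
        unsigned long flags;

        spin_lock_irqsave(&pgd_lock, flags);
        list_for_each_entry(page, &pgd_list, lru) {
                pgd_t *pgd = (pgd_t *)page_address(page) + pgd_index(address);

                set_pgd(pgd, entry);
        }
        spin_unlock_irqrestore(&pgd_lock, flags);
}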

#define UNSHARED_PTRS_PER_PGD                           \
        (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)

static void pgd_ctor(void *p)
{
        pgd_t *pgd = p;
        unsigned long flags;

        /* Clear usermode parts of PGD */
        memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));

        spin_lock_irqsave(&pgd_lock, flags);

        /* If the pgd points to a shared pagetable level (either the
           ptes in non-PAE, or shared PMD in PAE), then just copy the
           references from swapper_pg_dir. */
        if (PAGETABLE_LEVELS == 2 ||
            (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD)) {
                clone_pgd_range(pgd + USER_PTRS_PER_PGD,
                                swapper_pg_dir + USER_PTRS_PER_PGD,
                                KERNEL_PGD_PTRS);
                paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
                                        __pa(swapper_pg_dir) >> PAGE_SHIFT,
                                        USER_PTRS_PER_PGD,
                                        KERNEL_PGD_PTRS);
        }

        /* list required to sync kernel mapping updates */
        if (!SHARED_KERNEL_PMD)
                pgd_list_add(pgd);

        spin_unlock_irqrestore(&pgd_lock, flags);
}
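
/*
 * For reference, a sketch (not part of the original file) of what the
 * shared-kernel-mapping path in pgd_ctor() boils down to: clone_pgd_range()
 * is essentially a memcpy of pgd slots, so the kernel half of the new pgd
 * becomes a straight copy of swapper_pg_dir's kernel half.
 */
static void __maybe_unused example_share_kernel_half(pgd_t *pgd)
{
        memcpy(pgd + USER_PTRS_PER_PGD,
               swapper_pg_dir + USER_PTRS_PER_PGD,
               KERNEL_PGD_PTRS * sizeof(pgd_t));
}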

static void pgd_dtor(void *pgd)
{
        unsigned long flags; /* can be called from interrupt context */

        if (SHARED_KERNEL_PMD)
                return;

        spin_lock_irqsave(&pgd_lock, flags);
        pgd_list_del(pgd);
        spin_unlock_irqrestore(&pgd_lock, flags);
}

#ifdef CONFIG_X86_PAE
/*
 * Mop up any pmd pages which may still be attached to the pgd.
 * Normally they will be freed by munmap/exit_mmap, but any pmd we
 * preallocate which never got a corresponding vma will need to be
 * freed manually.
 */
static void pgd_mop_up_pmds(pgd_t *pgdp)
{
        int i;

        for (i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
                pgd_t pgd = pgdp[i];

                if (pgd_val(pgd) != 0) {
                        pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);

                        pgdp[i] = native_make_pgd(0);

                        paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT);
                        pmd_free(pmd);
                }
        }
}

/*
 * In PAE mode, we need to do a cr3 reload (=tlb flush) when
 * updating the top-level pagetable entries to guarantee the
 * processor notices the update.  Since this is expensive, and
 * all 4 top-level entries are used almost immediately in a
 * new process's life, we just pre-populate them here.
 *
 * Also, if we're in a paravirt environment where the kernel pmd is
 * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
 * and initialize the kernel pmds here.
 */
static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
{
        pud_t *pud;
        unsigned long addr;
        int i;

        pud = pud_offset(pgd, 0);
        for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
             i++, pud++, addr += PUD_SIZE) {
                pmd_t *pmd = pmd_alloc_one(mm, addr);

                if (!pmd) {
                        pgd_mop_up_pmds(pgd);
                        return 0;
                }

                if (i >= USER_PTRS_PER_PGD)
                        memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
                               sizeof(pmd_t) * PTRS_PER_PMD);

                pud_populate(mm, pud, pmd);
        }

        return 1;
}

#else  /* !CONFIG_X86_PAE */
/* No need to prepopulate any pagetable entries in non-PAE modes. */
static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
{
        return 1;
}

static void pgd_mop_up_pmds(pgd_t *pgd)
{
}
#endif  /* CONFIG_X86_PAE */

pgd_t *pgd_alloc(struct mm_struct *mm)
{
        pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor);

        mm->pgd = pgd;          /* so that alloc_pd can use it */

        if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
                quicklist_free(0, pgd_dtor, pgd);
                pgd = NULL;
        }

        return pgd;
}

void pgd_free(pgd_t *pgd)
{
        pgd_mop_up_pmds(pgd);
        quicklist_free(0, pgd_dtor, pgd);
}
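
/*
 * Illustrative sketch, not part of the original file: the pgd lifecycle as
 * seen from the generic mm code (compare mm_alloc_pgd()/mm_free_pgd() in
 * kernel/fork.c).  Error handling is reduced to the bare minimum.
 */
static int __maybe_unused example_mm_pgd_lifecycle(struct mm_struct *mm)
{
        mm->pgd = pgd_alloc(mm);        /* runs pgd_ctor, prepopulates PAE pmds */
        if (!mm->pgd)
                return -ENOMEM;

        /* ... the mm lives its life here ... */

        pgd_free(mm->pgd);              /* mops up pmds, returns pgd to the quicklist */
        return 0;
}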

void check_pgt_cache(void)
{
        quicklist_trim(0, pgd_dtor, 25, 16);
}

void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
{
        paravirt_release_pt(page_to_pfn(pte));
        tlb_remove_page(tlb, pte);
}

#ifdef CONFIG_X86_PAE

void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
{
        /* This is called just after the pmd has been detached from
           the pgd, which requires a full tlb flush to be recognized
           by the CPU. Rather than incurring multiple tlb flushes
           while the address space is being pulled down, make the tlb
           gathering machinery do a full flush when we're done. */
        tlb->fullmm = 1;

        paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
        tlb_remove_page(tlb, virt_to_page(pmd));
}

#endif