/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 *   This program is free software; you can redistribute it and/or
 *   modify it under the terms of the GNU General Public License
 *   as published by the Free Software Foundation, version 2.
 *
 *   This program is distributed in the hope that it will be useful, but
 *   WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 *   NON INFRINGEMENT.  See the GNU General Public License for
 *   more details.
 *
 * TILE Huge TLB Page Support for Kernel.
 * Taken from i386 hugetlb implementation:
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <linux/mman.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/setup.h>

#ifdef CONFIG_HUGETLB_SUPER_PAGES

/*
 * Provide an additional huge page size (in addition to the regular default
 * huge page size) if no "hugepagesz" arguments are specified.
 * Note that it must be smaller than the default huge page size so
 * that it's possible to allocate them on demand from the buddy allocator.
 * You can change this to 64K (on a 16K build), 256K, 1M, or 4M,
 * or not define it at all.
 */
#define ADDITIONAL_HUGE_SIZE (1024 * 1024UL)

/* "Extra" page-size multipliers, one per level of the page table. */
int huge_shift[HUGE_SHIFT_ENTRIES] = {
#ifdef ADDITIONAL_HUGE_SIZE
#define ADDITIONAL_HUGE_SHIFT __builtin_ctzl(ADDITIONAL_HUGE_SIZE / PAGE_SIZE)
        [HUGE_SHIFT_PAGE] = ADDITIONAL_HUGE_SHIFT
#endif
};

/*
 * This routine is a hybrid of pte_alloc_map() and pte_alloc_kernel().
 * It assumes that L2 PTEs are never in HIGHMEM (we don't support that).
 * It locks the user pagetable, and bumps up the mm->nr_ptes field,
 * but otherwise allocates the page table using the kernel versions.
 */
static pte_t *pte_alloc_hugetlb(struct mm_struct *mm, pmd_t *pmd,
                                unsigned long address)
{
        pte_t *new;

        if (pmd_none(*pmd)) {
                new = pte_alloc_one_kernel(mm, address);
                if (!new)
                        return NULL;

                smp_wmb(); /* See comment in __pte_alloc */

                spin_lock(&mm->page_table_lock);
                if (likely(pmd_none(*pmd))) {   /* Has another populated it ? */
                        mm->nr_ptes++;
                        pmd_populate_kernel(mm, pmd, new);
                        new = NULL;
                } else
                        VM_BUG_ON(pmd_trans_splitting(*pmd));
                spin_unlock(&mm->page_table_lock);
                if (new)
                        pte_free_kernel(mm, new);
        }

        return pte_offset_kernel(pmd, address);
}
#endif

pte_t *huge_pte_alloc(struct mm_struct *mm,
                      unsigned long addr, unsigned long sz)
{
        pgd_t *pgd;
        pud_t *pud;

        addr &= -sz;   /* Mask off any low bits in the address. */

        pgd = pgd_offset(mm, addr);
        pud = pud_alloc(mm, pgd, addr);

#ifdef CONFIG_HUGETLB_SUPER_PAGES
        if (sz >= PGDIR_SIZE) {
                BUG_ON(sz != PGDIR_SIZE &&
                       sz != PGDIR_SIZE << huge_shift[HUGE_SHIFT_PGDIR]);
                return (pte_t *)pud;
        } else {
                pmd_t *pmd = pmd_alloc(mm, pud, addr);
                if (sz >= PMD_SIZE) {
                        BUG_ON(sz != PMD_SIZE &&
                               sz != (PMD_SIZE << huge_shift[HUGE_SHIFT_PMD]));
                        return (pte_t *)pmd;
                } else {
                        if (sz != PAGE_SIZE << huge_shift[HUGE_SHIFT_PAGE])
                                panic("Unexpected page size %#lx\n", sz);
                        return pte_alloc_hugetlb(mm, pmd, addr);
                }
        }
#else
        BUG_ON(sz != PMD_SIZE);
        return (pte_t *) pmd_alloc(mm, pud, addr);
#endif
}

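/*
 * Look up the PTE at the given index.  If that entry is not present
 * and an extra "super" page-size shift is enabled for this page-table
 * level, fall back to the entry at the start of the naturally aligned
 * super-page group, provided that entry is present and marked super.
 */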
static pte_t *get_pte(pte_t *base, int index, int level)
{
        pte_t *ptep = base + index;
#ifdef CONFIG_HUGETLB_SUPER_PAGES
        if (!pte_present(*ptep) && huge_shift[level] != 0) {
                unsigned long mask = -1UL << huge_shift[level];
                pte_t *super_ptep = base + (index & mask);
                pte_t pte = *super_ptep;
                if (pte_present(pte) && pte_super(pte))
                        ptep = super_ptep;
        }
#endif
        return ptep;
}

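/*
 * Walk the page table for "addr", honoring huge pages at the PUD and
 * PMD levels and super pages at every level, and return a pointer to
 * the entry that maps it, or NULL if nothing is mapped there.
 */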
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
#ifdef CONFIG_HUGETLB_SUPER_PAGES
        pte_t *pte;
#endif

        /* Get the top-level page table entry. */
        pgd = (pgd_t *)get_pte((pte_t *)mm->pgd, pgd_index(addr), 0);
        if (!pgd_present(*pgd))
                return NULL;

        /* We don't have four levels. */
        pud = pud_offset(pgd, addr);
#ifndef __PAGETABLE_PUD_FOLDED
# error support fourth page table level
#endif

        /* Check for an L0 huge PTE, if we have three levels. */
#ifndef __PAGETABLE_PMD_FOLDED
        if (pud_huge(*pud))
                return (pte_t *)pud;

        pmd = (pmd_t *)get_pte((pte_t *)pud_page_vaddr(*pud),
                               pmd_index(addr), 1);
        if (!pmd_present(*pmd))
                return NULL;
#else
        pmd = pmd_offset(pud, addr);
#endif

        /* Check for an L1 huge PTE. */
        if (pmd_huge(*pmd))
                return (pte_t *)pmd;

#ifdef CONFIG_HUGETLB_SUPER_PAGES
        /* Check for an L2 huge PTE. */
        pte = get_pte((pte_t *)pmd_page_vaddr(*pmd), pte_index(addr), 2);
        if (!pte_present(*pte))
                return NULL;
        return pte;
#else
        return NULL;
#endif
}

struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
                              int write)
{
        return ERR_PTR(-EINVAL);
}

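/*
 * A PMD or PUD maps a huge page when the hardware _PAGE_HUGE_PAGE
 * bit is set in the entry.
 */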
int pmd_huge(pmd_t pmd)
{
        return !!(pmd_val(pmd) & _PAGE_HUGE_PAGE);
}

int pud_huge(pud_t pud)
{
        return !!(pud_val(pud) & _PAGE_HUGE_PAGE);
}

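/*
 * For a huge mapping found at the PMD or PUD level, return the
 * struct page for the base page within the huge page that contains
 * "address".
 */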
struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
                             pmd_t *pmd, int write)
{
        struct page *page;

        page = pte_page(*(pte_t *)pmd);
        if (page)
                page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
        return page;
}

struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
                             pud_t *pud, int write)
{
        struct page *page;

        page = pte_page(*(pte_t *)pud);
        if (page)
                page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
        return page;
}

int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
        return 0;
}

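/*
 * When the architecture provides hugetlb_get_unmapped_area(), the
 * helpers below search the address space bottom-up or top-down with
 * vm_unmapped_area(), using align_mask to force huge-page alignment.
 */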
#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
                unsigned long addr, unsigned long len,
                unsigned long pgoff, unsigned long flags)
{
        struct hstate *h = hstate_file(file);
        struct vm_unmapped_area_info info;

        info.flags = 0;
        info.length = len;
        info.low_limit = TASK_UNMAPPED_BASE;
        info.high_limit = TASK_SIZE;
        info.align_mask = PAGE_MASK & ~huge_page_mask(h);
        info.align_offset = 0;
        return vm_unmapped_area(&info);
}

static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
                unsigned long addr0, unsigned long len,
                unsigned long pgoff, unsigned long flags)
{
        struct hstate *h = hstate_file(file);
        struct vm_unmapped_area_info info;
        unsigned long addr;

        info.flags = VM_UNMAPPED_AREA_TOPDOWN;
        info.length = len;
        info.low_limit = PAGE_SIZE;
        info.high_limit = current->mm->mmap_base;
        info.align_mask = PAGE_MASK & ~huge_page_mask(h);
        info.align_offset = 0;
        addr = vm_unmapped_area(&info);

        /*
         * A failed mmap() very likely causes application failure,
         * so fall back to the bottom-up function here.  This scenario
         * can happen with large stack limits and large mmap()
         * allocations.
         */
        if (addr & ~PAGE_MASK) {
                VM_BUG_ON(addr != -ENOMEM);
                info.flags = 0;
                info.low_limit = TASK_UNMAPPED_BASE;
                info.high_limit = TASK_SIZE;
                addr = vm_unmapped_area(&info);
        }

        return addr;
}

unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                unsigned long len, unsigned long pgoff, unsigned long flags)
{
        struct hstate *h = hstate_file(file);
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;

        if (len & ~huge_page_mask(h))
                return -EINVAL;
        if (len > TASK_SIZE)
                return -ENOMEM;

        if (flags & MAP_FIXED) {
                if (prepare_hugepage_range(file, addr, len))
                        return -EINVAL;
                return addr;
        }

        if (addr) {
                addr = ALIGN(addr, huge_page_size(h));
                vma = find_vma(mm, addr);
                if (TASK_SIZE - len >= addr &&
                    (!vma || addr + len <= vma->vm_start))
                        return addr;
        }
        if (current->mm->get_unmapped_area == arch_get_unmapped_area)
                return hugetlb_get_unmapped_area_bottomup(file, addr, len,
                                pgoff, flags);
        else
                return hugetlb_get_unmapped_area_topdown(file, addr, len,
                                pgoff, flags);
}
#endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */

#ifdef CONFIG_HUGETLB_SUPER_PAGES
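/*
 * Validate and enable one "hugepagesz=" value: the size must be a
 * power of four no larger than 64 GB.  It is mapped to a page-table
 * level (PUD, PMD, or PTE) and, when it is not the native size for
 * that level, the hypervisor is asked to enable the extra "super"
 * shift before the hstate is registered with the hugetlb core.
 */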
static __init int __setup_hugepagesz(unsigned long ps)
{
        int log_ps = __builtin_ctzl(ps);
        int level, base_shift;

        if ((1UL << log_ps) != ps || (log_ps & 1) != 0) {
                pr_warn("Not enabling %ld byte huge pages;"
                        " must be a power of four.\n", ps);
                return -EINVAL;
        }

        if (ps > 64*1024*1024*1024UL) {
                pr_warn("Not enabling %ld MB huge pages;"
                        " largest legal value is 64 GB.\n", ps >> 20);
                return -EINVAL;
        } else if (ps >= PUD_SIZE) {
                static long hv_jpage_size;
                if (hv_jpage_size == 0)
                        hv_jpage_size = hv_sysconf(HV_SYSCONF_PAGE_SIZE_JUMBO);
                if (hv_jpage_size != PUD_SIZE) {
                        pr_warn("Not enabling >= %ld MB huge pages:"
                                " hypervisor reports size %ld\n",
                                PUD_SIZE >> 20, hv_jpage_size);
                        return -EINVAL;
                }
                level = 0;
                base_shift = PUD_SHIFT;
        } else if (ps >= PMD_SIZE) {
                level = 1;
                base_shift = PMD_SHIFT;
        } else if (ps > PAGE_SIZE) {
                level = 2;
                base_shift = PAGE_SHIFT;
        } else {
                pr_err("hugepagesz: huge page size %ld too small\n", ps);
                return -EINVAL;
        }

        if (log_ps != base_shift) {
                int shift_val = log_ps - base_shift;
                if (huge_shift[level] != 0) {
                        int old_shift = base_shift + huge_shift[level];
                        pr_warn("Not enabling %ld MB huge pages;"
                                " already have size %ld MB.\n",
                                ps >> 20, (1UL << old_shift) >> 20);
                        return -EINVAL;
                }
                if (hv_set_pte_super_shift(level, shift_val) != 0) {
                        pr_warn("Not enabling %ld MB huge pages;"
                                " no hypervisor support.\n", ps >> 20);
                        return -EINVAL;
                }
                printk(KERN_DEBUG "Enabled %ld MB huge pages\n", ps >> 20);
                huge_shift[level] = shift_val;
        }

        hugetlb_add_hstate(log_ps - PAGE_SHIFT);
        return 0;
}

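/*
 * Handle each "hugepagesz=" boot argument.  The first one seen clears
 * the compile-time ADDITIONAL_HUGE_SIZE default from huge_shift[], so
 * only explicitly requested sizes are enabled.
 */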
static bool saw_hugepagesz;

static __init int setup_hugepagesz(char *opt)
{
        if (!saw_hugepagesz) {
                saw_hugepagesz = true;
                memset(huge_shift, 0, sizeof(huge_shift));
        }
        return __setup_hugepagesz(memparse(opt, NULL));
}
__setup("hugepagesz=", setup_hugepagesz);
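/*
 * Example (illustrative): booting with "hugepagesz=1m hugepages=64"
 * on the kernel command line enables 1 MB huge pages here and lets
 * the generic hugetlb code preallocate 64 of them at boot.
 */
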
#ifdef ADDITIONAL_HUGE_SIZE
/*
 * Provide an additional huge page size if no "hugepagesz" args are given.
 * In that case, all the cores have properly set up their hv super_shift
 * already, but we need to notify the hugetlb code to enable the
 * new huge page size from the Linux point of view.
 */
static __init int add_default_hugepagesz(void)
{
        if (!saw_hugepagesz) {
                BUILD_BUG_ON(ADDITIONAL_HUGE_SIZE >= PMD_SIZE ||
                             ADDITIONAL_HUGE_SIZE <= PAGE_SIZE);
                BUILD_BUG_ON((PAGE_SIZE << ADDITIONAL_HUGE_SHIFT) !=
                             ADDITIONAL_HUGE_SIZE);
                BUILD_BUG_ON(ADDITIONAL_HUGE_SHIFT & 1);
                hugetlb_add_hstate(ADDITIONAL_HUGE_SHIFT);
        }
        return 0;
}
arch_initcall(add_default_hugepagesz);
#endif

#endif /* CONFIG_HUGETLB_SUPER_PAGES */