/*
 * IA-64 Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2002-2004 Rohit Seth <rohit.seth@intel.com>
 * Copyright (C) 2003-2004 Ken Chen <kenneth.w.chen@intel.com>
 *
 * Sep, 2003: add numa support
 * Feb, 2004: dynamic hugetlb page size via boot parameter
 */
#include <linux/config.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <linux/sysctl.h>

#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
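/*
 * hpage_shift holds the live huge page shift.  It starts at
 * HPAGE_SHIFT_DEFAULT and may be overridden at boot via the
 * "hugepagesz=" parameter handled by hugetlb_setup_sz() at the
 * bottom of this file, which also reprograms the huge page
 * region register with the new shift.
 */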
unsigned int hpage_shift = HPAGE_SHIFT_DEFAULT;
static pte_t *
huge_pte_alloc (struct mm_struct *mm, unsigned long addr)
{
        unsigned long taddr = htlbpage_to_page(addr);
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte = NULL;

        pgd = pgd_offset(mm, taddr);
        pud = pud_alloc(mm, pgd, taddr);
        if (pud) {
                pmd = pmd_alloc(mm, pud, taddr);
                if (pmd)
                        pte = pte_alloc_map(mm, pmd, taddr);
        }
        return pte;
}
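/*
 * Read-only counterpart of huge_pte_alloc(): look up the PTE for a
 * huge page address (scaled down via htlbpage_to_page()) and return
 * it, or NULL if any intermediate page table level is not present.
 */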
static pte_t *
huge_pte_offset (struct mm_struct *mm, unsigned long addr)
{
        unsigned long taddr = htlbpage_to_page(addr);
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte = NULL;

        pgd = pgd_offset(mm, taddr);
        if (pgd_present(*pgd)) {
                pud = pud_offset(pgd, taddr);
                if (pud_present(*pud)) {
                        pmd = pmd_offset(pud, taddr);
                        if (pmd_present(*pmd))
                                pte = pte_offset_map(pmd, taddr);
                }
        }

        return pte;
}
#define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; }
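/*
 * Install a huge page mapping: account HPAGE_SIZE/PAGE_SIZE base pages
 * to rss, build a present, young PTE (writable and dirty only when
 * write_access is set) and write it into the page table slot.
 */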
static void
set_huge_pte (struct mm_struct *mm, struct vm_area_struct *vma,
              struct page *page, pte_t *page_table, int write_access)
{
        pte_t entry;

        add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE);
        if (write_access)
                entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
        else
                entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
        entry = pte_mkyoung(entry);
        mk_pte_huge(entry);
        set_pte(page_table, entry);
}
/*
 * This function checks for proper alignment of input addr and len parameters.
 */
int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
{
        if (len & ~HPAGE_MASK)
                return -EINVAL;
        if (addr & ~HPAGE_MASK)
                return -EINVAL;
        if (REGION_NUMBER(addr) != REGION_HPAGE)
                return -EINVAL;

        return 0;
}
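/*
 * Share a parent's huge page mappings with its child at fork time: for
 * each huge page in the VMA, allocate a destination PTE, copy the
 * source PTE, and take an extra reference on the underlying page.
 */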
int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                        struct vm_area_struct *vma)
{
        pte_t *src_pte, *dst_pte, entry;
        struct page *ptepage;
        unsigned long addr = vma->vm_start;
        unsigned long end = vma->vm_end;

        while (addr < end) {
                dst_pte = huge_pte_alloc(dst, addr);
                if (!dst_pte)
                        goto nomem;
                src_pte = huge_pte_offset(src, addr);
                entry = *src_pte;
                ptepage = pte_page(entry);
                get_page(ptepage);
                set_pte(dst_pte, entry);
                add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
                addr += HPAGE_SIZE;
        }
        return 0;
nomem:
        return -ENOMEM;
}
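/*
 * get_user_pages() back end for huge pages: hand back the individual
 * base-page sized struct pages that make up each mapped huge page,
 * advancing through the VMA one PAGE_SIZE step at a time.
 */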
int
follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                    struct page **pages, struct vm_area_struct **vmas,
                    unsigned long *st, int *length, int i)
{
        pte_t *ptep, pte;
        unsigned long start = *st;
        unsigned long pstart;
        int len = *length;
        struct page *page;

        do {
                pstart = start & HPAGE_MASK;
                ptep = huge_pte_offset(mm, start);
                pte = *ptep;

back1:
                page = pte_page(pte);
                if (pages) {
                        page += ((start & ~HPAGE_MASK) >> PAGE_SHIFT);
                        get_page(page);
                        pages[i] = page;
                }
                if (vmas)
                        vmas[i] = vma;
                i++;
                len--;
                start += PAGE_SIZE;
                if (((start & HPAGE_MASK) == pstart) && len &&
                    (start < vma->vm_end))
                        goto back1;
        } while (len && start < vma->vm_end);
        *length = len;
        *st = start;
        return i;
}
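/*
 * Translate a user huge page address directly to its struct page for
 * callers such as follow_page().  Addresses outside the huge page
 * region are rejected; unmapped addresses return NULL.
 */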
struct page *follow_huge_addr(struct mm_struct *mm, unsigned long addr, int write)
{
        struct page *page;
        pte_t *ptep;

        if (REGION_NUMBER(addr) != REGION_HPAGE)
                return ERR_PTR(-EINVAL);

        ptep = huge_pte_offset(mm, addr);
        if (!ptep || pte_none(*ptep))
                return NULL;
        page = pte_page(*ptep);
        page += ((addr & ~HPAGE_MASK) >> PAGE_SHIFT);
        return page;
}
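/*
 * Huge pages on ia64 are reached through follow_huge_addr() above and
 * ordinary-format page tables, never through a huge PMD entry, so the
 * PMD-based hooks below are stubs.
 */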
int pmd_huge(pmd_t pmd)
{
        return 0;
}
struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write)
{
        return NULL;
}
void hugetlb_free_pgd_range(struct mmu_gather **tlb,
                        unsigned long addr, unsigned long end,
                        unsigned long floor, unsigned long ceiling)
{
        /*
         * This is called only when is_hugepage_only_range(addr,),
         * and it follows that is_hugepage_only_range(end,) also.
         *
         * The offset of these addresses from the base of the hugetlb
         * region must be scaled down by HPAGE_SIZE/PAGE_SIZE so that
         * the standard free_pgd_range will free the right page tables.
         *
         * If floor and ceiling are also in the hugetlb region, they
         * must likewise be scaled down; but if outside, left unchanged.
         */
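        /*
         * Worked example (assuming the default 256 MB huge pages,
         * HPAGE_SHIFT == 28, and 16 KB base pages, PAGE_SHIFT == 14):
         * an address one huge page above HPAGE_REGION_BASE sits at
         * region offset 0x10000000; dividing by HPAGE_SIZE/PAGE_SIZE
         * (here 16384) scales it to offset 0x4000, the second base
         * page of the shrunken range that free_pgd_range() will see.
         */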
        addr = htlbpage_to_page(addr);
        end = htlbpage_to_page(end);
        if (is_hugepage_only_range(tlb->mm, floor, HPAGE_SIZE))
                floor = htlbpage_to_page(floor);
        if (is_hugepage_only_range(tlb->mm, ceiling, HPAGE_SIZE))
                ceiling = htlbpage_to_page(ceiling);

        free_pgd_range(tlb, addr, end, floor, ceiling);
}
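/*
 * Tear down the huge page mappings in [start, end): drop the reference
 * on each mapped page and clear its PTE, then subtract the unmapped
 * base pages from rss and flush the TLB for the range.
 */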
void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
        struct mm_struct *mm = vma->vm_mm;
        unsigned long address;
        pte_t *pte;
        struct page *page;

        BUG_ON(start & (HPAGE_SIZE - 1));
        BUG_ON(end & (HPAGE_SIZE - 1));

        for (address = start; address < end; address += HPAGE_SIZE) {
                pte = huge_pte_offset(mm, address);
                if (!pte || pte_none(*pte))
                        continue;
                page = pte_page(*pte);
                put_page(page);
                pte_clear(mm, address, pte);
        }
        add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT));
        flush_tlb_range(vma, start, end);
}
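/*
 * Populate an entire huge page VMA up front at mmap time: for every
 * huge page slot, charge the hugetlbfs quota, find or allocate the
 * page at the right page cache index, and map it with set_huge_pte().
 * Runs under mm->page_table_lock.
 */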
int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
{
        struct mm_struct *mm = current->mm;
        unsigned long addr;
        int ret = 0;

        BUG_ON(vma->vm_start & ~HPAGE_MASK);
        BUG_ON(vma->vm_end & ~HPAGE_MASK);

        spin_lock(&mm->page_table_lock);
        for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
                unsigned long idx;
                pte_t *pte = huge_pte_alloc(mm, addr);
                struct page *page;

                if (!pte) {
                        ret = -ENOMEM;
                        goto out;
                }
                if (!pte_none(*pte))
                        continue;

                idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
                        + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
                page = find_get_page(mapping, idx);
                if (!page) {
                        /* charge the fs quota first */
                        if (hugetlb_get_quota(mapping)) {
                                ret = -ENOMEM;
                                goto out;
                        }
                        page = alloc_huge_page();
                        if (!page) {
                                hugetlb_put_quota(mapping);
                                ret = -ENOMEM;
                                goto out;
                        }
                        ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
                        if (!ret)
                                unlock_page(page);
                        else {
                                hugetlb_put_quota(mapping);
                                page_cache_release(page);
                                goto out;
                        }
                }
                set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
        }
out:
        spin_unlock(&mm->page_table_lock);
        return ret;
}
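/*
 * Pick an unmapped, HPAGE_SIZE aligned range for a new huge page
 * mapping.  Requests are forced into REGION_HPAGE and satisfied by a
 * simple linear walk of the mm's VMAs looking for a large enough gap
 * below RGN_MAP_LIMIT.
 */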
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
                unsigned long pgoff, unsigned long flags)
{
        struct vm_area_struct *vmm;

        if (len > RGN_MAP_LIMIT)
                return -ENOMEM;
        if (len & ~HPAGE_MASK)
                return -EINVAL;
        /* This code assumes that REGION_HPAGE != 0. */
        if ((REGION_NUMBER(addr) != REGION_HPAGE) || (addr & (HPAGE_SIZE - 1)))
                addr = HPAGE_REGION_BASE;
        else
                addr = ALIGN(addr, HPAGE_SIZE);
        for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) {
                /* At this point: (!vmm || addr < vmm->vm_end). */
                if (REGION_OFFSET(addr) + len > RGN_MAP_LIMIT)
                        return -ENOMEM;
                if (!vmm || (addr + len) <= vmm->vm_start)
                        return addr;
                addr = ALIGN(vmm->vm_end, HPAGE_SIZE);
        }
}
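/*
 * "hugepagesz=" boot parameter handler.  The argument is parsed with
 * memparse() (so suffixes such as M and G work; e.g. "hugepagesz=1G",
 * assuming the CPU's PAL reports 1 GB translations as supported) and
 * must be a power of two among the supported translation sizes; the
 * huge page region register is then reprogrammed with the new shift.
 */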
static int __init
hugetlb_setup_sz(char *str)
{
        u64 tr_pages;
        unsigned long long size;

        if (ia64_pal_vm_page_size(&tr_pages, NULL) != 0)
                /*
                 * shouldn't happen, but just in case.
                 */
                tr_pages = 0x15557000UL;

        size = memparse(str, &str);
        if (*str || (size & (size - 1)) || !(tr_pages & size) ||
            size >= (1UL << PAGE_SHIFT << MAX_ORDER)) {
                printk(KERN_WARNING "Invalid huge page size specified\n");
                return 1;
        }

        hpage_shift = __ffs(size);
        /*
         * The boot cpu already executed ia64_mmu_init with
         * HPAGE_SHIFT_DEFAULT; override here with the new page shift.
         */
        ia64_set_rr(HPAGE_REGION_BASE, hpage_shift << 2);
        return 1;
}
__setup("hugepagesz=", hugetlb_setup_sz);