/*
 * IA-32 Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/config.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/tlbflush.h>

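/*
 * A huge page is mapped by a single pmd entry here, so the "huge pte"
 * returned by the helpers below is simply the pmd for the address,
 * cast to pte_t *.
 */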
static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd = NULL;

        pgd = pgd_offset(mm, addr);
        pud = pud_alloc(mm, pgd, addr);
        pmd = pmd_alloc(mm, pud, addr);
        return (pte_t *) pmd;
}

static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd = NULL;

        pgd = pgd_offset(mm, addr);
        pud = pud_offset(pgd, addr);
        pmd = pmd_offset(pud, addr);
        return (pte_t *) pmd;
}

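/*
 * Install a huge pte for @page at @page_table and account the mapping:
 * rss is bumped by the number of base pages covered by one huge page,
 * and the pte is made writable only when write_access is set.
 */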
static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma,
                         struct page *page, pte_t *page_table, int write_access)
{
        pte_t entry;

        add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE);
        if (write_access)
                entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
        else
                entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
        entry = pte_mkyoung(entry);
        mk_pte_huge(entry);
        set_pte(page_table, entry);
}

/*
 * This function checks for proper alignment of input addr and len parameters.
 */
int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
{
        if (len & ~HPAGE_MASK)
                return -EINVAL;
        if (addr & ~HPAGE_MASK)
                return -EINVAL;
        return 0;
}

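/*
 * Share the parent's huge pages with the child at fork() time: each huge
 * pte is duplicated into the new mm and the backing page's refcount is
 * raised, so no page contents are copied.
 */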
int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                        struct vm_area_struct *vma)
{
        pte_t *src_pte, *dst_pte, entry;
        struct page *ptepage;
        unsigned long addr = vma->vm_start;
        unsigned long end = vma->vm_end;

        while (addr < end) {
                dst_pte = huge_pte_alloc(dst, addr);
                if (!dst_pte)
                        goto nomem;
                src_pte = huge_pte_offset(src, addr);
                entry = *src_pte;
                ptepage = pte_page(entry);
                get_page(ptepage);
                set_pte(dst_pte, entry);
                add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
                addr += HPAGE_SIZE;
        }
        return 0;

nomem:
        return -ENOMEM;
}

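/*
 * get_user_pages() support for hugetlb VMAs: step through the range in
 * base-page units and hand back the constituent base pages of the huge
 * pages backing it.  hugetlb mappings are prefaulted, so the ptes are
 * expected to be present.
 */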
int
follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                    struct page **pages, struct vm_area_struct **vmas,
                    unsigned long *position, int *length, int i)
{
        unsigned long vpfn, vaddr = *position;
        int remainder = *length;

        WARN_ON(!is_vm_hugetlb_page(vma));

        vpfn = vaddr/PAGE_SIZE;
        while (vaddr < vma->vm_end && remainder) {

                if (pages) {
                        pte_t *pte;
                        struct page *page;

                        pte = huge_pte_offset(mm, vaddr);

                        /* hugetlb should be locked, and hence, prefaulted */
                        WARN_ON(!pte || pte_none(*pte));

                        page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];

                        WARN_ON(!PageCompound(page));

                        get_page(page);
                        pages[i] = page;
                }

                if (vmas)
                        vmas[i] = vma;

                vaddr += PAGE_SIZE;
                ++vpfn;
                --remainder;
                ++i;
        }

        *length = remainder;
        *position = vaddr;

        return i;
}

#if 0	/* This is just for testing */
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
        unsigned long start = address;
        pte_t *pte;
        struct page *page;
        struct vm_area_struct *vma;

        vma = find_vma(mm, start);
        if (!vma || !is_vm_hugetlb_page(vma))
                return ERR_PTR(-EINVAL);

        pte = huge_pte_offset(mm, address);

        /* hugetlb should be locked, and hence, prefaulted */
        WARN_ON(!pte || pte_none(*pte));

        /* index of the base page within the huge page */
        page = &pte_page(*pte)[(address / PAGE_SIZE) % (HPAGE_SIZE/PAGE_SIZE)];

        WARN_ON(!PageCompound(page));

        return page;
}

int pmd_huge(pmd_t pmd)
{
        return 0;
}

struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
                pmd_t *pmd, int write)
{
        return NULL;
}

#else

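/*
 * Live versions: follow_huge_addr() is not used here (huge pages are found
 * through the pmd walk instead), pmd_huge() tests the PSE bit, and
 * follow_huge_pmd() resolves the base page within the huge mapping.
 */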
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
        return ERR_PTR(-EINVAL);
}

int pmd_huge(pmd_t pmd)
{
        return !!(pmd_val(pmd) & _PAGE_PSE);
}

struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
                pmd_t *pmd, int write)
{
        struct page *page;

        page = pte_page(*(pte_t *)pmd);
        if (page)
                page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT);
        return page;
}
#endif

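/*
 * Tear down the huge ptes in [start, end): clear each entry, drop the
 * reference on its page, subtract the mapped base pages from rss and
 * flush the TLB for the range.
 */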
void unmap_hugepage_range(struct vm_area_struct *vma,
                unsigned long start, unsigned long end)
{
        struct mm_struct *mm = vma->vm_mm;
        unsigned long address;
        pte_t pte, *ptep;
        struct page *page;

        BUG_ON(start & (HPAGE_SIZE - 1));
        BUG_ON(end & (HPAGE_SIZE - 1));

        for (address = start; address < end; address += HPAGE_SIZE) {
                ptep = huge_pte_offset(mm, address);
                if (!ptep)
                        continue;
                pte = ptep_get_and_clear(mm, address, ptep);
                if (pte_none(pte))
                        continue;
                page = pte_page(pte);
                put_page(page);
        }
        add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT));
        flush_tlb_range(vma, start, end);
}

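/*
 * Prefault the whole hugetlb mapping at mmap() time: for every huge-page
 * slot, find the page in the hugetlbfs page cache or allocate one (charging
 * the filesystem quota first), then install the huge pte.  The loop runs
 * under mm->page_table_lock, hence the GFP_ATOMIC page-cache insertion.
 */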
int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
{
        struct mm_struct *mm = current->mm;
        unsigned long addr;
        int ret = 0;

        BUG_ON(vma->vm_start & ~HPAGE_MASK);
        BUG_ON(vma->vm_end & ~HPAGE_MASK);

        spin_lock(&mm->page_table_lock);
        for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
                unsigned long idx;
                pte_t *pte = huge_pte_alloc(mm, addr);
                struct page *page;

                if (!pte) {
                        ret = -ENOMEM;
                        goto out;
                }

                if (!pte_none(*pte))
                        continue;

                idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
                        + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
                page = find_get_page(mapping, idx);
                if (!page) {
                        /* charge the fs quota first */
                        if (hugetlb_get_quota(mapping)) {
                                ret = -ENOMEM;
                                goto out;
                        }
                        page = alloc_huge_page();
                        if (!page) {
                                hugetlb_put_quota(mapping);
                                ret = -ENOMEM;
                                goto out;
                        }
                        ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
                        if (!ret) {
                                unlock_page(page);
                        } else {
                                hugetlb_put_quota(mapping);
                                free_huge_page(page);
                                goto out;
                        }
                }
                set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
        }
out:
        spin_unlock(&mm->page_table_lock);
        return ret;
}

/* x86_64 also uses this file */

#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA

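/*
 * Bottom-up search: walk the VMA list upward from free_area_cache in
 * HPAGE_SIZE-aligned steps until a hole of at least @len bytes is found.
 */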
static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
                unsigned long addr, unsigned long len,
                unsigned long pgoff, unsigned long flags)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long start_addr;

        start_addr = mm->free_area_cache;

full_search:
        addr = ALIGN(start_addr, HPAGE_SIZE);

        for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
                /* At this point: (!vma || addr < vma->vm_end). */
                if (TASK_SIZE - len < addr) {
                        /*
                         * Start a new search - just in case we missed
                         * some holes.
                         */
                        if (start_addr != TASK_UNMAPPED_BASE) {
                                start_addr = TASK_UNMAPPED_BASE;
                                goto full_search;
                        }
                        return -ENOMEM;
                }
                if (!vma || addr + len <= vma->vm_start) {
                        mm->free_area_cache = addr + len;
                        return addr;
                }
                addr = ALIGN(vma->vm_end, HPAGE_SIZE);
        }
}

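/*
 * Top-down search: start just below mmap_base and work downward through
 * the holes between VMAs; if nothing fits, retry once from the base and
 * finally fall back to the bottom-up allocator.
 */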
static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
                unsigned long addr0, unsigned long len,
                unsigned long pgoff, unsigned long flags)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma, *prev_vma;
        unsigned long base = mm->mmap_base, addr = addr0;
        int first_time = 1;

        /* don't allow allocations above current base */
        if (mm->free_area_cache > base)
                mm->free_area_cache = base;

try_again:
        /* make sure it can fit in the remaining address space */
        if (mm->free_area_cache < len)
                goto fail;

        /* either no address requested or can't fit in requested address hole */
        addr = (mm->free_area_cache - len) & HPAGE_MASK;
        do {
                /*
                 * Lookup failure means no vma is above this address,
                 * i.e. return with success:
                 */
                if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
                        return addr;

                /*
                 * new region fits between prev_vma->vm_end and
                 * vma->vm_start, use it:
                 */
                if (addr + len <= vma->vm_start &&
                    (!prev_vma || (addr >= prev_vma->vm_end)))
                        /* remember the address as a hint for next time */
                        return (mm->free_area_cache = addr);
                else
                        /* pull free_area_cache down to the first hole */
                        if (mm->free_area_cache == vma->vm_end)
                                mm->free_area_cache = vma->vm_start;

                /* try just below the current vma->vm_start */
                addr = (vma->vm_start - len) & HPAGE_MASK;
        } while (len <= vma->vm_start);

fail:
        /*
         * if hint left us with no space for the requested
         * mapping then try again:
         */
        if (first_time) {
                mm->free_area_cache = base;
                first_time = 0;
                goto try_again;
        }
        /*
         * A failed mmap() very likely causes application failure,
         * so fall back to the bottom-up function here. This scenario
         * can happen with large stack limits and large mmap()
         * allocations.
         */
        mm->free_area_cache = TASK_UNMAPPED_BASE;
        addr = hugetlb_get_unmapped_area_bottomup(file, addr0,
                        len, pgoff, flags);

        /*
         * Restore the topdown base:
         */
        mm->free_area_cache = base;

        return addr;
}

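/*
 * Arch hook for hugetlbfs mmap(): reject unaligned lengths, honour an
 * aligned hint address when the range is free, otherwise dispatch to the
 * bottom-up or top-down helper to match the mm's layout.
 */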
unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                unsigned long len, unsigned long pgoff, unsigned long flags)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;

        if (len & ~HPAGE_MASK)
                return -EINVAL;
        if (len > TASK_SIZE)
                return -ENOMEM;

        if (addr) {
                addr = ALIGN(addr, HPAGE_SIZE);
                vma = find_vma(mm, addr);
                if (TASK_SIZE - len >= addr &&
                    (!vma || addr + len <= vma->vm_start))
                        return addr;
        }
        if (mm->get_unmapped_area == arch_get_unmapped_area)
                return hugetlb_get_unmapped_area_bottomup(file, addr, len,
                                pgoff, flags);
        else
                return hugetlb_get_unmapped_area_topdown(file, addr, len,
                                pgoff, flags);
}

#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/