/*
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Licensed under the GPL
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <as-layout.h>
#include <mem_user.h>
#include <os.h>
#include <skas.h>
#include <kern_util.h>

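/*
 * A host_vm_change batches the host mmap/munmap/mprotect requests
 * generated by a page-table walk so they can be submitted through
 * do_ops() instead of one host call per page.
 */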
struct host_vm_change {
	struct host_vm_op {
		enum { NONE, MMAP, MUNMAP, MPROTECT } type;
		union {
			struct {
				unsigned long addr;
				unsigned long len;
				unsigned int prot;
				int fd;
				__u64 offset;
			} mmap;
			struct {
				unsigned long addr;
				unsigned long len;
			} munmap;
			struct {
				unsigned long addr;
				unsigned long len;
				unsigned int prot;
			} mprotect;
		} u;
	} ops[1];
	int index;
	struct mm_id *id;
	void *data;
	int force;
};

#define INIT_HVC(mm, force) \
	((struct host_vm_change) \
	 { .ops		= { { .type = NONE } }, \
	   .id		= &mm->context.id, \
	   .data	= NULL, \
	   .index	= 0, \
	   .force	= force })

static void report_enomem(void)
{
	printk(KERN_ERR "UML ran out of memory on the host side! "
	       "This can happen due to a memory limitation or "
	       "because vm.max_map_count has been reached.\n");
}

static int do_ops(struct host_vm_change *hvc, int end,
		  int finished)
{
	struct host_vm_op *op;
	int i, ret = 0;

	for (i = 0; i < end && !ret; i++) {
		op = &hvc->ops[i];
		switch (op->type) {
		case MMAP:
			ret = map(hvc->id, op->u.mmap.addr, op->u.mmap.len,
				  op->u.mmap.prot, op->u.mmap.fd,
				  op->u.mmap.offset, finished, &hvc->data);
			break;
		case MUNMAP:
			ret = unmap(hvc->id, op->u.munmap.addr,
				    op->u.munmap.len, finished, &hvc->data);
			break;
		case MPROTECT:
			ret = protect(hvc->id, op->u.mprotect.addr,
				      op->u.mprotect.len, op->u.mprotect.prot,
				      finished, &hvc->data);
			break;
		default:
			printk(KERN_ERR "Unknown op type %d in do_ops\n",
			       op->type);
			BUG();
			break;
		}
	}

	if (ret == -ENOMEM)
		report_enomem();

	return ret;
}

static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
		    unsigned int prot, struct host_vm_change *hvc)
{
	__u64 offset;
	struct host_vm_op *last;
	int fd, ret = 0;

	fd = phys_mapping(phys, &offset);
	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
		if ((last->type == MMAP) &&
		    (last->u.mmap.addr + last->u.mmap.len == virt) &&
		    (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) &&
		    (last->u.mmap.offset + last->u.mmap.len == offset)) {
			last->u.mmap.len += len;
			return 0;
		}
	}

	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
	}

	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type	= MMAP,
				    .u = { .mmap = { .addr	= virt,
						     .len	= len,
						     .prot	= prot,
						     .fd	= fd,
						     .offset	= offset }
					 } });
	return ret;
}

static int add_munmap(unsigned long addr, unsigned long len,
		      struct host_vm_change *hvc)
{
	struct host_vm_op *last;
	int ret = 0;

	if ((addr >= STUB_START) && (addr < STUB_END))
		return -EINVAL;

	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
		if ((last->type == MUNMAP) &&
		    (last->u.munmap.addr + last->u.munmap.len == addr)) {
			last->u.munmap.len += len;
			return 0;
		}
	}

	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
	}

	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type	= MUNMAP,
				    .u = { .munmap = { .addr	= addr,
						       .len	= len } } });
	return ret;
}

static int add_mprotect(unsigned long addr, unsigned long len,
			unsigned int prot, struct host_vm_change *hvc)
{
	struct host_vm_op *last;
	int ret = 0;

	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
		if ((last->type == MPROTECT) &&
		    (last->u.mprotect.addr + last->u.mprotect.len == addr) &&
		    (last->u.mprotect.prot == prot)) {
			last->u.mprotect.len += len;
			return 0;
		}
	}

	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
	}

	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type	= MPROTECT,
				    .u = { .mprotect = { .addr	= addr,
							 .len	= len,
							 .prot	= prot } } });
	return ret;
}

#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1))

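/*
 * Walk the leaf PTEs under one pmd, queueing an op for every page whose
 * host mapping is stale.  Pages in the stub area are owned by the skas
 * code and are skipped.
 */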
static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pte_t *pte;
	int r, w, x, prot, ret = 0;

	pte = pte_offset_kernel(pmd, addr);
	do {
		if ((addr >= STUB_START) && (addr < STUB_END))
			continue;

		r = pte_read(*pte);
		w = pte_write(*pte);
		x = pte_exec(*pte);
		if (!pte_young(*pte)) {
			r = 0;
			w = 0;
		} else if (!pte_dirty(*pte))
			w = 0;

		prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
			(x ? UM_PROT_EXEC : 0));
		if (hvc->force || pte_newpage(*pte)) {
			if (pte_present(*pte))
				ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
					       PAGE_SIZE, prot, hvc);
			else
				ret = add_munmap(addr, PAGE_SIZE, hvc);
		} else if (pte_newprot(*pte))
			ret = add_mprotect(addr, PAGE_SIZE, prot, hvc);
		*pte = pte_mkuptodate(*pte);
	} while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret));
	return ret;
}

static inline int update_pmd_range(pud_t *pud, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pmd_t *pmd;
	unsigned long next;
	int ret = 0;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (!pmd_present(*pmd)) {
			if (hvc->force || pmd_newpage(*pmd)) {
				ret = add_munmap(addr, next - addr, hvc);
				pmd_mkuptodate(*pmd);
			}
		}
		else ret = update_pte_range(pmd, addr, next, hvc);
	} while (pmd++, addr = next, ((addr < end) && !ret));
	return ret;
}

static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pud_t *pud;
	unsigned long next;
	int ret = 0;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (!pud_present(*pud)) {
			if (hvc->force || pud_newpage(*pud)) {
				ret = add_munmap(addr, next - addr, hvc);
				pud_mkuptodate(*pud);
			}
		}
		else ret = update_pmd_range(pud, addr, next, hvc);
	} while (pud++, addr = next, ((addr < end) && !ret));
	return ret;
}

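/*
 * Bring the host mappings for [start_addr, end_addr) in sync with the
 * page tables.  With "force" set, every page is remapped whether or not
 * it is marked as changed.
 */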
void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
		      unsigned long end_addr, int force)
{
	pgd_t *pgd;
	struct host_vm_change hvc;
	unsigned long addr = start_addr, next;
	int ret = 0;

	hvc = INIT_HVC(mm, force);
	pgd = pgd_offset(mm, addr);
	do {
		next = pgd_addr_end(addr, end_addr);
		if (!pgd_present(*pgd)) {
			if (force || pgd_newpage(*pgd)) {
				ret = add_munmap(addr, next - addr, &hvc);
				pgd_mkuptodate(*pgd);
			}
		}
		else ret = update_pud_range(pgd, addr, next, &hvc);
	} while (pgd++, addr = next, ((addr < end_addr) && !ret));

	if (!ret)
		ret = do_ops(&hvc, hvc.index, 1);

	/* This is not an else because ret is modified above */
	if (ret) {
		printk(KERN_ERR "fix_range_common: failed, killing current "
		       "process: %d\n", task_tgid_vnr(current));
		/*
		 * We are under mmap_sem; release it so that current can
		 * terminate.
		 */
		up_write(&current->mm->mmap_sem);
		force_sig(SIGKILL, current);
		do_signal(&current->thread.regs);
	}
}

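/*
 * Kernel mappings live in init_mm and are synchronized with direct host
 * calls (os_unmap_memory() and friends) rather than through an op queue.
 * Returns nonzero if any mapping was changed.
 */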
static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
{
	struct mm_struct *mm;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	unsigned long addr, last;
	int updated = 0, err;

	mm = &init_mm;
	for (addr = start; addr < end;) {
		pgd = pgd_offset(mm, addr);
		if (!pgd_present(*pgd)) {
			last = ADD_ROUND(addr, PGDIR_SIZE);
			if (last > end)
				last = end;
			if (pgd_newpage(*pgd)) {
				updated = 1;
				err = os_unmap_memory((void *) addr,
						      last - addr);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pud = pud_offset(pgd, addr);
		if (!pud_present(*pud)) {
			last = ADD_ROUND(addr, PUD_SIZE);
			if (last > end)
				last = end;
			if (pud_newpage(*pud)) {
				updated = 1;
				err = os_unmap_memory((void *) addr,
						      last - addr);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pmd = pmd_offset(pud, addr);
		if (!pmd_present(*pmd)) {
			last = ADD_ROUND(addr, PMD_SIZE);
			if (last > end)
				last = end;
			if (pmd_newpage(*pmd)) {
				updated = 1;
				err = os_unmap_memory((void *) addr,
						      last - addr);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pte = pte_offset_kernel(pmd, addr);
		if (!pte_present(*pte) || pte_newpage(*pte)) {
			updated = 1;
			err = os_unmap_memory((void *) addr, PAGE_SIZE);
			if (err < 0)
				panic("munmap failed, errno = %d\n", -err);
			if (pte_present(*pte))
				map_memory(addr, pte_val(*pte) & PAGE_MASK,
					   PAGE_SIZE, 1, 1, 1);
		}
		else if (pte_newprot(*pte)) {
			updated = 1;
			os_protect_memory((void *) addr, PAGE_SIZE, 1, 1, 1);
		}
		addr += PAGE_SIZE;
	}
	return updated;
}

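/* Synchronize a single page of a userspace address space with the host. */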
void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	struct mm_struct *mm = vma->vm_mm;
	void *flush = NULL;
	int r, w, x, prot, err = 0;
	struct mm_id *mm_id;

	address &= PAGE_MASK;
	pgd = pgd_offset(mm, address);
	if (!pgd_present(*pgd))
		goto kill;

	pud = pud_offset(pgd, address);
	if (!pud_present(*pud))
		goto kill;

	pmd = pmd_offset(pud, address);
	if (!pmd_present(*pmd))
		goto kill;

	pte = pte_offset_kernel(pmd, address);

	r = pte_read(*pte);
	w = pte_write(*pte);
	x = pte_exec(*pte);
	if (!pte_young(*pte)) {
		r = 0;
		w = 0;
	} else if (!pte_dirty(*pte)) {
		w = 0;
	}

	mm_id = &mm->context.id;
	prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
		(x ? UM_PROT_EXEC : 0));
	if (pte_newpage(*pte)) {
		if (pte_present(*pte)) {
			unsigned long long offset;
			int fd;

			fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
			err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
				  1, &flush);
		}
		else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
	}
	else if (pte_newprot(*pte))
		err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);

	if (err) {
		if (err == -ENOMEM)
			report_enomem();

		goto kill;
	}

	*pte = pte_mkuptodate(*pte);

	return;

kill:
	printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
	force_sig(SIGKILL, current);
}

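/*
 * Trivial wrappers around the page-table lookup functions, for callers
 * that walk the page tables of an arbitrary mm or task.
 */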
pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address)
{
	return pgd_offset(mm, address);
}

pud_t *pud_offset_proc(pgd_t *pgd, unsigned long address)
{
	return pud_offset(pgd, address);
}

pmd_t *pmd_offset_proc(pud_t *pud, unsigned long address)
{
	return pmd_offset(pud, address);
}

pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address)
{
	return pte_offset_kernel(pmd, address);
}

pte_t *addr_pte(struct task_struct *task, unsigned long addr)
{
	pgd_t *pgd = pgd_offset(task->mm, addr);
	pud_t *pud = pud_offset(pgd, addr);
	pmd_t *pmd = pmd_offset(pud, addr);

	return pte_offset_map(pmd, addr);
}

void flush_tlb_all(void)
{
	flush_tlb_mm(current->mm);
}

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	flush_tlb_kernel_range_common(start, end);
}

void flush_tlb_kernel_vm(void)
{
	flush_tlb_kernel_range_common(start_vm, end_vm);
}

void __flush_tlb_one(unsigned long addr)
{
	flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
}

static void fix_range(struct mm_struct *mm, unsigned long start_addr,
		      unsigned long end_addr, int force)
{
	fix_range_common(mm, start_addr, end_addr, force);
}

void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
		     unsigned long end)
{
	if (vma->vm_mm == NULL)
		flush_tlb_kernel_range_common(start, end);
	else fix_range(vma->vm_mm, start, end, 0);
}
EXPORT_SYMBOL(flush_tlb_range);

void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
			unsigned long end)
{
	/*
	 * Don't bother flushing if this address space is about to be
	 * destroyed.
	 */
	if (atomic_read(&mm->mm_users) == 0)
		return;

	fix_range(mm, start, end, 0);
}

void flush_tlb_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma = mm->mmap;

	while (vma != NULL) {
		fix_range(mm, vma->vm_start, vma->vm_end, 0);
		vma = vma->vm_next;
	}
}

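/*
 * Like flush_tlb_mm(), but with force set: every page in every VMA of
 * current's address space is remapped on the host.
 */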
void force_flush_all(void)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma = mm->mmap;

	while (vma != NULL) {
		fix_range(mm, vma->vm_start, vma->vm_end, 1);
		vma = vma->vm_next;
	}
}