/*
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Licensed under the GPL
 */
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched/signal.h>

#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <as-layout.h>
#include <mem_user.h>
#include <os.h>
#include <skas.h>
#include <kern_util.h>
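
/*
 * Host VM updates are batched: pending mmap/munmap/mprotect requests for the
 * host address space are queued as host_vm_ops inside a host_vm_change and
 * issued in order by do_ops().  Adjacent requests of the same kind are
 * coalesced by the add_*() helpers, and the ops array is flushed whenever it
 * fills up.
 */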
struct host_vm_change {
	struct host_vm_op {
		enum { NONE, MMAP, MUNMAP, MPROTECT } type;
		union {
			struct {
				unsigned long addr;
				unsigned long len;
				unsigned int prot;
				int fd;
				__u64 offset;
			} mmap;
			struct {
				unsigned long addr;
				unsigned long len;
			} munmap;
			struct {
				unsigned long addr;
				unsigned long len;
				unsigned int prot;
			} mprotect;
		} u;
	} ops[1];
	int index;
	struct mm_id *id;
	void *data;
	int force;
};
#define INIT_HVC(mm, force) \
	((struct host_vm_change) \
	 { .ops		= { { .type = NONE } }, \
	   .id		= &mm->context.id, \
	   .data	= NULL, \
	   .index	= 0, \
	   .force	= force })
static void report_enomem(void)
{
	printk(KERN_ERR "UML ran out of memory on the host side! "
			"This can happen due to a memory limitation or "
			"vm.max_map_count has been reached.\n");
}
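
/*
 * Issue the queued operations to the host address space identified by
 * hvc->id, stopping at the first failure.  "finished" is 0 when flushing a
 * full ops array mid-walk and 1 for the final flush.
 */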
static int do_ops(struct host_vm_change *hvc, int end,
		  int finished)
{
	struct host_vm_op *op;
	int i, ret = 0;

	for (i = 0; i < end && !ret; i++) {
		op = &hvc->ops[i];
		switch (op->type) {
		case MMAP:
			ret = map(hvc->id, op->u.mmap.addr, op->u.mmap.len,
				  op->u.mmap.prot, op->u.mmap.fd,
				  op->u.mmap.offset, finished, &hvc->data);
			break;
		case MUNMAP:
			ret = unmap(hvc->id, op->u.munmap.addr,
				    op->u.munmap.len, finished, &hvc->data);
			break;
		case MPROTECT:
			ret = protect(hvc->id, op->u.mprotect.addr,
				      op->u.mprotect.len, op->u.mprotect.prot,
				      finished, &hvc->data);
			break;
		default:
			printk(KERN_ERR "Unknown op type %d in do_ops\n",
			       op->type);
			BUG();
			break;
		}
	}

	if (ret == -ENOMEM)
		report_enomem();

	return ret;
}
static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
		    unsigned int prot, struct host_vm_change *hvc)
{
	__u64 offset;
	struct host_vm_op *last;
	int fd, ret = 0;

	fd = phys_mapping(phys, &offset);
	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
		if ((last->type == MMAP) &&
		    (last->u.mmap.addr + last->u.mmap.len == virt) &&
		    (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) &&
		    (last->u.mmap.offset + last->u.mmap.len == offset)) {
			last->u.mmap.len += len;
			return 0;
		}
	}

	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
	}

	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type = MMAP,
				    .u = { .mmap = { .addr = virt,
						     .len = len,
						     .prot = prot,
						     .fd = fd,
						     .offset = offset } } });
	return ret;
}
static int add_munmap(unsigned long addr, unsigned long len,
		      struct host_vm_change *hvc)
{
	struct host_vm_op *last;
	int ret = 0;

	if ((addr >= STUB_START) && (addr < STUB_END))
		return -EINVAL;

	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
		if ((last->type == MUNMAP) &&
		    (last->u.munmap.addr + last->u.munmap.len == addr)) {
			last->u.munmap.len += len;
			return 0;
		}
	}

	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
	}

	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type = MUNMAP,
				    .u = { .munmap = { .addr = addr,
						       .len = len } } });
	return ret;
}
static int add_mprotect(unsigned long addr, unsigned long len,
			unsigned int prot, struct host_vm_change *hvc)
{
	struct host_vm_op *last;
	int ret = 0;

	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
		if ((last->type == MPROTECT) &&
		    (last->u.mprotect.addr + last->u.mprotect.len == addr) &&
		    (last->u.mprotect.prot == prot)) {
			last->u.mprotect.len += len;
			return 0;
		}
	}

	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
	}

	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type = MPROTECT,
				    .u = { .mprotect = { .addr = addr,
							 .len = len,
							 .prot = prot } } });
	return ret;
}
#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1))
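
/*
 * The update_*_range() walkers scan one page-table level over [addr, end),
 * queueing host operations for entries marked as needing a new page or new
 * protection (or unconditionally when hvc->force is set) and marking each
 * visited entry up to date.
 */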
static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pte_t *pte;
	int r, w, x, prot, ret = 0;

	pte = pte_offset_kernel(pmd, addr);
	do {
		if ((addr >= STUB_START) && (addr < STUB_END))
			continue;

		r = pte_read(*pte);
		w = pte_write(*pte);
		x = pte_exec(*pte);
		if (!pte_young(*pte)) {
			r = 0;
			w = 0;
		} else if (!pte_dirty(*pte))
			w = 0;

		prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
			(x ? UM_PROT_EXEC : 0));
		if (hvc->force || pte_newpage(*pte)) {
			if (pte_present(*pte))
				ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
					       PAGE_SIZE, prot, hvc);
			else
				ret = add_munmap(addr, PAGE_SIZE, hvc);
		} else if (pte_newprot(*pte))
			ret = add_mprotect(addr, PAGE_SIZE, prot, hvc);
		*pte = pte_mkuptodate(*pte);
	} while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret));
	return ret;
}
static inline int update_pmd_range(pud_t *pud, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pmd_t *pmd;
	unsigned long next;
	int ret = 0;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (!pmd_present(*pmd)) {
			if (hvc->force || pmd_newpage(*pmd)) {
				ret = add_munmap(addr, next - addr, hvc);
				pmd_mkuptodate(*pmd);
			}
		}
		else ret = update_pte_range(pmd, addr, next, hvc);
	} while (pmd++, addr = next, ((addr < end) && !ret));
	return ret;
}
static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pud_t *pud;
	unsigned long next;
	int ret = 0;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (!pud_present(*pud)) {
			if (hvc->force || pud_newpage(*pud)) {
				ret = add_munmap(addr, next - addr, hvc);
				pud_mkuptodate(*pud);
			}
		}
		else ret = update_pmd_range(pud, addr, next, hvc);
	} while (pud++, addr = next, ((addr < end) && !ret));
	return ret;
}
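
/*
 * Walk the page tables of mm over [start_addr, end_addr) and push the
 * resulting operations to the host.  On failure the current process is
 * killed, as its host mappings are no longer consistent.
 */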
void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
		      unsigned long end_addr, int force)
{
	pgd_t *pgd;
	struct host_vm_change hvc;
	unsigned long addr = start_addr, next;
	int ret = 0;

	hvc = INIT_HVC(mm, force);
	pgd = pgd_offset(mm, addr);
	do {
		next = pgd_addr_end(addr, end_addr);
		if (!pgd_present(*pgd)) {
			if (force || pgd_newpage(*pgd)) {
				ret = add_munmap(addr, next - addr, &hvc);
				pgd_mkuptodate(*pgd);
			}
		}
		else ret = update_pud_range(pgd, addr, next, &hvc);
	} while (pgd++, addr = next, ((addr < end_addr) && !ret));

	if (!ret)
		ret = do_ops(&hvc, hvc.index, 1);

	/* This is not an else because ret is modified above */
	if (ret) {
		printk(KERN_ERR "fix_range_common: failed, killing current "
		       "process: %d\n", task_tgid_vnr(current));
		/* We are under mmap_sem, release it such that current can terminate */
		up_write(&current->mm->mmap_sem);
		force_sig(SIGKILL, current);
		do_signal(&current->thread.regs);
	}
}
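
/*
 * Kernel-range flushes walk init_mm and act on the host directly via
 * os_unmap_memory()/map_memory()/os_protect_memory() rather than queueing
 * ops; the return value reports whether anything changed.
 */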
static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
{
	struct mm_struct *mm;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	unsigned long addr, last;
	int updated = 0, err;

	mm = &init_mm;
	for (addr = start; addr < end;) {
		pgd = pgd_offset(mm, addr);
		if (!pgd_present(*pgd)) {
			last = ADD_ROUND(addr, PGDIR_SIZE);
			if (last > end)
				last = end;
			if (pgd_newpage(*pgd)) {
				updated = 1;
				err = os_unmap_memory((void *) addr,
						      last - addr);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pud = pud_offset(pgd, addr);
		if (!pud_present(*pud)) {
			last = ADD_ROUND(addr, PUD_SIZE);
			if (last > end)
				last = end;
			if (pud_newpage(*pud)) {
				updated = 1;
				err = os_unmap_memory((void *) addr,
						      last - addr);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pmd = pmd_offset(pud, addr);
		if (!pmd_present(*pmd)) {
			last = ADD_ROUND(addr, PMD_SIZE);
			if (last > end)
				last = end;
			if (pmd_newpage(*pmd)) {
				updated = 1;
				err = os_unmap_memory((void *) addr,
						      last - addr);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pte = pte_offset_kernel(pmd, addr);
		if (!pte_present(*pte) || pte_newpage(*pte)) {
			updated = 1;
			err = os_unmap_memory((void *) addr,
					      PAGE_SIZE);
			if (err < 0)
				panic("munmap failed, errno = %d\n",
				      -err);
			if (pte_present(*pte))
				map_memory(addr,
					   pte_val(*pte) & PAGE_MASK,
					   PAGE_SIZE, 1, 1, 1);
		}
		else if (pte_newprot(*pte)) {
			updated = 1;
			os_protect_memory((void *) addr, PAGE_SIZE, 1, 1, 1);
		}
		addr += PAGE_SIZE;
	}
	return updated;
}
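
/*
 * Flush a single userspace page: recompute its host protection from the pte
 * and map, unmap or mprotect it on the host as needed.  On failure the
 * current process is killed.
 */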
void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	struct mm_struct *mm = vma->vm_mm;
	void *flush = NULL;
	int r, w, x, prot, err = 0;
	struct mm_id *mm_id;

	address &= PAGE_MASK;
	pgd = pgd_offset(mm, address);
	if (!pgd_present(*pgd))
		goto kill;

	pud = pud_offset(pgd, address);
	if (!pud_present(*pud))
		goto kill;

	pmd = pmd_offset(pud, address);
	if (!pmd_present(*pmd))
		goto kill;

	pte = pte_offset_kernel(pmd, address);

	r = pte_read(*pte);
	w = pte_write(*pte);
	x = pte_exec(*pte);
	if (!pte_young(*pte)) {
		r = 0;
		w = 0;
	} else if (!pte_dirty(*pte)) {
		w = 0;
	}

	mm_id = &mm->context.id;
	prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
		(x ? UM_PROT_EXEC : 0));
	if (pte_newpage(*pte)) {
		if (pte_present(*pte)) {
			unsigned long long offset;
			int fd;

			fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
			err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
				  1, &flush);
		}
		else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
	}
	else if (pte_newprot(*pte))
		err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);

	if (err) {
		if (err == -ENOMEM)
			report_enomem();

		goto kill;
	}

	*pte = pte_mkuptodate(*pte);

	return;

kill:
	printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
	force_sig(SIGKILL, current);
}
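
/*
 * Thin wrappers around the generic page-table accessors, used by other parts
 * of the UML arch code.
 */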
pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address)
{
	return pgd_offset(mm, address);
}
pud_t *pud_offset_proc(pgd_t *pgd, unsigned long address)
{
	return pud_offset(pgd, address);
}
pmd_t *pmd_offset_proc(pud_t *pud, unsigned long address)
{
	return pmd_offset(pud, address);
}
pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address)
{
	return pte_offset_kernel(pmd, address);
}
pte_t *addr_pte(struct task_struct *task, unsigned long addr)
{
	pgd_t *pgd = pgd_offset(task->mm, addr);
	pud_t *pud = pud_offset(pgd, addr);
	pmd_t *pmd = pmd_offset(pud, addr);

	return pte_offset_map(pmd, addr);
}
void flush_tlb_all(void)
{
	flush_tlb_mm(current->mm);
}
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	flush_tlb_kernel_range_common(start, end);
}
void flush_tlb_kernel_vm(void)
{
	flush_tlb_kernel_range_common(start_vm, end_vm);
}
void __flush_tlb_one(unsigned long addr)
{
	flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
}
static void fix_range(struct mm_struct *mm, unsigned long start_addr,
		      unsigned long end_addr, int force)
{
	fix_range_common(mm, start_addr, end_addr, force);
}
void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
		     unsigned long end)
{
	if (vma->vm_mm == NULL)
		flush_tlb_kernel_range_common(start, end);
	else fix_range(vma->vm_mm, start, end, 0);
}
EXPORT_SYMBOL(flush_tlb_range);
void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
			unsigned long end)
{
	/*
	 * Don't bother flushing if this address space is about to be
	 * destroyed.
	 */
	if (atomic_read(&mm->mm_users) == 0)
		return;

	fix_range(mm, start, end, 0);
}
void flush_tlb_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma = mm->mmap;

	while (vma != NULL) {
		fix_range(mm, vma->vm_start, vma->vm_end, 0);
		vma = vma->vm_next;
	}
}
void force_flush_all(void)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma = mm->mmap;

	while (vma != NULL) {
		fix_range(mm, vma->vm_start, vma->vm_end, 1);
		vma = vma->vm_next;
	}
}