/* This file contains some utility routines for VM. */

#include <minix/callnr.h>
#include <minix/config.h>
#include <minix/const.h>
#include <minix/endpoint.h>
#include <minix/minlib.h>
#include <minix/type.h>
#include <minix/ipc.h>
#include <minix/sysutil.h>
#include <minix/syslib.h>
#include <minix/bitmap.h>

#include <string.h>
#include <errno.h>
#include <stdio.h>
#include <assert.h>

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/mman.h>
#include <sys/resource.h>

#include "proto.h"
#include "glo.h"
#include "util.h"
#include "region.h"
#include "sanitycheck.h"

#include <machine/archtypes.h>
#include "kernel/const.h"
#include "kernel/config.h"
#include "kernel/type.h"
#include "kernel/proc.h"

/*===========================================================================*
 *				get_mem_chunks				     *
 *===========================================================================*/
void get_mem_chunks(
struct memory *mem_chunks)			/* store mem chunks here */
{
/* Initialize the free memory list from the kernel-provided memory map.
 * Translate the byte offsets and sizes in this list to clicks, properly
 * truncated.
 */
  phys_bytes base, size, limit;
  int i;
  struct memory *memp;

  /* Initialize everything to zero. */
  memset(mem_chunks, 0, NR_MEMS*sizeof(*mem_chunks));

  /* Obtain and parse memory from kernel environment. */
  /* XXX Any memory chunk in excess of NR_MEMS is silently ignored. */
  for(i = 0; i < MIN(MAXMEMMAP, NR_MEMS); i++) {
	mem_chunks[i].base = kernel_boot_info.memmap[i].mm_base_addr;
	mem_chunks[i].size = kernel_boot_info.memmap[i].mm_length;
  }

  /* Round physical memory to clicks. Round start up, round end down. */
  for (i = 0; i < NR_MEMS; i++) {
	memp = &mem_chunks[i];		/* next mem chunk is stored here */
	base = mem_chunks[i].base;
	size = mem_chunks[i].size;
	limit = base + size;
	base = (phys_bytes) (CLICK_CEIL(base));
	limit = (phys_bytes) (CLICK_FLOOR(limit));
	if (limit <= base) {
		memp->base = memp->size = 0;
	} else {
		memp->base = base >> CLICK_SHIFT;
		memp->size = (limit - base) >> CLICK_SHIFT;
	}
  }
}

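/*
 * A worked example of the rounding above, illustrative only and assuming a
 * 4 KiB click size (CLICK_SHIFT == 12): a chunk with base 0x1234 and size
 * 0x5000 spans bytes 0x1234..0x6233. CLICK_CEIL rounds the base up to
 * 0x2000 and CLICK_FLOOR rounds the limit (0x6234) down to 0x6000, so the
 * chunk is recorded as base click 0x2 with a size of 0x4 clicks.
 */
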
/*===========================================================================*
 *				vm_isokendpt				     *
 *===========================================================================*/
int vm_isokendpt(endpoint_t endpoint, int *procn)
{
	*procn = _ENDPOINT_P(endpoint);
	if(*procn < 0 || *procn >= NR_PROCS)
		return EINVAL;
	if(*procn >= 0 && endpoint != vmproc[*procn].vm_endpoint)
		return EDEADEPT;
	if(*procn >= 0 && !(vmproc[*procn].vm_flags & VMF_INUSE))
		return EDEADEPT;
	return OK;
}

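/*
 * Illustrative only: the typical caller pattern for vm_isokendpt(), as used
 * by do_info() and do_getrusage() below. The variable names are
 * placeholders, not part of any fixed interface:
 *
 *	int slot;
 *	struct vmproc *vmp;
 *
 *	if (vm_isokendpt(m->m_source, &slot) != OK)
 *		return EINVAL;
 *	vmp = &vmproc[slot];
 */
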
/*===========================================================================*
 *				do_info					     *
 *===========================================================================*/
int do_info(message *m)
{
	struct vm_stats_info vsi;
	struct vm_usage_info vui;
	static struct vm_region_info vri[MAX_VRI_COUNT];
	struct vmproc *vmp;
	vir_bytes addr, size, next, ptr;
	int r, pr, dummy, count, free_pages, largest_contig;

	if (vm_isokendpt(m->m_source, &pr) != OK)
		return EINVAL;
	vmp = &vmproc[pr];

	ptr = (vir_bytes) m->m_lsys_vm_info.ptr;

	switch(m->m_lsys_vm_info.what) {
	case VMIW_STATS:
		vsi.vsi_pagesize = VM_PAGE_SIZE;
		vsi.vsi_total = total_pages;
		memstats(&dummy, &free_pages, &largest_contig);
		vsi.vsi_free = free_pages;
		vsi.vsi_largest = largest_contig;

		get_stats_info(&vsi);

		addr = (vir_bytes) &vsi;
		size = sizeof(vsi);

		break;

	case VMIW_USAGE:
		if(m->m_lsys_vm_info.ep < 0)
			get_usage_info_kernel(&vui);
		else if (vm_isokendpt(m->m_lsys_vm_info.ep, &pr) != OK)
			return EINVAL;
		else get_usage_info(&vmproc[pr], &vui);

		addr = (vir_bytes) &vui;
		size = sizeof(vui);

		break;

	case VMIW_REGION:
		if(m->m_lsys_vm_info.ep == SELF) {
			m->m_lsys_vm_info.ep = m->m_source;
		}
		if (vm_isokendpt(m->m_lsys_vm_info.ep, &pr) != OK)
			return EINVAL;

		count = MIN(m->m_lsys_vm_info.count, MAX_VRI_COUNT);
		next = m->m_lsys_vm_info.next;

		count = get_region_info(&vmproc[pr], vri, count, &next);

		m->m_lsys_vm_info.count = count;
		m->m_lsys_vm_info.next = next;

		addr = (vir_bytes) vri;
		size = sizeof(vri[0]) * count;

		break;

	default:
		return EINVAL;
	}

	if (size == 0)
		return OK;

	/* Make sure that no page faults can occur while copying out. A page
	 * fault would cause the kernel to send a notify to us, while we would
	 * be waiting for the result of the copy system call, resulting in a
	 * deadlock. Note that no memory mapping can be undone without the
	 * involvement of VM, so we are safe until we're done.
	 */
	r = handle_memory_once(vmp, ptr, size, 1 /*wrflag*/);
	if (r != OK) return r;

	/* Now that we know the copy out will succeed, perform the actual copy
	 * operation.
	 */
	return sys_datacopy(SELF, addr,
		(vir_bytes) vmp->vm_endpoint, ptr, size);
}

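/*
 * A minimal caller-side sketch, for illustration only: how a service might
 * fill in the info request handled above. The buffer and endpoint names are
 * placeholders; only the message fields (what, ep, ptr, count, next) are
 * taken from the code above:
 *
 *	message m;
 *	struct vm_usage_info vui;
 *
 *	memset(&m, 0, sizeof(m));
 *	m.m_lsys_vm_info.what = VMIW_USAGE;
 *	m.m_lsys_vm_info.ep = target_ep;	// endpoint of interest
 *	m.m_lsys_vm_info.ptr = &vui;
 *	// ...send the request to VM and await the reply...
 */
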
/*===========================================================================*
 *				swap_proc_slot				     *
 *===========================================================================*/
int swap_proc_slot(struct vmproc *src_vmp, struct vmproc *dst_vmp)
{
	struct vmproc orig_src_vmproc, orig_dst_vmproc;

#if LU_DEBUG
	printf("VM: swap_proc: swapping %d (%d) and %d (%d)\n",
		src_vmp->vm_endpoint, src_vmp->vm_slot,
		dst_vmp->vm_endpoint, dst_vmp->vm_slot);
#endif

	/* Save existing data. */
	orig_src_vmproc = *src_vmp;
	orig_dst_vmproc = *dst_vmp;

	/* Swap slots. */
	*src_vmp = orig_dst_vmproc;
	*dst_vmp = orig_src_vmproc;

	/* Preserve endpoints and slot numbers. */
	src_vmp->vm_endpoint = orig_src_vmproc.vm_endpoint;
	src_vmp->vm_slot = orig_src_vmproc.vm_slot;
	dst_vmp->vm_endpoint = orig_dst_vmproc.vm_endpoint;
	dst_vmp->vm_slot = orig_dst_vmproc.vm_slot;

#if LU_DEBUG
	printf("VM: swap_proc: swapped %d (%d) and %d (%d)\n",
		src_vmp->vm_endpoint, src_vmp->vm_slot,
		dst_vmp->vm_endpoint, dst_vmp->vm_slot);
#endif

	return OK;
}

/*
 * Transfer memory mapped regions, using CoW sharing, from 'src_vmp' to
 * 'dst_vmp', for the source process's address range of 'start_addr'
 * (inclusive) to 'end_addr' (exclusive). Return OK or an error code.
 * If the regions seem to have been transferred already, do nothing.
 */
static int
transfer_mmap_regions(struct vmproc *src_vmp, struct vmproc *dst_vmp,
	vir_bytes start_addr, vir_bytes end_addr)
{
	struct vir_region *start_vr, *check_vr, *end_vr;

	start_vr = region_search(&src_vmp->vm_regions_avl, start_addr,
	    AVL_GREATER_EQUAL);

	if (start_vr == NULL || start_vr->vaddr >= end_addr)
		return OK; /* nothing to do */

	/* In the case of multicomponent live update that includes VM, this
	 * function may be called for the same process more than once, for the
	 * sake of keeping code paths as little divergent as possible while at
	 * the same time ensuring that the regions are copied early enough.
	 *
	 * To compensate for these multiple calls, we perform a very simple
	 * check here to see if the region to transfer is already present in
	 * the target process. If so, we can safely skip copying the regions
	 * again, because there is no other possible explanation for the
	 * region being present already. Things would go horribly wrong if we
	 * tried copying anyway, but this check is not good enough to detect
	 * all such problems, since we do a check on the base address only.
	 */
	check_vr = region_search(&dst_vmp->vm_regions_avl, start_vr->vaddr,
	    AVL_EQUAL);
	if (check_vr != NULL) {
#if LU_DEBUG
		printf("VM: transfer_mmap_regions: skipping transfer from "
		    "%d to %d (0x%lx already present)\n",
		    src_vmp->vm_endpoint, dst_vmp->vm_endpoint,
		    start_vr->vaddr);
#endif
		return OK;
	}

	end_vr = region_search(&src_vmp->vm_regions_avl, end_addr, AVL_LESS);
	assert(end_vr != NULL);
	assert(start_vr->vaddr <= end_vr->vaddr);

#if LU_DEBUG
	printf("VM: transfer_mmap_regions: transferring memory mapped regions "
	    "from %d to %d (0x%lx to 0x%lx)\n", src_vmp->vm_endpoint,
	    dst_vmp->vm_endpoint, start_vr->vaddr, end_vr->vaddr);
#endif

	return map_proc_copy_range(dst_vmp, src_vmp, start_vr, end_vr);
}

/*
 * Create copy-on-write mappings in process 'dst_vmp' for all memory-mapped
 * regions present in 'src_vmp'. Return OK on success, or an error otherwise.
 * In the case of failure, successfully created mappings are not undone.
 */
int
map_proc_dyn_data(struct vmproc *src_vmp, struct vmproc *dst_vmp)
{
	int r;

#if LU_DEBUG
	printf("VM: mapping dynamic data from %d to %d\n",
	    src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
#endif

	/* Transfer memory mapped regions now. To sandbox the new instance and
	 * prevent state corruption on rollback, we share all the regions
	 * between the two instances as COW.
	 */
	r = transfer_mmap_regions(src_vmp, dst_vmp, VM_MMAPBASE, VM_MMAPTOP);

	/* If the stack is not mapped at the VM_DATATOP, there might be some
	 * more regions hiding above the stack. We also have to transfer
	 * those.
	 */
	if (r == OK && VM_STACKTOP < VM_DATATOP)
		r = transfer_mmap_regions(src_vmp, dst_vmp, VM_STACKTOP,
		    VM_DATATOP);

	return r;
}

/*===========================================================================*
 *			      swap_proc_dyn_data			     *
 *===========================================================================*/
int swap_proc_dyn_data(struct vmproc *src_vmp, struct vmproc *dst_vmp,
	int sys_upd_flags)
{
	int is_vm;
	int r;

	is_vm = (dst_vmp->vm_endpoint == VM_PROC_NR);

	/* For VM, transfer memory mapped regions first. */
	if(is_vm) {
#if LU_DEBUG
		printf("VM: swap_proc_dyn_data: transferring memory mapped regions from old (%d) to new VM (%d)\n",
			src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
#endif
		r = pt_map_in_range(src_vmp, dst_vmp, VM_OWN_HEAPBASE, VM_OWN_MMAPTOP);
		if(r != OK) {
			printf("swap_proc_dyn_data: pt_map_in_range failed\n");
			return r;
		}
		r = pt_map_in_range(src_vmp, dst_vmp, VM_STACKTOP, VM_DATATOP);
		if(r != OK) {
			printf("swap_proc_dyn_data: pt_map_in_range failed\n");
			return r;
		}
	}

#if LU_DEBUG
	printf("VM: swap_proc_dyn_data: swapping regions' parents for %d (%d) and %d (%d)\n",
		src_vmp->vm_endpoint, src_vmp->vm_slot,
		dst_vmp->vm_endpoint, dst_vmp->vm_slot);
#endif

	/* Swap vir_regions' parents. */
	map_setparent(src_vmp);
	map_setparent(dst_vmp);

	/* Don't transfer mmapped regions if not required. */
	if(is_vm || (sys_upd_flags & (SF_VM_ROLLBACK|SF_VM_NOMMAP))) {
		return OK;
	}

	/* Make sure regions are consistent. */
	assert(region_search_root(&src_vmp->vm_regions_avl) &&
	    region_search_root(&dst_vmp->vm_regions_avl));

	/* Source and destination are intentionally swapped here! */
	return map_proc_dyn_data(dst_vmp, src_vmp);
}

void *mmap(void *addr, size_t len, int f, int f2, int f3, off_t o)
{
	void *ret;
	phys_bytes p;

	assert(!addr);
	assert(!(len % VM_PAGE_SIZE));

	ret = vm_allocpages(&p, VMP_SLAB, len/VM_PAGE_SIZE);

	if(!ret) return MAP_FAILED;
	memset(ret, 0, len);
	return ret;
}

int munmap(void * addr, size_t len)
{
	vm_freepages((vir_bytes) addr, roundup(len, VM_PAGE_SIZE)/VM_PAGE_SIZE);
	return 0;
}

#ifdef __weak_alias
__weak_alias(brk, _brk)
#endif

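/*
 * A minimal usage sketch, illustrative only: these overrides let allocations
 * made from within VM itself be served by vm_allocpages(). The flag and
 * offset arguments are ignored above, so an internal caller only needs a
 * page-multiple length. The variable name is a placeholder:
 *
 *	void *buf;
 *
 *	buf = mmap(NULL, 2 * VM_PAGE_SIZE, 0, 0, 0, 0);
 *	if (buf == MAP_FAILED)
 *		panic("out of memory");
 *	...
 *	munmap(buf, 2 * VM_PAGE_SIZE);
 */
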
int _brk(void *addr)
{
	/* brk is a special case function to allow vm itself to
	   allocate memory in its own (cacheable) HEAP */
	vir_bytes target = roundup((vir_bytes)addr, VM_PAGE_SIZE), v;
	extern char _end;
	extern char *_brksize;
	static vir_bytes prevbrk = (vir_bytes) &_end;
	struct vmproc *vmprocess = &vmproc[VM_PROC_NR];

	for(v = roundup(prevbrk, VM_PAGE_SIZE); v < target;
		v += VM_PAGE_SIZE) {
		phys_bytes mem, newpage = alloc_mem(1, 0);
		if(newpage == NO_MEM) return -1;
		mem = CLICK2ABS(newpage);
		if(pt_writemap(vmprocess, &vmprocess->vm_pt,
			v, mem, VM_PAGE_SIZE,
			  ARCH_VM_PTE_PRESENT
			| ARCH_VM_PTE_USER
			| ARCH_VM_PTE_RW
#if defined(__arm__)
			| ARM_VM_PTE_CACHED
#endif
			, 0) != OK) {
			free_mem(newpage, 1);
			return -1;
		}
		prevbrk = v + VM_PAGE_SIZE;
	}

	_brksize = (char *) addr;

	if(sys_vmctl(SELF, VMCTL_FLUSHTLB, 0) != OK)
		panic("flushtlb failed");

	return 0;
}

/*===========================================================================*
 *				do_getrusage				     *
 *===========================================================================*/
int do_getrusage(message *m)
{
	int res, slot;
	struct vmproc *vmp;
	struct rusage r_usage;

	/* If the request is not from PM, it is coming directly from userland.
	 * This is an obsolete construction. In the future, userland programs
	 * should no longer be allowed to call vm_getrusage(2) directly at all.
	 * For backward compatibility, we simply return success for now.
	 */
	if (m->m_source != PM_PROC_NR)
		return OK;

	/* Get the process for which resource usage is requested. */
	if ((res = vm_isokendpt(m->m_lsys_vm_rusage.endpt, &slot)) != OK)
		return ESRCH;

	vmp = &vmproc[slot];

	/* We are going to change only a few fields, so copy in the rusage
	 * structure first. The structure is still in PM's address space at
	 * this point, so use the message source.
	 */
	if ((res = sys_datacopy(m->m_source, m->m_lsys_vm_rusage.addr,
	    SELF, (vir_bytes) &r_usage, (vir_bytes) sizeof(r_usage))) < 0)
		return res;

	if (!m->m_lsys_vm_rusage.children) {
		r_usage.ru_maxrss = vmp->vm_total_max / 1024L; /* unit is KB */
		r_usage.ru_minflt = vmp->vm_minor_page_fault;
		r_usage.ru_majflt = vmp->vm_major_page_fault;
	} else {
		/* XXX TODO: return the fields for terminated, waited-for
		 * children of the given process. We currently do not have this
		 * information! In the future, rather than teaching VM about
		 * the process hierarchy, PM should probably tell VM at process
		 * exit time which other process should inherit its resource
		 * usage fields. For now, we assume PM clears the fields before
		 * making this call, so we don't zero the fields explicitly.
		 */
	}

	/* Copy out the resulting structure back to PM. */
	return sys_datacopy(SELF, (vir_bytes) &r_usage, m->m_source,
	    m->m_lsys_vm_rusage.addr, (vir_bytes) sizeof(r_usage));
}

/*===========================================================================*
 *				adjust_proc_refs			     *
 *===========================================================================*/
void adjust_proc_refs(void)
{
	struct vmproc *vmp;
	region_iter iter;

	/* Fix up region parents. */
	for(vmp = vmproc; vmp < &vmproc[VMP_NR]; vmp++) {
		struct vir_region *vr;
		if(!(vmp->vm_flags & VMF_INUSE))
			continue;
		region_start_iter_least(&vmp->vm_regions_avl, &iter);
		while((vr = region_get_iter(&iter))) {
			USE(vr, vr->parent = vmp;);
			region_incr_iter(&iter);
		}
	}
}