etc/services - sync with NetBSD-8
[minix.git] / minix / servers / vm / utility.c
blob189c3554981c2205c15c9ee7bcb7d05bd2be1948
2 /* This file contains some utility routines for VM. */
4 #define _SYSTEM 1
6 #include <minix/callnr.h>
7 #include <minix/com.h>
8 #include <minix/config.h>
9 #include <minix/const.h>
10 #include <minix/ds.h>
11 #include <minix/endpoint.h>
12 #include <minix/minlib.h>
13 #include <minix/type.h>
14 #include <minix/ipc.h>
15 #include <minix/sysutil.h>
16 #include <minix/syslib.h>
17 #include <minix/type.h>
18 #include <minix/bitmap.h>
19 #include <minix/rs.h>
20 #include <string.h>
21 #include <errno.h>
22 #include <unistd.h>
23 #include <assert.h>
24 #include <sys/cdefs.h>
25 #include <sys/param.h>
26 #include <sys/mman.h>
27 #include <sys/resource.h>
29 #include "proto.h"
30 #include "glo.h"
31 #include "util.h"
32 #include "region.h"
33 #include "sanitycheck.h"
35 #include <machine/archtypes.h>
36 #include "kernel/const.h"
37 #include "kernel/config.h"
38 #include "kernel/type.h"
39 #include "kernel/proc.h"
41 /*===========================================================================*
42 * get_mem_chunks *
43 *===========================================================================*/
44 void get_mem_chunks(
45 struct memory *mem_chunks) /* store mem chunks here */
47 /* Initialize the free memory list from the kernel-provided memory map. Translate
48 * the byte offsets and sizes in this list to clicks, properly truncated.
50 phys_bytes base, size, limit;
51 int i;
52 struct memory *memp;
54 /* Initialize everything to zero. */
55 memset(mem_chunks, 0, NR_MEMS*sizeof(*mem_chunks));
57 /* Obtain and parse memory from kernel environment. */
58 /* XXX Any memory chunk in excess of NR_MEMS is silently ignored. */
59 for(i = 0; i < MIN(MAXMEMMAP, NR_MEMS); i++) {
60 mem_chunks[i].base = kernel_boot_info.memmap[i].mm_base_addr;
61 mem_chunks[i].size = kernel_boot_info.memmap[i].mm_length;
64 /* Round physical memory to clicks. Round start up, round end down. */
65 for (i = 0; i < NR_MEMS; i++) {
66 memp = &mem_chunks[i]; /* next mem chunk is stored here */
67 base = mem_chunks[i].base;
68 size = mem_chunks[i].size;
69 limit = base + size;
70 base = (phys_bytes) (CLICK_CEIL(base));
71 limit = (phys_bytes) (CLICK_FLOOR(limit));
72 if (limit <= base) {
73 memp->base = memp->size = 0;
74 } else {
75 memp->base = base >> CLICK_SHIFT;
76 memp->size = (limit - base) >> CLICK_SHIFT;
81 /*===========================================================================*
82 * vm_isokendpt *
83 *===========================================================================*/
84 int vm_isokendpt(endpoint_t endpoint, int *procn)
86 *procn = _ENDPOINT_P(endpoint);
87 if(*procn < 0 || *procn >= NR_PROCS)
88 return EINVAL;
89 if(*procn >= 0 && endpoint != vmproc[*procn].vm_endpoint)
90 return EDEADEPT;
91 if(*procn >= 0 && !(vmproc[*procn].vm_flags & VMF_INUSE))
92 return EDEADEPT;
93 return OK;
97 /*===========================================================================*
98 * do_info *
99 *===========================================================================*/
100 int do_info(message *m)
102 struct vm_stats_info vsi;
103 struct vm_usage_info vui;
104 static struct vm_region_info vri[MAX_VRI_COUNT];
105 struct vmproc *vmp;
106 vir_bytes addr, size, next, ptr;
107 int r, pr, dummy, count, free_pages, largest_contig;
109 if (vm_isokendpt(m->m_source, &pr) != OK)
110 return EINVAL;
111 vmp = &vmproc[pr];
113 ptr = (vir_bytes) m->m_lsys_vm_info.ptr;
115 switch(m->m_lsys_vm_info.what) {
116 case VMIW_STATS:
117 vsi.vsi_pagesize = VM_PAGE_SIZE;
118 vsi.vsi_total = total_pages;
119 memstats(&dummy, &free_pages, &largest_contig);
120 vsi.vsi_free = free_pages;
121 vsi.vsi_largest = largest_contig;
123 get_stats_info(&vsi);
125 addr = (vir_bytes) &vsi;
126 size = sizeof(vsi);
128 break;
130 case VMIW_USAGE:
131 if(m->m_lsys_vm_info.ep < 0)
132 get_usage_info_kernel(&vui);
133 else if (vm_isokendpt(m->m_lsys_vm_info.ep, &pr) != OK)
134 return EINVAL;
135 else get_usage_info(&vmproc[pr], &vui);
137 addr = (vir_bytes) &vui;
138 size = sizeof(vui);
140 break;
142 case VMIW_REGION:
143 if(m->m_lsys_vm_info.ep == SELF) {
144 m->m_lsys_vm_info.ep = m->m_source;
146 if (vm_isokendpt(m->m_lsys_vm_info.ep, &pr) != OK)
147 return EINVAL;
149 count = MIN(m->m_lsys_vm_info.count, MAX_VRI_COUNT);
150 next = m->m_lsys_vm_info.next;
152 count = get_region_info(&vmproc[pr], vri, count, &next);
154 m->m_lsys_vm_info.count = count;
155 m->m_lsys_vm_info.next = next;
157 addr = (vir_bytes) vri;
158 size = sizeof(vri[0]) * count;
160 break;
162 default:
163 return EINVAL;
166 if (size == 0)
167 return OK;
169 /* Make sure that no page faults can occur while copying out. A page
170 * fault would cause the kernel to send a notify to us, while we would
171 * be waiting for the result of the copy system call, resulting in a
172 * deadlock. Note that no memory mapping can be undone without the
173 * involvement of VM, so we are safe until we're done.
175 r = handle_memory_once(vmp, ptr, size, 1 /*wrflag*/);
176 if (r != OK) return r;
178 /* Now that we know the copy out will succeed, perform the actual copy
179 * operation.
181 return sys_datacopy(SELF, addr,
182 (vir_bytes) vmp->vm_endpoint, ptr, size);
185 /*===========================================================================*
186 * swap_proc_slot *
187 *===========================================================================*/
188 int swap_proc_slot(struct vmproc *src_vmp, struct vmproc *dst_vmp)
190 struct vmproc orig_src_vmproc, orig_dst_vmproc;
192 #if LU_DEBUG
193 printf("VM: swap_proc: swapping %d (%d) and %d (%d)\n",
194 src_vmp->vm_endpoint, src_vmp->vm_slot,
195 dst_vmp->vm_endpoint, dst_vmp->vm_slot);
196 #endif
198 /* Save existing data. */
199 orig_src_vmproc = *src_vmp;
200 orig_dst_vmproc = *dst_vmp;
202 /* Swap slots. */
203 *src_vmp = orig_dst_vmproc;
204 *dst_vmp = orig_src_vmproc;
206 /* Preserve endpoints and slot numbers. */
207 src_vmp->vm_endpoint = orig_src_vmproc.vm_endpoint;
208 src_vmp->vm_slot = orig_src_vmproc.vm_slot;
209 dst_vmp->vm_endpoint = orig_dst_vmproc.vm_endpoint;
210 dst_vmp->vm_slot = orig_dst_vmproc.vm_slot;
212 #if LU_DEBUG
213 printf("VM: swap_proc: swapped %d (%d) and %d (%d)\n",
214 src_vmp->vm_endpoint, src_vmp->vm_slot,
215 dst_vmp->vm_endpoint, dst_vmp->vm_slot);
216 #endif
218 return OK;
222 * Transfer memory mapped regions, using CoW sharing, from 'src_vmp' to
223 * 'dst_vmp', for the source process's address range of 'start_addr'
224 * (inclusive) to 'end_addr' (exclusive). Return OK or an error code.
225 * If the regions seem to have been transferred already, do nothing.
227 static int
228 transfer_mmap_regions(struct vmproc *src_vmp, struct vmproc *dst_vmp,
229 vir_bytes start_addr, vir_bytes end_addr)
231 struct vir_region *start_vr, *check_vr, *end_vr;
233 start_vr = region_search(&src_vmp->vm_regions_avl, start_addr,
234 AVL_GREATER_EQUAL);
236 if (start_vr == NULL || start_vr->vaddr >= end_addr)
237 return OK; /* nothing to do */
239 /* In the case of multicomponent live update that includes VM, this
240 * function may be called for the same process more than once, for the
241 * sake of keeping code paths as little divergent as possible while at
242 * the same time ensuring that the regions are copied early enough.
244 * To compensate for these multiple calls, we perform a very simple
245 * check here to see if the region to transfer is already present in
246 * the target process. If so, we can safely skip copying the regions
247 * again, because there is no other possible explanation for the
248 * region being present already. Things would go horribly wrong if we
249 * tried copying anyway, but this check is not good enough to detect
250 * all such problems, since we do a check on the base address only.
252 check_vr = region_search(&dst_vmp->vm_regions_avl, start_vr->vaddr,
253 AVL_EQUAL);
254 if (check_vr != NULL) {
255 #if LU_DEBUG
256 printf("VM: transfer_mmap_regions: skipping transfer from "
257 "%d to %d (0x%lx already present)\n",
258 src_vmp->vm_endpoint, dst_vmp->vm_endpoint,
259 start_vr->vaddr);
260 #endif
261 return OK;
264 end_vr = region_search(&src_vmp->vm_regions_avl, end_addr, AVL_LESS);
265 assert(end_vr != NULL);
266 assert(start_vr->vaddr <= end_vr->vaddr);
268 #if LU_DEBUG
269 printf("VM: transfer_mmap_regions: transferring memory mapped regions "
270 "from %d to %d (0x%lx to 0x%lx)\n", src_vmp->vm_endpoint,
271 dst_vmp->vm_endpoint, start_vr->vaddr, end_vr->vaddr);
272 #endif
274 return map_proc_copy_range(dst_vmp, src_vmp, start_vr, end_vr);
278 * Create copy-on-write mappings in process 'dst_vmp' for all memory-mapped
279 * regions present in 'src_vmp'. Return OK on success, or an error otherwise.
280 * In the case of failure, successfully created mappings are not undone.
283 map_proc_dyn_data(struct vmproc *src_vmp, struct vmproc *dst_vmp)
285 int r;
287 #if LU_DEBUG
288 printf("VM: mapping dynamic data from %d to %d\n",
289 src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
290 #endif
292 /* Transfer memory mapped regions now. To sandbox the new instance and
293 * prevent state corruption on rollback, we share all the regions
294 * between the two instances as COW.
296 r = transfer_mmap_regions(src_vmp, dst_vmp, VM_MMAPBASE, VM_MMAPTOP);
298 /* If the stack is not mapped at the VM_DATATOP, there might be some
299 * more regions hiding above the stack. We also have to transfer
300 * those.
302 if (r == OK && VM_STACKTOP < VM_DATATOP)
303 r = transfer_mmap_regions(src_vmp, dst_vmp, VM_STACKTOP,
304 VM_DATATOP);
306 return r;
309 /*===========================================================================*
310 * swap_proc_dyn_data *
311 *===========================================================================*/
312 int swap_proc_dyn_data(struct vmproc *src_vmp, struct vmproc *dst_vmp,
313 int sys_upd_flags)
315 int is_vm;
316 int r;
318 is_vm = (dst_vmp->vm_endpoint == VM_PROC_NR);
320 /* For VM, transfer memory mapped regions first. */
321 if(is_vm) {
322 #if LU_DEBUG
323 printf("VM: swap_proc_dyn_data: tranferring memory mapped regions from old (%d) to new VM (%d)\n",
324 src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
325 #endif
326 r = pt_map_in_range(src_vmp, dst_vmp, VM_OWN_HEAPBASE, VM_OWN_MMAPTOP);
327 if(r != OK) {
328 printf("swap_proc_dyn_data: pt_map_in_range failed\n");
329 return r;
331 r = pt_map_in_range(src_vmp, dst_vmp, VM_STACKTOP, VM_DATATOP);
332 if(r != OK) {
333 printf("swap_proc_dyn_data: pt_map_in_range failed\n");
334 return r;
339 #if LU_DEBUG
340 printf("VM: swap_proc_dyn_data: swapping regions' parents for %d (%d) and %d (%d)\n",
341 src_vmp->vm_endpoint, src_vmp->vm_slot,
342 dst_vmp->vm_endpoint, dst_vmp->vm_slot);
343 #endif
345 /* Swap vir_regions' parents. */
346 map_setparent(src_vmp);
347 map_setparent(dst_vmp);
349 /* Don't transfer mmapped regions if not required. */
350 if(is_vm || (sys_upd_flags & (SF_VM_ROLLBACK|SF_VM_NOMMAP))) {
351 return OK;
354 /* Make sure regions are consistent. */
355 assert(region_search_root(&src_vmp->vm_regions_avl) && region_search_root(&dst_vmp->vm_regions_avl));
357 /* Source and destination are intentionally swapped here! */
358 return map_proc_dyn_data(dst_vmp, src_vmp);
361 void *mmap(void *addr, size_t len, int f, int f2, int f3, off_t o)
363 void *ret;
364 phys_bytes p;
366 assert(!addr);
367 assert(!(len % VM_PAGE_SIZE));
369 ret = vm_allocpages(&p, VMP_SLAB, len/VM_PAGE_SIZE);
371 if(!ret) return MAP_FAILED;
372 memset(ret, 0, len);
373 return ret;
376 int munmap(void * addr, size_t len)
378 vm_freepages((vir_bytes) addr, roundup(len, VM_PAGE_SIZE)/VM_PAGE_SIZE);
379 return 0;
382 #ifdef __weak_alias
383 __weak_alias(brk, _brk)
384 #endif
385 int _brk(void *addr)
387 /* brk is a special case function to allow vm itself to
388 allocate memory in it's own (cacheable) HEAP */
389 vir_bytes target = roundup((vir_bytes)addr, VM_PAGE_SIZE), v;
390 extern char _end;
391 extern char *_brksize;
392 static vir_bytes prevbrk = (vir_bytes) &_end;
393 struct vmproc *vmprocess = &vmproc[VM_PROC_NR];
395 for(v = roundup(prevbrk, VM_PAGE_SIZE); v < target;
396 v += VM_PAGE_SIZE) {
397 phys_bytes mem, newpage = alloc_mem(1, 0);
398 if(newpage == NO_MEM) return -1;
399 mem = CLICK2ABS(newpage);
400 if(pt_writemap(vmprocess, &vmprocess->vm_pt,
401 v, mem, VM_PAGE_SIZE,
402 ARCH_VM_PTE_PRESENT
403 | ARCH_VM_PTE_USER
404 | ARCH_VM_PTE_RW
405 #if defined(__arm__)
406 | ARM_VM_PTE_CACHED
407 #endif
408 , 0) != OK) {
409 free_mem(newpage, 1);
410 return -1;
412 prevbrk = v + VM_PAGE_SIZE;
415 _brksize = (char *) addr;
417 if(sys_vmctl(SELF, VMCTL_FLUSHTLB, 0) != OK)
418 panic("flushtlb failed");
420 return 0;
423 /*===========================================================================*
424 * do_getrusage *
425 *===========================================================================*/
426 int do_getrusage(message *m)
428 int res, slot;
429 struct vmproc *vmp;
430 struct rusage r_usage;
432 /* If the request is not from PM, it is coming directly from userland.
433 * This is an obsolete construction. In the future, userland programs
434 * should no longer be allowed to call vm_getrusage(2) directly at all.
435 * For backward compatibility, we simply return success for now.
437 if (m->m_source != PM_PROC_NR)
438 return OK;
440 /* Get the process for which resource usage is requested. */
441 if ((res = vm_isokendpt(m->m_lsys_vm_rusage.endpt, &slot)) != OK)
442 return ESRCH;
444 vmp = &vmproc[slot];
446 /* We are going to change only a few fields, so copy in the rusage
447 * structure first. The structure is still in PM's address space at
448 * this point, so use the message source.
450 if ((res = sys_datacopy(m->m_source, m->m_lsys_vm_rusage.addr,
451 SELF, (vir_bytes) &r_usage, (vir_bytes) sizeof(r_usage))) < 0)
452 return res;
454 if (!m->m_lsys_vm_rusage.children) {
455 r_usage.ru_maxrss = vmp->vm_total_max / 1024L; /* unit is KB */
456 r_usage.ru_minflt = vmp->vm_minor_page_fault;
457 r_usage.ru_majflt = vmp->vm_major_page_fault;
458 } else {
459 /* XXX TODO: return the fields for terminated, waited-for
460 * children of the given process. We currently do not have this
461 * information! In the future, rather than teaching VM about
462 * the process hierarchy, PM should probably tell VM at process
463 * exit time which other process should inherit its resource
464 * usage fields. For now, we assume PM clears the fields before
465 * making this call, so we don't zero the fields explicitly.
469 /* Copy out the resulting structure back to PM. */
470 return sys_datacopy(SELF, (vir_bytes) &r_usage, m->m_source,
471 m->m_lsys_vm_rusage.addr, (vir_bytes) sizeof(r_usage));
474 /*===========================================================================*
475 * adjust_proc_refs *
476 *===========================================================================*/
477 void adjust_proc_refs()
479 struct vmproc *vmp;
480 region_iter iter;
482 /* Fix up region parents. */
483 for(vmp = vmproc; vmp < &vmproc[VMP_NR]; vmp++) {
484 struct vir_region *vr;
485 if(!(vmp->vm_flags & VMF_INUSE))
486 continue;
487 region_start_iter_least(&vmp->vm_regions_avl, &iter);
488 while((vr = region_get_iter(&iter))) {
489 USE(vr, vr->parent = vmp;);
490 region_incr_iter(&iter);