1 .\" $NetBSD: uvm.9,v 1.100 2009/10/21 22:18:37 wiz Exp $
3 .\" Copyright (c) 1998 Matthew R. Green
4 .\" All rights reserved.
6 .\" Redistribution and use in source and binary forms, with or without
7 .\" modification, are permitted provided that the following conditions
9 .\" 1. Redistributions of source code must retain the above copyright
10 .\" notice, this list of conditions and the following disclaimer.
11 .\" 2. Redistributions in binary form must reproduce the above copyright
12 .\" notice, this list of conditions and the following disclaimer in the
13 .\" documentation and/or other materials provided with the distribution.
15 .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 .\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 .\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 .\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 .\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
20 .\" BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 .\" LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
22 .\" AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23 .\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 .Nd virtual memory system external interface
37 The UVM virtual memory system manages access to the computer's memory
39 User processes and the kernel access these resources through
40 UVM's external interface.
41 UVM's external interface includes functions that:
45 initialize UVM sub-systems
47 manage virtual address spaces
51 memory map files and devices
53 perform uio-based I/O to virtual memory
55 allocate and free kernel virtual memory
57 allocate and free physical memory
60 In addition to exporting these services, UVM has two kernel-level processes:
61 pagedaemon and swapper.
62 The pagedaemon process sleeps until physical memory becomes scarce.
63 When that happens, pagedaemon is awoken.
64 It scans physical memory, paging out and freeing memory that has not
66 The swapper process swaps in runnable processes that are currently swapped
67 out, if there is room.
69 There are also several miscellaneous functions.
75 .Fn uvm_init_limits "struct lwp *l" ;
77 .Fn uvm_setpagesize "void" ;
79 .Fn uvm_swap_init "void" ;
83 sets up the UVM system at system boot time, after the
84 console has been set up.
85 It initializes global state, the page, map, kernel virtual memory state,
86 machine-dependent physical map, kernel memory allocator,
87 pager and anonymous memory sub-systems, and then enables
88 paging of kernel objects.
91 initializes process limits for the named process.
92 This is for use by the system startup for process zero, before any
93 other processes are created.
96 initializes the uvmexp members pagesize (if not already done by
97 machine-dependent code), pageshift and pagemask.
98 It should be called by machine-dependent code early in the
104 initializes the swap sub-system.
105 .Sh VIRTUAL ADDRESS SPACE MANAGEMENT
108 .Fn uvm_map "struct vm_map *map" "vaddr_t *startp" "vsize_t size" "struct uvm_object *uobj" "voff_t uoffset" "vsize_t align" "uvm_flag_t flags" ;
110 .Fn uvm_unmap "struct vm_map *map" "vaddr_t start" "vaddr_t end" ;
112 .Fn uvm_map_pageable "struct vm_map *map" "vaddr_t start" "vaddr_t end" "bool new_pageable" "int lockflags" ;
114 .Fn uvm_map_checkprot "struct vm_map *map" "vaddr_t start" "vaddr_t end" "vm_prot_t protection" ;
116 .Fn uvm_map_protect "struct vm_map *map" "vaddr_t start" "vaddr_t end" "vm_prot_t new_prot" "bool set_max" ;
118 .Fn uvm_deallocate "struct vm_map *map" "vaddr_t start" "vsize_t size" ;
119 .It Ft struct vmspace *
120 .Fn uvmspace_alloc "vaddr_t min" "vaddr_t max" "int pageable" ;
122 .Fn uvmspace_exec "struct lwp *l" "vaddr_t start" "vaddr_t end" ;
123 .It Ft struct vmspace *
124 .Fn uvmspace_fork "struct vmspace *vm" ;
126 .Fn uvmspace_free "struct vmspace *vm1" ;
128 .Fn uvmspace_share "struct proc *p1" "struct proc *p2" ;
130 .Fn uvmspace_unshare "struct lwp *l" ;
132 .Fn uvm_uarea_alloc "vaddr_t *uaddrp" ;
134 .Fn uvm_uarea_free "vaddr_t uaddr" ;
138 establishes a valid mapping in map
140 which must be unlocked.
141 The new mapping has size
143 which must be a multiple of
149 arguments can have four meanings.
157 .Dv UVM_UNKNOWN_OFFSET ,
159 does not use the machine-dependent
164 is any other value, it is used as the hint to
173 .Dv UVM_UNKNOWN_OFFSET ,
175 finds the offset based upon the virtual address, passed as
179 is any other value, we are doing a normal mapping at this offset.
180 The start address of the map will be returned in
184 specifies alignment of mapping unless
189 must be a power of 2.
194 are typically created using the
195 .Fn UVM_MAPFLAG "vm_prot_t prot" "vm_prot_t maxprot" "vm_inherit_t inh" "int advice" "int flags"
196 macro, which uses the following values.
203 #define UVM_PROT_MASK 0x07 /* protection mask */
204 #define UVM_PROT_NONE 0x00 /* protection none */
205 #define UVM_PROT_ALL 0x07 /* everything */
206 #define UVM_PROT_READ 0x01 /* read */
207 #define UVM_PROT_WRITE 0x02 /* write */
208 #define UVM_PROT_EXEC 0x04 /* exec */
209 #define UVM_PROT_R 0x01 /* read */
210 #define UVM_PROT_W 0x02 /* write */
211 #define UVM_PROT_RW 0x03 /* read-write */
212 #define UVM_PROT_X 0x04 /* exec */
213 #define UVM_PROT_RX 0x05 /* read-exec */
214 #define UVM_PROT_WX 0x06 /* write-exec */
215 #define UVM_PROT_RWX 0x07 /* read-write-exec */
222 #define UVM_INH_MASK 0x30 /* inherit mask */
223 #define UVM_INH_SHARE 0x00 /* "share" */
224 #define UVM_INH_COPY 0x10 /* "copy" */
225 #define UVM_INH_NONE 0x20 /* "none" */
226 #define UVM_INH_DONATE 0x30 /* "donate" \*[Lt]\*[Lt] not used */
233 #define UVM_ADV_NORMAL 0x0 /* 'normal' */
234 #define UVM_ADV_RANDOM 0x1 /* 'random' */
235 #define UVM_ADV_SEQUENTIAL 0x2 /* 'sequential' */
236 #define UVM_ADV_MASK 0x7 /* mask */
243 #define UVM_FLAG_FIXED 0x010000 /* find space */
244 #define UVM_FLAG_OVERLAY 0x020000 /* establish overlay */
245 #define UVM_FLAG_NOMERGE 0x040000 /* don't merge map entries */
246 #define UVM_FLAG_COPYONW 0x080000 /* set copy_on_write flag */
247 #define UVM_FLAG_AMAPPAD 0x100000 /* for bss: pad amap to reduce malloc() */
248 #define UVM_FLAG_TRYLOCK 0x200000 /* fail if we can not lock map */
253 macro arguments can be combined with an OR operator.
254 There are several special purpose macros for checking protection
255 combinations, e.g., the
258 There are also some additional macros to extract bits from the flags.
262 .Dv UVM_MAXPROTECTION
265 macros return the protection, inheritance, maximum protection and advice,
268 returns a standard UVM return value.
271 removes a valid mapping,
278 which must be unlocked.
281 changes the pageability of the pages in the range from
290 returns a standard UVM return value.
292 .Fn uvm_map_checkprot
293 checks the protection of the range from
307 changes the protection
315 also setting the maximum protection of the region to
320 This function returns a standard UVM return value.
323 deallocates kernel memory in map
331 allocates and returns a new address space, with ranges from
335 setting the pageability of the address space to
339 either reuses the address space of lwp
341 if there are no other references to it, or creates
344 The range of valid addresses in the address space is reset to
350 creates and returns a new address space based upon the
352 address space, typically used when allocating an address space for a
356 lowers the reference count on the address space
358 freeing the data structures if there are no other references.
363 to share the address space of
369 has its own, unshared address space, by creating a new one if
374 allocates virtual space for a u-area (i.e., a kernel stack) and stores
375 its virtual address in
379 if the u-area is already backed by wired physical memory, otherwise
383 frees a u-area allocated with
384 .Fn uvm_uarea_alloc ,
385 freeing both the virtual space and any physical pages which may have been
386 allocated to back that virtual space later.
387 .Sh PAGE FAULT HANDLING
390 .Fn uvm_fault "struct vm_map *orig_map" "vaddr_t vaddr" "vm_prot_t access_type" ;
394 is the main entry point for faults.
397 as the map the fault originated in, a
399 offset into the map the fault occurred, and
401 describing the type of access requested.
403 returns a standard UVM return value.
404 .Sh VIRTUAL MEMORY I/O
407 .Fn uvm_io "struct vm_map *map" "struct uio *uio" ;
411 performs the I/O described in
413 on the memory described in
415 .Sh ALLOCATION OF KERNEL MEMORY
418 .Fn uvm_km_alloc "struct vm_map *map" "vsize_t size" "vsize_t align" "uvm_flag_t flags" ;
420 .Fn uvm_km_free "struct vm_map *map" "vaddr_t addr" "vsize_t size" "uvm_flag_t flags" ;
421 .It Ft struct vm_map *
422 .Fn uvm_km_suballoc "struct vm_map *map" "vaddr_t *min" "vaddr_t *max" \
423 "vsize_t size" "int flags" "bool fixed" "struct vm_map *submap" ;
429 bytes of kernel memory in map
431 The first address of the allocated memory range will be aligned according to the
434 .Pq specify 0 if no alignment is necessary .
435 The alignment must be a multiple of page size.
438 is a bitwise inclusive OR of the allocation type and operation flags.
440 The allocation type should be one of:
441 .Bl -tag -width UVM_KMF_PAGEABLE
445 Demand-paged zero-filled memory.
447 Virtual address only.
448 No physical pages are mapped in the allocated region.
449 If necessary, it's the caller's responsibility to enter page mappings.
450 It's also the caller's responsibility to clean up the mappings before freeing
454 The following operation flags are available:
455 .Bl -tag -width UVM_KMF_PAGEABLE
463 Request zero-filled memory.
466 Shouldn't be used with other types.
468 Fail if we can't lock the map.
470 Fail immediately if no memory is available.
472 Sleep to wait for the virtual address resources if needed.
483 will never fail, but rather sleep indefinitely until the allocation succeeds.)
485 Pageability of the pages allocated with
488 .Fn uvm_map_pageable .
489 In that case, the entire range must be changed atomically.
490 Changing a part of the range is not supported.
493 frees the memory range allocated by
496 must be an address returned by
501 must be the same as the ones used for the corresponding
504 must be the allocation type used for the corresponding
508 is the only way to free memory ranges allocated by
514 allocates submap from
516 creating a new map if
520 The addresses of the submap can be specified exactly by setting the
522 argument to true, which causes the
524 argument to specify the beginning of the address in the submap.
527 is false, any address of size
529 will be allocated from
531 and the start and end addresses returned in
537 are used to initialize the created submap.
538 The following flags could be set:
539 .Bl -tag -width VM_MAP_PAGEABLE
541 Entries in the map may be paged out.
543 Map should be interrupt-safe.
545 A top-down mapping should be arranged.
547 .Sh ALLOCATION OF PHYSICAL MEMORY
549 .It Ft struct vm_page *
550 .Fn uvm_pagealloc "struct uvm_object *uobj" "voff_t off" "struct vm_anon *anon" "int flags" ;
552 .Fn uvm_pagerealloc "struct vm_page *pg" "struct uvm_object *newobj" "voff_t newoff" ;
554 .Fn uvm_pagefree "struct vm_page *pg" ;
556 .Fn uvm_pglistalloc "psize_t size" "paddr_t low" "paddr_t high" "paddr_t alignment" "paddr_t boundary" "struct pglist *rlist" "int nsegs" "int waitok" ;
558 .Fn uvm_pglistfree "struct pglist *list" ;
560 .Fn uvm_page_physload "paddr_t start" "paddr_t end" "paddr_t avail_start" "paddr_t avail_end" "int free_list" ;
564 allocates a page of memory at virtual address
568 or the anonymous memory
570 which must be locked by the caller.
579 when no page can be found.
580 The flags can be any of
582 #define UVM_PGA_USERESERVE 0x0001 /* ok to use reserve pages */
583 #define UVM_PGA_ZERO 0x0002 /* returned page must be zero'd */
586 .Dv UVM_PGA_USERESERVE
587 means to allocate a page even if that will result in the number of free pages
589 .Dv uvmexp.reserve_pagedaemon
590 (if the current thread is the pagedaemon) or
591 .Dv uvmexp.reserve_kernel
592 (if the current thread is not the pagedaemon).
594 causes the returned page to be filled with zeroes, either by allocating it
595 from a pool of pre-zeroed pages or by zeroing it in-line as necessary.
606 frees the physical page
608 If the content of the page is known to be zero-filled,
611 in pg-\*[Gt]flags so that the page allocator will use
612 the page to serve future
614 requests efficiently.
617 allocates a list of pages for size
619 bytes under various constraints.
623 describe the lowest and highest addresses acceptable for the list.
626 is non-zero, it describes the required alignment of the list, in
627 power-of-two notation.
630 is non-zero, no segment of the list may cross this power-of-two
631 boundary, relative to zero.
633 is the maximum number of physically contiguous segments.
636 is non-zero, the function may sleep until enough memory is available.
637 (It also may give up in some situations, so a non-zero
641 cannot return an error.)
642 The allocated memory is returned in the
644 list; the caller has to provide storage only; the list is initialized by
645 .Fn uvm_pglistalloc .
648 frees the list of pages pointed to by
650 If the content of the page is known to be zero-filled,
653 in pg-\*[Gt]flags so that the page allocator will use
654 the page to serve future
656 requests efficiently.
658 .Fn uvm_page_physload
659 loads physical memory segments into VM space on the specified
661 It must be called at system boot time to set up physical memory
663 The arguments describe the
667 of the physical addresses of the segment, and the available start and end
668 addresses of pages not already in use.
669 If a system has memory banks of
670 different speeds, the slower memory should be given a higher
673 .\" XXX expand on "system boot time"!
677 .Fn uvm_pageout "void" ;
679 .Fn uvm_scheduler "void" ;
683 is the main loop for the page daemon.
686 is the process zero main loop, which is to be called after the
687 system has finished starting other processes.
688 It handles the swapping in of runnable, swapped out processes in priority
693 .Fn uvm_loan "struct vm_map *map" "vaddr_t start" "vsize_t len" "void *v" "int flags" ;
695 .Fn uvm_unloan "void *v" "int npages" "int flags" ;
699 loans pages in a map out to anons or to the kernel.
705 should be multiples of
711 #define UVM_LOAN_TOANON 0x01 /* loan to anons */
712 #define UVM_LOAN_TOPAGE 0x02 /* loan to kernel */
716 should be a pointer to an array of pointers to
721 The caller has to allocate memory for the array and
722 ensure it's big enough to hold
725 Returns 0 for success, or appropriate error number otherwise.
726 Note that wired pages can't be loaned out and
728 will fail in that case.
731 kills loans on pages or anons.
734 must point to the array of pointers initialized by a previous call to
737 should match the number of pages allocated for the loan; this also matches
738 the number of items in the array.
743 #define UVM_LOAN_TOANON 0x01 /* loan to anons */
744 #define UVM_LOAN_TOPAGE 0x02 /* loan to kernel */
747 and should match what was used for the previous call to
749 .Sh MISCELLANEOUS FUNCTIONS
751 .It Ft struct uvm_object *
752 .Fn uao_create "vsize_t size" "int flags" ;
754 .Fn uao_detach "struct uvm_object *uobj" ;
756 .Fn uao_reference "struct uvm_object *uobj" ;
758 .Fn uvm_chgkprot "void *addr" "size_t len" "int rw" ;
760 .Fn uvm_kernacc "void *addr" "size_t len" "int rw" ;
762 .Fn uvm_vslock "struct vmspace *vs" "void *addr" "size_t len" "vm_prot_t prot" ;
764 .Fn uvm_vsunlock "struct vmspace *vs" "void *addr" "size_t len" ;
766 .Fn uvm_meter "void" ;
768 .Fn uvm_proc_fork "struct proc *p1" "struct proc *p2" "bool shared" ;
770 .Fn uvm_grow "struct proc *p" "vaddr_t sp" ;
772 .Fn uvn_findpages "struct uvm_object *uobj" "voff_t offset" "int *npagesp" "struct vm_page **pps" "int flags" ;
774 .Fn uvm_vnp_setsize "struct vnode *vp" "voff_t newsize" ;
776 .Fn uvm_swap_stats "int cmd" "struct swapent *sep" "int sec" "register_t *retval" ;
784 functions operate on anonymous memory objects, such as those used to support
785 System V shared memory.
787 returns an object of size
791 #define UAO_FLAG_KERNOBJ 0x1 /* create kernel object */
792 #define UAO_FLAG_KERNSWAP 0x2 /* enable kernel swap */
795 which can only be used once each at system boot time.
797 creates an additional reference to the named anonymous memory object.
799 removes a reference from the named anonymous memory object, destroying
800 it if removing the last reference.
803 changes the protection of kernel memory from
809 This is primarily useful for debuggers, for setting breakpoints.
810 This function is only available with options
814 checks the access at address
820 access in the kernel address space.
825 control the wiring and unwiring of pages for process
831 These functions are normally used to wire memory for I/O.
834 calculates the load average.
837 forks a virtual address space for process' (old)
843 argument is non-zero, p1 shares its address space with p2,
844 otherwise a new address space is created.
845 This function currently has no return value, and thus cannot fail.
846 In the future, this function will be changed to allow it to
847 fail in low memory conditions.
850 increases the stack segment of process
856 looks up or creates pages in
860 marks them busy and returns them in the
865 must be a vnode object.
866 The number of pages requested is pointed to by
868 and this value is updated with the actual number of pages returned.
871 #define UFP_ALL 0x00 /* return all pages requested */
872 #define UFP_NOWAIT 0x01 /* don't sleep */
873 #define UFP_NOALLOC 0x02 /* don't allocate new pages */
874 #define UFP_NOCACHE 0x04 /* don't return pages which already exist */
875 #define UFP_NORDONLY 0x08 /* don't return PG_READONLY pages */
879 is a pseudo-flag meaning all requested pages should be returned.
881 means that we must not sleep.
883 causes any pages which do not already exist to be skipped.
885 causes any pages which do already exist to be skipped.
887 causes any pages which are marked PG_READONLY to be skipped.
890 sets the size of vnode
894 Caller must hold a reference to the vnode.
895 If the vnode shrinks, pages no longer used are discarded.
906 is the requested command,
910 The function will copy no more than
912 entries in the array pointed by
916 holds the actual number of entries copied in the array.
918 UVM provides support for the
929 nodes, which return the current load averages, the current VM
930 totals, the uvmexp structure, and a kernel-version-independent
931 view of the uvmexp structure, respectively.
932 It also exports a number of tunables that control how much VM space is
933 allowed to be consumed by various tasks.
934 The load averages are typically accessed from userland using the
937 The uvmexp structure has all global state of the UVM system,
938 and has the following members:
940 /* vm_page constants */
941 int pagesize; /* size of a page (PAGE_SIZE): must be power of 2 */
942 int pagemask; /* page mask */
943 int pageshift; /* page shift */
945 /* vm_page counters */
946 int npages; /* number of pages we manage */
947 int free; /* number of free pages */
948 int active; /* number of active pages */
949 int inactive; /* number of pages that we free'd but may want back */
950 int paging; /* number of pages in the process of being paged out */
951 int wired; /* number of wired pages */
952 int reserve_pagedaemon; /* number of pages reserved for pagedaemon */
953 int reserve_kernel; /* number of pages reserved for kernel */
956 int freemin; /* min number of free pages */
957 int freetarg; /* target number of free pages */
958 int inactarg; /* target number of inactive pages */
959 int wiredmax; /* max number of wired pages */
962 int nswapdev; /* number of configured swap devices in system */
963 int swpages; /* number of PAGE_SIZE'ed swap pages */
964 int swpginuse; /* number of swap pages in use */
965 int nswget; /* number of times fault calls uvm_swap_get() */
966 int nanon; /* number total of anon's in system */
967 int nfreeanon; /* number of free anon's */
970 int faults; /* page fault count */
971 int traps; /* trap count */
972 int intrs; /* interrupt count */
973 int swtch; /* context switch count */
974 int softs; /* software interrupt count */
975 int syscalls; /* system calls */
976 int pageins; /* pagein operation count */
977 /* pageouts are in pdpageouts below */
978 int pgswapin; /* pages swapped in */
979 int pgswapout; /* pages swapped out */
980 int forks; /* forks */
981 int forks_ppwait; /* forks where parent waits */
982 int forks_sharevm; /* forks where vmspace is shared */
984 /* fault subcounters */
985 int fltnoram; /* number of times fault was out of ram */
986 int fltnoanon; /* number of times fault was out of anons */
987 int fltpgwait; /* number of times fault had to wait on a page */
988 int fltpgrele; /* number of times fault found a released page */
989 int fltrelck; /* number of times fault relock called */
990 int fltrelckok; /* number of times fault relock is a success */
991 int fltanget; /* number of times fault gets anon page */
992 int fltanretry; /* number of times fault retries an anon get */
993 int fltamcopy; /* number of times fault clears "needs copy" */
994 int fltnamap; /* number of times fault maps a neighbor anon page */
995 int fltnomap; /* number of times fault maps a neighbor obj page */
996 int fltlget; /* number of times fault does a locked pgo_get */
997 int fltget; /* number of times fault does an unlocked get */
998 int flt_anon; /* number of times fault anon (case 1a) */
999 int flt_acow; /* number of times fault anon cow (case 1b) */
1000 int flt_obj; /* number of times fault is on object page (2a) */
1001 int flt_prcopy; /* number of times fault promotes with copy (2b) */
1002 int flt_przero; /* number of times fault promotes with zerofill (2b) */
1004 /* daemon counters */
1005 int pdwoke; /* number of times daemon woke up */
1006 int pdrevs; /* number of times daemon rev'd clock hand */
1007 int pdfreed; /* number of pages daemon freed since boot */
1008 int pdscans; /* number of pages daemon scanned since boot */
1009 int pdanscan; /* number of anonymous pages scanned by daemon */
1010 int pdobscan; /* number of object pages scanned by daemon */
1011 int pdreact; /* number of pages daemon reactivated since boot */
1012 int pdbusy; /* number of times daemon found a busy page */
1013 int pdpageouts; /* number of times daemon started a pageout */
1014 int pdpending; /* number of times daemon got a pending pageout */
1015 int pddeact; /* number of pages daemon deactivates */
1019 is only available if the kernel has been compiled with options
1022 All structure and types whose names begin with
1033 .Xr memoryallocators 9 ,
1037 UVM is a new VM system developed at Washington University in St. Louis
1039 UVM's roots lie partly in the Mach-based
1043 VM system, and the SunOS 4 VM system.
1044 UVM's basic structure is based on the
1047 UVM's new anonymous memory system is based on the
1048 anonymous memory system found in the SunOS 4 VM (as described in papers
1049 published by Sun Microsystems, Inc.).
1050 UVM also includes a number of features new to
1052 including page loanout, map entry passing, simplified
1053 copy-on-write, and clustered anonymous memory pageout.
1054 UVM is also further documented in an August 1998 dissertation by
1061 .Aq chuck@ccrc.wustl.edu
1062 designed and implemented UVM.
1065 .Aq mrg@eterna.com.au
1066 wrote the swap-space management code and handled the logistical issues
1067 involved with merging UVM into the
1073 implemented the aobj pager, thus allowing UVM to support System V shared
1074 memory and process swapping.