/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2016 Joyent, Inc.
 */
/*
 * VM - Kernel-to-user mapping segment
 *
 * The umap segment driver was primarily designed to facilitate the comm page:
 * a portion of kernel memory shared with userspace so that certain (namely
 * clock-related) actions could operate without making an expensive trip into
 * the kernel.
 *
 * Since the initial requirements for the comm page are slim, advanced features
 * of the segment driver such as per-page protection have been left
 * unimplemented at this time.
 */
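/*
 * Usage sketch (illustrative only, not part of this driver): a consumer such
 * as the comm page code fills in a segumap_crargs_t and hands segumap_create()
 * to as_map().  The kernel buffer `kbuf`, its length `len`, and the
 * pre-selected user address `uaddr` are assumptions here; a real caller must
 * also choose and validate the user range under as_rangelock().
 */
#if 0
	segumap_crargs_t args;

	args.kaddr = kbuf;			/* page-aligned kernel VA */
	args.prot = PROT_USER | PROT_READ;	/* must include PROT_USER */
	if (as_map(as, uaddr, len, segumap_create, &args) != 0) {
		/* mapping failed; uaddr remains unmapped */
	}
#endif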
#include <sys/types.h>
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/mman.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_umap.h>
static boolean_t segumap_verify_safe(caddr_t, size_t);
static int segumap_dup(struct seg *, struct seg *);
static int segumap_unmap(struct seg *, caddr_t, size_t);
static void segumap_free(struct seg *);
static faultcode_t segumap_fault(struct hat *, struct seg *, caddr_t, size_t,
    enum fault_type, enum seg_rw);
static faultcode_t segumap_faulta(struct seg *, caddr_t);
static int segumap_setprot(struct seg *, caddr_t, size_t, uint_t);
static int segumap_checkprot(struct seg *, caddr_t, size_t, uint_t);
static int segumap_sync(struct seg *, caddr_t, size_t, int, uint_t);
static size_t segumap_incore(struct seg *, caddr_t, size_t, char *);
static int segumap_lockop(struct seg *, caddr_t, size_t, int, int, ulong_t *,
    size_t);
static int segumap_getprot(struct seg *, caddr_t, size_t, uint_t *);
static uoff_t segumap_getoffset(struct seg *, caddr_t);
static int segumap_gettype(struct seg *, caddr_t);
static int segumap_getvp(struct seg *, caddr_t, struct vnode **);
static int segumap_advise(struct seg *, caddr_t, size_t, uint_t);
static void segumap_dump(struct seg *);
static int segumap_pagelock(struct seg *, caddr_t, size_t, struct page ***,
    enum lock_type, enum seg_rw);
static int segumap_setpagesize(struct seg *, caddr_t, size_t, uint_t);
static int segumap_getmemid(struct seg *, caddr_t, memid_t *);
static int segumap_capable(struct seg *, segcapability_t);
static struct seg_ops segumap_ops = {
	.dup		= segumap_dup,
	.unmap		= segumap_unmap,
	.free		= segumap_free,
	.fault		= segumap_fault,
	.faulta		= segumap_faulta,
	.setprot	= segumap_setprot,
	.checkprot	= segumap_checkprot,
	.sync		= segumap_sync,
	.incore		= segumap_incore,
	.lockop		= segumap_lockop,
	.getprot	= segumap_getprot,
	.getoffset	= segumap_getoffset,
	.gettype	= segumap_gettype,
	.getvp		= segumap_getvp,
	.advise		= segumap_advise,
	.dump		= segumap_dump,
	.pagelock	= segumap_pagelock,
	.setpagesize	= segumap_setpagesize,
	.getmemid	= segumap_getmemid,
	.capable	= segumap_capable,
};
/*
 * Create a kernel/user-mapped segment.
 */
int
segumap_create(struct seg *seg, void *argsp)
{
	segumap_crargs_t *a = (struct segumap_crargs *)argsp;
	segumap_data_t *data;

	ASSERT((uintptr_t)a->kaddr > _userlimit);

	/*
	 * Check several aspects of the mapping request to ensure validity:
	 * - kernel pages must reside entirely in kernel space
	 * - target protection must be user-accessible
	 * - kernel address must be page-aligned
	 * - kernel address must reside inside a "safe" segment
	 */
	if ((uintptr_t)a->kaddr <= _userlimit ||
	    ((uintptr_t)a->kaddr + seg->s_size) < (uintptr_t)a->kaddr ||
	    (a->prot & PROT_USER) == 0 ||
	    ((uintptr_t)a->kaddr & PAGEOFFSET) != 0 ||
	    !segumap_verify_safe(a->kaddr, seg->s_size)) {
		return (EINVAL);
	}

	data = kmem_zalloc(sizeof (*data), KM_SLEEP);
	rw_init(&data->sud_lock, NULL, RW_DEFAULT, NULL);
	data->sud_kaddr = a->kaddr;
	data->sud_prot = a->prot;

	seg->s_ops = &segumap_ops;
	seg->s_data = data;
	return (0);
}
static boolean_t
segumap_verify_safe(caddr_t kaddr, size_t len)
{
	struct seg *seg;

	/*
	 * Presently, only pages which are backed by segkmem are allowed to be
	 * shared with userspace.  This prevents nasty paging behavior with
	 * other drivers such as seg_kp.  Furthermore, the backing kernel
	 * segment must completely contain the region to be mapped.
	 *
	 * Failing these checks is fatal for now since such mappings are done
	 * in a very limited context from the kernel.
	 */
	AS_LOCK_ENTER(&kas, RW_READER);
	seg = as_segat(&kas, kaddr);
	VERIFY(seg != NULL);
	VERIFY(seg->s_base + seg->s_size >= kaddr + len);
	VERIFY(seg->s_ops == &segkmem_ops);
	AS_LOCK_EXIT(&kas);

	return (B_TRUE);
}
static int
segumap_dup(struct seg *seg, struct seg *newseg)
{
	segumap_data_t *sud = (segumap_data_t *)seg->s_data;
	segumap_data_t *newsud;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));

	newsud = kmem_zalloc(sizeof (segumap_data_t), KM_SLEEP);
	rw_init(&newsud->sud_lock, NULL, RW_DEFAULT, NULL);
	newsud->sud_kaddr = sud->sud_kaddr;
	newsud->sud_prot = sud->sud_prot;

	newseg->s_ops = seg->s_ops;
	newseg->s_data = newsud;
	return (0);
}
static int
segumap_unmap(struct seg *seg, caddr_t addr, size_t len)
{
	segumap_data_t *sud = (segumap_data_t *)seg->s_data;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));

	/* Only allow unmap of entire segment */
	if (addr != seg->s_base || len != seg->s_size) {
		return (EINVAL);
	}
	if (sud->sud_softlockcnt != 0) {
		return (EAGAIN);
	}

	/*
	 * Unconditionally unload the entire segment range.
	 */
	hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP);

	seg_free(seg);
	return (0);
}
static void
segumap_free(struct seg *seg)
{
	segumap_data_t *data = (segumap_data_t *)seg->s_data;

	ASSERT(data != NULL);

	rw_destroy(&data->sud_lock);
	VERIFY(data->sud_softlockcnt == 0);
	kmem_free(data, sizeof (*data));
	seg->s_data = NULL;
}
static faultcode_t
segumap_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
    enum fault_type type, enum seg_rw tw)
{
	segumap_data_t *sud = (segumap_data_t *)seg->s_data;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	if (type == F_PROT) {
		/*
		 * Since protection on the segment is fixed, there is nothing
		 * to do but report an error for protection faults.
		 */
		return (FC_PROT);
	} else if (type == F_SOFTUNLOCK) {
		size_t plen = btop(len);

		rw_enter(&sud->sud_lock, RW_WRITER);
		VERIFY(sud->sud_softlockcnt >= plen);
		sud->sud_softlockcnt -= plen;
		rw_exit(&sud->sud_lock);
		return (0);
	}

	ASSERT(type == F_INVAL || type == F_SOFTLOCK);
	rw_enter(&sud->sud_lock, RW_WRITER);

	if (type == F_INVAL ||
	    (type == F_SOFTLOCK && sud->sud_softlockcnt == 0)) {
		/*
		 * Load the (entire) segment into the HAT.
		 *
		 * It's possible that threads racing into as_fault will cause
		 * seg_umap to load the same range multiple times in quick
		 * succession.  Redundant hat_devload operations are safe.
		 */
		for (uintptr_t i = 0; i < seg->s_size; i += PAGESIZE) {
			pfn_t pfn;

			pfn = hat_getpfnum(kas.a_hat, sud->sud_kaddr + i);
			VERIFY(pfn != PFN_INVALID);
			hat_devload(seg->s_as->a_hat, seg->s_base + i,
			    PAGESIZE, pfn, sud->sud_prot, HAT_LOAD);
		}
	}

	if (type == F_SOFTLOCK) {
		size_t nval = sud->sud_softlockcnt + btop(len);

		/* Refuse a softlock count that would wrap around. */
		if (sud->sud_softlockcnt >= nval) {
			rw_exit(&sud->sud_lock);
			return (FC_MAKE_ERR(EOVERFLOW));
		}
		sud->sud_softlockcnt = nval;
	}

	rw_exit(&sud->sud_lock);
	return (0);
}
static faultcode_t
segumap_faulta(struct seg *seg, caddr_t addr)
{
	/* Do nothing since asynch pagefault should not load translation. */
	return (0);
}
static int
segumap_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	/*
	 * The seg_umap driver does not yet allow protection to be changed.
	 */
	return (EACCES);
}
static int
segumap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	segumap_data_t *sud = (segumap_data_t *)seg->s_data;
	int error = 0;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	rw_enter(&sud->sud_lock, RW_READER);
	if ((sud->sud_prot & prot) != prot) {
		error = EACCES;
	}
	rw_exit(&sud->sud_lock);
	return (error);
}
static int
segumap_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
{
	/* Always succeed since there is no backing store to sync */
	return (0);
}
static size_t
segumap_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
{
	size_t sz = 0;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	/* Kernel pages backing the segment are always resident. */
	len = (len + PAGEOFFSET) & PAGEMASK;
	while (len > 0) {
		*vec = 1;
		sz += PAGESIZE;
		vec++;
		len -= PAGESIZE;
	}
	return (sz);
}
static int
segumap_lockop(struct seg *seg, caddr_t addr, size_t len, int attr, int op,
    ulong_t *lockmap, size_t pos)
{
	/* Report success since kernel pages are always in memory. */
	return (0);
}
static int
segumap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
{
	segumap_data_t *sud = (segumap_data_t *)seg->s_data;
	size_t pgno;
	uint_t prot;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	rw_enter(&sud->sud_lock, RW_READER);
	prot = sud->sud_prot;
	rw_exit(&sud->sud_lock);

	/*
	 * Reporting protection is simple since it is not tracked per-page.
	 */
	pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
	while (pgno > 0)
		protv[--pgno] = prot;

	return (0);
}
static uoff_t
segumap_getoffset(struct seg *seg, caddr_t addr)
{
	/*
	 * To avoid leaking information about the layout of the kernel address
	 * space, always report '0' as the offset.
	 */
	return (0);
}
static int
segumap_gettype(struct seg *seg, caddr_t addr)
{
	/*
	 * Since already-existing kernel pages are being mapped into userspace,
	 * always report the segment type as shared.
	 */
	return (MAP_SHARED);
}
static int
segumap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

	/* No vnode backs this segment. */
	*vpp = NULL;
	return (0);
}
static int
segumap_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
{
	if (behav == MADV_PURGE) {
		/* Purge does not make sense for this mapping */
		return (EINVAL);
	}

	/* Indicate success for everything else. */
	return (0);
}
static void
segumap_dump(struct seg *seg)
{
	/*
	 * Since this is a mapping to share kernel data with userspace, nothing
	 * additional should be dumped.
	 */
}
static int
segumap_pagelock(struct seg *seg, caddr_t addr, size_t len, struct page ***ppp,
    enum lock_type type, enum seg_rw rw)
{
	return (ENOTSUP);
}
static int
segumap_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
{
	return (ENOTSUP);
}
static int
segumap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
{
	segumap_data_t *sud = (segumap_data_t *)seg->s_data;

	/* Identify the mapping by its kernel address and segment offset. */
	memidp->val[0] = (uintptr_t)sud->sud_kaddr;
	memidp->val[1] = (uintptr_t)(addr - seg->s_base);
	return (0);
}
static int
segumap_capable(struct seg *seg, segcapability_t capability)
{
	/* no special capabilities */
	return (0);
}