dmake: do not set MAKEFLAGS=k
[unleashed/tickless.git] / kernel / vm / seg_umap.c
blob75d7da449698d9ffde0cadcb4a743aaa507f3541
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2016 Joyent, Inc.
 */

/*
 * VM - Kernel-to-user mapping segment
 *
 * The umap segment driver was primarily designed to facilitate the comm page:
 * a portion of kernel memory shared with userspace so that certain (namely
 * clock-related) actions could operate without making an expensive trip into
 * the kernel.
 *
 * Since the initial requirements for the comm page are slim, advanced features
 * of the segment driver such as per-page protection have been left
 * unimplemented at this time.
 */
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/errno.h>
33 #include <sys/cred.h>
34 #include <sys/kmem.h>
35 #include <sys/lgrp.h>
36 #include <sys/mman.h>
38 #include <vm/hat.h>
39 #include <vm/as.h>
40 #include <vm/seg.h>
41 #include <vm/seg_kmem.h>
42 #include <vm/seg_umap.h>
45 static boolean_t segumap_verify_safe(caddr_t, size_t);
46 static int segumap_dup(struct seg *, struct seg *);
47 static int segumap_unmap(struct seg *, caddr_t, size_t);
48 static void segumap_free(struct seg *);
49 static faultcode_t segumap_fault(struct hat *, struct seg *, caddr_t, size_t,
50 enum fault_type, enum seg_rw);
51 static faultcode_t segumap_faulta(struct seg *, caddr_t);
52 static int segumap_setprot(struct seg *, caddr_t, size_t, uint_t);
53 static int segumap_checkprot(struct seg *, caddr_t, size_t, uint_t);
54 static int segumap_sync(struct seg *, caddr_t, size_t, int, uint_t);
55 static size_t segumap_incore(struct seg *, caddr_t, size_t, char *);
56 static int segumap_lockop(struct seg *, caddr_t, size_t, int, int, ulong_t *,
57 size_t);
58 static int segumap_getprot(struct seg *, caddr_t, size_t, uint_t *);
59 static uoff_t segumap_getoffset(struct seg *, caddr_t);
60 static int segumap_gettype(struct seg *, caddr_t);
61 static int segumap_getvp(struct seg *, caddr_t, struct vnode **);
62 static int segumap_advise(struct seg *, caddr_t, size_t, uint_t);
63 static void segumap_dump(struct seg *);
64 static int segumap_pagelock(struct seg *, caddr_t, size_t, struct page ***,
65 enum lock_type, enum seg_rw);
66 static int segumap_setpagesize(struct seg *, caddr_t, size_t, uint_t);
67 static int segumap_getmemid(struct seg *, caddr_t, memid_t *);
68 static int segumap_capable(struct seg *, segcapability_t);
70 static struct seg_ops segumap_ops = {
71 .dup = segumap_dup,
72 .unmap = segumap_unmap,
73 .free = segumap_free,
74 .fault = segumap_fault,
75 .faulta = segumap_faulta,
76 .setprot = segumap_setprot,
77 .checkprot = segumap_checkprot,
78 .sync = segumap_sync,
79 .incore = segumap_incore,
80 .lockop = segumap_lockop,
81 .getprot = segumap_getprot,
82 .getoffset = segumap_getoffset,
83 .gettype = segumap_gettype,
84 .getvp = segumap_getvp,
85 .advise = segumap_advise,
86 .dump = segumap_dump,
87 .pagelock = segumap_pagelock,
88 .setpagesize = segumap_setpagesize,
89 .getmemid = segumap_getmemid,
90 .capable = segumap_capable,
95 * Create a kernel/user-mapped segment.
97 int
98 segumap_create(struct seg *seg, void *argsp)
100 segumap_crargs_t *a = (struct segumap_crargs *)argsp;
101 segumap_data_t *data;
103 ASSERT((uintptr_t)a->kaddr > _userlimit);
106 * Check several aspects of the mapping request to ensure validity:
107 * - kernel pages must reside entirely in kernel space
108 * - target protection must be user-accessible
109 * - kernel address must be page-aligned
110 * - kernel address must reside inside a "safe" segment
112 if ((uintptr_t)a->kaddr <= _userlimit ||
113 ((uintptr_t)a->kaddr + seg->s_size) < (uintptr_t)a->kaddr ||
114 (a->prot & PROT_USER) == 0 ||
115 ((uintptr_t)a->kaddr & PAGEOFFSET) != 0 ||
116 !segumap_verify_safe(a->kaddr, seg->s_size)) {
117 return (EINVAL);
120 data = kmem_zalloc(sizeof (*data), KM_SLEEP);
121 rw_init(&data->sud_lock, NULL, RW_DEFAULT, NULL);
122 data->sud_kaddr = a->kaddr;
123 data->sud_prot = a->prot;
125 seg->s_ops = &segumap_ops;
126 seg->s_data = data;
127 return (0);
130 static boolean_t
131 segumap_verify_safe(caddr_t kaddr, size_t len)
133 struct seg *seg;
136 * Presently, only pages which are backed by segkmem are allowed to be
137 * shared with userspace. This prevents nasty paging behavior with
138 * other drivers such as seg_kp. Furthermore, the backing kernel
139 * segment must completely contain the region to be mapped.
141 * Failing these checks is fatal for now since such mappings are done
142 * in a very limited context from the kernel.
144 AS_LOCK_ENTER(&kas, RW_READER);
145 seg = as_segat(&kas, kaddr);
146 VERIFY(seg != NULL);
147 VERIFY(seg->s_base + seg->s_size >= kaddr + len);
148 VERIFY(seg->s_ops == &segkmem_ops);
149 AS_LOCK_EXIT(&kas);
151 return (B_TRUE);
154 static int
155 segumap_dup(struct seg *seg, struct seg *newseg)
157 segumap_data_t *sud = (segumap_data_t *)seg->s_data;
158 segumap_data_t *newsud;
160 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
162 newsud = kmem_zalloc(sizeof (segumap_data_t), KM_SLEEP);
163 rw_init(&newsud->sud_lock, NULL, RW_DEFAULT, NULL);
164 newsud->sud_kaddr = sud->sud_kaddr;
165 newsud->sud_prot = sud->sud_prot;
167 newseg->s_ops = seg->s_ops;
168 newseg->s_data = newsud;
169 return (0);
172 static int
173 segumap_unmap(struct seg *seg, caddr_t addr, size_t len)
175 segumap_data_t *sud = (segumap_data_t *)seg->s_data;
177 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
179 /* Only allow unmap of entire segment */
180 if (addr != seg->s_base || len != seg->s_size) {
181 return (EINVAL);
183 if (sud->sud_softlockcnt != 0) {
184 return (EAGAIN);
188 * Unconditionally unload the entire segment range.
190 hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP);
192 seg_free(seg);
193 return (0);
196 static void
197 segumap_free(struct seg *seg)
199 segumap_data_t *data = (segumap_data_t *)seg->s_data;
201 ASSERT(data != NULL);
203 rw_destroy(&data->sud_lock);
204 VERIFY(data->sud_softlockcnt == 0);
205 kmem_free(data, sizeof (*data));
206 seg->s_data = NULL;
209 /* ARGSUSED */
210 static faultcode_t
211 segumap_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
212 enum fault_type type, enum seg_rw tw)
214 segumap_data_t *sud = (segumap_data_t *)seg->s_data;
216 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
218 if (type == F_PROT) {
220 * Since protection on the segment is fixed, there is nothing
221 * to do but report an error for protection faults.
223 return (FC_PROT);
224 } else if (type == F_SOFTUNLOCK) {
225 size_t plen = btop(len);
227 rw_enter(&sud->sud_lock, RW_WRITER);
228 VERIFY(sud->sud_softlockcnt >= plen);
229 sud->sud_softlockcnt -= plen;
230 rw_exit(&sud->sud_lock);
231 return (0);
234 ASSERT(type == F_INVAL || type == F_SOFTLOCK);
235 rw_enter(&sud->sud_lock, RW_WRITER);
237 if (type == F_INVAL ||
238 (type == F_SOFTLOCK && sud->sud_softlockcnt == 0)) {
240 * Load the (entire) segment into the HAT.
242 * It's possible that threads racing into as_fault will cause
243 * seg_umap to load the same range multiple times in quick
244 * succession. Redundant hat_devload operations are safe.
246 for (uintptr_t i = 0; i < seg->s_size; i += PAGESIZE) {
247 pfn_t pfn;
249 pfn = hat_getpfnum(kas.a_hat, sud->sud_kaddr + i);
250 VERIFY(pfn != PFN_INVALID);
251 hat_devload(seg->s_as->a_hat, seg->s_base + i,
252 PAGESIZE, pfn, sud->sud_prot, HAT_LOAD);
255 if (type == F_SOFTLOCK) {
256 size_t nval = sud->sud_softlockcnt + btop(len);
258 if (sud->sud_softlockcnt >= nval) {
259 rw_exit(&sud->sud_lock);
260 return (FC_MAKE_ERR(EOVERFLOW));
262 sud->sud_softlockcnt = nval;
265 rw_exit(&sud->sud_lock);
266 return (0);
269 /* ARGSUSED */
270 static faultcode_t
271 segumap_faulta(struct seg *seg, caddr_t addr)
273 /* Do nothing since asynch pagefault should not load translation. */
274 return (0);
277 /* ARGSUSED */
278 static int
279 segumap_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
281 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
284 * The seg_umap driver does not yet allow protection to be changed.
286 return (EACCES);
289 /* ARGSUSED */
290 static int
291 segumap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
293 segumap_data_t *sud = (segumap_data_t *)seg->s_data;
294 int error = 0;
296 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
298 rw_enter(&sud->sud_lock, RW_READER);
299 if ((sud->sud_prot & prot) != prot) {
300 error = EACCES;
302 rw_exit(&sud->sud_lock);
303 return (error);
306 /* ARGSUSED */
307 static int
308 segumap_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
310 /* Always succeed since there are no backing store to sync */
311 return (0);
314 /* ARGSUSED */
315 static size_t
316 segumap_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
318 size_t sz = 0;
320 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
322 len = (len + PAGEOFFSET) & PAGEMASK;
323 while (len > 0) {
324 *vec = 1;
325 sz += PAGESIZE;
326 vec++;
327 len -= PAGESIZE;
329 return (sz);
332 /* ARGSUSED */
333 static int
334 segumap_lockop(struct seg *seg, caddr_t addr, size_t len, int attr, int op,
335 ulong_t *lockmap, size_t pos)
337 /* Report success since kernel pages are always in memory. */
338 return (0);
341 static int
342 segumap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
344 segumap_data_t *sud = (segumap_data_t *)seg->s_data;
345 size_t pgno;
346 uint_t prot;
348 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
350 rw_enter(&sud->sud_lock, RW_READER);
351 prot = sud->sud_prot;
352 rw_exit(&sud->sud_lock);
355 * Reporting protection is simple since it is not tracked per-page.
357 pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
358 while (pgno > 0) {
359 protv[--pgno] = prot;
361 return (0);
364 /* ARGSUSED */
365 static uoff_t
366 segumap_getoffset(struct seg *seg, caddr_t addr)
369 * To avoid leaking information about the layout of the kernel address
370 * space, always report '0' as the offset.
372 return (0);
375 /* ARGSUSED */
376 static int
377 segumap_gettype(struct seg *seg, caddr_t addr)
380 * Since already-existing kernel pages are being mapped into userspace,
381 * always report the segment type as shared.
383 return (MAP_SHARED);
386 /* ARGSUSED */
387 static int
388 segumap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
390 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
392 *vpp = NULL;
393 return (0);
396 /* ARGSUSED */
397 static int
398 segumap_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
400 if (behav == MADV_PURGE) {
401 /* Purge does not make sense for this mapping */
402 return (EINVAL);
404 /* Indicate success for everything else. */
405 return (0);
/*
 * Dump entry point.  The segment only shares kernel data with userspace, so
 * there is nothing additional to dump and the function is intentionally
 * empty.
 */
/* ARGSUSED */
static void
segumap_dump(struct seg *seg)
{
}
418 /* ARGSUSED */
419 static int
420 segumap_pagelock(struct seg *seg, caddr_t addr, size_t len, struct page ***ppp,
421 enum lock_type type, enum seg_rw rw)
423 return (ENOTSUP);
426 /* ARGSUSED */
427 static int
428 segumap_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
430 return (ENOTSUP);
433 static int
434 segumap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
436 segumap_data_t *sud = (segumap_data_t *)seg->s_data;
438 memidp->val[0] = (uintptr_t)sud->sud_kaddr;
439 memidp->val[1] = (uintptr_t)(addr - seg->s_base);
440 return (0);
443 /* ARGSUSED */
444 static int
445 segumap_capable(struct seg *seg, segcapability_t capability)
447 /* no special capablities */
448 return (0);