dmake: do not set MAKEFLAGS=k
[unleashed/tickless.git] / kernel / vm / seg_dev.c
blob35e4920876a3d93a76c9ef723c4f03bcec779bfe
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
31 * University Copyright- Copyright (c) 1982, 1986, 1988
32 * The Regents of the University of California
33 * All Rights Reserved
35 * University Acknowledgment- Portions of this document are derived from
36 * software developed by the University of California, Berkeley, and its
37 * contributors.
41 * VM - segment of a mapped device.
43 * This segment driver is used when mapping character special devices.
46 #include <sys/types.h>
47 #include <sys/t_lock.h>
48 #include <sys/sysmacros.h>
49 #include <sys/vtrace.h>
50 #include <sys/systm.h>
51 #include <sys/vmsystm.h>
52 #include <sys/mman.h>
53 #include <sys/errno.h>
54 #include <sys/kmem.h>
55 #include <sys/cmn_err.h>
56 #include <sys/vnode.h>
57 #include <sys/proc.h>
58 #include <sys/conf.h>
59 #include <sys/debug.h>
60 #include <sys/ddidevmap.h>
61 #include <sys/ddi_implfuncs.h>
62 #include <sys/lgrp.h>
64 #include <vm/page.h>
65 #include <vm/hat.h>
66 #include <vm/as.h>
67 #include <vm/seg.h>
68 #include <vm/seg_dev.h>
69 #include <vm/seg_kp.h>
70 #include <vm/seg_kmem.h>
71 #include <vm/vpage.h>
73 #include <sys/sunddi.h>
74 #include <sys/esunddi.h>
75 #include <sys/fs/snode.h>
#if DEBUG
int segdev_debug;
#define	DEBUGF(level, args) { if (segdev_debug >= (level)) cmn_err args; }
#else
#define	DEBUGF(level, args)
#endif

/* Default timeout for devmap context management */
#define	CTX_TIMEOUT_VALUE 0

/*
 * Enter/exit dh_lock, but only for handles whose dh_flags carry
 * DEVMAP_ALLOW_REMAP.  Wrapped in do/while (0) so that each macro expands
 * to exactly one statement and cannot capture a following "else"; the
 * argument is parenthesized so any pointer expression may be passed.
 */
#define	HOLD_DHP_LOCK(dhp)	do {					\
	if ((dhp)->dh_flags & DEVMAP_ALLOW_REMAP)			\
		mutex_enter(&(dhp)->dh_lock);				\
} while (0)

#define	RELE_DHP_LOCK(dhp)	do {					\
	if ((dhp)->dh_flags & DEVMAP_ALLOW_REMAP)			\
		mutex_exit(&(dhp)->dh_lock);				\
} while (0)

/* Round a down/up to a multiple of s; s must be a power of two. */
#define	round_down_p2(a, s)	((a) & ~((s) - 1))
#define	round_up_p2(a, s)	(((a) + (s) - 1) & ~((s) - 1))

/*
 * VA_PA_ALIGNED checks to see if both VA and PA are on pgsize boundary
 * VA_PA_PGSIZE_ALIGNED check to see if VA is aligned with PA w.r.t. pgsize
 *
 * pgsize must be a power of two.  Every argument is parenthesized so that
 * expressions such as "1UL << shift" expand with the intended precedence.
 */
#define	VA_PA_ALIGNED(uvaddr, paddr, pgsize)		\
	((((uvaddr) | (paddr)) & ((pgsize) - 1)) == 0)
#define	VA_PA_PGSIZE_ALIGNED(uvaddr, paddr, pgsize)	\
	((((uvaddr) ^ (paddr)) & ((pgsize) - 1)) == 0)

#define	vpgtob(n)	((n) * sizeof (struct vpage))	/* For brevity */

#define	VTOCVP(vp)	(VTOS(vp)->s_commonvp)	/* we "know" it's an snode */
110 static struct devmap_ctx *devmapctx_list = NULL;
111 static struct devmap_softlock *devmap_slist = NULL;
114 * mutex, vnode and page for the page of zeros we use for the trash mappings.
115 * One trash page is allocated on the first ddi_umem_setup call that uses it
116 * XXX Eventually, we may want to combine this with what segnf does when all
117 * hat layers implement HAT_NOFAULT.
119 * The trash page is used when the backing store for a userland mapping is
120 * removed but the application semantics do not take kindly to a SIGBUS.
121 * In that scenario, the applications pages are mapped to some dummy page
122 * which returns garbage on read and writes go into a common place.
123 * (Perfect for NO_FAULT semantics)
124 * The device driver is responsible to communicating to the app with some
125 * other mechanism that such remapping has happened and the app should take
126 * corrective action.
127 * We can also use an anonymous memory page as there is no requirement to
128 * keep the page locked, however this complicates the fault code. RFE.
130 static struct vnode trashvp;
131 static struct page *trashpp;
133 /* Non-pageable kernel memory is allocated from the umem_np_arena. */
134 static vmem_t *umem_np_arena;
136 /* Set the cookie to a value we know will never be a valid umem_cookie */
137 #define DEVMAP_DEVMEM_COOKIE ((ddi_umem_cookie_t)0x1)
140 * Macros to check if type of devmap handle
142 #define cookie_is_devmem(c) \
143 ((c) == (struct ddi_umem_cookie *)DEVMAP_DEVMEM_COOKIE)
145 #define cookie_is_pmem(c) \
146 ((c) == (struct ddi_umem_cookie *)DEVMAP_PMEM_COOKIE)
148 #define cookie_is_kpmem(c) (!cookie_is_devmem(c) && !cookie_is_pmem(c) &&\
149 ((c)->type == KMEM_PAGEABLE))
151 #define dhp_is_devmem(dhp) \
152 (cookie_is_devmem((struct ddi_umem_cookie *)((dhp)->dh_cookie)))
154 #define dhp_is_pmem(dhp) \
155 (cookie_is_pmem((struct ddi_umem_cookie *)((dhp)->dh_cookie)))
157 #define dhp_is_kpmem(dhp) \
158 (cookie_is_kpmem((struct ddi_umem_cookie *)((dhp)->dh_cookie)))
161 * Private seg op routines.
163 static int segdev_dup(struct seg *, struct seg *);
164 static int segdev_unmap(struct seg *, caddr_t, size_t);
165 static void segdev_free(struct seg *);
166 static faultcode_t segdev_fault(struct hat *, struct seg *, caddr_t, size_t,
167 enum fault_type, enum seg_rw);
168 static faultcode_t segdev_faulta(struct seg *, caddr_t);
169 static int segdev_setprot(struct seg *, caddr_t, size_t, uint_t);
170 static int segdev_checkprot(struct seg *, caddr_t, size_t, uint_t);
171 static void segdev_badop(void);
172 static int segdev_sync(struct seg *, caddr_t, size_t, int, uint_t);
173 static size_t segdev_incore(struct seg *, caddr_t, size_t, char *);
174 static int segdev_lockop(struct seg *, caddr_t, size_t, int, int,
175 ulong_t *, size_t);
176 static int segdev_getprot(struct seg *, caddr_t, size_t, uint_t *);
177 static uoff_t segdev_getoffset(struct seg *, caddr_t);
178 static int segdev_gettype(struct seg *, caddr_t);
179 static int segdev_getvp(struct seg *, caddr_t, struct vnode **);
180 static int segdev_advise(struct seg *, caddr_t, size_t, uint_t);
181 static int segdev_pagelock(struct seg *, caddr_t, size_t,
182 struct page ***, enum lock_type, enum seg_rw);
183 static int segdev_getmemid(struct seg *, caddr_t, memid_t *);
186 * XXX this struct is used by rootnex_map_fault to identify
187 * the segment it has been passed. So if you make it
188 * "static" you'll need to fix rootnex_map_fault.
190 const struct seg_ops segdev_ops = {
191 .dup = segdev_dup,
192 .unmap = segdev_unmap,
193 .free = segdev_free,
194 .fault = segdev_fault,
195 .faulta = segdev_faulta,
196 .setprot = segdev_setprot,
197 .checkprot = segdev_checkprot,
198 .kluster = (int (*)())segdev_badop,
199 .sync = segdev_sync,
200 .incore = segdev_incore,
201 .lockop = segdev_lockop,
202 .getprot = segdev_getprot,
203 .getoffset = segdev_getoffset,
204 .gettype = segdev_gettype,
205 .getvp = segdev_getvp,
206 .advise = segdev_advise,
207 .pagelock = segdev_pagelock,
208 .getmemid = segdev_getmemid,
212 * Private segdev support routines
214 static struct segdev_data *sdp_alloc(void);
216 static void segdev_softunlock(struct hat *, struct seg *, caddr_t,
217 size_t, enum seg_rw);
219 static faultcode_t segdev_faultpage(struct hat *, struct seg *, caddr_t,
220 struct vpage *, enum fault_type, enum seg_rw, devmap_handle_t *);
222 static faultcode_t segdev_faultpages(struct hat *, struct seg *, caddr_t,
223 size_t, enum fault_type, enum seg_rw, devmap_handle_t *);
225 static struct devmap_ctx *devmap_ctxinit(dev_t, ulong_t);
226 static struct devmap_softlock *devmap_softlock_init(dev_t, ulong_t);
227 static void devmap_softlock_rele(devmap_handle_t *);
228 static void devmap_ctx_rele(devmap_handle_t *);
230 static void devmap_ctxto(void *);
232 static devmap_handle_t *devmap_find_handle(devmap_handle_t *dhp_head,
233 caddr_t addr);
235 static ulong_t devmap_roundup(devmap_handle_t *dhp, ulong_t offset, size_t len,
236 ulong_t *opfn, ulong_t *pagesize);
238 static void free_devmap_handle(devmap_handle_t *dhp);
240 static int devmap_handle_dup(devmap_handle_t *dhp, devmap_handle_t **new_dhp,
241 struct seg *newseg);
243 static devmap_handle_t *devmap_handle_unmap(devmap_handle_t *dhp);
245 static void devmap_handle_unmap_head(devmap_handle_t *dhp, size_t len);
247 static void devmap_handle_unmap_tail(devmap_handle_t *dhp, caddr_t addr);
249 static int devmap_device(devmap_handle_t *dhp, struct as *as, caddr_t *addr,
250 offset_t off, size_t len, uint_t flags);
252 static void devmap_get_large_pgsize(devmap_handle_t *dhp, size_t len,
253 caddr_t addr, size_t *llen, caddr_t *laddr);
255 static void devmap_handle_reduce_len(devmap_handle_t *dhp, size_t len);
257 static void *devmap_alloc_pages(vmem_t *vmp, size_t size, int vmflag);
258 static void devmap_free_pages(vmem_t *vmp, void *inaddr, size_t size);
260 static void *devmap_umem_alloc_np(size_t size, size_t flags);
261 static void devmap_umem_free_np(void *addr, size_t size);
264 * routines to lock and unlock underlying segkp segment for
265 * KMEM_PAGEABLE type cookies.
267 static faultcode_t acquire_kpmem_lock(struct ddi_umem_cookie *, size_t);
268 static void release_kpmem_lock(struct ddi_umem_cookie *, size_t);
271 * Routines to synchronize F_SOFTLOCK and F_INVAL faults for
272 * drivers with devmap_access callbacks
274 static int devmap_softlock_enter(struct devmap_softlock *, size_t,
275 enum fault_type);
276 static void devmap_softlock_exit(struct devmap_softlock *, size_t,
277 enum fault_type);
279 static kmutex_t devmapctx_lock;
281 static kmutex_t devmap_slock;
284 * Initialize the thread callbacks and thread private data.
286 static struct devmap_ctx *
287 devmap_ctxinit(dev_t dev, ulong_t id)
289 struct devmap_ctx *devctx;
290 struct devmap_ctx *tmp;
291 dev_info_t *dip;
293 tmp = kmem_zalloc(sizeof (struct devmap_ctx), KM_SLEEP);
295 mutex_enter(&devmapctx_lock);
297 dip = e_ddi_hold_devi_by_dev(dev, 0);
298 ASSERT(dip != NULL);
299 ddi_release_devi(dip);
301 for (devctx = devmapctx_list; devctx != NULL; devctx = devctx->next)
302 if ((devctx->dip == dip) && (devctx->id == id))
303 break;
305 if (devctx == NULL) {
306 devctx = tmp;
307 devctx->dip = dip;
308 devctx->id = id;
309 mutex_init(&devctx->lock, NULL, MUTEX_DEFAULT, NULL);
310 cv_init(&devctx->cv, NULL, CV_DEFAULT, NULL);
311 devctx->next = devmapctx_list;
312 devmapctx_list = devctx;
313 } else
314 kmem_free(tmp, sizeof (struct devmap_ctx));
316 mutex_enter(&devctx->lock);
317 devctx->refcnt++;
318 mutex_exit(&devctx->lock);
319 mutex_exit(&devmapctx_lock);
321 return (devctx);
325 * Timeout callback called if a CPU has not given up the device context
326 * within dhp->dh_timeout_length ticks
328 static void
329 devmap_ctxto(void *data)
331 struct devmap_ctx *devctx = data;
333 mutex_enter(&devctx->lock);
335 * Set oncpu = 0 so the next mapping trying to get the device context
336 * can.
338 devctx->oncpu = 0;
339 devctx->timeout = 0;
340 cv_signal(&devctx->cv);
341 mutex_exit(&devctx->lock);
345 * Create a device segment.
348 segdev_create(struct seg *seg, void *argsp)
350 struct segdev_data *sdp;
351 struct segdev_crargs *a = (struct segdev_crargs *)argsp;
352 devmap_handle_t *dhp = (devmap_handle_t *)a->devmap_data;
353 int error;
356 * Since the address space is "write" locked, we
357 * don't need the segment lock to protect "segdev" data.
359 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
361 hat_map(seg->s_as->a_hat, seg->s_base, seg->s_size, HAT_MAP);
363 sdp = sdp_alloc();
365 sdp->mapfunc = a->mapfunc;
366 sdp->offset = a->offset;
367 sdp->prot = a->prot;
368 sdp->maxprot = a->maxprot;
369 sdp->type = a->type;
370 sdp->pageprot = 0;
371 sdp->softlockcnt = 0;
372 sdp->vpage = NULL;
374 if (sdp->mapfunc == NULL)
375 sdp->devmap_data = dhp;
376 else
377 sdp->devmap_data = dhp = NULL;
379 sdp->hat_flags = a->hat_flags;
380 sdp->hat_attr = a->hat_attr;
383 * Currently, hat_flags supports only HAT_LOAD_NOCONSIST
385 ASSERT(!(sdp->hat_flags & ~HAT_LOAD_NOCONSIST));
388 * Hold shadow vnode -- segdev only deals with
389 * character (VCHR) devices. We use the common
390 * vp to hang pages on.
392 sdp->vp = specfind(a->dev, VCHR);
393 ASSERT(sdp->vp != NULL);
395 seg->s_ops = &segdev_ops;
396 seg->s_data = sdp;
398 while (dhp != NULL) {
399 dhp->dh_seg = seg;
400 dhp = dhp->dh_next;
404 * Inform the vnode of the new mapping.
407 * It is ok to use pass sdp->maxprot to ADDMAP rather than to use
408 * dhp specific maxprot because spec_addmap does not use maxprot.
410 error = fop_addmap(VTOCVP(sdp->vp), sdp->offset,
411 seg->s_as, seg->s_base, seg->s_size,
412 sdp->prot, sdp->maxprot, sdp->type, CRED(), NULL);
414 if (error != 0) {
415 sdp->devmap_data = NULL;
416 hat_unload(seg->s_as->a_hat, seg->s_base, seg->s_size,
417 HAT_UNLOAD_UNMAP);
418 } else {
420 * Mappings of /dev/null don't count towards the VSZ of a
421 * process. Mappings of /dev/null have no mapping type.
423 if ((segop_gettype(seg, seg->s_base) & (MAP_SHARED |
424 MAP_PRIVATE)) == 0) {
425 seg->s_as->a_resvsize -= seg->s_size;
429 return (error);
432 static struct segdev_data *
433 sdp_alloc(void)
435 struct segdev_data *sdp;
437 sdp = kmem_zalloc(sizeof (struct segdev_data), KM_SLEEP);
438 rw_init(&sdp->lock, NULL, RW_DEFAULT, NULL);
440 return (sdp);
444 * Duplicate seg and return new segment in newseg.
446 static int
447 segdev_dup(struct seg *seg, struct seg *newseg)
449 struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
450 struct segdev_data *newsdp;
451 devmap_handle_t *dhp = (devmap_handle_t *)sdp->devmap_data;
452 size_t npages;
453 int ret;
455 DEBUGF(3, (CE_CONT, "segdev_dup: dhp %p seg %p\n",
456 (void *)dhp, (void *)seg));
459 * Since the address space is "write" locked, we
460 * don't need the segment lock to protect "segdev" data.
462 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
464 newsdp = sdp_alloc();
466 newseg->s_ops = seg->s_ops;
467 newseg->s_data = (void *)newsdp;
469 VN_HOLD(sdp->vp);
470 newsdp->vp = sdp->vp;
471 newsdp->mapfunc = sdp->mapfunc;
472 newsdp->offset = sdp->offset;
473 newsdp->pageprot = sdp->pageprot;
474 newsdp->prot = sdp->prot;
475 newsdp->maxprot = sdp->maxprot;
476 newsdp->type = sdp->type;
477 newsdp->hat_attr = sdp->hat_attr;
478 newsdp->hat_flags = sdp->hat_flags;
479 newsdp->softlockcnt = 0;
482 * Initialize per page data if the segment we are
483 * dup'ing has per page information.
485 npages = seg_pages(newseg);
487 if (sdp->vpage != NULL) {
488 size_t nbytes = vpgtob(npages);
490 newsdp->vpage = kmem_zalloc(nbytes, KM_SLEEP);
491 bcopy(sdp->vpage, newsdp->vpage, nbytes);
492 } else
493 newsdp->vpage = NULL;
496 * duplicate devmap handles
498 if (dhp != NULL) {
499 ret = devmap_handle_dup(dhp,
500 (devmap_handle_t **)&newsdp->devmap_data, newseg);
501 if (ret != 0) {
502 DEBUGF(1, (CE_CONT,
503 "segdev_dup: ret %x dhp %p seg %p\n",
504 ret, (void *)dhp, (void *)seg));
505 return (ret);
510 * Inform the common vnode of the new mapping.
512 return (fop_addmap(VTOCVP(newsdp->vp),
513 newsdp->offset, newseg->s_as,
514 newseg->s_base, newseg->s_size, newsdp->prot,
515 newsdp->maxprot, sdp->type, CRED(), NULL));
519 * duplicate devmap handles
521 static int
522 devmap_handle_dup(devmap_handle_t *dhp, devmap_handle_t **new_dhp,
523 struct seg *newseg)
525 devmap_handle_t *newdhp_save = NULL;
526 devmap_handle_t *newdhp = NULL;
527 struct devmap_callback_ctl *callbackops;
529 while (dhp != NULL) {
530 newdhp = kmem_alloc(sizeof (devmap_handle_t), KM_SLEEP);
532 /* Need to lock the original dhp while copying if REMAP */
533 HOLD_DHP_LOCK(dhp);
534 bcopy(dhp, newdhp, sizeof (devmap_handle_t));
535 RELE_DHP_LOCK(dhp);
536 newdhp->dh_seg = newseg;
537 newdhp->dh_next = NULL;
538 if (newdhp_save != NULL)
539 newdhp_save->dh_next = newdhp;
540 else
541 *new_dhp = newdhp;
542 newdhp_save = newdhp;
544 callbackops = &newdhp->dh_callbackops;
546 if (dhp->dh_softlock != NULL)
547 newdhp->dh_softlock = devmap_softlock_init(
548 newdhp->dh_dev,
549 (ulong_t)callbackops->devmap_access);
550 if (dhp->dh_ctx != NULL)
551 newdhp->dh_ctx = devmap_ctxinit(newdhp->dh_dev,
552 (ulong_t)callbackops->devmap_access);
555 * Initialize dh_lock if we want to do remap.
557 if (newdhp->dh_flags & DEVMAP_ALLOW_REMAP) {
558 mutex_init(&newdhp->dh_lock, NULL, MUTEX_DEFAULT, NULL);
559 newdhp->dh_flags |= DEVMAP_LOCK_INITED;
562 if (callbackops->devmap_dup != NULL) {
563 int ret;
566 * Call the dup callback so that the driver can
567 * duplicate its private data.
569 ret = (*callbackops->devmap_dup)(dhp, dhp->dh_pvtp,
570 (devmap_cookie_t *)newdhp, &newdhp->dh_pvtp);
572 if (ret != 0) {
574 * We want to free up this segment as the driver
575 * has indicated that we can't dup it. But we
576 * don't want to call the drivers, devmap_unmap,
577 * callback function as the driver does not
578 * think this segment exists. The caller of
579 * devmap_dup will call seg_free on newseg
580 * as it was the caller that allocated the
581 * segment.
583 DEBUGF(1, (CE_CONT, "devmap_handle_dup ERROR: "
584 "newdhp %p dhp %p\n", (void *)newdhp,
585 (void *)dhp));
586 callbackops->devmap_unmap = NULL;
587 return (ret);
591 dhp = dhp->dh_next;
594 return (0);
598 * Split a segment at addr for length len.
600 /*ARGSUSED*/
601 static int
602 segdev_unmap(struct seg *seg, caddr_t addr, size_t len)
604 register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
605 register struct segdev_data *nsdp;
606 register struct seg *nseg;
607 register size_t opages; /* old segment size in pages */
608 register size_t npages; /* new segment size in pages */
609 register size_t dpages; /* pages being deleted (unmapped) */
610 register size_t nbytes;
611 devmap_handle_t *dhp = (devmap_handle_t *)sdp->devmap_data;
612 devmap_handle_t *dhpp;
613 devmap_handle_t *newdhp;
614 struct devmap_callback_ctl *callbackops;
615 caddr_t nbase;
616 offset_t off;
617 ulong_t nsize;
618 size_t mlen, sz;
620 DEBUGF(3, (CE_CONT, "segdev_unmap: dhp %p seg %p addr %p len %lx\n",
621 (void *)dhp, (void *)seg, (void *)addr, len));
624 * Since the address space is "write" locked, we
625 * don't need the segment lock to protect "segdev" data.
627 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
629 if ((sz = sdp->softlockcnt) > 0) {
631 * Fail the unmap if pages are SOFTLOCKed through this mapping.
632 * softlockcnt is protected from change by the as write lock.
634 DEBUGF(1, (CE_CONT, "segdev_unmap: softlockcnt %ld\n", sz));
635 return (EAGAIN);
639 * Check for bad sizes
641 if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
642 (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET))
643 panic("segdev_unmap");
645 if (dhp != NULL) {
646 devmap_handle_t *tdhp;
648 * If large page size was used in hat_devload(),
649 * the same page size must be used in hat_unload().
651 dhpp = tdhp = devmap_find_handle(dhp, addr);
652 while (tdhp != NULL) {
653 if (tdhp->dh_flags & DEVMAP_FLAG_LARGE) {
654 break;
656 tdhp = tdhp->dh_next;
658 if (tdhp != NULL) { /* found a dhp using large pages */
659 size_t slen = len;
660 size_t mlen;
661 size_t soff;
663 soff = (ulong_t)(addr - dhpp->dh_uvaddr);
664 while (slen != 0) {
665 mlen = MIN(slen, (dhpp->dh_len - soff));
666 hat_unload(seg->s_as->a_hat, dhpp->dh_uvaddr,
667 dhpp->dh_len, HAT_UNLOAD_UNMAP);
668 dhpp = dhpp->dh_next;
669 ASSERT(slen >= mlen);
670 slen -= mlen;
671 soff = 0;
673 } else
674 hat_unload(seg->s_as->a_hat, addr, len,
675 HAT_UNLOAD_UNMAP);
676 } else {
678 * Unload any hardware translations in the range
679 * to be taken out.
681 hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP);
685 * get the user offset which will used in the driver callbacks
687 off = sdp->offset + (offset_t)(addr - seg->s_base);
690 * Inform the vnode of the unmapping.
692 ASSERT(sdp->vp != NULL);
693 (void) fop_delmap(VTOCVP(sdp->vp), off, seg->s_as, addr, len,
694 sdp->prot, sdp->maxprot, sdp->type, CRED(), NULL);
697 * Check for entire segment
699 if (addr == seg->s_base && len == seg->s_size) {
700 seg_free(seg);
701 return (0);
704 opages = seg_pages(seg);
705 dpages = btop(len);
706 npages = opages - dpages;
709 * Check for beginning of segment
711 if (addr == seg->s_base) {
712 if (sdp->vpage != NULL) {
713 register struct vpage *ovpage;
715 ovpage = sdp->vpage; /* keep pointer to vpage */
717 nbytes = vpgtob(npages);
718 sdp->vpage = kmem_alloc(nbytes, KM_SLEEP);
719 bcopy(&ovpage[dpages], sdp->vpage, nbytes);
721 /* free up old vpage */
722 kmem_free(ovpage, vpgtob(opages));
726 * free devmap handles from the beginning of the mapping.
728 if (dhp != NULL)
729 devmap_handle_unmap_head(dhp, len);
731 sdp->offset += (offset_t)len;
733 seg->s_base += len;
734 seg->s_size -= len;
736 return (0);
740 * Check for end of segment
742 if (addr + len == seg->s_base + seg->s_size) {
743 if (sdp->vpage != NULL) {
744 register struct vpage *ovpage;
746 ovpage = sdp->vpage; /* keep pointer to vpage */
748 nbytes = vpgtob(npages);
749 sdp->vpage = kmem_alloc(nbytes, KM_SLEEP);
750 bcopy(ovpage, sdp->vpage, nbytes);
752 /* free up old vpage */
753 kmem_free(ovpage, vpgtob(opages));
755 seg->s_size -= len;
758 * free devmap handles from addr to the end of the mapping.
760 if (dhp != NULL)
761 devmap_handle_unmap_tail(dhp, addr);
763 return (0);
767 * The section to go is in the middle of the segment,
768 * have to make it into two segments. nseg is made for
769 * the high end while seg is cut down at the low end.
771 nbase = addr + len; /* new seg base */
772 nsize = (seg->s_base + seg->s_size) - nbase; /* new seg size */
773 seg->s_size = addr - seg->s_base; /* shrink old seg */
774 nseg = seg_alloc(seg->s_as, nbase, nsize);
775 if (nseg == NULL)
776 panic("segdev_unmap seg_alloc");
778 DEBUGF(3, (CE_CONT, "segdev_unmap: segdev_dup seg %p nseg %p\n",
779 (void *)seg, (void *)nseg));
780 nsdp = sdp_alloc();
782 nseg->s_ops = seg->s_ops;
783 nseg->s_data = (void *)nsdp;
785 VN_HOLD(sdp->vp);
786 nsdp->mapfunc = sdp->mapfunc;
787 nsdp->offset = sdp->offset + (offset_t)(nseg->s_base - seg->s_base);
788 nsdp->vp = sdp->vp;
789 nsdp->pageprot = sdp->pageprot;
790 nsdp->prot = sdp->prot;
791 nsdp->maxprot = sdp->maxprot;
792 nsdp->type = sdp->type;
793 nsdp->hat_attr = sdp->hat_attr;
794 nsdp->hat_flags = sdp->hat_flags;
795 nsdp->softlockcnt = 0;
798 * Initialize per page data if the segment we are
799 * dup'ing has per page information.
801 if (sdp->vpage != NULL) {
802 /* need to split vpage into two arrays */
803 register size_t nnbytes;
804 register size_t nnpages;
805 register struct vpage *ovpage;
807 ovpage = sdp->vpage; /* keep pointer to vpage */
809 npages = seg_pages(seg); /* seg has shrunk */
810 nbytes = vpgtob(npages);
811 nnpages = seg_pages(nseg);
812 nnbytes = vpgtob(nnpages);
814 sdp->vpage = kmem_alloc(nbytes, KM_SLEEP);
815 bcopy(ovpage, sdp->vpage, nbytes);
817 nsdp->vpage = kmem_alloc(nnbytes, KM_SLEEP);
818 bcopy(&ovpage[npages + dpages], nsdp->vpage, nnbytes);
820 /* free up old vpage */
821 kmem_free(ovpage, vpgtob(opages));
822 } else
823 nsdp->vpage = NULL;
826 * unmap dhps.
828 if (dhp == NULL) {
829 nsdp->devmap_data = NULL;
830 return (0);
832 while (dhp != NULL) {
833 callbackops = &dhp->dh_callbackops;
834 DEBUGF(3, (CE_CONT, "unmap: dhp %p addr %p uvaddr %p len %lx\n",
835 (void *)dhp, (void *)addr,
836 (void *)dhp->dh_uvaddr, dhp->dh_len));
838 if (addr == (dhp->dh_uvaddr + dhp->dh_len)) {
839 dhpp = dhp->dh_next;
840 dhp->dh_next = NULL;
841 dhp = dhpp;
842 } else if (addr > (dhp->dh_uvaddr + dhp->dh_len)) {
843 dhp = dhp->dh_next;
844 } else if (addr > dhp->dh_uvaddr &&
845 (addr + len) < (dhp->dh_uvaddr + dhp->dh_len)) {
847 * <addr, addr+len> is enclosed by dhp.
848 * create a newdhp that begins at addr+len and
849 * ends at dhp->dh_uvaddr+dhp->dh_len.
851 newdhp = kmem_alloc(sizeof (devmap_handle_t), KM_SLEEP);
852 HOLD_DHP_LOCK(dhp);
853 bcopy(dhp, newdhp, sizeof (devmap_handle_t));
854 RELE_DHP_LOCK(dhp);
855 newdhp->dh_seg = nseg;
856 newdhp->dh_next = dhp->dh_next;
857 if (dhp->dh_softlock != NULL)
858 newdhp->dh_softlock = devmap_softlock_init(
859 newdhp->dh_dev,
860 (ulong_t)callbackops->devmap_access);
861 if (dhp->dh_ctx != NULL)
862 newdhp->dh_ctx = devmap_ctxinit(newdhp->dh_dev,
863 (ulong_t)callbackops->devmap_access);
864 if (newdhp->dh_flags & DEVMAP_LOCK_INITED) {
865 mutex_init(&newdhp->dh_lock,
866 NULL, MUTEX_DEFAULT, NULL);
868 if (callbackops->devmap_unmap != NULL)
869 (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
870 off, len, dhp, &dhp->dh_pvtp,
871 newdhp, &newdhp->dh_pvtp);
872 mlen = len + (addr - dhp->dh_uvaddr);
873 devmap_handle_reduce_len(newdhp, mlen);
874 nsdp->devmap_data = newdhp;
875 /* XX Changing len should recalculate LARGE flag */
876 dhp->dh_len = addr - dhp->dh_uvaddr;
877 dhpp = dhp->dh_next;
878 dhp->dh_next = NULL;
879 dhp = dhpp;
880 } else if ((addr > dhp->dh_uvaddr) &&
881 ((addr + len) >= (dhp->dh_uvaddr + dhp->dh_len))) {
882 mlen = dhp->dh_len + dhp->dh_uvaddr - addr;
884 * <addr, addr+len> spans over dhps.
886 if (callbackops->devmap_unmap != NULL)
887 (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
888 off, mlen, (devmap_cookie_t *)dhp,
889 &dhp->dh_pvtp, NULL, NULL);
890 /* XX Changing len should recalculate LARGE flag */
891 dhp->dh_len = addr - dhp->dh_uvaddr;
892 dhpp = dhp->dh_next;
893 dhp->dh_next = NULL;
894 dhp = dhpp;
895 nsdp->devmap_data = dhp;
896 } else if ((addr + len) >= (dhp->dh_uvaddr + dhp->dh_len)) {
898 * dhp is enclosed by <addr, addr+len>.
900 dhp->dh_seg = nseg;
901 nsdp->devmap_data = dhp;
902 dhp = devmap_handle_unmap(dhp);
903 nsdp->devmap_data = dhp; /* XX redundant? */
904 } else if (((addr + len) > dhp->dh_uvaddr) &&
905 ((addr + len) < (dhp->dh_uvaddr + dhp->dh_len))) {
906 mlen = addr + len - dhp->dh_uvaddr;
907 if (callbackops->devmap_unmap != NULL)
908 (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
909 dhp->dh_uoff, mlen, NULL,
910 NULL, dhp, &dhp->dh_pvtp);
911 devmap_handle_reduce_len(dhp, mlen);
912 nsdp->devmap_data = dhp;
913 dhp->dh_seg = nseg;
914 dhp = dhp->dh_next;
915 } else {
916 dhp->dh_seg = nseg;
917 dhp = dhp->dh_next;
920 return (0);
924 * Utility function handles reducing the length of a devmap handle during unmap
925 * Note that is only used for unmapping the front portion of the handler,
926 * i.e., we are bumping up the offset/pfn etc up by len
927 * Do not use if reducing length at the tail.
929 static void
930 devmap_handle_reduce_len(devmap_handle_t *dhp, size_t len)
932 struct ddi_umem_cookie *cp;
933 struct devmap_pmem_cookie *pcp;
935 * adjust devmap handle fields
937 ASSERT(len < dhp->dh_len);
939 /* Make sure only page-aligned changes are done */
940 ASSERT((len & PAGEOFFSET) == 0);
942 dhp->dh_len -= len;
943 dhp->dh_uoff += (offset_t)len;
944 dhp->dh_roff += (offset_t)len;
945 dhp->dh_uvaddr += len;
946 /* Need to grab dhp lock if REMAP */
947 HOLD_DHP_LOCK(dhp);
948 cp = dhp->dh_cookie;
949 if (!(dhp->dh_flags & DEVMAP_MAPPING_INVALID)) {
950 if (cookie_is_devmem(cp)) {
951 dhp->dh_pfn += btop(len);
952 } else if (cookie_is_pmem(cp)) {
953 pcp = (struct devmap_pmem_cookie *)dhp->dh_pcookie;
954 ASSERT((dhp->dh_roff & PAGEOFFSET) == 0 &&
955 dhp->dh_roff < ptob(pcp->dp_npages));
956 } else {
957 ASSERT(dhp->dh_roff < cp->size);
958 ASSERT(dhp->dh_cvaddr >= cp->cvaddr &&
959 dhp->dh_cvaddr < (cp->cvaddr + cp->size));
960 ASSERT((dhp->dh_cvaddr + len) <=
961 (cp->cvaddr + cp->size));
963 dhp->dh_cvaddr += len;
966 /* XXX - Should recalculate the DEVMAP_FLAG_LARGE after changes */
967 RELE_DHP_LOCK(dhp);
971 * Free devmap handle, dhp.
972 * Return the next devmap handle on the linked list.
974 static devmap_handle_t *
975 devmap_handle_unmap(devmap_handle_t *dhp)
977 struct devmap_callback_ctl *callbackops = &dhp->dh_callbackops;
978 struct segdev_data *sdp = (struct segdev_data *)dhp->dh_seg->s_data;
979 devmap_handle_t *dhpp = (devmap_handle_t *)sdp->devmap_data;
981 ASSERT(dhp != NULL);
984 * before we free up dhp, call the driver's devmap_unmap entry point
985 * to free resources allocated for this dhp.
987 if (callbackops->devmap_unmap != NULL) {
988 (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp, dhp->dh_uoff,
989 dhp->dh_len, NULL, NULL, NULL, NULL);
992 if (dhpp == dhp) { /* releasing first dhp, change sdp data */
993 sdp->devmap_data = dhp->dh_next;
994 } else {
995 while (dhpp->dh_next != dhp) {
996 dhpp = dhpp->dh_next;
998 dhpp->dh_next = dhp->dh_next;
1000 dhpp = dhp->dh_next; /* return value is next dhp in chain */
1002 if (dhp->dh_softlock != NULL)
1003 devmap_softlock_rele(dhp);
1005 if (dhp->dh_ctx != NULL)
1006 devmap_ctx_rele(dhp);
1008 if (dhp->dh_flags & DEVMAP_LOCK_INITED) {
1009 mutex_destroy(&dhp->dh_lock);
1011 kmem_free(dhp, sizeof (devmap_handle_t));
1013 return (dhpp);
1017 * Free complete devmap handles from dhp for len bytes
1018 * dhp can be either the first handle or a subsequent handle
1020 static void
1021 devmap_handle_unmap_head(devmap_handle_t *dhp, size_t len)
1023 struct devmap_callback_ctl *callbackops;
1026 * free the devmap handles covered by len.
1028 while (len >= dhp->dh_len) {
1029 len -= dhp->dh_len;
1030 dhp = devmap_handle_unmap(dhp);
1032 if (len != 0) { /* partial unmap at head of first remaining dhp */
1033 callbackops = &dhp->dh_callbackops;
1036 * Call the unmap callback so the drivers can make
1037 * adjustment on its private data.
1039 if (callbackops->devmap_unmap != NULL)
1040 (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
1041 dhp->dh_uoff, len, NULL, NULL, dhp, &dhp->dh_pvtp);
1042 devmap_handle_reduce_len(dhp, len);
1047 * Free devmap handles to truncate the mapping after addr
1048 * RFE: Simpler to pass in dhp pointing at correct dhp (avoid find again)
1049 * Also could then use the routine in middle unmap case too
1051 static void
1052 devmap_handle_unmap_tail(devmap_handle_t *dhp, caddr_t addr)
1054 register struct seg *seg = dhp->dh_seg;
1055 register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1056 register devmap_handle_t *dhph = (devmap_handle_t *)sdp->devmap_data;
1057 struct devmap_callback_ctl *callbackops;
1058 register devmap_handle_t *dhpp;
1059 size_t maplen;
1060 ulong_t off;
1061 size_t len;
1063 maplen = (size_t)(addr - dhp->dh_uvaddr);
1064 dhph = devmap_find_handle(dhph, addr);
1066 while (dhph != NULL) {
1067 if (maplen == 0) {
1068 dhph = devmap_handle_unmap(dhph);
1069 } else {
1070 callbackops = &dhph->dh_callbackops;
1071 len = dhph->dh_len - maplen;
1072 off = (ulong_t)sdp->offset + (addr - seg->s_base);
1074 * Call the unmap callback so the driver
1075 * can make adjustments on its private data.
1077 if (callbackops->devmap_unmap != NULL)
1078 (*callbackops->devmap_unmap)(dhph,
1079 dhph->dh_pvtp, off, len,
1080 (devmap_cookie_t *)dhph,
1081 &dhph->dh_pvtp, NULL, NULL);
1082 /* XXX Reducing len needs to recalculate LARGE flag */
1083 dhph->dh_len = maplen;
1084 maplen = 0;
1085 dhpp = dhph->dh_next;
1086 dhph->dh_next = NULL;
1087 dhph = dhpp;
1089 } /* end while */
1093 * Free a segment.
1095 static void
1096 segdev_free(struct seg *seg)
1098 register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1099 devmap_handle_t *dhp = (devmap_handle_t *)sdp->devmap_data;
1101 DEBUGF(3, (CE_CONT, "segdev_free: dhp %p seg %p\n",
1102 (void *)dhp, (void *)seg));
1105 * Since the address space is "write" locked, we
1106 * don't need the segment lock to protect "segdev" data.
1108 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
1110 while (dhp != NULL)
1111 dhp = devmap_handle_unmap(dhp);
1113 VN_RELE(sdp->vp);
1114 if (sdp->vpage != NULL)
1115 kmem_free(sdp->vpage, vpgtob(seg_pages(seg)));
1117 rw_destroy(&sdp->lock);
1118 kmem_free(sdp, sizeof (*sdp));
1121 static void
1122 free_devmap_handle(devmap_handle_t *dhp)
1124 register devmap_handle_t *dhpp;
1127 * free up devmap handle
1129 while (dhp != NULL) {
1130 dhpp = dhp->dh_next;
1131 if (dhp->dh_flags & DEVMAP_LOCK_INITED) {
1132 mutex_destroy(&dhp->dh_lock);
1135 if (dhp->dh_softlock != NULL)
1136 devmap_softlock_rele(dhp);
1138 if (dhp->dh_ctx != NULL)
1139 devmap_ctx_rele(dhp);
1141 kmem_free(dhp, sizeof (devmap_handle_t));
1142 dhp = dhpp;
1147 * routines to lock and unlock underlying segkp segment for
1148 * KMEM_PAGEABLE type cookies.
1149 * segkp only allows a single pending F_SOFTLOCK
1150 * we keep track of number of locks in the cookie so we can
1151 * have multiple pending faults and manage the calls to segkp.
1152 * RFE: if segkp supports either pagelock or can support multiple
1153 * calls to F_SOFTLOCK, then these routines can go away.
1154 * If pagelock, segdev_faultpage can fault on a page by page basis
1155 * and simplifies the code quite a bit.
1156 * if multiple calls allowed but not partial ranges, then need for
1157 * cookie->lock and locked count goes away, code can call as_fault directly
1159 static faultcode_t
1160 acquire_kpmem_lock(struct ddi_umem_cookie *cookie, size_t npages)
1162 int err = 0;
1163 ASSERT(cookie_is_kpmem(cookie));
1165 * Fault in pages in segkp with F_SOFTLOCK.
1166 * We want to hold the lock until all pages have been loaded.
1167 * segkp only allows single caller to hold SOFTLOCK, so cookie
1168 * holds a count so we dont call into segkp multiple times
1170 mutex_enter(&cookie->lock);
1173 * Check for overflow in locked field
1175 if ((UINT32_MAX - cookie->locked) < npages) {
1176 err = FC_MAKE_ERR(ENOMEM);
1177 } else if (cookie->locked == 0) {
1178 /* First time locking */
1179 err = as_fault(kas.a_hat, &kas, cookie->cvaddr,
1180 cookie->size, F_SOFTLOCK, PROT_READ|PROT_WRITE);
1182 if (!err) {
1183 cookie->locked += npages;
1185 mutex_exit(&cookie->lock);
1186 return (err);
1189 static void
1190 release_kpmem_lock(struct ddi_umem_cookie *cookie, size_t npages)
1192 mutex_enter(&cookie->lock);
1193 ASSERT(cookie_is_kpmem(cookie));
1194 ASSERT(cookie->locked >= npages);
1195 cookie->locked -= (uint_t)npages;
1196 if (cookie->locked == 0) {
1197 /* Last unlock */
1198 if (as_fault(kas.a_hat, &kas, cookie->cvaddr,
1199 cookie->size, F_SOFTUNLOCK, PROT_READ|PROT_WRITE))
1200 panic("segdev releasing kpmem lock %p", (void *)cookie);
1202 mutex_exit(&cookie->lock);
1206 * Routines to synchronize F_SOFTLOCK and F_INVAL faults for
1207 * drivers with devmap_access callbacks
1208 * slock->softlocked basically works like a rw lock
1209 * -ve counts => F_SOFTLOCK in progress
1210 * +ve counts => F_INVAL/F_PROT in progress
1211 * We allow only one F_SOFTLOCK at a time
1212 * but can have multiple pending F_INVAL/F_PROT calls
1214 * This routine waits using cv_wait_sig so killing processes is more graceful
1215 * Returns EINTR if coming out of this routine due to a signal, 0 otherwise
1217 static int devmap_softlock_enter(
1218 struct devmap_softlock *slock,
1219 size_t npages,
1220 enum fault_type type)
1222 if (npages == 0)
1223 return (0);
1224 mutex_enter(&(slock->lock));
1225 switch (type) {
1226 case F_SOFTLOCK :
1227 while (slock->softlocked) {
1228 if (cv_wait_sig(&(slock)->cv, &(slock)->lock) == 0) {
1229 /* signalled */
1230 mutex_exit(&(slock->lock));
1231 return (EINTR);
1234 slock->softlocked -= npages; /* -ve count => locked */
1235 break;
1236 case F_INVAL :
1237 case F_PROT :
1238 while (slock->softlocked < 0)
1239 if (cv_wait_sig(&(slock)->cv, &(slock)->lock) == 0) {
1240 /* signalled */
1241 mutex_exit(&(slock->lock));
1242 return (EINTR);
1244 slock->softlocked += npages; /* +ve count => f_invals */
1245 break;
1246 default:
1247 ASSERT(0);
1249 mutex_exit(&(slock->lock));
1250 return (0);
1253 static void devmap_softlock_exit(
1254 struct devmap_softlock *slock,
1255 size_t npages,
1256 enum fault_type type)
1258 if (slock == NULL)
1259 return;
1260 mutex_enter(&(slock->lock));
1261 switch (type) {
1262 case F_SOFTLOCK :
1263 ASSERT(-slock->softlocked >= npages);
1264 slock->softlocked += npages; /* -ve count is softlocked */
1265 if (slock->softlocked == 0)
1266 cv_signal(&slock->cv);
1267 break;
1268 case F_INVAL :
1269 case F_PROT:
1270 ASSERT(slock->softlocked >= npages);
1271 slock->softlocked -= npages;
1272 if (slock->softlocked == 0)
1273 cv_signal(&slock->cv);
1274 break;
1275 default:
1276 ASSERT(0);
1278 mutex_exit(&(slock->lock));
1282 * Do a F_SOFTUNLOCK call over the range requested.
1283 * The range must have already been F_SOFTLOCK'ed.
1284 * The segment lock should be held, (but not the segment private lock?)
1285 * The softunlock code below does not adjust for large page sizes
1286 * assumes the caller already did any addr/len adjustments for
1287 * pagesize mappings before calling.
1289 /*ARGSUSED*/
1290 static void
1291 segdev_softunlock(
1292 struct hat *hat, /* the hat */
1293 struct seg *seg, /* seg_dev of interest */
1294 caddr_t addr, /* base address of range */
1295 size_t len, /* number of bytes */
1296 enum seg_rw rw) /* type of access at fault */
1298 struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1299 devmap_handle_t *dhp_head = (devmap_handle_t *)sdp->devmap_data;
1301 DEBUGF(3, (CE_CONT, "segdev_softunlock: dhp %p lockcnt %lx "
1302 "addr %p len %lx\n",
1303 (void *)dhp_head, sdp->softlockcnt, (void *)addr, len));
1305 hat_unlock(hat, addr, len);
1307 if (dhp_head != NULL) {
1308 devmap_handle_t *dhp;
1309 size_t mlen;
1310 size_t tlen = len;
1311 ulong_t off;
1313 dhp = devmap_find_handle(dhp_head, addr);
1314 ASSERT(dhp != NULL);
1316 off = (ulong_t)(addr - dhp->dh_uvaddr);
1317 while (tlen != 0) {
1318 mlen = MIN(tlen, (dhp->dh_len - off));
1321 * unlock segkp memory, locked during F_SOFTLOCK
1323 if (dhp_is_kpmem(dhp)) {
1324 release_kpmem_lock(
1325 (struct ddi_umem_cookie *)dhp->dh_cookie,
1326 btopr(mlen));
1330 * Do the softlock accounting for devmap_access
1332 if (dhp->dh_callbackops.devmap_access != NULL) {
1333 devmap_softlock_exit(dhp->dh_softlock,
1334 btopr(mlen), F_SOFTLOCK);
1337 tlen -= mlen;
1338 dhp = dhp->dh_next;
1339 off = 0;
1343 mutex_enter(&freemem_lock);
1344 ASSERT(sdp->softlockcnt >= btopr(len));
1345 sdp->softlockcnt -= btopr(len);
1346 mutex_exit(&freemem_lock);
1347 if (sdp->softlockcnt == 0) {
1349 * All SOFTLOCKS are gone. Wakeup any waiting
1350 * unmappers so they can try again to unmap.
1351 * Check for waiters first without the mutex
1352 * held so we don't always grab the mutex on
1353 * softunlocks.
1355 if (AS_ISUNMAPWAIT(seg->s_as)) {
1356 mutex_enter(&seg->s_as->a_contents);
1357 if (AS_ISUNMAPWAIT(seg->s_as)) {
1358 AS_CLRUNMAPWAIT(seg->s_as);
1359 cv_broadcast(&seg->s_as->a_cv);
1361 mutex_exit(&seg->s_as->a_contents);
1368 * Handle fault for a single page.
1369 * Done in a separate routine so we can handle errors more easily.
1370 * This routine is called only from segdev_faultpages()
1371 * when looping over the range of addresses requested. The segment lock is held.
1373 static faultcode_t
1374 segdev_faultpage(
1375 struct hat *hat, /* the hat */
1376 struct seg *seg, /* seg_dev of interest */
1377 caddr_t addr, /* address in as */
1378 struct vpage *vpage, /* pointer to vpage for seg, addr */
1379 enum fault_type type, /* type of fault */
1380 enum seg_rw rw, /* type of access at fault */
1381 devmap_handle_t *dhp) /* devmap handle if any for this page */
1383 struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1384 uint_t prot;
1385 pfn_t pfnum = PFN_INVALID;
1386 uoff_t offset;
1387 uint_t hat_flags;
1388 dev_info_t *dip;
1390 DEBUGF(8, (CE_CONT, "segdev_faultpage: dhp %p seg %p addr %p \n",
1391 (void *)dhp, (void *)seg, (void *)addr));
1394 * Initialize protection value for this page.
1395 * If we have per page protection values check it now.
1397 if (sdp->pageprot) {
1398 uint_t protchk;
1400 switch (rw) {
1401 case S_READ:
1402 protchk = PROT_READ;
1403 break;
1404 case S_WRITE:
1405 protchk = PROT_WRITE;
1406 break;
1407 case S_EXEC:
1408 protchk = PROT_EXEC;
1409 break;
1410 case S_OTHER:
1411 default:
1412 protchk = PROT_READ | PROT_WRITE | PROT_EXEC;
1413 break;
1416 prot = VPP_PROT(vpage);
1417 if ((prot & protchk) == 0)
1418 return (FC_PROT); /* illegal access type */
1419 } else {
1420 prot = sdp->prot;
1421 /* caller has already done segment level protection check */
1424 if (type == F_SOFTLOCK) {
1425 mutex_enter(&freemem_lock);
1426 sdp->softlockcnt++;
1427 mutex_exit(&freemem_lock);
1430 hat_flags = ((type == F_SOFTLOCK) ? HAT_LOAD_LOCK : HAT_LOAD);
1431 offset = sdp->offset + (uoff_t)(addr - seg->s_base);
1433 * In the devmap framework, sdp->mapfunc is set to NULL. we can get
1434 * pfnum from dhp->dh_pfn (at beginning of segment) and offset from
1435 * seg->s_base.
1437 if (dhp == NULL) {
1438 /* If segment has devmap_data, then dhp should be non-NULL */
1439 ASSERT(sdp->devmap_data == NULL);
1440 pfnum = (pfn_t)cdev_mmap(sdp->mapfunc, sdp->vp->v_rdev,
1441 (off_t)offset, prot);
1442 prot |= sdp->hat_attr;
1443 } else {
1444 ulong_t off;
1445 struct ddi_umem_cookie *cp;
1446 struct devmap_pmem_cookie *pcp;
1448 /* ensure the dhp passed in contains addr. */
1449 ASSERT(dhp == devmap_find_handle(
1450 (devmap_handle_t *)sdp->devmap_data, addr));
1452 off = addr - dhp->dh_uvaddr;
1455 * This routine assumes that the caller makes sure that the
1456 * fields in dhp used below are unchanged due to remap during
1457 * this call. Caller does HOLD_DHP_LOCK if neeed
1459 cp = dhp->dh_cookie;
1460 if (dhp->dh_flags & DEVMAP_MAPPING_INVALID) {
1461 pfnum = PFN_INVALID;
1462 } else if (cookie_is_devmem(cp)) {
1463 pfnum = dhp->dh_pfn + btop(off);
1464 } else if (cookie_is_pmem(cp)) {
1465 pcp = (struct devmap_pmem_cookie *)dhp->dh_pcookie;
1466 ASSERT((dhp->dh_roff & PAGEOFFSET) == 0 &&
1467 dhp->dh_roff < ptob(pcp->dp_npages));
1468 pfnum = page_pptonum(
1469 pcp->dp_pparray[btop(off + dhp->dh_roff)]);
1470 } else {
1471 ASSERT(dhp->dh_roff < cp->size);
1472 ASSERT(dhp->dh_cvaddr >= cp->cvaddr &&
1473 dhp->dh_cvaddr < (cp->cvaddr + cp->size));
1474 ASSERT((dhp->dh_cvaddr + off) <=
1475 (cp->cvaddr + cp->size));
1476 ASSERT((dhp->dh_cvaddr + off + PAGESIZE) <=
1477 (cp->cvaddr + cp->size));
1479 switch (cp->type) {
1480 case UMEM_LOCKED :
1481 if (cp->pparray != NULL) {
1482 ASSERT((dhp->dh_roff &
1483 PAGEOFFSET) == 0);
1484 pfnum = page_pptonum(
1485 cp->pparray[btop(off +
1486 dhp->dh_roff)]);
1487 } else {
1488 pfnum = hat_getpfnum(
1489 ((proc_t *)cp->procp)->p_as->a_hat,
1490 cp->cvaddr + off);
1492 break;
1493 case UMEM_TRASH :
1494 pfnum = page_pptonum(trashpp);
1496 * We should set hat_flags to HAT_NOFAULT also
1497 * However, not all hat layers implement this
1499 break;
1500 case KMEM_PAGEABLE:
1501 case KMEM_NON_PAGEABLE:
1502 pfnum = hat_getpfnum(kas.a_hat,
1503 dhp->dh_cvaddr + off);
1504 break;
1505 default :
1506 pfnum = PFN_INVALID;
1507 break;
1510 prot |= dhp->dh_hat_attr;
1512 if (pfnum == PFN_INVALID) {
1513 return (FC_MAKE_ERR(EFAULT));
1515 /* prot should already be OR'ed in with hat_attributes if needed */
1517 DEBUGF(9, (CE_CONT, "segdev_faultpage: pfnum %lx memory %x "
1518 "prot %x flags %x\n", pfnum, pf_is_memory(pfnum), prot, hat_flags));
1520 if (pf_is_memory(pfnum) || (dhp != NULL)) {
1522 * It's not _really_ required here to pass sdp->hat_flags
1523 * to hat_devload even though we do it.
1524 * This is because hat figures it out DEVMEM mappings
1525 * are non-consistent, anyway.
1527 hat_devload(hat, addr, PAGESIZE, pfnum,
1528 prot, hat_flags | sdp->hat_flags);
1529 return (0);
1533 * Fall through to the case where devmap is not used and need to call
1534 * up the device tree to set up the mapping
1537 dip = VTOS(VTOCVP(sdp->vp))->s_dip;
1538 ASSERT(dip);
1541 * When calling ddi_map_fault, we do not OR in sdp->hat_attr
1542 * This is because this calls drivers which may not expect
1543 * prot to have any other values than PROT_ALL
1544 * The root nexus driver has a hack to peek into the segment
1545 * structure and then OR in sdp->hat_attr.
1546 * XX In case the bus_ops interfaces are ever revisited
1547 * we need to fix this. prot should include other hat attributes
1549 if (ddi_map_fault(dip, hat, seg, addr, NULL, pfnum, prot & PROT_ALL,
1550 (uint_t)(type == F_SOFTLOCK)) != DDI_SUCCESS) {
1551 return (FC_MAKE_ERR(EFAULT));
1553 return (0);
1556 static faultcode_t
1557 segdev_fault(
1558 struct hat *hat, /* the hat */
1559 struct seg *seg, /* the seg_dev of interest */
1560 caddr_t addr, /* the address of the fault */
1561 size_t len, /* the length of the range */
1562 enum fault_type type, /* type of fault */
1563 enum seg_rw rw) /* type of access at fault */
1565 struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1566 devmap_handle_t *dhp_head = (devmap_handle_t *)sdp->devmap_data;
1567 devmap_handle_t *dhp;
1568 struct devmap_softlock *slock = NULL;
1569 ulong_t slpage = 0;
1570 ulong_t off;
1571 caddr_t maddr = addr;
1572 int err;
1573 int err_is_faultcode = 0;
1575 DEBUGF(7, (CE_CONT, "segdev_fault: dhp_head %p seg %p "
1576 "addr %p len %lx type %x\n",
1577 (void *)dhp_head, (void *)seg, (void *)addr, len, type));
1579 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
1581 /* Handle non-devmap case */
1582 if (dhp_head == NULL)
1583 return (segdev_faultpages(hat, seg, addr, len, type, rw, NULL));
1585 /* Find devmap handle */
1586 if ((dhp = devmap_find_handle(dhp_head, addr)) == NULL)
1587 return (FC_NOMAP);
1590 * The seg_dev driver does not implement copy-on-write,
1591 * and always loads translations with maximal allowed permissions
1592 * but we got an fault trying to access the device.
1593 * Servicing the fault is not going to result in any better result
1594 * RFE: If we want devmap_access callbacks to be involved in F_PROT
1595 * faults, then the code below is written for that
1596 * Pending resolution of the following:
1597 * - determine if the F_INVAL/F_SOFTLOCK syncing
1598 * is needed for F_PROT also or not. The code below assumes it does
1599 * - If driver sees F_PROT and calls devmap_load with same type,
1600 * then segdev_faultpages will fail with FC_PROT anyway, need to
1601 * change that so calls from devmap_load to segdev_faultpages for
1602 * F_PROT type are retagged to F_INVAL.
1603 * RFE: Today we dont have drivers that use devmap and want to handle
1604 * F_PROT calls. The code in segdev_fault* is written to allow
1605 * this case but is not tested. A driver that needs this capability
1606 * should be able to remove the short-circuit case; resolve the
1607 * above issues and "should" work.
1609 if (type == F_PROT) {
1610 return (FC_PROT);
1614 * Loop through dhp list calling devmap_access or segdev_faultpages for
1615 * each devmap handle.
1616 * drivers which implement devmap_access can interpose on faults and do
1617 * device-appropriate special actions before calling devmap_load.
1621 * Unfortunately, this simple loop has turned out to expose a variety
1622 * of complex problems which results in the following convoluted code.
1624 * First, a desire to handle a serialization of F_SOFTLOCK calls
1625 * to the driver within the framework.
1626 * This results in a dh_softlock structure that is on a per device
1627 * (or device instance) basis and serializes devmap_access calls.
1628 * Ideally we would need to do this for underlying
1629 * memory/device regions that are being faulted on
1630 * but that is hard to identify and with REMAP, harder
1631 * Second, a desire to serialize F_INVAL(and F_PROT) calls w.r.t.
1632 * to F_SOFTLOCK calls to the driver.
1633 * These serializations are to simplify the driver programmer model.
1634 * To support these two features, the code first goes through the
1635 * devmap handles and counts the pages (slpage) that are covered
1636 * by devmap_access callbacks.
1637 * This part ends with a devmap_softlock_enter call
1638 * which allows only one F_SOFTLOCK active on a device instance,
1639 * but multiple F_INVAL/F_PROTs can be active except when a
1640 * F_SOFTLOCK is active
1642 * Next, we dont short-circuit the fault code upfront to call
1643 * segdev_softunlock for F_SOFTUNLOCK, because we must use
1644 * the same length when we softlock and softunlock.
1646 * -Hat layers may not support softunlocking lengths less than the
1647 * original length when there is large page support.
1648 * -kpmem locking is dependent on keeping the lengths same.
1649 * -if drivers handled F_SOFTLOCK, they probably also expect to
1650 * see an F_SOFTUNLOCK of the same length
1651 * Hence, if extending lengths during softlock,
1652 * softunlock has to make the same adjustments and goes through
1653 * the same loop calling segdev_faultpages/segdev_softunlock
1654 * But some of the synchronization and error handling is different
1657 if (type != F_SOFTUNLOCK) {
1658 devmap_handle_t *dhpp = dhp;
1659 size_t slen = len;
1662 * Calculate count of pages that are :
1663 * a) within the (potentially extended) fault region
1664 * b) AND covered by devmap handle with devmap_access
1666 off = (ulong_t)(addr - dhpp->dh_uvaddr);
1667 while (slen != 0) {
1668 size_t mlen;
1671 * Softlocking on a region that allows remap is
1672 * unsupported due to unresolved locking issues
1673 * XXX: unclear what these are?
1674 * One potential is that if there is a pending
1675 * softlock, then a remap should not be allowed
1676 * until the unlock is done. This is easily
1677 * fixed by returning error in devmap*remap on
1678 * checking the dh->dh_softlock->softlocked value
1680 if ((type == F_SOFTLOCK) &&
1681 (dhpp->dh_flags & DEVMAP_ALLOW_REMAP)) {
1682 return (FC_NOSUPPORT);
1685 mlen = MIN(slen, (dhpp->dh_len - off));
1686 if (dhpp->dh_callbackops.devmap_access) {
1687 size_t llen;
1688 caddr_t laddr;
1690 * use extended length for large page mappings
1692 HOLD_DHP_LOCK(dhpp);
1693 if ((sdp->pageprot == 0) &&
1694 (dhpp->dh_flags & DEVMAP_FLAG_LARGE)) {
1695 devmap_get_large_pgsize(dhpp,
1696 mlen, maddr, &llen, &laddr);
1697 } else {
1698 llen = mlen;
1700 RELE_DHP_LOCK(dhpp);
1702 slpage += btopr(llen);
1703 slock = dhpp->dh_softlock;
1705 maddr += mlen;
1706 ASSERT(slen >= mlen);
1707 slen -= mlen;
1708 dhpp = dhpp->dh_next;
1709 off = 0;
1712 * synchonize with other faulting threads and wait till safe
1713 * devmap_softlock_enter might return due to signal in cv_wait
1715 * devmap_softlock_enter has to be called outside of while loop
1716 * to prevent a deadlock if len spans over multiple dhps.
1717 * dh_softlock is based on device instance and if multiple dhps
1718 * use the same device instance, the second dhp's LOCK call
1719 * will hang waiting on the first to complete.
1720 * devmap_setup verifies that slocks in a dhp_chain are same.
1721 * RFE: this deadlock only hold true for F_SOFTLOCK. For
1722 * F_INVAL/F_PROT, since we now allow multiple in parallel,
1723 * we could have done the softlock_enter inside the loop
1724 * and supported multi-dhp mappings with dissimilar devices
1726 if (err = devmap_softlock_enter(slock, slpage, type))
1727 return (FC_MAKE_ERR(err));
1730 /* reset 'maddr' to the start addr of the range of fault. */
1731 maddr = addr;
1733 /* calculate the offset corresponds to 'addr' in the first dhp. */
1734 off = (ulong_t)(addr - dhp->dh_uvaddr);
1737 * The fault length may span over multiple dhps.
1738 * Loop until the total length is satisfied.
1740 while (len != 0) {
1741 size_t llen;
1742 size_t mlen;
1743 caddr_t laddr;
1746 * mlen is the smaller of 'len' and the length
1747 * from addr to the end of mapping defined by dhp.
1749 mlen = MIN(len, (dhp->dh_len - off));
1751 HOLD_DHP_LOCK(dhp);
1753 * Pass the extended length and address to devmap_access
1754 * if large pagesize is used for loading address translations.
1756 if ((sdp->pageprot == 0) &&
1757 (dhp->dh_flags & DEVMAP_FLAG_LARGE)) {
1758 devmap_get_large_pgsize(dhp, mlen, maddr,
1759 &llen, &laddr);
1760 ASSERT(maddr == addr || laddr == maddr);
1761 } else {
1762 llen = mlen;
1763 laddr = maddr;
1766 if (dhp->dh_callbackops.devmap_access != NULL) {
1767 offset_t aoff;
1769 aoff = sdp->offset + (offset_t)(laddr - seg->s_base);
1772 * call driver's devmap_access entry point which will
1773 * call devmap_load/contextmgmt to load the translations
1775 * We drop the dhp_lock before calling access so
1776 * drivers can call devmap_*_remap within access
1778 RELE_DHP_LOCK(dhp);
1780 err = (*dhp->dh_callbackops.devmap_access)(
1781 dhp, (void *)dhp->dh_pvtp, aoff, llen, type, rw);
1782 } else {
1784 * If no devmap_access entry point, then load mappings
1785 * hold dhp_lock across faultpages if REMAP
1787 err = segdev_faultpages(hat, seg, laddr, llen,
1788 type, rw, dhp);
1789 err_is_faultcode = 1;
1790 RELE_DHP_LOCK(dhp);
1793 if (err) {
1794 if ((type == F_SOFTLOCK) && (maddr > addr)) {
1796 * If not first dhp, use
1797 * segdev_fault(F_SOFTUNLOCK) for prior dhps
1798 * While this is recursion, it is incorrect to
1799 * call just segdev_softunlock
1800 * if we are using either large pages
1801 * or devmap_access. It will be more right
1802 * to go through the same loop as above
1803 * rather than call segdev_softunlock directly
1804 * It will use the right lenghths as well as
1805 * call into the driver devmap_access routines.
1807 size_t done = (size_t)(maddr - addr);
1808 (void) segdev_fault(hat, seg, addr, done,
1809 F_SOFTUNLOCK, S_OTHER);
1811 * reduce slpage by number of pages
1812 * released by segdev_softunlock
1814 ASSERT(slpage >= btopr(done));
1815 devmap_softlock_exit(slock,
1816 slpage - btopr(done), type);
1817 } else {
1818 devmap_softlock_exit(slock, slpage, type);
1823 * Segdev_faultpages() already returns a faultcode,
1824 * hence, result from segdev_faultpages() should be
1825 * returned directly.
1827 if (err_is_faultcode)
1828 return (err);
1829 return (FC_MAKE_ERR(err));
1832 maddr += mlen;
1833 ASSERT(len >= mlen);
1834 len -= mlen;
1835 dhp = dhp->dh_next;
1836 off = 0;
1838 ASSERT(!dhp || len == 0 || maddr == dhp->dh_uvaddr);
1841 * release the softlock count at end of fault
1842 * For F_SOFTLOCk this is done in the later F_SOFTUNLOCK
1844 if ((type == F_INVAL) || (type == F_PROT))
1845 devmap_softlock_exit(slock, slpage, type);
1846 return (0);
1850 * segdev_faultpages
1852 * Used to fault in seg_dev segment pages. Called by segdev_fault or devmap_load
1853 * This routine assumes that the callers makes sure that the fields
1854 * in dhp used below are not changed due to remap during this call.
1855 * Caller does HOLD_DHP_LOCK if neeed
1856 * This routine returns a faultcode_t as a return value for segdev_fault.
1858 static faultcode_t
1859 segdev_faultpages(
1860 struct hat *hat, /* the hat */
1861 struct seg *seg, /* the seg_dev of interest */
1862 caddr_t addr, /* the address of the fault */
1863 size_t len, /* the length of the range */
1864 enum fault_type type, /* type of fault */
1865 enum seg_rw rw, /* type of access at fault */
1866 devmap_handle_t *dhp) /* devmap handle */
1868 register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1869 register caddr_t a;
1870 struct vpage *vpage;
1871 struct ddi_umem_cookie *kpmem_cookie = NULL;
1872 int err;
1874 DEBUGF(5, (CE_CONT, "segdev_faultpages: "
1875 "dhp %p seg %p addr %p len %lx\n",
1876 (void *)dhp, (void *)seg, (void *)addr, len));
1879 * The seg_dev driver does not implement copy-on-write,
1880 * and always loads translations with maximal allowed permissions
1881 * but we got an fault trying to access the device.
1882 * Servicing the fault is not going to result in any better result
1883 * XXX: If we want to allow devmap_access to handle F_PROT calls,
1884 * This code should be removed and let the normal fault handling
1885 * take care of finding the error
1887 if (type == F_PROT) {
1888 return (FC_PROT);
1891 if (type == F_SOFTUNLOCK) {
1892 segdev_softunlock(hat, seg, addr, len, rw);
1893 return (0);
1897 * For kernel pageable memory, fault/lock segkp pages
1898 * We hold this until the completion of this
1899 * fault (INVAL/PROT) or till unlock (SOFTLOCK).
1901 if ((dhp != NULL) && dhp_is_kpmem(dhp)) {
1902 kpmem_cookie = (struct ddi_umem_cookie *)dhp->dh_cookie;
1903 if (err = acquire_kpmem_lock(kpmem_cookie, btopr(len)))
1904 return (err);
1908 * If we have the same protections for the entire segment,
1909 * insure that the access being attempted is legitimate.
1911 rw_enter(&sdp->lock, RW_READER);
1912 if (sdp->pageprot == 0) {
1913 uint_t protchk;
1915 switch (rw) {
1916 case S_READ:
1917 protchk = PROT_READ;
1918 break;
1919 case S_WRITE:
1920 protchk = PROT_WRITE;
1921 break;
1922 case S_EXEC:
1923 protchk = PROT_EXEC;
1924 break;
1925 case S_OTHER:
1926 default:
1927 protchk = PROT_READ | PROT_WRITE | PROT_EXEC;
1928 break;
1931 if ((sdp->prot & protchk) == 0) {
1932 rw_exit(&sdp->lock);
1933 /* undo kpmem locking */
1934 if (kpmem_cookie != NULL) {
1935 release_kpmem_lock(kpmem_cookie, btopr(len));
1937 return (FC_PROT); /* illegal access type */
1942 * we do a single hat_devload for the range if
1943 * - devmap framework (dhp is not NULL),
1944 * - pageprot == 0, i.e., no per-page protection set and
1945 * - is device pages, irrespective of whether we are using large pages
1947 if ((sdp->pageprot == 0) && (dhp != NULL) && dhp_is_devmem(dhp)) {
1948 pfn_t pfnum;
1949 uint_t hat_flags;
1951 if (dhp->dh_flags & DEVMAP_MAPPING_INVALID) {
1952 rw_exit(&sdp->lock);
1953 return (FC_NOMAP);
1956 if (type == F_SOFTLOCK) {
1957 mutex_enter(&freemem_lock);
1958 sdp->softlockcnt += btopr(len);
1959 mutex_exit(&freemem_lock);
1962 hat_flags = ((type == F_SOFTLOCK) ? HAT_LOAD_LOCK : HAT_LOAD);
1963 pfnum = dhp->dh_pfn + btop((uintptr_t)(addr - dhp->dh_uvaddr));
1964 ASSERT(!pf_is_memory(pfnum));
1966 hat_devload(hat, addr, len, pfnum, sdp->prot | dhp->dh_hat_attr,
1967 hat_flags | sdp->hat_flags);
1968 rw_exit(&sdp->lock);
1969 return (0);
1972 /* Handle cases where we have to loop through fault handling per-page */
1974 if (sdp->vpage == NULL)
1975 vpage = NULL;
1976 else
1977 vpage = &sdp->vpage[seg_page(seg, addr)];
1979 /* loop over the address range handling each fault */
1980 for (a = addr; a < addr + len; a += PAGESIZE) {
1981 if (err = segdev_faultpage(hat, seg, a, vpage, type, rw, dhp)) {
1982 break;
1984 if (vpage != NULL)
1985 vpage++;
1987 rw_exit(&sdp->lock);
1988 if (err && (type == F_SOFTLOCK)) { /* error handling for F_SOFTLOCK */
1989 size_t done = (size_t)(a - addr); /* pages fault successfully */
1990 if (done > 0) {
1991 /* use softunlock for those pages */
1992 segdev_softunlock(hat, seg, addr, done, S_OTHER);
1994 if (kpmem_cookie != NULL) {
1995 /* release kpmem lock for rest of pages */
1996 ASSERT(len >= done);
1997 release_kpmem_lock(kpmem_cookie, btopr(len - done));
1999 } else if ((kpmem_cookie != NULL) && (type != F_SOFTLOCK)) {
2000 /* for non-SOFTLOCK cases, release kpmem */
2001 release_kpmem_lock(kpmem_cookie, btopr(len));
2003 return (err);
2007 * Asynchronous page fault. We simply do nothing since this
2008 * entry point is not supposed to load up the translation.
2010 /*ARGSUSED*/
2011 static faultcode_t
2012 segdev_faulta(struct seg *seg, caddr_t addr)
2014 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2016 return (0);
2019 static int
2020 segdev_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
2022 register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2023 register devmap_handle_t *dhp;
2024 register struct vpage *vp, *evp;
2025 devmap_handle_t *dhp_head = (devmap_handle_t *)sdp->devmap_data;
2026 ulong_t off;
2027 size_t mlen, sz;
2029 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2031 if ((sz = sdp->softlockcnt) > 0 && dhp_head != NULL) {
2033 * Fail the setprot if pages are SOFTLOCKed through this
2034 * mapping.
2035 * Softlockcnt is protected from change by the as read lock.
2037 DEBUGF(1, (CE_CONT, "segdev_setprot: softlockcnt %ld\n", sz));
2038 return (EAGAIN);
2041 if (dhp_head != NULL) {
2042 if ((dhp = devmap_find_handle(dhp_head, addr)) == NULL)
2043 return (EINVAL);
2046 * check if violate maxprot.
2048 off = (ulong_t)(addr - dhp->dh_uvaddr);
2049 mlen = len;
2050 while (dhp) {
2051 if ((dhp->dh_maxprot & prot) != prot)
2052 return (EACCES); /* violated maxprot */
2054 if (mlen > (dhp->dh_len - off)) {
2055 mlen -= dhp->dh_len - off;
2056 dhp = dhp->dh_next;
2057 off = 0;
2058 } else
2059 break;
2061 } else {
2062 if ((sdp->maxprot & prot) != prot)
2063 return (EACCES);
2066 rw_enter(&sdp->lock, RW_WRITER);
2067 if (addr == seg->s_base && len == seg->s_size && sdp->pageprot == 0) {
2068 if (sdp->prot == prot) {
2069 rw_exit(&sdp->lock);
2070 return (0); /* all done */
2072 sdp->prot = (uchar_t)prot;
2073 } else {
2074 sdp->pageprot = 1;
2075 if (sdp->vpage == NULL) {
2077 * First time through setting per page permissions,
2078 * initialize all the vpage structures to prot
2080 sdp->vpage = kmem_zalloc(vpgtob(seg_pages(seg)),
2081 KM_SLEEP);
2082 evp = &sdp->vpage[seg_pages(seg)];
2083 for (vp = sdp->vpage; vp < evp; vp++)
2084 VPP_SETPROT(vp, sdp->prot);
2087 * Now go change the needed vpages protections.
2089 evp = &sdp->vpage[seg_page(seg, addr + len)];
2090 for (vp = &sdp->vpage[seg_page(seg, addr)]; vp < evp; vp++)
2091 VPP_SETPROT(vp, prot);
2093 rw_exit(&sdp->lock);
2095 if (dhp_head != NULL) {
2096 devmap_handle_t *tdhp;
2098 * If large page size was used in hat_devload(),
2099 * the same page size must be used in hat_unload().
2101 dhp = tdhp = devmap_find_handle(dhp_head, addr);
2102 while (tdhp != NULL) {
2103 if (tdhp->dh_flags & DEVMAP_FLAG_LARGE) {
2104 break;
2106 tdhp = tdhp->dh_next;
2108 if (tdhp) {
2109 size_t slen = len;
2110 size_t mlen;
2111 size_t soff;
2113 soff = (ulong_t)(addr - dhp->dh_uvaddr);
2114 while (slen != 0) {
2115 mlen = MIN(slen, (dhp->dh_len - soff));
2116 hat_unload(seg->s_as->a_hat, dhp->dh_uvaddr,
2117 dhp->dh_len, HAT_UNLOAD);
2118 dhp = dhp->dh_next;
2119 ASSERT(slen >= mlen);
2120 slen -= mlen;
2121 soff = 0;
2123 return (0);
2127 if ((prot & ~PROT_USER) == PROT_NONE) {
2128 hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD);
2129 } else {
2131 * RFE: the segment should keep track of all attributes
2132 * allowing us to remove the deprecated hat_chgprot
2133 * and use hat_chgattr.
2135 hat_chgprot(seg->s_as->a_hat, addr, len, prot);
2138 return (0);
2141 static int
2142 segdev_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
2144 struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2145 struct vpage *vp, *evp;
2147 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2150 * If segment protection can be used, simply check against them
2152 rw_enter(&sdp->lock, RW_READER);
2153 if (sdp->pageprot == 0) {
2154 register int err;
2156 err = ((sdp->prot & prot) != prot) ? EACCES : 0;
2157 rw_exit(&sdp->lock);
2158 return (err);
2162 * Have to check down to the vpage level
2164 evp = &sdp->vpage[seg_page(seg, addr + len)];
2165 for (vp = &sdp->vpage[seg_page(seg, addr)]; vp < evp; vp++) {
2166 if ((VPP_PROT(vp) & prot) != prot) {
2167 rw_exit(&sdp->lock);
2168 return (EACCES);
2171 rw_exit(&sdp->lock);
2172 return (0);
2175 static int
2176 segdev_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
2178 struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2179 size_t pgno;
2181 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2183 pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
2184 if (pgno != 0) {
2185 rw_enter(&sdp->lock, RW_READER);
2186 if (sdp->pageprot == 0) {
2187 do {
2188 protv[--pgno] = sdp->prot;
2189 } while (pgno != 0);
2190 } else {
2191 size_t pgoff = seg_page(seg, addr);
2193 do {
2194 pgno--;
2195 protv[pgno] =
2196 VPP_PROT(&sdp->vpage[pgno + pgoff]);
2197 } while (pgno != 0);
2199 rw_exit(&sdp->lock);
2201 return (0);
2204 static uoff_t
2205 segdev_getoffset(register struct seg *seg, caddr_t addr)
2207 register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2209 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2211 return ((uoff_t)sdp->offset + (addr - seg->s_base));
2214 /*ARGSUSED*/
2215 static int
2216 segdev_gettype(register struct seg *seg, caddr_t addr)
2218 register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2220 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2222 return (sdp->type);
2226 /*ARGSUSED*/
2227 static int
2228 segdev_getvp(register struct seg *seg, caddr_t addr, struct vnode **vpp)
2230 register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2232 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2235 * Note that this vp is the common_vp of the device, where the
2236 * pages are hung ..
2238 *vpp = VTOCVP(sdp->vp);
2240 return (0);
/*
 * Placeholder for segment operations that must never be invoked on a
 * segdev segment; reaching it panics the system.
 */
static void
segdev_badop(void)
{
	panic("segdev_badop");
	/*NOTREACHED*/
}
2251 * segdev pages are not in the cache, and thus can't really be controlled.
2252 * Hence, syncs are simply always successful.
2254 /*ARGSUSED*/
2255 static int
2256 segdev_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
2258 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2260 return (0);
2264 * segdev pages are always "in core".
2266 /*ARGSUSED*/
2267 static size_t
2268 segdev_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
2270 size_t v = 0;
2272 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2274 for (len = (len + PAGEOFFSET) & PAGEMASK; len; len -= PAGESIZE,
2275 v += PAGESIZE)
2276 *vec++ = 1;
2277 return (v);
2281 * segdev pages are not in the cache, and thus can't really be controlled.
2282 * Hence, locks are simply always successful.
2284 /*ARGSUSED*/
2285 static int
2286 segdev_lockop(struct seg *seg, caddr_t addr,
2287 size_t len, int attr, int op, ulong_t *lockmap, size_t pos)
2289 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2291 return (0);
2295 * segdev pages are not in the cache, and thus can't really be controlled.
2296 * Hence, advise is simply always successful.
2298 /*ARGSUSED*/
2299 static int
2300 segdev_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
2302 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
2304 return (0);
2308 * ddi_segmap_setup: Used by drivers who wish specify mapping attributes
2309 * for a segment. Called from a drivers segmap(9E)
2310 * routine.
2312 /*ARGSUSED*/
2314 ddi_segmap_setup(dev_t dev, off_t offset, struct as *as, caddr_t *addrp,
2315 off_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cred,
2316 ddi_device_acc_attr_t *accattrp, uint_t rnumber)
2318 struct segdev_crargs dev_a;
2319 int (*mapfunc)(dev_t dev, off_t off, int prot);
2320 uint_t hat_attr;
2321 pfn_t pfn;
2322 int error, i;
2324 if ((mapfunc = devopsp[getmajor(dev)]->devo_cb_ops->cb_mmap) == nodev)
2325 return (ENODEV);
2328 * Character devices that support the d_mmap
2329 * interface can only be mmap'ed shared.
2331 if ((flags & MAP_TYPE) != MAP_SHARED)
2332 return (EINVAL);
2335 * Check that this region is indeed mappable on this platform.
2336 * Use the mapping function.
2338 if (ddi_device_mapping_check(dev, accattrp, rnumber, &hat_attr) == -1)
2339 return (ENXIO);
2342 * Check to ensure that the entire range is
2343 * legal and we are not trying to map in
2344 * more than the device will let us.
2346 for (i = 0; i < len; i += PAGESIZE) {
2347 if (i == 0) {
2349 * Save the pfn at offset here. This pfn will be
2350 * used later to get user address.
2352 if ((pfn = (pfn_t)cdev_mmap(mapfunc, dev, offset,
2353 maxprot)) == PFN_INVALID)
2354 return (ENXIO);
2355 } else {
2356 if (cdev_mmap(mapfunc, dev, offset + i, maxprot) ==
2357 PFN_INVALID)
2358 return (ENXIO);
2362 as_rangelock(as);
2363 /* Pick an address w/o worrying about any vac alignment constraints. */
2364 error = choose_addr(as, addrp, len, ptob(pfn), ADDR_NOVACALIGN, flags);
2365 if (error != 0) {
2366 as_rangeunlock(as);
2367 return (error);
2370 dev_a.mapfunc = mapfunc;
2371 dev_a.dev = dev;
2372 dev_a.offset = (offset_t)offset;
2373 dev_a.type = flags & MAP_TYPE;
2374 dev_a.prot = (uchar_t)prot;
2375 dev_a.maxprot = (uchar_t)maxprot;
2376 dev_a.hat_attr = hat_attr;
2377 dev_a.hat_flags = 0;
2378 dev_a.devmap_data = NULL;
2380 error = as_map(as, *addrp, len, segdev_create, &dev_a);
2381 as_rangeunlock(as);
2382 return (error);
2386 /*ARGSUSED*/
2387 static int
2388 segdev_pagelock(struct seg *seg, caddr_t addr, size_t len,
2389 struct page ***ppp, enum lock_type type, enum seg_rw rw)
2391 return (ENOTSUP);
2395 * devmap_device: Used by devmap framework to establish mapping
2396 * called by devmap_seup(9F) during map setup time.
2398 /*ARGSUSED*/
2399 static int
2400 devmap_device(devmap_handle_t *dhp, struct as *as, caddr_t *addr,
2401 offset_t off, size_t len, uint_t flags)
2403 devmap_handle_t *rdhp, *maxdhp;
2404 struct segdev_crargs dev_a;
2405 int err;
2406 uint_t maxprot = PROT_ALL;
2407 offset_t offset = 0;
2408 pfn_t pfn;
2409 struct devmap_pmem_cookie *pcp;
2411 DEBUGF(2, (CE_CONT, "devmap_device: dhp %p addr %p off %llx len %lx\n",
2412 (void *)dhp, (void *)addr, off, len));
2414 as_rangelock(as);
2415 if ((flags & MAP_FIXED) == 0) {
2416 offset_t aligned_off;
2418 rdhp = maxdhp = dhp;
2419 while (rdhp != NULL) {
2420 maxdhp = (maxdhp->dh_len > rdhp->dh_len) ?
2421 maxdhp : rdhp;
2422 rdhp = rdhp->dh_next;
2423 maxprot |= dhp->dh_maxprot;
2425 offset = maxdhp->dh_uoff - dhp->dh_uoff;
2428 * Use the dhp that has the
2429 * largest len to get user address.
2432 * If MAPPING_INVALID, cannot use dh_pfn/dh_cvaddr,
2433 * use 0 which is as good as any other.
2435 if (maxdhp->dh_flags & DEVMAP_MAPPING_INVALID) {
2436 aligned_off = (offset_t)0;
2437 } else if (dhp_is_devmem(maxdhp)) {
2438 aligned_off = (offset_t)ptob(maxdhp->dh_pfn) - offset;
2439 } else if (dhp_is_pmem(maxdhp)) {
2440 pcp = (struct devmap_pmem_cookie *)maxdhp->dh_pcookie;
2441 pfn = page_pptonum(
2442 pcp->dp_pparray[btop(maxdhp->dh_roff)]);
2443 aligned_off = (offset_t)ptob(pfn) - offset;
2444 } else {
2445 aligned_off = (offset_t)(uintptr_t)maxdhp->dh_cvaddr -
2446 offset;
2450 * Pick an address aligned to dh_cookie.
2451 * for kernel memory/user memory, cookie is cvaddr.
2452 * for device memory, cookie is physical address.
2454 map_addr(addr, len, aligned_off, 1, flags);
2455 if (*addr == NULL) {
2456 as_rangeunlock(as);
2457 return (ENOMEM);
2459 } else {
2461 * User-specified address; blow away any previous mappings.
2463 (void) as_unmap(as, *addr, len);
2466 dev_a.mapfunc = NULL;
2467 dev_a.dev = dhp->dh_dev;
2468 dev_a.type = flags & MAP_TYPE;
2469 dev_a.offset = off;
2471 * sdp->maxprot has the least restrict protection of all dhps.
2473 dev_a.maxprot = maxprot;
2474 dev_a.prot = dhp->dh_prot;
2476 * devmap uses dhp->dh_hat_attr for hat.
2478 dev_a.hat_flags = 0;
2479 dev_a.hat_attr = 0;
2480 dev_a.devmap_data = (void *)dhp;
2482 err = as_map(as, *addr, len, segdev_create, &dev_a);
2483 as_rangeunlock(as);
2484 return (err);
2488 devmap_do_ctxmgt(devmap_cookie_t dhc, void *pvtp, offset_t off, size_t len,
2489 uint_t type, uint_t rw, int (*ctxmgt)(devmap_cookie_t, void *, offset_t,
2490 size_t, uint_t, uint_t))
2492 register devmap_handle_t *dhp = (devmap_handle_t *)dhc;
2493 struct devmap_ctx *devctx;
2494 int do_timeout = 0;
2495 int ret;
2498 DEBUGF(7, (CE_CONT, "devmap_do_ctxmgt: dhp %p off %llx len %lx\n",
2499 (void *)dhp, off, len));
2501 if (ctxmgt == NULL)
2502 return (FC_HWERR);
2504 devctx = dhp->dh_ctx;
2507 * If we are on an MP system with more than one cpu running
2508 * and if a thread on some CPU already has the context, wait
2509 * for it to finish if there is a hysteresis timeout.
2511 * We call cv_wait() instead of cv_wait_sig() because
2512 * it does not matter much if it returned due to a signal
2513 * or due to a cv_signal() or cv_broadcast(). In either event
2514 * we need to complete the mapping otherwise the processes
2515 * will die with a SEGV.
2517 if ((dhp->dh_timeout_length > 0) && (ncpus > 1)) {
2518 do_timeout = 1;
2519 mutex_enter(&devctx->lock);
2520 while (devctx->oncpu)
2521 cv_wait(&devctx->cv, &devctx->lock);
2522 devctx->oncpu = 1;
2523 mutex_exit(&devctx->lock);
2527 * Call the contextmgt callback so that the driver can handle
2528 * the fault.
2530 ret = (*ctxmgt)(dhp, dhp->dh_pvtp, off, len, type, rw);
2533 * If devmap_access() returned -1, then there was a hardware
2534 * error so we need to convert the return value to something
2535 * that trap() will understand. Otherwise, the return value
2536 * is already a fault code generated by devmap_unload()
2537 * or devmap_load().
2539 if (ret) {
2540 DEBUGF(1, (CE_CONT, "devmap_do_ctxmgt: ret %x dhp %p\n",
2541 ret, (void *)dhp));
2542 if (devctx->oncpu) {
2543 mutex_enter(&devctx->lock);
2544 devctx->oncpu = 0;
2545 cv_signal(&devctx->cv);
2546 mutex_exit(&devctx->lock);
2548 return (FC_HWERR);
2552 * Setup the timeout if we need to
2554 if (do_timeout) {
2555 mutex_enter(&devctx->lock);
2556 if (dhp->dh_timeout_length > 0) {
2557 devctx->timeout = timeout(devmap_ctxto,
2558 devctx, dhp->dh_timeout_length);
2559 } else {
2561 * We don't want to wait so set oncpu to
2562 * 0 and wake up anyone waiting.
2564 devctx->oncpu = 0;
2565 cv_signal(&devctx->cv);
2567 mutex_exit(&devctx->lock);
2570 return (DDI_SUCCESS);
2574 * end of mapping
2575 * poff fault_offset |
2576 * base | | |
2577 * | | | |
2578 * V V V V
2579 * +-----------+---------------+-------+---------+-------+
2580 * ^ ^ ^ ^
2581 * |<--- offset--->|<-len->| |
2582 * |<--- dh_len(size of mapping) --->|
2583 * |<-- pg -->|
2584 * -->|rlen|<--
2586 static ulong_t
2587 devmap_roundup(devmap_handle_t *dhp, ulong_t offset, size_t len,
2588 ulong_t *opfn, ulong_t *pagesize)
2590 register int level;
2591 ulong_t pg;
2592 ulong_t poff;
2593 ulong_t base;
2594 caddr_t uvaddr;
2595 long rlen;
2597 DEBUGF(2, (CE_CONT, "devmap_roundup: dhp %p off %lx len %lx\n",
2598 (void *)dhp, offset, len));
2601 * get the max. pagesize that is aligned within the range
2602 * <dh_pfn, dh_pfn+offset>.
2604 * The calculations below use physical address to ddetermine
2605 * the page size to use. The same calculations can use the
2606 * virtual address to determine the page size.
2608 base = (ulong_t)ptob(dhp->dh_pfn);
2609 for (level = dhp->dh_mmulevel; level >= 0; level--) {
2610 pg = page_get_pagesize(level);
2611 poff = ((base + offset) & ~(pg - 1));
2612 uvaddr = dhp->dh_uvaddr + (poff - base);
2613 if ((poff >= base) &&
2614 ((poff + pg) <= (base + dhp->dh_len)) &&
2615 VA_PA_ALIGNED((uintptr_t)uvaddr, poff, pg))
2616 break;
2619 DEBUGF(2, (CE_CONT, "devmap_roundup: base %lx poff %lx pfn %lx\n",
2620 base, poff, dhp->dh_pfn));
2622 ASSERT(VA_PA_ALIGNED((uintptr_t)uvaddr, poff, pg));
2623 ASSERT(level >= 0);
2625 *pagesize = pg;
2626 *opfn = dhp->dh_pfn + btop(poff - base);
2628 rlen = len + offset - (poff - base + pg);
2630 ASSERT(rlen < (long)len);
2632 DEBUGF(1, (CE_CONT, "devmap_roundup: dhp %p "
2633 "level %x rlen %lx psize %lx opfn %lx\n",
2634 (void *)dhp, level, rlen, *pagesize, *opfn));
2636 return ((ulong_t)((rlen > 0) ? rlen : 0));
2640 * find the dhp that contains addr.
2642 static devmap_handle_t *
2643 devmap_find_handle(devmap_handle_t *dhp_head, caddr_t addr)
2645 devmap_handle_t *dhp;
2647 dhp = dhp_head;
2648 while (dhp) {
2649 if (addr >= dhp->dh_uvaddr &&
2650 addr < (dhp->dh_uvaddr + dhp->dh_len))
2651 return (dhp);
2652 dhp = dhp->dh_next;
2655 return ((devmap_handle_t *)NULL);
2659 * devmap_unload:
2660 * Marks a segdev segment or pages if offset->offset+len
2661 * is not the entire segment as intercept and unloads the
2662 * pages in the range offset -> offset+len.
2665 devmap_unload(devmap_cookie_t dhc, offset_t offset, size_t len)
2667 register devmap_handle_t *dhp = (devmap_handle_t *)dhc;
2668 caddr_t addr;
2669 ulong_t size;
2670 ssize_t soff;
2672 DEBUGF(7, (CE_CONT, "devmap_unload: dhp %p offset %llx len %lx\n",
2673 (void *)dhp, offset, len));
2675 soff = (ssize_t)(offset - dhp->dh_uoff);
2676 soff = round_down_p2(soff, PAGESIZE);
2677 if (soff < 0 || soff >= dhp->dh_len)
2678 return (FC_MAKE_ERR(EINVAL));
2681 * Address and size must be page aligned. Len is set to the
2682 * number of bytes in the number of pages that are required to
2683 * support len. Offset is set to the byte offset of the first byte
2684 * of the page that contains offset.
2686 len = round_up_p2(len, PAGESIZE);
2689 * If len is == 0, then calculate the size by getting
2690 * the number of bytes from offset to the end of the segment.
2692 if (len == 0)
2693 size = dhp->dh_len - soff;
2694 else {
2695 size = len;
2696 if ((soff + size) > dhp->dh_len)
2697 return (FC_MAKE_ERR(EINVAL));
2701 * The address is offset bytes from the base address of
2702 * the dhp.
2704 addr = (caddr_t)(soff + dhp->dh_uvaddr);
2707 * If large page size was used in hat_devload(),
2708 * the same page size must be used in hat_unload().
2710 if (dhp->dh_flags & DEVMAP_FLAG_LARGE) {
2711 hat_unload(dhp->dh_seg->s_as->a_hat, dhp->dh_uvaddr,
2712 dhp->dh_len, HAT_UNLOAD|HAT_UNLOAD_OTHER);
2713 } else {
2714 hat_unload(dhp->dh_seg->s_as->a_hat, addr, size,
2715 HAT_UNLOAD|HAT_UNLOAD_OTHER);
2718 return (0);
2722 * calculates the optimal page size that will be used for hat_devload().
2724 static void
2725 devmap_get_large_pgsize(devmap_handle_t *dhp, size_t len, caddr_t addr,
2726 size_t *llen, caddr_t *laddr)
2728 ulong_t off;
2729 ulong_t pfn;
2730 ulong_t pgsize;
2731 uint_t first = 1;
2734 * RFE - Code only supports large page mappings for devmem
2735 * This code could be changed in future if we want to support
2736 * large page mappings for kernel exported memory.
2738 ASSERT(dhp_is_devmem(dhp));
2739 ASSERT(!(dhp->dh_flags & DEVMAP_MAPPING_INVALID));
2741 *llen = 0;
2742 off = (ulong_t)(addr - dhp->dh_uvaddr);
2743 while ((long)len > 0) {
2745 * get the optimal pfn to minimize address translations.
2746 * devmap_roundup() returns residue bytes for next round
2747 * calculations.
2749 len = devmap_roundup(dhp, off, len, &pfn, &pgsize);
2751 if (first) {
2752 *laddr = dhp->dh_uvaddr + ptob(pfn - dhp->dh_pfn);
2753 first = 0;
2756 *llen += pgsize;
2757 off = ptob(pfn - dhp->dh_pfn) + pgsize;
2759 /* Large page mapping len/addr cover more range than original fault */
2760 ASSERT(*llen >= len && *laddr <= addr);
2761 ASSERT((*laddr + *llen) >= (addr + len));
2765 * Initialize the devmap_softlock structure.
2767 static struct devmap_softlock *
2768 devmap_softlock_init(dev_t dev, ulong_t id)
2770 struct devmap_softlock *slock;
2771 struct devmap_softlock *tmp;
2773 tmp = kmem_zalloc(sizeof (struct devmap_softlock), KM_SLEEP);
2774 mutex_enter(&devmap_slock);
2776 for (slock = devmap_slist; slock != NULL; slock = slock->next)
2777 if ((slock->dev == dev) && (slock->id == id))
2778 break;
2780 if (slock == NULL) {
2781 slock = tmp;
2782 slock->dev = dev;
2783 slock->id = id;
2784 mutex_init(&slock->lock, NULL, MUTEX_DEFAULT, NULL);
2785 cv_init(&slock->cv, NULL, CV_DEFAULT, NULL);
2786 slock->next = devmap_slist;
2787 devmap_slist = slock;
2788 } else
2789 kmem_free(tmp, sizeof (struct devmap_softlock));
2791 mutex_enter(&slock->lock);
2792 slock->refcnt++;
2793 mutex_exit(&slock->lock);
2794 mutex_exit(&devmap_slock);
2796 return (slock);
2800 * Wake up processes that sleep on softlocked.
2801 * Free dh_softlock if refcnt is 0.
2803 static void
2804 devmap_softlock_rele(devmap_handle_t *dhp)
2806 struct devmap_softlock *slock = dhp->dh_softlock;
2807 struct devmap_softlock *tmp;
2808 struct devmap_softlock *parent;
2810 mutex_enter(&devmap_slock);
2811 mutex_enter(&slock->lock);
2813 ASSERT(slock->refcnt > 0);
2815 slock->refcnt--;
2818 * If no one is using the device, free up the slock data.
2820 if (slock->refcnt == 0) {
2821 slock->softlocked = 0;
2822 cv_signal(&slock->cv);
2824 if (devmap_slist == slock)
2825 devmap_slist = slock->next;
2826 else {
2827 parent = devmap_slist;
2828 for (tmp = devmap_slist->next; tmp != NULL;
2829 tmp = tmp->next) {
2830 if (tmp == slock) {
2831 parent->next = tmp->next;
2832 break;
2834 parent = tmp;
2837 mutex_exit(&slock->lock);
2838 mutex_destroy(&slock->lock);
2839 cv_destroy(&slock->cv);
2840 kmem_free(slock, sizeof (struct devmap_softlock));
2841 } else
2842 mutex_exit(&slock->lock);
2844 mutex_exit(&devmap_slock);
2848 * Wake up processes that sleep on dh_ctx->locked.
2849 * Free dh_ctx if refcnt is 0.
2851 static void
2852 devmap_ctx_rele(devmap_handle_t *dhp)
2854 struct devmap_ctx *devctx = dhp->dh_ctx;
2855 struct devmap_ctx *tmp;
2856 struct devmap_ctx *parent;
2857 timeout_id_t tid;
2859 mutex_enter(&devmapctx_lock);
2860 mutex_enter(&devctx->lock);
2862 ASSERT(devctx->refcnt > 0);
2864 devctx->refcnt--;
2867 * If no one is using the device, free up the devctx data.
2869 if (devctx->refcnt == 0) {
2871 * Untimeout any threads using this mapping as they are about
2872 * to go away.
2874 if (devctx->timeout != 0) {
2875 tid = devctx->timeout;
2876 mutex_exit(&devctx->lock);
2877 (void) untimeout(tid);
2878 mutex_enter(&devctx->lock);
2881 devctx->oncpu = 0;
2882 cv_signal(&devctx->cv);
2884 if (devmapctx_list == devctx)
2885 devmapctx_list = devctx->next;
2886 else {
2887 parent = devmapctx_list;
2888 for (tmp = devmapctx_list->next; tmp != NULL;
2889 tmp = tmp->next) {
2890 if (tmp == devctx) {
2891 parent->next = tmp->next;
2892 break;
2894 parent = tmp;
2897 mutex_exit(&devctx->lock);
2898 mutex_destroy(&devctx->lock);
2899 cv_destroy(&devctx->cv);
2900 kmem_free(devctx, sizeof (struct devmap_ctx));
2901 } else
2902 mutex_exit(&devctx->lock);
2904 mutex_exit(&devmapctx_lock);
2908 * devmap_load:
2909 * Marks a segdev segment or pages if offset->offset+len
2910 * is not the entire segment as nointercept and faults in
2911 * the pages in the range offset -> offset+len.
2914 devmap_load(devmap_cookie_t dhc, offset_t offset, size_t len, uint_t type,
2915 uint_t rw)
2917 devmap_handle_t *dhp = (devmap_handle_t *)dhc;
2918 struct as *asp = dhp->dh_seg->s_as;
2919 caddr_t addr;
2920 ulong_t size;
2921 ssize_t soff; /* offset from the beginning of the segment */
2922 int rc;
2924 DEBUGF(7, (CE_CONT, "devmap_load: dhp %p offset %llx len %lx\n",
2925 (void *)dhp, offset, len));
2928 * Hat layer only supports devload to process' context for which
2929 * the as lock is held. Verify here and return error if drivers
2930 * inadvertently call devmap_load on a wrong devmap handle.
2932 if ((asp != &kas) && !AS_LOCK_HELD(asp))
2933 return (FC_MAKE_ERR(EINVAL));
2935 soff = (ssize_t)(offset - dhp->dh_uoff);
2936 soff = round_down_p2(soff, PAGESIZE);
2937 if (soff < 0 || soff >= dhp->dh_len)
2938 return (FC_MAKE_ERR(EINVAL));
2941 * Address and size must be page aligned. Len is set to the
2942 * number of bytes in the number of pages that are required to
2943 * support len. Offset is set to the byte offset of the first byte
2944 * of the page that contains offset.
2946 len = round_up_p2(len, PAGESIZE);
2949 * If len == 0, then calculate the size by getting
2950 * the number of bytes from offset to the end of the segment.
2952 if (len == 0)
2953 size = dhp->dh_len - soff;
2954 else {
2955 size = len;
2956 if ((soff + size) > dhp->dh_len)
2957 return (FC_MAKE_ERR(EINVAL));
2961 * The address is offset bytes from the base address of
2962 * the segment.
2964 addr = (caddr_t)(soff + dhp->dh_uvaddr);
2966 HOLD_DHP_LOCK(dhp);
2967 rc = segdev_faultpages(asp->a_hat,
2968 dhp->dh_seg, addr, size, type, rw, dhp);
2969 RELE_DHP_LOCK(dhp);
2970 return (rc);
/*
 * devmap_setup: build the devmap handle list for a mapping of
 * [off, off + len) on device dev and establish the mapping in `as'.
 *
 * The driver's devmap entry point is called (via cdev_devmap) once per
 * handle until the lengths it reports add up to len; each handle is
 * validated, then devmap_device() creates the segment, and finally each
 * handle's devmap_map callback (if any) is invoked.
 *
 * Returns 0 on success.  Error returns visible here: EINVAL (no usable
 * devmap/mmap entry point, a map length that is not page aligned, or
 * MAP_PRIVATE without a devmap_access routine), ENXIO (cdev_devmap or a
 * devmap_map callback failed), ENOTSUP (handles do not share one
 * dh_softlock), or whatever devmap_device() returned.
 */
2974 devmap_setup(dev_t dev, offset_t off, struct as *as, caddr_t *addrp,
2975 size_t len, uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
2977 register devmap_handle_t *dhp;
2978 int (*devmap)(dev_t, devmap_cookie_t, offset_t, size_t,
2979 size_t *, uint_t);
2980 int (*mmap)(dev_t, off_t, int);
2981 struct devmap_callback_ctl *callbackops;
2982 devmap_handle_t *dhp_head = NULL;
2983 devmap_handle_t *dhp_prev = NULL;
2984 devmap_handle_t *dhp_curr;
2985 caddr_t addr;
2986 int map_flag;
2987 int ret;
2988 ulong_t total_len;
2989 size_t map_len;
2990 size_t resid_len = len;
2991 offset_t map_off = off;
2992 struct devmap_softlock *slock = NULL;
2995 DEBUGF(3, (CE_CONT, "devmap_setup: off %llx len %lx\n",
2996 off, len));
/* Fetch the driver's devmap and mmap entry points from its cb_ops. */
2998 devmap = devopsp[getmajor(dev)]->devo_cb_ops->cb_devmap;
2999 mmap = devopsp[getmajor(dev)]->devo_cb_ops->cb_mmap;
3002 * driver must provide devmap(9E) entry point in cb_ops to use the
3003 * devmap framework.
3005 if (devmap == NULL || devmap == nulldev || devmap == nodev)
3006 return (EINVAL);
3009 * To protect from an inadvertent entry because the devmap entry point
3010 * is not NULL, return error if D_DEVMAP bit is not set in cb_flag and
3011 * mmap is NULL.
3013 map_flag = devopsp[getmajor(dev)]->devo_cb_ops->cb_flag;
3014 if ((map_flag & D_DEVMAP) == 0 && (mmap == NULL || mmap == nulldev))
3015 return (EINVAL);
3018 * devmap allows mmap(2) to map multiple registers.
3019 * one devmap_handle is created for each register mapped.
/*
 * Each pass allocates a handle, links it onto the tail of the list and
 * advances by the map_len the driver reported for it.
 * NOTE(review): the loop relies on the driver reporting a non-zero
 * map_len; a zero length would not advance total_len -- confirm that
 * cdev_devmap guarantees this.
 */
3021 for (total_len = 0; total_len < len; total_len += map_len) {
3022 dhp = kmem_zalloc(sizeof (devmap_handle_t), KM_SLEEP);
3024 if (dhp_prev != NULL)
3025 dhp_prev->dh_next = dhp;
3026 else
3027 dhp_head = dhp;
3028 dhp_prev = dhp;
3030 dhp->dh_prot = prot;
3031 dhp->dh_orig_maxprot = dhp->dh_maxprot = maxprot;
3032 dhp->dh_dev = dev;
3033 dhp->dh_timeout_length = CTX_TIMEOUT_VALUE;
3034 dhp->dh_uoff = map_off;
3037 * Get mapping specific info from
3038 * the driver, such as rnumber, roff, len, callbackops,
3039 * accattrp and, if the mapping is for kernel memory,
3040 * ddi_umem_cookie.
/* The specific error from cdev_devmap is not propagated; ENXIO is. */
3042 if ((ret = cdev_devmap(dev, dhp, map_off,
3043 resid_len, &map_len, get_udatamodel())) != 0) {
3044 free_devmap_handle(dhp_head);
3045 return (ENXIO);
3048 if (map_len & PAGEOFFSET) {
3049 free_devmap_handle(dhp_head);
3050 return (EINVAL);
3053 callbackops = &dhp->dh_callbackops;
3055 if ((callbackops->devmap_access == NULL) ||
3056 (callbackops->devmap_access == nulldev) ||
3057 (callbackops->devmap_access == nodev)) {
3059 * Normally devmap does not support MAP_PRIVATE unless
3060 * the drivers provide a valid devmap_access routine.
3062 if ((flags & MAP_PRIVATE) != 0) {
3063 free_devmap_handle(dhp_head);
3064 return (EINVAL);
3066 } else {
3068 * Initialize dhp_softlock and dh_ctx if the drivers
3069 * provide devmap_access.
/* Both are keyed by the device and the devmap_access function address. */
3071 dhp->dh_softlock = devmap_softlock_init(dev,
3072 (ulong_t)callbackops->devmap_access);
3073 dhp->dh_ctx = devmap_ctxinit(dev,
3074 (ulong_t)callbackops->devmap_access);
3077 * segdev_fault can only work when all
3078 * dh_softlock in a multi-dhp mapping
3079 * are same. see comments in segdev_fault
3080 * This code keeps track of the first
3081 * dh_softlock allocated in slock and
3082 * compares all later allocations and if
3083 * not similar, returns an error.
3085 if (slock == NULL)
3086 slock = dhp->dh_softlock;
3087 if (slock != dhp->dh_softlock) {
3088 free_devmap_handle(dhp_head);
3089 return (ENOTSUP);
3093 map_off += map_len;
3094 resid_len -= map_len;
3098 * get the user virtual address and establish the mapping between
3099 * uvaddr and device physical address.
3101 if ((ret = devmap_device(dhp_head, as, addrp, off, len, flags))
3102 != 0) {
3104 * free devmap handles if error during the mapping.
3106 free_devmap_handle(dhp_head);
3108 return (ret);
3112 * call the driver's devmap_map callback to do more after the mapping,
3113 * such as to allocate driver private data for context management.
/*
 * Walk the handles in order, assigning each its user virtual address
 * (consecutive ranges starting at *addrp) before calling devmap_map.
 */
3115 dhp = dhp_head;
3116 map_off = off;
3117 addr = *addrp;
3118 while (dhp != NULL) {
3119 callbackops = &dhp->dh_callbackops;
3120 dhp->dh_uvaddr = addr;
3121 dhp_curr = dhp;
3122 if (callbackops->devmap_map != NULL) {
3123 ret = (*callbackops->devmap_map)((devmap_cookie_t)dhp,
3124 dev, flags, map_off,
3125 dhp->dh_len, &dhp->dh_pvtp);
3126 if (ret != 0) {
3127 struct segdev_data *sdp;
3130 * call driver's devmap_unmap entry point
3131 * to free driver resources.
/*
 * Only the handles before the failing one (dhp_curr) are unwound; the
 * segment's devmap_data is then cleared before the handles are freed.
 */
3133 dhp = dhp_head;
3134 map_off = off;
3135 while (dhp != dhp_curr) {
3136 callbackops = &dhp->dh_callbackops;
3137 if (callbackops->devmap_unmap != NULL) {
3138 (*callbackops->devmap_unmap)(
3139 dhp, dhp->dh_pvtp,
3140 map_off, dhp->dh_len,
3141 NULL, NULL, NULL, NULL);
3143 map_off += dhp->dh_len;
3144 dhp = dhp->dh_next;
3146 sdp = dhp_head->dh_seg->s_data;
3147 sdp->devmap_data = NULL;
3148 free_devmap_handle(dhp_head);
3149 return (ENXIO);
3152 map_off += dhp->dh_len;
3153 addr += dhp->dh_len;
3154 dhp = dhp->dh_next;
3157 return (0);
3161 ddi_devmap_segmap(dev_t dev, off_t off, ddi_as_handle_t as, caddr_t *addrp,
3162 off_t len, uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
3164 return (devmap_setup(dev, (offset_t)off, (struct as *)as, addrp,
3165 (size_t)len, prot, maxprot, flags, cred));
3169 * Called from devmap_devmem_setup/remap to see if can use large pages for
3170 * this device mapping.
3171 * Also calculate the max. page size for this mapping.
3172 * this page size will be used in fault routine for
3173 * optimal page size calculations.
3175 static void
3176 devmap_devmem_large_page_setup(devmap_handle_t *dhp)
3178 ASSERT(dhp_is_devmem(dhp));
3179 dhp->dh_mmulevel = 0;
3182 * use large page size only if:
3183 * 1. device memory.
3184 * 2. mmu supports multiple page sizes,
3185 * 3. Driver did not disallow it
3186 * 4. dhp length is at least as big as the large pagesize
3187 * 5. the uvaddr and pfn are large pagesize aligned
3189 if (page_num_pagesizes() > 1 &&
3190 !(dhp->dh_flags & (DEVMAP_USE_PAGESIZE | DEVMAP_MAPPING_INVALID))) {
3191 ulong_t base;
3192 int level;
3194 base = (ulong_t)ptob(dhp->dh_pfn);
3195 for (level = 1; level < page_num_pagesizes(); level++) {
3196 size_t pgsize = page_get_pagesize(level);
3197 if ((dhp->dh_len < pgsize) ||
3198 (!VA_PA_PGSIZE_ALIGNED((uintptr_t)dhp->dh_uvaddr,
3199 base, pgsize))) {
3200 break;
3203 dhp->dh_mmulevel = level - 1;
3205 if (dhp->dh_mmulevel > 0) {
3206 dhp->dh_flags |= DEVMAP_FLAG_LARGE;
3207 } else {
3208 dhp->dh_flags &= ~DEVMAP_FLAG_LARGE;
3213 * Called by driver devmap routine to pass device specific info to
3214 * the framework. used for device memory mapping only.
3217 devmap_devmem_setup(devmap_cookie_t dhc, dev_info_t *dip,
3218 struct devmap_callback_ctl *callbackops, uint_t rnumber, offset_t roff,
3219 size_t len, uint_t maxprot, uint_t flags, ddi_device_acc_attr_t *accattrp)
3221 devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3222 ddi_acc_handle_t handle;
3223 ddi_map_req_t mr;
3224 ddi_acc_hdl_t *hp;
3225 int err;
3227 DEBUGF(2, (CE_CONT, "devmap_devmem_setup: dhp %p offset %llx "
3228 "rnum %d len %lx\n", (void *)dhp, roff, rnumber, len));
3231 * First to check if this function has been called for this dhp.
3233 if (dhp->dh_flags & DEVMAP_SETUP_DONE)
3234 return (DDI_FAILURE);
3236 if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
3237 return (DDI_FAILURE);
3239 if (flags & DEVMAP_MAPPING_INVALID) {
3241 * Don't go up the tree to get pfn if the driver specifies
3242 * DEVMAP_MAPPING_INVALID in flags.
3244 * If DEVMAP_MAPPING_INVALID is specified, we have to grant
3245 * remap permission.
3247 if (!(flags & DEVMAP_ALLOW_REMAP)) {
3248 return (DDI_FAILURE);
3250 dhp->dh_pfn = PFN_INVALID;
3251 } else {
3252 handle = impl_acc_hdl_alloc(KM_SLEEP, NULL);
3253 if (handle == NULL)
3254 return (DDI_FAILURE);
3256 hp = impl_acc_hdl_get(handle);
3257 hp->ah_vers = VERS_ACCHDL;
3258 hp->ah_dip = dip;
3259 hp->ah_rnumber = rnumber;
3260 hp->ah_offset = roff;
3261 hp->ah_len = len;
3262 if (accattrp != NULL)
3263 hp->ah_acc = *accattrp;
3265 mr.map_op = DDI_MO_MAP_LOCKED;
3266 mr.map_type = DDI_MT_RNUMBER;
3267 mr.map_obj.rnumber = rnumber;
3268 mr.map_prot = maxprot & dhp->dh_orig_maxprot;
3269 mr.map_flags = DDI_MF_DEVICE_MAPPING;
3270 mr.map_handlep = hp;
3271 mr.map_vers = DDI_MAP_VERSION;
3274 * up the device tree to get pfn.
3275 * The rootnex_map_regspec() routine in nexus drivers has been
3276 * modified to return pfn if map_flags is DDI_MF_DEVICE_MAPPING.
3278 err = ddi_map(dip, &mr, roff, len, (caddr_t *)&dhp->dh_pfn);
3279 dhp->dh_hat_attr = hp->ah_hat_flags;
3280 impl_acc_hdl_free(handle);
3282 if (err)
3283 return (DDI_FAILURE);
3285 /* Should not be using devmem setup for memory pages */
3286 ASSERT(!pf_is_memory(dhp->dh_pfn));
3288 /* Only some of the flags bits are settable by the driver */
3289 dhp->dh_flags |= (flags & DEVMAP_SETUP_FLAGS);
3290 dhp->dh_len = ptob(btopr(len));
3292 dhp->dh_cookie = DEVMAP_DEVMEM_COOKIE;
3293 dhp->dh_roff = ptob(btop(roff));
3295 /* setup the dh_mmulevel and DEVMAP_FLAG_LARGE */
3296 devmap_devmem_large_page_setup(dhp);
3297 dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
3298 ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
3301 if (callbackops != NULL) {
3302 bcopy(callbackops, &dhp->dh_callbackops,
3303 sizeof (struct devmap_callback_ctl));
3307 * Initialize dh_lock if we want to do remap.
3309 if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) {
3310 mutex_init(&dhp->dh_lock, NULL, MUTEX_DEFAULT, NULL);
3311 dhp->dh_flags |= DEVMAP_LOCK_INITED;
3314 dhp->dh_flags |= DEVMAP_SETUP_DONE;
3316 return (DDI_SUCCESS);
3320 devmap_devmem_remap(devmap_cookie_t dhc, dev_info_t *dip,
3321 uint_t rnumber, offset_t roff, size_t len, uint_t maxprot,
3322 uint_t flags, ddi_device_acc_attr_t *accattrp)
3324 devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3325 ddi_acc_handle_t handle;
3326 ddi_map_req_t mr;
3327 ddi_acc_hdl_t *hp;
3328 pfn_t pfn;
3329 uint_t hat_flags;
3330 int err;
3332 DEBUGF(2, (CE_CONT, "devmap_devmem_remap: dhp %p offset %llx "
3333 "rnum %d len %lx\n", (void *)dhp, roff, rnumber, len));
3336 * Return failure if setup has not been done or no remap permission
3337 * has been granted during the setup.
3339 if ((dhp->dh_flags & DEVMAP_SETUP_DONE) == 0 ||
3340 (dhp->dh_flags & DEVMAP_ALLOW_REMAP) == 0)
3341 return (DDI_FAILURE);
3343 /* Only DEVMAP_MAPPING_INVALID flag supported for remap */
3344 if ((flags != 0) && (flags != DEVMAP_MAPPING_INVALID))
3345 return (DDI_FAILURE);
3347 if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
3348 return (DDI_FAILURE);
3350 if (!(flags & DEVMAP_MAPPING_INVALID)) {
3351 handle = impl_acc_hdl_alloc(KM_SLEEP, NULL);
3352 if (handle == NULL)
3353 return (DDI_FAILURE);
3356 HOLD_DHP_LOCK(dhp);
3359 * Unload the old mapping, so next fault will setup the new mappings
3360 * Do this while holding the dhp lock so other faults dont reestablish
3361 * the mappings
3363 hat_unload(dhp->dh_seg->s_as->a_hat, dhp->dh_uvaddr,
3364 dhp->dh_len, HAT_UNLOAD|HAT_UNLOAD_OTHER);
3366 if (flags & DEVMAP_MAPPING_INVALID) {
3367 dhp->dh_flags |= DEVMAP_MAPPING_INVALID;
3368 dhp->dh_pfn = PFN_INVALID;
3369 } else {
3370 /* clear any prior DEVMAP_MAPPING_INVALID flag */
3371 dhp->dh_flags &= ~DEVMAP_MAPPING_INVALID;
3372 hp = impl_acc_hdl_get(handle);
3373 hp->ah_vers = VERS_ACCHDL;
3374 hp->ah_dip = dip;
3375 hp->ah_rnumber = rnumber;
3376 hp->ah_offset = roff;
3377 hp->ah_len = len;
3378 if (accattrp != NULL)
3379 hp->ah_acc = *accattrp;
3381 mr.map_op = DDI_MO_MAP_LOCKED;
3382 mr.map_type = DDI_MT_RNUMBER;
3383 mr.map_obj.rnumber = rnumber;
3384 mr.map_prot = maxprot & dhp->dh_orig_maxprot;
3385 mr.map_flags = DDI_MF_DEVICE_MAPPING;
3386 mr.map_handlep = hp;
3387 mr.map_vers = DDI_MAP_VERSION;
3390 * up the device tree to get pfn.
3391 * The rootnex_map_regspec() routine in nexus drivers has been
3392 * modified to return pfn if map_flags is DDI_MF_DEVICE_MAPPING.
3394 err = ddi_map(dip, &mr, roff, len, (caddr_t *)&pfn);
3395 hat_flags = hp->ah_hat_flags;
3396 impl_acc_hdl_free(handle);
3397 if (err) {
3398 RELE_DHP_LOCK(dhp);
3399 return (DDI_FAILURE);
3402 * Store result of ddi_map first in local variables, as we do
3403 * not want to overwrite the existing dhp with wrong data.
3405 dhp->dh_pfn = pfn;
3406 dhp->dh_hat_attr = hat_flags;
3409 /* clear the large page size flag */
3410 dhp->dh_flags &= ~DEVMAP_FLAG_LARGE;
3412 dhp->dh_cookie = DEVMAP_DEVMEM_COOKIE;
3413 dhp->dh_roff = ptob(btop(roff));
3415 /* setup the dh_mmulevel and DEVMAP_FLAG_LARGE */
3416 devmap_devmem_large_page_setup(dhp);
3417 dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
3418 ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
3420 RELE_DHP_LOCK(dhp);
3421 return (DDI_SUCCESS);
3425 * called by driver devmap routine to pass kernel virtual address mapping
3426 * info to the framework. used only for kernel memory
3427 * allocated from ddi_umem_alloc().
3430 devmap_umem_setup(devmap_cookie_t dhc, dev_info_t *dip,
3431 struct devmap_callback_ctl *callbackops, ddi_umem_cookie_t cookie,
3432 offset_t off, size_t len, uint_t maxprot, uint_t flags,
3433 ddi_device_acc_attr_t *accattrp)
3435 devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3436 struct ddi_umem_cookie *cp = (struct ddi_umem_cookie *)cookie;
3439 DEBUGF(2, (CE_CONT, "devmap_umem_setup: dhp %p offset %llx "
3440 "cookie %p len %lx\n", (void *)dhp, off, (void *)cookie, len));
3442 if (cookie == NULL)
3443 return (DDI_FAILURE);
3445 /* For UMEM_TRASH, this restriction is not needed */
3446 if ((off + len) > cp->size)
3447 return (DDI_FAILURE);
3449 /* check if the cache attributes are supported */
3450 if (i_ddi_check_cache_attr(flags) == B_FALSE)
3451 return (DDI_FAILURE);
3454 * First to check if this function has been called for this dhp.
3456 if (dhp->dh_flags & DEVMAP_SETUP_DONE)
3457 return (DDI_FAILURE);
3459 if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
3460 return (DDI_FAILURE);
3462 if (flags & DEVMAP_MAPPING_INVALID) {
3464 * If DEVMAP_MAPPING_INVALID is specified, we have to grant
3465 * remap permission.
3467 if (!(flags & DEVMAP_ALLOW_REMAP)) {
3468 return (DDI_FAILURE);
3470 } else {
3471 dhp->dh_cookie = cookie;
3472 dhp->dh_roff = ptob(btop(off));
3473 dhp->dh_cvaddr = cp->cvaddr + dhp->dh_roff;
3474 /* set HAT cache attributes */
3475 i_ddi_cacheattr_to_hatacc(flags, &dhp->dh_hat_attr);
3476 /* set HAT endianess attributes */
3477 i_ddi_devacc_to_hatacc(accattrp, &dhp->dh_hat_attr);
3481 * The default is _not_ to pass HAT_LOAD_NOCONSIST to hat_devload();
3482 * we pass HAT_LOAD_NOCONSIST _only_ in cases where hat tries to
3483 * create consistent mappings but our intention was to create
3484 * non-consistent mappings.
3486 * DEVMEM: hat figures it out it's DEVMEM and creates non-consistent
3487 * mappings.
3489 * kernel exported memory: hat figures it out it's memory and always
3490 * creates consistent mappings.
3492 * /dev/mem: non-consistent mappings. See comments in common/io/mem.c
3494 * /dev/kmem: consistent mappings are created unless they are
3495 * MAP_FIXED. We _explicitly_ tell hat to create non-consistent
3496 * mappings by passing HAT_LOAD_NOCONSIST in case of MAP_FIXED
3497 * mappings of /dev/kmem. See common/io/mem.c
3500 /* Only some of the flags bits are settable by the driver */
3501 dhp->dh_flags |= (flags & DEVMAP_SETUP_FLAGS);
3503 dhp->dh_len = ptob(btopr(len));
3504 dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
3505 ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
3507 if (callbackops != NULL) {
3508 bcopy(callbackops, &dhp->dh_callbackops,
3509 sizeof (struct devmap_callback_ctl));
3512 * Initialize dh_lock if we want to do remap.
3514 if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) {
3515 mutex_init(&dhp->dh_lock, NULL, MUTEX_DEFAULT, NULL);
3516 dhp->dh_flags |= DEVMAP_LOCK_INITED;
3519 dhp->dh_flags |= DEVMAP_SETUP_DONE;
3521 return (DDI_SUCCESS);
3525 devmap_umem_remap(devmap_cookie_t dhc, dev_info_t *dip,
3526 ddi_umem_cookie_t cookie, offset_t off, size_t len, uint_t maxprot,
3527 uint_t flags, ddi_device_acc_attr_t *accattrp)
3529 devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3530 struct ddi_umem_cookie *cp = (struct ddi_umem_cookie *)cookie;
3532 DEBUGF(2, (CE_CONT, "devmap_umem_remap: dhp %p offset %llx "
3533 "cookie %p len %lx\n", (void *)dhp, off, (void *)cookie, len));
3536 * Reture failure if setup has not been done or no remap permission
3537 * has been granted during the setup.
3539 if ((dhp->dh_flags & DEVMAP_SETUP_DONE) == 0 ||
3540 (dhp->dh_flags & DEVMAP_ALLOW_REMAP) == 0)
3541 return (DDI_FAILURE);
3543 /* No flags supported for remap yet */
3544 if (flags != 0)
3545 return (DDI_FAILURE);
3547 /* check if the cache attributes are supported */
3548 if (i_ddi_check_cache_attr(flags) == B_FALSE)
3549 return (DDI_FAILURE);
3551 if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
3552 return (DDI_FAILURE);
3554 /* For UMEM_TRASH, this restriction is not needed */
3555 if ((off + len) > cp->size)
3556 return (DDI_FAILURE);
3558 HOLD_DHP_LOCK(dhp);
3560 * Unload the old mapping, so next fault will setup the new mappings
3561 * Do this while holding the dhp lock so other faults dont reestablish
3562 * the mappings
3564 hat_unload(dhp->dh_seg->s_as->a_hat, dhp->dh_uvaddr,
3565 dhp->dh_len, HAT_UNLOAD|HAT_UNLOAD_OTHER);
3567 dhp->dh_cookie = cookie;
3568 dhp->dh_roff = ptob(btop(off));
3569 dhp->dh_cvaddr = cp->cvaddr + dhp->dh_roff;
3570 /* set HAT cache attributes */
3571 i_ddi_cacheattr_to_hatacc(flags, &dhp->dh_hat_attr);
3572 /* set HAT endianess attributes */
3573 i_ddi_devacc_to_hatacc(accattrp, &dhp->dh_hat_attr);
3575 /* clear the large page size flag */
3576 dhp->dh_flags &= ~DEVMAP_FLAG_LARGE;
3578 dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
3579 ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
3580 RELE_DHP_LOCK(dhp);
3581 return (DDI_SUCCESS);
3585 * to set timeout value for the driver's context management callback, e.g.
3586 * devmap_access().
3588 void
3589 devmap_set_ctx_timeout(devmap_cookie_t dhc, clock_t ticks)
3591 devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3593 dhp->dh_timeout_length = ticks;
3597 devmap_default_access(devmap_cookie_t dhp, void *pvtp, offset_t off,
3598 size_t len, uint_t type, uint_t rw)
3601 return (devmap_load(dhp, off, len, type, rw));
3605 * segkmem_alloc() wrapper to allocate memory which is both
3606 * non-relocatable (for DR) and sharelocked, since the rest
3607 * of this segment driver requires it.
3609 static void *
3610 devmap_alloc_pages(vmem_t *vmp, size_t size, int vmflag)
3612 ASSERT(vmp != NULL);
3613 ASSERT(kvseg.s_base != NULL);
3614 vmflag |= (VM_NORELOC | SEGKMEM_SHARELOCKED);
3615 return (segkmem_alloc(vmp, size, vmflag));
3619 * This is where things are a bit incestuous with seg_kmem: unlike
3620 * seg_kp, seg_kmem does not keep its pages long-term sharelocked, so
3621 * we need to do a bit of a dance around that to prevent duplication of
3622 * code until we decide to bite the bullet and implement a new kernel
3623 * segment for driver-allocated memory that is exported to user space.
3625 static void
3626 devmap_free_pages(vmem_t *vmp, void *inaddr, size_t size)
3628 page_t *pp;
3629 caddr_t addr = inaddr;
3630 caddr_t eaddr;
3631 pgcnt_t npages = btopr(size);
3633 ASSERT(vmp != NULL);
3634 ASSERT(kvseg.s_base != NULL);
3635 ASSERT(((uintptr_t)addr & PAGEOFFSET) == 0);
3637 hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK);
3639 for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
3641 * Use page_find() instead of page_lookup() to find the page
3642 * since we know that it is hashed and has a shared lock.
3644 pp = page_find(&kvp.v_object, (uoff_t)(uintptr_t)addr);
3646 if (pp == NULL)
3647 panic("devmap_free_pages: page not found");
3648 if (!page_tryupgrade(pp)) {
3649 page_unlock(pp);
3650 pp = page_lookup(&kvp.v_object,
3651 (uoff_t)(uintptr_t)addr, SE_EXCL);
3652 if (pp == NULL)
3653 panic("devmap_free_pages: page already freed");
3655 /* Clear p_lckcnt so page_destroy() doesn't update availrmem */
3656 pp->p_lckcnt = 0;
3657 page_destroy(pp, 0);
3659 page_unresv(npages);
3661 if (vmp != NULL)
3662 vmem_free(vmp, inaddr, size);
3666 * devmap_umem_alloc_np() replaces kmem_zalloc() as the method for
3667 * allocating non-pageable kmem in response to a ddi_umem_alloc()
3668 * default request. For now we allocate our own pages and we keep
3669 * them long-term sharelocked, since: A) the fault routines expect the
3670 * memory to already be locked; B) pageable umem is already long-term
3671 * locked; C) it's a lot of work to make it otherwise, particularly
3672 * since the nexus layer expects the pages to never fault. An RFE is to
3673 * not keep the pages long-term locked, but instead to be able to
3674 * take faults on them and simply look them up in kvp in case we
3675 * fault on them. Even then, we must take care not to let pageout
3676 * steal them from us since the data must remain resident; if we
3677 * do this we must come up with some way to pin the pages to prevent
3678 * faults while a driver is doing DMA to/from them.
3680 static void *
3681 devmap_umem_alloc_np(size_t size, size_t flags)
3683 void *buf;
3684 int vmflags = (flags & DDI_UMEM_NOSLEEP)? VM_NOSLEEP : VM_SLEEP;
3686 buf = vmem_alloc(umem_np_arena, size, vmflags);
3687 if (buf != NULL)
3688 bzero(buf, size);
3689 return (buf);
3692 static void
3693 devmap_umem_free_np(void *addr, size_t size)
3695 vmem_free(umem_np_arena, addr, size);
3699 * allocate page aligned kernel memory for exporting to user land.
3700 * The devmap framework will use the cookie allocated by ddi_umem_alloc()
3701 * to find a user virtual address that is in same color as the address
3702 * allocated here.
3704 void *
3705 ddi_umem_alloc(size_t size, int flags, ddi_umem_cookie_t *cookie)
3707 register size_t len = ptob(btopr(size));
3708 void *buf = NULL;
3709 struct ddi_umem_cookie *cp;
3710 int iflags = 0;
3712 *cookie = NULL;
3714 if (len == 0)
3715 return (NULL);
3718 * allocate cookie
3720 if ((cp = kmem_zalloc(sizeof (struct ddi_umem_cookie),
3721 flags & DDI_UMEM_NOSLEEP ? KM_NOSLEEP : KM_SLEEP)) == NULL) {
3722 ASSERT(flags & DDI_UMEM_NOSLEEP);
3723 return (NULL);
3726 if (flags & DDI_UMEM_PAGEABLE) {
3727 /* Only one of the flags is allowed */
3728 ASSERT(!(flags & DDI_UMEM_TRASH));
3729 /* initialize resource with 0 */
3730 iflags = KPD_ZERO;
3733 * to allocate unlocked pageable memory, use segkp_get() to
3734 * create a segkp segment. Since segkp can only service kas,
3735 * other segment drivers such as segdev have to do
3736 * as_fault(segkp, SOFTLOCK) in its fault routine,
3738 if (flags & DDI_UMEM_NOSLEEP)
3739 iflags |= KPD_NOWAIT;
3741 if ((buf = segkp_get(segkp, len, iflags)) == NULL) {
3742 kmem_free(cp, sizeof (struct ddi_umem_cookie));
3743 return (NULL);
3745 cp->type = KMEM_PAGEABLE;
3746 mutex_init(&cp->lock, NULL, MUTEX_DEFAULT, NULL);
3747 cp->locked = 0;
3748 } else if (flags & DDI_UMEM_TRASH) {
3749 /* Only one of the flags is allowed */
3750 ASSERT(!(flags & DDI_UMEM_PAGEABLE));
3751 cp->type = UMEM_TRASH;
3752 buf = NULL;
3753 } else {
3754 if ((buf = devmap_umem_alloc_np(len, flags)) == NULL) {
3755 kmem_free(cp, sizeof (struct ddi_umem_cookie));
3756 return (NULL);
3759 cp->type = KMEM_NON_PAGEABLE;
3763 * need to save size here. size will be used when
3764 * we do kmem_free.
3766 cp->size = len;
3767 cp->cvaddr = (caddr_t)buf;
3769 *cookie = (void *)cp;
3770 return (buf);
3773 void
3774 ddi_umem_free(ddi_umem_cookie_t cookie)
3776 struct ddi_umem_cookie *cp;
3779 * if cookie is NULL, no effects on the system
3781 if (cookie == NULL)
3782 return;
3784 cp = (struct ddi_umem_cookie *)cookie;
3786 switch (cp->type) {
3787 case KMEM_PAGEABLE :
3788 ASSERT(cp->cvaddr != NULL && cp->size != 0);
3790 * Check if there are still any pending faults on the cookie
3791 * while the driver is deleting it,
3792 * XXX - could change to an ASSERT but wont catch errant drivers
3794 mutex_enter(&cp->lock);
3795 if (cp->locked) {
3796 mutex_exit(&cp->lock);
3797 panic("ddi_umem_free for cookie with pending faults %p",
3798 (void *)cp);
3799 return;
3802 segkp_release(segkp, cp->cvaddr);
3805 * release mutex associated with this cookie.
3807 mutex_destroy(&cp->lock);
3808 break;
3809 case KMEM_NON_PAGEABLE :
3810 ASSERT(cp->cvaddr != NULL && cp->size != 0);
3811 devmap_umem_free_np(cp->cvaddr, cp->size);
3812 break;
3813 case UMEM_TRASH :
3814 break;
3815 case UMEM_LOCKED :
3816 /* Callers should use ddi_umem_unlock for this type */
3817 ddi_umem_unlock(cookie);
3818 /* Frees the cookie too */
3819 return;
3820 default:
3821 /* panic so we can diagnose the underlying cause */
3822 panic("ddi_umem_free: illegal cookie type 0x%x\n",
3823 cp->type);
3826 kmem_free(cookie, sizeof (struct ddi_umem_cookie));
3830 static int
3831 segdev_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
3833 struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
3836 * It looks as if it is always mapped shared
3838 memidp->val[0] = (uintptr_t)VTOCVP(sdp->vp);
3839 memidp->val[1] = sdp->offset + (uintptr_t)(addr - seg->s_base);
3840 return (0);
/*
 * Maximum size of the quantum cache for ddi_umem_alloc() non-pageable
 * memory.  The value is only a rough guess (a SWAG).
 */
#define	DEVMAP_UMEM_QUANTUM	(8 * PAGESIZE)
3850 * Initialize seg_dev from boot. This routine sets up the trash page
3851 * and creates the umem_np_arena used to back non-pageable memory
3852 * requests.
3854 void
3855 segdev_init(void)
3857 struct seg kseg;
3859 vmobject_init(&trashvp.v_object, &trashvp);
3861 umem_np_arena = vmem_create("umem_np", NULL, 0, PAGESIZE,
3862 devmap_alloc_pages, devmap_free_pages, heap_arena,
3863 DEVMAP_UMEM_QUANTUM, VM_SLEEP);
3865 kseg.s_as = &kas;
3866 trashpp = page_create_va(&trashvp.v_object, 0, PAGESIZE,
3867 PG_NORELOC | PG_EXCL | PG_WAIT, &kseg, NULL);
3868 if (trashpp == NULL)
3869 panic("segdev_init: failed to create trash page");
3870 pagezero(trashpp, 0, PAGESIZE);
3871 page_downgrade(trashpp);
3875 * Invoke platform-dependent support routines so that /proc can have
3876 * the platform code deal with curious hardware.
3879 segdev_copyfrom(struct seg *seg,
3880 caddr_t uaddr, const void *devaddr, void *kaddr, size_t len)
3882 struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
3883 struct snode *sp = VTOS(VTOCVP(sdp->vp));
3885 return (e_ddi_copyfromdev(sp->s_dip,
3886 (off_t)(uaddr - seg->s_base), devaddr, kaddr, len));
3890 segdev_copyto(struct seg *seg,
3891 caddr_t uaddr, const void *kaddr, void *devaddr, size_t len)
3893 struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
3894 struct snode *sp = VTOS(VTOCVP(sdp->vp));
3896 return (e_ddi_copytodev(sp->s_dip,
3897 (off_t)(uaddr - seg->s_base), kaddr, devaddr, len));