usr/src/uts/i86xpv/vm/seg_mf.c

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Machine frame segment driver. This segment driver allows dom0 processes to
 * map pages of other domains or Xen (e.g. during save/restore). ioctl()s on
 * the privcmd driver provide the MFN values backing each mapping, and we map
 * them into the process's address space at this time. Demand-faulting is not
 * supported by this driver due to the requirements upon some of the ioctl()s.
 */

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/vmsystm.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/vnode.h>
#include <sys/conf.h>
#include <sys/debug.h>
#include <sys/lgrp.h>
#include <sys/hypervisor.h>

#include <vm/page.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>

#include <vm/hat_pte.h>
#include <vm/hat_i86.h>
#include <vm/seg_mf.h>

#include <sys/fs/snode.h>

#define	VTOCVP(vp)	(VTOS(vp)->s_commonvp)

typedef struct segmf_mfn_s {
	mfn_t		m_mfn;
} segmf_mfn_t;

/* g_flags */
#define	SEGMF_GFLAGS_WR		0x1
#define	SEGMF_GFLAGS_MAPPED	0x2
typedef struct segmf_gref_s {
	uint64_t	g_ptep;
	grant_ref_t	g_gref;
	uint32_t	g_flags;
	grant_handle_t	g_handle;
} segmf_gref_t;

typedef union segmf_mu_u {
	segmf_mfn_t	m;
	segmf_gref_t	g;
} segmf_mu_t;

typedef enum {
	SEGMF_MAP_EMPTY = 0,
	SEGMF_MAP_MFN,
	SEGMF_MAP_GREF
} segmf_map_type_t;

typedef struct segmf_map_s {
	segmf_map_type_t	t_type;
	segmf_mu_t		u;
} segmf_map_t;

struct segmf_data {
	kmutex_t	lock;
	struct vnode	*vp;
	uchar_t		prot;
	uchar_t		maxprot;
	size_t		softlockcnt;
	domid_t		domid;
	segmf_map_t	*map;
};

static const struct seg_ops segmf_ops;

static int segmf_fault_gref_range(struct seg *seg, caddr_t addr, size_t len);

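/*
 * Allocate the per-segment private data and wire the segment up to our
 * ops vector.
 */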
static struct segmf_data *
segmf_data_zalloc(struct seg *seg)
{
	struct segmf_data *data = kmem_zalloc(sizeof (*data), KM_SLEEP);

	mutex_init(&data->lock, "segmf.lock", MUTEX_DEFAULT, NULL);
	seg->s_ops = &segmf_ops;
	seg->s_data = data;
	return (data);
}

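/*
 * Create a seg_mf segment: take a hold on the backing device vnode, allocate
 * an (initially empty) per-page map array, and notify the device of the new
 * mapping via fop_addmap(). The actual MFN or grant mappings are added later
 * via segmf_add_mfns() or segmf_add_grefs().
 */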
int
segmf_create(struct seg *seg, void *args)
{
	struct segmf_crargs *a = args;
	struct segmf_data *data;
	struct as *as = seg->s_as;
	pgcnt_t i, npages = seg_pages(seg);
	int error;

	hat_map(as->a_hat, seg->s_base, seg->s_size, HAT_MAP);

	data = segmf_data_zalloc(seg);
	data->vp = specfind(a->dev, VCHR);
	data->prot = a->prot;
	data->maxprot = a->maxprot;

	data->map = kmem_alloc(npages * sizeof (segmf_map_t), KM_SLEEP);
	for (i = 0; i < npages; i++) {
		data->map[i].t_type = SEGMF_MAP_EMPTY;
	}

	error = fop_addmap(VTOCVP(data->vp), 0, as, seg->s_base, seg->s_size,
	    data->prot, data->maxprot, MAP_SHARED, CRED(), NULL);

	if (error != 0)
		hat_unload(as->a_hat,
		    seg->s_base, seg->s_size, HAT_UNLOAD_UNMAP);
	return (error);
}

/*
 * Duplicate a seg and return new segment in newseg.
 */
static int
segmf_dup(struct seg *seg, struct seg *newseg)
{
	struct segmf_data *data = seg->s_data;
	struct segmf_data *ndata;
	pgcnt_t npages = seg_pages(newseg);
	size_t sz;

	ndata = segmf_data_zalloc(newseg);

	VN_HOLD(data->vp);
	ndata->vp = data->vp;
	ndata->prot = data->prot;
	ndata->maxprot = data->maxprot;
	ndata->domid = data->domid;

	sz = npages * sizeof (segmf_map_t);
	ndata->map = kmem_alloc(sz, KM_SLEEP);
	bcopy(data->map, ndata->map, sz);

	return (fop_addmap(VTOCVP(ndata->vp), 0, newseg->s_as,
	    newseg->s_base, newseg->s_size, ndata->prot, ndata->maxprot,
	    MAP_SHARED, CRED(), NULL));
}

/*
 * We only support unmapping the whole segment, and we automatically unlock
 * what we previously soft-locked.
 */
static int
segmf_unmap(struct seg *seg, caddr_t addr, size_t len)
{
	struct segmf_data *data = seg->s_data;
	offset_t off;

	if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
	    (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET))
		panic("segmf_unmap");

	if (addr != seg->s_base || len != seg->s_size)
		return (ENOTSUP);

	hat_unload(seg->s_as->a_hat, addr, len,
	    HAT_UNLOAD_UNMAP | HAT_UNLOAD_UNLOCK);

	off = (offset_t)seg_page(seg, addr);

	ASSERT(data->vp != NULL);

	(void) fop_delmap(VTOCVP(data->vp), off, seg->s_as, addr, len,
	    data->prot, data->maxprot, MAP_SHARED, CRED(), NULL);

	seg_free(seg);
	return (0);
}

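/*
 * Release everything segmf_create()/segmf_dup() set up: the per-page map
 * array, the vnode hold, and the private data itself.
 */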
static void
segmf_free(struct seg *seg)
{
	struct segmf_data *data = seg->s_data;
	pgcnt_t npages = seg_pages(seg);

	kmem_free(data->map, npages * sizeof (segmf_map_t));
	VN_RELE(data->vp);
	mutex_destroy(&data->lock);
	kmem_free(data, sizeof (*data));
}

static int segmf_faultpage_debug = 0;

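/*
 * Map a single page of the segment to the foreign MFN recorded in its map
 * entry. For F_SOFTLOCK the mapping is locked and the softlock count is
 * bumped; on failure the placeholder mapping is torn down again and EFAULT
 * is returned as a fault code.
 */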
/*ARGSUSED*/
static int
segmf_faultpage(struct hat *hat, struct seg *seg, caddr_t addr,
    enum fault_type type, uint_t prot)
{
	struct segmf_data *data = seg->s_data;
	uint_t hat_flags = HAT_LOAD_NOCONSIST;
	mfn_t mfn;
	x86pte_t pte;
	segmf_map_t *map;
	uint_t idx;

	idx = seg_page(seg, addr);
	map = &data->map[idx];
	ASSERT(map->t_type == SEGMF_MAP_MFN);

	mfn = map->u.m.m_mfn;

	if (type == F_SOFTLOCK) {
		mutex_enter(&freemem_lock);
		data->softlockcnt++;
		mutex_exit(&freemem_lock);
		hat_flags |= HAT_LOAD_LOCK;
	} else
		hat_flags |= HAT_LOAD;

	if (segmf_faultpage_debug > 0) {
		uprintf("segmf_faultpage: addr %p domid %x mfn %lx prot %x\n",
		    (void *)addr, data->domid, mfn, prot);
		segmf_faultpage_debug--;
	}

	/*
	 * Ask the HAT to load a throwaway mapping to page zero, then
	 * overwrite it with our foreign domain mapping. It gets removed
	 * later via hat_unload()
	 */
	hat_devload(hat, addr, MMU_PAGESIZE, (pfn_t)0,
	    PROT_READ | HAT_UNORDERED_OK, hat_flags);

	pte = mmu_ptob((x86pte_t)mfn) | PT_VALID | PT_USER | PT_FOREIGN;
	if (prot & PROT_WRITE)
		pte |= PT_WRITABLE;

	if (HYPERVISOR_update_va_mapping_otherdomain((uintptr_t)addr, pte,
	    UVMF_INVLPG | UVMF_ALL, data->domid) != 0) {
		hat_flags = HAT_UNLOAD_UNMAP;

		if (type == F_SOFTLOCK) {
			hat_flags |= HAT_UNLOAD_UNLOCK;
			mutex_enter(&freemem_lock);
			data->softlockcnt--;
			mutex_exit(&freemem_lock);
		}

		hat_unload(hat, addr, MMU_PAGESIZE, hat_flags);
		return (FC_MAKE_ERR(EFAULT));
	}

	return (0);
}

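/* Convert a seg_rw access type into the protection bits needed to allow it. */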
static int
seg_rw_to_prot(enum seg_rw rw)
{
	switch (rw) {
	case S_READ:
		return (PROT_READ);
	case S_WRITE:
		return (PROT_WRITE);
	case S_EXEC:
		return (PROT_EXEC);
	case S_OTHER:
	default:
		break;
	}
	return (PROT_READ | PROT_WRITE | PROT_EXEC);
}

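/*
 * Drop the soft locks covering [addr, addr + len) and, once the softlock
 * count reaches zero, wake up anyone waiting to unmap the address space.
 */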
static void
segmf_softunlock(struct hat *hat, struct seg *seg, caddr_t addr, size_t len)
{
	struct segmf_data *data = seg->s_data;

	hat_unlock(hat, addr, len);

	mutex_enter(&freemem_lock);
	ASSERT(data->softlockcnt >= btopr(len));
	data->softlockcnt -= btopr(len);
	mutex_exit(&freemem_lock);

	if (data->softlockcnt == 0) {
		struct as *as = seg->s_as;

		if (AS_ISUNMAPWAIT(as)) {
			mutex_enter(&as->a_contents);
			if (AS_ISUNMAPWAIT(as)) {
				AS_CLRUNMAPWAIT(as);
				cv_broadcast(&as->a_cv);
			}
			mutex_exit(&as->a_contents);
		}
	}
}

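/*
 * Fault in (and, for F_SOFTLOCK, lock) every page of an MFN-backed range,
 * undoing whatever was done so far if any page fails.
 */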
static int
segmf_fault_range(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
    enum fault_type type, enum seg_rw rw)
{
	struct segmf_data *data = seg->s_data;
	int error = 0;
	caddr_t a;

	if ((data->prot & seg_rw_to_prot(rw)) == 0)
		return (FC_PROT);

	/* loop over the address range handling each fault */

	for (a = addr; a < addr + len; a += PAGESIZE) {
		error = segmf_faultpage(hat, seg, a, type, data->prot);
		if (error != 0)
			break;
	}

	if (error != 0 && type == F_SOFTLOCK) {
		size_t done = (size_t)(a - addr);

		/*
		 * Undo what's been done so far.
		 */
		if (done > 0)
			segmf_softunlock(hat, seg, addr, done);
	}

	return (error);
}

/*
 * We never demand-fault for seg_mf.
 */
/*ARGSUSED*/
static int
segmf_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
    enum fault_type type, enum seg_rw rw)
{
	return (FC_MAKE_ERR(EFAULT));
}

/*ARGSUSED*/
static int
segmf_faulta(struct seg *seg, caddr_t addr)
{
	return (0);
}

/*ARGSUSED*/
static int
segmf_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	return (EINVAL);
}

/*ARGSUSED*/
static int
segmf_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	return (EINVAL);
}

/*ARGSUSED*/
static int
segmf_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
{
	return (-1);
}

/*ARGSUSED*/
static int
segmf_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
{
	return (0);
}

/*
 * XXPV	Hmm. Should we say that mf mappings are "in core?"
 */

/*ARGSUSED*/
static size_t
segmf_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
{
	size_t v;

	for (v = 0, len = (len + PAGEOFFSET) & PAGEMASK; len;
	    len -= PAGESIZE, v += PAGESIZE)
		*vec++ = 1;
	return (v);
}

/*ARGSUSED*/
static int
segmf_lockop(struct seg *seg, caddr_t addr,
    size_t len, int attr, int op, ulong_t *lockmap, size_t pos)
{
	return (0);
}

static int
segmf_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
{
	struct segmf_data *data = seg->s_data;
	pgcnt_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;

	if (pgno != 0) {
		do {
			protv[--pgno] = data->prot;
		} while (pgno != 0);
	}
	return (0);
}

static uoff_t
segmf_getoffset(struct seg *seg, caddr_t addr)
{
	return (addr - seg->s_base);
}

/*ARGSUSED*/
static int
segmf_gettype(struct seg *seg, caddr_t addr)
{
	return (MAP_SHARED);
}

/*ARGSUSED1*/
static int
segmf_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
	struct segmf_data *data = seg->s_data;

	*vpp = VTOCVP(data->vp);
	return (0);
}

/*ARGSUSED*/
static int
segmf_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
{
	return (0);
}

/*ARGSUSED*/
static int
segmf_pagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
	return (ENOTSUP);
}

static int
segmf_getmemid(struct seg *seg, caddr_t addr, memid_t *memid)
{
	struct segmf_data *data = seg->s_data;

	memid->val[0] = (uintptr_t)VTOCVP(data->vp);
	memid->val[1] = (uintptr_t)seg_page(seg, addr);
	return (0);
}

/*
 * Add a set of contiguous foreign MFNs to the segment, soft-locking them. The
 * pre-faulting is necessary due to live migration; in particular we must
 * return an error in response to IOCTL_PRIVCMD_MMAPBATCH rather than faulting
 * later on a bad MFN. Whilst this isn't necessary for the other MMAP
 * ioctl()s, we lock them too, as they should be transitory.
 */
int
segmf_add_mfns(struct seg *seg, caddr_t addr, mfn_t mfn,
    pgcnt_t pgcnt, domid_t domid)
{
	struct segmf_data *data = seg->s_data;
	pgcnt_t base;
	faultcode_t fc;
	pgcnt_t i;
	int error = 0;

	if (seg->s_ops != &segmf_ops)
		return (EINVAL);

	/*
	 * Don't mess with dom0.
	 *
	 * Only allow the domid to be set once for the segment.
	 * After that, attempts to add mappings to this segment for
	 * other domains explicitly fail.
	 */
	if (domid == 0 || domid == DOMID_SELF)
		return (EACCES);

	mutex_enter(&data->lock);

	if (data->domid == 0)
		data->domid = domid;

	if (data->domid != domid) {
		error = EINVAL;
		goto out;
	}

	base = seg_page(seg, addr);

	for (i = 0; i < pgcnt; i++) {
		data->map[base + i].t_type = SEGMF_MAP_MFN;
		data->map[base + i].u.m.m_mfn = mfn++;
	}

	fc = segmf_fault_range(seg->s_as->a_hat, seg, addr,
	    pgcnt * MMU_PAGESIZE, F_SOFTLOCK, S_OTHER);

	if (fc != 0) {
		error = fc_decode(fc);
		for (i = 0; i < pgcnt; i++) {
			data->map[base + i].t_type = SEGMF_MAP_EMPTY;
		}
	}

out:
	mutex_exit(&data->lock);
	return (error);
}

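/*
 * Add a set of grant-table references to the segment and map them in
 * immediately. As with segmf_add_mfns(), dom0 is refused and the domid may
 * only be set once per segment.
 */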
int
segmf_add_grefs(struct seg *seg, caddr_t addr, uint_t flags,
    grant_ref_t *grefs, uint_t cnt, domid_t domid)
{
	struct segmf_data *data;
	segmf_map_t *map;
	faultcode_t fc;
	uint_t idx;
	uint_t i;
	int e;

	if (seg->s_ops != &segmf_ops)
		return (EINVAL);

	/*
	 * Don't mess with dom0.
	 *
	 * Only allow the domid to be set once for the segment.
	 * After that, attempts to add mappings to this segment for
	 * other domains explicitly fail.
	 */
	if (domid == 0 || domid == DOMID_SELF)
		return (EACCES);

	data = seg->s_data;
	idx = seg_page(seg, addr);
	map = &data->map[idx];
	e = 0;

	mutex_enter(&data->lock);

	if (data->domid == 0)
		data->domid = domid;

	if (data->domid != domid) {
		e = EINVAL;
		goto out;
	}

	/* store away the grefs passed in then fault in the pages */
	for (i = 0; i < cnt; i++) {
		map[i].t_type = SEGMF_MAP_GREF;
		map[i].u.g.g_gref = grefs[i];
		map[i].u.g.g_handle = 0;
		map[i].u.g.g_flags = 0;
		if (flags & SEGMF_GREF_WR) {
			map[i].u.g.g_flags |= SEGMF_GFLAGS_WR;
		}
	}
	fc = segmf_fault_gref_range(seg, addr, cnt);
	if (fc != 0) {
		e = fc_decode(fc);
		for (i = 0; i < cnt; i++) {
			map[i].t_type = SEGMF_MAP_EMPTY;
		}
	}

out:
	mutex_exit(&data->lock);
	return (e);
}

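/*
 * Unmap any currently mapped grant references in the given range and mark
 * their map entries empty again.
 */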
int
segmf_release_grefs(struct seg *seg, caddr_t addr, uint_t cnt)
{
	gnttab_unmap_grant_ref_t mapop[SEGMF_MAX_GREFS];
	struct segmf_data *data;
	segmf_map_t *map;
	uint_t idx;
	long e;
	int i;
	int n;

	if (cnt > SEGMF_MAX_GREFS) {
		return (-1);
	}

	idx = seg_page(seg, addr);
	data = seg->s_data;
	map = &data->map[idx];

	bzero(mapop, sizeof (gnttab_unmap_grant_ref_t) * cnt);

	/*
	 * for each entry which isn't empty and is currently mapped,
	 * set it up for an unmap then mark them empty.
	 */
	n = 0;
	for (i = 0; i < cnt; i++) {
		ASSERT(map[i].t_type != SEGMF_MAP_MFN);
		if ((map[i].t_type == SEGMF_MAP_GREF) &&
		    (map[i].u.g.g_flags & SEGMF_GFLAGS_MAPPED)) {
			mapop[n].handle = map[i].u.g.g_handle;
			mapop[n].host_addr = map[i].u.g.g_ptep;
			mapop[n].dev_bus_addr = 0;
			n++;
		}
		map[i].t_type = SEGMF_MAP_EMPTY;
	}

	/* if there's nothing to unmap, just return */
	if (n == 0) {
		return (0);
	}

	e = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &mapop, n);
	if (e != 0) {
		return (-1);
	}

	return (0);
}

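/*
 * Record the machine address of the PTE backing 'addr';
 * segmf_fault_gref_range() hands it to the hypervisor when mapping the
 * grant reference.
 */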
void
segmf_add_gref_pte(struct seg *seg, caddr_t addr, uint64_t pte_ma)
{
	struct segmf_data *data;
	uint_t idx;

	idx = seg_page(seg, addr);
	data = seg->s_data;

	data->map[idx].u.g.g_ptep = pte_ma;
}

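/*
 * Map 'cnt' grant references into the user address space by passing the
 * machine addresses of the backing PTEs to the hypervisor, saving the
 * returned handles for segmf_release_grefs().
 */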
static int
segmf_fault_gref_range(struct seg *seg, caddr_t addr, size_t cnt)
{
	gnttab_map_grant_ref_t mapop[SEGMF_MAX_GREFS];
	struct segmf_data *data;
	segmf_map_t *map;
	uint_t idx;
	int e;
	int i;

	if (cnt > SEGMF_MAX_GREFS) {
		return (-1);
	}

	data = seg->s_data;
	idx = seg_page(seg, addr);
	map = &data->map[idx];

	bzero(mapop, sizeof (gnttab_map_grant_ref_t) * cnt);

	ASSERT(map->t_type == SEGMF_MAP_GREF);

	/*
	 * Map each page passed in into the user app's AS. We do this by
	 * passing the MA of the actual pte of the mapping to the hypervisor.
	 */
	for (i = 0; i < cnt; i++) {
		mapop[i].host_addr = map[i].u.g.g_ptep;
		mapop[i].dom = data->domid;
		mapop[i].ref = map[i].u.g.g_gref;
		mapop[i].flags = GNTMAP_host_map | GNTMAP_application_map |
		    GNTMAP_contains_pte;
		if (!(map[i].u.g.g_flags & SEGMF_GFLAGS_WR)) {
			mapop[i].flags |= GNTMAP_readonly;
		}
	}
	e = xen_map_gref(GNTTABOP_map_grant_ref, mapop, cnt, B_TRUE);
	if ((e != 0) || (mapop[0].status != GNTST_okay)) {
		return (FC_MAKE_ERR(EFAULT));
	}

	/* save handle for segmf_release_grefs() and mark it as mapped */
	for (i = 0; i < cnt; i++) {
		ASSERT(mapop[i].status == GNTST_okay);
		map[i].u.g.g_handle = mapop[i].handle;
		map[i].u.g.g_flags |= SEGMF_GFLAGS_MAPPED;
	}

	return (0);
}

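/* seg_mf segment driver ops vector */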
static const struct seg_ops segmf_ops = {
	.dup		= segmf_dup,
	.unmap		= segmf_unmap,
	.free		= segmf_free,
	.fault		= segmf_fault,
	.faulta		= segmf_faulta,
	.setprot	= segmf_setprot,
	.checkprot	= segmf_checkprot,
	.kluster	= segmf_kluster,
	.sync		= segmf_sync,
	.incore		= segmf_incore,
	.lockop		= segmf_lockop,
	.getprot	= segmf_getprot,
	.getoffset	= segmf_getoffset,
	.gettype	= segmf_gettype,
	.getvp		= segmf_getvp,
	.advise		= segmf_advise,
	.pagelock	= segmf_pagelock,
	.getmemid	= segmf_getmemid,
};