8322 nl: misleading-indentation
[unleashed/tickless.git] / usr / src / uts / i86xpv / vm / seg_mf.c
blob081ee85b15c6c8316673fe71e36de670885e194a
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 * Machine frame segment driver. This segment driver allows dom0 processes to
29 * map pages of other domains or Xen (e.g. during save/restore). ioctl()s on
30 * the privcmd driver provide the MFN values backing each mapping, and we map
31 * them into the process's address space at this time. Demand-faulting is not
32 * supported by this driver due to the requirements upon some of the ioctl()s.
36 #include <sys/types.h>
37 #include <sys/systm.h>
38 #include <sys/vmsystm.h>
39 #include <sys/mman.h>
40 #include <sys/errno.h>
41 #include <sys/kmem.h>
42 #include <sys/cmn_err.h>
43 #include <sys/vnode.h>
44 #include <sys/conf.h>
45 #include <sys/debug.h>
46 #include <sys/lgrp.h>
47 #include <sys/hypervisor.h>
49 #include <vm/page.h>
50 #include <vm/hat.h>
51 #include <vm/as.h>
52 #include <vm/seg.h>
54 #include <vm/hat_pte.h>
55 #include <vm/hat_i86.h>
56 #include <vm/seg_mf.h>
58 #include <sys/fs/snode.h>
/* Shorthand for the s_commonvp of the snode that VTOS() yields for vp. */
#define	VTOCVP(vp)	(VTOS(vp)->s_commonvp)
62 typedef struct segmf_mfn_s {
63 mfn_t m_mfn;
64 } segmf_mfn_t;
66 /* g_flags */
67 #define SEGMF_GFLAGS_WR 0x1
68 #define SEGMF_GFLAGS_MAPPED 0x2
69 typedef struct segmf_gref_s {
70 uint64_t g_ptep;
71 grant_ref_t g_gref;
72 uint32_t g_flags;
73 grant_handle_t g_handle;
74 } segmf_gref_t;
76 typedef union segmf_mu_u {
77 segmf_mfn_t m;
78 segmf_gref_t g;
79 } segmf_mu_t;
/* What, if anything, backs a given page of the segment. */
typedef enum {
	SEGMF_MAP_EMPTY = 0,	/* nothing recorded for this page */
	SEGMF_MAP_MFN,		/* u.m holds an MFN */
	SEGMF_MAP_GREF		/* u.g holds a grant reference */
} segmf_map_type_t;
87 typedef struct segmf_map_s {
88 segmf_map_type_t t_type;
89 segmf_mu_t u;
90 } segmf_map_t;
92 struct segmf_data {
93 kmutex_t lock;
94 struct vnode *vp;
95 uchar_t prot;
96 uchar_t maxprot;
97 size_t softlockcnt;
98 domid_t domid;
99 segmf_map_t *map;
102 static struct seg_ops segmf_ops;
104 static int segmf_fault_gref_range(struct seg *seg, caddr_t addr, size_t len);
106 static struct segmf_data *
107 segmf_data_zalloc(struct seg *seg)
109 struct segmf_data *data = kmem_zalloc(sizeof (*data), KM_SLEEP);
111 mutex_init(&data->lock, "segmf.lock", MUTEX_DEFAULT, NULL);
112 seg->s_ops = &segmf_ops;
113 seg->s_data = data;
114 return (data);
118 segmf_create(struct seg *seg, void *args)
120 struct segmf_crargs *a = args;
121 struct segmf_data *data;
122 struct as *as = seg->s_as;
123 pgcnt_t i, npages = seg_pages(seg);
124 int error;
126 hat_map(as->a_hat, seg->s_base, seg->s_size, HAT_MAP);
128 data = segmf_data_zalloc(seg);
129 data->vp = specfind(a->dev, VCHR);
130 data->prot = a->prot;
131 data->maxprot = a->maxprot;
133 data->map = kmem_alloc(npages * sizeof (segmf_map_t), KM_SLEEP);
134 for (i = 0; i < npages; i++) {
135 data->map[i].t_type = SEGMF_MAP_EMPTY;
138 error = VOP_ADDMAP(VTOCVP(data->vp), 0, as, seg->s_base, seg->s_size,
139 data->prot, data->maxprot, MAP_SHARED, CRED(), NULL);
141 if (error != 0)
142 hat_unload(as->a_hat,
143 seg->s_base, seg->s_size, HAT_UNLOAD_UNMAP);
144 return (error);
148 * Duplicate a seg and return new segment in newseg.
150 static int
151 segmf_dup(struct seg *seg, struct seg *newseg)
153 struct segmf_data *data = seg->s_data;
154 struct segmf_data *ndata;
155 pgcnt_t npages = seg_pages(newseg);
156 size_t sz;
158 ndata = segmf_data_zalloc(newseg);
160 VN_HOLD(data->vp);
161 ndata->vp = data->vp;
162 ndata->prot = data->prot;
163 ndata->maxprot = data->maxprot;
164 ndata->domid = data->domid;
166 sz = npages * sizeof (segmf_map_t);
167 ndata->map = kmem_alloc(sz, KM_SLEEP);
168 bcopy(data->map, ndata->map, sz);
170 return (VOP_ADDMAP(VTOCVP(ndata->vp), 0, newseg->s_as,
171 newseg->s_base, newseg->s_size, ndata->prot, ndata->maxprot,
172 MAP_SHARED, CRED(), NULL));
176 * We only support unmapping the whole segment, and we automatically unlock
177 * what we previously soft-locked.
179 static int
180 segmf_unmap(struct seg *seg, caddr_t addr, size_t len)
182 struct segmf_data *data = seg->s_data;
183 offset_t off;
185 if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
186 (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET))
187 panic("segmf_unmap");
189 if (addr != seg->s_base || len != seg->s_size)
190 return (ENOTSUP);
192 hat_unload(seg->s_as->a_hat, addr, len,
193 HAT_UNLOAD_UNMAP | HAT_UNLOAD_UNLOCK);
195 off = (offset_t)seg_page(seg, addr);
197 ASSERT(data->vp != NULL);
199 (void) VOP_DELMAP(VTOCVP(data->vp), off, seg->s_as, addr, len,
200 data->prot, data->maxprot, MAP_SHARED, CRED(), NULL);
202 seg_free(seg);
203 return (0);
206 static void
207 segmf_free(struct seg *seg)
209 struct segmf_data *data = seg->s_data;
210 pgcnt_t npages = seg_pages(seg);
212 kmem_free(data->map, npages * sizeof (segmf_map_t));
213 VN_RELE(data->vp);
214 mutex_destroy(&data->lock);
215 kmem_free(data, sizeof (*data));
218 static int segmf_faultpage_debug = 0;
219 /*ARGSUSED*/
220 static int
221 segmf_faultpage(struct hat *hat, struct seg *seg, caddr_t addr,
222 enum fault_type type, uint_t prot)
224 struct segmf_data *data = seg->s_data;
225 uint_t hat_flags = HAT_LOAD_NOCONSIST;
226 mfn_t mfn;
227 x86pte_t pte;
228 segmf_map_t *map;
229 uint_t idx;
232 idx = seg_page(seg, addr);
233 map = &data->map[idx];
234 ASSERT(map->t_type == SEGMF_MAP_MFN);
236 mfn = map->u.m.m_mfn;
238 if (type == F_SOFTLOCK) {
239 mutex_enter(&freemem_lock);
240 data->softlockcnt++;
241 mutex_exit(&freemem_lock);
242 hat_flags |= HAT_LOAD_LOCK;
243 } else
244 hat_flags |= HAT_LOAD;
246 if (segmf_faultpage_debug > 0) {
247 uprintf("segmf_faultpage: addr %p domid %x mfn %lx prot %x\n",
248 (void *)addr, data->domid, mfn, prot);
249 segmf_faultpage_debug--;
253 * Ask the HAT to load a throwaway mapping to page zero, then
254 * overwrite it with our foreign domain mapping. It gets removed
255 * later via hat_unload()
257 hat_devload(hat, addr, MMU_PAGESIZE, (pfn_t)0,
258 PROT_READ | HAT_UNORDERED_OK, hat_flags);
260 pte = mmu_ptob((x86pte_t)mfn) | PT_VALID | PT_USER | PT_FOREIGN;
261 if (prot & PROT_WRITE)
262 pte |= PT_WRITABLE;
264 if (HYPERVISOR_update_va_mapping_otherdomain((uintptr_t)addr, pte,
265 UVMF_INVLPG | UVMF_ALL, data->domid) != 0) {
266 hat_flags = HAT_UNLOAD_UNMAP;
268 if (type == F_SOFTLOCK) {
269 hat_flags |= HAT_UNLOAD_UNLOCK;
270 mutex_enter(&freemem_lock);
271 data->softlockcnt--;
272 mutex_exit(&freemem_lock);
275 hat_unload(hat, addr, MMU_PAGESIZE, hat_flags);
276 return (FC_MAKE_ERR(EFAULT));
279 return (0);
282 static int
283 seg_rw_to_prot(enum seg_rw rw)
285 switch (rw) {
286 case S_READ:
287 return (PROT_READ);
288 case S_WRITE:
289 return (PROT_WRITE);
290 case S_EXEC:
291 return (PROT_EXEC);
292 case S_OTHER:
293 default:
294 break;
296 return (PROT_READ | PROT_WRITE | PROT_EXEC);
299 static void
300 segmf_softunlock(struct hat *hat, struct seg *seg, caddr_t addr, size_t len)
302 struct segmf_data *data = seg->s_data;
304 hat_unlock(hat, addr, len);
306 mutex_enter(&freemem_lock);
307 ASSERT(data->softlockcnt >= btopr(len));
308 data->softlockcnt -= btopr(len);
309 mutex_exit(&freemem_lock);
311 if (data->softlockcnt == 0) {
312 struct as *as = seg->s_as;
314 if (AS_ISUNMAPWAIT(as)) {
315 mutex_enter(&as->a_contents);
316 if (AS_ISUNMAPWAIT(as)) {
317 AS_CLRUNMAPWAIT(as);
318 cv_broadcast(&as->a_cv);
320 mutex_exit(&as->a_contents);
325 static int
326 segmf_fault_range(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
327 enum fault_type type, enum seg_rw rw)
329 struct segmf_data *data = seg->s_data;
330 int error = 0;
331 caddr_t a;
333 if ((data->prot & seg_rw_to_prot(rw)) == 0)
334 return (FC_PROT);
336 /* loop over the address range handling each fault */
338 for (a = addr; a < addr + len; a += PAGESIZE) {
339 error = segmf_faultpage(hat, seg, a, type, data->prot);
340 if (error != 0)
341 break;
344 if (error != 0 && type == F_SOFTLOCK) {
345 size_t done = (size_t)(a - addr);
348 * Undo what's been done so far.
350 if (done > 0)
351 segmf_softunlock(hat, seg, addr, done);
354 return (error);
358 * We never demand-fault for seg_mf.
360 /*ARGSUSED*/
361 static int
362 segmf_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
363 enum fault_type type, enum seg_rw rw)
365 return (FC_MAKE_ERR(EFAULT));
368 /*ARGSUSED*/
369 static int
370 segmf_faulta(struct seg *seg, caddr_t addr)
372 return (0);
375 /*ARGSUSED*/
376 static int
377 segmf_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
379 return (EINVAL);
382 /*ARGSUSED*/
383 static int
384 segmf_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
386 return (EINVAL);
389 /*ARGSUSED*/
390 static int
391 segmf_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
393 return (-1);
396 /*ARGSUSED*/
397 static int
398 segmf_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
400 return (0);
404 * XXPV Hmm. Should we say that mf mapping are "in core?"
407 /*ARGSUSED*/
408 static size_t
409 segmf_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
411 size_t v;
413 for (v = 0, len = (len + PAGEOFFSET) & PAGEMASK; len;
414 len -= PAGESIZE, v += PAGESIZE)
415 *vec++ = 1;
416 return (v);
419 /*ARGSUSED*/
420 static int
421 segmf_lockop(struct seg *seg, caddr_t addr,
422 size_t len, int attr, int op, ulong_t *lockmap, size_t pos)
424 return (0);
427 static int
428 segmf_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
430 struct segmf_data *data = seg->s_data;
431 pgcnt_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
433 if (pgno != 0) {
435 protv[--pgno] = data->prot;
436 while (pgno != 0)
439 return (0);
442 static u_offset_t
443 segmf_getoffset(struct seg *seg, caddr_t addr)
445 return (addr - seg->s_base);
448 /*ARGSUSED*/
449 static int
450 segmf_gettype(struct seg *seg, caddr_t addr)
452 return (MAP_SHARED);
455 /*ARGSUSED1*/
456 static int
457 segmf_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
459 struct segmf_data *data = seg->s_data;
461 *vpp = VTOCVP(data->vp);
462 return (0);
465 /*ARGSUSED*/
466 static int
467 segmf_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
469 return (0);
/* Dump entry point: intentionally does nothing. */
/*ARGSUSED*/
static void
segmf_dump(struct seg *seg)
{
}
477 /*ARGSUSED*/
478 static int
479 segmf_pagelock(struct seg *seg, caddr_t addr, size_t len,
480 struct page ***ppp, enum lock_type type, enum seg_rw rw)
482 return (ENOTSUP);
485 /*ARGSUSED*/
486 static int
487 segmf_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
489 return (ENOTSUP);
492 static int
493 segmf_getmemid(struct seg *seg, caddr_t addr, memid_t *memid)
495 struct segmf_data *data = seg->s_data;
497 memid->val[0] = (uintptr_t)VTOCVP(data->vp);
498 memid->val[1] = (uintptr_t)seg_page(seg, addr);
499 return (0);
502 /*ARGSUSED*/
503 static lgrp_mem_policy_info_t *
504 segmf_getpolicy(struct seg *seg, caddr_t addr)
506 return (NULL);
509 /*ARGSUSED*/
510 static int
511 segmf_capable(struct seg *seg, segcapability_t capability)
513 return (0);
517 * Add a set of contiguous foreign MFNs to the segment. soft-locking them. The
518 * pre-faulting is necessary due to live migration; in particular we must
519 * return an error in response to IOCTL_PRIVCMD_MMAPBATCH rather than faulting
520 * later on a bad MFN. Whilst this isn't necessary for the other MMAP
521 * ioctl()s, we lock them too, as they should be transitory.
524 segmf_add_mfns(struct seg *seg, caddr_t addr, mfn_t mfn,
525 pgcnt_t pgcnt, domid_t domid)
527 struct segmf_data *data = seg->s_data;
528 pgcnt_t base;
529 faultcode_t fc;
530 pgcnt_t i;
531 int error = 0;
533 if (seg->s_ops != &segmf_ops)
534 return (EINVAL);
537 * Don't mess with dom0.
539 * Only allow the domid to be set once for the segment.
540 * After that attempts to add mappings to this segment for
541 * other domains explicitly fails.
544 if (domid == 0 || domid == DOMID_SELF)
545 return (EACCES);
547 mutex_enter(&data->lock);
549 if (data->domid == 0)
550 data->domid = domid;
552 if (data->domid != domid) {
553 error = EINVAL;
554 goto out;
557 base = seg_page(seg, addr);
559 for (i = 0; i < pgcnt; i++) {
560 data->map[base + i].t_type = SEGMF_MAP_MFN;
561 data->map[base + i].u.m.m_mfn = mfn++;
564 fc = segmf_fault_range(seg->s_as->a_hat, seg, addr,
565 pgcnt * MMU_PAGESIZE, F_SOFTLOCK, S_OTHER);
567 if (fc != 0) {
568 error = fc_decode(fc);
569 for (i = 0; i < pgcnt; i++) {
570 data->map[base + i].t_type = SEGMF_MAP_EMPTY;
574 out:
575 mutex_exit(&data->lock);
576 return (error);
580 segmf_add_grefs(struct seg *seg, caddr_t addr, uint_t flags,
581 grant_ref_t *grefs, uint_t cnt, domid_t domid)
583 struct segmf_data *data;
584 segmf_map_t *map;
585 faultcode_t fc;
586 uint_t idx;
587 uint_t i;
588 int e;
590 if (seg->s_ops != &segmf_ops)
591 return (EINVAL);
594 * Don't mess with dom0.
596 * Only allow the domid to be set once for the segment.
597 * After that attempts to add mappings to this segment for
598 * other domains explicitly fails.
601 if (domid == 0 || domid == DOMID_SELF)
602 return (EACCES);
604 data = seg->s_data;
605 idx = seg_page(seg, addr);
606 map = &data->map[idx];
607 e = 0;
609 mutex_enter(&data->lock);
611 if (data->domid == 0)
612 data->domid = domid;
614 if (data->domid != domid) {
615 e = EINVAL;
616 goto out;
619 /* store away the grefs passed in then fault in the pages */
620 for (i = 0; i < cnt; i++) {
621 map[i].t_type = SEGMF_MAP_GREF;
622 map[i].u.g.g_gref = grefs[i];
623 map[i].u.g.g_handle = 0;
624 map[i].u.g.g_flags = 0;
625 if (flags & SEGMF_GREF_WR) {
626 map[i].u.g.g_flags |= SEGMF_GFLAGS_WR;
629 fc = segmf_fault_gref_range(seg, addr, cnt);
630 if (fc != 0) {
631 e = fc_decode(fc);
632 for (i = 0; i < cnt; i++) {
633 data->map[i].t_type = SEGMF_MAP_EMPTY;
637 out:
638 mutex_exit(&data->lock);
639 return (e);
643 segmf_release_grefs(struct seg *seg, caddr_t addr, uint_t cnt)
645 gnttab_unmap_grant_ref_t mapop[SEGMF_MAX_GREFS];
646 struct segmf_data *data;
647 segmf_map_t *map;
648 uint_t idx;
649 long e;
650 int i;
651 int n;
654 if (cnt > SEGMF_MAX_GREFS) {
655 return (-1);
658 idx = seg_page(seg, addr);
659 data = seg->s_data;
660 map = &data->map[idx];
662 bzero(mapop, sizeof (gnttab_unmap_grant_ref_t) * cnt);
665 * for each entry which isn't empty and is currently mapped,
666 * set it up for an unmap then mark them empty.
668 n = 0;
669 for (i = 0; i < cnt; i++) {
670 ASSERT(map[i].t_type != SEGMF_MAP_MFN);
671 if ((map[i].t_type == SEGMF_MAP_GREF) &&
672 (map[i].u.g.g_flags & SEGMF_GFLAGS_MAPPED)) {
673 mapop[n].handle = map[i].u.g.g_handle;
674 mapop[n].host_addr = map[i].u.g.g_ptep;
675 mapop[n].dev_bus_addr = 0;
676 n++;
678 map[i].t_type = SEGMF_MAP_EMPTY;
681 /* if there's nothing to unmap, just return */
682 if (n == 0) {
683 return (0);
686 e = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &mapop, n);
687 if (e != 0) {
688 return (-1);
691 return (0);
695 void
696 segmf_add_gref_pte(struct seg *seg, caddr_t addr, uint64_t pte_ma)
698 struct segmf_data *data;
699 uint_t idx;
701 idx = seg_page(seg, addr);
702 data = seg->s_data;
704 data->map[idx].u.g.g_ptep = pte_ma;
708 static int
709 segmf_fault_gref_range(struct seg *seg, caddr_t addr, size_t cnt)
711 gnttab_map_grant_ref_t mapop[SEGMF_MAX_GREFS];
712 struct segmf_data *data;
713 segmf_map_t *map;
714 uint_t idx;
715 int e;
716 int i;
719 if (cnt > SEGMF_MAX_GREFS) {
720 return (-1);
723 data = seg->s_data;
724 idx = seg_page(seg, addr);
725 map = &data->map[idx];
727 bzero(mapop, sizeof (gnttab_map_grant_ref_t) * cnt);
729 ASSERT(map->t_type == SEGMF_MAP_GREF);
732 * map in each page passed in into the user apps AS. We do this by
733 * passing the MA of the actual pte of the mapping to the hypervisor.
735 for (i = 0; i < cnt; i++) {
736 mapop[i].host_addr = map[i].u.g.g_ptep;
737 mapop[i].dom = data->domid;
738 mapop[i].ref = map[i].u.g.g_gref;
739 mapop[i].flags = GNTMAP_host_map | GNTMAP_application_map |
740 GNTMAP_contains_pte;
741 if (!(map[i].u.g.g_flags & SEGMF_GFLAGS_WR)) {
742 mapop[i].flags |= GNTMAP_readonly;
745 e = xen_map_gref(GNTTABOP_map_grant_ref, mapop, cnt, B_TRUE);
746 if ((e != 0) || (mapop[0].status != GNTST_okay)) {
747 return (FC_MAKE_ERR(EFAULT));
750 /* save handle for segmf_release_grefs() and mark it as mapped */
751 for (i = 0; i < cnt; i++) {
752 ASSERT(mapop[i].status == GNTST_okay);
753 map[i].u.g.g_handle = mapop[i].handle;
754 map[i].u.g.g_flags |= SEGMF_GFLAGS_MAPPED;
757 return (0);
760 static struct seg_ops segmf_ops = {
761 segmf_dup,
762 segmf_unmap,
763 segmf_free,
764 segmf_fault,
765 segmf_faulta,
766 segmf_setprot,
767 segmf_checkprot,
768 (int (*)())segmf_kluster,
769 (size_t (*)(struct seg *))NULL, /* swapout */
770 segmf_sync,
771 segmf_incore,
772 segmf_lockop,
773 segmf_getprot,
774 segmf_getoffset,
775 segmf_gettype,
776 segmf_getvp,
777 segmf_advise,
778 segmf_dump,
779 segmf_pagelock,
780 segmf_setpagesize,
781 segmf_getmemid,
782 segmf_getpolicy,
783 segmf_capable,
784 seg_inherit_notsup