4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 * Machine frame segment driver. This segment driver allows dom0 processes to
29 * map pages of other domains or Xen (e.g. during save/restore). ioctl()s on
30 * the privcmd driver provide the MFN values backing each mapping, and we map
31 * them into the process's address space at this time. Demand-faulting is not
32 * supported by this driver due to the requirements upon some of the ioctl()s.
36 #include <sys/types.h>
37 #include <sys/systm.h>
38 #include <sys/vmsystm.h>
40 #include <sys/errno.h>
42 #include <sys/cmn_err.h>
43 #include <sys/vnode.h>
45 #include <sys/debug.h>
47 #include <sys/hypervisor.h>
54 #include <vm/hat_pte.h>
55 #include <vm/hat_i86.h>
56 #include <vm/seg_mf.h>
58 #include <sys/fs/snode.h>
60 #define VTOCVP(vp) (VTOS(vp)->s_commonvp)
62 typedef struct segmf_mfn_s
{
67 #define SEGMF_GFLAGS_WR 0x1
68 #define SEGMF_GFLAGS_MAPPED 0x2
69 typedef struct segmf_gref_s
{
73 grant_handle_t g_handle
;
76 typedef union segmf_mu_u
{
87 typedef struct segmf_map_s
{
88 segmf_map_type_t t_type
;
102 static struct seg_ops segmf_ops
;
104 static int segmf_fault_gref_range(struct seg
*seg
, caddr_t addr
, size_t len
);
/*
 * Allocate and initialise the per-segment private data for seg.
 *
 * The structure comes from kmem_zalloc() with KM_SLEEP, its lock is
 * initialised, and the segment's ops vector is pointed at segmf_ops.
 *
 * NOTE(review): the statements that attach the data to the segment and
 * return it are not visible in this extract; the callers below use the
 * return value as the new struct segmf_data pointer.
 */
106 static struct segmf_data
*
107 segmf_data_zalloc(struct seg
*seg
)
109 struct segmf_data
*data
= kmem_zalloc(sizeof (*data
), KM_SLEEP
);
111 mutex_init(&data
->lock
, "segmf.lock", MUTEX_DEFAULT
, NULL
);
112 seg
->s_ops
= &segmf_ops
;
/*
 * Create a machine-frame segment.
 *
 * args points at a struct segmf_crargs supplying the device (dev) and the
 * protections (prot, maxprot) for the new segment.  The routine:
 *   - calls hat_map() over the segment's address range,
 *   - allocates the private data and records the vnode found by
 *     specfind(dev, VCHR) together with prot/maxprot,
 *   - allocates one segmf_map_t per page and marks each SEGMF_MAP_EMPTY,
 *   - calls VOP_ADDMAP() on the common vnode for the whole range.
 *
 * NOTE(review): the condition guarding the hat_unload() call and the final
 * return are not visible in this extract; presumably the unload only runs
 * when VOP_ADDMAP() reports an error -- confirm against the full source.
 */
118 segmf_create(struct seg
*seg
, void *args
)
120 struct segmf_crargs
*a
= args
;
121 struct segmf_data
*data
;
122 struct as
*as
= seg
->s_as
;
123 pgcnt_t i
, npages
= seg_pages(seg
);
126 hat_map(as
->a_hat
, seg
->s_base
, seg
->s_size
, HAT_MAP
);
128 data
= segmf_data_zalloc(seg
);
129 data
->vp
= specfind(a
->dev
, VCHR
);
130 data
->prot
= a
->prot
;
131 data
->maxprot
= a
->maxprot
;
/* One map entry per page of the segment; every entry starts out empty. */
133 data
->map
= kmem_alloc(npages
* sizeof (segmf_map_t
), KM_SLEEP
);
134 for (i
= 0; i
< npages
; i
++) {
135 data
->map
[i
].t_type
= SEGMF_MAP_EMPTY
;
138 error
= VOP_ADDMAP(VTOCVP(data
->vp
), 0, as
, seg
->s_base
, seg
->s_size
,
139 data
->prot
, data
->maxprot
, MAP_SHARED
, CRED(), NULL
);
142 hat_unload(as
->a_hat
,
143 seg
->s_base
, seg
->s_size
, HAT_UNLOAD_UNMAP
);
148 * Duplicate a seg and return new segment in newseg.
/*
 * Duplicate seg into newseg.
 *
 * Fresh private data is allocated for newseg and the vnode pointer,
 * protections and domid are copied from the original.  The per-page map
 * array is sized from seg_pages(newseg) and filled with a byte copy of the
 * original array.  The return value is whatever VOP_ADDMAP() on the common
 * vnode returns for the new segment's range.
 */
151 segmf_dup(struct seg
*seg
, struct seg
*newseg
)
153 struct segmf_data
*data
= seg
->s_data
;
154 struct segmf_data
*ndata
;
155 pgcnt_t npages
= seg_pages(newseg
);
158 ndata
= segmf_data_zalloc(newseg
);
161 ndata
->vp
= data
->vp
;
162 ndata
->prot
= data
->prot
;
163 ndata
->maxprot
= data
->maxprot
;
164 ndata
->domid
= data
->domid
;
/* sz is reused for both the allocation and the copy so they always agree. */
166 sz
= npages
* sizeof (segmf_map_t
);
167 ndata
->map
= kmem_alloc(sz
, KM_SLEEP
);
168 bcopy(data
->map
, ndata
->map
, sz
);
170 return (VOP_ADDMAP(VTOCVP(ndata
->vp
), 0, newseg
->s_as
,
171 newseg
->s_base
, newseg
->s_size
, ndata
->prot
, ndata
->maxprot
,
172 MAP_SHARED
, CRED(), NULL
));
176 * We only support unmapping the whole segment, and we automatically unlock
177 * what we previously soft-locked.
/*
 * Unmap [addr, addr + len) from seg.
 *
 * Panics with "segmf_unmap" when the range falls outside the segment or when
 * addr or len is not page aligned.  A second test detects a request that
 * does not cover the entire segment; the action taken for that case is not
 * visible in this extract.
 *
 * The translations are then unloaded with HAT_UNLOAD_UNMAP and
 * HAT_UNLOAD_UNLOCK, and VOP_DELMAP() is called on the common vnode; its
 * return value is deliberately discarded.
 */
180 segmf_unmap(struct seg
*seg
, caddr_t addr
, size_t len
)
182 struct segmf_data
*data
= seg
->s_data
;
185 if (addr
< seg
->s_base
|| addr
+ len
> seg
->s_base
+ seg
->s_size
||
186 (len
& PAGEOFFSET
) || ((uintptr_t)addr
& PAGEOFFSET
))
187 panic("segmf_unmap");
189 if (addr
!= seg
->s_base
|| len
!= seg
->s_size
)
192 hat_unload(seg
->s_as
->a_hat
, addr
, len
,
193 HAT_UNLOAD_UNMAP
| HAT_UNLOAD_UNLOCK
);
/* off is the page index of addr within the segment, passed to VOP_DELMAP. */
195 off
= (offset_t
)seg_page(seg
, addr
);
197 ASSERT(data
->vp
!= NULL
);
199 (void) VOP_DELMAP(VTOCVP(data
->vp
), off
, seg
->s_as
, addr
, len
,
200 data
->prot
, data
->maxprot
, MAP_SHARED
, CRED(), NULL
);
/*
 * Release the private data of seg: free the per-page map array (sized from
 * seg_pages(seg), matching the allocation in segmf_create()), destroy the
 * lock, then free the segmf_data structure itself.
 */
207 segmf_free(struct seg
*seg
)
209 struct segmf_data
*data
= seg
->s_data
;
210 pgcnt_t npages
= seg_pages(seg
);
212 kmem_free(data
->map
, npages
* sizeof (segmf_map_t
));
214 mutex_destroy(&data
->lock
);
215 kmem_free(data
, sizeof (*data
));
/*
 * Debug aid: while this counter is positive, segmf_faultpage() prints one
 * uprintf() line per call and decrements it.
 */
218 static int segmf_faultpage_debug
= 0;
/*
 * Establish the mapping for the single page at addr.
 *
 * The map entry for the page must be of type SEGMF_MAP_MFN (asserted); its
 * MFN is the frame that gets mapped.  For F_SOFTLOCK faults freemem_lock is
 * taken briefly and HAT_LOAD_LOCK is added to the load flags.
 *
 * The mapping is built in two steps: hat_devload() installs a placeholder
 * mapping of pfn 0, and HYPERVISOR_update_va_mapping_otherdomain() then
 * replaces it with a PTE naming the foreign MFN (PT_VALID | PT_USER |
 * PT_FOREIGN).  If the hypercall fails the placeholder is unloaded again and
 * FC_MAKE_ERR(EFAULT) is returned.
 *
 * NOTE(review): the statements executed while freemem_lock is held, the
 * statement controlled by the PROT_WRITE test, and the success return are
 * not visible in this extract; presumably the locked sections adjust the
 * segment's softlock count -- confirm against the full source.
 */
221 segmf_faultpage(struct hat
*hat
, struct seg
*seg
, caddr_t addr
,
222 enum fault_type type
, uint_t prot
)
224 struct segmf_data
*data
= seg
->s_data
;
225 uint_t hat_flags
= HAT_LOAD_NOCONSIST
;
232 idx
= seg_page(seg
, addr
);
233 map
= &data
->map
[idx
];
234 ASSERT(map
->t_type
== SEGMF_MAP_MFN
);
236 mfn
= map
->u
.m
.m_mfn
;
238 if (type
== F_SOFTLOCK
) {
239 mutex_enter(&freemem_lock
);
241 mutex_exit(&freemem_lock
);
242 hat_flags
|= HAT_LOAD_LOCK
;
244 hat_flags
|= HAT_LOAD
;
246 if (segmf_faultpage_debug
> 0) {
247 uprintf("segmf_faultpage: addr %p domid %x mfn %lx prot %x\n",
248 (void *)addr
, data
->domid
, mfn
, prot
);
249 segmf_faultpage_debug
--;
253 * Ask the HAT to load a throwaway mapping to page zero, then
254 * overwrite it with our foreign domain mapping. It gets removed
255 * later via hat_unload()
257 hat_devload(hat
, addr
, MMU_PAGESIZE
, (pfn_t
)0,
258 PROT_READ
| HAT_UNORDERED_OK
, hat_flags
);
260 pte
= mmu_ptob((x86pte_t
)mfn
) | PT_VALID
| PT_USER
| PT_FOREIGN
;
261 if (prot
& PROT_WRITE
)
264 if (HYPERVISOR_update_va_mapping_otherdomain((uintptr_t)addr
, pte
,
265 UVMF_INVLPG
| UVMF_ALL
, data
->domid
) != 0) {
/* Hypercall failed: hat_flags is reused below as the hat_unload() flags. */
266 hat_flags
= HAT_UNLOAD_UNMAP
;
268 if (type
== F_SOFTLOCK
) {
269 hat_flags
|= HAT_UNLOAD_UNLOCK
;
270 mutex_enter(&freemem_lock
);
272 mutex_exit(&freemem_lock
);
275 hat_unload(hat
, addr
, MMU_PAGESIZE
, hat_flags
);
276 return (FC_MAKE_ERR(EFAULT
));
283 seg_rw_to_prot(enum seg_rw rw
)
296 return (PROT_READ
| PROT_WRITE
| PROT_EXEC
);
/*
 * Undo a soft-lock on [addr, addr + len).
 *
 * The HAT locks on the range are dropped with hat_unlock(), and the
 * segment's softlockcnt is reduced by btopr(len) under freemem_lock (the
 * ASSERT checks that the count does not underflow).  When the count reaches
 * zero and the address space has AS_ISUNMAPWAIT set, the flag is re-tested
 * under a_contents and waiters on a_cv are woken with cv_broadcast().
 *
 * NOTE(review): softlockcnt is compared with zero after freemem_lock has
 * been released, so that test reads the counter unlocked -- confirm this is
 * intentional.  The statement between the inner AS_ISUNMAPWAIT test and
 * cv_broadcast() is not visible in this extract.
 */
300 segmf_softunlock(struct hat
*hat
, struct seg
*seg
, caddr_t addr
, size_t len
)
302 struct segmf_data
*data
= seg
->s_data
;
304 hat_unlock(hat
, addr
, len
);
306 mutex_enter(&freemem_lock
);
307 ASSERT(data
->softlockcnt
>= btopr(len
));
308 data
->softlockcnt
-= btopr(len
);
309 mutex_exit(&freemem_lock
);
311 if (data
->softlockcnt
== 0) {
312 struct as
*as
= seg
->s_as
;
314 if (AS_ISUNMAPWAIT(as
)) {
315 mutex_enter(&as
->a_contents
);
316 if (AS_ISUNMAPWAIT(as
)) {
318 cv_broadcast(&as
->a_cv
);
320 mutex_exit(&as
->a_contents
);
/*
 * Fault in every page of [addr, addr + len).
 *
 * The access type rw is first converted with seg_rw_to_prot() and tested
 * against the segment's protections; the action taken when none of the
 * required bits are present is not visible in this extract.
 *
 * Each page is then handed to segmf_faultpage() using the segment's own
 * protections (data->prot).  If a page fails during an F_SOFTLOCK request,
 * the pages already processed -- [addr, a) -- are released again with
 * segmf_softunlock().
 */
326 segmf_fault_range(struct hat
*hat
, struct seg
*seg
, caddr_t addr
, size_t len
,
327 enum fault_type type
, enum seg_rw rw
)
329 struct segmf_data
*data
= seg
->s_data
;
333 if ((data
->prot
& seg_rw_to_prot(rw
)) == 0)
336 /* loop over the address range handling each fault */
338 for (a
= addr
; a
< addr
+ len
; a
+= PAGESIZE
) {
339 error
= segmf_faultpage(hat
, seg
, a
, type
, data
->prot
);
344 if (error
!= 0 && type
== F_SOFTLOCK
) {
/* done = number of bytes successfully soft-locked before the failure. */
345 size_t done
= (size_t)(a
- addr
);
348 * Undo what's been done so far.
351 segmf_softunlock(hat
, seg
, addr
, done
);
358 * We never demand-fault for seg_mf.
/*
 * Fault entry point for the segment.  The only statement visible here
 * returns FC_MAKE_ERR(EFAULT) without touching any of the arguments.
 */
362 segmf_fault(struct hat
*hat
, struct seg
*seg
, caddr_t addr
, size_t len
,
363 enum fault_type type
, enum seg_rw rw
)
365 return (FC_MAKE_ERR(EFAULT
));
370 segmf_faulta(struct seg
*seg
, caddr_t addr
)
377 segmf_setprot(struct seg
*seg
, caddr_t addr
, size_t len
, uint_t prot
)
384 segmf_checkprot(struct seg
*seg
, caddr_t addr
, size_t len
, uint_t prot
)
391 segmf_kluster(struct seg
*seg
, caddr_t addr
, ssize_t delta
)
398 segmf_sync(struct seg
*seg
, caddr_t addr
, size_t len
, int attr
, uint_t flags
)
404 * XXPV Hmm. Should we say that mf mappings are "in core"?
409 segmf_incore(struct seg
*seg
, caddr_t addr
, size_t len
, char *vec
)
413 for (v
= 0, len
= (len
+ PAGEOFFSET
) & PAGEMASK
; len
;
414 len
-= PAGESIZE
, v
+= PAGESIZE
)
421 segmf_lockop(struct seg
*seg
, caddr_t addr
,
422 size_t len
, int attr
, int op
, ulong_t
*lockmap
, size_t pos
)
/*
 * Report the protections for the pages spanned by [addr, addr + len].
 *
 * pgno starts as the number of pages between the page containing addr and
 * the page containing addr + len, inclusive.  Entries of protv are written
 * with the segment-wide data->prot, indexed by the pre-decremented pgno.
 *
 * NOTE(review): the loop construct around the store is not visible in this
 * extract; presumably it repeats until pgno reaches zero.
 */
428 segmf_getprot(struct seg
*seg
, caddr_t addr
, size_t len
, uint_t
*protv
)
430 struct segmf_data
*data
= seg
->s_data
;
431 pgcnt_t pgno
= seg_page(seg
, addr
+ len
) - seg_page(seg
, addr
) + 1;
435 protv
[--pgno
] = data
->prot
;
/*
 * Return the offset of addr within the segment, i.e. its distance in bytes
 * from seg->s_base.
 */
443 segmf_getoffset(struct seg
*seg
, caddr_t addr
)
445 return (addr
- seg
->s_base
);
450 segmf_gettype(struct seg
*seg
, caddr_t addr
)
/*
 * Store the segment's common vnode (the s_commonvp of data->vp, via the
 * VTOCVP() macro) in *vpp.  addr is not consulted by the visible code.
 */
457 segmf_getvp(struct seg
*seg
, caddr_t addr
, struct vnode
**vpp
)
459 struct segmf_data
*data
= seg
->s_data
;
461 *vpp
= VTOCVP(data
->vp
);
467 segmf_advise(struct seg
*seg
, caddr_t addr
, size_t len
, uint_t behav
)
474 segmf_dump(struct seg
*seg
)
479 segmf_pagelock(struct seg
*seg
, caddr_t addr
, size_t len
,
480 struct page
***ppp
, enum lock_type type
, enum seg_rw rw
)
487 segmf_setpagesize(struct seg
*seg
, caddr_t addr
, size_t len
, uint_t szc
)
/*
 * Fill in a memory id for addr: val[0] is the address of the segment's
 * common vnode and val[1] is the page index of addr within the segment.
 */
493 segmf_getmemid(struct seg
*seg
, caddr_t addr
, memid_t
*memid
)
495 struct segmf_data
*data
= seg
->s_data
;
497 memid
->val
[0] = (uintptr_t)VTOCVP(data
->vp
);
498 memid
->val
[1] = (uintptr_t)seg_page(seg
, addr
);
503 static lgrp_mem_policy_info_t
*
504 segmf_getpolicy(struct seg
*seg
, caddr_t addr
)
511 segmf_capable(struct seg
*seg
, segcapability_t capability
)
517 * Add a set of contiguous foreign MFNs to the segment, soft-locking them. The
518 * pre-faulting is necessary due to live migration; in particular we must
519 * return an error in response to IOCTL_PRIVCMD_MMAPBATCH rather than faulting
520 * later on a bad MFN. Whilst this isn't necessary for the other MMAP
521 * ioctl()s, we lock them too, as they should be transitory.
/*
 * Record pgcnt consecutive MFNs, starting at mfn, for the pages beginning
 * at addr, and fault them in soft-locked.
 *
 * The segment must be a seg_mf segment (its ops vector is compared with
 * segmf_ops) and domid must be neither 0 nor DOMID_SELF.  Under data->lock
 * the segment's domid is checked against the caller's, the map entries
 * base .. base + pgcnt - 1 are set to SEGMF_MAP_MFN with increasing MFN
 * values, and segmf_fault_range() is called with F_SOFTLOCK / S_OTHER.
 * On the failure path the fault code is decoded with fc_decode() and the
 * same entries (base + i) are reset to SEGMF_MAP_EMPTY.
 *
 * NOTE(review): the return statements, the assignment performed when
 * data->domid is still 0, and the test guarding the undo loop are not
 * visible in this extract.
 */
524 segmf_add_mfns(struct seg
*seg
, caddr_t addr
, mfn_t mfn
,
525 pgcnt_t pgcnt
, domid_t domid
)
527 struct segmf_data
*data
= seg
->s_data
;
533 if (seg
->s_ops
!= &segmf_ops
)
537 * Don't mess with dom0.
539 * Only allow the domid to be set once for the segment.
540 * After that attempts to add mappings to this segment for
541 * other domains explicitly fails.
544 if (domid
== 0 || domid
== DOMID_SELF
)
547 mutex_enter(&data
->lock
);
549 if (data
->domid
== 0)
552 if (data
->domid
!= domid
) {
557 base
= seg_page(seg
, addr
);
559 for (i
= 0; i
< pgcnt
; i
++) {
560 data
->map
[base
+ i
].t_type
= SEGMF_MAP_MFN
;
561 data
->map
[base
+ i
].u
.m
.m_mfn
= mfn
++;
564 fc
= segmf_fault_range(seg
->s_as
->a_hat
, seg
, addr
,
565 pgcnt
* MMU_PAGESIZE
, F_SOFTLOCK
, S_OTHER
);
568 error
= fc_decode(fc
);
569 for (i
= 0; i
< pgcnt
; i
++) {
570 data
->map
[base
+ i
].t_type
= SEGMF_MAP_EMPTY
;
575 mutex_exit(&data
->lock
);
/*
 * Record cnt grant references for the pages beginning at addr and map them.
 *
 * The segment must be a seg_mf segment (its ops vector is compared with
 * segmf_ops) and domid must be neither 0 nor DOMID_SELF.  map points at the
 * entry for addr's page, so map[i] is entry idx + i of the segment.  Under
 * data->lock the segment's domid is checked against the caller's, each
 * entry is set to SEGMF_MAP_GREF with its gref, a zero handle and the write
 * flag taken from SEGMF_GREF_WR in flags, and segmf_fault_gref_range() is
 * called to map the range.  On the failure path the entries written above
 * are reset to SEGMF_MAP_EMPTY.
 *
 * NOTE(review): the return statements, the assignment of data, the
 * assignment performed when data->domid is still 0, and the test guarding
 * the undo loop are not visible in this extract.
 */
580 segmf_add_grefs(struct seg
*seg
, caddr_t addr
, uint_t flags
,
581 grant_ref_t
*grefs
, uint_t cnt
, domid_t domid
)
583 struct segmf_data
*data
;
590 if (seg
->s_ops
!= &segmf_ops
)
594 * Don't mess with dom0.
596 * Only allow the domid to be set once for the segment.
597 * After that attempts to add mappings to this segment for
598 * other domains explicitly fails.
601 if (domid
== 0 || domid
== DOMID_SELF
)
605 idx
= seg_page(seg
, addr
);
606 map
= &data
->map
[idx
];
609 mutex_enter(&data
->lock
);
611 if (data
->domid
== 0)
614 if (data
->domid
!= domid
) {
619 /* store away the grefs passed in then fault in the pages */
620 for (i
= 0; i
< cnt
; i
++) {
621 map
[i
].t_type
= SEGMF_MAP_GREF
;
622 map
[i
].u
.g
.g_gref
= grefs
[i
];
623 map
[i
].u
.g
.g_handle
= 0;
624 map
[i
].u
.g
.g_flags
= 0;
625 if (flags
& SEGMF_GREF_WR
) {
626 map
[i
].u
.g
.g_flags
|= SEGMF_GFLAGS_WR
;
629 fc
= segmf_fault_gref_range(seg
, addr
, cnt
);
/*
 * Undo exactly the entries filled in above.  They were written through map
 * (which already includes the idx offset), so they must be reset through
 * map as well; indexing data->map[i] would clear entries 0 .. cnt - 1 of the
 * segment instead and leave the new ones marked SEGMF_MAP_GREF.
 */
632 for (i
= 0; i
< cnt
; i
++) {
633 map
[i
].t_type
= SEGMF_MAP_EMPTY
;
638 mutex_exit(&data
->lock
);
/*
 * Unmap up to cnt grant references for the pages beginning at addr.
 *
 * cnt is compared with SEGMF_MAX_GREFS, the size of the on-stack mapop
 * array; the action taken when it is exceeded is not visible in this
 * extract.  The first cnt mapop entries are zeroed.  For each of the cnt
 * map entries (none may be SEGMF_MAP_MFN, per the ASSERT), those of type
 * SEGMF_MAP_GREF that carry SEGMF_GFLAGS_MAPPED get an unmap operation
 * queued at mapop[n] using the saved handle and PTE address, and entries
 * are marked SEGMF_MAP_EMPTY.  The queued operations are then submitted in
 * one GNTTABOP_unmap_grant_ref hypercall.
 *
 * NOTE(review): the assignment of data, the increment of n, and the
 * handling of the hypercall result e are not visible in this extract.
 */
643 segmf_release_grefs(struct seg
*seg
, caddr_t addr
, uint_t cnt
)
645 gnttab_unmap_grant_ref_t mapop
[SEGMF_MAX_GREFS
];
646 struct segmf_data
*data
;
654 if (cnt
> SEGMF_MAX_GREFS
) {
658 idx
= seg_page(seg
, addr
);
660 map
= &data
->map
[idx
];
662 bzero(mapop
, sizeof (gnttab_unmap_grant_ref_t
) * cnt
);
665 * for each entry which isn't empty and is currently mapped,
666 * set it up for an unmap then mark them empty.
669 for (i
= 0; i
< cnt
; i
++) {
670 ASSERT(map
[i
].t_type
!= SEGMF_MAP_MFN
);
671 if ((map
[i
].t_type
== SEGMF_MAP_GREF
) &&
672 (map
[i
].u
.g
.g_flags
& SEGMF_GFLAGS_MAPPED
)) {
673 mapop
[n
].handle
= map
[i
].u
.g
.g_handle
;
674 mapop
[n
].host_addr
= map
[i
].u
.g
.g_ptep
;
675 mapop
[n
].dev_bus_addr
= 0;
678 map
[i
].t_type
= SEGMF_MAP_EMPTY
;
681 /* if there's nothing to unmap, just return */
686 e
= HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref
, &mapop
, n
);
/*
 * Remember pte_ma in the g_ptep field of the map entry for the page
 * containing addr.  segmf_fault_gref_range() and segmf_release_grefs() pass
 * that field to the hypervisor as host_addr.
 *
 * NOTE(review): the assignment of data is not visible in this extract.
 */
696 segmf_add_gref_pte(struct seg
*seg
, caddr_t addr
, uint64_t pte_ma
)
698 struct segmf_data
*data
;
701 idx
= seg_page(seg
, addr
);
704 data
->map
[idx
].u
.g
.g_ptep
= pte_ma
;
/*
 * Map cnt grant references, starting with the map entry for addr's page.
 *
 * cnt is compared with SEGMF_MAX_GREFS, the size of the on-stack mapop
 * array; the action taken when it is exceeded is not visible in this
 * extract.  One map operation is built per entry from the saved PTE address
 * (g_ptep), the segment's domid and the entry's gref; GNTMAP_readonly is
 * added for entries without SEGMF_GFLAGS_WR.  The batch is submitted with
 * xen_map_gref().  If that call fails, or the first operation's status is
 * not GNTST_okay, FC_MAKE_ERR(EFAULT) is returned.  Otherwise each entry's
 * handle is saved and the entry is flagged SEGMF_GFLAGS_MAPPED.
 *
 * NOTE(review): only mapop[0].status is tested in the error check; the
 * statuses of the remaining operations are covered solely by the ASSERT in
 * the final loop.  It looks like a failure in a later entry would still be
 * recorded as mapped when assertions are compiled out -- confirm.  The
 * assignment of data and the tail of the flags expression are not visible
 * in this extract.
 */
709 segmf_fault_gref_range(struct seg
*seg
, caddr_t addr
, size_t cnt
)
711 gnttab_map_grant_ref_t mapop
[SEGMF_MAX_GREFS
];
712 struct segmf_data
*data
;
719 if (cnt
> SEGMF_MAX_GREFS
) {
724 idx
= seg_page(seg
, addr
);
725 map
= &data
->map
[idx
];
727 bzero(mapop
, sizeof (gnttab_map_grant_ref_t
) * cnt
);
729 ASSERT(map
->t_type
== SEGMF_MAP_GREF
);
732 * map in each page passed in into the user apps AS. We do this by
733 * passing the MA of the actual pte of the mapping to the hypervisor.
735 for (i
= 0; i
< cnt
; i
++) {
736 mapop
[i
].host_addr
= map
[i
].u
.g
.g_ptep
;
737 mapop
[i
].dom
= data
->domid
;
738 mapop
[i
].ref
= map
[i
].u
.g
.g_gref
;
739 mapop
[i
].flags
= GNTMAP_host_map
| GNTMAP_application_map
|
741 if (!(map
[i
].u
.g
.g_flags
& SEGMF_GFLAGS_WR
)) {
742 mapop
[i
].flags
|= GNTMAP_readonly
;
745 e
= xen_map_gref(GNTTABOP_map_grant_ref
, mapop
, cnt
, B_TRUE
);
746 if ((e
!= 0) || (mapop
[0].status
!= GNTST_okay
)) {
747 return (FC_MAKE_ERR(EFAULT
));
750 /* save handle for segmf_release_grefs() and mark it as mapped */
751 for (i
= 0; i
< cnt
; i
++) {
752 ASSERT(mapop
[i
].status
== GNTST_okay
);
753 map
[i
].u
.g
.g_handle
= mapop
[i
].handle
;
754 map
[i
].u
.g
.g_flags
|= SEGMF_GFLAGS_MAPPED
;
760 static struct seg_ops segmf_ops
= {
768 (int (*)())segmf_kluster
,
769 (size_t (*)(struct seg
*))NULL
, /* swapout */