 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Note: This is the backend part of the split PV disk driver. This driver
 * is not a nexus driver, nor is it a leaf driver (block/char/stream driver).
 * Currently, it does not create any minor node. So, although it runs in
 * the backend domain, it will not be used directly from within dom0.
 * It simply takes block I/O requests issued by the frontend from a shared
 * page (blkif ring buffer - defined by Xen) between the backend and frontend
 * domains, generates a buf, and pushes it down to the underlying disk target
 * driver via the ldi interface. When the buf is done, this driver generates
 * a response and puts it into the ring buffer to inform the frontend of the
 * status of the I/O request it issued. When a new virtual device entry is
 * added in xenstore, a watch event is sent from Xen to the xvdi framework,
 * which will, in turn, create the devinfo node and try to attach this driver
 * (see xvdi_create_dev). When the frontend peer changes its state to
 * XenbusStateClose, an event will also be sent from Xen to the xvdi
 * framework, which will detach and remove this devinfo node (see
 * i_xvdi_oestate_handler). I/O requests taken from the ring buffer and
 * events coming from xenstore cannot be trusted. We verify them in
 * xdb_get_buf() and xdb_check_state_transition().
 *
 * Virtual device configuration is read/written from/to the database via
 * xenbus_* interfaces. The driver also uses xvdi_* to interact with the
 * hypervisor. There is an on-going effort to make xvdi_* cover all xenbus_*.
 */
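/*
 * Illustrative sketch (not compiled code): the request/response flow
 * described above reduces to a loop of roughly this shape, where every
 * helper named below is one of this driver's own functions:
 *
 *	while (xdb_get_request(vdp, &req)) {	   // consume shared ring
 *		bp = xdb_get_buf(vdp, &req, xreq); // verify & form a buf
 *		(void) ldi_strategy(vdp->xs_ldi_hdl, bp); // push to disk
 *	}
 *	// ... and later, in xdb_biodone(), once the buf completes:
 *	if (xdb_push_response(vdp, id, op, bioerr)) // produce response
 *		xvdi_notify_oe(vdp->xs_dip);	   // kick the frontend
 */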
#include <sys/types.h>
#include <sys/dditypes.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/bootconf.h>
#include <sys/promif.h>
#include <sys/sysmacros.h>
#include <public/io/xenbus.h>
#include <public/io/xs_wire.h>
#include <xen/sys/xenbus_impl.h>
#include <xen/sys/xendev.h>
#include <sys/gnttab.h>
#include <sys/scsi/generic/inquiry.h>
#include <vm/seg_kmem.h>
#include <vm/hat_i86.h>
#include <xen/io/blkif_impl.h>
static xdb_t *xdb_statep;
static int xdb_debug = 0;

static void xdb_close(dev_info_t *);
static int xdb_push_response(xdb_t *, uint64_t, uint8_t, uint16_t);
static int xdb_get_request(xdb_t *, blkif_request_t *);
static void blkif_get_x86_32_req(blkif_request_t *, blkif_x86_32_request_t *);
static void blkif_get_x86_64_req(blkif_request_t *, blkif_x86_64_request_t *);
static int xdb_biodone(buf_t *);
/*
 * Debug aid: track which VAs currently have grant pages mapped so that
 * any remapping of a live VA is caught immediately.
 */
static void
logva(xdb_t *vdp, uint64_t va)
{
	uint64_t *page_addrs;
	int i;

	page_addrs = vdp->page_addrs;
	for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) {
		if (page_addrs[i] == va)
			debug_enter("VA remapping found!");
	}

	for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) {
		if (page_addrs[i] == 0) {
			page_addrs[i] = va;
			break;
		}
	}
	ASSERT(i < XDB_MAX_IO_PAGES(vdp));
}
static void
unlogva(xdb_t *vdp, uint64_t va)
{
	uint64_t *page_addrs;
	int i;

	page_addrs = vdp->page_addrs;
	for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) {
		if (page_addrs[i] == va) {
			page_addrs[i] = 0;
			break;
		}
	}
	ASSERT(i < XDB_MAX_IO_PAGES(vdp));
}
static void
xdb_dump_request_oe(blkif_request_t *req)
{
	int i;

	/*
	 * Exploit the public interface definitions for BLKIF_OP_READ
	 * etc.
	 */
	char *op_name[] = { "read", "write", "barrier", "flush" };

	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "op=%s", op_name[req->operation]));
	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "num of segments=%d",
	    req->nr_segments));
	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "handle=%d", req->handle));
	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "id=%llu",
	    (unsigned long long)req->id));
	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "start sector=%llu",
	    (unsigned long long)req->sector_number));
	for (i = 0; i < req->nr_segments; i++) {
		XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "gref=%d, first sec=%d, "
		    "last sec=%d", req->seg[i].gref, req->seg[i].first_sect,
		    req->seg[i].last_sect));
	}
}
/*
 * Names of the request statistics published via kstat; the order must
 * match the assignments in xdb_kstat_update().
 */
static char *xdb_stats[] = {
	"rd_reqs",
	"wr_reqs",
	"br_reqs",
	"fl_reqs",
	"oo_reqs"
};
static int
xdb_kstat_update(kstat_t *ksp, int flag)
{
	xdb_t *vdp;
	kstat_named_t *knp;

	if (flag != KSTAT_READ)
		return (EACCES);

	vdp = ksp->ks_private;
	knp = ksp->ks_data;

	/*
	 * Assignment order should match that of the names in
	 * xdb_stats.
	 */
	(knp++)->value.ui64 = vdp->xs_stat_req_reads;
	(knp++)->value.ui64 = vdp->xs_stat_req_writes;
	(knp++)->value.ui64 = vdp->xs_stat_req_barriers;
	(knp++)->value.ui64 = vdp->xs_stat_req_flushes;
	(knp++)->value.ui64 = 0; /* oo_req */

	return (0);
}
static boolean_t
xdb_kstat_init(xdb_t *vdp)
{
	int nstat = sizeof (xdb_stats) / sizeof (xdb_stats[0]);
	char **cp = xdb_stats;
	kstat_named_t *knp;

	if ((vdp->xs_kstats = kstat_create("xdb",
	    ddi_get_instance(vdp->xs_dip),
	    "req_statistics", "block", KSTAT_TYPE_NAMED,
	    nstat, 0)) == NULL)
		return (B_FALSE);

	vdp->xs_kstats->ks_private = vdp;
	vdp->xs_kstats->ks_update = xdb_kstat_update;

	knp = vdp->xs_kstats->ks_data;
	while (nstat > 0) {
		kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);
		knp++;
		cp++;
		nstat--;
	}

	kstat_install(vdp->xs_kstats);

	return (B_TRUE);
}
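/*
 * Usage note (illustrative): once installed, these counters can be read
 * from the control domain with kstat(1M), e.g.:
 *
 *	# kstat -m xdb -n req_statistics
 *
 * The instance number of interest matches ddi_get_instance() above.
 */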
static char *
i_pathname(dev_info_t *dip)
{
	char *path, *rv;

	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(dip, path);
	rv = strdup(path);
	kmem_free(path, MAXPATHLEN);

	return (rv);
}
static buf_t *
xdb_get_buf(xdb_t *vdp, blkif_request_t *req, xdb_request_t *xreq)
{
	buf_t *bp;
	uint8_t segs, curseg;
	int sectors;
	int i, err;
	gnttab_map_grant_ref_t mapops[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	ddi_acc_handle_t acchdl;

	acchdl = vdp->xs_ring_hdl;
	bp = XDB_XREQ2BP(xreq);
	curseg = xreq->xr_curseg;
	/* init a new xdb request */
	if (req != NULL) {
		ASSERT(MUTEX_HELD(&vdp->xs_iomutex));
		boolean_t pagemapok = B_TRUE;
		uint8_t op = ddi_get8(acchdl, &req->operation);

		xreq->xr_vdp = vdp;
		xreq->xr_op = op;
		xreq->xr_id = ddi_get64(acchdl, &req->id);
		segs = xreq->xr_buf_pages = ddi_get8(acchdl,
		    &req->nr_segments);
		if (segs == 0) {
			if (op != BLKIF_OP_FLUSH_DISKCACHE)
				cmn_err(CE_WARN,
				    "!non-BLKIF_OP_FLUSH_DISKCACHE"
				    " is seen from domain %d with zero "
				    "length data buffer!", vdp->xs_peer);
			bioinit(bp);
			bp->b_bcount = 0;
			bp->b_lblkno = 0;
			bp->b_un.b_addr = NULL;
			return (bp);
		} else if (op == BLKIF_OP_FLUSH_DISKCACHE) {
			cmn_err(CE_WARN, "!BLKIF_OP_FLUSH_DISKCACHE"
			    " is seen from domain %d with non-zero "
			    "length data buffer!", vdp->xs_peer);
		}

		/*
		 * segs should be no bigger than
		 * BLKIF_MAX_SEGMENTS_PER_REQUEST according to the definition
		 * of the blk interface by Xen, so we do a sanity check here.
		 */
		if (segs > BLKIF_MAX_SEGMENTS_PER_REQUEST)
			segs = xreq->xr_buf_pages =
			    BLKIF_MAX_SEGMENTS_PER_REQUEST;

		for (i = 0; i < segs; i++) {
			uint8_t fs, ls;

			mapops[i].host_addr =
			    (uint64_t)(uintptr_t)XDB_IOPAGE_VA(
			    vdp->xs_iopage_va, xreq->xr_idx, i);
			mapops[i].dom = vdp->xs_peer;
			mapops[i].ref = ddi_get32(acchdl, &req->seg[i].gref);
			mapops[i].flags = GNTMAP_host_map;
			if (op != BLKIF_OP_READ)
				mapops[i].flags |= GNTMAP_readonly;

			fs = ddi_get8(acchdl, &req->seg[i].first_sect);
			ls = ddi_get8(acchdl, &req->seg[i].last_sect);

			/*
			 * first_sect should be no bigger than last_sect and
			 * both of them should be no bigger than
			 * XB_LAST_SECTOR_IN_SEG according to the definition
			 * of the blk interface by Xen, so sanity check again.
			 */
			if (fs > XB_LAST_SECTOR_IN_SEG)
				fs = XB_LAST_SECTOR_IN_SEG;
			if (ls > XB_LAST_SECTOR_IN_SEG)
				ls = XB_LAST_SECTOR_IN_SEG;

			xreq->xr_segs[i].fs = fs;
			xreq->xr_segs[i].ls = ls;
		}

		/* map in io pages */
		err = xen_map_gref(GNTTABOP_map_grant_ref, mapops, i,
		    B_FALSE);
		if (err != 0)
			return (NULL);
		for (i = 0; i < segs; i++) {
			/*
			 * Although HYPERVISOR_grant_table_op() returned no
			 * error, mapping of each single page can fail. So,
			 * we have to do the check here and handle the error
			 * if needed.
			 */
			if (mapops[i].status != GNTST_okay) {
				int j;

				for (j = 0; j < i; j++) {
					unlogva(vdp, mapops[j].host_addr);
					xen_release_pfn(
					    xreq->xr_plist[j].p_pagenum);
				}
				pagemapok = B_FALSE;
				break;
			}
			/* record page mapping handle for unmapping later */
			xreq->xr_page_hdls[i] = mapops[i].handle;
			logva(vdp, mapops[i].host_addr);

			/*
			 * Pass the MFNs down using the shadow list
			 * (xr_pplist).
			 *
			 * This is pretty ugly since we have implicit
			 * knowledge of how the rootnex binds buffers.
			 * The GNTTABOP_map_grant_ref op makes us do some ugly
			 * stuff since we're not allowed to touch these PTEs
			 * from the VM.
			 *
			 * Obviously, these aren't real page_t's. The rootnex
			 * only needs p_pagenum.
			 * Also, don't use btop() here or 32 bit PAE breaks.
			 */
			xreq->xr_pplist[i] = &xreq->xr_plist[i];
			xreq->xr_plist[i].p_pagenum =
			    xen_assign_pfn(mapops[i].dev_bus_addr >>
			    PAGESHIFT);
		}

		/*
		 * not all pages mapped in successfully, unmap those
		 * mapped-in pages and return failure
		 */
		if (!pagemapok) {
			gnttab_unmap_grant_ref_t unmapop;

			for (i = 0; i < segs; i++) {
				if (mapops[i].status != GNTST_okay)
					continue;
				unmapop.host_addr =
				    (uint64_t)(uintptr_t)XDB_IOPAGE_VA(
				    vdp->xs_iopage_va, xreq->xr_idx, i);
				unmapop.dev_bus_addr = (uintptr_t)NULL;
				unmapop.handle = mapops[i].handle;
				(void) HYPERVISOR_grant_table_op(
				    GNTTABOP_unmap_grant_ref, &unmapop, 1);
			}

			return (NULL);
		}
		bioinit(bp);
		bp->b_lblkno = ddi_get64(acchdl, &req->sector_number);
		bp->b_flags = B_BUSY | B_SHADOW | B_PHYS;
		bp->b_flags |= (ddi_get8(acchdl, &req->operation) ==
		    BLKIF_OP_READ) ? B_READ : (B_WRITE | B_ASYNC);
	} else {
		/* reuse this buf */
		uint64_t blkst;
		int isread;

		blkst = bp->b_lblkno + bp->b_bcount / DEV_BSIZE;
		isread = bp->b_flags & B_READ;
		bioreset(bp);
		bp->b_lblkno = blkst;
		bp->b_flags = B_BUSY | B_SHADOW | B_PHYS;
		bp->b_flags |= isread ? B_READ : (B_WRITE | B_ASYNC);
		XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "reuse buf, xreq is %d!!",
		    xreq->xr_idx));
	}

	/* form a buf */
	bp->b_un.b_addr = XDB_IOPAGE_VA(vdp->xs_iopage_va, xreq->xr_idx,
	    curseg) + xreq->xr_segs[curseg].fs * DEV_BSIZE;
	bp->b_shadow = &xreq->xr_pplist[curseg];
	bp->b_iodone = xdb_biodone;
	sectors = 0;

	/*
	 * Run through the segments. There are XB_NUM_SECTORS_PER_SEG sectors
	 * per segment. On some OSes (e.g. Linux), there may be empty gaps
	 * between segments (i.e. the first segment may end on sector 6 and
	 * the second segment starts on sector 4).
	 *
	 * If a segment's first sector is not set to 0, and this is not the
	 * first segment in our buf, end this buf now.
	 *
	 * If a segment's last sector is not set to XB_LAST_SECTOR_IN_SEG,
	 * and this is not the last segment in the request, add this segment
	 * into the buf, then end this buf (updating the pointer to point to
	 * the next segment next time around).
	 */
	for (i = curseg; i < xreq->xr_buf_pages; i++) {
		if ((xreq->xr_segs[i].fs != 0) && (i != curseg)) {
			break;
		}
		sectors += (xreq->xr_segs[i].ls - xreq->xr_segs[i].fs + 1);
		if ((xreq->xr_segs[i].ls != XB_LAST_SECTOR_IN_SEG) &&
		    (i != (xreq->xr_buf_pages - 1))) {
			i++;
			break;
		}
	}
	xreq->xr_curseg = i;
	bp->b_bcount = sectors * DEV_BSIZE;
	bp->b_bufsize = bp->b_bcount;

	return (bp);
}
static xdb_request_t *
xdb_get_req(xdb_t *vdp)
{
	xdb_request_t *req;
	int idx;

	ASSERT(MUTEX_HELD(&vdp->xs_iomutex));
	ASSERT(vdp->xs_free_req != -1);
	req = &vdp->xs_req[vdp->xs_free_req];
	vdp->xs_free_req = req->xr_next;
	idx = req->xr_idx;
	bzero(req, sizeof (xdb_request_t));
	req->xr_idx = idx;
	return (req);
}
static void
xdb_free_req(xdb_request_t *req)
{
	xdb_t *vdp = req->xr_vdp;

	ASSERT(MUTEX_HELD(&vdp->xs_iomutex));
	req->xr_next = vdp->xs_free_req;
	vdp->xs_free_req = req->xr_idx;
}
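/*
 * Note: xs_req doubles as an index-linked free list; xr_next holds the
 * index of the next free slot and -1 terminates the chain. For example,
 * right after xdb_init_ioreqs() on a 4-entry table, xs_free_req == 0 and
 * the chain reads 0 -> 1 -> 2 -> 3 -> -1; xdb_get_req() pops from the
 * head and xdb_free_req() pushes back onto it, both under xs_iomutex.
 */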
static void
xdb_response(xdb_t *vdp, blkif_request_t *req, boolean_t ok)
{
	ddi_acc_handle_t acchdl = vdp->xs_ring_hdl;

	/* xdb_push_response() treats status 0 as success */
	if (xdb_push_response(vdp, ddi_get64(acchdl, &req->id),
	    ddi_get8(acchdl, &req->operation), ok ? 0 : EIO))
		xvdi_notify_oe(vdp->xs_dip);
}
static void
xdb_init_ioreqs(xdb_t *vdp)
{
	int i;

	ASSERT(vdp->xs_nentry);

	if (vdp->xs_req == NULL)
		vdp->xs_req = kmem_alloc(vdp->xs_nentry *
		    sizeof (xdb_request_t), KM_SLEEP);
	if (vdp->page_addrs == NULL)
		vdp->page_addrs = kmem_zalloc(XDB_MAX_IO_PAGES(vdp) *
		    sizeof (uint64_t), KM_SLEEP);
	for (i = 0; i < vdp->xs_nentry; i++) {
		vdp->xs_req[i].xr_idx = i;
		vdp->xs_req[i].xr_next = i + 1;
	}
	vdp->xs_req[vdp->xs_nentry - 1].xr_next = -1;
	vdp->xs_free_req = 0;

	/* alloc va in host dom for io page mapping */
	vdp->xs_iopage_va = vmem_xalloc(heap_arena,
	    XDB_MAX_IO_PAGES(vdp) * PAGESIZE, PAGESIZE, 0, 0, 0, 0,
	    VM_SLEEP);
	for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++)
		hat_prepare_mapping(kas.a_hat,
		    vdp->xs_iopage_va + i * PAGESIZE, NULL);
}
static void
xdb_uninit_ioreqs(xdb_t *vdp)
{
	int i;

	for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++)
		hat_release_mapping(kas.a_hat,
		    vdp->xs_iopage_va + i * PAGESIZE);
	vmem_xfree(heap_arena, vdp->xs_iopage_va,
	    XDB_MAX_IO_PAGES(vdp) * PAGESIZE);
	if (vdp->xs_req != NULL) {
		kmem_free(vdp->xs_req,
		    vdp->xs_nentry * sizeof (xdb_request_t));
		vdp->xs_req = NULL;
	}
	if (vdp->page_addrs != NULL) {
		kmem_free(vdp->page_addrs, XDB_MAX_IO_PAGES(vdp) *
		    sizeof (uint64_t));
		vdp->page_addrs = NULL;
	}
}
static uint_t
xdb_intr(caddr_t arg)
{
	xdb_t *vdp = (xdb_t *)arg;
	dev_info_t *dip = vdp->xs_dip;
	blkif_request_t req, *reqp = &req;
	xdb_request_t *xreq;
	buf_t *bp;
	uint8_t op;
	int ret = DDI_INTR_UNCLAIMED;

	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
	    "xdb@%s: I/O request received from dom %d",
	    ddi_get_name_addr(dip), vdp->xs_peer));

	mutex_enter(&vdp->xs_iomutex);

	/* shouldn't touch ring buffer if not in connected state */
	if (!vdp->xs_if_connected) {
		mutex_exit(&vdp->xs_iomutex);
		return (DDI_INTR_UNCLAIMED);
	}
	ASSERT(vdp->xs_hp_connected && vdp->xs_fe_initialised);

	/*
	 * We'll loop until there are no more requests in the ring.
	 * We won't be stuck in this loop forever since the size of the
	 * ring buffer is limited, and the frontend will stop pushing
	 * requests into it when the ring buffer is full.
	 */

	/* req_event will be increased in xvdi_ring_get_request() */
	while (xdb_get_request(vdp, reqp)) {
		ret = DDI_INTR_CLAIMED;

		op = ddi_get8(vdp->xs_ring_hdl, &reqp->operation);
		if (op == BLKIF_OP_READ ||
		    op == BLKIF_OP_WRITE ||
		    op == BLKIF_OP_WRITE_BARRIER ||
		    op == BLKIF_OP_FLUSH_DISKCACHE) {
			xdb_dump_request_oe(reqp);
			xreq = xdb_get_req(vdp);
			ASSERT(xreq);
			switch (op) {
			case BLKIF_OP_READ:
				vdp->xs_stat_req_reads++;
				break;
			case BLKIF_OP_WRITE_BARRIER:
				vdp->xs_stat_req_barriers++;
				/* FALLTHRU */
			case BLKIF_OP_WRITE:
				vdp->xs_stat_req_writes++;
				break;
			case BLKIF_OP_FLUSH_DISKCACHE:
				vdp->xs_stat_req_flushes++;
				break;
			}

			xreq->xr_curseg = 0; /* start from first segment */
			bp = xdb_get_buf(vdp, reqp, xreq);
			if (bp == NULL) {
				/* failed to form a buf */
				xdb_free_req(xreq);
				xdb_response(vdp, reqp, B_FALSE);
				continue;
			}
			bp->av_forw = NULL;

			XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
			    " buf %p, blkno %lld, size %lu, addr %p",
			    (void *)bp, (longlong_t)bp->b_blkno,
			    (ulong_t)bp->b_bcount, (void *)bp->b_un.b_addr));

			vdp->xs_ionum++;
			/* send bp to underlying blk driver */
			if (vdp->xs_f_iobuf == NULL) {
				vdp->xs_f_iobuf = vdp->xs_l_iobuf = bp;
			} else {
				vdp->xs_l_iobuf->av_forw = bp;
				vdp->xs_l_iobuf = bp;
			}
		} else {
			xdb_response(vdp, reqp, B_FALSE);
			XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: "
			    "Unsupported cmd received from dom %d",
			    ddi_get_name_addr(dip), vdp->xs_peer));
		}
	}
	/* notify our taskq to push buf to underlying blk driver */
	if (ret == DDI_INTR_CLAIMED)
		cv_broadcast(&vdp->xs_iocv);

	mutex_exit(&vdp->xs_iomutex);

	return (ret);
}
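/*
 * The av_forw linkage above forms a simple FIFO under xs_iomutex:
 * xdb_intr() is the producer appending at xs_l_iobuf, while the
 * xdb_send_buf() taskq thread below is the consumer draining from
 * xs_f_iobuf. When non-empty the invariant is:
 *
 *	xs_f_iobuf -> bp1 -> bp2 -> ... -> bpN == xs_l_iobuf
 */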
static int
xdb_biodone(buf_t *bp)
{
	int i, err, bioerr;
	uint8_t segs;
	gnttab_unmap_grant_ref_t unmapops[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	xdb_request_t *xreq = XDB_BP2XREQ(bp);
	xdb_t *vdp = xreq->xr_vdp;
	buf_t *nbp;

	bioerr = geterror(bp);
	if (bioerr)
		XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: I/O error %d",
		    ddi_get_name_addr(vdp->xs_dip), bioerr));

	/* check if we are done w/ this I/O request */
	if ((bioerr == 0) && (xreq->xr_curseg < xreq->xr_buf_pages)) {
		nbp = xdb_get_buf(vdp, NULL, xreq);
		if (nbp != NULL) {
			err = ldi_strategy(vdp->xs_ldi_hdl, nbp);
			if (err == 0) {
				XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
				    "sent buf to backend ok"));
				return (DDI_SUCCESS);
			}
			bioerr = EIO;
			XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: "
			    "sent buf to backend dev failed, err=%d",
			    ddi_get_name_addr(vdp->xs_dip), err));
		} else {
			bioerr = EIO;
		}
	}

	/* unmap io pages */
	if (xreq->xr_buf_pages != 0) {
		segs = xreq->xr_buf_pages;
		/*
		 * segs should be no bigger than
		 * BLKIF_MAX_SEGMENTS_PER_REQUEST according to the definition
		 * of the blk interface by Xen.
		 */
		ASSERT(segs <= BLKIF_MAX_SEGMENTS_PER_REQUEST);
		for (i = 0; i < segs; i++) {
			unmapops[i].host_addr =
			    (uint64_t)(uintptr_t)XDB_IOPAGE_VA(
			    vdp->xs_iopage_va, xreq->xr_idx, i);
			mutex_enter(&vdp->xs_iomutex);
			unlogva(vdp, unmapops[i].host_addr);
			mutex_exit(&vdp->xs_iomutex);
			unmapops[i].dev_bus_addr = (uintptr_t)NULL;
			unmapops[i].handle = xreq->xr_page_hdls[i];
		}
		err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
		    unmapops, segs);
		ASSERT(err == 0);

		/*
		 * If we have reached a barrier write or a cache flush, then
		 * we must flush all our I/Os.
		 */
		if (xreq->xr_op == BLKIF_OP_WRITE_BARRIER ||
		    xreq->xr_op == BLKIF_OP_FLUSH_DISKCACHE) {
			/*
			 * XXX At this point the write did succeed, so I don't
			 * believe we should report an error because the flush
			 * failed. However, this is a debatable point, so
			 * maybe we need to think more carefully about this.
			 * For now, just cast to void.
			 */
			(void) ldi_ioctl(vdp->xs_ldi_hdl,
			    DKIOCFLUSHWRITECACHE,
			    (intptr_t)NULL, FKIOCTL, kcred, NULL);
		}
	}

	mutex_enter(&vdp->xs_iomutex);

	/* send response back to frontend */
	if (vdp->xs_if_connected) {
		ASSERT(vdp->xs_hp_connected && vdp->xs_fe_initialised);
		if (xdb_push_response(vdp, xreq->xr_id, xreq->xr_op, bioerr))
			xvdi_notify_oe(vdp->xs_dip);
		XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
		    "sent resp back to frontend, id=%llu",
		    (unsigned long long)xreq->xr_id));
	}
	/* free io resources */
	biofini(bp);
	xdb_free_req(xreq);

	vdp->xs_ionum--;
	if (!vdp->xs_if_connected && (vdp->xs_ionum == 0)) {
		/* we're closing, someone is waiting for I/O clean-up */
		cv_signal(&vdp->xs_ionumcv);
	}

	mutex_exit(&vdp->xs_iomutex);

	return (DDI_SUCCESS);
}
static int
xdb_bindto_frontend(xdb_t *vdp)
{
	int err;
	char *oename;
	grant_ref_t gref;
	evtchn_port_t evtchn;
	dev_info_t *dip = vdp->xs_dip;
	char protocol[64] = "";

	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));

	/*
	 * Switch to the XenbusStateInitialised state. This lets the
	 * frontend know that we're about to negotiate a connection.
	 */
	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialised);

	/*
	 * Gather info from frontend
	 */
	oename = xvdi_get_oename(dip);
	if (oename == NULL)
		return (DDI_FAILURE);

	err = xenbus_gather(XBT_NULL, oename,
	    XBP_RING_REF, "%lu", &gref,
	    XBP_EVENT_CHAN, "%u", &evtchn,
	    NULL);
	if (err != 0) {
		xvdi_dev_error(dip, err,
		    "Getting ring-ref and evtchn from frontend");
		return (DDI_FAILURE);
	}

	vdp->xs_blk_protocol = BLKIF_PROTOCOL_NATIVE;
	vdp->xs_nentry = BLKIF_RING_SIZE;
	vdp->xs_entrysize = sizeof (union blkif_sring_entry);

	err = xenbus_gather(XBT_NULL, oename,
	    XBP_PROTOCOL, "%63s", protocol, NULL);
	if (err != 0) {
		(void) strcpy(protocol, "unspecified, assuming native");
	} else {
		/*
		 * We must check for NATIVE first, so that the fast path
		 * is taken for copying data from the guest to the host.
		 */
		if (strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE) != 0) {
			if (strcmp(protocol, XEN_IO_PROTO_ABI_X86_32) == 0) {
				vdp->xs_blk_protocol = BLKIF_PROTOCOL_X86_32;
				vdp->xs_nentry = BLKIF_X86_32_RING_SIZE;
				vdp->xs_entrysize =
				    sizeof (union blkif_x86_32_sring_entry);
			} else if (strcmp(protocol,
			    XEN_IO_PROTO_ABI_X86_64) == 0) {
				vdp->xs_blk_protocol = BLKIF_PROTOCOL_X86_64;
				vdp->xs_nentry = BLKIF_X86_64_RING_SIZE;
				vdp->xs_entrysize =
				    sizeof (union blkif_x86_64_sring_entry);
			} else {
				xvdi_fatal_error(dip, err, "unknown protocol");
				return (DDI_FAILURE);
			}
		}
	}
	cmn_err(CE_NOTE, "!xdb@%s: blkif protocol '%s' ",
	    ddi_get_name_addr(dip), protocol);

	/*
	 * Map and init ring. The ring parameters must match those which
	 * have been allocated in the front end.
	 */
	if (xvdi_map_ring(dip, vdp->xs_nentry, vdp->xs_entrysize,
	    gref, &vdp->xs_ring) != DDI_SUCCESS)
		return (DDI_FAILURE);

	/*
	 * This will be removed after we use the shadow I/O ring request,
	 * since we won't need to access the ring itself directly and thus
	 * the access handle will not be needed.
	 */
	vdp->xs_ring_hdl = vdp->xs_ring->xr_acc_hdl;

	/* bind event channel */
	err = xvdi_bind_evtchn(dip, evtchn);
	if (err != DDI_SUCCESS) {
		xvdi_unmap_ring(vdp->xs_ring);
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}
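/*
 * For reference (values illustrative only), the frontend entries
 * gathered above typically look like this in xenstore:
 *
 *	.../device/vbd/<dev>/ring-ref = "8"
 *	.../device/vbd/<dev>/event-channel = "10"
 *	.../device/vbd/<dev>/protocol = "x86_64-abi"
 *
 * A missing protocol entry leaves us on BLKIF_PROTOCOL_NATIVE; an
 * unrecognised one fails the bind.
 */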
static void
xdb_unbindfrom_frontend(xdb_t *vdp)
{
	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));

	xvdi_free_evtchn(vdp->xs_dip);
	xvdi_unmap_ring(vdp->xs_ring);
}
/*
 * xdb_params_change() initiates a change to the underlying device/file
 * that the backend is accessing. It does this by disconnecting from the
 * frontend, closing the old device, clearing a bunch of xenbus parameters,
 * and switching back to the XenbusStateInitialising state. The frontend
 * should notice this transition to the XenbusStateInitialising state and
 * should attempt to reconnect to us (the backend).
 */
static void
xdb_params_change(xdb_t *vdp, char *params, boolean_t update_xs)
{
	xenbus_transaction_t xbt;
	dev_info_t *dip = vdp->xs_dip;
	char *xsname;
	int err;

	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
	ASSERT(vdp->xs_params_path != NULL);

	if ((xsname = xvdi_get_xsname(dip)) == NULL)
		return;
	if (strcmp(vdp->xs_params_path, params) == 0)
		return;

	/*
	 * Close the device we're currently accessing and update the
	 * path which points to our backend device/file.
	 */
	xdb_close(dip);
	vdp->xs_fe_initialised = B_FALSE;

trans_retry:
	if ((err = xenbus_transaction_start(&xbt)) != 0) {
		xvdi_dev_error(dip, err, "params change transaction init");
		goto errout;
	}

	/*
	 * Delete all the xenbus properties that are connection dependent
	 * and go back to the initializing state so that the frontend
	 * driver can re-negotiate a connection.
	 */
	if (((err = xenbus_rm(xbt, xsname, XBP_FB)) != 0) ||
	    ((err = xenbus_rm(xbt, xsname, XBP_INFO)) != 0) ||
	    ((err = xenbus_rm(xbt, xsname, "sector-size")) != 0) ||
	    ((err = xenbus_rm(xbt, xsname, XBP_SECTORS)) != 0) ||
	    ((err = xenbus_rm(xbt, xsname, "instance")) != 0) ||
	    ((err = xenbus_rm(xbt, xsname, "node")) != 0) ||
	    (update_xs && ((err = xenbus_printf(xbt, xsname,
	    "params", "%s", params)) != 0)) ||
	    ((err = xvdi_switch_state(dip,
	    xbt, XenbusStateInitialising) > 0))) {
		(void) xenbus_transaction_end(xbt, 1);
		xvdi_dev_error(dip, err, "params change transaction setup");
		goto errout;
	}

	if ((err = xenbus_transaction_end(xbt, 0)) != 0) {
		if (err == EAGAIN) {
			/* transaction is ended, don't need to abort it */
			goto trans_retry;
		}
		xvdi_dev_error(dip, err, "params change transaction commit");
		goto errout;
	}

	/* Change the device that we plan to access */
	strfree(vdp->xs_params_path);
	vdp->xs_params_path = strdup(params);
	return;

errout:
	(void) xvdi_switch_state(dip, xbt, XenbusStateInitialising);
}
/*
 * xdb_watch_params_cb() - This callback is invoked whenever there
 * is an update to the following xenbus parameter:
 *     /local/domain/0/backend/vbd/<domU_id>/<domU_dev>/params
 *
 * This normally happens during xm block-configure operations, which
 * are used to change CD device images for HVM domUs.
 */
/*ARGSUSED*/
static void
xdb_watch_params_cb(dev_info_t *dip, const char *path, void *arg)
{
	xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip);
	char *xsname, *oename, *str, *str2;

	if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
	    ((oename = xvdi_get_oename(dip)) == NULL)) {
		return;
	}

	mutex_enter(&vdp->xs_cbmutex);

	if (xenbus_read_str(xsname, "params", &str) != 0) {
		mutex_exit(&vdp->xs_cbmutex);
		return;
	}

	if (strcmp(vdp->xs_params_path, str) == 0) {
		/* Nothing todo */
		mutex_exit(&vdp->xs_cbmutex);
		strfree(str);
		return;
	}

	/*
	 * If the frontend isn't a cd device, doesn't support media
	 * requests, or has locked the media, then we can't change
	 * the params value; restore the current value.
	 */
	str2 = NULL;
	if (!XDB_IS_FE_CD(vdp) ||
	    (xenbus_read_str(oename, XBP_MEDIA_REQ, &str2) != 0) ||
	    (strcmp(str2, XBV_MEDIA_REQ_LOCK) == 0)) {
		if (str2 != NULL)
			strfree(str2);
		strfree(str);

		str = i_pathname(dip);
		cmn_err(CE_NOTE,
		    "!%s: media locked, ignoring params update", str);
		strfree(str);

		mutex_exit(&vdp->xs_cbmutex);
		return;
	}
	strfree(str2);

	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE,
	    "block-configure params request: \"%s\"", str));

	xdb_params_change(vdp, str, B_FALSE);
	mutex_exit(&vdp->xs_cbmutex);
	strfree(str);
}
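/*
 * For example (illustrative command line), a CD image swap for an HVM
 * domU initiated from dom0 looks roughly like:
 *
 *	# xm block-configure <domU> file:/isos/new.iso hdc:cdrom r
 *
 * which rewrites the backend "params" node and fires the watch handled
 * above.
 */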
/*
 * xdb_watch_media_req_cb() - This callback is invoked whenever there
 * is an update to the following xenbus parameter:
 *     /local/domain/<domU_id>/device/vbd/<domU_dev>/media-req
 *
 * Media requests are only supported on CD devices and are issued by
 * the frontend. Currently the only supported media request operations
 * are "lock" and "eject". A "lock" prevents the backend from changing
 * the backing device/file (via xm block-configure). An "eject" request
 * tells the backend that it should disconnect from the frontend and
 * close the backing device/file that is currently in use.
 */
/*ARGSUSED*/
static void
xdb_watch_media_req_cb(dev_info_t *dip, const char *path, void *arg)
{
	xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip);
	char *oename, *str;

	mutex_enter(&vdp->xs_cbmutex);

	if ((oename = xvdi_get_oename(dip)) == NULL) {
		mutex_exit(&vdp->xs_cbmutex);
		return;
	}

	if (xenbus_read_str(oename, XBP_MEDIA_REQ, &str) != 0) {
		mutex_exit(&vdp->xs_cbmutex);
		return;
	}

	if (!XDB_IS_FE_CD(vdp)) {
		xvdi_dev_error(dip, EINVAL,
		    "media-req only supported for cdrom devices");
		mutex_exit(&vdp->xs_cbmutex);
		strfree(str);
		return;
	}

	if (strcmp(str, XBV_MEDIA_REQ_EJECT) != 0) {
		mutex_exit(&vdp->xs_cbmutex);
		strfree(str);
		return;
	}
	strfree(str);

	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "media eject request"));

	xdb_params_change(vdp, "", B_TRUE);
	(void) xenbus_printf(XBT_NULL, oename,
	    XBP_MEDIA_REQ, "%s", XBV_MEDIA_REQ_NONE);
	mutex_exit(&vdp->xs_cbmutex);
}
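/*
 * Illustrative trigger for the handler above: the frontend (or an
 * administrator poking at its xenstore tree) writes the media-req node,
 * e.g.:
 *
 *	# xenstore-write \
 *	    /local/domain/<domU_id>/device/vbd/<dev>/media-req eject
 *
 * Anything other than an "eject" request on a cd-type frontend is
 * rejected or ignored above.
 */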
/*
 * If we're dealing with a cdrom device, let the frontend know that
 * we support media requests via XBP_MEDIA_REQ_SUP, and setup a watch
 * to handle those frontend media request changes, which modify the
 * following xenstore parameter:
 *	/local/domain/<domU_id>/device/vbd/<domU_dev>/media-req
 */
static boolean_t
xdb_media_req_init(xdb_t *vdp)
{
	dev_info_t *dip = vdp->xs_dip;
	char *xsname, *oename;

	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));

	if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
	    ((oename = xvdi_get_oename(dip)) == NULL))
		return (B_FALSE);

	if (!XDB_IS_FE_CD(vdp))
		return (B_TRUE);

	if (xenbus_printf(XBT_NULL, xsname, XBP_MEDIA_REQ_SUP, "%d", 1) != 0)
		return (B_FALSE);

	if (xvdi_add_xb_watch_handler(dip, oename,
	    XBP_MEDIA_REQ, xdb_watch_media_req_cb, NULL) != DDI_SUCCESS) {
		xvdi_dev_error(dip, EAGAIN,
		    "Failed to register watch for cdrom media requests");
		return (B_FALSE);
	}

	return (B_TRUE);
}
/*
 * Get our params value. Also, if we're using "params" then setup a
 * watch to handle xm block-configure operations which modify the
 * following xenstore parameter:
 *	/local/domain/0/backend/vbd/<domU_id>/<domU_dev>/params
 */
static boolean_t
xdb_params_init(xdb_t *vdp)
{
	dev_info_t *dip = vdp->xs_dip;
	char *str, *xsname;
	int err;

	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
	ASSERT(vdp->xs_params_path == NULL);

	if ((xsname = xvdi_get_xsname(dip)) == NULL)
		return (B_FALSE);

	err = xenbus_read_str(xsname, "params", &str);
	if (err != 0)
		return (B_FALSE);
	vdp->xs_params_path = str;

	if (xvdi_add_xb_watch_handler(dip, xsname, "params",
	    xdb_watch_params_cb, NULL) != DDI_SUCCESS) {
		strfree(vdp->xs_params_path);
		vdp->xs_params_path = NULL;
		return (B_FALSE);
	}

	return (B_TRUE);
}
#define	LOFI_CTRL_NODE	"/dev/lofictl"
#define	LOFI_DEV_NODE	"/devices/pseudo/lofi@0:"
#define	LOFI_MODE	(FREAD | FWRITE | FEXCL)
static int
xdb_setup_node(xdb_t *vdp, char *path)
{
	dev_info_t *dip = vdp->xs_dip;
	char *xsname, *str;
	ldi_handle_t ldi_hdl;
	struct lofi_ioctl *li;
	int minor, err;

	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));

	if ((xsname = xvdi_get_xsname(dip)) == NULL)
		return (DDI_FAILURE);

	if ((err = xenbus_read_str(xsname, "type", &str)) != 0) {
		xvdi_dev_error(dip, err, "Getting type from backend device");
		return (DDI_FAILURE);
	}
	if (strcmp(str, "file") == 0)
		vdp->xs_type |= XDB_DEV_BE_LOFI;
	strfree(str);

	if (!XDB_IS_BE_LOFI(vdp)) {
		(void) strlcpy(path, vdp->xs_params_path, MAXPATHLEN);
		ASSERT(vdp->xs_lofi_path == NULL);
		return (DDI_SUCCESS);
	}

	do {
		err = ldi_open_by_name(LOFI_CTRL_NODE, LOFI_MODE, kcred,
		    &ldi_hdl, vdp->xs_ldi_li);
	} while (err == EBUSY);
	if (err != 0)
		return (DDI_FAILURE);

	li = kmem_zalloc(sizeof (*li), KM_SLEEP);
	(void) strlcpy(li->li_filename, vdp->xs_params_path,
	    sizeof (li->li_filename));
	err = ldi_ioctl(ldi_hdl, LOFI_MAP_FILE, (intptr_t)li,
	    LOFI_MODE | FKIOCTL, kcred, &minor);
	(void) ldi_close(ldi_hdl, LOFI_MODE, kcred);
	kmem_free(li, sizeof (*li));

	if (err != 0) {
		cmn_err(CE_WARN, "xdb@%s: Failed to create lofi dev for %s",
		    ddi_get_name_addr(dip), vdp->xs_params_path);
		return (DDI_FAILURE);
	}

	/*
	 * return '/devices/...' instead of '/dev/lofi/...' since the
	 * former is available immediately after calling ldi_ioctl
	 */
	(void) snprintf(path, MAXPATHLEN, LOFI_DEV_NODE "%d", minor);
	(void) xenbus_printf(XBT_NULL, xsname, "node", "%s", path);

	ASSERT(vdp->xs_lofi_path == NULL);
	vdp->xs_lofi_path = strdup(path);

	return (DDI_SUCCESS);
}
static void
xdb_teardown_node(xdb_t *vdp)
{
	dev_info_t *dip = vdp->xs_dip;
	ldi_handle_t ldi_hdl;
	struct lofi_ioctl *li;
	int err;

	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));

	if (!XDB_IS_BE_LOFI(vdp))
		return;

	vdp->xs_type &= ~XDB_DEV_BE_LOFI;
	ASSERT(vdp->xs_lofi_path != NULL);

	li = kmem_zalloc(sizeof (*li), KM_SLEEP);
	(void) strlcpy(li->li_filename, vdp->xs_params_path,
	    sizeof (li->li_filename));

	do {
		err = ldi_open_by_name(LOFI_CTRL_NODE, LOFI_MODE, kcred,
		    &ldi_hdl, vdp->xs_ldi_li);
	} while (err == EBUSY);
	if (err != 0) {
		kmem_free(li, sizeof (*li));
		return;
	}

	if (ldi_ioctl(ldi_hdl, LOFI_UNMAP_FILE, (intptr_t)li,
	    LOFI_MODE | FKIOCTL, kcred, NULL) != 0) {
		cmn_err(CE_WARN, "xdb@%s: Failed to delete lofi dev for %s",
		    ddi_get_name_addr(dip), li->li_filename);
	}

	(void) ldi_close(ldi_hdl, LOFI_MODE, kcred);
	kmem_free(li, sizeof (*li));

	strfree(vdp->xs_lofi_path);
	vdp->xs_lofi_path = NULL;
}
static int
xdb_open_device(xdb_t *vdp)
{
	dev_info_t *dip = vdp->xs_dip;
	uint64_t devsize;
	int blksize;
	char *nodepath;
	char *xsname;
	char *str;
	int err;

	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));

	if (strlen(vdp->xs_params_path) == 0) {
		/*
		 * it's possible to have no backing device when dealing
		 * with a pv cdrom drive that has no virtual cd associated
		 * with it.
		 */
		ASSERT(XDB_IS_FE_CD(vdp));
		ASSERT(vdp->xs_sectors == 0);
		ASSERT(vdp->xs_ldi_li == NULL);
		ASSERT(vdp->xs_ldi_hdl == NULL);
		return (DDI_SUCCESS);
	}

	/*
	 * after the hotplug scripts have "connected" the device, check to
	 * see if we're using a dynamic device. If so, replace the params
	 * path with the dynamic one.
	 */
	xsname = xvdi_get_xsname(dip);
	err = xenbus_read_str(xsname, "dynamic-device-path", &str);
	if (err == 0) {
		strfree(vdp->xs_params_path);
		vdp->xs_params_path = str;
	}

	if (ldi_ident_from_dip(dip, &vdp->xs_ldi_li) != 0)
		return (DDI_FAILURE);

	nodepath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);

	/* try to open backend device */
	if (xdb_setup_node(vdp, nodepath) != DDI_SUCCESS) {
		xvdi_dev_error(dip, ENXIO,
		    "Getting device path of backend device");
		ldi_ident_release(vdp->xs_ldi_li);
		kmem_free(nodepath, MAXPATHLEN);
		return (DDI_FAILURE);
	}

	if (ldi_open_by_name(nodepath,
	    FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE),
	    kcred, &vdp->xs_ldi_hdl, vdp->xs_ldi_li) != 0) {
		xdb_teardown_node(vdp);
		ldi_ident_release(vdp->xs_ldi_li);
		cmn_err(CE_WARN, "xdb@%s: Failed to open: %s",
		    ddi_get_name_addr(dip), nodepath);
		kmem_free(nodepath, MAXPATHLEN);
		return (DDI_FAILURE);
	}

	if (ldi_get_size(vdp->xs_ldi_hdl, &devsize) != DDI_SUCCESS) {
		(void) ldi_close(vdp->xs_ldi_hdl,
		    FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE), kcred);
		xdb_teardown_node(vdp);
		ldi_ident_release(vdp->xs_ldi_li);
		kmem_free(nodepath, MAXPATHLEN);
		return (DDI_FAILURE);
	}

	blksize = ldi_prop_get_int64(vdp->xs_ldi_hdl,
	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
	    "blksize", DEV_BSIZE);
	if (blksize == DEV_BSIZE)
		blksize = ldi_prop_get_int(vdp->xs_ldi_hdl,
		    LDI_DEV_T_ANY | DDI_PROP_DONTPASS |
		    DDI_PROP_NOTPROM, "device-blksize", DEV_BSIZE);

	vdp->xs_sec_size = blksize;
	vdp->xs_sectors = devsize / blksize;

	/* check if the underlying device is a CD/DVD disc */
	if (ldi_prop_get_int(vdp->xs_ldi_hdl,
	    LDI_DEV_T_ANY | DDI_PROP_DONTPASS,
	    INQUIRY_DEVICE_TYPE, DTYPE_DIRECT) == DTYPE_RODIRECT)
		vdp->xs_type |= XDB_DEV_BE_CD;

	/* check if the underlying device is a removable disk */
	if (ldi_prop_exists(vdp->xs_ldi_hdl,
	    LDI_DEV_T_ANY | DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
	    "removable-media"))
		vdp->xs_type |= XDB_DEV_BE_RMB;

	kmem_free(nodepath, MAXPATHLEN);
	return (DDI_SUCCESS);
}
static void
xdb_close_device(xdb_t *vdp)
{
	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));

	if (strlen(vdp->xs_params_path) == 0) {
		ASSERT(XDB_IS_FE_CD(vdp));
		ASSERT(vdp->xs_sectors == 0);
		ASSERT(vdp->xs_ldi_li == NULL);
		ASSERT(vdp->xs_ldi_hdl == NULL);
		return;
	}

	(void) ldi_close(vdp->xs_ldi_hdl,
	    FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE), kcred);
	xdb_teardown_node(vdp);
	ldi_ident_release(vdp->xs_ldi_li);
	vdp->xs_type &= ~(XDB_DEV_BE_CD | XDB_DEV_BE_RMB);
	vdp->xs_sectors = 0;
	vdp->xs_ldi_li = NULL;
	vdp->xs_ldi_hdl = NULL;
}
/*
 * Kick off the connect process.
 * If xs_fe_initialised == B_TRUE and xs_hp_connected == B_TRUE,
 * xs_if_connected will be changed to B_TRUE on success.
 */
static void
xdb_start_connect(xdb_t *vdp)
{
	xenbus_transaction_t xbt;
	dev_info_t *dip = vdp->xs_dip;
	boolean_t fb_exists;
	int err, instance = ddi_get_instance(dip);
	uint64_t sectors;
	uint_t dinfo, ssize;
	char *xsname;

	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));

	if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
	    ((vdp->xs_peer = xvdi_get_oeid(dip)) == (domid_t)-1))
		return;

	mutex_enter(&vdp->xs_iomutex);
	/*
	 * if the hotplug scripts haven't run or if the frontend is not
	 * initialized, then we can't try to connect.
	 */
	if (!vdp->xs_hp_connected || !vdp->xs_fe_initialised) {
		ASSERT(!vdp->xs_if_connected);
		mutex_exit(&vdp->xs_iomutex);
		return;
	}

	/* If we're already connected then there's nothing todo */
	if (vdp->xs_if_connected) {
		mutex_exit(&vdp->xs_iomutex);
		return;
	}
	mutex_exit(&vdp->xs_iomutex);

	/*
	 * Start connecting to the frontend only when the backend device is
	 * ready and the frontend has moved to XenbusStateInitialised, which
	 * means it is ready to connect.
	 */
	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE,
	    "xdb@%s: starting connection process", ddi_get_name_addr(dip)));

	if (xdb_open_device(vdp) != DDI_SUCCESS)
		return;

	if (xdb_bindto_frontend(vdp) != DDI_SUCCESS) {
		xdb_close_device(vdp);
		return;
	}

	/* init i/o requests */
	xdb_init_ioreqs(vdp);

	if (ddi_add_intr(dip, 0, NULL, NULL, xdb_intr, (caddr_t)vdp)
	    != DDI_SUCCESS) {
		xdb_uninit_ioreqs(vdp);
		xdb_unbindfrom_frontend(vdp);
		xdb_close_device(vdp);
		return;
	}

	dinfo = 0;
	if (XDB_IS_RO(vdp))
		dinfo |= VDISK_READONLY;
	if (XDB_IS_BE_RMB(vdp))
		dinfo |= VDISK_REMOVABLE;
	if (XDB_IS_BE_CD(vdp))
		dinfo |= VDISK_CDROM;
	if (XDB_IS_FE_CD(vdp))
		dinfo |= VDISK_REMOVABLE | VDISK_CDROM;

	/*
	 * we can receive intr any time from now on
	 * mark that we're ready to take intr
	 */
	mutex_enter(&vdp->xs_iomutex);
	ASSERT(vdp->xs_fe_initialised);
	vdp->xs_if_connected = B_TRUE;
	mutex_exit(&vdp->xs_iomutex);

trans_retry:
	/* write into xenstore the info needed by frontend */
	if ((err = xenbus_transaction_start(&xbt)) != 0) {
		xvdi_dev_error(dip, err, "connect transaction init");
		goto errout;
	}

	/* If feature-barrier isn't present in xenstore, add it. */
	fb_exists = xenbus_exists(xsname, XBP_FB);

	ssize = (vdp->xs_sec_size == 0) ? DEV_BSIZE : vdp->xs_sec_size;
	sectors = vdp->xs_sectors;
	if (((!fb_exists &&
	    (err = xenbus_printf(xbt, xsname, XBP_FB, "%d", 1)))) ||
	    (err = xenbus_printf(xbt, xsname, XBP_INFO, "%u", dinfo)) ||
	    (err = xenbus_printf(xbt, xsname, XBP_SECTOR_SIZE, "%u", ssize)) ||
	    (err = xenbus_printf(xbt, xsname,
	    XBP_SECTORS, "%"PRIu64, sectors)) ||
	    (err = xenbus_printf(xbt, xsname, "instance", "%d", instance)) ||
	    ((err = xvdi_switch_state(dip, xbt, XenbusStateConnected)) > 0)) {
		(void) xenbus_transaction_end(xbt, 1);
		xvdi_dev_error(dip, err, "connect transaction setup");
		goto errout;
	}

	if ((err = xenbus_transaction_end(xbt, 0)) != 0) {
		if (err == EAGAIN) {
			/* transaction is ended, don't need to abort it */
			goto trans_retry;
		}
		xvdi_dev_error(dip, err, "connect transaction commit");
		goto errout;
	}

	return;

errout:
	xdb_close(dip);
}
/*
 * Disconnect from frontend and close backend device
 */
static void
xdb_close(dev_info_t *dip)
{
	xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip);

	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
	mutex_enter(&vdp->xs_iomutex);

	/*
	 * if the hotplug scripts haven't run or if the frontend is not
	 * initialized, then we can't be connected, so there's no
	 * connection to close.
	 */
	if (!vdp->xs_hp_connected || !vdp->xs_fe_initialised) {
		ASSERT(!vdp->xs_if_connected);
		mutex_exit(&vdp->xs_iomutex);
		return;
	}

	/* if we're not connected, there's nothing to do */
	if (!vdp->xs_if_connected) {
		cv_broadcast(&vdp->xs_iocv);
		mutex_exit(&vdp->xs_iomutex);
		return;
	}

	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "closing while connected"));

	vdp->xs_if_connected = B_FALSE;
	cv_broadcast(&vdp->xs_iocv);

	mutex_exit(&vdp->xs_iomutex);

	/* stop accepting I/O request from frontend */
	ddi_remove_intr(dip, 0, NULL);

	/* clear all on-going I/Os, if any */
	mutex_enter(&vdp->xs_iomutex);
	while (vdp->xs_ionum > 0)
		cv_wait(&vdp->xs_ionumcv, &vdp->xs_iomutex);
	mutex_exit(&vdp->xs_iomutex);

	/* clean up resources and close this interface */
	xdb_uninit_ioreqs(vdp);
	xdb_unbindfrom_frontend(vdp);
	xdb_close_device(vdp);
	vdp->xs_peer = (domid_t)-1;
}
static void
xdb_send_buf(void *arg)
{
	xdb_t *vdp = (xdb_t *)arg;
	buf_t *bp;
	int err;

	mutex_enter(&vdp->xs_iomutex);
	while (vdp->xs_send_buf) {
		if ((bp = vdp->xs_f_iobuf) == NULL) {
			/* wait for some io to send */
			XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
			    "send buf waiting for io"));
			cv_wait(&vdp->xs_iocv, &vdp->xs_iomutex);
			continue;
		}

		vdp->xs_f_iobuf = bp->av_forw;
		bp->av_forw = NULL;

		mutex_exit(&vdp->xs_iomutex);
		if (bp->b_bcount == 0) {
			/* no I/O needs to be done */
			(void) xdb_biodone(bp);
			mutex_enter(&vdp->xs_iomutex);
			continue;
		}

		err = EIO;
		if (vdp->xs_ldi_hdl != NULL)
			err = ldi_strategy(vdp->xs_ldi_hdl, bp);
		if (err != 0) {
			bp->b_flags |= B_ERROR;
			(void) xdb_biodone(bp);
			XDB_DBPRINT(XDB_DBG_IO, (CE_WARN,
			    "xdb@%s: sent buf to backend dev failed, err=%d",
			    ddi_get_name_addr(vdp->xs_dip), err));
		} else {
			XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
			    "sent buf to backend ok"));
		}
		mutex_enter(&vdp->xs_iomutex);
	}
	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "send buf finishing"));
	mutex_exit(&vdp->xs_iomutex);
}
/*ARGSUSED*/
static void
xdb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg,
    void *impl_data)
{
	xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data;
	xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip);

	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: "
	    "hotplug status change to %d!", ddi_get_name_addr(dip), state));

	if (state != Connected)
		return;

	mutex_enter(&vdp->xs_cbmutex);

	/* If hotplug scripts have already run, there's nothing todo */
	if (vdp->xs_hp_connected) {
		mutex_exit(&vdp->xs_cbmutex);
		return;
	}

	vdp->xs_hp_connected = B_TRUE;
	xdb_start_connect(vdp);
	mutex_exit(&vdp->xs_cbmutex);
}
/*ARGSUSED*/
static void
xdb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg,
    void *impl_data)
{
	XenbusState new_state = *(XenbusState *)impl_data;
	xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip);

	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: "
	    "otherend state change to %d!", ddi_get_name_addr(dip),
	    new_state));

	mutex_enter(&vdp->xs_cbmutex);

	/*
	 * Now it'd really be nice if there was a well defined state
	 * transition model for xen frontend drivers, but unfortunately
	 * there isn't. So we're stuck with assuming that all state
	 * transitions are possible, and we'll just have to deal with
	 * them regardless of what state we're in.
	 */
	switch (new_state) {
	case XenbusStateUnknown:
	case XenbusStateInitialising:
	case XenbusStateInitWait:
		/* tear down our connection to the frontend */
		xdb_close(dip);
		vdp->xs_fe_initialised = B_FALSE;
		break;

	case XenbusStateInitialised:
		/*
		 * If we were connected, then we need to drop the connection
		 * and re-negotiate it.
		 */
		xdb_close(dip);
		vdp->xs_fe_initialised = B_TRUE;
		xdb_start_connect(vdp);
		break;

	case XenbusStateConnected:
		/* nothing todo here other than congratulate the frontend */
		break;

	case XenbusStateClosing:
		/* monkey see monkey do */
		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing);
		break;

	case XenbusStateClosed:
		/* tear down our connection to the frontend */
		xdb_close(dip);
		vdp->xs_fe_initialised = B_FALSE;
		(void) xvdi_switch_state(dip, XBT_NULL, new_state);
		break;
	}

	mutex_exit(&vdp->xs_cbmutex);
}
static int
xdb_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	ddi_iblock_cookie_t ibc;
	xdb_t *vdp;
	int instance = ddi_get_instance(dip);
	char *xsname, *oename;
	char *str;

	switch (cmd) {
	case DDI_RESUME:
		return (DDI_FAILURE);
	case DDI_ATTACH:
		break;
	default:
		return (DDI_FAILURE);
	}

	if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
	    ((oename = xvdi_get_oename(dip)) == NULL))
		return (DDI_FAILURE);

	/*
	 * Disable auto-detach. This is necessary so that we don't get
	 * detached while we're disconnected from the front end.
	 */
	(void) ddi_prop_update_int(DDI_DEV_T_NONE, dip,
	    DDI_NO_AUTODETACH, 1);

	if (ddi_get_iblock_cookie(dip, 0, &ibc) != DDI_SUCCESS)
		return (DDI_FAILURE);

	if (ddi_soft_state_zalloc(xdb_statep, instance) != DDI_SUCCESS)
		return (DDI_FAILURE);

	vdp = ddi_get_soft_state(xdb_statep, instance);
	vdp->xs_dip = dip;
	mutex_init(&vdp->xs_iomutex, NULL, MUTEX_DRIVER, (void *)ibc);
	mutex_init(&vdp->xs_cbmutex, NULL, MUTEX_DRIVER, (void *)ibc);
	cv_init(&vdp->xs_iocv, NULL, CV_DRIVER, NULL);
	cv_init(&vdp->xs_ionumcv, NULL, CV_DRIVER, NULL);
	ddi_set_driver_private(dip, vdp);

	if (!xdb_kstat_init(vdp))
		goto errout1;

	/* Check if the frontend device is supposed to be a cdrom */
	if (xenbus_read_str(oename, XBP_DEV_TYPE, &str) != 0)
		return (DDI_FAILURE);
	if (strcmp(str, XBV_DEV_TYPE_CD) == 0)
		vdp->xs_type |= XDB_DEV_FE_CD;
	strfree(str);

	/* Check if the frontend device is supposed to be read only */
	if (xenbus_read_str(xsname, "mode", &str) != 0)
		return (DDI_FAILURE);
	if ((strcmp(str, "r") == 0) || (strcmp(str, "ro") == 0))
		vdp->xs_type |= XDB_DEV_RO;
	strfree(str);

	mutex_enter(&vdp->xs_cbmutex);
	if (!xdb_media_req_init(vdp) || !xdb_params_init(vdp)) {
		xvdi_remove_xb_watch_handlers(dip);
		mutex_exit(&vdp->xs_cbmutex);
		goto errout2;
	}
	mutex_exit(&vdp->xs_cbmutex);

	vdp->xs_send_buf = B_TRUE;
	vdp->xs_iotaskq = ddi_taskq_create(dip, "xdb_iotask", 1,
	    TASKQ_DEFAULTPRI, 0);
	(void) ddi_taskq_dispatch(vdp->xs_iotaskq, xdb_send_buf, vdp,
	    DDI_SLEEP);

	/* Watch frontend and hotplug state change */
	if ((xvdi_add_event_handler(dip, XS_OE_STATE, xdb_oe_state_change,
	    NULL) != DDI_SUCCESS) ||
	    (xvdi_add_event_handler(dip, XS_HP_STATE, xdb_hp_state_change,
	    NULL) != DDI_SUCCESS))
		goto errout3;

	/*
	 * Kick-off hotplug script
	 */
	if (xvdi_post_event(dip, XEN_HP_ADD) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "xdb@%s: failed to start hotplug script",
		    ddi_get_name_addr(dip));
		goto errout3;
	}

	/*
	 * start waiting for hotplug event and otherend state event
	 * mainly for debugging, frontend will not take any op seeing this
	 */
	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait);

	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: attached!",
	    ddi_get_name_addr(dip)));
	return (DDI_SUCCESS);

errout3:
	ASSERT(vdp->xs_hp_connected && vdp->xs_if_connected);

	xvdi_remove_event_handler(dip, NULL);

	/* Disconnect from the backend */
	mutex_enter(&vdp->xs_cbmutex);
	mutex_enter(&vdp->xs_iomutex);
	vdp->xs_send_buf = B_FALSE;
	cv_broadcast(&vdp->xs_iocv);
	mutex_exit(&vdp->xs_iomutex);
	mutex_exit(&vdp->xs_cbmutex);

	/* wait for all io to drain and destroy io taskq */
	ddi_taskq_destroy(vdp->xs_iotaskq);

	/* tear down block-configure watch */
	mutex_enter(&vdp->xs_cbmutex);
	xvdi_remove_xb_watch_handlers(dip);
	mutex_exit(&vdp->xs_cbmutex);

errout2:
	kstat_delete(vdp->xs_kstats);

errout1:
	/* free up driver state */
	ddi_set_driver_private(dip, NULL);
	cv_destroy(&vdp->xs_iocv);
	cv_destroy(&vdp->xs_ionumcv);
	mutex_destroy(&vdp->xs_cbmutex);
	mutex_destroy(&vdp->xs_iomutex);
	ddi_soft_state_free(xdb_statep, instance);

	return (DDI_FAILURE);
}
static int
xdb_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int instance = ddi_get_instance(dip);
	xdb_t *vdp = XDB_INST2SOFTS(instance);

	switch (cmd) {
	case DDI_SUSPEND:
		return (DDI_FAILURE);
	case DDI_DETACH:
		break;
	default:
		return (DDI_FAILURE);
	}

	/* DDI_DETACH handling */

	/* refuse to detach if we're still in use by the frontend */
	mutex_enter(&vdp->xs_iomutex);
	if (vdp->xs_if_connected) {
		mutex_exit(&vdp->xs_iomutex);
		return (DDI_FAILURE);
	}
	vdp->xs_send_buf = B_FALSE;
	cv_broadcast(&vdp->xs_iocv);
	mutex_exit(&vdp->xs_iomutex);

	xvdi_remove_event_handler(dip, NULL);
	(void) xvdi_post_event(dip, XEN_HP_REMOVE);

	ddi_taskq_destroy(vdp->xs_iotaskq);

	mutex_enter(&vdp->xs_cbmutex);
	xvdi_remove_xb_watch_handlers(dip);
	mutex_exit(&vdp->xs_cbmutex);

	cv_destroy(&vdp->xs_iocv);
	cv_destroy(&vdp->xs_ionumcv);
	mutex_destroy(&vdp->xs_cbmutex);
	mutex_destroy(&vdp->xs_iomutex);
	kstat_delete(vdp->xs_kstats);
	ddi_set_driver_private(dip, NULL);
	ddi_soft_state_free(xdb_statep, instance);

	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: detached!",
	    ddi_get_name_addr(dip)));
	return (DDI_SUCCESS);
}
static struct dev_ops xdb_dev_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	ddi_getinfo_1to1,	/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	xdb_attach,		/* devo_attach */
	xdb_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	NULL,			/* devo_cb_ops */
	NULL,			/* devo_bus_ops */
	NULL,			/* devo_power */
	ddi_quiesce_not_needed,	/* devo_quiesce */
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module. */
	"vbd backend driver",	/* Name of the module */
	&xdb_dev_ops		/* driver ops */
};

static struct modlinkage xdb_modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};
int
_init(void)
{
	int rv;

	if ((rv = ddi_soft_state_init((void **)&xdb_statep,
	    sizeof (xdb_t), 0)) == 0)
		if ((rv = mod_install(&xdb_modlinkage)) != 0)
			ddi_soft_state_fini((void **)&xdb_statep);
	return (rv);
}

int
_fini(void)
{
	int rv;

	if ((rv = mod_remove(&xdb_modlinkage)) != 0)
		return (rv);
	ddi_soft_state_fini((void **)&xdb_statep);
	return (rv);
}
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&xdb_modlinkage, modinfop));
}
static int
xdb_get_request(xdb_t *vdp, blkif_request_t *req)
{
	void *src = xvdi_ring_get_request(vdp->xs_ring);

	if (src == NULL)
		return (0);

	switch (vdp->xs_blk_protocol) {
	case BLKIF_PROTOCOL_NATIVE:
		(void) memcpy(req, src, sizeof (*req));
		break;
	case BLKIF_PROTOCOL_X86_32:
		blkif_get_x86_32_req(req, src);
		break;
	case BLKIF_PROTOCOL_X86_64:
		blkif_get_x86_64_req(req, src);
		break;
	default:
		cmn_err(CE_PANIC, "xdb@%s: unrecognised protocol: %d",
		    ddi_get_name_addr(vdp->xs_dip),
		    vdp->xs_blk_protocol);
	}
	return (1);
}
static int
xdb_push_response(xdb_t *vdp, uint64_t id, uint8_t op, uint16_t status)
{
	ddi_acc_handle_t acchdl = vdp->xs_ring_hdl;
	blkif_response_t *rsp = xvdi_ring_get_response(vdp->xs_ring);
	blkif_x86_32_response_t *rsp_32 = (blkif_x86_32_response_t *)rsp;
	blkif_x86_64_response_t *rsp_64 = (blkif_x86_64_response_t *)rsp;

	ASSERT(rsp);

	switch (vdp->xs_blk_protocol) {
	case BLKIF_PROTOCOL_NATIVE:
		ddi_put64(acchdl, &rsp->id, id);
		ddi_put8(acchdl, &rsp->operation, op);
		ddi_put16(acchdl, (uint16_t *)&rsp->status,
		    status == 0 ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR);
		break;
	case BLKIF_PROTOCOL_X86_32:
		ddi_put64(acchdl, &rsp_32->id, id);
		ddi_put8(acchdl, &rsp_32->operation, op);
		ddi_put16(acchdl, (uint16_t *)&rsp_32->status,
		    status == 0 ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR);
		break;
	case BLKIF_PROTOCOL_X86_64:
		ddi_put64(acchdl, &rsp_64->id, id);
		ddi_put8(acchdl, &rsp_64->operation, op);
		ddi_put16(acchdl, (uint16_t *)&rsp_64->status,
		    status == 0 ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR);
		break;
	default:
		cmn_err(CE_PANIC, "xdb@%s: unrecognised protocol: %d",
		    ddi_get_name_addr(vdp->xs_dip),
		    vdp->xs_blk_protocol);
	}

	return (xvdi_ring_push_response(vdp->xs_ring));
}
static void
blkif_get_x86_32_req(blkif_request_t *dst, blkif_x86_32_request_t *src)
{
	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;

	dst->operation = src->operation;
	dst->nr_segments = src->nr_segments;
	dst->handle = src->handle;
	dst->id = src->id;
	dst->sector_number = src->sector_number;
	if (n > src->nr_segments)
		n = src->nr_segments;
	for (i = 0; i < n; i++)
		dst->seg[i] = src->seg[i];
}
static void
blkif_get_x86_64_req(blkif_request_t *dst, blkif_x86_64_request_t *src)
{
	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;

	dst->operation = src->operation;
	dst->nr_segments = src->nr_segments;
	dst->handle = src->handle;
	dst->id = src->id;
	dst->sector_number = src->sector_number;
	if (n > src->nr_segments)
		n = src->nr_segments;
	for (i = 0; i < n; i++)
		dst->seg[i] = src->seg[i];
}
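/*
 * These field-by-field translators are needed because the 32-bit and
 * 64-bit blkif request layouts differ: under 32-bit x86 alignment rules
 * the 64-bit id and sector_number fields sit at different offsets (and
 * the struct has a different overall size) than in the native 64-bit
 * layout, so a flat memcpy of a foreign-ABI ring entry would misread
 * every field after the first misaligned one.
 */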