minix/drivers/storage/virtio_blk/virtio_blk.c

/*
 * virtio block driver for MINIX 3
 *
 * Copyright (c) 2013, A. Welzel, <arne.welzel@gmail.com>
 *
 * This software is released under the BSD license. See the LICENSE file
 * included in the main directory of this source distribution for the
 * license terms and conditions.
 */

#include <assert.h>

#include <minix/drivers.h>
#include <minix/blockdriver_mt.h>
#include <minix/drvlib.h>
#include <minix/virtio.h>
#include <minix/sysutil.h>

#include <sys/ioc_disk.h>

#include "virtio_blk.h"

#define mystatus(tid)	(status_vir[(tid)] & 0xFF)

#define dprintf(s) do {				\
	printf("%s: ", name);			\
	printf s;				\
	printf("\n");				\
} while (0)

/* Number of threads to use */
#define VIRTIO_BLK_NUM_THREADS		4

/* virtio-blk blocksize is always 512 bytes */
#define VIRTIO_BLK_BLOCK_SIZE		512

static const char *const name = "virtio-blk";

/* static device handle */
static struct virtio_device *blk_dev;

static struct virtio_blk_config blk_config;

struct virtio_feature blkf[] = {
	{ "barrier",	VIRTIO_BLK_F_BARRIER,	0,	0 },
	{ "sizemax",	VIRTIO_BLK_F_SIZE_MAX,	0,	0 },
	{ "segmax",	VIRTIO_BLK_F_SEG_MAX,	0,	0 },
	{ "geometry",	VIRTIO_BLK_F_GEOMETRY,	0,	0 },
	{ "read-only",	VIRTIO_BLK_F_RO,	0,	0 },
	{ "blocksize",	VIRTIO_BLK_F_BLK_SIZE,	0,	0 },
	{ "scsi",	VIRTIO_BLK_F_SCSI,	0,	0 },
	{ "flush",	VIRTIO_BLK_F_FLUSH,	0,	0 },
	{ "topology",	VIRTIO_BLK_F_TOPOLOGY,	0,	0 },
	{ "idbytes",	VIRTIO_BLK_ID_BYTES,	0,	0 }
};

/* State information */
static int spurious_interrupt = 0;
static int terminating = 0;
static int open_count = 0;

/* Partition magic */
struct device part[DEV_PER_DRIVE];
struct device subpart[SUB_PER_DRIVE];

/* Headers for requests */
static struct virtio_blk_outhdr *hdrs_vir;
static phys_bytes hdrs_phys;

/* Status bytes for requests.
 *
 * Usually a status is only one byte in length, but we need the lowest bit
 * to propagate writable. For this reason we take u16_t and use a mask for
 * the lower byte later.
 */
static u16_t *status_vir;
static phys_bytes status_phys;

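/*
 * Both arrays above have VIRTIO_BLK_NUM_THREADS entries and are indexed by
 * the libblockdriver worker thread id (tid), so every worker thread uses its
 * own header and status slot for the request it is currently handling.
 */
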
/* Prototypes */
static int virtio_blk_open(devminor_t minor, int access);
static int virtio_blk_close(devminor_t minor);
static ssize_t virtio_blk_transfer(devminor_t minor, int write, u64_t position,
	endpoint_t endpt, iovec_t *iovec, unsigned int cnt, int flags);
static int virtio_blk_ioctl(devminor_t minor, unsigned long req,
	endpoint_t endpt, cp_grant_id_t grant, endpoint_t user_endpt);
static struct device * virtio_blk_part(devminor_t minor);
static void virtio_blk_geometry(devminor_t minor, struct part_geom *entry);
static void virtio_blk_device_intr(void);
static void virtio_blk_spurious_intr(void);
static void virtio_blk_intr(unsigned int irqs);
static int virtio_blk_device(devminor_t minor, device_id_t *id);

static int virtio_blk_flush(void);
static void virtio_blk_terminate(void);
static void virtio_blk_cleanup(void);
static int virtio_blk_status2error(u8_t status);
static int virtio_blk_alloc_requests(void);
static void virtio_blk_free_requests(void);
static int virtio_blk_feature_setup(void);
static int virtio_blk_config(void);
static int virtio_blk_probe(int skip);

/* libblockdriver driver tab */
static struct blockdriver virtio_blk_dtab = {
	.bdr_type	= BLOCKDRIVER_TYPE_DISK,
	.bdr_open	= virtio_blk_open,
	.bdr_close	= virtio_blk_close,
	.bdr_transfer	= virtio_blk_transfer,
	.bdr_ioctl	= virtio_blk_ioctl,
	.bdr_part	= virtio_blk_part,
	.bdr_geometry	= virtio_blk_geometry,
	.bdr_intr	= virtio_blk_intr,
	.bdr_device	= virtio_blk_device
};

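/*
 * blockdriver_mt_task(), called from main() below, dispatches incoming block
 * requests to the callbacks in this table; transfers are executed on the
 * worker threads configured through blockdriver_mt_set_workers().
 */
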
static int
virtio_blk_open(devminor_t minor, int access)
{
	struct device *dev = virtio_blk_part(minor);

	/* Check if this device exists */
	if (!dev)
		return ENXIO;

	/* Read-only devices should only be mounted... read-only */
	if ((access & BDEV_W_BIT) &&
	    virtio_host_supports(blk_dev, VIRTIO_BLK_F_RO))
		return EACCES;

	/* Partition magic when opened the first time or re-opened after
	 * being fully closed
	 */
	if (open_count == 0) {
		memset(part, 0, sizeof(part));
		memset(subpart, 0, sizeof(subpart));
		part[0].dv_size = blk_config.capacity * VIRTIO_BLK_BLOCK_SIZE;
		partition(&virtio_blk_dtab, 0, P_PRIMARY, 0 /* ATAPI */);
		blockdriver_mt_set_workers(0, VIRTIO_BLK_NUM_THREADS);
	}

	open_count++;
	return OK;
}

static int
virtio_blk_close(devminor_t minor)
{
	struct device *dev = virtio_blk_part(minor);

	/* Check if this device exists */
	if (!dev)
		return ENXIO;

	if (open_count == 0) {
		dprintf(("Closing one too many times?"));
		return EINVAL;
	}

	open_count--;

	/* If fully closed, flush the device and set workers to 1 */
	if (open_count == 0) {
		virtio_blk_flush();
		blockdriver_mt_set_workers(0, 1);
	}

	/* If supposed to terminate and fully closed, do it! */
	if (terminating && open_count == 0)
		virtio_blk_terminate();

	return OK;
}

static int
prepare_bufs(struct vumap_vir *vir, struct vumap_phys *phys, int cnt, int w)
{
	for (int i = 0; i < cnt; i++) {

		/* So you gave us a byte aligned buffer? Good job! */
		if (phys[i].vp_addr & 1) {
			dprintf(("byte aligned %08lx", phys[i].vp_addr));
			return EINVAL;
		}

		/* Check if the buffer is good */
		if (phys[i].vp_size != vir[i].vv_size) {
			dprintf(("Non-contig buf %08lx", phys[i].vp_addr));
			return EINVAL;
		}

		/* If write, the buffers only need to be read */
		phys[i].vp_addr |= !w;
	}

	return OK;
}

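/*
 * Note that the low bit of vp_addr doubles as a flag here: as with the
 * status bytes above, it is used to tell the virtio code downstream that the
 * device may write to this buffer, which is why buffers must be at least
 * two-byte aligned and why prepare_bufs() rejects odd addresses.
 */
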
static int
prepare_vir_vec(endpoint_t endpt, struct vumap_vir *vir, iovec_s_t *iv,
	int cnt, vir_bytes *size)
{
	/* This is pretty much the same as sum_iovec from AHCI,
	 * except that we don't support any iovecs where the size
	 * is not a multiple of 512
	 */
	vir_bytes s, total = 0;
	for (int i = 0; i < cnt; i++) {
		s = iv[i].iov_size;

		if (s == 0 || (s % VIRTIO_BLK_BLOCK_SIZE) || s > LONG_MAX) {
			dprintf(("bad iv[%d].iov_size (%lu) from %d", i, s,
				endpt));
			return EINVAL;
		}

		total += s;

		if (total > LONG_MAX) {
			dprintf(("total overflow from %d", endpt));
			return EINVAL;
		}

		if (endpt == SELF)
			vir[i].vv_addr = (vir_bytes)iv[i].iov_grant;
		else
			vir[i].vv_grant = iv[i].iov_grant;

		vir[i].vv_size = iv[i].iov_size;
	}

	*size = total;
	return OK;
}

static ssize_t
virtio_blk_transfer(devminor_t minor, int write, u64_t position,
	endpoint_t endpt, iovec_t *iovec, unsigned int cnt,
	int flags)
{
	/* Need to translate vir to phys */
	struct vumap_vir vir[NR_IOREQS];

	/* Physical addresses of buffers, including header and trailer */
	struct vumap_phys phys[NR_IOREQS + 2];

	/* Which thread is doing the transfer? */
	thread_id_t tid = blockdriver_mt_get_tid();

	vir_bytes size = 0;
	vir_bytes size_tmp = 0;
	struct device *dv;
	u64_t sector;
	u64_t end_part;
	int r, pcnt = sizeof(phys) / sizeof(phys[0]);

	iovec_s_t *iv = (iovec_s_t *)iovec;
	int access = write ? VUA_READ : VUA_WRITE;

	/* Make sure we don't touch this one anymore */
	iovec = NULL;

	if (cnt > NR_IOREQS)
		return EINVAL;

	/* position greater than capacity? */
	if (position >= blk_config.capacity * VIRTIO_BLK_BLOCK_SIZE)
		return 0;

	dv = virtio_blk_part(minor);

	/* Does device exist? */
	if (!dv)
		return ENXIO;

	position += dv->dv_base;
	end_part = dv->dv_base + dv->dv_size;

	/* Hmmm, AHCI tries to fix this up, but let's just say everything
	 * needs to be sector (512 byte) aligned...
	 */
	if (position % VIRTIO_BLK_BLOCK_SIZE) {
		dprintf(("Non sector-aligned access %016llx", position));
		return EINVAL;
	}

	sector = position / VIRTIO_BLK_BLOCK_SIZE;

	r = prepare_vir_vec(endpt, vir, iv, cnt, &size);

	if (r != OK)
		return r;

	if (position >= end_part)
		return 0;

	/* Truncate if the partition is smaller than that */
	if (position + size > end_part - 1) {
		size = end_part - position;

		/* Fix up later */
		size_tmp = 0;
		cnt = 0;
	} else {
		/* Use all buffers */
		size_tmp = size;
	}

	/* Fix up the number of vectors if size was truncated */
	while (size_tmp < size)
		size_tmp += vir[cnt++].vv_size;

	/* If the last vector was too big, just truncate it */
	if (size_tmp > size) {
		vir[cnt - 1].vv_size = vir[cnt - 1].vv_size - (size_tmp - size);
		size_tmp -= (size_tmp - size);
	}

	if (size % VIRTIO_BLK_BLOCK_SIZE) {
		dprintf(("non-sector sized read (%lu) from %d", size, endpt));
		return EINVAL;
	}

	/* Map vir to phys */
	if ((r = sys_vumap(endpt, vir, cnt, 0, access,
			&phys[1], &pcnt)) != OK) {

		dprintf(("Unable to map memory from %d (%d)", endpt, r));
		return r;
	}

	/* Prepare the header */
	memset(&hdrs_vir[tid], 0, sizeof(hdrs_vir[0]));

	if (write)
		hdrs_vir[tid].type = VIRTIO_BLK_T_OUT;
	else
		hdrs_vir[tid].type = VIRTIO_BLK_T_IN;

	hdrs_vir[tid].ioprio = 0;
	hdrs_vir[tid].sector = sector;

	/* First the header */
	phys[0].vp_addr = hdrs_phys + tid * sizeof(hdrs_vir[0]);
	phys[0].vp_size = sizeof(hdrs_vir[0]);

	/* Put the physical buffers into phys */
	if ((r = prepare_bufs(vir, &phys[1], pcnt, write)) != OK)
		return r;

	/* Put the status at the end */
	phys[pcnt + 1].vp_addr = status_phys + tid * sizeof(status_vir[0]);
	phys[pcnt + 1].vp_size = sizeof(u8_t);

	/* Status always needs write access */
	phys[1 + pcnt].vp_addr |= 1;

	/* Send addresses to queue */
	virtio_to_queue(blk_dev, 0, phys, 2 + pcnt, &tid);

	/* Wait for completion */
	blockdriver_mt_sleep();

	/* All was good */
	if (mystatus(tid) == VIRTIO_BLK_S_OK)
		return size;

	/* Error path */
	dprintf(("ERROR status=%02x sector=%llu len=%lx cnt=%d op=%s t=%d",
		mystatus(tid), sector, size, pcnt,
		write ? "write" : "read", tid));

	return virtio_blk_status2error(mystatus(tid));
}

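/*
 * To summarize the request layout used above: each request is sent to the
 * device as a chain of phys[] entries consisting of the outhdr first
 * (device-readable), then the data buffers, and finally the one-byte status
 * field (device-writable, hence the |= 1). The thread id is handed to
 * virtio_to_queue() as the per-request cookie, and virtio_from_queue() hands
 * it back in the interrupt path so the right worker can be woken up.
 */
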
static int
virtio_blk_ioctl(devminor_t minor, unsigned long req, endpoint_t endpt,
	cp_grant_id_t grant, endpoint_t UNUSED(user_endpt))
{
	switch (req) {

	case DIOCOPENCT:
		return sys_safecopyto(endpt, grant, 0,
			(vir_bytes) &open_count, sizeof(open_count));

	case DIOCFLUSH:
		return virtio_blk_flush();

	}

	return ENOTTY;
}

static struct device *
virtio_blk_part(devminor_t minor)
{
	/* There's only a single drive attached to this device, always.
	 * Let's take some shortcuts...
	 */

	/* Take care of d0 d0p0 ... */
	if (minor >= 0 && minor < DEV_PER_DRIVE)
		return &part[minor];

	/* subparts start at MINOR_d0p0s0 */
	if (minor >= MINOR_d0p0s0) {
		minor -= MINOR_d0p0s0;

		/* Only for the first disk */
		if (minor >= SUB_PER_DRIVE)
			return NULL;

		return &subpart[minor];
	}

	return NULL;
}

static void
virtio_blk_geometry(devminor_t minor, struct part_geom *entry)
{
	/* Only for the drive */
	if (minor != 0)
		return;

	/* Only if the host supports it */
	if (!virtio_host_supports(blk_dev, VIRTIO_BLK_F_GEOMETRY))
		return;

	entry->cylinders = blk_config.geometry.cylinders;
	entry->heads = blk_config.geometry.heads;
	entry->sectors = blk_config.geometry.sectors;
}

static void
virtio_blk_device_intr(void)
{
	thread_id_t *tid;

	/* Multiple requests might have finished */
	while (!virtio_from_queue(blk_dev, 0, (void **)&tid, NULL))
		blockdriver_mt_wakeup(*tid);
}

static void
virtio_blk_spurious_intr(void)
{
	/* Output a single message about spurious interrupts */
	if (spurious_interrupt)
		return;

	dprintf(("Got spurious interrupt"));
	spurious_interrupt = 1;
}

static void
virtio_blk_intr(unsigned int irqs)
{
	if (virtio_had_irq(blk_dev))
		virtio_blk_device_intr();
	else
		virtio_blk_spurious_intr();

	virtio_irq_enable(blk_dev);
}

static int
virtio_blk_device(devminor_t minor, device_id_t *id)
{
	struct device *dev = virtio_blk_part(minor);

	/* Check if this device exists */
	if (!dev)
		return ENXIO;

	*id = 0;
	return OK;
}

static int
virtio_blk_flush(void)
{
	struct vumap_phys phys[2];
	size_t phys_cnt = sizeof(phys) / sizeof(phys[0]);

	/* Which thread is doing this request? */
	thread_id_t tid = blockdriver_mt_get_tid();

	/* Host may not support flushing */
	if (!virtio_host_supports(blk_dev, VIRTIO_BLK_F_FLUSH))
		return EOPNOTSUPP;

	/* Prepare the header */
	memset(&hdrs_vir[tid], 0, sizeof(hdrs_vir[0]));
	hdrs_vir[tid].type = VIRTIO_BLK_T_FLUSH;

	/* Let this be a barrier if the host supports it */
	if (virtio_host_supports(blk_dev, VIRTIO_BLK_F_BARRIER))
		hdrs_vir[tid].type |= VIRTIO_BLK_T_BARRIER;

	/* Header and status for the queue */
	phys[0].vp_addr = hdrs_phys + tid * sizeof(hdrs_vir[0]);
	phys[0].vp_size = sizeof(hdrs_vir[0]);
	phys[1].vp_addr = status_phys + tid * sizeof(status_vir[0]);
	phys[1].vp_size = 1;

	/* Status always needs write access */
	phys[1].vp_addr |= 1;

	/* Send flush request to queue */
	virtio_to_queue(blk_dev, 0, phys, phys_cnt, &tid);

	blockdriver_mt_sleep();

	/* All was good */
	if (mystatus(tid) == VIRTIO_BLK_S_OK)
		return OK;

	/* Error path */
	dprintf(("ERROR status=%02x op=flush t=%d", mystatus(tid), tid));

	return virtio_blk_status2error(mystatus(tid));
}

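/*
 * Like virtio_blk_transfer(), the flush path reuses the calling worker's
 * hdrs_vir/status_vir slot and blocks in blockdriver_mt_sleep() until the
 * interrupt handler dequeues the request and wakes this thread up again.
 */
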
static void
virtio_blk_terminate(void)
{
	/* Don't terminate if still opened */
	if (open_count > 0)
		return;

	blockdriver_mt_terminate();
}

static void
virtio_blk_cleanup(void)
{
	/* Just free the memory we allocated */
	virtio_blk_free_requests();
	virtio_reset_device(blk_dev);
	virtio_free_queues(blk_dev);
	virtio_free_device(blk_dev);
	blk_dev = NULL;
}

static int
virtio_blk_status2error(u8_t status)
{
	/* Convert a status from the host to an error */
	switch (status) {
	case VIRTIO_BLK_S_IOERR:
		return EIO;
	case VIRTIO_BLK_S_UNSUPP:
		return ENOTSUP;
	default:
		panic("%s: unknown status: %02x", name, status);
	}

	/* Never reached */
	return OK;
}

static int
virtio_blk_alloc_requests(void)
{
	/* Allocate memory for request headers and status field */

	hdrs_vir = alloc_contig(VIRTIO_BLK_NUM_THREADS * sizeof(hdrs_vir[0]),
		AC_ALIGN4K, &hdrs_phys);

	if (!hdrs_vir)
		return ENOMEM;

	status_vir = alloc_contig(VIRTIO_BLK_NUM_THREADS * sizeof(status_vir[0]),
		AC_ALIGN4K, &status_phys);

	if (!status_vir) {
		free_contig(hdrs_vir, VIRTIO_BLK_NUM_THREADS * sizeof(hdrs_vir[0]));
		return ENOMEM;
	}

	return OK;
}

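/*
 * alloc_contig() returns both a virtual mapping and the physical address of
 * the same memory, which is what allows hdrs_phys/status_phys to be handed
 * to the device while the headers are filled in through hdrs_vir.
 */
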
static void
virtio_blk_free_requests(void)
{
	free_contig(hdrs_vir, VIRTIO_BLK_NUM_THREADS * sizeof(hdrs_vir[0]));
	free_contig(status_vir, VIRTIO_BLK_NUM_THREADS * sizeof(status_vir[0]));
}

static int
virtio_blk_feature_setup(void)
{
	/* Feature setup for virtio-blk
	 *
	 * FIXME: Besides the geometry, everything is just debug output
	 * FIXME2: magic numbers
	 */
	if (virtio_host_supports(blk_dev, VIRTIO_BLK_F_SEG_MAX)) {
		blk_config.seg_max = virtio_sread32(blk_dev, 12);
		dprintf(("Seg Max: %d", blk_config.seg_max));
	}

	if (virtio_host_supports(blk_dev, VIRTIO_BLK_F_GEOMETRY)) {
		blk_config.geometry.cylinders = virtio_sread16(blk_dev, 16);
		blk_config.geometry.heads = virtio_sread8(blk_dev, 18);
		blk_config.geometry.sectors = virtio_sread8(blk_dev, 19);

		dprintf(("Geometry: cyl=%d heads=%d sectors=%d",
			blk_config.geometry.cylinders,
			blk_config.geometry.heads,
			blk_config.geometry.sectors));
	}

	if (virtio_host_supports(blk_dev, VIRTIO_BLK_F_SIZE_MAX))
		dprintf(("Has size max"));

	if (virtio_host_supports(blk_dev, VIRTIO_BLK_F_FLUSH))
		dprintf(("Supports flushing"));

	if (virtio_host_supports(blk_dev, VIRTIO_BLK_F_BLK_SIZE)) {
		blk_config.blk_size = virtio_sread32(blk_dev, 20);
		dprintf(("Block Size: %d", blk_config.blk_size));
	}

	if (virtio_host_supports(blk_dev, VIRTIO_BLK_F_BARRIER))
		dprintf(("Supports barrier"));

	return 0;
}

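/*
 * The magic offsets used above correspond to the virtio-blk configuration
 * space layout this driver was written against (assumed here, see the FIXME):
 * capacity at offset 0 (64 bits), size_max at 8, seg_max at 12, the geometry
 * struct (cylinders, heads, sectors) at 16-19, and blk_size at 20.
 */
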
static int
virtio_blk_config(void)
{
	u32_t sectors_low, sectors_high, size_mbs;

	/* capacity is always there */
	sectors_low = virtio_sread32(blk_dev, 0);
	sectors_high = virtio_sread32(blk_dev, 4);
	blk_config.capacity = ((u64_t)sectors_high << 32) | sectors_low;

	/* If this gets truncated, you have a big disk... */
	size_mbs = (u32_t)(blk_config.capacity * 512 / 1024 / 1024);
	dprintf(("Capacity: %d MB", size_mbs));

	/* do feature setup */
	virtio_blk_feature_setup();
	return 0;
}

static int
virtio_blk_probe(int skip)
{
	int r;

	/* sub device id for virtio-blk is 0x0002 */
	blk_dev = virtio_setup_device(0x0002, name, blkf,
		sizeof(blkf) / sizeof(blkf[0]),
		VIRTIO_BLK_NUM_THREADS, skip);
	if (!blk_dev)
		return ENXIO;

	/* virtio-blk has one queue only */
	if ((r = virtio_alloc_queues(blk_dev, 1)) != OK) {
		virtio_free_device(blk_dev);
		return r;
	}

	/* Allocate memory for headers and status */
	if ((r = virtio_blk_alloc_requests()) != OK) {
		virtio_free_queues(blk_dev);
		virtio_free_device(blk_dev);
		return r;
	}

	virtio_blk_config();

	/* Let the host know that we are ready */
	virtio_device_ready(blk_dev);

	virtio_irq_enable(blk_dev);

	return OK;
}

static int
sef_cb_init_fresh(int type, sef_init_info_t *info)
{
	long instance = 0;
	int r;

	env_parse("instance", "d", 0, &instance, 0, 255);

	if ((r = virtio_blk_probe((int)instance)) == OK) {
		blockdriver_announce(type);
		return OK;
	}

	/* Error path */
	if (r == ENXIO)
		panic("%s: No device found", name);

	if (r == ENOMEM)
		panic("%s: Not enough memory", name);

	panic("%s: Unexpected failure (%d)", name, r);
}

static void
sef_cb_signal_handler(int signo)
{
	/* Ignore all signals but SIGTERM */
	if (signo != SIGTERM)
		return;

	terminating = 1;
	virtio_blk_terminate();

	/* If we get a signal when completely closed, call
	 * exit(). We only leave the blockdriver_mt_task()
	 * loop after completing a request, which is not the
	 * case for signals.
	 */
	if (open_count == 0)
		exit(0);
}

static void
sef_local_startup(void)
{
	sef_setcb_init_fresh(sef_cb_init_fresh);
	sef_setcb_signal_handler(sef_cb_signal_handler);

	/* Enable support for live update. */
	blockdriver_mt_support_lu();

	sef_startup();
}

int
main(int argc, char **argv)
{
	env_setargs(argc, argv);
	sef_local_startup();

	blockdriver_mt_task(&virtio_blk_dtab);

	dprintf(("Terminating"));
	virtio_blk_cleanup();

	return OK;
}