Drop main() prototype. Syncs with NetBSD-8
[minix.git] / minix / drivers / storage / vnd / vnd.c
blobc751f4a263decc8efcad9001bbd8e5cb00b50bc9
1 /* VNode Disk driver, by D.C. van Moolenbroek <david@minix3.org> */
3 #include <minix/drivers.h>
4 #include <minix/blockdriver.h>
5 #include <minix/drvlib.h>
6 #include <sys/ioctl.h>
7 #include <sys/mman.h>
8 #include <sys/stat.h>
9 #include <fcntl.h>
10 #include <assert.h>
12 #define VND_BUF_SIZE 65536
14 static struct {
15 int fd; /* file descriptor for the underlying file */
16 int openct; /* number of times the device is open */
17 int exiting; /* exit after the last close? */
18 int rdonly; /* is the device set up read-only? */
19 dev_t dev; /* device on which the file resides */
20 ino_t ino; /* inode number of the file */
21 struct device part[DEV_PER_DRIVE]; /* partition bases and sizes */
22 struct device subpart[SUB_PER_DRIVE]; /* same for subpartitions */
23 struct part_geom geom; /* geometry information */
24 char *buf; /* intermediate I/O transfer buffer */
25 } state;
27 static unsigned int instance;
29 static int vnd_open(devminor_t, int);
30 static int vnd_close(devminor_t);
31 static int vnd_transfer(devminor_t, int, u64_t, endpoint_t, iovec_t *,
32 unsigned int, int);
33 static int vnd_ioctl(devminor_t, unsigned long, endpoint_t, cp_grant_id_t,
34 endpoint_t);
35 static struct device *vnd_part(devminor_t);
36 static void vnd_geometry(devminor_t, struct part_geom *);
38 static struct blockdriver vnd_dtab = {
39 .bdr_type = BLOCKDRIVER_TYPE_DISK,
40 .bdr_open = vnd_open,
41 .bdr_close = vnd_close,
42 .bdr_transfer = vnd_transfer,
43 .bdr_ioctl = vnd_ioctl,
44 .bdr_part = vnd_part,
45 .bdr_geometry = vnd_geometry
49 * Parse partition tables.
51 static void
52 vnd_partition(void)
54 memset(state.part, 0, sizeof(state.part));
55 memset(state.subpart, 0, sizeof(state.subpart));
57 state.part[0].dv_size = state.geom.size;
59 partition(&vnd_dtab, 0, P_PRIMARY, FALSE /*atapi*/);
63 * Open a device.
65 static int
66 vnd_open(devminor_t minor, int access)
68 /* No sub/partition devices are available before initialization. */
69 if (state.fd == -1 && minor != 0)
70 return ENXIO;
71 else if (state.fd != -1 && vnd_part(minor) == NULL)
72 return ENXIO;
75 * If the device either is not configured or configured as read-only,
76 * block open calls that request write permission. This is what user-
77 * land expects, although it does mean that vnconfig(8) has to open the
78 * device as read-only in order to (un)configure it.
80 if (access & BDEV_W_BIT) {
81 if (state.fd == -1)
82 return ENXIO;
83 if (state.rdonly)
84 return EACCES;
88 * Userland expects that if the device is opened after having been
89 * fully closed, partition tables are (re)parsed. Since we already
90 * parse partition tables upon initialization, we could skip this for
91 * the first open, but that would introduce more state.
93 if (state.fd != -1 && state.openct == 0) {
94 vnd_partition();
96 /* Make sure our target device didn't just disappear. */
97 if (vnd_part(minor) == NULL)
98 return ENXIO;
101 state.openct++;
103 return OK;
107 * Close a device.
109 static int
110 vnd_close(devminor_t UNUSED(minor))
112 if (state.openct == 0) {
113 printf("VND%u: closing already-closed device\n", instance);
114 return EINVAL;
117 state.openct--;
119 if (state.exiting)
120 blockdriver_terminate();
122 return OK;
126 * Copy a number of bytes from or to the caller, to or from the intermediate
127 * buffer. If the given endpoint is SELF, a local memory copy must be made.
129 static int
130 vnd_copy(iovec_s_t *iov, size_t iov_off, size_t bytes, endpoint_t endpt,
131 int do_write)
133 struct vscp_vec vvec[SCPVEC_NR], *vvp;
134 size_t off, chunk;
135 int count;
136 char *ptr;
138 assert(bytes > 0 && bytes <= VND_BUF_SIZE);
140 vvp = vvec;
141 count = 0;
143 for (off = 0; off < bytes; off += chunk) {
144 chunk = MIN(bytes - off, iov->iov_size - iov_off);
146 if (endpt == SELF) {
147 ptr = (char *) iov->iov_grant + iov_off;
149 if (do_write)
150 memcpy(&state.buf[off], ptr, chunk);
151 else
152 memcpy(ptr, &state.buf[off], chunk);
153 } else {
154 assert(count < SCPVEC_NR); /* SCPVEC_NR >= NR_IOREQS */
156 vvp->v_from = do_write ? endpt : SELF;
157 vvp->v_to = do_write ? SELF : endpt;
158 vvp->v_bytes = chunk;
159 vvp->v_gid = iov->iov_grant;
160 vvp->v_offset = iov_off;
161 vvp->v_addr = (vir_bytes) &state.buf[off];
163 vvp++;
164 count++;
167 iov_off += chunk;
168 if (iov_off == iov->iov_size) {
169 iov++;
170 iov_off = 0;
174 if (endpt != SELF)
175 return sys_vsafecopy(vvec, count);
176 else
177 return OK;
181 * Advance the given I/O vector, and the offset into its first element, by the
182 * given number of bytes.
184 static iovec_s_t *
185 vnd_advance(iovec_s_t *iov, size_t *iov_offp, size_t bytes)
187 size_t iov_off;
189 assert(bytes > 0 && bytes <= VND_BUF_SIZE);
191 iov_off = *iov_offp;
193 while (bytes > 0) {
194 if (bytes >= iov->iov_size - iov_off) {
195 bytes -= iov->iov_size - iov_off;
196 iov++;
197 iov_off = 0;
198 } else {
199 iov_off += bytes;
200 bytes = 0;
204 *iov_offp = iov_off;
205 return iov;
209 * Perform data transfer on the selected device.
211 static int
212 vnd_transfer(devminor_t minor, int do_write, u64_t position,
213 endpoint_t endpt, iovec_t *iovt, unsigned int nr_req, int flags)
215 struct device *dv;
216 iovec_s_t *iov;
217 size_t off, chunk, bytes, iov_off;
218 ssize_t r;
219 unsigned int i;
221 iov = (iovec_s_t *) iovt;
223 if (state.fd == -1 || (dv = vnd_part(minor)) == NULL)
224 return ENXIO;
226 /* Prevent write operations on devices opened as write-only. */
227 if (do_write && state.rdonly)
228 return EACCES;
230 /* Determine the total number of bytes to transfer. */
231 if (position >= dv->dv_size)
232 return 0;
234 bytes = 0;
236 for (i = 0; i < nr_req; i++) {
237 if (iov[i].iov_size == 0 || iov[i].iov_size > LONG_MAX)
238 return EINVAL;
239 bytes += iov[i].iov_size;
240 if (bytes > LONG_MAX)
241 return EINVAL;
244 if (bytes > dv->dv_size - position)
245 bytes = dv->dv_size - position;
247 position += dv->dv_base;
249 /* Perform the actual transfer, in chunks if necessary. */
250 iov_off = 0;
252 for (off = 0; off < bytes; off += chunk) {
253 chunk = MIN(bytes - off, VND_BUF_SIZE);
255 assert((unsigned int) (iov - (iovec_s_t *) iovt) < nr_req);
257 /* For reads, read in the data for the chunk; possibly less. */
258 if (!do_write) {
259 chunk = r = pread(state.fd, state.buf, chunk,
260 position);
262 if (r < 0) {
263 printf("VND%u: pread failed (%d)\n", instance,
264 -errno);
265 return -errno;
267 if (r == 0)
268 break;
271 /* Copy the data for this chunk from or to the caller. */
272 if ((r = vnd_copy(iov, iov_off, chunk, endpt, do_write)) < 0) {
273 printf("VND%u: data copy failed (%d)\n", instance, r);
274 return r;
277 /* For writes, write the data to the file; possibly less. */
278 if (do_write) {
279 chunk = r = pwrite(state.fd, state.buf, chunk,
280 position);
282 if (r <= 0) {
283 if (r < 0)
284 r = -errno;
285 printf("VND%u: pwrite failed (%d)\n", instance,
287 return (r < 0) ? r : EIO;
291 /* Move ahead on the I/O vector and the file position. */
292 iov = vnd_advance(iov, &iov_off, chunk);
294 position += chunk;
297 /* If force-write is requested, flush the underlying file to disk. */
298 if (do_write && (flags & BDEV_FORCEWRITE))
299 fsync(state.fd);
301 /* Return the number of bytes transferred. */
302 return off;
306 * Initialize the size and geometry for the device and any partitions. If the
307 * user provided a geometry, this will be used; otherwise, a geometry will be
308 * computed.
310 static int
311 vnd_layout(u64_t size, struct vnd_ioctl *vnd)
313 u64_t sectors;
315 state.geom.base = 0ULL;
317 if (vnd->vnd_flags & VNDIOF_HASGEOM) {
319 * The geometry determines the accessible part of the file.
320 * The resulting size must not exceed the file size.
322 state.geom.cylinders = vnd->vnd_geom.vng_ncylinders;
323 state.geom.heads = vnd->vnd_geom.vng_ntracks;
324 state.geom.sectors = vnd->vnd_geom.vng_nsectors;
326 state.geom.size = (u64_t) state.geom.cylinders *
327 state.geom.heads * state.geom.sectors *
328 vnd->vnd_geom.vng_secsize;
329 if (state.geom.size == 0 || state.geom.size > size)
330 return EINVAL;
331 } else {
332 sectors = size / SECTOR_SIZE;
333 state.geom.size = sectors * SECTOR_SIZE;
335 if (sectors >= 32 * 64) {
336 state.geom.cylinders = sectors / (32 * 64);
337 state.geom.heads = 64;
338 state.geom.sectors = 32;
339 } else {
340 state.geom.cylinders = sectors;
341 state.geom.heads = 1;
342 state.geom.sectors = 1;
347 * Parse partition tables immediately, so that (sub)partitions can be
348 * opened right away. The first open will perform the same procedure,
349 * but that is only necessary to match userland expectations.
351 vnd_partition();
353 return OK;
357 * Process I/O control requests.
359 static int
360 vnd_ioctl(devminor_t UNUSED(minor), unsigned long request, endpoint_t endpt,
361 cp_grant_id_t grant, endpoint_t user_endpt)
363 struct vnd_ioctl vnd;
364 struct vnd_user vnu;
365 struct stat st;
366 int r;
368 switch (request) {
369 case VNDIOCSET:
371 * The VND must not be busy. Note that the caller has the
372 * device open to perform the IOCTL request.
374 if (state.fd != -1 || state.openct != 1)
375 return EBUSY;
377 if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes) &vnd,
378 sizeof(vnd))) != OK)
379 return r;
382 * Issue a special VFS backcall that copies a file descriptor
383 * to the current process, from the user process ultimately
384 * making the IOCTL call. The result is either a newly
385 * allocated file descriptor or an error.
387 if ((r = copyfd(user_endpt, vnd.vnd_fildes, COPYFD_FROM)) < 0)
388 return r;
390 state.fd = r;
392 /* The target file must be regular. */
393 if (fstat(state.fd, &st) == -1) {
394 printf("VND%u: fstat failed (%d)\n", instance, -errno);
395 r = -errno;
397 if (r == OK && !S_ISREG(st.st_mode))
398 r = EINVAL;
401 * Allocate memory for an intermediate I/O transfer buffer. In
402 * order to save on memory in the common case, the buffer is
403 * only allocated when the vnd is in use. We use mmap instead
404 * of malloc to allow the memory to be actually freed later.
406 if (r == OK) {
407 state.buf = mmap(NULL, VND_BUF_SIZE, PROT_READ |
408 PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
409 if (state.buf == MAP_FAILED)
410 r = ENOMEM;
413 if (r != OK) {
414 close(state.fd);
415 state.fd = -1;
416 return r;
419 /* Set various device state fields. */
420 state.dev = st.st_dev;
421 state.ino = st.st_ino;
422 state.rdonly = !!(vnd.vnd_flags & VNDIOF_READONLY);
424 r = vnd_layout(st.st_size, &vnd);
426 /* Upon success, return the device size to userland. */
427 if (r == OK) {
428 vnd.vnd_size = state.geom.size;
430 r = sys_safecopyto(endpt, grant, 0, (vir_bytes) &vnd,
431 sizeof(vnd));
434 if (r != OK) {
435 munmap(state.buf, VND_BUF_SIZE);
436 close(state.fd);
437 state.fd = -1;
440 return r;
442 case VNDIOCCLR:
443 /* The VND can only be cleared if it has been configured. */
444 if (state.fd == -1)
445 return ENXIO;
447 if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes) &vnd,
448 sizeof(vnd))) != OK)
449 return r;
451 /* The caller has the device open to do the IOCTL request. */
452 if (!(vnd.vnd_flags & VNDIOF_FORCE) && state.openct != 1)
453 return EBUSY;
456 * Close the associated file descriptor immediately, but do not
457 * allow reuse until the device has been closed by the other
458 * users.
460 munmap(state.buf, VND_BUF_SIZE);
461 close(state.fd);
462 state.fd = -1;
464 return OK;
466 case VNDIOCGET:
468 * We need not copy in the given structure. It would contain
469 * the requested unit number, but each driver instance provides
470 * only one unit anyway.
473 memset(&vnu, 0, sizeof(vnu));
475 vnu.vnu_unit = instance;
477 /* Leave these fields zeroed if the device is not in use. */
478 if (state.fd != -1) {
479 vnu.vnu_dev = state.dev;
480 vnu.vnu_ino = state.ino;
483 return sys_safecopyto(endpt, grant, 0, (vir_bytes) &vnu,
484 sizeof(vnu));
486 case DIOCOPENCT:
487 return sys_safecopyto(endpt, grant, 0,
488 (vir_bytes) &state.openct, sizeof(state.openct));
490 case DIOCFLUSH:
491 if (state.fd == -1)
492 return ENXIO;
494 fsync(state.fd);
496 return OK;
499 return ENOTTY;
503 * Return a pointer to the partition structure for the given minor device.
505 static struct device *
506 vnd_part(devminor_t minor)
508 if (minor >= 0 && minor < DEV_PER_DRIVE)
509 return &state.part[minor];
510 else if ((unsigned int) (minor -= MINOR_d0p0s0) < SUB_PER_DRIVE)
511 return &state.subpart[minor];
512 else
513 return NULL;
517 * Return geometry information.
519 static void
520 vnd_geometry(devminor_t UNUSED(minor), struct part_geom *part)
522 part->cylinders = state.geom.cylinders;
523 part->heads = state.geom.heads;
524 part->sectors = state.geom.sectors;
528 * Initialize the device.
530 static int
531 vnd_init(int UNUSED(type), sef_init_info_t *UNUSED(info))
533 long v;
536 * No support for crash recovery. The driver would have no way to
537 * reacquire the file descriptor for the target file.
541 * The instance number is used for two purposes: reporting errors, and
542 * returning the proper unit number to userland in VNDIOCGET calls.
544 v = 0;
545 (void) env_parse("instance", "d", 0, &v, 0, 255);
546 instance = (unsigned int) v;
548 state.openct = 0;
549 state.exiting = FALSE;
550 state.fd = -1;
552 return OK;
556 * Process an incoming signal.
558 static void
559 vnd_signal(int signo)
562 /* In case of a termination signal, initiate driver shutdown. */
563 if (signo != SIGTERM)
564 return;
566 state.exiting = TRUE;
568 /* Keep running until the device has been fully closed. */
569 if (state.openct == 0)
570 blockdriver_terminate();
574 * Set callbacks and initialize the System Event Framework (SEF).
576 static void
577 vnd_startup(void)
580 /* Register init and signal callbacks. */
581 sef_setcb_init_fresh(vnd_init);
582 sef_setcb_signal_handler(vnd_signal);
584 /* Let SEF perform startup. */
585 sef_startup();
589 * Driver task.
592 main(int argc, char **argv)
595 /* Initialize the driver. */
596 env_setargs(argc, argv);
597 vnd_startup();
599 /* Process requests until shutdown. */
600 blockdriver_task(&vnd_dtab);
602 return 0;