Sync usage with man page.
[netbsd-mini2440.git] / external / gpl2 / lvm2 / dist / lib / device / dev-io.c
blobb644b579dee07462571565559884cde04e6ecfc6
1 /* $NetBSD: dev-io.c,v 1.5 2009/12/02 00:58:03 haad Exp $ */
3 /*
4 * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
5 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
7 * This file is part of LVM2.
9 * This copyrighted material is made available to anyone wishing to use,
10 * modify, copy, or redistribute it subject to the terms and conditions
11 * of the GNU Lesser General Public License v.2.1.
13 * You should have received a copy of the GNU Lesser General Public License
14 * along with this program; if not, write to the Free Software Foundation,
15 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 #include "lib.h"
19 #include "lvm-types.h"
20 #include "device.h"
21 #include "metadata.h"
22 #include "lvmcache.h"
23 #include "memlock.h"
24 #include "locking.h"
26 #include <limits.h>
27 #include <sys/stat.h>
28 #include <fcntl.h>
29 #include <unistd.h>
30 #include <sys/ioctl.h>
32 #ifdef linux
33 # define u64 uint64_t /* Missing without __KERNEL__ */
34 # undef WNOHANG /* Avoid redefinition */
35 # undef WUNTRACED /* Avoid redefinition */
36 # include <linux/fs.h> /* For block ioctl definitions */
37 # define BLKSIZE_SHIFT SECTOR_SHIFT
38 # ifndef BLKGETSIZE64 /* fs.h out-of-date */
39 # define BLKGETSIZE64 _IOR(0x12, 114, size_t)
40 # endif /* BLKGETSIZE64 */
41 #elif __NetBSD__
42 # include <sys/disk.h>
43 # include <sys/disklabel.h>
44 # include <sys/param.h>
45 #else
46 # include <sys/disk.h>
47 # define BLKBSZGET DKIOCGETBLOCKSIZE
48 # define BLKSSZGET DKIOCGETBLOCKSIZE
49 # define BLKGETSIZE64 DKIOCGETBLOCKCOUNT
50 # define BLKFLSBUF DKIOCSYNCHRONIZECACHE
51 # define BLKSIZE_SHIFT 0
52 #endif
54 #ifdef O_DIRECT_SUPPORT
55 # ifndef O_DIRECT
56 # error O_DIRECT support configured but O_DIRECT definition not found in headers
57 # endif
58 #endif
60 static DM_LIST_INIT(_open_devices);
62 /*-----------------------------------------------------------------
63 * The standard io loop that keeps submitting an io until it's
64 * all gone.
65 *---------------------------------------------------------------*/
66 static int _io(struct device_area *where, void *buffer, int should_write)
68 int fd = dev_fd(where->dev);
69 ssize_t n = 0;
70 size_t total = 0;
72 if (fd < 0) {
73 log_error("Attempt to read an unopened device (%s).",
74 dev_name(where->dev));
75 return 0;
79 * Skip all writes in test mode.
81 if (should_write && test_mode())
82 return 1;
84 if (where->size > SSIZE_MAX) {
85 log_error("Read size too large: %" PRIu64, where->size);
86 return 0;
89 if (lseek(fd, (off_t) where->start, SEEK_SET) < 0) {
90 log_error("%s: lseek %" PRIu64 " failed: %s",
91 dev_name(where->dev), (uint64_t) where->start,
92 strerror(errno));
93 return 0;
96 while (total < (size_t) where->size) {
98 n = should_write ?
99 write(fd, buffer, (size_t) where->size - total) :
100 read(fd, buffer, (size_t) where->size - total);
101 while ((n < 0) && ((errno == EINTR) || (errno == EAGAIN)));
103 if (n < 0)
104 log_error("%s: %s failed after %" PRIu64 " of %" PRIu64
105 " at %" PRIu64 ": %s", dev_name(where->dev),
106 should_write ? "write" : "read",
107 (uint64_t) total,
108 (uint64_t) where->size,
109 (uint64_t) where->start, strerror(errno));
111 if (n <= 0)
112 break;
114 total += n;
115 buffer += n;
118 return (total == (size_t) where->size);
121 /*-----------------------------------------------------------------
122 * LVM2 uses O_DIRECT when performing metadata io, which requires
123 * block size aligned accesses. If any io is not aligned we have
124 * to perform the io via a bounce buffer, obviously this is quite
125 * inefficient.
126 *---------------------------------------------------------------*/
129 * Get the sector size from an _open_ device.
131 static int _get_block_size(struct device *dev, unsigned int *size)
133 const char *name = dev_name(dev);
134 #ifdef __NetBSD__
135 struct disklabel lab;
136 #endif
138 if ((dev->block_size == -1)) {
139 #ifdef __NetBSD__
140 if (ioctl(dev_fd(dev), DIOCGDINFO, &lab) < 0) {
141 dev->block_size = DEV_BSIZE;
142 } else
143 dev->block_size = lab.d_secsize;
144 #else
145 if (ioctl(dev_fd(dev), BLKBSZGET, &dev->block_size) < 0) {
146 log_sys_error("ioctl BLKBSZGET", name);
147 return 0;
149 #endif
150 log_debug("%s: block size is %u bytes", name, dev->block_size);
153 *size = (unsigned int) dev->block_size;
155 return 1;
159 * Widens a region to be an aligned region.
161 static void _widen_region(unsigned int block_size, struct device_area *region,
162 struct device_area *result)
164 uint64_t mask = block_size - 1, delta;
165 memcpy(result, region, sizeof(*result));
167 /* adjust the start */
168 delta = result->start & mask;
169 if (delta) {
170 result->start -= delta;
171 result->size += delta;
174 /* adjust the end */
175 delta = (result->start + result->size) & mask;
176 if (delta)
177 result->size += block_size - delta;
180 static int _aligned_io(struct device_area *where, void *buffer,
181 int should_write)
183 void *bounce;
184 unsigned int block_size = 0;
185 uintptr_t mask;
186 struct device_area widened;
188 if (!(where->dev->flags & DEV_REGULAR) &&
189 !_get_block_size(where->dev, &block_size))
190 return_0;
192 if (!block_size)
193 block_size = lvm_getpagesize();
195 _widen_region(block_size, where, &widened);
197 /* Do we need to use a bounce buffer? */
198 mask = block_size - 1;
199 if (!memcmp(where, &widened, sizeof(widened)) &&
200 !((uintptr_t) buffer & mask))
201 return _io(where, buffer, should_write);
203 /* Allocate a bounce buffer with an extra block */
204 if (!(bounce = alloca((size_t) widened.size + block_size))) {
205 log_error("Bounce buffer alloca failed");
206 return 0;
210 * Realign start of bounce buffer (using the extra sector)
212 if (((uintptr_t) bounce) & mask)
213 bounce = (void *) ((((uintptr_t) bounce) + mask) & ~mask);
215 /* channel the io through the bounce buffer */
216 if (!_io(&widened, bounce, 0)) {
217 if (!should_write)
218 return_0;
219 /* FIXME pre-extend the file */
220 memset(bounce, '\n', widened.size);
223 if (should_write) {
224 memcpy(bounce + (where->start - widened.start), buffer,
225 (size_t) where->size);
227 /* ... then we write */
228 return _io(&widened, bounce, 1);
231 memcpy(buffer, bounce + (where->start - widened.start),
232 (size_t) where->size);
234 return 1;
237 static int _dev_get_size_file(const struct device *dev, uint64_t *size)
239 const char *name = dev_name(dev);
240 struct stat info;
242 if (stat(name, &info)) {
243 log_sys_error("stat", name);
244 return 0;
247 *size = info.st_size;
248 *size >>= SECTOR_SHIFT; /* Convert to sectors */
250 log_very_verbose("%s: size is %" PRIu64 " sectors", name, *size);
252 return 1;
255 static int _dev_get_size_dev(const struct device *dev, uint64_t *size)
257 int fd;
258 const char *name = dev_name(dev);
259 #ifdef __NetBSD__
260 struct disklabel lab;
261 struct dkwedge_info dkw;
262 #endif
264 if ((fd = open(name, O_RDONLY)) < 0) {
265 #ifndef __NetBSD__
266 log_sys_error("open", name);
267 #endif
268 return 0;
271 #ifdef __NetBSD__
272 if ((*size = lseek (fd, 0, SEEK_END)) < 0) {
273 log_sys_error("lseek SEEK_END", name);
274 close(fd);
275 return 0;
278 if (ioctl(fd, DIOCGDINFO, &lab) < 0) {
279 if (ioctl(fd, DIOCGWEDGEINFO, &dkw) < 0) {
280 log_debug("ioctl DIOCGWEDGEINFO", name);
281 close(fd);
282 return 0;
283 } else
284 if (dkw.dkw_size)
285 *size = dkw.dkw_size;
286 } else
287 if (lab.d_secsize)
288 *size /= lab.d_secsize;
289 #else
290 if (ioctl(fd, BLKGETSIZE64, size) < 0) {
291 log_sys_error("ioctl BLKGETSIZE64", name);
292 if (close(fd))
293 log_sys_error("close", name);
294 return 0;
297 *size >>= BLKSIZE_SHIFT; /* Convert to sectors */
298 #endif
299 if (close(fd))
300 log_sys_error("close", name);
302 log_very_verbose("%s: size is %" PRIu64 " sectors", name, *size);
304 return 1;
/*
 * Fetch the read-ahead setting of a block device.
 *
 * Only implemented when 'linux' is defined: the value is cached in
 * dev->read_ahead (-1 meaning "not yet queried") and otherwise obtained
 * with the BLKRAGET ioctl on a temporarily opened device.  On other
 * platforms the function returns 1 without touching *read_ahead.
 *
 * Returns 1 on success, 0 if the device cannot be opened or queried.
 */
static int _dev_read_ahead_dev(struct device *dev, uint32_t *read_ahead)
{
#ifdef linux
	long ra;

	/* Use the cached value when there is one. */
	if (dev->read_ahead != -1) {
		*read_ahead = (uint32_t) dev->read_ahead;
		return 1;
	}

	if (!dev_open(dev))
		return_0;

	if (ioctl(dev->fd, BLKRAGET, &ra) < 0) {
		/* Log first so errno still belongs to the ioctl. */
		log_sys_error("ioctl BLKRAGET", dev_name(dev));
		if (!dev_close(dev))
			stack;
		return 0;
	}

	if (!dev_close(dev))
		stack;

	dev->read_ahead = ra;
	*read_ahead = (uint32_t) ra;

	log_very_verbose("%s: read_ahead is %u sectors",
			 dev_name(dev), *read_ahead);
#endif
	return 1;
}
339 /*-----------------------------------------------------------------
340 * Public functions
341 *---------------------------------------------------------------*/
343 int dev_get_size(const struct device *dev, uint64_t *size)
345 if (!dev)
346 return 0;
348 if ((dev->flags & DEV_REGULAR))
349 return _dev_get_size_file(dev, size);
350 else
351 return _dev_get_size_dev(dev, size);
354 int dev_get_read_ahead(struct device *dev, uint32_t *read_ahead)
356 if (!dev)
357 return 0;
359 if (dev->flags & DEV_REGULAR) {
360 *read_ahead = 0;
361 return 1;
364 return _dev_read_ahead_dev(dev, read_ahead);
367 /* FIXME Unused
368 int dev_get_sectsize(struct device *dev, uint32_t *size)
370 int fd;
371 int s;
372 const char *name = dev_name(dev);
374 if ((fd = open(name, O_RDONLY)) < 0) {
375 log_sys_error("open", name);
376 return 0;
379 if (ioctl(fd, BLKSSZGET, &s) < 0) {
380 log_sys_error("ioctl BLKSSZGET", name);
381 if (close(fd))
382 log_sys_error("close", name);
383 return 0;
386 if (close(fd))
387 log_sys_error("close", name);
389 *size = (uint32_t) s;
391 log_very_verbose("%s: sector size is %" PRIu32 " bytes", name, *size);
393 return 1;
397 void dev_flush(struct device *dev)
399 #ifdef __linux__
400 if (!(dev->flags & DEV_REGULAR) && ioctl(dev->fd, BLKFLSBUF, 0) >= 0)
401 return;
402 #endif
404 if (fsync(dev->fd) >= 0)
405 return;
407 sync();
410 int dev_open_flags(struct device *dev, int flags, int direct, int quiet)
412 struct stat buf;
413 const char *name;
414 int need_excl = 0, need_rw = 0;
416 if ((flags & O_ACCMODE) == O_RDWR)
417 need_rw = 1;
419 if ((flags & O_EXCL))
420 need_excl = 1;
422 if (dev->fd >= 0) {
423 if (((dev->flags & DEV_OPENED_RW) || !need_rw) &&
424 ((dev->flags & DEV_OPENED_EXCL) || !need_excl)) {
425 dev->open_count++;
426 return 1;
429 if (dev->open_count && !need_excl) {
430 /* FIXME Ensure we never get here */
431 log_debug("WARNING: %s already opened read-only",
432 dev_name(dev));
433 dev->open_count++;
436 dev_close_immediate(dev);
439 if (memlock())
440 log_error("WARNING: dev_open(%s) called while suspended",
441 dev_name(dev));
443 if (dev->flags & DEV_REGULAR)
444 name = dev_name(dev);
445 else if (!(name = dev_name_confirmed(dev, quiet)))
446 return_0;
448 if (!(dev->flags & DEV_REGULAR)) {
449 if (stat(name, &buf) < 0) {
450 log_sys_error("%s: stat failed", name);
451 return 0;
453 if (buf.st_rdev != dev->dev) {
454 log_error("%s: device changed", name);
455 return 0;
459 #ifdef O_DIRECT_SUPPORT
460 if (direct) {
461 if (!(dev->flags & DEV_O_DIRECT_TESTED))
462 dev->flags |= DEV_O_DIRECT;
464 if ((dev->flags & DEV_O_DIRECT))
465 flags |= O_DIRECT;
467 #endif
469 #ifdef O_NOATIME
470 /* Don't update atime on device inodes */
471 if (!(dev->flags & DEV_REGULAR))
472 flags |= O_NOATIME;
473 #endif
475 if ((dev->fd = open(name, flags, 0777)) < 0) {
476 #ifdef O_DIRECT_SUPPORT
477 if (direct && !(dev->flags & DEV_O_DIRECT_TESTED)) {
478 flags &= ~O_DIRECT;
479 if ((dev->fd = open(name, flags, 0777)) >= 0) {
480 dev->flags &= ~DEV_O_DIRECT;
481 log_debug("%s: Not using O_DIRECT", name);
482 goto opened;
485 #endif
486 if (quiet)
487 log_sys_debug("open", name);
488 else
489 log_sys_error("open", name);
491 return 0;
494 #ifdef O_DIRECT_SUPPORT
495 opened:
496 if (direct)
497 dev->flags |= DEV_O_DIRECT_TESTED;
498 #endif
499 dev->open_count++;
500 dev->flags &= ~DEV_ACCESSED_W;
502 if (need_rw)
503 dev->flags |= DEV_OPENED_RW;
504 else
505 dev->flags &= ~DEV_OPENED_RW;
507 if (need_excl)
508 dev->flags |= DEV_OPENED_EXCL;
509 else
510 dev->flags &= ~DEV_OPENED_EXCL;
512 if (!(dev->flags & DEV_REGULAR) &&
513 ((fstat(dev->fd, &buf) < 0) || (buf.st_rdev != dev->dev))) {
514 log_error("%s: fstat failed: Has device name changed?", name);
515 dev_close_immediate(dev);
516 return 0;
519 #ifndef O_DIRECT_SUPPORT
520 if (!(dev->flags & DEV_REGULAR))
521 dev_flush(dev);
522 #endif
524 if ((flags & O_CREAT) && !(flags & O_TRUNC))
525 dev->end = lseek(dev->fd, (off_t) 0, SEEK_END);
527 dm_list_add(&_open_devices, &dev->open_list);
529 log_debug("Opened %s %s%s%s", dev_name(dev),
530 dev->flags & DEV_OPENED_RW ? "RW" : "RO",
531 dev->flags & DEV_OPENED_EXCL ? " O_EXCL" : "",
532 dev->flags & DEV_O_DIRECT ? " O_DIRECT" : "");
534 return 1;
/*
 * Open 'dev' like dev_open(), but log an open failure at debug level only.
 *
 * The device is opened read-write when vg_write_lock_held() reports true,
 * read-only otherwise, and O_DIRECT is requested.
 */
int dev_open_quiet(struct device *dev)
{
	const int mode = vg_write_lock_held() ? O_RDWR : O_RDONLY;

	return dev_open_flags(dev, mode, 1, 1);
}
/*
 * Open 'dev' with default settings.
 *
 * The device is opened read-write when vg_write_lock_held() reports true,
 * read-only otherwise; O_DIRECT is requested and failures are logged as
 * errors.  Returns 1 on success, 0 on failure.
 */
int dev_open(struct device *dev)
{
	const int mode = vg_write_lock_held() ? O_RDWR : O_RDONLY;

	return dev_open_flags(dev, mode, 1, 0);
}
/*
 * Test whether 'dev' can be opened exclusively.
 *
 * Attempts a quiet O_EXCL open and, if it succeeds, closes the device
 * again straight away.  Returns the result of the open attempt.
 */
int dev_test_excl(struct device *dev)
{
	int mode = O_EXCL | (vg_write_lock_held() ? O_RDWR : O_RDONLY);
	int opened = dev_open_flags(dev, mode, 1, 1);

	if (opened)
		dev_close_immediate(dev);

	return opened;
}
570 static void _close(struct device *dev)
572 if (close(dev->fd))
573 log_sys_error("close", dev_name(dev));
574 dev->fd = -1;
575 dev->block_size = -1;
576 dm_list_del(&dev->open_list);
578 log_debug("Closed %s", dev_name(dev));
580 if (dev->flags & DEV_ALLOCED) {
581 dm_free((void *) dm_list_item(dev->aliases.n, struct str_list)->
582 str);
583 dm_free(dev->aliases.n);
584 dm_free(dev);
588 static int _dev_close(struct device *dev, int immediate)
590 struct lvmcache_info *info;
592 if (dev->fd < 0) {
593 log_error("Attempt to close device '%s' "
594 "which is not open.", dev_name(dev));
595 return 0;
598 #ifndef O_DIRECT_SUPPORT
599 if (dev->flags & DEV_ACCESSED_W)
600 dev_flush(dev);
601 #endif
603 if (dev->open_count > 0)
604 dev->open_count--;
606 if (immediate && dev->open_count)
607 log_debug("%s: Immediate close attempt while still referenced",
608 dev_name(dev));
610 /* Close unless device is known to belong to a locked VG */
611 if (immediate ||
612 (dev->open_count < 1 &&
613 (!(info = info_from_pvid(dev->pvid, 0)) ||
614 !info->vginfo ||
615 !vgname_is_locked(info->vginfo->vgname))))
616 _close(dev);
618 return 1;
/*
 * Release one reference on 'dev'.  The descriptor may stay open while
 * other references remain or while its VG is locked.
 * Returns 1 on success, 0 if the device was not open.
 */
int dev_close(struct device *dev)
{
	const int immediate = 0;

	return _dev_close(dev, immediate);
}
/*
 * Release one reference on 'dev' and close its descriptor right away,
 * even if it is still referenced.
 * Returns 1 on success, 0 if the device was not open.
 */
int dev_close_immediate(struct device *dev)
{
	const int immediate = 1;

	return _dev_close(dev, immediate);
}
631 void dev_close_all(void)
633 struct dm_list *doh, *doht;
634 struct device *dev;
636 dm_list_iterate_safe(doh, doht, &_open_devices) {
637 dev = dm_list_struct_base(doh, struct device, open_list);
638 if (dev->open_count < 1)
639 _close(dev);
643 int dev_read(struct device *dev, uint64_t offset, size_t len, void *buffer)
645 struct device_area where;
647 if (!dev->open_count)
648 return_0;
650 where.dev = dev;
651 where.start = offset;
652 where.size = len;
654 return _aligned_io(&where, buffer, 0);
/*
 * Read from 'dev' into 'buf', possibly in 2 distinct regions, denoted
 * by (offset,len) and (offset2,len2).  Thus, the total size of
 * 'buf' should be len+len2.
 *
 * Returns 1 on success, 0 if either read fails.
 */
int dev_read_circular(struct device *dev, uint64_t offset, size_t len,
		      uint64_t offset2, size_t len2, void *buf)
{
	if (!dev_read(dev, offset, len, buf)) {
		log_error("Read from %s failed", dev_name(dev));
		return 0;
	}

	/*
	 * The second region is optional, and allows for
	 * a circular buffer on the device.
	 */
	if (!len2)
		return 1;

	/* Index through char *: arithmetic on void * is not ISO C. */
	if (!dev_read(dev, offset2, len2, (char *) buf + len)) {
		log_error("Circular read from %s failed",
			  dev_name(dev));
		return 0;
	}

	return 1;
}
686 /* FIXME If O_DIRECT can't extend file, dev_extend first; dev_truncate after.
687 * But fails if concurrent processes writing
690 /* FIXME pre-extend the file */
691 int dev_append(struct device *dev, size_t len, void *buffer)
693 int r;
695 if (!dev->open_count)
696 return_0;
698 r = dev_write(dev, dev->end, len, buffer);
699 dev->end += (uint64_t) len;
701 #ifndef O_DIRECT_SUPPORT
702 dev_flush(dev);
703 #endif
704 return r;
707 int dev_write(struct device *dev, uint64_t offset, size_t len, void *buffer)
709 struct device_area where;
711 if (!dev->open_count)
712 return_0;
714 where.dev = dev;
715 where.start = offset;
716 where.size = len;
718 dev->flags |= DEV_ACCESSED_W;
720 return _aligned_io(&where, buffer, 1);
723 int dev_set(struct device *dev, uint64_t offset, size_t len, int value)
725 size_t s;
726 char buffer[4096] __attribute((aligned(8)));
728 if (!dev_open(dev))
729 return_0;
731 if ((offset % SECTOR_SIZE) || (len % SECTOR_SIZE))
732 log_debug("Wiping %s at %" PRIu64 " length %" PRIsize_t,
733 dev_name(dev), offset, len);
734 else
735 log_debug("Wiping %s at sector %" PRIu64 " length %" PRIsize_t
736 " sectors", dev_name(dev), offset >> SECTOR_SHIFT,
737 len >> SECTOR_SHIFT);
739 memset(buffer, value, sizeof(buffer));
740 while (1) {
741 s = len > sizeof(buffer) ? sizeof(buffer) : len;
742 if (!dev_write(dev, offset, s, buffer))
743 break;
745 len -= s;
746 if (!len)
747 break;
749 offset += s;
752 dev->flags |= DEV_ACCESSED_W;
754 if (!dev_close(dev))
755 stack;
757 return (len == 0);