etc/services - sync with NetBSD-8
[minix.git] / minix / lib / libminixfs / bio.c
blob6d0e0fb2f87fd5e880007f827af81c03c3bcf544
1 /*
2 * This file provides an implementation for block I/O functions as expected by
3 * libfsdriver for root file systems. In particular, the lmfs_driver function
4 * can be used to implement fdr_driver, the lmfs_bio function can be used to
5 * implement the fdr_bread, fdr_bwrite, and fdr_bpeek hooks, and the
6 * lmfs_bflush function can be used to implement the fdr_bflush hook. At the
7 * very least, a file system that makes use of the provided functionality
8 * must adhere to the following rules:
10 * o it must initialize this library in order to set up a buffer pool for
11 * use by these functions, using the lmfs_buf_pool function; the
12 * recommended number of blocks for *non*-disk-backed file systems is
13 * LMFS_MAX_PREFETCH buffers (disk-backed file systems typically use many
14 * more);
15 * o it must enable VM caching in order to support memory mapping of block
16 * devices, using the lmfs_may_use_vmcache function;
17 * o it must either use lmfs_flushall as implementation for the fdr_sync
18 * hook, or call lmfs_flushall as part of its own fdr_sync implementation.
20 * In addition, a disk-backed file system (as opposed to e.g. a networked file
21 * system that intends to be able to serve as a root file system) should
22 * consider the following points:
24 * o it may restrict calls to fdr_bwrite on the mounted partition, for
25 * example to the partition's first 1024 bytes; it should generally not
26 * prevent that area from being written even if the file system is mounted
27 * read-only;
28 * o it is free to set its own block size, although the default block size
29 * works fine for raw block I/O as well.
32 #include <minix/drivers.h>
33 #include <minix/libminixfs.h>
34 #include <minix/fsdriver.h>
35 #include <minix/bdev.h>
36 #include <minix/partition.h>
37 #include <sys/ioctl.h>
38 #include <assert.h>
40 #include "inc.h"
/*
 * Register 'label' as the driver label for the device 'dev'.  A full device
 * number is passed in, but only its major number is meant to be used.  For
 * now this merely forwards the call to libbdev; it exists so that file systems
 * using this library can eventually be shielded from libbdev altogether.
 */
void
lmfs_driver(dev_t dev, char *label)
{
	bdev_driver(dev, label);
}
56 * Prefetch up to "nblocks" blocks on "dev" starting from block number "block".
57 * The size to be used for the last block in the range is given as "last_size".
58 * Stop early when either the I/O request fills up or when a block is already
59 * found to be in the cache. The latter is likely to happen often, since this
60 * function is called before getting each block for reading. Prefetching is a
61 * strictly best-effort operation, and may fail silently.
62 * TODO: limit according to the number of available buffers.
64 static void
65 block_prefetch(dev_t dev, block64_t block, unsigned int nblocks,
66 size_t block_size, size_t last_size)
68 struct buf *bp;
69 unsigned int count, limit;
70 int r;
72 limit = lmfs_readahead_limit();
73 assert(limit >= 1 && limit <= LMFS_MAX_PREFETCH);
75 if (nblocks > limit) {
76 nblocks = limit;
78 last_size = block_size;
81 for (count = 0; count < nblocks; count++) {
82 if (count == nblocks - 1 && last_size < block_size)
83 r = lmfs_get_partial_block(&bp, dev, block + count,
84 PEEK, last_size);
85 else
86 r = lmfs_get_block(&bp, dev, block + count, PEEK);
88 if (r == OK) {
89 lmfs_put_block(bp);
91 last_size = block_size;
93 break;
97 if (count > 0)
98 lmfs_readahead(dev, block, count, last_size);
102 * Perform block I/O, on "dev", starting from offset "pos", for a total of
103 * "bytes" bytes. Reading, writing, and peeking are highly similar, and thus,
104 * this function implements all of them. The "call" parameter indicates the
105 * call type (one of FSC_READ, FSC_WRITE, FSC_PEEK). For read and write calls,
106 * "data" will identify the user buffer to use; for peek calls, "data" is set
107 * to NULL. In all cases, this function returns the number of bytes
108 * successfully transferred, 0 on end-of-file conditions, and a negative error
109 * code if no bytes could be transferred due to an error. Dirty data is not
110 * flushed immediately, and thus, a successful write only indicates that the
111 * data have been taken in by the cache (for immediate I/O, a character device
112 * would have to be used, but MINIX3 no longer supports this), which may be
113 * follwed later by silent failures. End-of-file conditions are always
114 * reported immediately, though.
116 ssize_t
117 lmfs_bio(dev_t dev, struct fsdriver_data * data, size_t bytes, off_t pos,
118 int call)
120 block64_t block;
121 struct part_geom part;
122 size_t block_size, off, block_off, last_size, size, chunk;
123 unsigned int blocks_left;
124 struct buf *bp;
125 int r, do_write, how;
127 if (dev == NO_DEV)
128 return EINVAL;
130 block_size = lmfs_fs_block_size();
131 do_write = (call == FSC_WRITE);
133 assert(block_size > 0);
135 if (bytes == 0)
136 return 0; /* just in case */
138 if (pos < 0 || bytes > SSIZE_MAX || pos > INT64_MAX - bytes + 1)
139 return EINVAL;
142 * Get the partition size, so that we can handle EOF ourselves.
143 * Unfortunately, we cannot cache the results between calls, since we
144 * do not get to see DIOCSETP ioctls--see also repartition(8).
146 if ((r = bdev_ioctl(dev, DIOCGETP, &part, NONE /*user_endpt*/)) != OK)
147 return r;
149 if ((uint64_t)pos >= part.size)
150 return 0; /* EOF */
152 if ((uint64_t)pos > part.size - bytes)
153 bytes = part.size - pos;
155 off = 0;
156 block = pos / block_size;
157 block_off = (size_t)(pos % block_size);
158 blocks_left = howmany(block_off + bytes, block_size);
160 assert(blocks_left > 0);
163 * If the last block we need is also the last block of the device,
164 * see how many bytes we should actually transfer for that block.
166 if (block + blocks_left - 1 == part.size / block_size)
167 last_size = part.size % block_size;
168 else
169 last_size = block_size;
171 r = OK;
173 for (off = 0; off < bytes && blocks_left > 0; off += chunk) {
174 size = (blocks_left == 1) ? last_size : block_size;
176 chunk = size - block_off;
177 if (chunk > bytes - off)
178 chunk = bytes - off;
180 assert(chunk > 0 && chunk <= size);
183 * For read requests, help the block driver form larger I/O
184 * requests.
186 if (!do_write)
187 block_prefetch(dev, block, blocks_left, block_size,
188 last_size);
191 * Do not read the block from disk if we will end up
192 * overwriting all of its contents.
194 how = (do_write && chunk == size) ? NO_READ : NORMAL;
196 if (size < block_size)
197 r = lmfs_get_partial_block(&bp, dev, block, how, size);
198 else
199 r = lmfs_get_block(&bp, dev, block, how);
201 if (r != OK) {
202 printf("libminixfs: error getting block <%"PRIx64","
203 "%"PRIu64"> for device I/O (%d)\n", dev, block, r);
205 break;
208 /* Perform the actual copy. */
209 if (r == OK && data != NULL) {
210 if (do_write) {
211 r = fsdriver_copyin(data, off,
212 (char *)bp->data + block_off, chunk);
215 * Mark the block as dirty even if the copy
216 * failed, since the copy may in fact have
217 * succeeded partially. This is an interface
218 * issue that should be resolved at some point,
219 * but for now we do not want the cache to be
220 * desynchronized from the disk contents.
222 lmfs_markdirty(bp);
223 } else
224 r = fsdriver_copyout(data, off,
225 (char *)bp->data + block_off, chunk);
228 lmfs_put_block(bp);
230 if (r != OK)
231 break;
233 block++;
234 block_off = 0;
235 blocks_left--;
239 * If we were not able to do any I/O, return the error. Otherwise,
240 * return how many bytes we did manage to transfer.
242 if (r != OK && off == 0)
243 return r;
245 return off;
/*
 * Handle a flush request for block device "dev".  This is called after the
 * block device has been closed, at which point no stale copies of its blocks
 * may be left behind in any cache, be it the local cache or VM.
 */
void
lmfs_bflush(dev_t dev)
{
	/* Write out whatever is still dirty for this device.. */
	lmfs_flushdev(dev);

	/* ..and only then drop all of the device's blocks. */
	lmfs_invalidate(dev);
}