Drop main() prototype. Syncs with NetBSD-8
[minix.git] / minix / fs / mfs / read.c
blobe4c0ed9ab8af8e07ec165b1f8670fe387263bca2
1 #include "fs.h"
2 #include <stddef.h>
3 #include <string.h>
4 #include <stdlib.h>
5 #include "buf.h"
6 #include "inode.h"
7 #include "super.h"
8 #include <sys/param.h>
9 #include <sys/dirent.h>
10 #include <assert.h>
13 static struct buf *rahead(struct inode *rip, block_t baseblock, u64_t
14 position, unsigned bytes_ahead);
15 static int rw_chunk(struct inode *rip, u64_t position, unsigned off,
16 size_t chunk, unsigned left, int call, struct fsdriver_data *data,
17 unsigned buf_off, unsigned int block_size, int *completed);
20 /*===========================================================================*
21 * fs_readwrite *
22 *===========================================================================*/
23 ssize_t fs_readwrite(ino_t ino_nr, struct fsdriver_data *data, size_t nrbytes,
24 off_t position, int call)
26 int r;
27 int regular;
28 off_t f_size, bytes_left;
29 size_t off, cum_io, block_size, chunk;
30 mode_t mode_word;
31 int completed;
32 struct inode *rip;
34 r = OK;
36 /* Find the inode referred */
37 if ((rip = find_inode(fs_dev, ino_nr)) == NULL)
38 return(EINVAL);
40 mode_word = rip->i_mode & I_TYPE;
41 regular = (mode_word == I_REGULAR);
43 /* Determine blocksize */
44 block_size = rip->i_sp->s_block_size;
45 f_size = rip->i_size;
47 /* If this is file i/o, check we can write */
48 if (call == FSC_WRITE) {
49 if(rip->i_sp->s_rd_only)
50 return EROFS;
52 /* Check in advance to see if file will grow too big. */
53 if (position > (off_t) (rip->i_sp->s_max_size - nrbytes))
54 return(EFBIG);
56 /* Clear the zone containing present EOF if hole about
57 * to be created. This is necessary because all unwritten
58 * blocks prior to the EOF must read as zeros.
60 if(position > f_size) clear_zone(rip, f_size, 0);
63 cum_io = 0;
64 /* Split the transfer into chunks that don't span two blocks. */
65 while (nrbytes > 0) {
66 off = ((unsigned int) position) % block_size; /* offset in blk*/
67 chunk = block_size - off;
68 if (chunk > nrbytes)
69 chunk = nrbytes;
71 if (call != FSC_WRITE) {
72 bytes_left = f_size - position;
73 if (position >= f_size) break; /* we are beyond EOF */
74 if (chunk > (unsigned int) bytes_left) chunk = bytes_left;
77 /* Read or write 'chunk' bytes. */
78 r = rw_chunk(rip, ((u64_t)((unsigned long)position)), off, chunk,
79 nrbytes, call, data, cum_io, block_size, &completed);
81 if (r != OK) break;
83 /* Update counters and pointers. */
84 nrbytes -= chunk; /* bytes yet to be read */
85 cum_io += chunk; /* bytes read so far */
86 position += (off_t) chunk; /* position within the file */
89 /* On write, update file size and access time. */
90 if (call == FSC_WRITE) {
91 if (regular || mode_word == I_DIRECTORY) {
92 if (position > f_size) rip->i_size = position;
96 rip->i_seek = NO_SEEK;
98 if (r != OK)
99 return r;
101 /* even on a ROFS, writing to a device node on it is fine,
102 * just don't update the inode stats for it. And dito for reading.
104 if (!rip->i_sp->s_rd_only) {
105 if (call == FSC_READ) rip->i_update |= ATIME;
106 if (call == FSC_WRITE) rip->i_update |= CTIME | MTIME;
107 IN_MARKDIRTY(rip); /* inode is thus now dirty */
110 return cum_io;
114 /*===========================================================================*
115 * rw_chunk *
116 *===========================================================================*/
117 static int rw_chunk(rip, position, off, chunk, left, call, data, buf_off,
118 block_size, completed)
119 register struct inode *rip; /* pointer to inode for file to be rd/wr */
120 u64_t position; /* position within file to read or write */
121 unsigned off; /* off within the current block */
122 size_t chunk; /* number of bytes to read or write */
123 unsigned left; /* max number of bytes wanted after position */
124 int call; /* FSC_READ, FSC_WRITE, or FSC_PEEK */
125 struct fsdriver_data *data; /* structure for (remote) user buffer */
126 unsigned buf_off; /* offset in user buffer */
127 unsigned int block_size; /* block size of FS operating on */
128 int *completed; /* number of bytes copied */
130 /* Read or write (part of) a block. */
131 struct buf *bp = NULL;
132 register int r = OK;
133 int n;
134 block_t b;
135 dev_t dev;
136 ino_t ino = VMC_NO_INODE;
137 u64_t ino_off = rounddown(position, block_size);
139 *completed = 0;
141 if (ex64hi(position) != 0)
142 panic("rw_chunk: position too high");
143 b = read_map(rip, (off_t) ex64lo(position), 0);
144 dev = rip->i_dev;
145 ino = rip->i_num;
146 assert(ino != VMC_NO_INODE);
148 if (b == NO_BLOCK) {
149 if (call == FSC_READ) {
150 /* Reading from a nonexistent block. Must read as all zeros.*/
151 r = fsdriver_zero(data, buf_off, chunk);
152 if(r != OK) {
153 printf("MFS: fsdriver_zero failed\n");
155 return r;
156 } else if (call == FSC_PEEK) {
157 /* Peeking a nonexistent block. Report to VM. */
158 lmfs_zero_block_ino(dev, ino, ino_off);
159 return OK;
160 } else {
161 /* Writing to a nonexistent block.
162 * Create and enter in inode.
164 if ((bp = new_block(rip, (off_t) ex64lo(position))) == NULL)
165 return(err_code);
167 } else if (call != FSC_WRITE) {
168 /* Read and read ahead if convenient. */
169 bp = rahead(rip, b, position, left);
170 } else {
171 /* Normally an existing block to be partially overwritten is first read
172 * in. However, a full block need not be read in. If it is already in
173 * the cache, acquire it, otherwise just acquire a free buffer.
175 n = (chunk == block_size ? NO_READ : NORMAL);
176 if (off == 0 && (off_t) ex64lo(position) >= rip->i_size)
177 n = NO_READ;
178 assert(ino != VMC_NO_INODE);
179 assert(!(ino_off % block_size));
180 if ((r = lmfs_get_block_ino(&bp, dev, b, n, ino, ino_off)) != OK)
181 panic("MFS: error getting block (%llu,%u): %d", dev, b, r);
184 /* In all cases, bp now points to a valid buffer. */
185 assert(bp != NULL);
187 if (call == FSC_WRITE && chunk != block_size &&
188 (off_t) ex64lo(position) >= rip->i_size && off == 0) {
189 zero_block(bp);
192 if (call == FSC_READ) {
193 /* Copy a chunk from the block buffer to user space. */
194 r = fsdriver_copyout(data, buf_off, b_data(bp)+off, chunk);
195 } else if (call == FSC_WRITE) {
196 /* Copy a chunk from user space to the block buffer. */
197 r = fsdriver_copyin(data, buf_off, b_data(bp)+off, chunk);
198 MARKDIRTY(bp);
201 put_block(bp);
203 return(r);
207 /*===========================================================================*
208 * read_map *
209 *===========================================================================*/
210 block_t read_map(rip, position, opportunistic)
211 register struct inode *rip; /* ptr to inode to map from */
212 off_t position; /* position in file whose blk wanted */
213 int opportunistic; /* if nonzero, only use cache for metadata */
215 /* Given an inode and a position within the corresponding file, locate the
216 * block (not zone) number in which that position is to be found and return it.
219 struct buf *bp;
220 zone_t z;
221 int scale, boff, index, zind;
222 unsigned int dzones, nr_indirects;
223 block_t b;
224 unsigned long excess, zone, block_pos;
225 int iomode;
227 iomode = opportunistic ? PEEK : NORMAL;
229 scale = rip->i_sp->s_log_zone_size; /* for block-zone conversion */
230 block_pos = position/rip->i_sp->s_block_size; /* relative blk # in file */
231 zone = block_pos >> scale; /* position's zone */
232 boff = (int) (block_pos - (zone << scale) ); /* relative blk # within zone */
233 dzones = rip->i_ndzones;
234 nr_indirects = rip->i_nindirs;
236 /* Is 'position' to be found in the inode itself? */
237 if (zone < dzones) {
238 zind = (int) zone; /* index should be an int */
239 z = rip->i_zone[zind];
240 if (z == NO_ZONE) return(NO_BLOCK);
241 b = (block_t) ((z << scale) + boff);
242 return(b);
245 /* It is not in the inode, so it must be single or double indirect. */
246 excess = zone - dzones; /* first Vx_NR_DZONES don't count */
248 if (excess < nr_indirects) {
249 /* 'position' can be located via the single indirect block. */
250 z = rip->i_zone[dzones];
251 } else {
252 /* 'position' can be located via the double indirect block. */
253 if ( (z = rip->i_zone[dzones+1]) == NO_ZONE) return(NO_BLOCK);
254 excess -= nr_indirects; /* single indir doesn't count*/
255 b = (block_t) z << scale;
256 ASSERT(rip->i_dev != NO_DEV);
257 index = (int) (excess/nr_indirects);
258 if ((unsigned int) index > rip->i_nindirs)
259 return(NO_BLOCK); /* Can't go beyond double indirects */
260 bp = get_block(rip->i_dev, b, iomode); /* get double indirect block */
261 if (bp == NULL)
262 return NO_BLOCK; /* peeking failed */
263 z = rd_indir(bp, index); /* z= zone for single*/
264 put_block(bp); /* release double ind block */
265 excess = excess % nr_indirects; /* index into single ind blk */
268 /* 'z' is zone num for single indirect block; 'excess' is index into it. */
269 if (z == NO_ZONE) return(NO_BLOCK);
270 b = (block_t) z << scale; /* b is blk # for single ind */
271 bp = get_block(rip->i_dev, b, iomode); /* get single indirect block */
272 if (bp == NULL)
273 return NO_BLOCK; /* peeking failed */
274 z = rd_indir(bp, (int) excess); /* get block pointed to */
275 put_block(bp); /* release single indir blk */
276 if (z == NO_ZONE) return(NO_BLOCK);
277 b = (block_t) ((z << scale) + boff);
278 return(b);
281 struct buf *get_block_map(register struct inode *rip, u64_t position)
283 struct buf *bp;
284 int r, block_size;
285 block_t b = read_map(rip, position, 0); /* get block number */
286 if(b == NO_BLOCK)
287 return NULL;
288 block_size = get_block_size(rip->i_dev);
289 position = rounddown(position, block_size);
290 assert(rip->i_num != VMC_NO_INODE);
291 if ((r = lmfs_get_block_ino(&bp, rip->i_dev, b, NORMAL, rip->i_num,
292 position)) != OK)
293 panic("MFS: error getting block (%llu,%u): %d",
294 rip->i_dev, b, r);
295 return bp;
298 /*===========================================================================*
299 * rd_indir *
300 *===========================================================================*/
301 zone_t rd_indir(bp, index)
302 struct buf *bp; /* pointer to indirect block */
303 int index; /* index into *bp */
305 struct super_block *sp;
306 zone_t zone;
308 if(bp == NULL)
309 panic("rd_indir() on NULL");
311 sp = &superblock;
313 /* read a zone from an indirect block */
314 assert(sp->s_version == V3);
315 zone = (zone_t) conv4(sp->s_native, (long) b_v2_ind(bp)[index]);
317 if (zone != NO_ZONE &&
318 (zone < (zone_t) sp->s_firstdatazone || zone >= sp->s_zones)) {
319 printf("Illegal zone number %ld in indirect block, index %d\n",
320 (long) zone, index);
321 panic("check file system");
324 return(zone);
327 /*===========================================================================*
328 * rahead *
329 *===========================================================================*/
330 static struct buf *rahead(rip, baseblock, position, bytes_ahead)
331 register struct inode *rip; /* pointer to inode for file to be read */
332 block_t baseblock; /* block at current position */
333 u64_t position; /* position within file */
334 unsigned bytes_ahead; /* bytes beyond position for immediate use */
336 /* Fetch a block from the cache or the device. If a physical read is
337 * required, prefetch as many more blocks as convenient into the cache.
338 * This usually covers bytes_ahead and is at least BLOCKS_MINIMUM.
339 * The device driver may decide it knows better and stop reading at a
340 * cylinder boundary (or after an error). Rw_scattered() puts an optional
341 * flag on all reads to allow this.
343 /* Minimum number of blocks to prefetch. */
344 # define BLOCKS_MINIMUM 32
345 int r, scale, read_q_size;
346 unsigned int blocks_ahead, fragment, block_size;
347 block_t block, blocks_left;
348 off_t ind1_pos;
349 dev_t dev;
350 struct buf *bp;
351 static block64_t read_q[LMFS_MAX_PREFETCH];
352 u64_t position_running;
354 dev = rip->i_dev;
355 assert(dev != NO_DEV);
357 block_size = get_block_size(dev);
359 block = baseblock;
361 fragment = position % block_size;
362 position -= fragment;
363 position_running = position;
364 bytes_ahead += fragment;
365 blocks_ahead = (bytes_ahead + block_size - 1) / block_size;
367 r = lmfs_get_block_ino(&bp, dev, block, PEEK, rip->i_num, position);
368 if (r == OK)
369 return(bp);
370 if (r != ENOENT)
371 panic("MFS: error getting block (%llu,%u): %d", dev, block, r);
373 /* The best guess for the number of blocks to prefetch: A lot.
374 * It is impossible to tell what the device looks like, so we don't even
375 * try to guess the geometry, but leave it to the driver.
377 * The floppy driver can read a full track with no rotational delay, and it
378 * avoids reading partial tracks if it can, so handing it enough buffers to
379 * read two tracks is perfect. (Two, because some diskette types have
380 * an odd number of sectors per track, so a block may span tracks.)
382 * The disk drivers don't try to be smart. With todays disks it is
383 * impossible to tell what the real geometry looks like, so it is best to
384 * read as much as you can. With luck the caching on the drive allows
385 * for a little time to start the next read.
387 * The current solution below is a bit of a hack, it just reads blocks from
388 * the current file position hoping that more of the file can be found. A
389 * better solution must look at the already available zone pointers and
390 * indirect blocks (but don't call read_map!).
393 blocks_left = (block_t) (rip->i_size-ex64lo(position)+(block_size-1)) /
394 block_size;
396 /* Go for the first indirect block if we are in its neighborhood. */
397 scale = rip->i_sp->s_log_zone_size;
398 ind1_pos = (off_t) rip->i_ndzones * (block_size << scale);
399 if ((off_t) ex64lo(position) <= ind1_pos && rip->i_size > ind1_pos) {
400 blocks_ahead++;
401 blocks_left++;
404 /* Read at least the minimum number of blocks, but not after a seek. */
405 if (blocks_ahead < BLOCKS_MINIMUM && rip->i_seek == NO_SEEK)
406 blocks_ahead = BLOCKS_MINIMUM;
408 /* Can't go past end of file. */
409 if (blocks_ahead > blocks_left) blocks_ahead = blocks_left;
411 /* No more than the maximum request. */
412 if (blocks_ahead > LMFS_MAX_PREFETCH) blocks_ahead = LMFS_MAX_PREFETCH;
414 read_q_size = 0;
416 /* Acquire block buffers. */
417 for (;;) {
418 block_t thisblock;
419 read_q[read_q_size++] = block;
421 if (--blocks_ahead == 0) break;
423 block++;
424 position_running += block_size;
426 thisblock = read_map(rip, (off_t) ex64lo(position_running), 1);
427 if (thisblock != NO_BLOCK) {
428 r = lmfs_get_block_ino(&bp, dev, thisblock, PEEK, rip->i_num,
429 position_running);
430 block = thisblock;
431 } else
432 r = lmfs_get_block(&bp, dev, block, PEEK);
434 if (r == OK) {
435 /* Oops, block already in the cache, get out. */
436 put_block(bp);
437 break;
439 if (r != ENOENT)
440 panic("MFS: error getting block (%llu,%u): %d", dev, block, r);
442 lmfs_prefetch(dev, read_q, read_q_size);
444 r = lmfs_get_block_ino(&bp, dev, baseblock, NORMAL, rip->i_num, position);
445 if (r != OK)
446 panic("MFS: error getting block (%llu,%u): %d", dev, baseblock, r);
447 return bp;
451 /*===========================================================================*
452 * fs_getdents *
453 *===========================================================================*/
454 ssize_t fs_getdents(ino_t ino_nr, struct fsdriver_data *data, size_t bytes,
455 off_t *posp)
457 #define GETDENTS_BUFSIZE (sizeof(struct dirent) + MFS_NAME_MAX + 1)
458 #define GETDENTS_ENTRIES 8
459 static char getdents_buf[GETDENTS_BUFSIZE * GETDENTS_ENTRIES];
460 struct fsdriver_dentry fsdentry;
461 struct inode *rip, *entrip;
462 int r, done;
463 unsigned int block_size, len, type;
464 off_t pos, off, block_pos, new_pos, ent_pos;
465 struct buf *bp;
466 struct direct *dp;
467 char *cp;
469 /* Check whether the position is properly aligned */
470 pos = *posp;
471 if( (unsigned int) pos % DIR_ENTRY_SIZE)
472 return(ENOENT);
474 if( (rip = get_inode(fs_dev, ino_nr)) == NULL)
475 return(EINVAL);
477 block_size = rip->i_sp->s_block_size;
478 off = (pos % block_size); /* Offset in block */
479 block_pos = pos - off;
480 done = FALSE; /* Stop processing directory blocks when done is set */
482 fsdriver_dentry_init(&fsdentry, data, bytes, getdents_buf,
483 sizeof(getdents_buf));
485 /* The default position for the next request is EOF. If the user's buffer
486 * fills up before EOF, new_pos will be modified. */
487 new_pos = rip->i_size;
489 r = 0;
491 for(; block_pos < rip->i_size; block_pos += block_size) {
492 /* Since directories don't have holes, 'bp' cannot be NULL. */
493 bp = get_block_map(rip, block_pos); /* get a dir block */
494 assert(bp != NULL);
496 /* Search a directory block. */
497 if (block_pos < pos)
498 dp = &b_dir(bp)[off / DIR_ENTRY_SIZE];
499 else
500 dp = &b_dir(bp)[0];
501 for (; dp < &b_dir(bp)[NR_DIR_ENTRIES(block_size)]; dp++) {
502 if (dp->mfs_d_ino == 0)
503 continue; /* Entry is not in use */
505 /* Compute the length of the name */
506 cp = memchr(dp->mfs_d_name, '\0', sizeof(dp->mfs_d_name));
507 if (cp == NULL)
508 len = sizeof(dp->mfs_d_name);
509 else
510 len = cp - (dp->mfs_d_name);
512 /* Need the position of this entry in the directory */
513 ent_pos = block_pos + ((char *) dp - (char *) bp->data);
515 /* We also need(?) the file type of the target inode. */
516 if (!(entrip = get_inode(fs_dev, (ino_t) dp->mfs_d_ino)))
517 panic("unexpected get_inode failure");
518 type = IFTODT(entrip->i_mode);
519 put_inode(entrip);
521 /* MFS does not store file types in its directory entries, and
522 * fetching the mode from the inode is seriously expensive.
523 * Userland should always be prepared to receive DT_UNKNOWN.
525 r = fsdriver_dentry_add(&fsdentry, (ino_t) dp->mfs_d_ino,
526 dp->mfs_d_name, len, type);
528 /* If the user buffer is full, or an error occurred, stop. */
529 if (r <= 0) {
530 done = TRUE;
532 /* Record the position of this entry, it is the
533 * starting point of the next request (unless the
534 * postion is modified with lseek).
536 new_pos = ent_pos;
537 break;
541 put_block(bp);
542 if (done)
543 break;
546 if (r >= 0 && (r = fsdriver_dentry_finish(&fsdentry)) >= 0) {
547 *posp = new_pos;
548 if(!rip->i_sp->s_rd_only) {
549 rip->i_update |= ATIME;
550 IN_MARKDIRTY(rip);
554 put_inode(rip); /* release the inode */
555 return(r);