[minix.git] / servers / mfs / read.c
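/* This file contains the read-side routines of the MINIX file server:
 *
 *   fs_readwrite:  perform read/write requests from VFS
 *   fs_breadwrite: raw reads and writes on a block device
 *   rw_chunk:      transfer a chunk that fits within a single block
 *   read_map:      map a file position to a disk block number
 *   rd_indir:      fetch one entry from an indirect block
 *   read_ahead:    read a block into the cache before it is needed
 *   rahead:        fetch a block, prefetching more when convenient
 *   fs_getdents:   fill a user buffer with directory entries
 */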
#include "fs.h"
#include <fcntl.h>
#include <stddef.h>
#include <string.h>
#include <unistd.h>
#include <stdlib.h>
#include <minix/com.h>
#include <minix/u64.h>
#include "buf.h"
#include "inode.h"
#include "super.h"
#include <minix/vfsif.h>

FORWARD _PROTOTYPE( int rw_chunk, (struct inode *rip, u64_t position,
    unsigned off, int chunk, unsigned left, int rw_flag,
    cp_grant_id_t gid, unsigned buf_off, int block_size, int *completed));

PRIVATE char getdents_buf[GETDENTS_BUFSIZ];
/*===========================================================================*
 *				fs_readwrite				     *
 *===========================================================================*/
PUBLIC int fs_readwrite(void)
{
  int r, rw_flag, chunk, block_size, block_spec;
  int regular, nrbytes;
  cp_grant_id_t gid;
  off_t position, f_size, bytes_left;
  unsigned int off, cum_io;
  mode_t mode_word;
  int completed, r2 = OK;
  struct inode *rip;

  r = OK;

  /* Find the inode referred to. */
  if ((rip = find_inode(fs_dev, fs_m_in.REQ_INODE_NR)) == NULL)
      return(EINVAL);

  mode_word = rip->i_mode & I_TYPE;
  regular = (mode_word == I_REGULAR || mode_word == I_NAMED_PIPE);
  block_spec = (mode_word == I_BLOCK_SPECIAL ? 1 : 0);

  /* Determine the block size. */
  block_size = (block_spec ?
      get_block_size(rip->i_zone[0]) : rip->i_sp->s_block_size);

  f_size = (block_spec ? ULONG_MAX : rip->i_size);

  /* Get the values from the request message. */
  rw_flag = (fs_m_in.m_type == REQ_READ ? READING : WRITING);
  gid = fs_m_in.REQ_GRANT;
  position = fs_m_in.REQ_SEEK_POS_LO;
  nrbytes = (unsigned) fs_m_in.REQ_NBYTES;

  rdwt_err = OK;	/* set to EIO if disk error occurs */
  if (rw_flag == WRITING && block_spec == 0) {
      /* Check in advance to see if the file will grow too big. */
      if (position > rip->i_sp->s_max_size - nrbytes)
          return(EFBIG);

      /* Clear the zone containing the present EOF if a hole is about
       * to be created.  This is necessary because all unwritten
       * blocks prior to the EOF must read as zeros.
       */
      if (position > f_size) clear_zone(rip, f_size, 0);
  }
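  /* Worked example of the chunking below (illustrative numbers): with
   * block_size = 1024, a 3000-byte transfer starting at position 500 is
   * split so that no chunk crosses a block boundary:
   *   chunk 1: off = 500, chunk = MIN(3000, 1024 - 500) = 524
   *   chunk 2: off = 0,   chunk = MIN(2476, 1024)       = 1024
   *   chunk 3: off = 0,   chunk = MIN(1452, 1024)       = 1024
   *   chunk 4: off = 0,   chunk = MIN(428,  1024)       = 428
   */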
  cum_io = 0;
  /* Split the transfer into chunks that don't span two blocks. */
  while (nrbytes != 0) {
      off = (unsigned int) (position % block_size);	/* offset in blk */
      chunk = MIN(nrbytes, block_size - off);
      if (chunk < 0) chunk = block_size - off;

      if (rw_flag == READING) {
          bytes_left = f_size - position;
          if (position >= f_size) break;	/* we are beyond EOF */
          if (chunk > bytes_left) chunk = (int) bytes_left;
      }

      /* Read or write 'chunk' bytes. */
      r = rw_chunk(rip, cvul64(position), off, chunk, (unsigned) nrbytes,
                   rw_flag, gid, cum_io, block_size, &completed);

      if (r != OK) break;	/* EOF reached */
      if (rdwt_err < 0) break;

      /* Update counters and pointers. */
      nrbytes -= chunk;		/* bytes yet to be read */
      cum_io += chunk;		/* bytes read so far */
      position += chunk;	/* position within the file */
  }
  fs_m_out.RES_SEEK_POS_LO = position;	/* It might change later, and VFS
					 * has to know this value. */

  /* On write, update the file size and access time. */
  if (rw_flag == WRITING) {
      if (regular || mode_word == I_DIRECTORY) {
          if (position > f_size) rip->i_size = position;
      }
  }

  /* Check to see if read-ahead is called for, and if so, set it up. */
  if (rw_flag == READING && rip->i_seek == NO_SEEK &&
      position % block_size == 0 && (regular || mode_word == I_DIRECTORY)) {
      rdahed_inode = rip;
      rdahedpos = position;
  }

  rip->i_seek = NO_SEEK;

  if (rdwt_err != OK) r = rdwt_err;	/* check for disk error */
  if (rdwt_err == END_OF_FILE) r = OK;

  /* If user-space copying failed, the read/write failed. */
  if (r == OK && r2 != OK)
      r = r2;

  if (r == OK) {
      if (rw_flag == READING) rip->i_update |= ATIME;
      if (rw_flag == WRITING) rip->i_update |= CTIME | MTIME;
      rip->i_dirt = DIRTY;	/* inode is thus now dirty */
  }

  fs_m_out.RES_NBYTES = cum_io;

  return(r);
}
/*===========================================================================*
 *				fs_breadwrite				     *
 *===========================================================================*/
PUBLIC int fs_breadwrite(void)
{
  int r, rw_flag, chunk, block_size;
  cp_grant_id_t gid;
  int nrbytes;
  u64_t position;
  unsigned int off, cum_io;
  int completed;

  /* Pseudo inode for rw_chunk */
  struct inode rip;

  r = OK;

  /* Get the values from the request message. */
  rw_flag = (fs_m_in.m_type == REQ_BREAD ? READING : WRITING);
  gid = fs_m_in.REQ_GRANT;
  position = make64(fs_m_in.REQ_SEEK_POS_LO, fs_m_in.REQ_SEEK_POS_HI);
  nrbytes = (unsigned) fs_m_in.REQ_NBYTES;

  block_size = get_block_size(fs_m_in.REQ_DEV2);

  rip.i_zone[0] = fs_m_in.REQ_DEV2;
  rip.i_mode = I_BLOCK_SPECIAL;
  rip.i_size = 0;
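  /* The pseudo inode above is an ordinary stack variable, not a cached
   * inode: marking it I_BLOCK_SPECIAL makes rw_chunk() address the device
   * in i_zone[0] by absolute block number, so no read_map() lookup (and
   * thus no on-disk inode) is needed for raw device I/O.
   */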
  rdwt_err = OK;	/* set to EIO if disk error occurs */

  cum_io = 0;
  /* Split the transfer into chunks that don't span two blocks. */
  while (nrbytes != 0) {
      off = rem64u(position, block_size);	/* offset in blk */

      chunk = MIN(nrbytes, block_size - off);
      if (chunk < 0) chunk = block_size - off;

      /* Read or write 'chunk' bytes. */
      r = rw_chunk(&rip, position, off, chunk, (unsigned) nrbytes,
                   rw_flag, gid, cum_io, block_size, &completed);

      if (r != OK) break;	/* EOF reached */
      if (rdwt_err < 0) break;

      /* Update counters and pointers. */
      nrbytes -= chunk;		/* bytes yet to be read */
      cum_io += chunk;		/* bytes read so far */
      position = add64ul(position, chunk);	/* position within the file */
  }

  fs_m_out.RES_SEEK_POS_LO = ex64lo(position);
  fs_m_out.RES_SEEK_POS_HI = ex64hi(position);

  if (rdwt_err != OK) r = rdwt_err;	/* check for disk error */
  if (rdwt_err == END_OF_FILE) r = OK;

  fs_m_out.RES_NBYTES = cum_io;

  return(r);
}
/*===========================================================================*
 *				rw_chunk				     *
 *===========================================================================*/
PRIVATE int rw_chunk(rip, position, off, chunk, left, rw_flag, gid,
 buf_off, block_size, completed)
register struct inode *rip;	/* pointer to inode for file to be rd/wr */
u64_t position;			/* position within file to read or write */
unsigned off;			/* off within the current block */
int chunk;			/* number of bytes to read or write */
unsigned left;			/* max number of bytes wanted after position */
int rw_flag;			/* READING or WRITING */
cp_grant_id_t gid;		/* grant */
unsigned buf_off;		/* offset in grant */
int block_size;			/* block size of FS operating on */
int *completed;			/* number of bytes copied */
{
/* Read or write (part of) a block. */

  register struct buf *bp;
  register int r = OK;
  int n, block_spec;
  block_t b;
  dev_t dev;

  *completed = 0;

  block_spec = (rip->i_mode & I_TYPE) == I_BLOCK_SPECIAL;

  if (block_spec) {
      b = div64u(position, block_size);
      dev = (dev_t) rip->i_zone[0];
  } else {
      if (ex64hi(position) != 0)
          panic("rw_chunk: position too high");
      b = read_map(rip, ex64lo(position));
      dev = rip->i_dev;
  }

  if (!block_spec && b == NO_BLOCK) {
      if (rw_flag == READING) {
          /* Reading from a nonexistent block.  Must read as all zeros. */
          bp = get_block(NO_DEV, NO_BLOCK, NORMAL);	/* get a buffer */
          zero_block(bp);
      } else {
          /* Writing to a nonexistent block.  Create and enter in inode. */
          if ((bp = new_block(rip, ex64lo(position))) == NULL)
              return(err_code);
      }
  } else if (rw_flag == READING) {
      /* Read and read ahead if convenient. */
      bp = rahead(rip, b, position, left);
  } else {
      /* Normally an existing block to be partially overwritten is first read
       * in.  However, a full block need not be read in.  If it is already in
       * the cache, acquire it, otherwise just acquire a free buffer.
       */
      n = (chunk == block_size ? NO_READ : NORMAL);
      if (!block_spec && off == 0 && ex64lo(position) >= rip->i_size)
          n = NO_READ;
      bp = get_block(dev, b, n);
  }
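  /* Note on the NO_READ cases above: when the whole block is about to be
   * overwritten (chunk == block_size), or the block lies entirely at or
   * beyond the current end of file (off == 0 && position >= i_size), its
   * old contents are irrelevant, so get_block() is told not to read it
   * from disk.
   */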
  /* In all cases, bp now points to a valid buffer. */
  if (bp == NULL)
      panic("bp not valid in rw_chunk; this can't happen");

  if (rw_flag == WRITING && chunk != block_size && !block_spec &&
      ex64lo(position) >= rip->i_size && off == 0) {
      zero_block(bp);
  }

  if (rw_flag == READING) {
      /* Copy a chunk from the block buffer to user space. */
      r = sys_safecopyto(FS_PROC_NR, gid, buf_off,
                         (vir_bytes) (bp->b_data+off), (phys_bytes) chunk, D);
  } else {
      /* Copy a chunk from user space to the block buffer. */
      r = sys_safecopyfrom(FS_PROC_NR, gid, buf_off,
                           (vir_bytes) (bp->b_data+off), (phys_bytes) chunk, D);
      bp->b_dirt = DIRTY;
  }

  n = (off + chunk == block_size ? FULL_DATA_BLOCK : PARTIAL_DATA_BLOCK);
  put_block(bp, n);

  return(r);
}
/*===========================================================================*
 *				read_map				     *
 *===========================================================================*/
PUBLIC block_t read_map(rip, position)
register struct inode *rip;	/* ptr to inode to map from */
off_t position;			/* position in file whose blk wanted */
{
/* Given an inode and a position within the corresponding file, locate the
 * block (not zone) number in which that position is to be found and return it.
 */

  register struct buf *bp;
  register zone_t z;
  int scale, boff, dzones, nr_indirects, index, zind, ex;
  block_t b;
  long excess, zone, block_pos;

  scale = rip->i_sp->s_log_zone_size;	/* for block-zone conversion */
  block_pos = position / rip->i_sp->s_block_size;  /* relative blk # in file */
  zone = block_pos >> scale;	/* position's zone */
  boff = (int) (block_pos - (zone << scale));  /* relative blk # within zone */
  dzones = rip->i_ndzones;
  nr_indirects = rip->i_nindirs;

  /* Is 'position' to be found in the inode itself? */
  if (zone < dzones) {
      zind = (int) zone;	/* index should be an int */
      z = rip->i_zone[zind];
      if (z == NO_ZONE) return(NO_BLOCK);
      b = ((block_t) z << scale) + boff;
      return(b);
  }

  /* It is not in the inode, so it must be single or double indirect. */
  excess = zone - dzones;	/* first Vx_NR_DZONES don't count */

  if (excess < nr_indirects) {
      /* 'position' can be located via the single indirect block. */
      z = rip->i_zone[dzones];
  } else {
      /* 'position' can be located via the double indirect block. */
      if ((z = rip->i_zone[dzones+1]) == NO_ZONE) return(NO_BLOCK);
      excess -= nr_indirects;		/* single indir doesn't count */
      b = (block_t) z << scale;
      ASSERT(rip->i_dev != NO_DEV);
      bp = get_block(rip->i_dev, b, NORMAL);	/* get double indirect block */
      index = (int) (excess / nr_indirects);
      ASSERT(bp->b_dev != NO_DEV);
      ASSERT(bp->b_dev == rip->i_dev);
      z = rd_indir(bp, index);			/* z = zone for single */
      put_block(bp, INDIRECT_BLOCK);		/* release double ind block */
      excess = excess % nr_indirects;		/* index into single ind blk */
  }

  /* 'z' is zone num for single indirect block; 'excess' is index into it. */
  if (z == NO_ZONE) return(NO_BLOCK);
  b = (block_t) z << scale;			/* b is blk # for single ind */
  bp = get_block(rip->i_dev, b, NORMAL);	/* get single indirect block */
  ex = (int) excess;				/* need an integer */
  z = rd_indir(bp, ex);				/* get block pointed to */
  put_block(bp, INDIRECT_BLOCK);		/* release single indir blk */
  if (z == NO_ZONE) return(NO_BLOCK);
  b = ((block_t) z << scale) + boff;
  return(b);
}
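/* Worked example for read_map() (illustrative V2 numbers): with a 1024-byte
 * block size, scale = 0 (zone size equals block size), i_ndzones = 7 and
 * i_nindirs = 256, position 300000 gives block_pos = zone = 292 and boff = 0.
 * Since 292 >= 7, excess = 285; 285 >= 256, so the double indirect zone is
 * consulted: index = (285 - 256) / 256 = 0 selects the first single indirect
 * zone, and the remaining excess of 29 indexes the data zone within it.
 */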
/*===========================================================================*
 *				rd_indir				     *
 *===========================================================================*/
PUBLIC zone_t rd_indir(bp, index)
struct buf *bp;			/* pointer to indirect block */
int index;			/* index into *bp */
{
/* Given a pointer to an indirect block, read one entry.  The reason for
 * making a separate routine out of this is that there are four cases:
 * V1 (IBM and 68000), and V2 (IBM and 68000).
 */

  struct super_block *sp;
  zone_t zone;			/* V2 zones are longs (shorts in V1) */

  if (bp == NULL)
      panic("rd_indir() on NULL");

  sp = get_super(bp->b_dev);	/* need super block to find file sys type */

  /* Read a zone from an indirect block. */
  if (sp->s_version == V1)
      zone = (zone_t) conv2(sp->s_native, (int) bp->b_v1_ind[index]);
  else
      zone = (zone_t) conv4(sp->s_native, (long) bp->b_v2_ind[index]);

  if (zone != NO_ZONE &&
      (zone < (zone_t) sp->s_firstdatazone || zone >= sp->s_zones)) {
      printf("Illegal zone number %ld in indirect block, index %d\n",
             (long) zone, index);
      panic("check file system");
  }

  return(zone);
}
/*===========================================================================*
 *				read_ahead				     *
 *===========================================================================*/
PUBLIC void read_ahead()
{
/* Read a block into the cache before it is needed. */
  int block_size;
  register struct inode *rip;
  struct buf *bp;
  block_t b;

  rip = rdahed_inode;		/* pointer to inode to read ahead from */
  block_size = get_block_size(rip->i_dev);
  rdahed_inode = NULL;		/* turn off read ahead */
  if ((b = read_map(rip, rdahedpos)) == NO_BLOCK) return;	/* at EOF */
  bp = rahead(rip, b, cvul64(rdahedpos), block_size);
  put_block(bp, PARTIAL_DATA_BLOCK);
}
/*===========================================================================*
 *				rahead					     *
 *===========================================================================*/
PUBLIC struct buf *rahead(rip, baseblock, position, bytes_ahead)
register struct inode *rip;	/* pointer to inode for file to be read */
block_t baseblock;		/* block at current position */
u64_t position;			/* position within file */
unsigned bytes_ahead;		/* bytes beyond position for immediate use */
{
/* Fetch a block from the cache or the device.  If a physical read is
 * required, prefetch as many more blocks as convenient into the cache.
 * This usually covers bytes_ahead and is at least BLOCKS_MINIMUM.
 * The device driver may decide it knows better and stop reading at a
 * cylinder boundary (or after an error).  Rw_scattered() puts an optional
 * flag on all reads to allow this.
 */
  int block_size;
/* Minimum number of blocks to prefetch. */
# define BLOCKS_MINIMUM		(nr_bufs < 50 ? 18 : 32)
  int block_spec, scale, read_q_size;
  unsigned int blocks_ahead, fragment;
  block_t block, blocks_left;
  off_t ind1_pos;
  dev_t dev;
  struct buf *bp;
  static int readqsize = 0;
  static struct buf **read_q;

  if (readqsize != nr_bufs) {
      if (readqsize > 0)
          free(read_q);
      if (!(read_q = malloc(sizeof(read_q[0]) * nr_bufs)))
          panic("couldn't allocate read_q");
      readqsize = nr_bufs;
  }

  block_spec = (rip->i_mode & I_TYPE) == I_BLOCK_SPECIAL;
  if (block_spec)
      dev = (dev_t) rip->i_zone[0];
  else
      dev = rip->i_dev;

  block_size = get_block_size(dev);

  block = baseblock;
  bp = get_block(dev, block, PREFETCH);
  if (bp->b_dev != NO_DEV) return(bp);
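  /* PREFETCH asks the buffer cache only to reserve a buffer: on a cache
   * miss, get_block() returns it with b_dev set to NO_DEV and performs no
   * disk I/O.  So b_dev != NO_DEV above means a cache hit, and the block
   * can be returned without any read-ahead.
   */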
  /* The best guess for the number of blocks to prefetch:  A lot.
   * It is impossible to tell what the device looks like, so we don't even
   * try to guess the geometry, but leave it to the driver.
   *
   * The floppy driver can read a full track with no rotational delay, and it
   * avoids reading partial tracks if it can, so handing it enough buffers to
   * read two tracks is perfect.  (Two, because some diskette types have
   * an odd number of sectors per track, so a block may span tracks.)
   *
   * The disk drivers don't try to be smart.  With today's disks it is
   * impossible to tell what the real geometry looks like, so it is best to
   * read as much as you can.  With luck the caching on the drive allows
   * for a little time to start the next read.
   *
   * The current solution below is a bit of a hack: it just reads blocks from
   * the current file position, hoping that more of the file can be found.  A
   * better solution must look at the already available zone pointers and
   * indirect blocks (but don't call read_map!).
   */
  fragment = rem64u(position, block_size);
  position = sub64u(position, fragment);
  bytes_ahead += fragment;

  blocks_ahead = (bytes_ahead + block_size - 1) / block_size;
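  /* Worked example (illustrative numbers): with block_size = 1024,
   * position = 10340 and bytes_ahead = 5000, the fragment is
   * 10340 % 1024 = 100, so position is rounded down to 10240 and
   * bytes_ahead grows to 5100; blocks_ahead = ceil(5100 / 1024) = 5.
   * The clamps below then bound this by NR_IOREQS, BLOCKS_MINIMUM, and
   * the number of blocks left in the file.
   */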
  if (block_spec && rip->i_size == 0) {
      blocks_left = NR_IOREQS;
  } else {
      blocks_left = (rip->i_size - ex64lo(position) + block_size - 1) /
                                                            block_size;

      /* Go for the first indirect block if we are in its neighborhood. */
      if (!block_spec) {
          scale = rip->i_sp->s_log_zone_size;
          ind1_pos = (off_t) rip->i_ndzones * (block_size << scale);
          if (ex64lo(position) <= ind1_pos && rip->i_size > ind1_pos) {
              blocks_ahead++;
              blocks_left++;
          }
      }
  }

  /* No more than the maximum request. */
  if (blocks_ahead > NR_IOREQS) blocks_ahead = NR_IOREQS;

  /* Read at least the minimum number of blocks, but not after a seek. */
  if (blocks_ahead < BLOCKS_MINIMUM && rip->i_seek == NO_SEEK)
      blocks_ahead = BLOCKS_MINIMUM;

  /* Can't go past end of file. */
  if (blocks_ahead > blocks_left) blocks_ahead = blocks_left;

  read_q_size = 0;

  /* Acquire block buffers. */
  for (;;) {
      read_q[read_q_size++] = bp;

      if (--blocks_ahead == 0) break;

      /* Don't trash the cache, leave 4 free. */
      if (bufs_in_use >= nr_bufs - 4) break;

      block++;

      bp = get_block(dev, block, PREFETCH);
      if (bp->b_dev != NO_DEV) {
          /* Oops, block already in the cache, get out. */
          put_block(bp, FULL_DATA_BLOCK);
          break;
      }
  }
  rw_scattered(dev, read_q, read_q_size, READING);
  return(get_block(dev, baseblock, NORMAL));
}
/*===========================================================================*
 *				fs_getdents				     *
 *===========================================================================*/
PUBLIC int fs_getdents(void)
{
  register struct inode *rip;
  int o, r, block_size, len, reclen, done;
  ino_t ino;
  block_t b;
  cp_grant_id_t gid;
  size_t size, tmpbuf_off, userbuf_off;
  off_t pos, off, block_pos, new_pos, ent_pos;
  struct buf *bp;
  struct direct *dp;
  struct dirent *dep;
  char *cp;

  ino = fs_m_in.REQ_INODE_NR;
  gid = fs_m_in.REQ_GRANT;
  size = fs_m_in.REQ_MEM_SIZE;
  pos = fs_m_in.REQ_SEEK_POS_LO;

  /* Check whether the position is properly aligned. */
  if (pos % DIR_ENTRY_SIZE)
      return(ENOENT);

  if ((rip = get_inode(fs_dev, ino)) == NULL)
      return(EINVAL);

  block_size = rip->i_sp->s_block_size;
  off = (pos % block_size);	/* Offset in block */
  block_pos = pos - off;
  done = FALSE;		/* Stop processing directory blocks when done is set */

  tmpbuf_off = 0;	/* Offset in getdents_buf */
  memset(getdents_buf, '\0', GETDENTS_BUFSIZ);	/* Avoid leaking any data */
  userbuf_off = 0;	/* Offset in the user's buffer */
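  /* Entries are staged in getdents_buf and flushed to the caller's grant
   * in batches: tmpbuf_off tracks how much of the staging buffer is filled,
   * userbuf_off how much has already been copied out to the user.
   */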
  /* The default position for the next request is EOF.  If the user's buffer
   * fills up before EOF, new_pos will be modified.
   */
  new_pos = rip->i_size;

  for (; block_pos < rip->i_size; block_pos += block_size) {
      b = read_map(rip, block_pos);	/* get block number */

      /* Since directories don't have holes, 'b' cannot be NO_BLOCK. */
      bp = get_block(rip->i_dev, b, NORMAL);	/* get a dir block */

      if (bp == NULL)
          panic("get_block returned NO_BLOCK");

      /* Search a directory block. */
      if (block_pos < pos)
          dp = &bp->b_dir[off / DIR_ENTRY_SIZE];
      else
          dp = &bp->b_dir[0];
      for (; dp < &bp->b_dir[NR_DIR_ENTRIES(block_size)]; dp++) {
          if (dp->d_ino == 0)
              continue;		/* Entry is not in use */

          /* Compute the length of the name. */
          cp = memchr(dp->d_name, '\0', NAME_MAX);
          if (cp == NULL)
              len = NAME_MAX;
          else
              len = cp - dp->d_name;

          /* Compute the record length, rounded up to a multiple of
           * sizeof(long). */
          reclen = offsetof(struct dirent, d_name) + len + 1;
          o = (reclen % sizeof(long));
          if (o != 0)
              reclen += sizeof(long) - o;
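          /* Example (illustrative sizes): if offsetof(struct dirent, d_name)
           * is 10 and sizeof(long) is 4, the 5-char name "hello" gives
           * reclen = 10 + 5 + 1 = 16, already aligned, while a 6-char name
           * gives 17, rounded up to 20.
           */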
          /* Need the position of this entry in the directory. */
          ent_pos = block_pos + ((char *) dp - bp->b_data);

          if (tmpbuf_off + reclen > GETDENTS_BUFSIZ) {
              r = sys_safecopyto(FS_PROC_NR, gid, userbuf_off,
                                 (vir_bytes) getdents_buf,
                                 tmpbuf_off, D);
              if (r != OK)
                  panic("fs_getdents: sys_safecopyto failed: %d", r);

              userbuf_off += tmpbuf_off;
              tmpbuf_off = 0;
          }

          if (userbuf_off + tmpbuf_off + reclen > size) {
              /* The user has no space for one more record. */
              done = TRUE;

              /* Record the position of this entry; it is the
               * starting point of the next request (unless the
               * position is modified with lseek).
               */
              new_pos = ent_pos;
              break;
          }

          dep = (struct dirent *) &getdents_buf[tmpbuf_off];
          dep->d_ino = dp->d_ino;
          dep->d_off = ent_pos;
          dep->d_reclen = reclen;
          memcpy(dep->d_name, dp->d_name, len);
          dep->d_name[len] = '\0';
          tmpbuf_off += reclen;
      }

      put_block(bp, DIRECTORY_BLOCK);
      if (done)
          break;
  }

  if (tmpbuf_off != 0) {
      r = sys_safecopyto(FS_PROC_NR, gid, userbuf_off,
                         (vir_bytes) getdents_buf, tmpbuf_off, D);
      if (r != OK)
          panic("fs_getdents: sys_safecopyto failed: %d", r);

      userbuf_off += tmpbuf_off;
  }

  if (done && userbuf_off == 0)
      r = EINVAL;	/* The user's buffer is too small */
  else {
      fs_m_out.RES_NBYTES = userbuf_off;
      fs_m_out.RES_SEEK_POS_LO = new_pos;
      rip->i_update |= ATIME;
      rip->i_dirt = DIRTY;
      r = OK;
  }

  put_inode(rip);	/* release the inode */
  return(r);
}