Drop main() prototype. Syncs with NetBSD-8
[minix.git] / minix / fs / ext2 / read.c
blob26595c5103203c16e2f81c80a4e7c7f0a5998e9b
1 /* Created (MFS based):
2 * February 2010 (Evgeniy Ivanov)
3 */
5 #include "fs.h"
6 #include <stddef.h>
7 #include <string.h>
8 #include <stdlib.h>
9 #include "buf.h"
10 #include "inode.h"
11 #include "super.h"
12 #include <sys/param.h>
13 #include <sys/dirent.h>
14 #include <assert.h>
17 static struct buf *rahead(struct inode *rip, block_t baseblock, u64_t
18 position, unsigned bytes_ahead);
19 static int rw_chunk(struct inode *rip, u64_t position, unsigned off,
20 size_t chunk, unsigned left, int call, struct fsdriver_data *data,
21 unsigned buf_off, unsigned int block_size, int *completed);
23 /*===========================================================================*
24 * fs_readwrite *
25 *===========================================================================*/
26 ssize_t fs_readwrite(ino_t ino_nr, struct fsdriver_data *data, size_t nrbytes,
27 off_t position, int call)
29 int r;
30 int regular;
31 off_t f_size, bytes_left;
32 size_t off, cum_io, block_size, chunk;
33 mode_t mode_word;
34 int completed;
35 struct inode *rip;
37 r = OK;
39 /* Find the inode referred */
40 if ((rip = find_inode(fs_dev, ino_nr)) == NULL)
41 return(EINVAL);
43 mode_word = rip->i_mode & I_TYPE;
44 regular = (mode_word == I_REGULAR);
46 /* Determine blocksize */
47 block_size = rip->i_sp->s_block_size;
48 f_size = rip->i_size;
49 if (f_size < 0) f_size = MAX_FILE_POS;
51 if (call == FSC_WRITE) {
52 /* Check in advance to see if file will grow too big. */
53 if (position > (off_t) (rip->i_sp->s_max_size - nrbytes))
54 return(EFBIG);
57 cum_io = 0;
58 /* Split the transfer into chunks that don't span two blocks. */
59 while (nrbytes != 0) {
60 off = (unsigned int) (position % block_size);/* offset in blk*/
61 chunk = block_size - off;
62 if (chunk > nrbytes)
63 chunk = nrbytes;
65 if (call == FSC_READ) {
66 bytes_left = f_size - position;
67 if (position >= f_size) break; /* we are beyond EOF */
68 if (chunk > bytes_left) chunk = (int) bytes_left;
71 /* Read or write 'chunk' bytes. */
72 r = rw_chunk(rip, ((u64_t)((unsigned long)position)), off, chunk,
73 nrbytes, call, data, cum_io, block_size, &completed);
75 if (r != OK) break;
77 /* Update counters and pointers. */
78 nrbytes -= chunk; /* bytes yet to be read */
79 cum_io += chunk; /* bytes read so far */
80 position += (off_t) chunk; /* position within the file */
83 /* On write, update file size and access time. */
84 if (call == FSC_WRITE) {
85 if (regular || mode_word == I_DIRECTORY) {
86 if (position > f_size) rip->i_size = position;
90 rip->i_seek = NO_SEEK;
92 if (r != OK)
93 return r;
95 if (call == FSC_READ) rip->i_update |= ATIME;
96 if (call == FSC_WRITE) rip->i_update |= CTIME | MTIME;
97 rip->i_dirt = IN_DIRTY; /* inode is thus now dirty */
99 return(cum_io);
103 /*===========================================================================*
104 * rw_chunk *
105 *===========================================================================*/
106 static int rw_chunk(rip, position, off, chunk, left, call, data, buf_off,
107 block_size, completed)
108 register struct inode *rip; /* pointer to inode for file to be rd/wr */
109 u64_t position; /* position within file to read or write */
110 unsigned off; /* off within the current block */
111 size_t chunk; /* number of bytes to read or write */
112 unsigned left; /* max number of bytes wanted after position */
113 int call; /* FSC_READ, FSC_WRITE, or FSC_PEEK */
114 struct fsdriver_data *data; /* structure for (remote) user buffer */
115 unsigned buf_off; /* offset in user buffer */
116 unsigned int block_size; /* block size of FS operating on */
117 int *completed; /* number of bytes copied */
119 /* Read or write (part of) a block. */
121 struct buf *bp = NULL;
122 register int r = OK;
123 int n;
124 block_t b;
125 dev_t dev;
126 ino_t ino = VMC_NO_INODE;
127 u64_t ino_off = rounddown(position, block_size);
129 *completed = 0;
131 if (ex64hi(position) != 0)
132 panic("rw_chunk: position too high");
133 b = read_map(rip, (off_t) ex64lo(position), 0);
134 dev = rip->i_dev;
135 ino = rip->i_num;
136 assert(ino != VMC_NO_INODE);
138 if (b == NO_BLOCK) {
139 if (call == FSC_READ) {
140 /* Reading from a nonexistent block. Must read as all zeros.*/
141 r = fsdriver_zero(data, buf_off, chunk);
142 if(r != OK) {
143 printf("ext2fs: fsdriver_zero failed\n");
145 return r;
146 } else if (call == FSC_PEEK) {
147 /* Peeking a nonexistent block. Report to VM. */
148 lmfs_zero_block_ino(dev, ino, ino_off);
149 return OK;
150 } else {
151 /* Writing to a nonexistent block.
152 * Create and enter in inode.
154 if ((bp = new_block(rip, (off_t) ex64lo(position))) == NULL)
155 return(err_code);
157 } else if (call != FSC_WRITE) {
158 /* Read and read ahead if convenient. */
159 bp = rahead(rip, b, position, left);
160 } else {
161 /* Normally an existing block to be partially overwritten is first read
162 * in. However, a full block need not be read in. If it is already in
163 * the cache, acquire it, otherwise just acquire a free buffer.
165 n = (chunk == block_size ? NO_READ : NORMAL);
166 if (off == 0 && (off_t) ex64lo(position) >= rip->i_size)
167 n = NO_READ;
168 assert(ino != VMC_NO_INODE);
169 assert(!(ino_off % block_size));
170 if ((r = lmfs_get_block_ino(&bp, dev, b, n, ino, ino_off)) != OK)
171 panic("ext2: error getting block (%llu,%u): %d", dev, b, r);
174 /* In all cases, bp now points to a valid buffer. */
175 if (bp == NULL)
176 panic("bp not valid in rw_chunk, this can't happen");
178 if (call == FSC_WRITE && chunk != block_size &&
179 (off_t) ex64lo(position) >= rip->i_size && off == 0) {
180 zero_block(bp);
183 if (call == FSC_READ) {
184 /* Copy a chunk from the block buffer to user space. */
185 r = fsdriver_copyout(data, buf_off, b_data(bp)+off, chunk);
186 } else if (call == FSC_WRITE) {
187 /* Copy a chunk from user space to the block buffer. */
188 r = fsdriver_copyin(data, buf_off, b_data(bp)+off, chunk);
189 lmfs_markdirty(bp);
192 put_block(bp);
194 return(r);
198 /*===========================================================================*
199 * read_map *
200 *===========================================================================*/
201 block_t read_map(rip, position, opportunistic)
202 register struct inode *rip; /* ptr to inode to map from */
203 off_t position; /* position in file whose blk wanted */
204 int opportunistic;
206 /* Given an inode and a position within the corresponding file, locate the
207 * block number in which that position is to be found and return it.
210 struct buf *bp;
211 int mindex;
212 block_t b;
213 unsigned long excess, block_pos;
214 static char first_time = TRUE;
215 static long addr_in_block;
216 static long addr_in_block2;
217 static long doub_ind_s;
218 static long triple_ind_s;
219 static long out_range_s;
220 int iomode;
222 iomode = opportunistic ? PEEK : NORMAL;
224 if (first_time) {
225 addr_in_block = rip->i_sp->s_block_size / BLOCK_ADDRESS_BYTES;
226 addr_in_block2 = addr_in_block * addr_in_block;
227 doub_ind_s = EXT2_NDIR_BLOCKS + addr_in_block;
228 triple_ind_s = doub_ind_s + addr_in_block2;
229 out_range_s = triple_ind_s + addr_in_block2 * addr_in_block;
230 first_time = FALSE;
233 block_pos = position / rip->i_sp->s_block_size; /* relative blk # in file */
235 /* Is 'position' to be found in the inode itself? */
236 if (block_pos < EXT2_NDIR_BLOCKS)
237 return(rip->i_block[block_pos]);
239 /* It is not in the inode, so it must be single, double or triple indirect */
240 if (block_pos < doub_ind_s) {
241 b = rip->i_block[EXT2_NDIR_BLOCKS]; /* address of single indirect block */
242 mindex = block_pos - EXT2_NDIR_BLOCKS;
243 } else if (block_pos >= out_range_s) { /* TODO: do we need it? */
244 return(NO_BLOCK);
245 } else {
246 /* double or triple indirect block. At first if it's triple,
247 * find double indirect block.
249 excess = block_pos - doub_ind_s;
250 b = rip->i_block[EXT2_DIND_BLOCK];
251 if (block_pos >= triple_ind_s) {
252 b = rip->i_block[EXT2_TIND_BLOCK];
253 if (b == NO_BLOCK) return(NO_BLOCK);
254 bp = get_block(rip->i_dev, b, NORMAL); /* get triple ind block */
255 excess = block_pos - triple_ind_s;
256 mindex = excess / addr_in_block2;
257 b = rd_indir(bp, mindex); /* num of double ind block */
258 put_block(bp); /* release triple ind block */
259 excess = excess % addr_in_block2;
261 if (b == NO_BLOCK) return(NO_BLOCK);
262 bp = get_block(rip->i_dev, b, iomode); /* get double indirect block */
263 if (bp == NULL)
264 return NO_BLOCK; /* peeking failed */
265 mindex = excess / addr_in_block;
266 b = rd_indir(bp, mindex); /* num of single ind block */
267 put_block(bp); /* release double ind block */
268 mindex = excess % addr_in_block; /* index into single ind blk */
270 if (b == NO_BLOCK) return(NO_BLOCK);
271 bp = get_block(rip->i_dev, b, iomode); /* get single indirect block */
272 if (bp == NULL)
273 return NO_BLOCK; /* peeking failed */
275 b = rd_indir(bp, mindex);
276 put_block(bp); /* release single ind block */
278 return(b);
281 struct buf *get_block_map(register struct inode *rip, u64_t position)
283 struct buf *bp;
284 int r, block_size;
285 block_t b = read_map(rip, position, 0); /* get block number */
286 if(b == NO_BLOCK)
287 return NULL;
288 block_size = get_block_size(rip->i_dev);
289 position = rounddown(position, block_size);
290 assert(rip->i_num != VMC_NO_INODE);
291 if ((r = lmfs_get_block_ino(&bp, rip->i_dev, b, NORMAL, rip->i_num,
292 position)) != OK)
293 panic("ext2: error getting block (%llu,%u): %d",
294 rip->i_dev, b, r);
295 return bp;
298 /*===========================================================================*
299 * rd_indir *
300 *===========================================================================*/
301 block_t rd_indir(bp, mindex)
302 struct buf *bp; /* pointer to indirect block */
303 int mindex; /* index into *bp */
305 if (bp == NULL)
306 panic("rd_indir() on NULL");
307 /* TODO: use conv call */
308 return conv4(le_CPU, b_ind(bp)[mindex]);
312 /*===========================================================================*
313 * rahead *
314 *===========================================================================*/
315 static struct buf *rahead(rip, baseblock, position, bytes_ahead)
316 register struct inode *rip; /* pointer to inode for file to be read */
317 block_t baseblock; /* block at current position */
318 u64_t position; /* position within file */
319 unsigned bytes_ahead; /* bytes beyond position for immediate use */
321 /* Fetch a block from the cache or the device. If a physical read is
322 * required, prefetch as many more blocks as convenient into the cache.
323 * This usually covers bytes_ahead and is at least BLOCKS_MINIMUM.
324 * The device driver may decide it knows better and stop reading at a
325 * cylinder boundary (or after an error). Rw_scattered() puts an optional
326 * flag on all reads to allow this.
328 /* Minimum number of blocks to prefetch. */
329 # define BLOCKS_MINIMUM 32
330 int r, read_q_size;
331 unsigned int blocks_ahead, fragment, block_size;
332 block_t block, blocks_left;
333 off_t ind1_pos;
334 dev_t dev;
335 struct buf *bp = NULL;
336 static block64_t read_q[LMFS_MAX_PREFETCH];
337 u64_t position_running;
339 dev = rip->i_dev;
340 assert(dev != NO_DEV);
341 block_size = get_block_size(dev);
343 block = baseblock;
345 fragment = position % block_size;
346 position -= fragment;
347 position_running = position;
348 bytes_ahead += fragment;
349 blocks_ahead = (bytes_ahead + block_size - 1) / block_size;
351 r = lmfs_get_block_ino(&bp, dev, block, PEEK, rip->i_num, position);
352 if (r == OK)
353 return(bp);
354 if (r != ENOENT)
355 panic("ext2: error getting block (%llu,%u): %d", dev, block, r);
357 /* The best guess for the number of blocks to prefetch: A lot.
358 * It is impossible to tell what the device looks like, so we don't even
359 * try to guess the geometry, but leave it to the driver.
361 * The floppy driver can read a full track with no rotational delay, and it
362 * avoids reading partial tracks if it can, so handing it enough buffers to
363 * read two tracks is perfect. (Two, because some diskette types have
364 * an odd number of sectors per track, so a block may span tracks.)
366 * The disk drivers don't try to be smart. With todays disks it is
367 * impossible to tell what the real geometry looks like, so it is best to
368 * read as much as you can. With luck the caching on the drive allows
369 * for a little time to start the next read.
371 * The current solution below is a bit of a hack, it just reads blocks from
372 * the current file position hoping that more of the file can be found. A
373 * better solution must look at the already available
374 * indirect blocks (but don't call read_map!).
377 blocks_left = (block_t) (rip->i_size-ex64lo(position)+(block_size-1)) /
378 block_size;
380 /* Go for the first indirect block if we are in its neighborhood. */
381 ind1_pos = (EXT2_NDIR_BLOCKS) * block_size;
382 if ((off_t) ex64lo(position) <= ind1_pos && rip->i_size > ind1_pos) {
383 blocks_ahead++;
384 blocks_left++;
387 /* Read at least the minimum number of blocks, but not after a seek. */
388 if (blocks_ahead < BLOCKS_MINIMUM && rip->i_seek == NO_SEEK)
389 blocks_ahead = BLOCKS_MINIMUM;
391 /* Can't go past end of file. */
392 if (blocks_ahead > blocks_left) blocks_ahead = blocks_left;
394 /* No more than the maximum request. */
395 if (blocks_ahead > LMFS_MAX_PREFETCH) blocks_ahead = LMFS_MAX_PREFETCH;
397 read_q_size = 0;
399 /* Acquire block buffers. */
400 for (;;) {
401 block_t thisblock;
402 read_q[read_q_size++] = block;
404 if (--blocks_ahead == 0) break;
406 block++;
407 position_running += block_size;
409 thisblock = read_map(rip, (off_t) ex64lo(position_running), 1);
410 if (thisblock != NO_BLOCK) {
411 r = lmfs_get_block_ino(&bp, dev, thisblock, PEEK, rip->i_num,
412 position_running);
413 block = thisblock;
414 } else
415 r = lmfs_get_block(&bp, dev, block, PEEK);
417 if (r == OK) {
418 /* Oops, block already in the cache, get out. */
419 put_block(bp);
420 break;
422 if (r != ENOENT)
423 panic("ext2: error getting block (%llu,%u): %d", dev, block,
426 lmfs_prefetch(dev, read_q, read_q_size);
428 r = lmfs_get_block_ino(&bp, dev, baseblock, NORMAL, rip->i_num, position);
429 if (r != OK)
430 panic("ext2: error getting block (%llu,%u): %d", dev, baseblock, r);
431 return bp;
435 /*===========================================================================*
436 * get_dtype *
437 *===========================================================================*/
438 static unsigned int get_dtype(struct ext2_disk_dir_desc *dp)
440 /* Return the type of the file identified by the given directory entry. */
442 if (!HAS_INCOMPAT_FEATURE(superblock, INCOMPAT_FILETYPE))
443 return DT_UNKNOWN;
445 switch (dp->d_file_type) {
446 case EXT2_FT_REG_FILE: return DT_REG;
447 case EXT2_FT_DIR: return DT_DIR;
448 case EXT2_FT_SYMLINK: return DT_LNK;
449 case EXT2_FT_BLKDEV: return DT_BLK;
450 case EXT2_FT_CHRDEV: return DT_CHR;
451 case EXT2_FT_FIFO: return DT_FIFO;
452 default: return DT_UNKNOWN;
456 /*===========================================================================*
457 * fs_getdents *
458 *===========================================================================*/
459 ssize_t fs_getdents(ino_t ino_nr, struct fsdriver_data *data, size_t bytes,
460 off_t *posp)
462 #define GETDENTS_BUFSIZE (sizeof(struct dirent) + EXT2_NAME_MAX + 1)
463 #define GETDENTS_ENTRIES 8
464 static char getdents_buf[GETDENTS_BUFSIZE * GETDENTS_ENTRIES];
465 struct fsdriver_dentry fsdentry;
466 struct inode *rip;
467 int r, done;
468 unsigned int block_size, len;
469 off_t pos, off, block_pos, new_pos, ent_pos;
470 struct buf *bp;
471 struct ext2_disk_dir_desc *d_desc;
472 ino_t child_nr;
474 /* Check whether the position is properly aligned */
475 pos = *posp;
476 if ((unsigned int) pos % DIR_ENTRY_ALIGN)
477 return(ENOENT);
479 if ((rip = get_inode(fs_dev, ino_nr)) == NULL)
480 return(EINVAL);
482 block_size = rip->i_sp->s_block_size;
483 off = (pos % block_size); /* Offset in block */
484 block_pos = pos - off;
485 done = FALSE; /* Stop processing directory blocks when done is set */
487 fsdriver_dentry_init(&fsdentry, data, bytes, getdents_buf,
488 sizeof(getdents_buf));
490 /* The default position for the next request is EOF. If the user's buffer
491 * fills up before EOF, new_pos will be modified. */
492 new_pos = rip->i_size;
494 r = 0;
496 for (; block_pos < rip->i_size; block_pos += block_size) {
497 off_t temp_pos = block_pos;
498 /* Since directories don't have holes, 'bp' cannot be NULL. */
499 bp = get_block_map(rip, block_pos); /* get a dir block */
500 assert(bp != NULL);
501 assert(bp != NULL);
503 /* Search a directory block. */
504 d_desc = (struct ext2_disk_dir_desc*) &b_data(bp);
506 /* we need to seek to entry at off bytes.
507 * when NEXT_DISC_DIR_POS == block_size it's last dentry.
509 for (; temp_pos + conv2(le_CPU, d_desc->d_rec_len) <= pos
510 && NEXT_DISC_DIR_POS(d_desc, &b_data(bp)) < block_size;
511 d_desc = NEXT_DISC_DIR_DESC(d_desc)) {
512 temp_pos += conv2(le_CPU, d_desc->d_rec_len);
515 for (; CUR_DISC_DIR_POS(d_desc, &b_data(bp)) < block_size;
516 d_desc = NEXT_DISC_DIR_DESC(d_desc)) {
517 if (d_desc->d_ino == 0)
518 continue; /* Entry is not in use */
520 len = d_desc->d_name_len;
521 assert(len <= NAME_MAX);
522 assert(len <= EXT2_NAME_MAX);
524 /* Need the position of this entry in the directory */
525 ent_pos = block_pos + ((char *)d_desc - b_data(bp));
527 child_nr = (ino_t) conv4(le_CPU, d_desc->d_ino);
528 r = fsdriver_dentry_add(&fsdentry, child_nr, d_desc->d_name,
529 len, get_dtype(d_desc));
531 /* If the user buffer is full, or an error occurred, stop. */
532 if (r <= 0) {
533 done = TRUE;
535 /* Record the position of this entry, it is the
536 * starting point of the next request (unless the
537 * position is modified with lseek).
539 new_pos = ent_pos;
540 break;
544 put_block(bp);
545 if (done)
546 break;
549 if (r >= 0 && (r = fsdriver_dentry_finish(&fsdentry)) >= 0) {
550 *posp = new_pos;
551 rip->i_update |= ATIME;
552 rip->i_dirt = IN_DIRTY;
555 put_inode(rip); /* release the inode */
556 return(r);