1 /* Created (MFS based):
2 * February 2010 (Evgeniy Ivanov) */
10 #include <minix/u64.h>
14 #include <minix/vfsif.h>
16 #include <sys/param.h>
19 static struct buf
*rahead(struct inode
*rip
, block_t baseblock
, u64_t
20 position
, unsigned bytes_ahead
);
21 static int rw_chunk(struct inode
*rip
, u64_t position
, unsigned off
,
22 size_t chunk
, unsigned left
, int rw_flag
, cp_grant_id_t gid
, unsigned
23 buf_off
, unsigned int block_size
, int *completed
);
25 static off_t rdahedpos
; /* position to read ahead */
/* rdahedpos (declared just above) and rdahed_inode together describe one
 * pending read-ahead request.  fs_readwrite stores the file position in
 * rdahedpos; the read-ahead routine further down copies rdahed_inode into a
 * local, resets rdahed_inode to NULL, and maps rdahedpos with read_map().
 * The two declarations above this note are the forward declarations of the
 * file-local helpers rahead() and rw_chunk().
 */
26 static struct inode
*rdahed_inode
; /* pointer to inode to read ahead */
28 /*===========================================================================*
30 *===========================================================================*/
/* fs_readwrite: serve a read or write request described by the global
 * request message fs_m_in.  REQ_READ selects READING; any other message type
 * is treated as WRITING.  The transfer is cut into chunks that never cross a
 * block boundary, each chunk is handed to rw_chunk(), and the resulting file
 * position and byte count are stored in fs_m_out.
 *
 * NOTE(review): a number of lines of this function (braces, return
 * statements, some declarations and assignments) are not present in this
 * listing.  The notes below describe only the statements that are visible.
 */
31 int fs_readwrite(void)
33 int r
, rw_flag
, block_spec
;
36 off_t position
, f_size
, bytes_left
;
37 unsigned int off
, cum_io
, block_size
, chunk
;
45 /* Find the inode referred */
46 if ((rip
= find_inode(fs_dev
, (ino_t
) fs_m_in
.REQ_INODE_NR
)) == NULL
)
/* Classify the inode by its type bits: regular is set for regular files and
 * named pipes, block_spec is 1 for block special files and 0 otherwise.
 */
49 mode_word
= rip
->i_mode
& I_TYPE
;
50 regular
= (mode_word
== I_REGULAR
|| mode_word
== I_NAMED_PIPE
);
51 block_spec
= (mode_word
== I_BLOCK_SPECIAL
? 1 : 0);
53 /* Determine blocksize */
/* Two alternative block_size assignments are visible: one derived from the
 * device number held in i_block[0] (paired with f_size = MAX_FILE_POS) and
 * one taken from the superblock.  The test that chooses between them is not
 * present in this listing; presumably it is block_spec (TODO confirm).
 */
55 block_size
= get_block_size( (dev_t
) rip
->i_block
[0]);
56 f_size
= MAX_FILE_POS
;
58 block_size
= rip
->i_sp
->s_block_size
;
60 if (f_size
< 0) f_size
= MAX_FILE_POS
;
63 /* Get the values from the request message */
64 rw_flag
= (fs_m_in
.m_type
== REQ_READ
? READING
: WRITING
);
65 gid
= (cp_grant_id_t
) fs_m_in
.REQ_GRANT
;
66 position
= (off_t
) fs_m_in
.REQ_SEEK_POS_LO
;
67 nrbytes
= (size_t) fs_m_in
.REQ_NBYTES
;
69 rdwt_err
= OK
; /* set to EIO if disk error occurs */
71 if (rw_flag
== WRITING
&& !block_spec
) {
72 /* Check in advance to see if file will grow too big. */
73 if (position
> (off_t
) (rip
->i_sp
->s_max_size
- nrbytes
))
78 /* Split the transfer into chunks that don't span two blocks. */
79 while (nrbytes
!= 0) {
80 off
= (unsigned int) (position
% block_size
);/* offset in blk*/
81 chunk
= MIN(nrbytes
, block_size
- off
);
/* For reads, stop once position has reached f_size and otherwise shorten
 * the chunk to the bytes that remain before f_size.
 */
83 if (rw_flag
== READING
) {
84 bytes_left
= f_size
- position
;
85 if (position
>= f_size
) break; /* we are beyond EOF */
86 if (chunk
> bytes_left
) chunk
= (int) bytes_left
;
89 /* Read or write 'chunk' bytes. */
/* In this call nrbytes is passed as the left parameter and cum_io as the
 * buf_off parameter (the offset into the grant), per the rw_chunk prototype.
 */
90 r
= rw_chunk(rip
, cvul64((unsigned long) position
), off
, chunk
,
91 nrbytes
, rw_flag
, gid
, cum_io
, block_size
, &completed
);
93 if (r
!= OK
) break; /* EOF reached */
94 if (rdwt_err
< 0) break;
96 /* Update counters and pointers. */
97 nrbytes
-= chunk
; /* bytes yet to be read */
98 cum_io
+= chunk
; /* bytes read so far */
99 position
+= (off_t
) chunk
; /* position within the file */
102 fs_m_out
.RES_SEEK_POS_LO
= position
; /* It might change later and the VFS
103 has to know this value */
105 /* On write, update file size and access time. */
106 if (rw_flag
== WRITING
) {
107 if (regular
|| mode_word
== I_DIRECTORY
) {
108 if (position
> f_size
) rip
->i_size
= position
;
112 /* Check to see if read-ahead is called for, and if so, set it up. */
113 if(rw_flag
== READING
&& rip
->i_seek
== NO_SEEK
&&
114 (unsigned int) position
% block_size
== 0 &&
115 (regular
|| mode_word
== I_DIRECTORY
)) {
117 rdahedpos
= position
;
120 rip
->i_seek
= NO_SEEK
;
/* A recorded rdwt_err replaces r, except that END_OF_FILE is reported to
 * the caller as OK.
 */
122 if (rdwt_err
!= OK
) r
= rdwt_err
; /* check for disk error */
123 if (rdwt_err
== END_OF_FILE
) r
= OK
;
/* Flag ATIME for update after a read, CTIME and MTIME after a write, and
 * mark the inode dirty.
 */
126 if (rw_flag
== READING
) rip
->i_update
|= ATIME
;
127 if (rw_flag
== WRITING
) rip
->i_update
|= CTIME
| MTIME
;
128 rip
->i_dirt
= IN_DIRTY
; /* inode is thus now dirty */
/* Report the number of bytes accumulated in cum_io. */
131 fs_m_out
.RES_NBYTES
= cum_io
;
137 /*===========================================================================*
139 *===========================================================================*/
/* fs_breadwrite: serve a block read or write request on the device named in
 * REQ_DEV2.  REQ_BREAD selects READING; any other message type is treated as
 * WRITING.  A local pseudo inode of type I_BLOCK_SPECIAL is filled in so that
 * rw_chunk() can be reused.  The 64-bit position is assembled from
 * REQ_SEEK_POS_LO and REQ_SEEK_POS_HI and handed back split the same way.
 *
 * NOTE(review): several lines of this function (declarations, braces, the
 * return statement) are not present in this listing.
 */
140 int fs_breadwrite(void)
142 int r
, rw_flag
, completed
;
145 unsigned int off
, cum_io
, chunk
, block_size
;
148 /* Pseudo inode for rw_chunk */
153 /* Get the values from the request message */
154 rw_flag
= (fs_m_in
.m_type
== REQ_BREAD
? READING
: WRITING
);
155 gid
= (cp_grant_id_t
) fs_m_in
.REQ_GRANT
;
156 position
= make64((unsigned long) fs_m_in
.REQ_SEEK_POS_LO
,
157 (unsigned long) fs_m_in
.REQ_SEEK_POS_HI
);
158 nrbytes
= (size_t) fs_m_in
.REQ_NBYTES
;
160 block_size
= get_block_size( (dev_t
) fs_m_in
.REQ_DEV2
);
/* Store the device number in i_block[0] of the pseudo inode; rw_chunk() and
 * rahead() contain statements that read i_block[0] back as the device.
 */
162 rip
.i_block
[0] = (block_t
) fs_m_in
.REQ_DEV2
;
163 rip
.i_mode
= I_BLOCK_SPECIAL
;
166 rdwt_err
= OK
; /* set to EIO if disk error occurs */
169 /* Split the transfer into chunks that don't span two blocks. */
170 while (nrbytes
> 0) {
171 off
= rem64u(position
, block_size
); /* offset in blk*/
172 chunk
= min(nrbytes
, block_size
- off
);
174 /* Read or write 'chunk' bytes. */
175 r
= rw_chunk(&rip
, position
, off
, chunk
, nrbytes
, rw_flag
, gid
,
176 cum_io
, block_size
, &completed
);
178 if (r
!= OK
) break; /* EOF reached */
179 if (rdwt_err
< 0) break;
181 /* Update counters and pointers. */
182 nrbytes
-= chunk
; /* bytes yet to be read */
183 cum_io
+= chunk
; /* bytes read so far */
184 position
= add64ul(position
, chunk
); /* position within the file */
/* Hand the final 64-bit position back as separate low and high words. */
187 fs_m_out
.RES_SEEK_POS_LO
= ex64lo(position
);
188 fs_m_out
.RES_SEEK_POS_HI
= ex64hi(position
);
/* A recorded rdwt_err replaces r, except that END_OF_FILE is reported to
 * the caller as OK.
 */
190 if (rdwt_err
!= OK
) r
= rdwt_err
; /* check for disk error */
191 if (rdwt_err
== END_OF_FILE
) r
= OK
;
193 fs_m_out
.RES_NBYTES
= cum_io
;
199 /*===========================================================================*
201 *===========================================================================*/
/* rw_chunk: read or write (part of) one block.  The block that holds
 * position is located, a buffer is obtained for it, and chunk bytes are
 * copied between that buffer (starting at off) and the grant gid (starting
 * at buf_off) with sys_safecopyto() when reading or sys_safecopyfrom() when
 * writing.  The parameters use old-style declarations that follow the
 * parameter name list.
 *
 * NOTE(review): many lines of this function (local declarations, braces,
 * else branches, the return statement) are not present in this listing, and
 * one block comment below has lost its terminator.
 */
202 static int rw_chunk(rip
, position
, off
, chunk
, left
, rw_flag
, gid
,
203 buf_off
, block_size
, completed
)
204 register struct inode
*rip
; /* pointer to inode for file to be rd/wr */
205 u64_t position
; /* position within file to read or write */
206 unsigned off
; /* off within the current block */
207 unsigned int chunk
; /* number of bytes to read or write */
208 unsigned left
; /* max number of bytes wanted after position */
209 int rw_flag
; /* READING or WRITING */
210 cp_grant_id_t gid
; /* grant */
211 unsigned buf_off
; /* offset in grant */
212 unsigned int block_size
; /* block size of FS operating on */
213 int *completed
; /* number of bytes copied */
215 /* Read or write (part of) a block. */
217 register struct buf
*bp
;
/* block_spec is nonzero when the inode type is I_BLOCK_SPECIAL. */
225 block_spec
= (rip
->i_mode
& I_TYPE
) == I_BLOCK_SPECIAL
;
/* Two ways of finding block b are visible.  The first divides position by
 * block_size and takes the device from i_block[0].  The second panics when
 * ex64hi(position) is nonzero and then maps the low word of position
 * through read_map().  The test that chooses between them is not present in
 * this listing; presumably it is block_spec (TODO confirm).
 */
228 b
= div64u(position
, block_size
);
229 dev
= (dev_t
) rip
->i_block
[0];
231 if (ex64hi(position
) != 0)
232 panic("rw_chunk: position too high");
233 b
= read_map(rip
, (off_t
) ex64lo(position
));
237 if (!block_spec
&& b
== NO_BLOCK
) {
238 if (rw_flag
== READING
) {
239 /* Reading from a nonexistent block. Must read as all zeros.*/
240 bp
= get_block(NO_DEV
, NO_BLOCK
, NORMAL
); /* get a buffer */
243 /* Writing to a nonexistent block. Create and enter in inode.*/
244 if ((bp
= new_block(rip
, (off_t
) ex64lo(position
))) == NULL
)
247 } else if (rw_flag
== READING
) {
248 /* Read and read ahead if convenient. */
249 bp
= rahead(rip
, b
, position
, left
);
251 /* Normally an existing block to be partially overwritten is first read
252 * in. However, a full block need not be read in. If it is already in
253 * the cache, acquire it, otherwise just acquire a free buffer.
255 n
= (chunk
== block_size
? NO_READ
: NORMAL
);
256 if (!block_spec
&& off
== 0 && (off_t
) ex64lo(position
) >= rip
->i_size
)
258 bp
= get_block(dev
, b
, n
);
261 /* In all cases, bp now points to a valid buffer. */
263 panic("bp not valid in rw_chunk, this can't happen");
/* This test matches a write of less than a full block that starts at offset
 * 0 of a block at or beyond i_size on a file that is not block special.
 * The statement it guards is not present in this listing.
 */
265 if (rw_flag
== WRITING
&& chunk
!= block_size
&& !block_spec
&&
266 (off_t
) ex64lo(position
) >= rip
->i_size
&& off
== 0) {
270 if (rw_flag
== READING
) {
271 /* Copy a chunk from the block buffer to user space. */
272 r
= sys_safecopyto(VFS_PROC_NR
, gid
, (vir_bytes
) buf_off
,
273 (vir_bytes
) (b_data(bp
)+off
), (size_t) chunk
);
275 /* Copy a chunk from user space to the block buffer. */
276 r
= sys_safecopyfrom(VFS_PROC_NR
, gid
, (vir_bytes
) buf_off
,
277 (vir_bytes
) (b_data(bp
)+off
), (size_t) chunk
);
/* n is set to FULL_DATA_BLOCK when the chunk ends exactly at the end of the
 * block and to PARTIAL_DATA_BLOCK otherwise.  The statement that uses n is
 * not present in this listing.
 */
281 n
= (off
+ chunk
== block_size
? FULL_DATA_BLOCK
: PARTIAL_DATA_BLOCK
);
288 /*===========================================================================*
290 *===========================================================================*/
/* read_map: translate a byte position within a file into a block number.
 * Relative block numbers below EXT2_NDIR_BLOCKS are read straight from
 * i_block[] in the inode.  Larger ones are resolved through the single,
 * double or triple indirect block, each fetched with get_block(), read with
 * rd_indir() and released with put_block().  NO_BLOCK is returned as soon as
 * an indirect block needed on the way is NO_BLOCK.
 *
 * The static variables hold limits derived from the block size:
 * addr_in_block is s_block_size / BLOCK_ADDRESS_BYTES, and doub_ind_s,
 * triple_ind_s and out_range_s are the relative block numbers that the code
 * compares against to pick the double indirect path, the triple indirect
 * path, and the out-of-range case respectively.
 *
 * NOTE(review): several lines of this function (local declarations, braces,
 * the guard that uses first_time, the final return) are not present in this
 * listing, and two block comments below have lost their terminators.
 */
291 block_t
read_map(rip
, position
)
292 register struct inode
*rip
; /* ptr to inode to map from */
293 off_t position
; /* position in file whose blk wanted */
295 /* Given an inode and a position within the corresponding file, locate the
296 * block number in which that position is to be found and return it.
302 unsigned long excess
, block_pos
;
303 static char first_time
= TRUE
;
304 static long addr_in_block
;
305 static long addr_in_block2
;
306 static long doub_ind_s
;
307 static long triple_ind_s
;
308 static long out_range_s
;
311 addr_in_block
= rip
->i_sp
->s_block_size
/ BLOCK_ADDRESS_BYTES
;
312 addr_in_block2
= addr_in_block
* addr_in_block
;
313 doub_ind_s
= EXT2_NDIR_BLOCKS
+ addr_in_block
;
314 triple_ind_s
= doub_ind_s
+ addr_in_block2
;
315 out_range_s
= triple_ind_s
+ addr_in_block2
* addr_in_block
;
319 block_pos
= position
/ rip
->i_sp
->s_block_size
; /* relative blk # in file */
321 /* Is 'position' to be found in the inode itself? */
322 if (block_pos
< EXT2_NDIR_BLOCKS
)
323 return(rip
->i_block
[block_pos
]);
325 /* It is not in the inode, so it must be single, double or triple indirect */
326 if (block_pos
< doub_ind_s
) {
327 b
= rip
->i_block
[EXT2_NDIR_BLOCKS
]; /* address of single indirect block */
328 index
= block_pos
- EXT2_NDIR_BLOCKS
;
329 } else if (block_pos
>= out_range_s
) { /* TODO: do we need it? */
332 /* double or triple indirect block. At first if it's triple,
333 * find double indirect block.
335 excess
= block_pos
- doub_ind_s
;
336 b
= rip
->i_block
[EXT2_DIND_BLOCK
];
337 if (block_pos
>= triple_ind_s
) {
338 b
= rip
->i_block
[EXT2_TIND_BLOCK
];
339 if (b
== NO_BLOCK
) return(NO_BLOCK
);
340 bp
= get_block(rip
->i_dev
, b
, NORMAL
); /* get triple ind block */
341 ASSERT(lmfs_dev(bp
) != NO_DEV
);
342 ASSERT(lmfs_dev(bp
) == rip
->i_dev
);
343 excess
= block_pos
- triple_ind_s
;
344 index
= excess
/ addr_in_block2
;
345 b
= rd_indir(bp
, index
); /* num of double ind block */
346 put_block(bp
, INDIRECT_BLOCK
); /* release triple ind block */
/* Keep only the offset inside the double indirect block just selected. */
347 excess
= excess
% addr_in_block2
;
/* b now names the double indirect block, taken either from
 * i_block[EXT2_DIND_BLOCK] or from the triple indirect block.
 */
349 if (b
== NO_BLOCK
) return(NO_BLOCK
);
350 bp
= get_block(rip
->i_dev
, b
, NORMAL
); /* get double indirect block */
351 ASSERT(lmfs_dev(bp
) != NO_DEV
);
352 ASSERT(lmfs_dev(bp
) == rip
->i_dev
);
353 index
= excess
/ addr_in_block
;
354 b
= rd_indir(bp
, index
); /* num of single ind block */
355 put_block(bp
, INDIRECT_BLOCK
); /* release double ind block */
356 index
= excess
% addr_in_block
; /* index into single ind blk */
/* Last step: read entry index of the single indirect block b. */
358 if (b
== NO_BLOCK
) return(NO_BLOCK
);
359 bp
= get_block(rip
->i_dev
, b
, NORMAL
);
360 ASSERT(lmfs_dev(bp
) != NO_DEV
);
361 ASSERT(lmfs_dev(bp
) == rip
->i_dev
);
362 b
= rd_indir(bp
, index
);
363 put_block(bp
, INDIRECT_BLOCK
); /* release single ind block */
369 /*===========================================================================*
371 *===========================================================================*/
/* rd_indir: return entry index of the indirect block held in buffer bp.
 * The entry is read from b_ind(bp) and passed through conv4() with le_CPU
 * before being returned.
 *
 * NOTE(review): the test that guards the panic call below, and the braces
 * of the function body, are not present in this listing; presumably the
 * panic fires when bp is NULL (TODO confirm).
 */
372 block_t
rd_indir(bp
, index
)
373 struct buf
*bp
; /* pointer to indirect block */
374 int index
; /* index into *bp */
377 panic("rd_indir() on NULL");
378 /* TODO: use conv call */
379 return conv4(le_CPU
, b_ind(bp
)[index
]);
383 /*===========================================================================*
385 *===========================================================================*/
/* NOTE(review): the header line of this function is not present in this
 * listing, so its name and signature cannot be confirmed from here.
 *
 * The visible body consumes the pending read-ahead request: it copies
 * rdahed_inode into rip, resets rdahed_inode to NULL, and maps rdahedpos to
 * a block with read_map().  When read_map() yields NO_BLOCK it returns at
 * once.  Otherwise it fetches the block through rahead(), asking for
 * block_size bytes ahead, and releases the buffer straight away with
 * put_block().
 */
388 /* Read a block into the cache before it is needed. */
389 unsigned int block_size
;
390 register struct inode
*rip
;
397 rip
= rdahed_inode
; /* pointer to inode to read ahead from */
398 block_size
= get_block_size(rip
->i_dev
);
399 rdahed_inode
= NULL
; /* turn off read ahead */
400 if ( (b
= read_map(rip
, rdahedpos
)) == NO_BLOCK
) return; /* at EOF */
402 assert(rdahedpos
>= 0); /* So we can safely cast it to unsigned below */
404 bp
= rahead(rip
, b
, cvul64((unsigned long) rdahedpos
), block_size
);
405 put_block(bp
, PARTIAL_DATA_BLOCK
);
409 /*===========================================================================*
411 *===========================================================================*/
/* rahead: return a buffer for baseblock, prefetching further blocks when a
 * device read is needed anyway.  The block is first requested with
 * get_block() in PREFETCH mode; if lmfs_dev(bp) is not NO_DEV that buffer is
 * returned at once.  Otherwise a queue of prefetch buffers is collected in
 * read_q, handed to lmfs_rw_scattered() for READING, and baseblock is then
 * fetched with get_block() in NORMAL mode and returned.
 *
 * NOTE(review): many lines of this function (local declarations, braces,
 * loop headers, several assignments) are not present in this listing, and
 * two block comments below have lost their terminators.
 */
412 static struct buf
*rahead(rip
, baseblock
, position
, bytes_ahead
)
413 register struct inode
*rip
; /* pointer to inode for file to be read */
414 block_t baseblock
; /* block at current position */
415 u64_t position
; /* position within file */
416 unsigned bytes_ahead
; /* bytes beyond position for immediate use */
418 /* Fetch a block from the cache or the device. If a physical read is
419 * required, prefetch as many more blocks as convenient into the cache.
420 * This usually covers bytes_ahead and is at least BLOCKS_MINIMUM.
421 * The device driver may decide it knows better and stop reading at a
422 * cylinder boundary (or after an error). Rw_scattered() puts an optional
423 * flag on all reads to allow this.
425 /* Minimum number of blocks to prefetch. */
426 # define BLOCKS_MINIMUM (nr_bufs < 50 ? 18 : 32)
427 int nr_bufs
= lmfs_nr_bufs();
428 int block_spec
, read_q_size
;
429 unsigned int blocks_ahead
, fragment
, block_size
;
430 block_t block
, blocks_left
;
433 struct buf
*bp
= NULL
;
434 static unsigned int readqsize
= 0;
435 static struct buf
**read_q
= NULL
;
/* read_q is a static array allocated with one slot per cache buffer
 * (nr_bufs).  This test notices when nr_bufs no longer matches readqsize;
 * the statements that release the old array and update readqsize are not
 * present in this listing.
 */
437 if(readqsize
!= nr_bufs
) {
439 assert(read_q
!= NULL
);
445 assert(readqsize
== 0);
446 assert(read_q
== NULL
);
448 if(!(read_q
= malloc(sizeof(read_q
[0])*nr_bufs
)))
449 panic("couldn't allocate read_q");
/* block_spec is nonzero when the inode type is I_BLOCK_SPECIAL. */
453 block_spec
= (rip
->i_mode
& I_TYPE
) == I_BLOCK_SPECIAL
;
455 dev
= (dev_t
) rip
->i_block
[0];
459 block_size
= get_block_size(dev
);
462 bp
= get_block(dev
, block
, PREFETCH
);
464 if (lmfs_dev(bp
) != NO_DEV
) return(bp
);
466 /* The best guess for the number of blocks to prefetch: A lot.
467 * It is impossible to tell what the device looks like, so we don't even
468 * try to guess the geometry, but leave it to the driver.
470 * The floppy driver can read a full track with no rotational delay, and it
471 * avoids reading partial tracks if it can, so handing it enough buffers to
472 * read two tracks is perfect. (Two, because some diskette types have
473 * an odd number of sectors per track, so a block may span tracks.)
475 * The disk drivers don't try to be smart. With todays disks it is
476 * impossible to tell what the real geometry looks like, so it is best to
477 * read as much as you can. With luck the caching on the drive allows
478 * for a little time to start the next read.
480 * The current solution below is a bit of a hack, it just reads blocks from
481 * the current file position hoping that more of the file can be found. A
482 * better solution must look at the already available
483 * indirect blocks (but don't call read_map!).
486 fragment
= rem64u(position
, block_size
);
487 position
= sub64u(position
, fragment
);
488 bytes_ahead
+= fragment
;
490 blocks_ahead
= (bytes_ahead
+ block_size
- 1) / block_size
;
492 if (block_spec
&& rip
->i_size
== 0) {
493 blocks_left
= (block_t
) NR_IOREQS
;
495 blocks_left
= (block_t
) (rip
->i_size
-ex64lo(position
)+(block_size
-1)) /
498 /* Go for the first indirect block if we are in its neighborhood. */
500 ind1_pos
= (EXT2_NDIR_BLOCKS
) * block_size
;
501 if ((off_t
) ex64lo(position
) <= ind1_pos
&& rip
->i_size
> ind1_pos
) {
508 /* No more than the maximum request. */
509 if (blocks_ahead
> NR_IOREQS
) blocks_ahead
= NR_IOREQS
;
511 /* Read at least the minimum number of blocks, but not after a seek. */
512 if (blocks_ahead
< BLOCKS_MINIMUM
&& rip
->i_seek
== NO_SEEK
)
513 blocks_ahead
= BLOCKS_MINIMUM
;
515 /* Can't go past end of file. */
516 if (blocks_ahead
> blocks_left
) blocks_ahead
= blocks_left
;
520 /* Acquire block buffers. */
522 read_q
[read_q_size
++] = bp
;
524 if (--blocks_ahead
== 0) break;
526 /* Don't trash the cache, leave 4 free. */
527 if (lmfs_bufs_in_use() >= nr_bufs
- 4) break;
531 bp
= get_block(dev
, block
, PREFETCH
);
532 if (lmfs_dev(bp
) != NO_DEV
) {
533 /* Oops, block already in the cache, get out. */
534 put_block(bp
, FULL_DATA_BLOCK
);
/* Issue the queued reads in one call, then return the buffer for
 * baseblock.
 */
538 lmfs_rw_scattered(dev
, read_q
, read_q_size
, READING
);
539 return(get_block(dev
, baseblock
, NORMAL
));
543 /*===========================================================================*
545 *===========================================================================*/
/* fs_getdents: copy directory entries of the inode named in the request
 * message to the grant of the caller as struct dirent records, starting at
 * the directory position given in REQ_SEEK_POS_LO.  Records are staged in
 * the static buffer getdents_buf and copied out with sys_safecopyto().  The
 * number of bytes delivered and the position for the next request are
 * returned in fs_m_out, ATIME is flagged for update, and the inode is
 * released with put_inode().
 *
 * NOTE(review): many lines of this function (local declarations, braces,
 * early returns, else branches) are not present in this listing, and two
 * block comments below have lost their terminators.
 */
546 int fs_getdents(void)
/* The staging buffer holds GETDENTS_ENTRIES times
 * (sizeof(struct dirent) + EXT2_NAME_MAX + 1) bytes.
 */
548 #define GETDENTS_BUFSIZE (sizeof(struct dirent) + EXT2_NAME_MAX + 1)
549 #define GETDENTS_ENTRIES 8
550 static char getdents_buf
[GETDENTS_BUFSIZE
* GETDENTS_ENTRIES
];
553 unsigned int block_size
, len
, reclen
;
557 size_t size
, tmpbuf_off
, userbuf_off
;
558 off_t pos
, off
, block_pos
, new_pos
, ent_pos
;
560 struct ext2_disk_dir_desc
*d_desc
;
563 ino
= (ino_t
) fs_m_in
.REQ_INODE_NR
;
564 gid
= (gid_t
) fs_m_in
.REQ_GRANT
;
565 size
= (size_t) fs_m_in
.REQ_MEM_SIZE
;
566 pos
= (off_t
) fs_m_in
.REQ_SEEK_POS_LO
;
568 /* Check whether the position is properly aligned */
569 if ((unsigned int) pos
% DIR_ENTRY_ALIGN
)
572 if ((rip
= get_inode(fs_dev
, ino
)) == NULL
)
575 block_size
= rip
->i_sp
->s_block_size
;
/* Split pos into the start of its block (block_pos) and the offset inside
 * that block (off).
 */
576 off
= (pos
% block_size
); /* Offset in block */
577 block_pos
= pos
- off
;
578 done
= FALSE
; /* Stop processing directory blocks when done is set */
580 memset(getdents_buf
, '\0', sizeof(getdents_buf
)); /* Avoid leaking any data */
581 tmpbuf_off
= 0; /* Offset in getdents_buf */
582 userbuf_off
= 0; /* Offset in the user's buffer */
584 /* The default position for the next request is EOF. If the user's buffer
585 * fills up before EOF, new_pos will be modified. */
586 new_pos
= rip
->i_size
;
588 for (; block_pos
< rip
->i_size
; block_pos
+= block_size
) {
589 off_t temp_pos
= block_pos
;
590 b
= read_map(rip
, block_pos
); /* get block number */
591 /* Since directories don't have holes, 'b' cannot be NO_BLOCK. */
592 bp
= get_block(rip
->i_dev
, b
, NORMAL
); /* get a dir block */
595 /* Search a directory block. */
596 d_desc
= (struct ext2_disk_dir_desc
*) &b_data(bp
);
598 /* we need to seek to entry at off bytes.
599 * when NEXT_DISC_DIR_POS == block_size it's last dentry.
601 for (; temp_pos
+ conv2(le_CPU
, d_desc
->d_rec_len
) <= pos
602 && NEXT_DISC_DIR_POS(d_desc
, &b_data(bp
)) < block_size
;
603 d_desc
= NEXT_DISC_DIR_DESC(d_desc
)) {
604 temp_pos
+= conv2(le_CPU
, d_desc
->d_rec_len
);
607 for (; CUR_DISC_DIR_POS(d_desc
, &b_data(bp
)) < block_size
;
608 d_desc
= NEXT_DISC_DIR_DESC(d_desc
)) {
609 if (d_desc
->d_ino
== 0)
610 continue; /* Entry is not in use */
/* len is the stored name length, or min(NAME_MAX, EXT2_NAME_MAX) when the
 * stored length exceeds either limit.
 */
612 if (d_desc
->d_name_len
> NAME_MAX
||
613 d_desc
->d_name_len
> EXT2_NAME_MAX
) {
614 len
= min(NAME_MAX
, EXT2_NAME_MAX
);
616 len
= d_desc
->d_name_len
;
619 /* Compute record length */
620 reclen
= offsetof(struct dirent
, d_name
) + len
+ 1;
/* Pad reclen up to a multiple of sizeof(long).  The guard around the
 * adjustment is not present in this listing; presumably it skips the
 * padding when o is zero (TODO confirm).
 */
621 o
= (reclen
% sizeof(long));
623 reclen
+= sizeof(long) - o
;
625 /* Need the position of this entry in the directory */
626 ent_pos
= block_pos
+ ((char *)d_desc
- b_data(bp
));
628 if (userbuf_off
+ tmpbuf_off
+ reclen
>= size
) {
629 /* The user has no space for one more record */
632 /* Record the position of this entry, it is the
633 * starting point of the next request (unless the
634 * position is modified with lseek).
640 if (tmpbuf_off
+ reclen
>= GETDENTS_BUFSIZE
*GETDENTS_ENTRIES
) {
641 r
= sys_safecopyto(VFS_PROC_NR
, gid
,
642 (vir_bytes
) userbuf_off
,
643 (vir_bytes
) getdents_buf
,
644 (size_t) tmpbuf_off
);
649 userbuf_off
+= tmpbuf_off
;
653 dep
= (struct dirent
*) &getdents_buf
[tmpbuf_off
];
654 dep
->d_ino
= conv4(le_CPU
, d_desc
->d_ino
);
655 dep
->d_off
= ent_pos
;
656 dep
->d_reclen
= (unsigned short) reclen
;
657 memcpy(dep
->d_name
, d_desc
->d_name
, len
);
658 dep
->d_name
[len
] = '\0';
659 tmpbuf_off
+= reclen
;
662 put_block(bp
, DIRECTORY_BLOCK
);
667 if (tmpbuf_off
!= 0) {
668 r
= sys_safecopyto(VFS_PROC_NR
, gid
, (vir_bytes
) userbuf_off
,
669 (vir_bytes
) getdents_buf
, (size_t) tmpbuf_off
);
675 userbuf_off
+= tmpbuf_off
;
678 if (done
&& userbuf_off
== 0)
679 r
= EINVAL
; /* The user's buffer is too small */
/* Report the bytes delivered and the position for the next request, flag
 * ATIME for update and mark the inode dirty.
 */
681 fs_m_out
.RES_NBYTES
= userbuf_off
;
682 fs_m_out
.RES_SEEK_POS_LO
= new_pos
;
683 rip
->i_update
|= ATIME
;
684 rip
->i_dirt
= IN_DIRTY
;
688 put_inode(rip
); /* release the inode */