1 /* Created (MFS based):
2 * February 2010 (Evgeniy Ivanov)
10 #include <minix/u64.h>
14 #include <minix/vfsif.h>
16 #include <sys/param.h>
/* Forward declaration of rahead(), defined later in this file. It takes the
 * inode of the file being read, the block at the current position, the
 * 64-bit file position and the number of bytes wanted beyond that position,
 * and returns a pointer to a struct buf.
 */
19 static struct buf
*rahead(struct inode
*rip
, block_t baseblock
, u64_t
20 position
, unsigned bytes_ahead
);
/* Forward declaration of rw_chunk(), defined later in this file. Both
 * fs_readwrite() and fs_breadwrite() call it once per chunk of a transfer.
 * 'off' is the offset within the current block, 'buf_off' the offset in the
 * grant, and 'completed' an int pointer that the definition describes as
 * "number of bytes copied".
 */
21 static int rw_chunk(struct inode
*rip
, u64_t position
, unsigned off
,
22 size_t chunk
, unsigned left
, int rw_flag
, cp_grant_id_t gid
, unsigned
23 buf_off
, unsigned int block_size
, int *completed
);
25 static off_t rdahedpos
; /* file position to read ahead from; set in fs_readwrite() and consumed by the read-ahead code */
26 static struct inode
*rdahed_inode
; /* inode to read ahead from; the read-ahead code resets it to NULL to turn read-ahead off */
28 /*===========================================================================*
29 *				fs_readwrite				     *
30 *===========================================================================*/
31 int fs_readwrite(void)
33 int r
, rw_flag
, block_spec
;
36 off_t position
, f_size
, bytes_left
;
37 unsigned int off
, cum_io
, block_size
, chunk
;
45 /* Find the inode referred to by the request */
46 if ((rip
= find_inode(fs_dev
, (ino_t
) fs_m_in
.REQ_INODE_NR
)) == NULL
)
49 mode_word
= rip
->i_mode
& I_TYPE
;
50 regular
= (mode_word
== I_REGULAR
|| mode_word
== I_NAMED_PIPE
);
51 block_spec
= (mode_word
== I_BLOCK_SPECIAL
? 1 : 0);
53 /* Determine blocksize */
55 block_size
= get_block_size( (dev_t
) rip
->i_block
[0]);
56 f_size
= MAX_FILE_POS
;
58 block_size
= rip
->i_sp
->s_block_size
;
60 if (f_size
< 0) f_size
= MAX_FILE_POS
;
63 /* Get the values from the request message */
64 rw_flag
= (fs_m_in
.m_type
== REQ_READ
? READING
: WRITING
);
65 gid
= (cp_grant_id_t
) fs_m_in
.REQ_GRANT
;
66 position
= (off_t
) fs_m_in
.REQ_SEEK_POS_LO
;
67 nrbytes
= (size_t) fs_m_in
.REQ_NBYTES
;
69 rdwt_err
= OK
; /* set to EIO if disk error occurs */
71 if (rw_flag
== WRITING
&& !block_spec
) {
72 /* Check in advance to see if file will grow too big. */
73 if (position
> (off_t
) (rip
->i_sp
->s_max_size
- nrbytes
))
78 /* Split the transfer into chunks that don't span two blocks. */
79 while (nrbytes
!= 0) {
80 off
= (unsigned int) (position
% block_size
);/* offset in blk*/
81 chunk
= MIN(nrbytes
, block_size
- off
);
83 if (rw_flag
== READING
) {
84 bytes_left
= f_size
- position
;
85 if (position
>= f_size
) break; /* we are beyond EOF */
86 if (chunk
> bytes_left
) chunk
= (int) bytes_left
;
89 /* Read or write 'chunk' bytes. */
90 r
= rw_chunk(rip
, cvul64((unsigned long) position
), off
, chunk
,
91 nrbytes
, rw_flag
, gid
, cum_io
, block_size
, &completed
);
93 if (r
!= OK
) break; /* EOF reached */
94 if (rdwt_err
< 0) break;
96 /* Update counters and pointers. */
97 nrbytes
-= chunk
; /* bytes yet to be read */
98 cum_io
+= chunk
; /* bytes read so far */
99 position
+= (off_t
) chunk
; /* position within the file */
102 fs_m_out
.RES_SEEK_POS_LO
= position
; /* It might change later and the VFS
103 has to know this value */
105 /* On write, update file size and access time. */
106 if (rw_flag
== WRITING
) {
107 if (regular
|| mode_word
== I_DIRECTORY
) {
108 if (position
> f_size
) rip
->i_size
= position
;
112 /* Check to see if read-ahead is called for, and if so, set it up. */
113 if(rw_flag
== READING
&& rip
->i_seek
== NO_SEEK
&&
114 (unsigned int) position
% block_size
== 0 &&
115 (regular
|| mode_word
== I_DIRECTORY
)) {
117 rdahedpos
= position
;
120 rip
->i_seek
= NO_SEEK
;
122 if (rdwt_err
!= OK
) r
= rdwt_err
; /* check for disk error */
123 if (rdwt_err
== END_OF_FILE
) r
= OK
;
126 if (rw_flag
== READING
) rip
->i_update
|= ATIME
;
127 if (rw_flag
== WRITING
) rip
->i_update
|= CTIME
| MTIME
;
128 rip
->i_dirt
= DIRTY
; /* inode is thus now dirty */
131 fs_m_out
.RES_NBYTES
= cum_io
;
137 /*===========================================================================*
138 *				fs_breadwrite				     *
139 *===========================================================================*/
140 int fs_breadwrite(void)
142 int r
, rw_flag
, completed
;
145 unsigned int off
, cum_io
, chunk
, block_size
;
148 /* Pseudo inode for rw_chunk */
153 /* Get the values from the request message */
154 rw_flag
= (fs_m_in
.m_type
== REQ_BREAD
? READING
: WRITING
);
155 gid
= (cp_grant_id_t
) fs_m_in
.REQ_GRANT
;
156 position
= make64((unsigned long) fs_m_in
.REQ_SEEK_POS_LO
,
157 (unsigned long) fs_m_in
.REQ_SEEK_POS_HI
);
158 nrbytes
= (size_t) fs_m_in
.REQ_NBYTES
;
160 block_size
= get_block_size( (dev_t
) fs_m_in
.REQ_DEV2
);
162 rip
.i_block
[0] = (block_t
) fs_m_in
.REQ_DEV2
;
163 rip
.i_mode
= I_BLOCK_SPECIAL
;
166 rdwt_err
= OK
; /* set to EIO if disk error occurs */
169 /* Split the transfer into chunks that don't span two blocks. */
170 while (nrbytes
> 0) {
171 off
= rem64u(position
, block_size
); /* offset in blk*/
172 chunk
= min(nrbytes
, block_size
- off
);
174 /* Read or write 'chunk' bytes. */
175 r
= rw_chunk(&rip
, position
, off
, chunk
, nrbytes
, rw_flag
, gid
,
176 cum_io
, block_size
, &completed
);
178 if (r
!= OK
) break; /* EOF reached */
179 if (rdwt_err
< 0) break;
181 /* Update counters and pointers. */
182 nrbytes
-= chunk
; /* bytes yet to be read */
183 cum_io
+= chunk
; /* bytes read so far */
184 position
= add64ul(position
, chunk
); /* position within the file */
187 fs_m_out
.RES_SEEK_POS_LO
= ex64lo(position
);
188 fs_m_out
.RES_SEEK_POS_HI
= ex64hi(position
);
190 if (rdwt_err
!= OK
) r
= rdwt_err
; /* check for disk error */
191 if (rdwt_err
== END_OF_FILE
) r
= OK
;
193 fs_m_out
.RES_NBYTES
= cum_io
;
199 /*===========================================================================*
200 *				rw_chunk				     *
201 *===========================================================================*/
202 static int rw_chunk(rip
, position
, off
, chunk
, left
, rw_flag
, gid
,
203 buf_off
, block_size
, completed
)
204 register struct inode
*rip
; /* pointer to inode for file to be rd/wr */
205 u64_t position
; /* position within file to read or write */
206 unsigned off
; /* off within the current block */
207 unsigned int chunk
; /* number of bytes to read or write */
208 unsigned left
; /* max number of bytes wanted after position */
209 int rw_flag
; /* READING or WRITING */
210 cp_grant_id_t gid
; /* grant */
211 unsigned buf_off
; /* offset in grant */
212 unsigned int block_size
; /* block size of FS operating on */
213 int *completed
; /* number of bytes copied */
215 /* Read or write (part of) a block. */
217 register struct buf
*bp
;
225 block_spec
= (rip
->i_mode
& I_TYPE
) == I_BLOCK_SPECIAL
;
228 b
= div64u(position
, block_size
);
229 dev
= (dev_t
) rip
->i_block
[0];
231 if (ex64hi(position
) != 0)
232 panic("rw_chunk: position too high");
233 b
= read_map(rip
, (off_t
) ex64lo(position
));
237 if (!block_spec
&& b
== NO_BLOCK
) {
238 if (rw_flag
== READING
) {
239 /* Reading from a nonexistent block. Must read as all zeros.*/
240 bp
= get_block(NO_DEV
, NO_BLOCK
, NORMAL
); /* get a buffer */
243 /* Writing to a nonexistent block. Create and enter in inode.*/
244 if ((bp
= new_block(rip
, (off_t
) ex64lo(position
))) == NULL
)
247 } else if (rw_flag
== READING
) {
248 /* Read and read ahead if convenient. */
249 bp
= rahead(rip
, b
, position
, left
);
251 /* Normally an existing block to be partially overwritten is first read
252 * in. However, a full block need not be read in. If it is already in
253 * the cache, acquire it, otherwise just acquire a free buffer.
255 n
= (chunk
== block_size
? NO_READ
: NORMAL
);
256 if (!block_spec
&& off
== 0 && (off_t
) ex64lo(position
) >= rip
->i_size
)
258 bp
= get_block(dev
, b
, n
);
261 /* In all cases, bp now points to a valid buffer. */
263 panic("bp not valid in rw_chunk, this can't happen");
265 if (rw_flag
== WRITING
&& chunk
!= block_size
&& !block_spec
&&
266 (off_t
) ex64lo(position
) >= rip
->i_size
&& off
== 0) {
270 if (rw_flag
== READING
) {
271 /* Copy a chunk from the block buffer to user space. */
272 r
= sys_safecopyto(VFS_PROC_NR
, gid
, (vir_bytes
) buf_off
,
273 (vir_bytes
) (bp
->b_data
+off
), (size_t) chunk
);
275 /* Copy a chunk from user space to the block buffer. */
276 r
= sys_safecopyfrom(VFS_PROC_NR
, gid
, (vir_bytes
) buf_off
,
277 (vir_bytes
) (bp
->b_data
+off
), (size_t) chunk
);
281 n
= (off
+ chunk
== block_size
? FULL_DATA_BLOCK
: PARTIAL_DATA_BLOCK
);
288 /*===========================================================================*
289 *				read_map				     *
290 *===========================================================================*/
291 block_t
read_map(rip
, position
)
292 register struct inode
*rip
; /* ptr to inode to map from */
293 off_t position
; /* position in file whose blk wanted */
295 /* Given an inode and a position within the corresponding file, locate the
296 * block number in which that position is to be found and return it.
302 unsigned long excess
, block_pos
;
303 static char first_time
= TRUE
;
304 static long addr_in_block
;
305 static long addr_in_block2
;
306 static long doub_ind_s
;
307 static long triple_ind_s
;
308 static long out_range_s
;
311 addr_in_block
= rip
->i_sp
->s_block_size
/ BLOCK_ADDRESS_BYTES
;
312 addr_in_block2
= addr_in_block
* addr_in_block
;
313 doub_ind_s
= EXT2_NDIR_BLOCKS
+ addr_in_block
;
314 triple_ind_s
= doub_ind_s
+ addr_in_block2
;
315 out_range_s
= triple_ind_s
+ addr_in_block2
* addr_in_block
;
319 block_pos
= position
/ rip
->i_sp
->s_block_size
; /* relative blk # in file */
321 /* Is 'position' to be found in the inode itself? */
322 if (block_pos
< EXT2_NDIR_BLOCKS
)
323 return(rip
->i_block
[block_pos
]);
325 /* It is not in the inode, so it must be single, double or triple indirect */
326 if (block_pos
< doub_ind_s
) {
327 b
= rip
->i_block
[EXT2_NDIR_BLOCKS
]; /* address of single indirect block */
328 index
= block_pos
- EXT2_NDIR_BLOCKS
;
329 } else if (block_pos
>= out_range_s
) { /* TODO: do we need it? */
332 /* Double or triple indirect block. If it is triple indirect,
333 * first find the double indirect block.
335 excess
= block_pos
- doub_ind_s
;
336 b
= rip
->i_block
[EXT2_DIND_BLOCK
];
337 if (block_pos
>= triple_ind_s
) {
338 b
= rip
->i_block
[EXT2_TIND_BLOCK
];
339 if (b
== NO_BLOCK
) return(NO_BLOCK
);
340 bp
= get_block(rip
->i_dev
, b
, NORMAL
); /* get triple ind block */
341 ASSERT(bp
->b_dev
!= NO_DEV
);
342 ASSERT(bp
->b_dev
== rip
->i_dev
);
343 excess
= block_pos
- triple_ind_s
;
344 index
= excess
/ addr_in_block2
;
345 b
= rd_indir(bp
, index
); /* num of double ind block */
346 put_block(bp
, INDIRECT_BLOCK
); /* release triple ind block */
347 excess
= excess
% addr_in_block2
;
349 if (b
== NO_BLOCK
) return(NO_BLOCK
);
350 bp
= get_block(rip
->i_dev
, b
, NORMAL
); /* get double indirect block */
351 ASSERT(bp
->b_dev
!= NO_DEV
);
352 ASSERT(bp
->b_dev
== rip
->i_dev
);
353 index
= excess
/ addr_in_block
;
354 b
= rd_indir(bp
, index
); /* num of single ind block */
355 put_block(bp
, INDIRECT_BLOCK
); /* release double ind block */
356 index
= excess
% addr_in_block
; /* index into single ind blk */
358 if (b
== NO_BLOCK
) return(NO_BLOCK
);
359 bp
= get_block(rip
->i_dev
, b
, NORMAL
);
360 ASSERT(bp
->b_dev
!= NO_DEV
);
361 ASSERT(bp
->b_dev
== rip
->i_dev
);
362 b
= rd_indir(bp
, index
);
363 put_block(bp
, INDIRECT_BLOCK
); /* release single ind block */
369 /*===========================================================================*
370 *				rd_indir				     *
371 *===========================================================================*/
/* rd_indir: given a buffer holding an indirect block and an index into it,
 * return entry 'index' of bp->b_ind after passing it through
 * conv4(le_CPU, ...).
 * NOTE(review): the condition guarding the panic() below is not visible in
 * this listing; the message suggests it is a NULL check on bp. Confirm
 * against the complete file.
 */
372 block_t
rd_indir(bp
, index
)
373 struct buf
*bp
; /* pointer to indirect block */
374 int index
; /* index into *bp */
377 panic("rd_indir() on NULL");
378 /* TODO: use conv call */
379 return conv4(le_CPU
, bp
->b_ind
[index
]);
383 /*===========================================================================*
385 *===========================================================================*/
388 /* Read a block into the cache before it is needed. */
389 unsigned int block_size
;
390 register struct inode
*rip
;
397 rip
= rdahed_inode
; /* pointer to inode to read ahead from */
398 block_size
= get_block_size(rip
->i_dev
);
399 rdahed_inode
= NULL
; /* turn off read ahead */
400 if ( (b
= read_map(rip
, rdahedpos
)) == NO_BLOCK
) return; /* at EOF */
402 assert(rdahedpos
>= 0); /* So we can safely cast it to unsigned below */
404 bp
= rahead(rip
, b
, cvul64((unsigned long) rdahedpos
), block_size
);
405 put_block(bp
, PARTIAL_DATA_BLOCK
);
409 /*===========================================================================*
410 *				rahead					     *
411 *===========================================================================*/
412 static struct buf
*rahead(rip
, baseblock
, position
, bytes_ahead
)
413 register struct inode
*rip
; /* pointer to inode for file to be read */
414 block_t baseblock
; /* block at current position */
415 u64_t position
; /* position within file */
416 unsigned bytes_ahead
; /* bytes beyond position for immediate use */
418 /* Fetch a block from the cache or the device. If a physical read is
419 * required, prefetch as many more blocks as convenient into the cache.
420 * This usually covers bytes_ahead and is at least BLOCKS_MINIMUM.
421 * The device driver may decide it knows better and stop reading at a
422 * cylinder boundary (or after an error). Rw_scattered() puts an optional
423 * flag on all reads to allow this.
425 /* Minimum number of blocks to prefetch. */
426 # define BLOCKS_MINIMUM (nr_bufs < 50 ? 18 : 32)
427 int block_spec
, read_q_size
;
428 unsigned int blocks_ahead
, fragment
, block_size
;
429 block_t block
, blocks_left
;
432 struct buf
*bp
= NULL
;
433 static unsigned int readqsize
= 0;
434 static struct buf
**read_q
= NULL
;
436 if(readqsize
!= nr_bufs
) {
438 assert(read_q
!= NULL
);
444 assert(readqsize
== 0);
445 assert(read_q
== NULL
);
447 if(!(read_q
= malloc(sizeof(read_q
[0])*nr_bufs
)))
448 panic("couldn't allocate read_q");
452 block_spec
= (rip
->i_mode
& I_TYPE
) == I_BLOCK_SPECIAL
;
454 dev
= (dev_t
) rip
->i_block
[0];
458 block_size
= get_block_size(dev
);
461 bp
= get_block(dev
, block
, PREFETCH
);
463 if (bp
->b_dev
!= NO_DEV
) return(bp
);
465 /* The best guess for the number of blocks to prefetch: A lot.
466 * It is impossible to tell what the device looks like, so we don't even
467 * try to guess the geometry, but leave it to the driver.
469 * The floppy driver can read a full track with no rotational delay, and it
470 * avoids reading partial tracks if it can, so handing it enough buffers to
471 * read two tracks is perfect. (Two, because some diskette types have
472 * an odd number of sectors per track, so a block may span tracks.)
474 * The disk drivers don't try to be smart. With today's disks it is
475 * impossible to tell what the real geometry looks like, so it is best to
476 * read as much as you can. With luck the caching on the drive allows
477 * for a little time to start the next read.
479 * The current solution below is a bit of a hack, it just reads blocks from
480 * the current file position hoping that more of the file can be found. A
481 * better solution must look at the already available
482 * indirect blocks (but don't call read_map!).
485 fragment
= rem64u(position
, block_size
);
486 position
= sub64u(position
, fragment
);
487 bytes_ahead
+= fragment
;
489 blocks_ahead
= (bytes_ahead
+ block_size
- 1) / block_size
;
491 if (block_spec
&& rip
->i_size
== 0) {
492 blocks_left
= (block_t
) NR_IOREQS
;
494 blocks_left
= (block_t
) (rip
->i_size
-ex64lo(position
)+(block_size
-1)) /
497 /* Go for the first indirect block if we are in its neighborhood. */
499 ind1_pos
= (EXT2_NDIR_BLOCKS
) * block_size
;
500 if ((off_t
) ex64lo(position
) <= ind1_pos
&& rip
->i_size
> ind1_pos
) {
507 /* No more than the maximum request. */
508 if (blocks_ahead
> NR_IOREQS
) blocks_ahead
= NR_IOREQS
;
510 /* Read at least the minimum number of blocks, but not after a seek. */
511 if (blocks_ahead
< BLOCKS_MINIMUM
&& rip
->i_seek
== NO_SEEK
)
512 blocks_ahead
= BLOCKS_MINIMUM
;
514 /* Can't go past end of file. */
515 if (blocks_ahead
> blocks_left
) blocks_ahead
= blocks_left
;
519 /* Acquire block buffers. */
521 read_q
[read_q_size
++] = bp
;
523 if (--blocks_ahead
== 0) break;
525 /* Don't trash the cache, leave 4 free. */
526 if (bufs_in_use
>= nr_bufs
- 4) break;
530 bp
= get_block(dev
, block
, PREFETCH
);
531 if (bp
->b_dev
!= NO_DEV
) {
532 /* Oops, block already in the cache, get out. */
533 put_block(bp
, FULL_DATA_BLOCK
);
537 rw_scattered(dev
, read_q
, read_q_size
, READING
);
538 return(get_block(dev
, baseblock
, NORMAL
));
542 /*===========================================================================*
543 *				fs_getdents				     *
544 *===========================================================================*/
545 int fs_getdents(void)
547 #define GETDENTS_BUFSIZE (sizeof(struct dirent) + EXT2_NAME_MAX + 1)
548 #define GETDENTS_ENTRIES 8
549 static char getdents_buf
[GETDENTS_BUFSIZE
* GETDENTS_ENTRIES
];
552 unsigned int block_size
, len
, reclen
;
556 size_t size
, tmpbuf_off
, userbuf_off
;
557 off_t pos
, off
, block_pos
, new_pos
, ent_pos
;
559 struct ext2_disk_dir_desc
*d_desc
;
562 ino
= (ino_t
) fs_m_in
.REQ_INODE_NR
;
563 gid
= (gid_t
) fs_m_in
.REQ_GRANT
;
564 size
= (size_t) fs_m_in
.REQ_MEM_SIZE
;
565 pos
= (off_t
) fs_m_in
.REQ_SEEK_POS_LO
;
567 /* Check whether the position is properly aligned */
568 if ((unsigned int) pos
% DIR_ENTRY_ALIGN
)
571 if ((rip
= get_inode(fs_dev
, ino
)) == NULL
)
574 block_size
= rip
->i_sp
->s_block_size
;
575 off
= (pos
% block_size
); /* Offset in block */
576 block_pos
= pos
- off
;
577 done
= FALSE
; /* Stop processing directory blocks when done is set */
579 memset(getdents_buf
, '\0', sizeof(getdents_buf
)); /* Avoid leaking any data */
580 tmpbuf_off
= 0; /* Offset in getdents_buf */
581 userbuf_off
= 0; /* Offset in the user's buffer */
583 /* The default position for the next request is EOF. If the user's buffer
584 * fills up before EOF, new_pos will be modified. */
585 new_pos
= rip
->i_size
;
587 for (; block_pos
< rip
->i_size
; block_pos
+= block_size
) {
588 off_t temp_pos
= block_pos
;
589 b
= read_map(rip
, block_pos
); /* get block number */
590 /* Since directories don't have holes, 'b' cannot be NO_BLOCK. */
591 bp
= get_block(rip
->i_dev
, b
, NORMAL
); /* get a dir block */
594 /* Search a directory block. */
595 d_desc
= (struct ext2_disk_dir_desc
*) &bp
->b_data
;
597 /* We need to seek to the entry at 'off' bytes.
598 * When NEXT_DISC_DIR_POS == block_size, it is the last dentry.
600 for (; temp_pos
+ conv2(le_CPU
, d_desc
->d_rec_len
) <= pos
601 && NEXT_DISC_DIR_POS(d_desc
, &bp
->b_data
) < block_size
;
602 d_desc
= NEXT_DISC_DIR_DESC(d_desc
)) {
603 temp_pos
+= conv2(le_CPU
, d_desc
->d_rec_len
);
606 for (; CUR_DISC_DIR_POS(d_desc
, &bp
->b_data
) < block_size
;
607 d_desc
= NEXT_DISC_DIR_DESC(d_desc
)) {
608 if (d_desc
->d_ino
== 0)
609 continue; /* Entry is not in use */
611 if (d_desc
->d_name_len
> NAME_MAX
||
612 d_desc
->d_name_len
> EXT2_NAME_MAX
) {
613 len
= min(NAME_MAX
, EXT2_NAME_MAX
);
615 len
= d_desc
->d_name_len
;
618 /* Compute record length */
619 reclen
= offsetof(struct dirent
, d_name
) + len
+ 1;
620 o
= (reclen
% sizeof(long));
622 reclen
+= sizeof(long) - o
;
624 /* Need the position of this entry in the directory */
625 ent_pos
= block_pos
+ ((char *)d_desc
- bp
->b_data
);
627 if (userbuf_off
+ tmpbuf_off
+ reclen
>= size
) {
628 /* The user has no space for one more record */
631 /* Record the position of this entry, it is the
632 * starting point of the next request (unless the
633 * position is modified with lseek).
639 if (tmpbuf_off
+ reclen
>= GETDENTS_BUFSIZE
*GETDENTS_ENTRIES
) {
640 r
= sys_safecopyto(VFS_PROC_NR
, gid
,
641 (vir_bytes
) userbuf_off
,
642 (vir_bytes
) getdents_buf
,
643 (size_t) tmpbuf_off
);
648 userbuf_off
+= tmpbuf_off
;
652 dep
= (struct dirent
*) &getdents_buf
[tmpbuf_off
];
653 dep
->d_ino
= conv4(le_CPU
, d_desc
->d_ino
);
654 dep
->d_off
= ent_pos
;
655 dep
->d_reclen
= (unsigned short) reclen
;
656 memcpy(dep
->d_name
, d_desc
->d_name
, len
);
657 dep
->d_name
[len
] = '\0';
658 tmpbuf_off
+= reclen
;
661 put_block(bp
, DIRECTORY_BLOCK
);
666 if (tmpbuf_off
!= 0) {
667 r
= sys_safecopyto(VFS_PROC_NR
, gid
, (vir_bytes
) userbuf_off
,
668 (vir_bytes
) getdents_buf
, (size_t) tmpbuf_off
);
674 userbuf_off
+= tmpbuf_off
;
677 if (done
&& userbuf_off
== 0)
678 r
= EINVAL
; /* The user's buffer is too small */
680 fs_m_out
.RES_NBYTES
= userbuf_off
;
681 fs_m_out
.RES_SEEK_POS_LO
= new_pos
;
682 rip
->i_update
|= ATIME
;
687 put_inode(rip
); /* release the inode */