12 #include <minix/vfsif.h>
/* Forward declaration of rw_chunk(), the file-local helper that both
 * fs_readwrite() and fs_breadwrite() call once for every chunk of a transfer.
 */
14 FORWARD
_PROTOTYPE( int rw_chunk
, (struct inode
*rip
, u64_t position
,
15 unsigned off
, int chunk
, unsigned left
, int rw_flag
,
16 cp_grant_id_t gid
, unsigned buf_off
, int block_size
, int *completed
));
/* File-local staging buffer of GETDENTS_BUFSIZ bytes. fs_getdents() builds
 * struct dirent records in it and hands it to sys_safecopyto().
 */
18 PRIVATE
char getdents_buf
[GETDENTS_BUFSIZ
];
20 /*===========================================================================*
22 *===========================================================================*/
/* Handle a read or write request on an inode. REQ_READ selects reading; any
 * other request type is treated as writing.
 *
 * From the statements visible here: the inode is looked up with find_inode()
 * using fs_m_in.REQ_INODE_NR; the grant, start position and byte count come
 * from REQ_GRANT, REQ_SEEK_POS_LO and REQ_NBYTES. The transfer is cut into
 * chunks that never cross a block boundary and each chunk is passed to
 * rw_chunk(). Reads stop at f_size, which is the inode size or ULONG_MAX for
 * a block special file. Writes to anything but a block special file are first
 * checked against the superblock's s_max_size, and clear_zone() is called when
 * the write starts beyond the current size. The final position is returned in
 * fs_m_out.RES_SEEK_POS_LO and the byte count (cum_io) in
 * fs_m_out.RES_NBYTES; the inode's i_size, i_seek, i_update and i_dirt fields
 * are updated along the way.
 *
 * NOTE(review): this listing shows no braces, no return statements and no
 * declarations for several names used below (rip, mode_word, regular, gid,
 * nrbytes), so lines appear to be missing. Confirm against the full source.
 */
23 PUBLIC
int fs_readwrite(void)
25 int r
, rw_flag
, chunk
, block_size
, block_spec
;
28 off_t position
, f_size
, bytes_left
;
29 unsigned int off
, cum_io
;
31 int completed
, r2
= OK
;
36 /* Find the inode referred */
37 if ((rip
= find_inode(fs_dev
, fs_m_in
.REQ_INODE_NR
)) == NULL
)
40 mode_word
= rip
->i_mode
& I_TYPE
;
41 regular
= (mode_word
== I_REGULAR
|| mode_word
== I_NAMED_PIPE
);
42 block_spec
= (mode_word
== I_BLOCK_SPECIAL
? 1 : 0);
44 /* Determine blocksize */
45 block_size
= (block_spec
?
46 get_block_size(rip
->i_zone
[0]) : rip
->i_sp
->s_block_size
);
48 f_size
= (block_spec
? ULONG_MAX
: rip
->i_size
);
50 /* Get the values from the request message */
51 rw_flag
= (fs_m_in
.m_type
== REQ_READ
? READING
: WRITING
);
52 gid
= fs_m_in
.REQ_GRANT
;
53 position
= fs_m_in
.REQ_SEEK_POS_LO
;
54 nrbytes
= (unsigned) fs_m_in
.REQ_NBYTES
;
56 rdwt_err
= OK
; /* set to EIO if disk error occurs */
58 if (rw_flag
== WRITING
&& block_spec
== 0) {
59 /* Check in advance to see if file will grow too big. */
60 if (position
> rip
->i_sp
->s_max_size
- nrbytes
)
63 /* Clear the zone containing present EOF if hole about
64 * to be created. This is necessary because all unwritten
65 * blocks prior to the EOF must read as zeros. */
66 if(position
> f_size
) clear_zone(rip
, f_size
, 0);
70 /* Split the transfer into chunks that don't span two blocks. */
71 while (nrbytes
!= 0) {
72 off
= (unsigned int) (position
% block_size
);/* offset in blk*/
73 chunk
= MIN(nrbytes
, block_size
- off
);
75 if (chunk
< 0) chunk
= block_size
- off
;
77 if (rw_flag
== READING
) {
78 bytes_left
= f_size
- position
;
79 if (position
>= f_size
) break; /* we are beyond EOF */
80 if (chunk
> bytes_left
) chunk
= (int) bytes_left
;
83 /* Read or write 'chunk' bytes. */
84 r
= rw_chunk(rip
, cvul64(position
), off
, chunk
, (unsigned) nrbytes
,
85 rw_flag
, gid
, cum_io
, block_size
, &completed
);
87 if (r
!= OK
) break; /* EOF reached */
88 if (rdwt_err
< 0) break;
90 /* Update counters and pointers. */
91 nrbytes
-= chunk
; /* bytes yet to be read */
92 cum_io
+= chunk
; /* bytes read so far */
93 position
+= chunk
; /* position within the file */
96 fs_m_out
.RES_SEEK_POS_LO
= position
; /* It might change later and the VFS
97 has to know this value */
99 /* On write, update file size and access time. */
100 if (rw_flag
== WRITING
) {
101 if (regular
|| mode_word
== I_DIRECTORY
) {
102 if (position
> f_size
) rip
->i_size
= position
;
106 /* Check to see if read-ahead is called for, and if so, set it up. */
107 if(rw_flag
== READING
&& rip
->i_seek
== NO_SEEK
&&
108 position
% block_size
== 0 && (regular
|| mode_word
== I_DIRECTORY
)) {
110 rdahedpos
= position
;
113 rip
->i_seek
= NO_SEEK
;
115 if (rdwt_err
!= OK
) r
= rdwt_err
; /* check for disk error */
116 if (rdwt_err
== END_OF_FILE
) r
= OK
;
118 /* if user-space copying failed, read/write failed. */
119 if (r
== OK
&& r2
!= OK
)
123 if (rw_flag
== READING
) rip
->i_update
|= ATIME
;
124 if (rw_flag
== WRITING
) rip
->i_update
|= CTIME
| MTIME
;
125 rip
->i_dirt
= DIRTY
; /* inode is thus now dirty */
128 fs_m_out
.RES_NBYTES
= cum_io
;
134 /*===========================================================================*
136 *===========================================================================*/
/* Handle a block read or write request addressed to a device. REQ_BREAD
 * selects reading; any other request type is treated as writing.
 *
 * A pseudo inode is filled in so that rw_chunk() can be reused: i_zone[0]
 * holds fs_m_in.REQ_DEV2 and i_mode is I_BLOCK_SPECIAL. The 64-bit start
 * position is built from REQ_SEEK_POS_LO and REQ_SEEK_POS_HI, the block size
 * comes from get_block_size(REQ_DEV2), and the transfer is cut into chunks
 * that do not cross a block boundary. The final position is returned in
 * fs_m_out.RES_SEEK_POS_LO / RES_SEEK_POS_HI and the byte count (cum_io) in
 * fs_m_out.RES_NBYTES.
 *
 * NOTE(review): this listing shows no braces, no return statement and no
 * declarations for rip, position, nrbytes, gid or completed, so lines appear
 * to be missing. Confirm against the full source.
 */
137 PUBLIC
int fs_breadwrite(void)
139 int r
, rw_flag
, chunk
, block_size
;
143 unsigned int off
, cum_io
;
146 /* Pseudo inode for rw_chunk */
151 /* Get the values from the request message */
152 rw_flag
= (fs_m_in
.m_type
== REQ_BREAD
? READING
: WRITING
);
153 gid
= fs_m_in
.REQ_GRANT
;
154 position
= make64(fs_m_in
.REQ_SEEK_POS_LO
, fs_m_in
.REQ_SEEK_POS_HI
);
155 nrbytes
= (unsigned) fs_m_in
.REQ_NBYTES
;
157 block_size
= get_block_size(fs_m_in
.REQ_DEV2
);
159 rip
.i_zone
[0] = fs_m_in
.REQ_DEV2
;
160 rip
.i_mode
= I_BLOCK_SPECIAL
;
163 rdwt_err
= OK
; /* set to EIO if disk error occurs */
166 /* Split the transfer into chunks that don't span two blocks. */
167 while (nrbytes
!= 0) {
168 off
= rem64u(position
, block_size
); /* offset in blk*/
170 chunk
= MIN(nrbytes
, block_size
- off
);
171 if (chunk
< 0) chunk
= block_size
- off
;
173 /* Read or write 'chunk' bytes. */
174 r
= rw_chunk(&rip
, position
, off
, chunk
, (unsigned) nrbytes
,
175 rw_flag
, gid
, cum_io
, block_size
, &completed
);
177 if (r
!= OK
) break; /* EOF reached */
178 if (rdwt_err
< 0) break;
180 /* Update counters and pointers. */
181 nrbytes
-= chunk
; /* bytes yet to be read */
182 cum_io
+= chunk
; /* bytes read so far */
183 position
= add64ul(position
, chunk
); /* position within the file */
186 fs_m_out
.RES_SEEK_POS_LO
= ex64lo(position
);
187 fs_m_out
.RES_SEEK_POS_HI
= ex64hi(position
);
189 if (rdwt_err
!= OK
) r
= rdwt_err
; /* check for disk error */
190 if (rdwt_err
== END_OF_FILE
) r
= OK
;
192 fs_m_out
.RES_NBYTES
= cum_io
;
198 /*===========================================================================*
200 *===========================================================================*/
/* Read or write one chunk that lies inside a single block, copying between
 * the block buffer and the caller's grant at offset buf_off.
 *
 * Visible behaviour: the block number b is taken either from
 * div64u(position, block_size) or from read_map(); before read_map() is
 * called the code panics if the high word of position is non-zero. When the
 * file is not block special and b is NO_BLOCK, a read is served from
 * get_block(NO_DEV, NO_BLOCK, NORMAL) and a write goes through new_block().
 * An existing block is fetched with rahead() when reading, and otherwise with
 * get_block(), using NO_READ when the chunk covers the whole block. The data
 * is moved with sys_safecopyto() when reading and sys_safecopyfrom() when
 * writing.
 *
 * NOTE(review): presumably the div64u() path is taken for block special
 * files and the read_map() path for all others, but the branch itself, the
 * braces, the return statements and the declarations of b, dev, n, r and
 * block_spec are not visible in this listing. Confirm against the full
 * source.
 */
201 PRIVATE
int rw_chunk(rip
, position
, off
, chunk
, left
, rw_flag
, gid
,
202 buf_off
, block_size
, completed
)
203 register struct inode
*rip
; /* pointer to inode for file to be rd/wr */
204 u64_t position
; /* position within file to read or write */
205 unsigned off
; /* off within the current block */
206 int chunk
; /* number of bytes to read or write */
207 unsigned left
; /* max number of bytes wanted after position */
208 int rw_flag
; /* READING or WRITING */
209 cp_grant_id_t gid
; /* grant */
210 unsigned buf_off
; /* offset in grant */
211 int block_size
; /* block size of FS operating on */
212 int *completed
; /* number of bytes copied */
214 /* Read or write (part of) a block. */
216 register struct buf
*bp
;
224 block_spec
= (rip
->i_mode
& I_TYPE
) == I_BLOCK_SPECIAL
;
227 b
= div64u(position
, block_size
);
228 dev
= (dev_t
) rip
->i_zone
[0];
230 if (ex64hi(position
) != 0)
231 panic("rw_chunk: position too high");
232 b
= read_map(rip
, ex64lo(position
));
236 if (!block_spec
&& b
== NO_BLOCK
) {
237 if (rw_flag
== READING
) {
238 /* Reading from a nonexistent block. Must read as all zeros.*/
239 bp
= get_block(NO_DEV
, NO_BLOCK
, NORMAL
); /* get a buffer */
242 /* Writing to a nonexistent block. Create and enter in inode.*/
243 if ((bp
= new_block(rip
, ex64lo(position
))) == NULL
)
246 } else if (rw_flag
== READING
) {
247 /* Read and read ahead if convenient. */
248 bp
= rahead(rip
, b
, position
, left
);
250 /* Normally an existing block to be partially overwritten is first read
251 * in. However, a full block need not be read in. If it is already in
252 * the cache, acquire it, otherwise just acquire a free buffer.
254 n
= (chunk
== block_size
? NO_READ
: NORMAL
);
255 if (!block_spec
&& off
== 0 && ex64lo(position
) >= rip
->i_size
)
257 bp
= get_block(dev
, b
, n
);
260 /* In all cases, bp now points to a valid buffer. */
262 panic("bp not valid in rw_chunk; this can't happen");
264 if (rw_flag
== WRITING
&& chunk
!= block_size
&& !block_spec
&&
265 ex64lo(position
) >= rip
->i_size
&& off
== 0) {
269 if (rw_flag
== READING
) {
270 /* Copy a chunk from the block buffer to user space. */
271 r
= sys_safecopyto(FS_PROC_NR
, gid
, buf_off
,
272 (vir_bytes
) (bp
->b_data
+off
), (phys_bytes
) chunk
, D
);
274 /* Copy a chunk from user space to the block buffer. */
275 r
= sys_safecopyfrom(FS_PROC_NR
, gid
, buf_off
,
276 (vir_bytes
) (bp
->b_data
+off
), (phys_bytes
) chunk
, D
);
280 n
= (off
+ chunk
== block_size
? FULL_DATA_BLOCK
: PARTIAL_DATA_BLOCK
);
287 /*===========================================================================*
289 *===========================================================================*/
/* Map a byte position in a file to the number of the block that holds it.
 *
 * The position is converted to a file-relative block number using the
 * superblock's s_block_size, then split into a zone number and a block offset
 * within the zone using s_log_zone_size. Three lookups are visible: directly
 * in rip->i_zone[], through the single indirect zone at i_zone[dzones], and
 * through the double indirect zone at i_zone[dzones+1]. Indirect blocks are
 * obtained with get_block(), read with rd_indir() and released with
 * put_block(). NO_BLOCK is returned whenever a zone pointer on the path is
 * NO_ZONE.
 *
 * NOTE(review): the condition that selects the direct-zone case, the final
 * return of b, the braces and the declarations of z and b are not visible in
 * this listing. Confirm against the full source.
 */
290 PUBLIC block_t
read_map(rip
, position
)
291 register struct inode
*rip
; /* ptr to inode to map from */
292 off_t position
; /* position in file whose blk wanted */
294 /* Given an inode and a position within the corresponding file, locate the
295 * block (not zone) number in which that position is to be found and return it.
298 register struct buf
*bp
;
300 int scale
, boff
, dzones
, nr_indirects
, index
, zind
, ex
;
302 long excess
, zone
, block_pos
;
304 scale
= rip
->i_sp
->s_log_zone_size
; /* for block-zone conversion */
305 block_pos
= position
/rip
->i_sp
->s_block_size
; /* relative blk # in file */
306 zone
= block_pos
>> scale
; /* position's zone */
307 boff
= (int) (block_pos
- (zone
<< scale
) ); /* relative blk # within zone */
308 dzones
= rip
->i_ndzones
;
309 nr_indirects
= rip
->i_nindirs
;
311 /* Is 'position' to be found in the inode itself? */
313 zind
= (int) zone
; /* index should be an int */
314 z
= rip
->i_zone
[zind
];
315 if (z
== NO_ZONE
) return(NO_BLOCK
);
316 b
= ((block_t
) z
<< scale
) + boff
;
320 /* It is not in the inode, so it must be single or double indirect. */
321 excess
= zone
- dzones
; /* first Vx_NR_DZONES don't count */
323 if (excess
< nr_indirects
) {
324 /* 'position' can be located via the single indirect block. */
325 z
= rip
->i_zone
[dzones
];
327 /* 'position' can be located via the double indirect block. */
328 if ( (z
= rip
->i_zone
[dzones
+1]) == NO_ZONE
) return(NO_BLOCK
);
329 excess
-= nr_indirects
; /* single indir doesn't count*/
330 b
= (block_t
) z
<< scale
;
331 ASSERT(rip
->i_dev
!= NO_DEV
);
332 bp
= get_block(rip
->i_dev
, b
, NORMAL
); /* get double indirect block */
333 index
= (int) (excess
/nr_indirects
);
334 ASSERT(bp
->b_dev
!= NO_DEV
);
335 ASSERT(bp
->b_dev
== rip
->i_dev
);
336 z
= rd_indir(bp
, index
); /* z= zone for single*/
337 put_block(bp
, INDIRECT_BLOCK
); /* release double ind block */
338 excess
= excess
% nr_indirects
; /* index into single ind blk */
341 /* 'z' is zone num for single indirect block; 'excess' is index into it. */
342 if (z
== NO_ZONE
) return(NO_BLOCK
);
343 b
= (block_t
) z
<< scale
; /* b is blk # for single ind */
344 bp
= get_block(rip
->i_dev
, b
, NORMAL
); /* get single indirect block */
345 ex
= (int) excess
; /* need an integer */
346 z
= rd_indir(bp
, ex
); /* get block pointed to */
347 put_block(bp
, INDIRECT_BLOCK
); /* release single indir blk */
348 if (z
== NO_ZONE
) return(NO_BLOCK
);
349 b
= ((block_t
) z
<< scale
) + boff
;
354 /*===========================================================================*
356 *===========================================================================*/
/* Read entry 'index' from the indirect block held in 'bp' and yield it as a
 * zone number.
 *
 * The superblock for bp->b_dev is fetched with get_super(). For a V1 file
 * system the entry is taken from b_v1_ind[] and converted with conv2();
 * otherwise it is taken from b_v2_ind[] and converted with conv4(). A value
 * other than NO_ZONE that is below s_firstdatazone or not below s_zones is
 * reported with printf() and followed by panic("check file system").
 *
 * NOTE(review): the NULL test guarding the first panic(), the else keyword,
 * the printf() arguments, the return statement and the braces are not visible
 * in this listing. Confirm against the full source.
 */
357 PUBLIC zone_t
rd_indir(bp
, index
)
358 struct buf
*bp
; /* pointer to indirect block */
359 int index
; /* index into *bp */
361 /* Given a pointer to an indirect block, read one entry. The reason for
362 * making a separate routine out of this is that there are four cases:
363 * V1 (IBM and 68000), and V2 (IBM and 68000).
366 struct super_block
*sp
;
367 zone_t zone
; /* V2 zones are longs (shorts in V1) */
370 panic("rd_indir() on NULL");
372 sp
= get_super(bp
->b_dev
); /* need super block to find file sys type */
374 /* read a zone from an indirect block */
375 if (sp
->s_version
== V1
)
376 zone
= (zone_t
) conv2(sp
->s_native
, (int) bp
->b_v1_ind
[index
]);
378 zone
= (zone_t
) conv4(sp
->s_native
, (long) bp
->b_v2_ind
[index
]);
380 if (zone
!= NO_ZONE
&&
381 (zone
< (zone_t
) sp
->s_firstdatazone
|| zone
>= sp
->s_zones
)) {
382 printf("Illegal zone number %ld in indirect block, index %d\n",
384 panic("check file system");
391 /*===========================================================================*
393 *===========================================================================*/
/* Carry out a pending read-ahead.
 *
 * The inode to read from is taken from rdahed_inode, which is then reset to
 * NULL so the request is not repeated. The block at rdahedpos is looked up
 * with read_map(); if there is none the function returns. Otherwise the block
 * is fetched through rahead(), asking for block_size bytes, and released
 * again with put_block().
 *
 * NOTE(review): the braces and the declarations of block_size, b and bp are
 * not visible in this listing. Confirm against the full source.
 */
394 PUBLIC
void read_ahead()
396 /* Read a block into the cache before it is needed. */
398 register struct inode
*rip
;
402 rip
= rdahed_inode
; /* pointer to inode to read ahead from */
403 block_size
= get_block_size(rip
->i_dev
);
404 rdahed_inode
= NULL
; /* turn off read ahead */
405 if ( (b
= read_map(rip
, rdahedpos
)) == NO_BLOCK
) return; /* at EOF */
406 bp
= rahead(rip
, b
, cvul64(rdahedpos
), block_size
);
407 put_block(bp
, PARTIAL_DATA_BLOCK
);
411 /*===========================================================================*
413 *===========================================================================*/
/* Return a buffer for 'baseblock', prefetching further blocks when the block
 * is not already cached.
 *
 * Visible behaviour: a static queue read_q is allocated with malloc() when
 * readqsize differs from nr_bufs. The block is requested with
 * get_block(..., PREFETCH) and returned straight away if its b_dev is not
 * NO_DEV. Otherwise the number of blocks to prefetch is derived from
 * bytes_ahead, limited to NR_IOREQS, raised to BLOCKS_MINIMUM when
 * rip->i_seek is NO_SEEK, and limited again to blocks_left. Buffers are
 * collected in read_q until the count runs out, bufs_in_use reaches
 * nr_bufs - 4, or a block turns out to be cached already. The queue is then
 * passed to rw_scattered() for reading and the base block is fetched with
 * get_block(dev, baseblock, NORMAL).
 *
 * NOTE(review): the loop header, several branches, the braces and the
 * declarations of dev, bp, block_size and ind1_pos are not visible in this
 * listing. Confirm against the full source.
 */
414 PUBLIC
struct buf
*rahead(rip
, baseblock
, position
, bytes_ahead
)
415 register struct inode
*rip
; /* pointer to inode for file to be read */
416 block_t baseblock
; /* block at current position */
417 u64_t position
; /* position within file */
418 unsigned bytes_ahead
; /* bytes beyond position for immediate use */
420 /* Fetch a block from the cache or the device. If a physical read is
421 * required, prefetch as many more blocks as convenient into the cache.
422 * This usually covers bytes_ahead and is at least BLOCKS_MINIMUM.
423 * The device driver may decide it knows better and stop reading at a
424 * cylinder boundary (or after an error). Rw_scattered() puts an optional
425 * flag on all reads to allow this.
428 /* Minimum number of blocks to prefetch. */
429 # define BLOCKS_MINIMUM (nr_bufs < 50 ? 18 : 32)
430 int block_spec
, scale
, read_q_size
;
431 unsigned int blocks_ahead
, fragment
;
432 block_t block
, blocks_left
;
436 static int readqsize
= 0;
437 static struct buf
**read_q
;
439 if(readqsize
!= nr_bufs
) {
442 if(!(read_q
= malloc(sizeof(read_q
[0])*nr_bufs
)))
443 panic("couldn't allocate read_q");
447 block_spec
= (rip
->i_mode
& I_TYPE
) == I_BLOCK_SPECIAL
;
449 dev
= (dev_t
) rip
->i_zone
[0];
453 block_size
= get_block_size(dev
);
456 bp
= get_block(dev
, block
, PREFETCH
);
457 if (bp
->b_dev
!= NO_DEV
) return(bp
);
459 /* The best guess for the number of blocks to prefetch: A lot.
460 * It is impossible to tell what the device looks like, so we don't even
461 * try to guess the geometry, but leave it to the driver.
463 * The floppy driver can read a full track with no rotational delay, and it
464 * avoids reading partial tracks if it can, so handing it enough buffers to
465 * read two tracks is perfect. (Two, because some diskette types have
466 * an odd number of sectors per track, so a block may span tracks.)
468 * The disk drivers don't try to be smart. With todays disks it is
469 * impossible to tell what the real geometry looks like, so it is best to
470 * read as much as you can. With luck the caching on the drive allows
471 * for a little time to start the next read.
473 * The current solution below is a bit of a hack, it just reads blocks from
474 * the current file position hoping that more of the file can be found. A
475 * better solution must look at the already available zone pointers and
476 * indirect blocks (but don't call read_map!).
479 fragment
= rem64u(position
, block_size
);
480 position
= sub64u(position
, fragment
);
481 bytes_ahead
+= fragment
;
483 blocks_ahead
= (bytes_ahead
+ block_size
- 1) / block_size
;
485 if (block_spec
&& rip
->i_size
== 0) {
486 blocks_left
= NR_IOREQS
;
488 blocks_left
= (rip
->i_size
- ex64lo(position
) + block_size
- 1) /
491 /* Go for the first indirect block if we are in its neighborhood. */
493 scale
= rip
->i_sp
->s_log_zone_size
;
494 ind1_pos
= (off_t
) rip
->i_ndzones
* (block_size
<< scale
);
495 if (ex64lo(position
) <= ind1_pos
&& rip
->i_size
> ind1_pos
) {
502 /* No more than the maximum request. */
503 if (blocks_ahead
> NR_IOREQS
) blocks_ahead
= NR_IOREQS
;
505 /* Read at least the minimum number of blocks, but not after a seek. */
506 if (blocks_ahead
< BLOCKS_MINIMUM
&& rip
->i_seek
== NO_SEEK
)
507 blocks_ahead
= BLOCKS_MINIMUM
;
509 /* Can't go past end of file. */
510 if (blocks_ahead
> blocks_left
) blocks_ahead
= blocks_left
;
514 /* Acquire block buffers. */
516 read_q
[read_q_size
++] = bp
;
518 if (--blocks_ahead
== 0) break;
520 /* Don't trash the cache, leave 4 free. */
521 if (bufs_in_use
>= nr_bufs
- 4) break;
525 bp
= get_block(dev
, block
, PREFETCH
);
526 if (bp
->b_dev
!= NO_DEV
) {
527 /* Oops, block already in the cache, get out. */
528 put_block(bp
, FULL_DATA_BLOCK
);
532 rw_scattered(dev
, read_q
, read_q_size
, READING
);
533 return(get_block(dev
, baseblock
, NORMAL
));
537 /*===========================================================================*
539 *===========================================================================*/
/* Handle a request to read directory entries into the caller's grant.
 *
 * The inode number, grant, buffer size and start position come from
 * fs_m_in.REQ_INODE_NR, REQ_GRANT, REQ_MEM_SIZE and REQ_SEEK_POS_LO. The
 * position is tested against DIR_ENTRY_SIZE alignment and the inode is
 * obtained with get_inode(). Directory blocks are walked from the block that
 * contains the start position up to i_size. For each entry processed, a
 * struct dirent record (d_ino, d_off, d_reclen, NUL-terminated d_name) is
 * built in getdents_buf, with the record length padded to a multiple of
 * sizeof(long). The staging buffer is copied out with sys_safecopyto() when
 * the next record would not fit in it, and once more at the end if it is not
 * empty. EINVAL is set when processing stopped early (done) with nothing
 * copied. The reply carries the byte count in fs_m_out.RES_NBYTES and the
 * next position in fs_m_out.RES_SEEK_POS_LO; ATIME is flagged on the inode
 * and the inode is released with put_inode().
 *
 * NOTE(review): the braces, the return statements, the error tests in front
 * of the panic() calls, the statements that set len and done, and the
 * declarations of ino, gid, b, bp, dp, dep and cp are not visible in this
 * listing. Confirm against the full source.
 */
540 PUBLIC
int fs_getdents(void)
542 register struct inode
*rip
;
543 int o
, r
, block_size
, len
, reclen
, done
;
547 size_t size
, tmpbuf_off
, userbuf_off
;
548 off_t pos
, off
, block_pos
, new_pos
, ent_pos
;
554 ino
= fs_m_in
.REQ_INODE_NR
;
555 gid
= fs_m_in
.REQ_GRANT
;
556 size
= fs_m_in
.REQ_MEM_SIZE
;
557 pos
= fs_m_in
.REQ_SEEK_POS_LO
;
559 /* Check whether the position is properly aligned */
560 if(pos
% DIR_ENTRY_SIZE
)
563 if( (rip
= get_inode(fs_dev
, ino
)) == NULL
)
566 block_size
= rip
->i_sp
->s_block_size
;
567 off
= (pos
% block_size
); /* Offset in block */
568 block_pos
= pos
- off
;
569 done
= FALSE
; /* Stop processing directory blocks when done is set */
571 tmpbuf_off
= 0; /* Offset in getdents_buf */
572 memset(getdents_buf
, '\0', GETDENTS_BUFSIZ
); /* Avoid leaking any data */
573 userbuf_off
= 0; /* Offset in the user's buffer */
575 /* The default position for the next request is EOF. If the user's buffer
576 * fills up before EOF, new_pos will be modified. */
577 new_pos
= rip
->i_size
;
579 for(; block_pos
< rip
->i_size
; block_pos
+= block_size
) {
580 b
= read_map(rip
, block_pos
); /* get block number */
582 /* Since directories don't have holes, 'b' cannot be NO_BLOCK. */
583 bp
= get_block(rip
->i_dev
, b
, NORMAL
); /* get a dir block */
586 panic("get_block returned NO_BLOCK");
588 /* Search a directory block. */
590 dp
= &bp
->b_dir
[off
/ DIR_ENTRY_SIZE
];
593 for (; dp
< &bp
->b_dir
[NR_DIR_ENTRIES(block_size
)]; dp
++) {
595 continue; /* Entry is not in use */
597 /* Compute the length of the name */
598 cp
= memchr(dp
->d_name
, '\0', NAME_MAX
);
604 /* Compute record length */
605 reclen
= offsetof(struct dirent
, d_name
) + len
+ 1;
606 o
= (reclen
% sizeof(long));
608 reclen
+= sizeof(long) - o
;
610 /* Need the position of this entry in the directory */
611 ent_pos
= block_pos
+ ((char *)dp
- bp
->b_data
);
613 if(tmpbuf_off
+ reclen
> GETDENTS_BUFSIZ
) {
614 r
= sys_safecopyto(FS_PROC_NR
, gid
, userbuf_off
,
615 (vir_bytes
)getdents_buf
,
618 panic("fs_getdents: sys_safecopyto failed: %d", r
);
620 userbuf_off
+= tmpbuf_off
;
624 if(userbuf_off
+ tmpbuf_off
+ reclen
> size
) {
625 /* The user has no space for one more record */
628 /* Record the position of this entry, it is the
629 * starting point of the next request (unless the
630 * postion is modified with lseek).
636 dep
= (struct dirent
*)&getdents_buf
[tmpbuf_off
];
637 dep
->d_ino
= dp
->d_ino
;
638 dep
->d_off
= ent_pos
;
639 dep
->d_reclen
= reclen
;
640 memcpy(dep
->d_name
, dp
->d_name
, len
);
641 dep
->d_name
[len
] = '\0';
642 tmpbuf_off
+= reclen
;
645 put_block(bp
, DIRECTORY_BLOCK
);
650 if(tmpbuf_off
!= 0) {
651 r
= sys_safecopyto(FS_PROC_NR
, gid
, userbuf_off
,
652 (vir_bytes
) getdents_buf
, tmpbuf_off
, D
);
654 panic("fs_getdents: sys_safecopyto failed: %d", r
);
656 userbuf_off
+= tmpbuf_off
;
659 if(done
&& userbuf_off
== 0)
660 r
= EINVAL
; /* The user's buffer is too small */
662 fs_m_out
.RES_NBYTES
= userbuf_off
;
663 fs_m_out
.RES_SEEK_POS_LO
= new_pos
;
664 rip
->i_update
|= ATIME
;
669 put_inode(rip
); /* release the inode */