10 #include <minix/vfsif.h>
/* Forward declarations of the two file-local (static) helpers that are
 * defined further down in this file.
 */
14 static struct buf
*rahead(struct inode
*rip
, block_t baseblock
, u64_t
15 position
, unsigned bytes_ahead
);
16 static int rw_chunk(struct inode
*rip
, u64_t position
, unsigned off
,
17 size_t chunk
, unsigned left
, int rw_flag
, cp_grant_id_t gid
, unsigned
18 buf_off
, unsigned int block_size
, int *completed
);
21 /*===========================================================================*
23 *===========================================================================*/
/* fs_readwrite: serve a read or write request on the inode named by
 * fs_m_in.REQ_INODE_NR. The direction comes from fs_m_in.m_type (REQ_READ
 * selects READING, anything else WRITING); the grant, start position and
 * byte count come from REQ_GRANT, REQ_SEEK_POS_LO and REQ_NBYTES. The
 * transfer is cut into chunks that never cross a block boundary and each
 * chunk is handed to rw_chunk(). The final position is reported in
 * fs_m_out.RES_SEEK_POS_LO and the transferred byte count in
 * fs_m_out.RES_NBYTES.
 *
 * NOTE(review): this listing skips source lines (the embedded line numbers
 * are not contiguous), so braces, several declarations, the loop header and
 * the return statements are not visible here. Presumably failures are
 * returned as error codes -- confirm against the complete file.
 */
24 int fs_readwrite(void)
26 int r
, rw_flag
, block_spec
;
29 off_t position
, f_size
, bytes_left
;
30 unsigned int off
, cum_io
, block_size
, chunk
;
38 /* Find the inode referred */
39 if ((rip
= find_inode(fs_dev
, (ino_t
) fs_m_in
.REQ_INODE_NR
)) == NULL
)
/* Classify the inode by the file-type bits of its mode word. */
42 mode_word
= rip
->i_mode
& I_TYPE
;
43 regular
= (mode_word
== I_REGULAR
|| mode_word
== I_NAMED_PIPE
);
44 block_spec
= (mode_word
== I_BLOCK_SPECIAL
? 1 : 0);
46 /* Determine blocksize */
48 block_size
= get_block_size( (dev_t
) rip
->i_zone
[0]);
49 f_size
= MAX_FILE_POS
;
51 block_size
= rip
->i_sp
->s_block_size
;
55 /* Get the values from the request message */
56 rw_flag
= (fs_m_in
.m_type
== REQ_READ
? READING
: WRITING
);
57 gid
= (cp_grant_id_t
) fs_m_in
.REQ_GRANT
;
58 position
= (off_t
) fs_m_in
.REQ_SEEK_POS_LO
;
59 nrbytes
= (size_t) fs_m_in
.REQ_NBYTES
;
61 lmfs_reset_rdwt_err();
63 /* If this is file i/o, check we can write */
64 if (rw_flag
== WRITING
&& !block_spec
) {
65 if(rip
->i_sp
->s_rd_only
)
68 /* Check in advance to see if file will grow too big. */
69 if (position
> (off_t
) (rip
->i_sp
->s_max_size
- nrbytes
))
72 /* Clear the zone containing present EOF if hole about
73 * to be created. This is necessary because all unwritten
74 * blocks prior to the EOF must read as zeros.
76 if(position
> f_size
) clear_zone(rip
, f_size
, 0);
79 /* If this is block i/o, check we can write */
80 if(block_spec
&& rw_flag
== WRITING
&&
81 (dev_t
) rip
->i_zone
[0] == superblock
.s_dev
&& superblock
.s_rd_only
)
85 /* Split the transfer into chunks that don't span two blocks. */
87 off
= ((unsigned int) position
) % block_size
; /* offset in blk*/
88 chunk
= min(nrbytes
, block_size
- off
);
90 if (rw_flag
== READING
) {
91 bytes_left
= f_size
- position
;
92 if (position
>= f_size
) break; /* we are beyond EOF */
93 if (chunk
> (unsigned int) bytes_left
) chunk
= bytes_left
;
96 /* Read or write 'chunk' bytes. */
97 r
= rw_chunk(rip
, cvul64((unsigned long) position
), off
, chunk
,
98 nrbytes
, rw_flag
, gid
, cum_io
, block_size
, &completed
);
100 if (r
!= OK
) break; /* EOF reached */
101 if (lmfs_rdwt_err() < 0) break;
103 /* Update counters and pointers. */
104 nrbytes
-= chunk
; /* bytes yet to be read */
105 cum_io
+= chunk
; /* bytes read so far */
106 position
+= (off_t
) chunk
; /* position within the file */
109 fs_m_out
.RES_SEEK_POS_LO
= position
; /* It might change later and the VFS
110 has to know this value */
112 /* On write, update file size and access time. */
113 if (rw_flag
== WRITING
) {
114 if (regular
|| mode_word
== I_DIRECTORY
) {
115 if (position
> f_size
) rip
->i_size
= position
;
/* Reset the seek marker: rahead() only raises its prefetch count to
 * BLOCKS_MINIMUM while i_seek == NO_SEEK.
 */
119 rip
->i_seek
= NO_SEEK
;
121 if (lmfs_rdwt_err() != OK
) r
= lmfs_rdwt_err(); /* check for disk error */
122 if (lmfs_rdwt_err() == END_OF_FILE
) r
= OK
;
124 /* even on a ROFS, writing to a device node on it is fine,
125 * just don't update the inode stats for it. And ditto for reading.
127 if (r
== OK
&& !rip
->i_sp
->s_rd_only
) {
128 if (rw_flag
== READING
) rip
->i_update
|= ATIME
;
129 if (rw_flag
== WRITING
) rip
->i_update
|= CTIME
| MTIME
;
130 IN_MARKDIRTY(rip
); /* inode is thus now dirty */
/* Report the number of bytes accumulated over the completed chunks. */
133 fs_m_out
.RES_NBYTES
= cum_io
;
139 /*===========================================================================*
141 *===========================================================================*/
/* fs_breadwrite: serve a block-level read or write on the device given in
 * fs_m_in.REQ_DEV2. The direction comes from fs_m_in.m_type (REQ_BREAD
 * selects READING, anything else WRITING) and the 64-bit start position is
 * assembled from REQ_SEEK_POS_LO and REQ_SEEK_POS_HI. A local pseudo inode
 * is passed to rw_chunk() for every chunk; chunks never cross a block
 * boundary. The final position is written back to
 * fs_m_out.RES_SEEK_POS_LO/HI and the byte count to fs_m_out.RES_NBYTES.
 *
 * NOTE(review): lines are missing from this listing (declarations of the
 * pseudo inode, position, gid and nrbytes, the initialisation of cum_io,
 * the statement guarded by the read-only check, and the return), so those
 * details must be confirmed against the complete file.
 */
142 int fs_breadwrite(void)
144 int r
, rw_flag
, completed
;
147 unsigned int off
, cum_io
, chunk
, block_size
;
151 /* Pseudo inode for rw_chunk */
156 target_dev
= (dev_t
) fs_m_in
.REQ_DEV2
;
158 /* Get the values from the request message */
159 rw_flag
= (fs_m_in
.m_type
== REQ_BREAD
? READING
: WRITING
);
160 gid
= (cp_grant_id_t
) fs_m_in
.REQ_GRANT
;
161 position
= make64((unsigned long) fs_m_in
.REQ_SEEK_POS_LO
,
162 (unsigned long) fs_m_in
.REQ_SEEK_POS_HI
);
163 nrbytes
= (size_t) fs_m_in
.REQ_NBYTES
;
165 block_size
= get_block_size(target_dev
);
167 /* Don't block-write to a RO-mounted filesystem. */
168 if(superblock
.s_dev
== target_dev
&& superblock
.s_rd_only
)
/* Fill in the pseudo inode: rw_chunk() takes the device from i_zone[0] and
 * recognises block-special i/o from i_mode.
 */
171 rip
.i_zone
[0] = (zone_t
) target_dev
;
172 rip
.i_mode
= I_BLOCK_SPECIAL
;
175 lmfs_reset_rdwt_err();
178 /* Split the transfer into chunks that don't span two blocks. */
179 while (nrbytes
> 0) {
180 off
= rem64u(position
, block_size
); /* offset in blk*/
181 chunk
= min(nrbytes
, block_size
- off
);
183 /* Read or write 'chunk' bytes. */
184 r
= rw_chunk(&rip
, position
, off
, chunk
, nrbytes
, rw_flag
, gid
,
185 cum_io
, block_size
, &completed
);
187 if (r
!= OK
) break; /* EOF reached */
188 if (lmfs_rdwt_err() < 0) break;
190 /* Update counters and pointers. */
191 nrbytes
-= chunk
; /* bytes yet to be read */
192 cum_io
+= chunk
; /* bytes read so far */
193 position
= add64ul(position
, chunk
); /* position within the file */
/* Hand the updated 64-bit position back as two 32-bit halves. */
196 fs_m_out
.RES_SEEK_POS_LO
= ex64lo(position
);
197 fs_m_out
.RES_SEEK_POS_HI
= ex64hi(position
);
199 if (lmfs_rdwt_err() != OK
) r
= lmfs_rdwt_err(); /* check for disk error */
200 if (lmfs_rdwt_err() == END_OF_FILE
) r
= OK
;
202 fs_m_out
.RES_NBYTES
= cum_io
;
208 /*===========================================================================*
210 *===========================================================================*/
/* rw_chunk: read or write 'chunk' bytes that lie inside one block, starting
 * 'off' bytes into the block that holds 'position'. Data is copied between
 * the block buffer and the grant 'gid' at grant offset 'buf_off', using
 * sys_safecopyto() when READING and sys_safecopyfrom() otherwise.
 *
 * The block number is obtained in one of two ways visible below: by dividing
 * the position by the block size with the device taken from i_zone[0], or by
 * calling read_map() after panicking on a position whose high 32 bits are
 * set. NOTE(review): the if/else lines that choose between the two are
 * missing from this listing; presumably the first is the block-special path
 * and the second the regular-file path -- confirm against the complete file.
 *
 * A missing block (NO_BLOCK) on a non-block-special inode gets a buffer from
 * get_block(NO_DEV, NO_BLOCK, NORMAL) when reading and from new_block() when
 * writing. Reads of existing blocks go through rahead(). Writes of existing
 * blocks pass NO_READ to get_block() when the chunk covers a whole block.
 */
211 static int rw_chunk(rip
, position
, off
, chunk
, left
, rw_flag
, gid
,
212 buf_off
, block_size
, completed
)
213 register struct inode
*rip
; /* pointer to inode for file to be rd/wr */
214 u64_t position
; /* position within file to read or write */
215 unsigned off
; /* off within the current block */
216 unsigned int chunk
; /* number of bytes to read or write */
217 unsigned left
; /* max number of bytes wanted after position */
218 int rw_flag
; /* READING or WRITING */
219 cp_grant_id_t gid
; /* grant */
220 unsigned buf_off
; /* offset in grant */
221 unsigned int block_size
; /* block size of FS operating on */
222 int *completed
; /* number of bytes copied */
224 /* Read or write (part of) a block. */
226 register struct buf
*bp
;
/* Block-special inodes are recognised by the file-type bits of i_mode. */
234 block_spec
= (rip
->i_mode
& I_TYPE
) == I_BLOCK_SPECIAL
;
237 b
= div64u(position
, block_size
);
238 dev
= (dev_t
) rip
->i_zone
[0];
240 if (ex64hi(position
) != 0)
241 panic("rw_chunk: position too high");
242 b
= read_map(rip
, (off_t
) ex64lo(position
));
246 if (!block_spec
&& b
== NO_BLOCK
) {
247 if (rw_flag
== READING
) {
248 /* Reading from a nonexistent block. Must read as all zeros.*/
249 bp
= get_block(NO_DEV
, NO_BLOCK
, NORMAL
); /* get a buffer */
252 /* Writing to a nonexistent block. Create and enter in inode.*/
253 if ((bp
= new_block(rip
, (off_t
) ex64lo(position
))) == NULL
)
256 } else if (rw_flag
== READING
) {
257 /* Read and read ahead if convenient. */
258 bp
= rahead(rip
, b
, position
, left
);
260 /* Normally an existing block to be partially overwritten is first read
261 * in. However, a full block need not be read in. If it is already in
262 * the cache, acquire it, otherwise just acquire a free buffer.
264 n
= (chunk
== block_size
? NO_READ
: NORMAL
);
265 if (!block_spec
&& off
== 0 && (off_t
) ex64lo(position
) >= rip
->i_size
)
267 bp
= get_block(dev
, b
, n
);
270 /* In all cases, bp now points to a valid buffer. */
272 panic("bp not valid in rw_chunk; this can't happen");
274 if (rw_flag
== WRITING
&& chunk
!= block_size
&& !block_spec
&&
275 (off_t
) ex64lo(position
) >= rip
->i_size
&& off
== 0) {
279 if (rw_flag
== READING
) {
280 /* Copy a chunk from the block buffer to user space. */
281 r
= sys_safecopyto(VFS_PROC_NR
, gid
, (vir_bytes
) buf_off
,
282 (vir_bytes
) (b_data(bp
)+off
), (size_t) chunk
);
284 /* Copy a chunk from user space to the block buffer. */
285 r
= sys_safecopyfrom(VFS_PROC_NR
, gid
, (vir_bytes
) buf_off
,
286 (vir_bytes
) (b_data(bp
)+off
), (size_t) chunk
);
/* Classify the buffer by whether the chunk reached the end of the block.
 * NOTE(review): the consumer of 'n' is not visible in this listing.
 */
290 n
= (off
+ chunk
== block_size
? FULL_DATA_BLOCK
: PARTIAL_DATA_BLOCK
);
297 /*===========================================================================*
299 *===========================================================================*/
/* read_map: translate a byte position within the file of inode 'rip' into a
 * block number. The position is turned into a file-relative block, then into
 * a zone index plus a block offset inside that zone (using s_log_zone_size).
 * The zone is looked up directly in i_zone[], or through the single indirect
 * zone i_zone[dzones], or through the double indirect zone i_zone[dzones+1].
 * NO_BLOCK is returned whenever a zone on the path is NO_ZONE.
 *
 * NOTE(review): lines are missing from this listing (local declarations for
 * z, b and bp, the test that selects the direct-zone case, the else branch
 * and the final returns); confirm them against the complete file.
 */
300 block_t
read_map(rip
, position
)
301 register struct inode
*rip
; /* ptr to inode to map from */
302 off_t position
; /* position in file whose blk wanted */
304 /* Given an inode and a position within the corresponding file, locate the
305 * block (not zone) number in which that position is to be found and return it.
310 int scale
, boff
, index
, zind
;
311 unsigned int dzones
, nr_indirects
;
313 unsigned long excess
, zone
, block_pos
;
315 scale
= rip
->i_sp
->s_log_zone_size
; /* for block-zone conversion */
316 block_pos
= position
/rip
->i_sp
->s_block_size
; /* relative blk # in file */
317 zone
= block_pos
>> scale
; /* position's zone */
318 boff
= (int) (block_pos
- (zone
<< scale
) ); /* relative blk # within zone */
319 dzones
= rip
->i_ndzones
;
320 nr_indirects
= rip
->i_nindirs
;
322 /* Is 'position' to be found in the inode itself? */
324 zind
= (int) zone
; /* index should be an int */
325 z
= rip
->i_zone
[zind
];
326 if (z
== NO_ZONE
) return(NO_BLOCK
);
327 b
= (block_t
) ((z
<< scale
) + boff
);
331 /* It is not in the inode, so it must be single or double indirect. */
332 excess
= zone
- dzones
; /* first Vx_NR_DZONES don't count */
334 if (excess
< nr_indirects
) {
335 /* 'position' can be located via the single indirect block. */
336 z
= rip
->i_zone
[dzones
];
338 /* 'position' can be located via the double indirect block. */
339 if ( (z
= rip
->i_zone
[dzones
+1]) == NO_ZONE
) return(NO_BLOCK
);
340 excess
-= nr_indirects
; /* single indir doesn't count*/
341 b
= (block_t
) z
<< scale
;
342 ASSERT(rip
->i_dev
!= NO_DEV
);
343 index
= (int) (excess
/nr_indirects
);
/* NOTE(review): this bound lets index == rip->i_nindirs through to
 * rd_indir(); if an indirect block holds i_nindirs entries (indices 0 to
 * i_nindirs-1) the comparison should presumably be '>=' -- confirm.
 */
344 if ((unsigned int) index
> rip
->i_nindirs
)
345 return(NO_BLOCK
); /* Can't go beyond double indirects */
346 bp
= get_block(rip
->i_dev
, b
, NORMAL
); /* get double indirect block */
347 ASSERT(lmfs_dev(bp
) != NO_DEV
);
348 ASSERT(lmfs_dev(bp
) == rip
->i_dev
);
349 z
= rd_indir(bp
, index
); /* z= zone for single*/
350 put_block(bp
, INDIRECT_BLOCK
); /* release double ind block */
351 excess
= excess
% nr_indirects
; /* index into single ind blk */
354 /* 'z' is zone num for single indirect block; 'excess' is index into it. */
355 if (z
== NO_ZONE
) return(NO_BLOCK
);
356 b
= (block_t
) z
<< scale
; /* b is blk # for single ind */
357 bp
= get_block(rip
->i_dev
, b
, NORMAL
); /* get single indirect block */
358 z
= rd_indir(bp
, (int) excess
); /* get block pointed to */
359 put_block(bp
, INDIRECT_BLOCK
); /* release single indir blk */
360 if (z
== NO_ZONE
) return(NO_BLOCK
);
361 b
= (block_t
) ((z
<< scale
) + boff
);
366 /*===========================================================================*
368 *===========================================================================*/
/* rd_indir: fetch entry 'index' from the indirect block held in buffer 'bp'
 * and return it as a zone_t. The superblock of the buffer's device decides
 * the entry format: V1 entries are converted with conv2(), all others with
 * conv4(), both using the superblock's s_native flag. A zone number other
 * than NO_ZONE that falls outside [s_firstdatazone, s_zones) is reported
 * with printf() and followed by panic("check file system").
 *
 * NOTE(review): lines are missing from this listing (the NULL test guarding
 * the first panic, the else keyword, the printf arguments and the return);
 * confirm them against the complete file.
 */
369 zone_t
rd_indir(bp
, index
)
370 struct buf
*bp
; /* pointer to indirect block */
371 int index
; /* index into *bp */
373 /* Given a pointer to an indirect block, read one entry. The reason for
374 * making a separate routine out of this is that there are four cases:
375 * V1 (IBM and 68000), and V2 (IBM and 68000).
378 struct super_block
*sp
;
379 zone_t zone
; /* V2 zones are longs (shorts in V1) */
382 panic("rd_indir() on NULL");
384 sp
= get_super(lmfs_dev(bp
)); /* need super block to find file sys type */
386 /* read a zone from an indirect block */
387 if (sp
->s_version
== V1
)
388 zone
= (zone_t
) conv2(sp
->s_native
, (int) b_v1_ind(bp
)[index
]);
390 zone
= (zone_t
) conv4(sp
->s_native
, (long) b_v2_ind(bp
)[index
]);
/* Sanity-check the zone number against the data-zone range recorded in the
 * superblock.
 */
392 if (zone
!= NO_ZONE
&&
393 (zone
< (zone_t
) sp
->s_firstdatazone
|| zone
>= sp
->s_zones
)) {
394 printf("Illegal zone number %ld in indirect block, index %d\n",
396 panic("check file system");
402 /*===========================================================================*
404 *===========================================================================*/
/* rahead: return a buffer for 'baseblock', prefetching further blocks when
 * the block is not already cached. A first get_block(..., PREFETCH) whose
 * buffer already carries a device is returned immediately. Otherwise the
 * number of blocks to prefetch is derived from bytes_ahead, capped at
 * NR_IOREQS, raised to BLOCKS_MINIMUM when rip->i_seek == NO_SEEK, and
 * limited by the blocks left in the file. The collected buffers are handed
 * to lmfs_rw_scattered() for READING and the base block is then fetched
 * with get_block(dev, baseblock, NORMAL).
 *
 * NOTE(review): many lines are missing from this listing (declarations of
 * dev, bp and ind1_pos, the assignments to 'block', the loop header, the
 * body of the indirect-block case and the release of an old read_q);
 * confirm them against the complete file.
 */
405 static struct buf
*rahead(rip
, baseblock
, position
, bytes_ahead
)
406 register struct inode
*rip
; /* pointer to inode for file to be read */
407 block_t baseblock
; /* block at current position */
408 u64_t position
; /* position within file */
409 unsigned bytes_ahead
; /* bytes beyond position for immediate use */
411 /* Fetch a block from the cache or the device. If a physical read is
412 * required, prefetch as many more blocks as convenient into the cache.
413 * This usually covers bytes_ahead and is at least BLOCKS_MINIMUM.
414 * The device driver may decide it knows better and stop reading at a
415 * cylinder boundary (or after an error). Rw_scattered() puts an optional
416 * flag on all reads to allow this.
418 /* Minimum number of blocks to prefetch. */
419 int nr_bufs
= lmfs_nr_bufs();
420 # define BLOCKS_MINIMUM (nr_bufs < 50 ? 18 : 32)
421 int block_spec
, scale
, read_q_size
;
422 unsigned int blocks_ahead
, fragment
, block_size
;
423 block_t block
, blocks_left
;
427 static unsigned int readqsize
= 0;
428 static struct buf
**read_q
;
/* Allocate the static read queue for nr_bufs entries when readqsize differs
 * from the current buffer count.
 */
430 if(readqsize
!= nr_bufs
) {
432 assert(read_q
!= NULL
);
435 if(!(read_q
= malloc(sizeof(read_q
[0])*nr_bufs
)))
436 panic("couldn't allocate read_q");
440 block_spec
= (rip
->i_mode
& I_TYPE
) == I_BLOCK_SPECIAL
;
442 dev
= (dev_t
) rip
->i_zone
[0];
446 block_size
= get_block_size(dev
);
449 bp
= get_block(dev
, block
, PREFETCH
);
451 if (lmfs_dev(bp
) != NO_DEV
) return(bp
);
453 /* The best guess for the number of blocks to prefetch: A lot.
454 * It is impossible to tell what the device looks like, so we don't even
455 * try to guess the geometry, but leave it to the driver.
457 * The floppy driver can read a full track with no rotational delay, and it
458 * avoids reading partial tracks if it can, so handing it enough buffers to
459 * read two tracks is perfect. (Two, because some diskette types have
460 * an odd number of sectors per track, so a block may span tracks.)
462 * The disk drivers don't try to be smart. With today's disks it is
463 * impossible to tell what the real geometry looks like, so it is best to
464 * read as much as you can. With luck the caching on the drive allows
465 * for a little time to start the next read.
467 * The current solution below is a bit of a hack, it just reads blocks from
468 * the current file position hoping that more of the file can be found. A
469 * better solution must look at the already available zone pointers and
470 * indirect blocks (but don't call read_map!).
473 fragment
= rem64u(position
, block_size
);
474 position
= sub64u(position
, fragment
);
475 bytes_ahead
+= fragment
;
477 blocks_ahead
= (bytes_ahead
+ block_size
- 1) / block_size
;
479 if (block_spec
&& rip
->i_size
== 0) {
480 blocks_left
= (block_t
) NR_IOREQS
;
482 blocks_left
= (block_t
) (rip
->i_size
-ex64lo(position
)+(block_size
-1)) /
485 /* Go for the first indirect block if we are in its neighborhood. */
487 scale
= rip
->i_sp
->s_log_zone_size
;
488 ind1_pos
= (off_t
) rip
->i_ndzones
* (block_size
<< scale
);
489 if ((off_t
) ex64lo(position
) <= ind1_pos
&&
490 rip
->i_size
> ind1_pos
) {
497 /* No more than the maximum request. */
498 if (blocks_ahead
> NR_IOREQS
) blocks_ahead
= NR_IOREQS
;
500 /* Read at least the minimum number of blocks, but not after a seek. */
501 if (blocks_ahead
< BLOCKS_MINIMUM
&& rip
->i_seek
== NO_SEEK
)
502 blocks_ahead
= BLOCKS_MINIMUM
;
504 /* Can't go past end of file. */
505 if (blocks_ahead
> blocks_left
) blocks_ahead
= blocks_left
;
509 /* Acquire block buffers. */
511 read_q
[read_q_size
++] = bp
;
513 if (--blocks_ahead
== 0) break;
515 /* Don't trash the cache, leave 4 free. */
516 if (lmfs_bufs_in_use() >= nr_bufs
- 4) break;
520 bp
= get_block(dev
, block
, PREFETCH
);
521 if (lmfs_dev(bp
) != NO_DEV
) {
522 /* Oops, block already in the cache, get out. */
523 put_block(bp
, FULL_DATA_BLOCK
);
/* Issue the queued reads in one scattered request, then fetch the block the
 * caller asked for.
 */
527 lmfs_rw_scattered(dev
, read_q
, read_q_size
, READING
);
528 return(get_block(dev
, baseblock
, NORMAL
));
532 /*===========================================================================*
534 *===========================================================================*/
/* fs_getdents: copy directory entries of the inode named by
 * fs_m_in.REQ_INODE_NR into the caller's grant, starting at directory
 * position fs_m_in.REQ_SEEK_POS_LO, which must be a multiple of
 * DIR_ENTRY_SIZE. Each in-use on-disk entry is converted to a struct dirent
 * whose record length is padded up to a multiple of sizeof(long). Records
 * are staged in the static getdents_buf and flushed with sys_safecopyto()
 * when the staging buffer cannot take the next record. The number of bytes
 * delivered goes to fs_m_out.RES_NBYTES and the position for the next call
 * to fs_m_out.RES_SEEK_POS_LO. EINVAL is selected when 'done' is set and
 * nothing was delivered (the user's buffer is too small).
 *
 * NOTE(review): many lines are missing from this listing (declarations of
 * ino, gid, done, b, bp, dp, dep, cp, o and r, the error returns, the
 * statements that set 'done' and 'new_pos', and the final return); confirm
 * them against the complete file.
 */
535 int fs_getdents(void)
537 #define GETDENTS_BUFSIZE (sizeof(struct dirent) + MFS_NAME_MAX + 1)
538 #define GETDENTS_ENTRIES 8
539 static char getdents_buf
[GETDENTS_BUFSIZE
* GETDENTS_ENTRIES
];
540 register struct inode
*rip
;
542 unsigned int block_size
, len
, reclen
;
546 size_t size
, tmpbuf_off
, userbuf_off
;
547 off_t pos
, off
, block_pos
, new_pos
, ent_pos
;
553 ino
= (ino_t
) fs_m_in
.REQ_INODE_NR
;
/* NOTE(review): REQ_GRANT is cast to gid_t here, while fs_readwrite() and
 * fs_breadwrite() cast it to cp_grant_id_t; this looks like the wrong cast
 * type -- confirm against the declaration of 'gid', which is not visible in
 * this listing.
 */
554 gid
= (gid_t
) fs_m_in
.REQ_GRANT
;
555 size
= (size_t) fs_m_in
.REQ_MEM_SIZE
;
556 pos
= (off_t
) fs_m_in
.REQ_SEEK_POS_LO
;
558 /* Check whether the position is properly aligned */
559 if( (unsigned int) pos
% DIR_ENTRY_SIZE
)
562 if( (rip
= get_inode(fs_dev
, ino
)) == NULL
)
565 block_size
= rip
->i_sp
->s_block_size
;
566 off
= (pos
% block_size
); /* Offset in block */
567 block_pos
= pos
- off
;
568 done
= FALSE
; /* Stop processing directory blocks when done is set */
570 tmpbuf_off
= 0; /* Offset in getdents_buf */
571 memset(getdents_buf
, '\0', sizeof(getdents_buf
)); /* Avoid leaking any data */
572 userbuf_off
= 0; /* Offset in the user's buffer */
574 /* The default position for the next request is EOF. If the user's buffer
575 * fills up before EOF, new_pos will be modified. */
576 new_pos
= rip
->i_size
;
578 for(; block_pos
< rip
->i_size
; block_pos
+= block_size
) {
579 b
= read_map(rip
, block_pos
); /* get block number */
581 /* Since directories don't have holes, 'b' cannot be NO_BLOCK. */
582 bp
= get_block(rip
->i_dev
, b
, NORMAL
); /* get a dir block */
586 /* Search a directory block. */
588 dp
= &b_dir(bp
)[off
/ DIR_ENTRY_SIZE
];
591 for (; dp
< &b_dir(bp
)[NR_DIR_ENTRIES(block_size
)]; dp
++) {
592 if (dp
->mfs_d_ino
== 0)
593 continue; /* Entry is not in use */
595 /* Compute the length of the name */
596 cp
= memchr(dp
->mfs_d_name
, '\0', sizeof(dp
->mfs_d_name
));
598 len
= sizeof(dp
->mfs_d_name
);
600 len
= cp
- (dp
->mfs_d_name
);
602 /* Compute record length */
603 reclen
= offsetof(struct dirent
, d_name
) + len
+ 1;
604 o
= (reclen
% sizeof(long));
606 reclen
+= sizeof(long) - o
;
608 /* Need the position of this entry in the directory */
609 ent_pos
= block_pos
+ ((char *) dp
- (char *) bp
->data
);
611 if (userbuf_off
+ tmpbuf_off
+ reclen
>= size
) {
612 /* The user has no space for one more record */
615 /* Record the position of this entry, it is the
616 * starting point of the next request (unless the
617 * position is modified with lseek).
623 if (tmpbuf_off
+ reclen
>= GETDENTS_BUFSIZE
*GETDENTS_ENTRIES
) {
624 r
= sys_safecopyto(VFS_PROC_NR
, gid
,
625 (vir_bytes
) userbuf_off
,
626 (vir_bytes
) getdents_buf
,
627 (size_t) tmpbuf_off
);
633 userbuf_off
+= tmpbuf_off
;
637 dep
= (struct dirent
*) &getdents_buf
[tmpbuf_off
];
638 dep
->d_ino
= dp
->mfs_d_ino
;
639 dep
->d_off
= ent_pos
;
640 dep
->d_reclen
= (unsigned short) reclen
;
641 memcpy(dep
->d_name
, dp
->mfs_d_name
, len
);
642 dep
->d_name
[len
] = '\0';
643 tmpbuf_off
+= reclen
;
646 put_block(bp
, DIRECTORY_BLOCK
);
651 if (tmpbuf_off
!= 0) {
652 r
= sys_safecopyto(VFS_PROC_NR
, gid
, (vir_bytes
) userbuf_off
,
653 (vir_bytes
) getdents_buf
, (size_t) tmpbuf_off
);
659 userbuf_off
+= tmpbuf_off
;
662 if (done
&& userbuf_off
== 0)
663 r
= EINVAL
; /* The user's buffer is too small */
665 fs_m_out
.RES_NBYTES
= userbuf_off
;
666 fs_m_out
.RES_SEEK_POS_LO
= new_pos
;
667 if(!rip
->i_sp
->s_rd_only
) {
668 rip
->i_update
|= ATIME
;
674 put_inode(rip
); /* release the inode */