10 #include <minix/vfsif.h>
/* Forward declarations of the two file-local helpers defined further down. */

/* rahead() is called from rw_chunk() on the read path to fetch a block,
 * prefetching further blocks when a physical read is required.
 */
14 static struct buf
*rahead(struct inode
*rip
, block_t baseblock
, u64_t
15 position
, unsigned bytes_ahead
);
/* rw_chunk() moves one chunk (never more than one block) between a grant and
 * the block cache; fs_readwrite() and fs_breadwrite() call it once per chunk.
 * NOTE(review): 'chunk' is size_t here but 'unsigned int' in the old-style
 * definition later in the file - confirm the two agree on every target.
 */
16 static int rw_chunk(struct inode
*rip
, u64_t position
, unsigned off
,
17 size_t chunk
, unsigned left
, int rw_flag
, cp_grant_id_t gid
, unsigned
18 buf_off
, unsigned int block_size
, int *completed
);
21 /*===========================================================================*
 *                              fs_readwrite                                 *
23 *===========================================================================*/
/* Handle a read or write request on an inode.
 *
 * What the visible code does:
 *  - looks up the inode named by REQ_INODE_NR on fs_dev with find_inode();
 *  - takes the direction from the request type (REQ_READ means READING,
 *    anything else WRITING) and the grant, start position and byte count
 *    from the REQ_GRANT, REQ_SEEK_POS_LO and REQ_NBYTES message fields;
 *  - takes the block size either from the device stored in i_zone[0] or from
 *    the inode's superblock (the test choosing between them is not visible);
 *  - moves the data one chunk at a time through rw_chunk(); a chunk never
 *    crosses a block boundary and, when reading, never extends past f_size;
 *  - reports the final position in RES_SEEK_POS_LO and the number of bytes
 *    moved (cum_io) in RES_NBYTES;
 *  - after a write to a regular file, named pipe or directory, raises i_size
 *    to the new position when that lies beyond f_size;
 *  - when r is OK and the file system is not read-only, flags ATIME (read)
 *    or CTIME and MTIME (write) and marks the inode dirty.
 *
 * Returns an int status; the return statements are not visible here.
 *
 * NOTE(review): this listing has lost a number of source lines (braces,
 * returns, some declarations, comment terminators), so the error paths and
 * the exact loop structure cannot be described from this excerpt.
 */
24 int fs_readwrite(void)
26 int r
, rw_flag
, block_spec
;
29 off_t position
, f_size
, bytes_left
;
30 unsigned int off
, cum_io
, block_size
, chunk
;
38 /* Find the inode referred */
39 if ((rip
= find_inode(fs_dev
, (ino_t
) fs_m_in
.REQ_INODE_NR
)) == NULL
)
/* Classify the inode: 'regular' covers regular files and named pipes,
 * 'block_spec' is 1 only for block special files.
 */
42 mode_word
= rip
->i_mode
& I_TYPE
;
43 regular
= (mode_word
== I_REGULAR
|| mode_word
== I_NAMED_PIPE
);
44 block_spec
= (mode_word
== I_BLOCK_SPECIAL
? 1 : 0);
46 /* Determine blocksize */
48 block_size
= get_block_size( (dev_t
) rip
->i_zone
[0]);
49 f_size
= MAX_FILE_POS
;
51 block_size
= rip
->i_sp
->s_block_size
;
55 /* Get the values from the request message */
56 rw_flag
= (fs_m_in
.m_type
== REQ_READ
? READING
: WRITING
);
57 gid
= (cp_grant_id_t
) fs_m_in
.REQ_GRANT
;
58 position
= (off_t
) fs_m_in
.REQ_SEEK_POS_LO
;
59 nrbytes
= (size_t) fs_m_in
.REQ_NBYTES
;
61 lmfs_reset_rdwt_err();
63 /* If this is file i/o, check we can write */
64 if (rw_flag
== WRITING
&& !block_spec
) {
65 if(rip
->i_sp
->s_rd_only
)
68 /* Check in advance to see if file will grow too big. */
69 if (position
> (off_t
) (rip
->i_sp
->s_max_size
- nrbytes
))
72 /* Clear the zone containing present EOF if hole about
73 * to be created. This is necessary because all unwritten
74 * blocks prior to the EOF must read as zeros.
76 if(position
> f_size
) clear_zone(rip
, f_size
, 0);
79 /* If this is block i/o, check we can write */
80 if(block_spec
&& rw_flag
== WRITING
&&
81 (dev_t
) rip
->i_zone
[0] == superblock
.s_dev
&& superblock
.s_rd_only
)
85 /* Split the transfer into chunks that don't span two blocks. */
87 off
= ((unsigned int) position
) % block_size
; /* offset in blk*/
88 chunk
= min(nrbytes
, block_size
- off
);
90 if (rw_flag
== READING
) {
91 bytes_left
= f_size
- position
;
92 if (position
>= f_size
) break; /* we are beyond EOF */
93 if (chunk
> (unsigned int) bytes_left
) chunk
= bytes_left
;
96 /* Read or write 'chunk' bytes. */
97 r
= rw_chunk(rip
, cvul64((unsigned long) position
), off
, chunk
,
98 nrbytes
, rw_flag
, gid
, cum_io
, block_size
, &completed
);
100 if (r
!= OK
) break; /* EOF reached */
101 if (lmfs_rdwt_err() < 0) break;
103 /* Update counters and pointers. */
104 nrbytes
-= chunk
; /* bytes yet to be read */
105 cum_io
+= chunk
; /* bytes read so far */
106 position
+= (off_t
) chunk
; /* position within the file */
109 fs_m_out
.RES_SEEK_POS_LO
= position
; /* It might change later and the VFS
110 has to know this value */
112 /* On write, update file size and access time. */
113 if (rw_flag
== WRITING
) {
114 if (regular
|| mode_word
== I_DIRECTORY
) {
115 if (position
> f_size
) rip
->i_size
= position
;
/* rahead() applies its minimum prefetch only while i_seek is NO_SEEK. */
119 rip
->i_seek
= NO_SEEK
;
/* A pending cache-layer error replaces r, except that END_OF_FILE is
 * turned back into OK.
 */
121 if (lmfs_rdwt_err() != OK
) r
= lmfs_rdwt_err(); /* check for disk error */
122 if (lmfs_rdwt_err() == END_OF_FILE
) r
= OK
;
124 /* even on a ROFS, writing to a device node on it is fine,
125 * just don't update the inode stats for it. And dito for reading.
127 if (r
== OK
&& !rip
->i_sp
->s_rd_only
) {
128 if (rw_flag
== READING
) rip
->i_update
|= ATIME
;
129 if (rw_flag
== WRITING
) rip
->i_update
|= CTIME
| MTIME
;
130 IN_MARKDIRTY(rip
); /* inode is thus now dirty */
133 fs_m_out
.RES_NBYTES
= cum_io
;
139 /*===========================================================================*
 *                              fs_breadwrite                                *
141 *===========================================================================*/
/* Handle a block read or write request addressed directly to a device.
 *
 * What the visible code does:
 *  - takes the target device from REQ_DEV2, the direction from the request
 *    type (REQ_BREAD means READING, anything else WRITING), the grant from
 *    REQ_GRANT, a 64-bit position from REQ_SEEK_POS_LO/HI and the byte count
 *    from REQ_NBYTES;
 *  - tests whether the target is the device of a read-only mounted
 *    'superblock' (the statement taken in that case is not visible);
 *  - fills in a local pseudo inode 'rip' so that rw_chunk() can be reused;
 *  - loops while bytes remain, passing one chunk per call to rw_chunk(), a
 *    chunk never crossing a block boundary;
 *  - reports the final position in RES_SEEK_POS_LO/HI and the number of
 *    bytes moved (cum_io) in RES_NBYTES.
 *
 * Returns an int status; the return statements are not visible here.
 *
 * NOTE(review): several source lines (braces, returns, some declarations)
 * are missing from this listing.
 */
142 int fs_breadwrite(void)
144 int r
, rw_flag
, completed
;
147 unsigned int off
, cum_io
, chunk
, block_size
;
151 /* Pseudo inode for rw_chunk */
156 target_dev
= (dev_t
) fs_m_in
.REQ_DEV2
;
158 /* Get the values from the request message */
159 rw_flag
= (fs_m_in
.m_type
== REQ_BREAD
? READING
: WRITING
);
160 gid
= (cp_grant_id_t
) fs_m_in
.REQ_GRANT
;
161 position
= make64((unsigned long) fs_m_in
.REQ_SEEK_POS_LO
,
162 (unsigned long) fs_m_in
.REQ_SEEK_POS_HI
);
163 nrbytes
= (size_t) fs_m_in
.REQ_NBYTES
;
165 block_size
= get_block_size(target_dev
);
167 /* Don't block-write to a RO-mounted filesystem. */
168 if(superblock
.s_dev
== target_dev
&& superblock
.s_rd_only
)
/* rw_chunk() reads the device number from i_zone[0] and the file type from
 * i_mode, so those are the pseudo inode fields set here.
 */
171 rip
.i_zone
[0] = (zone_t
) target_dev
;
172 rip
.i_mode
= I_BLOCK_SPECIAL
;
175 lmfs_reset_rdwt_err();
178 /* Split the transfer into chunks that don't span two blocks. */
179 while (nrbytes
> 0) {
180 off
= rem64u(position
, block_size
); /* offset in blk*/
181 chunk
= min(nrbytes
, block_size
- off
);
183 /* Read or write 'chunk' bytes. */
184 r
= rw_chunk(&rip
, position
, off
, chunk
, nrbytes
, rw_flag
, gid
,
185 cum_io
, block_size
, &completed
);
187 if (r
!= OK
) break; /* EOF reached */
188 if (lmfs_rdwt_err() < 0) break;
190 /* Update counters and pointers. */
191 nrbytes
-= chunk
; /* bytes yet to be read */
192 cum_io
+= chunk
; /* bytes read so far */
193 position
= add64ul(position
, chunk
); /* position within the file */
196 fs_m_out
.RES_SEEK_POS_LO
= ex64lo(position
);
197 fs_m_out
.RES_SEEK_POS_HI
= ex64hi(position
);
/* A pending cache-layer error replaces r, except that END_OF_FILE is
 * turned back into OK.
 */
199 if (lmfs_rdwt_err() != OK
) r
= lmfs_rdwt_err(); /* check for disk error */
200 if (lmfs_rdwt_err() == END_OF_FILE
) r
= OK
;
202 fs_m_out
.RES_NBYTES
= cum_io
;
208 /*===========================================================================*
 *                              rw_chunk                                     *
210 *===========================================================================*/
/* Transfer one chunk between the caller's grant and a single cache block.
 *
 * What the visible code does:
 *  - decides from i_mode whether rip stands for a block special file;
 *  - for a position with no block mapped (b == NO_BLOCK, not block special):
 *    a read zero-fills the grant with sys_safememset(), a write obtains a
 *    fresh block with new_block();
 *  - otherwise a read fetches the block through rahead(), and a write gets
 *    it with get_block(), using NO_READ when the chunk covers a whole block
 *    and NORMAL when it does not;
 *  - copies 'chunk' bytes at offset 'off' of the block to the grant
 *    (sys_safecopyto) when reading, or from the grant (sys_safecopyfrom)
 *    when writing, at grant offset buf_off;
 *  - sets n to FULL_DATA_BLOCK when the chunk ends exactly at the end of the
 *    block and to PARTIAL_DATA_BLOCK otherwise.
 *
 * Panics with "rw_chunk: position too high" when the upper 32 bits of
 * position are non-zero on the path that maps the position via read_map().
 *
 * Returns an int status; the return statements, the release of bp and any
 * use of *completed are not visible in this excerpt.
 *
 * NOTE(review): several source lines (braces, else branches, returns, some
 * declarations, one comment terminator) are missing from this listing.
 */
211 static int rw_chunk(rip
, position
, off
, chunk
, left
, rw_flag
, gid
,
212 buf_off
, block_size
, completed
)
213 register struct inode
*rip
; /* pointer to inode for file to be rd/wr */
214 u64_t position
; /* position within file to read or write */
215 unsigned off
; /* off within the current block */
216 unsigned int chunk
; /* number of bytes to read or write */
217 unsigned left
; /* max number of bytes wanted after position */
218 int rw_flag
; /* READING or WRITING */
219 cp_grant_id_t gid
; /* grant */
220 unsigned buf_off
; /* offset in grant */
221 unsigned int block_size
; /* block size of FS operating on */
222 int *completed
; /* number of bytes copied */
224 /* Read or write (part of) a block. */
226 register struct buf
*bp
;
234 block_spec
= (rip
->i_mode
& I_TYPE
) == I_BLOCK_SPECIAL
;
/* NOTE(review): b is assigned from div64u() here and from read_map() a few
 * lines below; the branch that separates the two cases is not visible in
 * this listing.
 */
237 b
= div64u(position
, block_size
);
238 dev
= (dev_t
) rip
->i_zone
[0];
240 if (ex64hi(position
) != 0)
241 panic("rw_chunk: position too high");
242 b
= read_map(rip
, (off_t
) ex64lo(position
));
246 if (!block_spec
&& b
== NO_BLOCK
) {
247 if (rw_flag
== READING
) {
248 /* Reading from a nonexistent block. Must read as all zeros.*/
249 r
= sys_safememset(VFS_PROC_NR
, gid
, (vir_bytes
) buf_off
,
252 printf("MFS: sys_safememset failed\n");
256 /* Writing to a nonexistent block. Create and enter in inode.*/
257 if ((bp
= new_block(rip
, (off_t
) ex64lo(position
))) == NULL
)
260 } else if (rw_flag
== READING
) {
261 /* Read and read ahead if convenient. */
262 bp
= rahead(rip
, b
, position
, left
);
264 /* Normally an existing block to be partially overwritten is first read
265 * in. However, a full block need not be read in. If it is already in
266 * the cache, acquire it, otherwise just acquire a free buffer.
268 n
= (chunk
== block_size
? NO_READ
: NORMAL
);
269 if (!block_spec
&& off
== 0 && (off_t
) ex64lo(position
) >= rip
->i_size
)
271 bp
= get_block(dev
, b
, n
);
274 /* In all cases, bp now points to a valid buffer. */
276 panic("bp not valid in rw_chunk; this can't happen");
278 if (rw_flag
== WRITING
&& chunk
!= block_size
&& !block_spec
&&
279 (off_t
) ex64lo(position
) >= rip
->i_size
&& off
== 0) {
283 if (rw_flag
== READING
) {
284 /* Copy a chunk from the block buffer to user space. */
285 r
= sys_safecopyto(VFS_PROC_NR
, gid
, (vir_bytes
) buf_off
,
286 (vir_bytes
) (b_data(bp
)+off
), (size_t) chunk
);
288 /* Copy a chunk from user space to the block buffer. */
289 r
= sys_safecopyfrom(VFS_PROC_NR
, gid
, (vir_bytes
) buf_off
,
290 (vir_bytes
) (b_data(bp
)+off
), (size_t) chunk
);
/* n records whether this chunk reached the end of the block. */
294 n
= (off
+ chunk
== block_size
? FULL_DATA_BLOCK
: PARTIAL_DATA_BLOCK
);
301 /*===========================================================================*
 *                              read_map                                     *
303 *===========================================================================*/
/* Translate a byte position in a file into a block number on the device.
 *
 * What the visible code does:
 *  - converts the position into a file-relative block number, then into a
 *    zone number and a block offset inside that zone, using the superblock's
 *    block size and s_log_zone_size;
 *  - for a zone held in the inode itself, reads it from i_zone[zone];
 *  - otherwise subtracts the i_ndzones direct zones and goes through the
 *    single indirect zone i_zone[dzones] when the remainder is below
 *    i_nindirs, or through the double indirect zone i_zone[dzones+1] and
 *    then a single indirect block;
 *  - indirect blocks are fetched with get_block(), read with rd_indir() and
 *    released with put_block(..., INDIRECT_BLOCK);
 *  - returns NO_BLOCK as soon as a zone on the path is NO_ZONE; otherwise b
 *    is computed as (z << scale) + boff.
 *
 * The final return of b is not visible in this excerpt.
 *
 * NOTE(review): several source lines (braces, else branches, returns, some
 * declarations, one comment terminator) are missing from this listing.
 */
304 block_t
read_map(rip
, position
)
305 register struct inode
*rip
; /* ptr to inode to map from */
306 off_t position
; /* position in file whose blk wanted */
308 /* Given an inode and a position within the corresponding file, locate the
309 * block (not zone) number in which that position is to be found and return it.
314 int scale
, boff
, index
, zind
;
315 unsigned int dzones
, nr_indirects
;
317 unsigned long excess
, zone
, block_pos
;
319 scale
= rip
->i_sp
->s_log_zone_size
; /* for block-zone conversion */
320 block_pos
= position
/rip
->i_sp
->s_block_size
; /* relative blk # in file */
321 zone
= block_pos
>> scale
; /* position's zone */
322 boff
= (int) (block_pos
- (zone
<< scale
) ); /* relative blk # within zone */
323 dzones
= rip
->i_ndzones
;
324 nr_indirects
= rip
->i_nindirs
;
326 /* Is 'position' to be found in the inode itself? */
328 zind
= (int) zone
; /* index should be an int */
329 z
= rip
->i_zone
[zind
];
330 if (z
== NO_ZONE
) return(NO_BLOCK
);
331 b
= (block_t
) ((z
<< scale
) + boff
);
335 /* It is not in the inode, so it must be single or double indirect. */
336 excess
= zone
- dzones
; /* first Vx_NR_DZONES don't count */
338 if (excess
< nr_indirects
) {
339 /* 'position' can be located via the single indirect block. */
340 z
= rip
->i_zone
[dzones
];
342 /* 'position' can be located via the double indirect block. */
343 if ( (z
= rip
->i_zone
[dzones
+1]) == NO_ZONE
) return(NO_BLOCK
);
344 excess
-= nr_indirects
; /* single indir doesn't count*/
345 b
= (block_t
) z
<< scale
;
346 ASSERT(rip
->i_dev
!= NO_DEV
);
347 index
= (int) (excess
/nr_indirects
);
/* NOTE(review): only index > i_nindirs is rejected, so index == i_nindirs is
 * still handed to rd_indir() below. If an indirect block holds exactly
 * i_nindirs entries that is one past the end - confirm whether >= was meant.
 */
348 if ((unsigned int) index
> rip
->i_nindirs
)
349 return(NO_BLOCK
); /* Can't go beyond double indirects */
350 bp
= get_block(rip
->i_dev
, b
, NORMAL
); /* get double indirect block */
351 ASSERT(lmfs_dev(bp
) != NO_DEV
);
352 ASSERT(lmfs_dev(bp
) == rip
->i_dev
);
353 z
= rd_indir(bp
, index
); /* z= zone for single*/
354 put_block(bp
, INDIRECT_BLOCK
); /* release double ind block */
355 excess
= excess
% nr_indirects
; /* index into single ind blk */
358 /* 'z' is zone num for single indirect block; 'excess' is index into it. */
359 if (z
== NO_ZONE
) return(NO_BLOCK
);
360 b
= (block_t
) z
<< scale
; /* b is blk # for single ind */
361 bp
= get_block(rip
->i_dev
, b
, NORMAL
); /* get single indirect block */
362 z
= rd_indir(bp
, (int) excess
); /* get block pointed to */
363 put_block(bp
, INDIRECT_BLOCK
); /* release single indir blk */
364 if (z
== NO_ZONE
) return(NO_BLOCK
);
365 b
= (block_t
) ((z
<< scale
) + boff
);
370 /*===========================================================================*
 *                              rd_indir                                     *
372 *===========================================================================*/
/* Read entry 'index' of the indirect block held in bp and return it as a
 * zone number.
 *
 * What the visible code does:
 *  - panics with "rd_indir() on NULL" (the guarding test is not visible);
 *  - finds the superblock of the buffer's device with get_super();
 *  - for a V1 file system reads the entry from b_v1_ind(bp) through conv2(),
 *    otherwise from b_v2_ind(bp) through conv4(), both driven by s_native;
 *  - a zone other than NO_ZONE that lies below s_firstdatazone or at or
 *    above s_zones is reported with printf() and followed by
 *    panic("check file system").
 *
 * The return of 'zone' is not visible in this excerpt.
 *
 * NOTE(review): several source lines (braces, the else keyword, the printf
 * arguments, the return, one comment terminator) are missing from this
 * listing.
 */
373 zone_t
rd_indir(bp
, index
)
374 struct buf
*bp
; /* pointer to indirect block */
375 int index
; /* index into *bp */
377 /* Given a pointer to an indirect block, read one entry. The reason for
378 * making a separate routine out of this is that there are four cases:
379 * V1 (IBM and 68000), and V2 (IBM and 68000).
382 struct super_block
*sp
;
383 zone_t zone
; /* V2 zones are longs (shorts in V1) */
386 panic("rd_indir() on NULL");
388 sp
= get_super(lmfs_dev(bp
)); /* need super block to find file sys type */
390 /* read a zone from an indirect block */
391 if (sp
->s_version
== V1
)
392 zone
= (zone_t
) conv2(sp
->s_native
, (int) b_v1_ind(bp
)[index
]);
394 zone
= (zone_t
) conv4(sp
->s_native
, (long) b_v2_ind(bp
)[index
]);
/* Any zone other than NO_ZONE must lie in [s_firstdatazone, s_zones). */
396 if (zone
!= NO_ZONE
&&
397 (zone
< (zone_t
) sp
->s_firstdatazone
|| zone
>= sp
->s_zones
)) {
398 printf("Illegal zone number %ld in indirect block, index %d\n",
400 panic("check file system");
406 /*===========================================================================*
 *                              rahead                                       *
408 *===========================================================================*/
/* Fetch the block at 'baseblock', prefetching following blocks when the
 * block is not already in the cache.
 *
 * What the visible code does:
 *  - keeps a static queue read_q, allocated with malloc() for nr_bufs
 *    entries whenever readqsize differs from the current lmfs_nr_bufs();
 *    panics with "couldn't allocate read_q" if the allocation fails;
 *  - takes the device from i_zone[0] and its block size from
 *    get_block_size();
 *  - asks the cache for the block with PREFETCH and returns it at once when
 *    lmfs_dev(bp) is not NO_DEV;
 *  - otherwise rounds position down to a block boundary and works out how
 *    many blocks to read: enough to cover bytes_ahead, capped at NR_IOREQS,
 *    raised to BLOCKS_MINIMUM while i_seek is NO_SEEK, and limited to
 *    blocks_left;
 *  - queues buffers in read_q, stopping when the count is used up, when
 *    fewer than four cache buffers would stay free, or when a block turns
 *    out to be cached already;
 *  - hands the queue to lmfs_rw_scattered() and returns
 *    get_block(dev, baseblock, NORMAL).
 *
 * BLOCKS_MINIMUM is 18 when the cache has fewer than 50 buffers, else 32.
 *
 * NOTE(review): several source lines (braces, loop headers, else branches,
 * some declarations and assignments, two comment terminators) are missing
 * from this listing, so the loop structure is described only as far as the
 * visible statements allow.
 */
409 static struct buf
*rahead(rip
, baseblock
, position
, bytes_ahead
)
410 register struct inode
*rip
; /* pointer to inode for file to be read */
411 block_t baseblock
; /* block at current position */
412 u64_t position
; /* position within file */
413 unsigned bytes_ahead
; /* bytes beyond position for immediate use */
415 /* Fetch a block from the cache or the device. If a physical read is
416 * required, prefetch as many more blocks as convenient into the cache.
417 * This usually covers bytes_ahead and is at least BLOCKS_MINIMUM.
418 * The device driver may decide it knows better and stop reading at a
419 * cylinder boundary (or after an error). Rw_scattered() puts an optional
420 * flag on all reads to allow this.
422 /* Minimum number of blocks to prefetch. */
423 int nr_bufs
= lmfs_nr_bufs();
424 # define BLOCKS_MINIMUM (nr_bufs < 50 ? 18 : 32)
425 int block_spec
, scale
, read_q_size
;
426 unsigned int blocks_ahead
, fragment
, block_size
;
427 block_t block
, blocks_left
;
431 static unsigned int readqsize
= 0;
432 static struct buf
**read_q
;
/* (Re)allocate read_q with one slot per cache buffer whenever the recorded
 * size differs from the current number of buffers.
 */
434 if(readqsize
!= nr_bufs
) {
436 assert(read_q
!= NULL
);
439 if(!(read_q
= malloc(sizeof(read_q
[0])*nr_bufs
)))
440 panic("couldn't allocate read_q");
444 block_spec
= (rip
->i_mode
& I_TYPE
) == I_BLOCK_SPECIAL
;
446 dev
= (dev_t
) rip
->i_zone
[0];
450 block_size
= get_block_size(dev
);
453 bp
= get_block(dev
, block
, PREFETCH
);
455 if (lmfs_dev(bp
) != NO_DEV
) return(bp
);
457 /* The best guess for the number of blocks to prefetch: A lot.
458 * It is impossible to tell what the device looks like, so we don't even
459 * try to guess the geometry, but leave it to the driver.
461 * The floppy driver can read a full track with no rotational delay, and it
462 * avoids reading partial tracks if it can, so handing it enough buffers to
463 * read two tracks is perfect. (Two, because some diskette types have
464 * an odd number of sectors per track, so a block may span tracks.)
466 * The disk drivers don't try to be smart. With todays disks it is
467 * impossible to tell what the real geometry looks like, so it is best to
468 * read as much as you can. With luck the caching on the drive allows
469 * for a little time to start the next read.
471 * The current solution below is a bit of a hack, it just reads blocks from
472 * the current file position hoping that more of the file can be found. A
473 * better solution must look at the already available zone pointers and
474 * indirect blocks (but don't call read_map!).
477 fragment
= rem64u(position
, block_size
);
478 position
= sub64u(position
, fragment
);
479 bytes_ahead
+= fragment
;
481 blocks_ahead
= (bytes_ahead
+ block_size
- 1) / block_size
;
483 if (block_spec
&& rip
->i_size
== 0) {
484 blocks_left
= (block_t
) NR_IOREQS
;
486 blocks_left
= (block_t
) (rip
->i_size
-ex64lo(position
)+(block_size
-1)) /
489 /* Go for the first indirect block if we are in its neighborhood. */
491 scale
= rip
->i_sp
->s_log_zone_size
;
492 ind1_pos
= (off_t
) rip
->i_ndzones
* (block_size
<< scale
);
493 if ((off_t
) ex64lo(position
) <= ind1_pos
&&
494 rip
->i_size
> ind1_pos
) {
501 /* No more than the maximum request. */
502 if (blocks_ahead
> NR_IOREQS
) blocks_ahead
= NR_IOREQS
;
504 /* Read at least the minimum number of blocks, but not after a seek. */
505 if (blocks_ahead
< BLOCKS_MINIMUM
&& rip
->i_seek
== NO_SEEK
)
506 blocks_ahead
= BLOCKS_MINIMUM
;
508 /* Can't go past end of file. */
509 if (blocks_ahead
> blocks_left
) blocks_ahead
= blocks_left
;
513 /* Acquire block buffers. */
515 read_q
[read_q_size
++] = bp
;
517 if (--blocks_ahead
== 0) break;
519 /* Don't trash the cache, leave 4 free. */
520 if (lmfs_bufs_in_use() >= nr_bufs
- 4) break;
524 bp
= get_block(dev
, block
, PREFETCH
);
525 if (lmfs_dev(bp
) != NO_DEV
) {
526 /* Oops, block already in the cache, get out. */
527 put_block(bp
, FULL_DATA_BLOCK
);
/* Issue the queued reads, then return the block the caller asked for. */
531 lmfs_rw_scattered(dev
, read_q
, read_q_size
, READING
);
532 return(get_block(dev
, baseblock
, NORMAL
));
536 /*===========================================================================*
 *                              fs_getdents                                  *
538 *===========================================================================*/
/* Handle a request to read directory entries into the caller's grant.
 *
 * What the visible code does:
 *  - takes the inode number, grant, buffer size and start position from the
 *    REQ_INODE_NR, REQ_GRANT, REQ_MEM_SIZE and REQ_SEEK_POS_LO fields;
 *  - tests that the position is a multiple of DIR_ENTRY_SIZE and that
 *    get_inode() finds the inode (the statements taken on failure are not
 *    visible); the inode is released with put_inode() at the end;
 *  - walks the directory block by block from the block containing the
 *    position up to i_size, mapping each with read_map() and fetching it
 *    with get_block();
 *  - skips entries whose mfs_d_ino is 0; for the others builds a struct
 *    dirent in the static staging buffer getdents_buf with d_ino, d_off
 *    (position of the entry in the directory), d_reclen and a
 *    NUL-terminated copy of the name;
 *  - the name length is the distance to the first NUL inside mfs_d_name, or
 *    the full field size; the record length is
 *    offsetof(struct dirent, d_name) + len + 1, padded using its remainder
 *    modulo sizeof(long);
 *  - copies the staging buffer to the grant with sys_safecopyto() when the
 *    next record would not fit in it, and once more at the end if it is not
 *    empty;
 *  - sets r to EINVAL when 'done' is set and nothing was copied, because
 *    the caller's buffer is too small;
 *  - reports the bytes copied in RES_NBYTES and the next position in
 *    RES_SEEK_POS_LO (new_pos, which starts out as i_size);
 *  - flags ATIME on the inode when the file system is not read-only.
 *
 * Returns an int status; the return statements are not visible here.
 *
 * NOTE(review): ent_pos is computed from bp->data while the rest of the file
 * reaches block contents through b_data(bp) - confirm both name the same
 * storage.
 * NOTE(review): several source lines (braces, else branches, returns, some
 * declarations, one comment terminator) are missing from this listing.
 */
539 int fs_getdents(void)
541 #define GETDENTS_BUFSIZE (sizeof(struct dirent) + MFS_NAME_MAX + 1)
542 #define GETDENTS_ENTRIES 8
543 static char getdents_buf
[GETDENTS_BUFSIZE
* GETDENTS_ENTRIES
];
544 register struct inode
*rip
;
546 unsigned int block_size
, len
, reclen
;
550 size_t size
, tmpbuf_off
, userbuf_off
;
551 off_t pos
, off
, block_pos
, new_pos
, ent_pos
;
557 ino
= (ino_t
) fs_m_in
.REQ_INODE_NR
;
/* NOTE(review): REQ_GRANT is cast to gid_t here, whereas fs_readwrite() and
 * fs_breadwrite() cast it to cp_grant_id_t - confirm which type 'gid' has
 * (its declaration is not visible) and whether the cast can truncate.
 */
558 gid
= (gid_t
) fs_m_in
.REQ_GRANT
;
559 size
= (size_t) fs_m_in
.REQ_MEM_SIZE
;
560 pos
= (off_t
) fs_m_in
.REQ_SEEK_POS_LO
;
562 /* Check whether the position is properly aligned */
563 if( (unsigned int) pos
% DIR_ENTRY_SIZE
)
566 if( (rip
= get_inode(fs_dev
, ino
)) == NULL
)
569 block_size
= rip
->i_sp
->s_block_size
;
570 off
= (pos
% block_size
); /* Offset in block */
571 block_pos
= pos
- off
;
572 done
= FALSE
; /* Stop processing directory blocks when done is set */
574 tmpbuf_off
= 0; /* Offset in getdents_buf */
575 memset(getdents_buf
, '\0', sizeof(getdents_buf
)); /* Avoid leaking any data */
576 userbuf_off
= 0; /* Offset in the user's buffer */
578 /* The default position for the next request is EOF. If the user's buffer
579 * fills up before EOF, new_pos will be modified. */
580 new_pos
= rip
->i_size
;
582 for(; block_pos
< rip
->i_size
; block_pos
+= block_size
) {
583 b
= read_map(rip
, block_pos
); /* get block number */
585 /* Since directories don't have holes, 'b' cannot be NO_BLOCK. */
586 bp
= get_block(rip
->i_dev
, b
, NORMAL
); /* get a dir block */
590 /* Search a directory block. */
592 dp
= &b_dir(bp
)[off
/ DIR_ENTRY_SIZE
];
595 for (; dp
< &b_dir(bp
)[NR_DIR_ENTRIES(block_size
)]; dp
++) {
596 if (dp
->mfs_d_ino
== 0)
597 continue; /* Entry is not in use */
599 /* Compute the length of the name */
600 cp
= memchr(dp
->mfs_d_name
, '\0', sizeof(dp
->mfs_d_name
));
602 len
= sizeof(dp
->mfs_d_name
);
604 len
= cp
- (dp
->mfs_d_name
);
606 /* Compute record length */
607 reclen
= offsetof(struct dirent
, d_name
) + len
+ 1;
608 o
= (reclen
% sizeof(long));
610 reclen
+= sizeof(long) - o
;
612 /* Need the position of this entry in the directory */
613 ent_pos
= block_pos
+ ((char *) dp
- (char *) bp
->data
);
615 if (userbuf_off
+ tmpbuf_off
+ reclen
>= size
) {
616 /* The user has no space for one more record */
619 /* Record the position of this entry, it is the
620 * starting point of the next request (unless the
621 * position is modified with lseek).
627 if (tmpbuf_off
+ reclen
>= GETDENTS_BUFSIZE
*GETDENTS_ENTRIES
) {
628 r
= sys_safecopyto(VFS_PROC_NR
, gid
,
629 (vir_bytes
) userbuf_off
,
630 (vir_bytes
) getdents_buf
,
631 (size_t) tmpbuf_off
);
637 userbuf_off
+= tmpbuf_off
;
641 dep
= (struct dirent
*) &getdents_buf
[tmpbuf_off
];
642 dep
->d_ino
= dp
->mfs_d_ino
;
643 dep
->d_off
= ent_pos
;
644 dep
->d_reclen
= (unsigned short) reclen
;
645 memcpy(dep
->d_name
, dp
->mfs_d_name
, len
);
646 dep
->d_name
[len
] = '\0';
647 tmpbuf_off
+= reclen
;
650 put_block(bp
, DIRECTORY_BLOCK
);
655 if (tmpbuf_off
!= 0) {
656 r
= sys_safecopyto(VFS_PROC_NR
, gid
, (vir_bytes
) userbuf_off
,
657 (vir_bytes
) getdents_buf
, (size_t) tmpbuf_off
);
663 userbuf_off
+= tmpbuf_off
;
666 if (done
&& userbuf_off
== 0)
667 r
= EINVAL
; /* The user's buffer is too small */
669 fs_m_out
.RES_NBYTES
= userbuf_off
;
670 fs_m_out
.RES_SEEK_POS_LO
= new_pos
;
671 if(!rip
->i_sp
->s_rd_only
) {
672 rip
->i_update
|= ATIME
;
678 put_inode(rip
); /* release the inode */