/* This file contains the heart of the mechanism used to read (and write)
 * files.  Read and write requests are split up into chunks that do not cross
 * block boundaries.  Each chunk is then processed in turn.  Reads on special
 * files are also detected and handled.
 *
 * The entry points into this file are
 *   do_read:	 perform the READ system call by calling read_write
 *   read_write: actually do the work of READ and WRITE
 *   read_map:	 given an inode and file position, look up its zone number
 *   rd_indir:	 read an entry in an indirect block
 *   read_ahead: manage the block read ahead business
 */
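
/* Overview (added comment, not part of the original source): do_read() and
 * the write path both funnel into read_write(), which splits the request
 * into per-block chunks and calls rw_chunk() for each one; rw_chunk() in
 * turn uses read_map() to translate a file position into a block number and
 * rahead() to fetch the block with read ahead.
 */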

#include "fs.h"
#include <fcntl.h>
#include <unistd.h>
#include <minix/com.h>
#include "buf.h"
#include "file.h"
#include "fproc.h"
#include "inode.h"
#include "param.h"
#include "super.h"

FORWARD _PROTOTYPE( int rw_chunk, (struct inode *rip, off_t position,
        unsigned off, int chunk, unsigned left, int rw_flag,
        char *buff, int seg, int usr, int block_size, int *completed));

/*===========================================================================*
 *				do_read					     *
 *===========================================================================*/
PUBLIC int do_read()
{
  return(read_write(READING));
}

/*===========================================================================*
 *				read_write				     *
 *===========================================================================*/
PUBLIC int read_write(rw_flag)
int rw_flag;			/* READING or WRITING */
{
/* Perform read(fd, buffer, nbytes) or write(fd, buffer, nbytes) call. */

  register struct inode *rip;
  register struct filp *f;
  off_t bytes_left, f_size, position;
  unsigned int off, cum_io;
  int op, oflags, r, chunk, usr, seg, block_spec, char_spec;
  int regular, partial_pipe = 0, partial_cnt = 0;
  mode_t mode_word;
  struct filp *wf;
  phys_bytes p;
  int block_size;
  int completed, r2 = OK;

  /* PM loads segments by putting funny things in other bits of the
   * message, indicated by a high bit in fd.
   */
  if (who_e == PM_PROC_NR && (m_in.fd & _PM_SEG_FLAG)) {
        seg = (int) m_in.m1_p2;
        usr = (int) m_in.m1_p3;
        m_in.fd &= ~(_PM_SEG_FLAG);	/* get rid of flag bit */
  } else {
        usr = who_e;		/* normal case */
        seg = D;
  }

  /* If the file descriptor is valid, get the inode, size and mode. */
  if (m_in.nbytes < 0) return(EINVAL);
  if ((f = get_filp(m_in.fd)) == NIL_FILP) return(err_code);
  if (((f->filp_mode) & (rw_flag == READING ? R_BIT : W_BIT)) == 0) {
        return(f->filp_mode == FILP_CLOSED ? EIO : EBADF);
  }
  if (m_in.nbytes == 0)
        return(0);	/* so char special files need not check for 0*/

  /* check if user process has the memory it needs.
   * if not, copying will fail later.
   * do this after 0-check above because umap doesn't want to map 0 bytes.
   */
  if ((r = sys_umap(usr, seg, (vir_bytes) m_in.buffer,
                m_in.nbytes, &p)) != OK) {
        printf("FS: read_write: umap failed for process %d\n", usr);
        return r;
  }
  position = f->filp_pos;
  oflags = f->filp_flags;

  rip = f->filp_ino;
  f_size = rip->i_size;

  if (rip->i_pipe == I_PIPE) {
        /* fp->fp_cum_io_partial is only nonzero when doing partial writes */
        cum_io = fp->fp_cum_io_partial;
  } else {
        cum_io = 0;
  }

  op = (rw_flag == READING ? DEV_READ : DEV_WRITE);
  mode_word = rip->i_mode & I_TYPE;
  regular = mode_word == I_REGULAR || mode_word == I_NAMED_PIPE;

  if ((char_spec = (mode_word == I_CHAR_SPECIAL ? 1 : 0))) {
        if (rip->i_zone[0] == NO_DEV)
                panic(__FILE__,"read_write tries to read from "
                        "character device NO_DEV", NO_NUM);
        block_size = get_block_size(rip->i_zone[0]);
  }
  if ((block_spec = (mode_word == I_BLOCK_SPECIAL ? 1 : 0))) {
        f_size = ULONG_MAX;
        if (rip->i_zone[0] == NO_DEV)
                panic(__FILE__,"read_write tries to read from "
                        " block device NO_DEV", NO_NUM);
        block_size = get_block_size(rip->i_zone[0]);
  }

  if (!char_spec && !block_spec)
        block_size = rip->i_sp->s_block_size;

  rdwt_err = OK;		/* set to EIO if disk error occurs */

  /* Check for character special files. */
  if (char_spec) {
        dev_t dev;
        dev = (dev_t) rip->i_zone[0];
        r = dev_io(op, dev, usr, m_in.buffer, position, m_in.nbytes, oflags);
        if (r >= 0) {
                cum_io = r;
                position += r;
                r = OK;
        }
  } else {
        if (rw_flag == WRITING && block_spec == 0) {
                /* Check in advance to see if file will grow too big. */
                if (position > rip->i_sp->s_max_size - m_in.nbytes)
                        return(EFBIG);

                /* Check for O_APPEND flag. */
                if (oflags & O_APPEND) position = f_size;

                /* Clear the zone containing present EOF if hole about
                 * to be created.  This is necessary because all unwritten
                 * blocks prior to the EOF must read as zeros.
                 */
                if (position > f_size) clear_zone(rip, f_size, 0);
        }
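
        /* Illustrative example (added comment, not in the original source):
         * with 1024-byte blocks and zones, a file of size 100 and a write
         * starting at position 5000, the clear_zone() call above zeros
         * bytes 100..1023 of the zone holding the old end of file, so a
         * later read of the hole returns zeros rather than stale buffer
         * contents.
         */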

        /* Pipes are a little different.  Check. */
        if (rip->i_pipe == I_PIPE) {
                r = pipe_check(rip, rw_flag, oflags,
                        m_in.nbytes, position, &partial_cnt, 0);
                if (r <= 0) return(r);
        }

        if (partial_cnt > 0) partial_pipe = 1;
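
        /* Note (added comment, not in the original source): judging from its
         * use here, pipe_check() reports in partial_cnt how many bytes of a
         * pipe write can be transferred right now; the loop below then moves
         * only that much, and the tail end of read_write() either returns
         * the partial count (O_NONBLOCK) or suspends the caller.
         */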

        /* Split the transfer into chunks that don't span two blocks. */
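        /* Worked example (added comment, illustrative numbers): with
         * block_size = 1024, position = 1000 and m_in.nbytes = 3000, the
         * loop below produces chunks of 24, 1024, 1024 and 928 bytes; each
         * chunk stays inside one block, and on reads it is further clamped
         * to the bytes remaining before end of file.
         */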
        while (m_in.nbytes != 0) {

                off = (unsigned int) (position % block_size);/* offset in blk*/
                if (partial_pipe) {  /* pipes only */
                        chunk = MIN(partial_cnt, block_size - off);
                } else
                        chunk = MIN(m_in.nbytes, block_size - off);
                if (chunk < 0) chunk = block_size - off;

                if (rw_flag == READING) {
                        bytes_left = f_size - position;
                        if (position >= f_size) break;	/* we are beyond EOF */
                        if (chunk > bytes_left) chunk = (int) bytes_left;
                }

                /* Read or write 'chunk' bytes. */
                r = rw_chunk(rip, position, off, chunk, (unsigned) m_in.nbytes,
                        rw_flag, m_in.buffer, seg, usr, block_size, &completed);

                if (r != OK) break;	/* EOF reached */
                if (rdwt_err < 0) break;

                /* Update counters and pointers. */
                m_in.buffer += chunk;	/* user buffer address */
                m_in.nbytes -= chunk;	/* bytes yet to be read */
                cum_io += chunk;	/* bytes read so far */
                position += chunk;	/* position within the file */

                if (partial_pipe) {
                        partial_cnt -= chunk;
                        if (partial_cnt <= 0) break;
                }
        }
  }

  /* On write, update file size and access time. */
  if (rw_flag == WRITING) {
        if (regular || mode_word == I_DIRECTORY) {
                if (position > f_size) rip->i_size = position;
        }
  } else {
        if (rip->i_pipe == I_PIPE) {
                if (position >= rip->i_size) {
                        /* Reset pipe pointers. */
                        rip->i_size = 0;	/* no data left */
                        position = 0;		/* reset reader(s) */
                        wf = find_filp(rip, W_BIT);
                        if (wf != NIL_FILP) wf->filp_pos = 0;
                }
        }
  }
  f->filp_pos = position;

  /* Check to see if read-ahead is called for, and if so, set it up. */
  if (rw_flag == READING && rip->i_seek == NO_SEEK
                && position % block_size == 0
                && (regular || mode_word == I_DIRECTORY)) {
        rdahed_inode = rip;
        rdahedpos = position;
  }
  rip->i_seek = NO_SEEK;
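
  /* Note (added comment, not in the original source): read ahead is armed
   * only for strictly sequential, block-aligned reads of regular files and
   * directories.  As far as can be seen from this file, i_seek is the flag
   * a preceding lseek() uses to suppress read ahead after random access; it
   * is cleared again above once the current transfer is done.
   */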

  if (rdwt_err != OK) r = rdwt_err;	/* check for disk error */
  if (rdwt_err == END_OF_FILE) r = OK;

  /* if user-space copying failed, read/write failed. */
  if (r == OK && r2 != OK) {
        r = r2;
  }
  if (r == OK) {
        if (rw_flag == READING) rip->i_update |= ATIME;
        if (rw_flag == WRITING) rip->i_update |= CTIME | MTIME;
        rip->i_dirt = DIRTY;		/* inode is thus now dirty */
        if (partial_pipe) {
                partial_pipe = 0;
                /* partial write on pipe with */
                /* O_NONBLOCK, return write count */
                if (!(oflags & O_NONBLOCK)) {
                        fp->fp_cum_io_partial = cum_io;
                        suspend(XPIPE);   /* partial write on pipe with */
                        return(SUSPEND);  /* nbyte > PIPE_SIZE - non-atomic */
                }
        }
        fp->fp_cum_io_partial = 0;
        return(cum_io);
  }
  return(r);
}

/*===========================================================================*
 *				rw_chunk				     *
 *===========================================================================*/
PRIVATE int rw_chunk(rip, position, off, chunk, left, rw_flag, buff,
 seg, usr, block_size, completed)
register struct inode *rip;	/* pointer to inode for file to be rd/wr */
off_t position;			/* position within file to read or write */
unsigned off;			/* off within the current block */
int chunk;			/* number of bytes to read or write */
unsigned left;			/* max number of bytes wanted after position */
int rw_flag;			/* READING or WRITING */
char *buff;			/* virtual address of the user buffer */
int seg;			/* T or D segment in user space */
int usr;			/* which user process */
int block_size;			/* block size of FS operating on */
int *completed;			/* number of bytes copied */
{
/* Read or write (part of) a block. */

  register struct buf *bp;
  register int r = OK;
  int n, block_spec;
  block_t b;
  dev_t dev;

  *completed = 0;

  block_spec = (rip->i_mode & I_TYPE) == I_BLOCK_SPECIAL;
  if (block_spec) {
        b = position/block_size;
        dev = (dev_t) rip->i_zone[0];
  } else {
        b = read_map(rip, position);
        dev = rip->i_dev;
  }

  if (!block_spec && b == NO_BLOCK) {
        if (rw_flag == READING) {
                /* Reading from a nonexistent block.  Must read as all zeros.*/
                bp = get_block(NO_DEV, NO_BLOCK, NORMAL);    /* get a buffer */
                zero_block(bp);
        } else {
                /* Writing to a nonexistent block. Create and enter in inode.*/
                if ((bp = new_block(rip, position)) == NIL_BUF) return(err_code);
        }
  } else if (rw_flag == READING) {
        /* Read and read ahead if convenient. */
        bp = rahead(rip, b, position, left);
  } else {
        /* Normally an existing block to be partially overwritten is first read
         * in.  However, a full block need not be read in.  If it is already in
         * the cache, acquire it, otherwise just acquire a free buffer.
         */
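        /* Illustrative consequence (added comment, not in the original
         * source): overwriting a whole block, or writing from offset 0 of a
         * block that lies at or past the current i_size, asks for the buffer
         * with NO_READ, so the old contents are never fetched from disk;
         * only a partial overwrite of existing data pays for the
         * read-modify-write cycle.
         */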
        n = (chunk == block_size ? NO_READ : NORMAL);
        if (!block_spec && off == 0 && position >= rip->i_size) n = NO_READ;
        bp = get_block(dev, b, n);
  }

  /* In all cases, bp now points to a valid buffer. */
  if (bp == NIL_BUF) {
        panic(__FILE__,"bp not valid in rw_chunk, this can't happen", NO_NUM);
  }

  if (rw_flag == WRITING && chunk != block_size && !block_spec &&
      position >= rip->i_size && off == 0) {
        zero_block(bp);
  }

  if (rw_flag == READING) {
        /* Copy a chunk from the block buffer to user space. */
        r = sys_vircopy(FS_PROC_NR, D, (phys_bytes) (bp->b_data+off),
                        usr, seg, (phys_bytes) buff,
                        (phys_bytes) chunk);
  } else {
        /* Copy a chunk from user space to the block buffer. */
        r = sys_vircopy(usr, seg, (phys_bytes) buff,
                        FS_PROC_NR, D, (phys_bytes) (bp->b_data+off),
                        (phys_bytes) chunk);
        bp->b_dirt = DIRTY;
  }
  n = (off + chunk == block_size ? FULL_DATA_BLOCK : PARTIAL_DATA_BLOCK);
  put_block(bp, n);

  return(r);
}

/*===========================================================================*
 *				read_map				     *
 *===========================================================================*/
PUBLIC block_t read_map(rip, position)
register struct inode *rip;	/* ptr to inode to map from */
off_t position;			/* position in file whose blk wanted */
{
/* Given an inode and a position within the corresponding file, locate the
 * block (not zone) number in which that position is to be found and return it.
 */

  register struct buf *bp;
  register zone_t z;
  int scale, boff, dzones, nr_indirects, index, zind, ex;
  block_t b;
  long excess, zone, block_pos;

  scale = rip->i_sp->s_log_zone_size;	/* for block-zone conversion */
  block_pos = position/rip->i_sp->s_block_size;	/* relative blk # in file */
  zone = block_pos >> scale;	/* position's zone */
  boff = (int) (block_pos - (zone << scale) );	/* relative blk # within zone */
  dzones = rip->i_ndzones;
  nr_indirects = rip->i_nindirs;
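
  /* Worked example (added comment, illustrative numbers): assume 1024-byte
   * blocks, s_log_zone_size = 0 and i_ndzones = 7.  Position 5000 gives
   * block_pos = 4, zone = 4 and boff = 0, so the block number comes straight
   * from i_zone[4].  Position 9000 gives zone = 8, which is past the direct
   * zones and is resolved through the single indirect block below.
   */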

  /* Is 'position' to be found in the inode itself? */
  if (zone < dzones) {
        zind = (int) zone;	/* index should be an int */
        z = rip->i_zone[zind];
        if (z == NO_ZONE) return(NO_BLOCK);
        b = ((block_t) z << scale) + boff;
        return(b);
  }

  /* It is not in the inode, so it must be single or double indirect. */
  excess = zone - dzones;	/* first Vx_NR_DZONES don't count */

  if (excess < nr_indirects) {
        /* 'position' can be located via the single indirect block. */
        z = rip->i_zone[dzones];
  } else {
        /* 'position' can be located via the double indirect block. */
        if ( (z = rip->i_zone[dzones+1]) == NO_ZONE) return(NO_BLOCK);
        excess -= nr_indirects;			/* single indir doesn't count*/
        b = (block_t) z << scale;
        bp = get_block(rip->i_dev, b, NORMAL);	/* get double indirect block */
        index = (int) (excess/nr_indirects);
        z = rd_indir(bp, index);		/* z= zone for single*/
        put_block(bp, INDIRECT_BLOCK);		/* release double ind block */
        excess = excess % nr_indirects;		/* index into single ind blk */
  }

  /* 'z' is zone num for single indirect block; 'excess' is index into it. */
  if (z == NO_ZONE) return(NO_BLOCK);
  b = (block_t) z << scale;			/* b is blk # for single ind */
  bp = get_block(rip->i_dev, b, NORMAL);	/* get single indirect block */
  ex = (int) excess;				/* need an integer */
  z = rd_indir(bp, ex);				/* get block pointed to */
  put_block(bp, INDIRECT_BLOCK);		/* release single indir blk */
  if (z == NO_ZONE) return(NO_BLOCK);
  b = ((block_t) z << scale) + boff;
  return(b);
}

/*===========================================================================*
 *				rd_indir				     *
 *===========================================================================*/
PUBLIC zone_t rd_indir(bp, index)
struct buf *bp;			/* pointer to indirect block */
int index;			/* index into *bp */
{
/* Given a pointer to an indirect block, read one entry.  The reason for
 * making a separate routine out of this is that there are four cases:
 * V1 (IBM and 68000), and V2 (IBM and 68000).
 */

  struct super_block *sp;
  zone_t zone;			/* V2 zones are longs (shorts in V1) */
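
  /* Note (added comment, not in the original source): V1 indirect blocks
   * hold 16-bit zone numbers and V2 blocks hold 32-bit ones, and either may
   * have been written with the opposite byte order, so conv2()/conv4()
   * below convert to the native order when sp->s_native is not set.
   */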

  if (bp == NIL_BUF)
        panic(__FILE__, "rd_indir() on NIL_BUF", NO_NUM);

  sp = get_super(bp->b_dev);	/* need super block to find file sys type */

  /* read a zone from an indirect block */
  if (sp->s_version == V1)
        zone = (zone_t) conv2(sp->s_native, (int) bp->b_v1_ind[index]);
  else
        zone = (zone_t) conv4(sp->s_native, (long) bp->b_v2_ind[index]);

  if (zone != NO_ZONE &&
        (zone < (zone_t) sp->s_firstdatazone || zone >= sp->s_zones)) {
        printf("Illegal zone number %ld in indirect block, index %d\n",
                (long) zone, index);
        panic(__FILE__,"check file system", NO_NUM);
  }
  return(zone);
}

/*===========================================================================*
 *				read_ahead				     *
 *===========================================================================*/
PUBLIC void read_ahead()
{
/* Read a block into the cache before it is needed. */

  int block_size;
  register struct inode *rip;
  struct buf *bp;
  block_t b;

  rip = rdahed_inode;		/* pointer to inode to read ahead from */
  block_size = get_block_size(rip->i_dev);
  rdahed_inode = NIL_INODE;	/* turn off read ahead */
  if ( (b = read_map(rip, rdahedpos)) == NO_BLOCK) return;	/* at EOF */
  bp = rahead(rip, b, rdahedpos, block_size);
  put_block(bp, PARTIAL_DATA_BLOCK);
}

/*===========================================================================*
 *				rahead					     *
 *===========================================================================*/
PUBLIC struct buf *rahead(rip, baseblock, position, bytes_ahead)
register struct inode *rip;	/* pointer to inode for file to be read */
block_t baseblock;		/* block at current position */
off_t position;			/* position within file */
unsigned bytes_ahead;		/* bytes beyond position for immediate use */
{
/* Fetch a block from the cache or the device.  If a physical read is
 * required, prefetch as many more blocks as convenient into the cache.
 * This usually covers bytes_ahead and is at least BLOCKS_MINIMUM.
 * The device driver may decide it knows better and stop reading at a
 * cylinder boundary (or after an error).  Rw_scattered() puts an optional
 * flag on all reads to allow this.
 */
  int block_size;
/* Minimum number of blocks to prefetch. */
# define BLOCKS_MINIMUM		(NR_BUFS < 50 ? 18 : 32)
  int block_spec, scale, read_q_size;
  unsigned int blocks_ahead, fragment;
  block_t block, blocks_left;
  off_t ind1_pos;
  dev_t dev;
  struct buf *bp;
  static struct buf *read_q[NR_BUFS];

  block_spec = (rip->i_mode & I_TYPE) == I_BLOCK_SPECIAL;
  if (block_spec) {
        dev = (dev_t) rip->i_zone[0];
  } else {
        dev = rip->i_dev;
  }
  block_size = get_block_size(dev);

  block = baseblock;
  bp = get_block(dev, block, PREFETCH);
  if (bp->b_dev != NO_DEV) return(bp);

  /* The best guess for the number of blocks to prefetch:  A lot.
   * It is impossible to tell what the device looks like, so we don't even
   * try to guess the geometry, but leave it to the driver.
   *
   * The floppy driver can read a full track with no rotational delay, and it
   * avoids reading partial tracks if it can, so handing it enough buffers to
   * read two tracks is perfect.  (Two, because some diskette types have
   * an odd number of sectors per track, so a block may span tracks.)
   *
   * The disk drivers don't try to be smart.  With today's disks it is
   * impossible to tell what the real geometry looks like, so it is best to
   * read as much as you can.  With luck the caching on the drive allows
   * for a little time to start the next read.
   *
   * The current solution below is a bit of a hack, it just reads blocks from
   * the current file position hoping that more of the file can be found.  A
   * better solution must look at the already available zone pointers and
   * indirect blocks (but don't call read_map!).
   */
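
  /* Worked example (added comment, illustrative numbers): a sequential
   * request for 2000 bytes at position 1000 with 1024-byte blocks gives
   * fragment = 1000, so position is rounded down to 0 and bytes_ahead
   * becomes 3000, i.e. blocks_ahead = 3.  That count is then capped at
   * NR_IOREQS, raised to at least BLOCKS_MINIMUM because no seek took
   * place, and finally capped at the number of blocks left before the end
   * of the file.
   */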

  fragment = position % block_size;
  position -= fragment;
  bytes_ahead += fragment;

  blocks_ahead = (bytes_ahead + block_size - 1) / block_size;

  if (block_spec && rip->i_size == 0) {
        blocks_left = NR_IOREQS;
  } else {
        blocks_left = (rip->i_size - position + block_size - 1) / block_size;

        /* Go for the first indirect block if we are in its neighborhood. */
        if (!block_spec) {
                scale = rip->i_sp->s_log_zone_size;
                ind1_pos = (off_t) rip->i_ndzones * (block_size << scale);
                if (position <= ind1_pos && rip->i_size > ind1_pos) {
                        blocks_ahead++;
                        blocks_left++;
                }
        }
  }

  /* No more than the maximum request. */
  if (blocks_ahead > NR_IOREQS) blocks_ahead = NR_IOREQS;

  /* Read at least the minimum number of blocks, but not after a seek. */
  if (blocks_ahead < BLOCKS_MINIMUM && rip->i_seek == NO_SEEK)
        blocks_ahead = BLOCKS_MINIMUM;

  /* Can't go past end of file. */
  if (blocks_ahead > blocks_left) blocks_ahead = blocks_left;

  read_q_size = 0;

  /* Acquire block buffers. */
  for (;;) {
        read_q[read_q_size++] = bp;

        if (--blocks_ahead == 0) break;

        /* Don't trash the cache, leave 4 free. */
        if (bufs_in_use >= NR_BUFS - 4) break;

        block++;

        bp = get_block(dev, block, PREFETCH);
        if (bp->b_dev != NO_DEV) {
                /* Oops, block already in the cache, get out. */
                put_block(bp, FULL_DATA_BLOCK);
                break;
        }
  }
  rw_scattered(dev, read_q, read_q_size, READING);
  return(get_block(dev, baseblock, NORMAL));
}