/* The file system maintains a buffer cache to reduce the number of disk
 * accesses needed.  Whenever a read or write to the disk is done, a check is
 * first made to see if the block is in the cache.  This file manages the
 * cache.
 *
 * The entry points into this file are:
 *   get_block:   request to fetch a block for reading or writing from cache
 *   put_block:   return a block previously requested with get_block
 *   alloc_zone:  allocate a new zone (to increase the length of a file)
 *   free_zone:   release a zone (when a file is removed)
 *   invalidate:  remove all the cache blocks on some device
 *
 * Private functions:
 *   rw_block:    read or write a block from the disk itself
 */
#include "fs.h"
#include <minix/com.h>
#include <minix/u64.h>
#include "buf.h"
#include "super.h"
#include "inode.h"
FORWARD _PROTOTYPE( void rm_lru, (struct buf *bp) );
FORWARD _PROTOTYPE( int rw_block, (struct buf *, int) );

/*===========================================================================*
 *                              get_block                                    *
 *===========================================================================*/
PUBLIC struct buf *get_block(dev, block, only_search)
register dev_t dev;             /* on which device is the block? */
register block_t block;         /* which block is wanted? */
int only_search;                /* if NO_READ, don't read, else act normal */
{
/* Check to see if the requested block is in the block cache.  If so, return
 * a pointer to it.  If not, evict some other block and fetch it (unless
 * 'only_search' is 1).  All the blocks in the cache that are not in use
 * are linked together in a chain, with 'front' pointing to the least recently
 * used block and 'rear' to the most recently used block.  If 'only_search' is
 * 1, the block being requested will be overwritten in its entirety, so it is
 * only necessary to see if it is in the cache; if it is not, any free buffer
 * will do.  It is not necessary to actually read the block in from disk.
 * If 'only_search' is PREFETCH, the block need not be read from the disk,
 * and the device is not to be marked on the block, so callers can tell if
 * the block returned is valid.
 * In addition to the LRU chain, there is also a hash chain to link together
 * blocks whose block numbers end with the same bit strings, for fast lookup.
 */
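
  /* Illustration (assuming BUFHASH in buf.h is the usual modulo/mask on the
   * low bits of the block number): blocks 7, 7 + NR_BUF_HASH,
   * 7 + 2*NR_BUF_HASH, ... all land on the same chain, buf_hash[7], so a
   * lookup only walks the blocks that share those low bits.
   */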

  int b;
  static struct buf *bp, *prev_ptr;

  ASSERT(fs_block_size > 0);

  /* Search the hash chain for (dev, block).  Do_read() can use
   * get_block(NO_DEV ...) to get an unnamed block to fill with zeros when
   * someone wants to read from a hole in a file, in which case this search
   * is skipped.
   */
  if (dev != NO_DEV) {
    b = BUFHASH(block);
    bp = buf_hash[b];
    while (bp != NIL_BUF) {
      if (bp->b_blocknr == block && bp->b_dev == dev) {
        /* Block needed has been found. */
        if (bp->b_count == 0) rm_lru(bp);
        bp->b_count++;                  /* record that block is in use */
        ASSERT(bp->b_bytes == fs_block_size);
        ASSERT(bp->b_dev == dev);
        ASSERT(bp->b_dev != NO_DEV);
        return(bp);
      } else {
        /* This block is not the one sought. */
        bp = bp->b_hash;                /* move to next block on hash chain */
      }
    }
  }

  /* Desired block is not on available chain.  Take oldest block ('front'). */
  if ((bp = front) == NIL_BUF) panic(__FILE__, "all buffers in use", NR_BUFS);

  if (bp->b_bytes < fs_block_size) {
    ASSERT(bp->b_bytes == 0);
    if (!(bp->bp = alloc_contig(fs_block_size, 0, NULL))) {
      printf("MFS: couldn't allocate a new block.\n");
      /* Fall back to any buffer that already has a full-sized data area. */
      for (bp = front;
           bp && bp->b_bytes < fs_block_size; bp = bp->b_next)
        ;
      if (!bp) panic("MFS", "no buffer available", NO_NUM);
    } else {
      bp->b_bytes = fs_block_size;
    }
  }

  ASSERT(bp->b_bytes == fs_block_size);
  ASSERT(bp->b_count == 0);

  rm_lru(bp);                   /* take the victim off the LRU chain */

  /* Remove the block that was just taken from its hash chain. */
  b = BUFHASH(bp->b_blocknr);
  prev_ptr = buf_hash[b];
  if (prev_ptr == bp) {
    buf_hash[b] = bp->b_hash;
  } else {
    /* The block just taken is not on the front of its hash chain. */
    while (prev_ptr->b_hash != NIL_BUF)
      if (prev_ptr->b_hash == bp) {
        prev_ptr->b_hash = bp->b_hash;  /* found it */
        break;
      } else {
        prev_ptr = prev_ptr->b_hash;    /* keep looking */
      }
  }

  /* If the block taken is dirty, make it clean by writing it to the disk.
   * Avoid hysteresis by flushing all other dirty blocks for the same device.
   */
  if (bp->b_dev != NO_DEV) {
    if (bp->b_dirt == DIRTY) flushall(bp->b_dev);
  }

  /* Fill in block's parameters and add it to the hash chain where it goes. */
  bp->b_dev = dev;              /* fill in device number */
  bp->b_blocknr = block;        /* fill in block number */
  bp->b_count++;                /* record that block is being used */
  b = BUFHASH(bp->b_blocknr);
  bp->b_hash = buf_hash[b];
  buf_hash[b] = bp;             /* add to hash list */

  /* Go get the requested block unless searching or prefetching. */
  if (dev != NO_DEV) {
    if (only_search == PREFETCH) bp->b_dev = NO_DEV;
    else if (only_search == NORMAL) rw_block(bp, READING);
  }

  return(bp);                   /* return the newly acquired block */
}

/*===========================================================================*
 *                              put_block                                    *
 *===========================================================================*/
PUBLIC void put_block(bp, block_type)
register struct buf *bp;        /* pointer to the buffer to be released */
int block_type;                 /* INODE_BLOCK, DIRECTORY_BLOCK, or whatever */
{
/* Return a block to the list of available blocks.  Depending on 'block_type'
 * it may be put on the front or rear of the LRU chain.  Blocks that are
 * expected to be needed again shortly (e.g., partially full data blocks)
 * go on the rear; blocks that are unlikely to be needed again shortly
 * (e.g., full data blocks) go on the front.  Blocks whose loss can hurt
 * the integrity of the file system (e.g., inode blocks) are written to
 * disk immediately if they are dirty.
 */
  if (bp == NIL_BUF) return;    /* it is easier to check here than in caller */

  bp->b_count--;                /* there is one use fewer now */
  if (bp->b_count != 0) return; /* block is still in use */

  bufs_in_use--;                /* one fewer block buffers in use */

  /* Put this block back on the LRU chain.  If the ONE_SHOT bit is set in
   * 'block_type', the block is not likely to be needed again shortly, so put
   * it on the front of the LRU chain where it will be the first one to be
   * taken when a free buffer is needed later.
   */
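  /* The chain is ordered by eviction time: get_block() always evicts from
   * 'front', so a block parked at 'front' is recycled first, while a block
   * at 'rear' survives longest in the cache.
   */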
  if (bp->b_dev == DEV_RAM || (block_type & ONE_SHOT)) {
    /* Block probably won't be needed quickly.  Put it on front of chain.
     * It will be the next block to be evicted from the cache.
     */
    bp->b_prev = NIL_BUF;
    bp->b_next = front;
    if (front == NIL_BUF)
      rear = bp;                /* LRU chain was empty */
    else
      front->b_prev = bp;
    front = bp;
  }
  else {
    /* Block probably will be needed quickly.  Put it on rear of chain.
     * It will not be evicted from the cache for a long time.
     */
    bp->b_prev = rear;
    bp->b_next = NIL_BUF;
    if (rear == NIL_BUF)
      front = bp;               /* LRU chain was empty */
    else
      rear->b_next = bp;
    rear = bp;
  }

  /* Some blocks are so important (e.g., inodes, indirect blocks) that they
   * should be written to the disk immediately to avoid messing up the file
   * system in the event of a crash.
   */
  if ((block_type & WRITE_IMMED) && bp->b_dirt == DIRTY && bp->b_dev != NO_DEV) {
    rw_block(bp, WRITING);
  }
}

/*===========================================================================*
 *                              alloc_zone                                   *
 *===========================================================================*/
PUBLIC zone_t alloc_zone(dev, z)
dev_t dev;                      /* device where zone wanted */
zone_t z;                       /* try to allocate new zone near this one */
{
/* Allocate a new zone on the indicated device and return its number. */

  int major, minor;
  bit_t b, bit;
  struct super_block *sp;

  /* Note that the routine alloc_bit() returns 1 for the lowest possible
   * zone, which corresponds to sp->s_firstdatazone.  To convert a value
   * between the bit number, 'b', used by alloc_bit() and the zone number, 'z',
   * stored in the inode, use the formula:
   *     z = b + sp->s_firstdatazone - 1
   * Alloc_bit() never returns 0, since this is used for NO_BIT (failure).
   */
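  /* Worked example of the formula (the numbers are illustrative only): with
   * sp->s_firstdatazone = 5, the lowest value alloc_bit() can return is
   * b = 1, giving z = 1 + 5 - 1 = 5, i.e. the first data zone itself.
   */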
  sp = get_super(dev);          /* find the super block for this device */

  /* If z is 0, skip initial part of the map known to be fully in use. */
  if (z == sp->s_firstdatazone) {
    bit = sp->s_zsearch;
  } else {
    bit = (bit_t) z - (sp->s_firstdatazone - 1);
  }
  b = alloc_bit(sp, ZMAP, bit);
  if (b == NO_BIT) {
    err_code = ENOSPC;
    major = (int) (sp->s_dev >> MAJOR) & BYTE;
    minor = (int) (sp->s_dev >> MINOR) & BYTE;
    printf("No space on device %d/%d\n", major, minor);
    return(NO_ZONE);
  }
  if (z == sp->s_firstdatazone) sp->s_zsearch = b;      /* for next time */
  return(sp->s_firstdatazone - 1 + (zone_t) b);
}

/*===========================================================================*
 *                              free_zone                                    *
 *===========================================================================*/
PUBLIC void free_zone(dev, numb)
dev_t dev;                      /* device where zone located */
zone_t numb;                    /* zone to be returned */
{
/* Return a zone. */

  register struct super_block *sp;
  bit_t bit;

  /* Locate the appropriate super_block and return bit. */
  sp = get_super(dev);
  if (numb < sp->s_firstdatazone || numb >= sp->s_zones) return;
  bit = (bit_t) (numb - (sp->s_firstdatazone - 1));
  free_bit(sp, ZMAP, bit);
  if (bit < sp->s_zsearch) sp->s_zsearch = bit;
}

/*===========================================================================*
 *                              rw_block                                     *
 *===========================================================================*/
PRIVATE int rw_block(bp, rw_flag)
register struct buf *bp;        /* buffer pointer */
int rw_flag;                    /* READING or WRITING */
{
/* Read or write a disk block.  This is the only routine in which actual disk
 * I/O is invoked.  If an error occurs, a message is printed here, but the
 * error is not reported to the caller.  If the error occurred while purging
 * a block from the cache, it is not clear what the caller could do about it
 * anyway.
 */

  int r, op;
  u64_t pos;
  dev_t dev;
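
  /* The on-disk byte offset is the 64-bit product of block number and block
   * size; e.g. with an (illustrative) 4096-byte block size, block 1000
   * starts at byte 4096000.  mul64u() keeps that product from overflowing
   * on large devices.
   */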

  if ((dev = bp->b_dev) != NO_DEV) {
    pos = mul64u(bp->b_blocknr, fs_block_size);
    op = (rw_flag == READING ? MFS_DEV_READ : MFS_DEV_WRITE);
    r = block_dev_io(op, dev, SELF_E, bp->b_data, pos, fs_block_size, 0);
    if (r != fs_block_size) {
      if (r >= 0) r = END_OF_FILE;
      if (r != END_OF_FILE)
        printf("MFS(%d) I/O error on device %d/%d, block %ld\n",
               SELF_E, (dev >> MAJOR) & BYTE, (dev >> MINOR) & BYTE,
               bp->b_blocknr);

      bp->b_dev = NO_DEV;       /* invalidate block */

      /* Report read errors to interested parties. */
      if (rw_flag == READING) rdwt_err = r;
    }
  }

  return(OK);
}

/*===========================================================================*
 *                              invalidate                                   *
 *===========================================================================*/
PUBLIC void invalidate(device)
dev_t device;                   /* device whose blocks are to be purged */
{
/* Remove all the blocks belonging to some device from the cache. */
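/* Note that dirty blocks are simply dropped, not written back, so a caller
 * that cares about the data must flush the device first.
 */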

  register struct buf *bp;

  for (bp = &buf[0]; bp < &buf[NR_BUFS]; bp++)
    if (bp->b_dev == device) bp->b_dev = NO_DEV;
}

/*===========================================================================*
 *                              flushall                                     *
 *===========================================================================*/
PUBLIC void flushall(dev)
dev_t dev;                      /* device to flush */
{
/* Flush all dirty blocks for one device. */

  register struct buf *bp;
  static struct buf **dirty;    /* static so it isn't on stack */
  int ndirty;

  STATICINIT(dirty, NR_BUFS);

  for (bp = &buf[0], ndirty = 0; bp < &buf[NR_BUFS]; bp++)
    if (bp->b_dirt == DIRTY && bp->b_dev == dev) dirty[ndirty++] = bp;
  rw_scattered(dev, dirty, ndirty, WRITING);
}

/*===========================================================================*
 *                              rw_scattered                                 *
 *===========================================================================*/
PUBLIC void rw_scattered(dev, bufq, bufqsize, rw_flag)
dev_t dev;                      /* major-minor device number */
struct buf **bufq;              /* pointer to array of buffers */
int bufqsize;                   /* number of buffers */
int rw_flag;                    /* READING or WRITING */
{
/* Read or write scattered data from a device. */
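/* For WRITING the caller hands in its list of dirty buffers (see flushall());
 * for READING every buffer processed here is handed back via put_block().
 */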

  register struct buf *bp;
  int gap;
  register int i;
  register iovec_t *iop;
  static iovec_t *iovec = NULL;
  int j, r;

  STATICINIT(iovec, NR_IOREQS);

  /* (Shell) sort buffers on b_blocknr. */
  gap = 1;
  do
    gap = 3 * gap + 1;          /* Knuth's gap sequence 1, 4, 13, 40, ... */
  while (gap <= bufqsize);
  while (gap != 1) {
    gap /= 3;
    for (j = gap; j < bufqsize; j++) {
      for (i = j - gap;
           i >= 0 && bufq[i]->b_blocknr > bufq[i + gap]->b_blocknr;
           i -= gap) {
        bp = bufq[i];
        bufq[i] = bufq[i + gap];
        bufq[i + gap] = bp;
      }
    }
  }
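
  /* After the sort, bufq[0..bufqsize-1] is in ascending block order, so runs
   * of consecutive block numbers can be merged into a single scatter/gather
   * request below.
   */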

  /* Set up I/O vector and do I/O.  The result of dev_io is OK if everything
   * went fine, otherwise the error code for the first failed transfer.
   */
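  /* Each iovec element is primed with a full block (iov_size ==
   * fs_block_size); after the transfer, a nonzero residual iov_size marks an
   * element that was not completed, which is how the harvesting loop below
   * detects where the transfer stopped.
   */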
  while (bufqsize > 0) {
    for (j = 0, iop = iovec; j < NR_IOREQS && j < bufqsize; j++, iop++) {
      bp = bufq[j];
      if (bp->b_blocknr != bufq[0]->b_blocknr + j) break;
      iop->iov_addr = (vir_bytes) bp->b_data;
      iop->iov_size = fs_block_size;
    }
    r = block_dev_io(rw_flag == WRITING ? MFS_DEV_SCATTER : MFS_DEV_GATHER,
                     dev, SELF_E, iovec,
                     mul64u(bufq[0]->b_blocknr, fs_block_size), j, 0);

    /* Harvest the results.  Dev_io reports the first error it may have
     * encountered, but we only care if it's the first block that failed.
     */
    for (i = 0, iop = iovec; i < j; i++, iop++) {
      bp = bufq[i];
      if (iop->iov_size != 0) {
        /* Transfer failed. An error? Do we care? */
        if (r != OK && i == 0) {
          printf("fs: I/O error on device %d/%d, block %lu\n",
                 (dev >> MAJOR) & BYTE, (dev >> MINOR) & BYTE,
                 bp->b_blocknr);
          bp->b_dev = NO_DEV;   /* invalidate block */
        }
        break;
      }
      if (rw_flag == READING) {
        bp->b_dev = dev;        /* validate block */
        put_block(bp, PARTIAL_DATA_BLOCK);
      } else {
        bp->b_dirt = CLEAN;
      }
    }
    bufq += i;
    bufqsize -= i;
    if (rw_flag == READING) {
      /* Don't bother reading more than the device is willing to
       * give at this time.  Don't forget to release those extras.
       */
      while (bufqsize > 0) {
        put_block(*bufq++, PARTIAL_DATA_BLOCK);
        bufqsize--;
      }
    }
    if (rw_flag == WRITING && i == 0) {
      /* We're not making progress, this means we might keep
       * looping.  Buffers remain dirty if un-written.  Buffers are
       * lost if invalidate()d or LRU-removed while dirty.  This
       * is better than keeping unwritable blocks around forever.
       */
      break;
    }
  }
}

/*===========================================================================*
 *                              rm_lru                                       *
 *===========================================================================*/
PRIVATE void rm_lru(bp)
struct buf *bp;                 /* buffer to remove from the LRU chain */
{
/* Remove a block from its LRU chain. */
  struct buf *next_ptr, *prev_ptr;

  bufs_in_use++;
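  /* Unlinking works at any position in the chain: a NIL_BUF neighbour means
   * the block sits at 'front' or 'rear', so the end pointer is updated
   * instead of a neighbour's link.
   */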
  next_ptr = bp->b_next;        /* successor on LRU chain */
  prev_ptr = bp->b_prev;        /* predecessor on LRU chain */
  if (prev_ptr != NIL_BUF)
    prev_ptr->b_next = next_ptr;
  else
    front = next_ptr;           /* this block was at front of chain */

  if (next_ptr != NIL_BUF)
    next_ptr->b_prev = prev_ptr;
  else
    rear = prev_ptr;            /* this block was at rear of chain */
}

/*===========================================================================*
 *                              set_blocksize                                *
 *===========================================================================*/
PUBLIC void set_blocksize(int blocksize)
{
  struct buf *bp;
  struct inode *rip;

  ASSERT(blocksize > 0);

  for (bp = &buf[0]; bp < &buf[NR_BUFS]; bp++)
    if (bp->b_count != 0)
      panic("MFS", "change blocksize with buffer in use", NO_NUM);

  for (rip = &inode[0]; rip < &inode[NR_INODES]; rip++)
    if (rip->i_count > 0)
      panic("MFS", "change blocksize with inode in use", NO_NUM);

  buf_pool();                   /* discard the cache of old-sized blocks */
  fs_block_size = blocksize;
}

/*===========================================================================*
 *                              buf_pool                                     *
 *===========================================================================*/
PUBLIC void buf_pool(void)
{
/* Initialize the buffer pool. */
  register struct buf *bp;

  bufs_in_use = 0;
  front = &buf[0];
  rear = &buf[NR_BUFS - 1];

  for (bp = &buf[0]; bp < &buf[NR_BUFS]; bp++) {
    bp->b_blocknr = NO_BLOCK;
    bp->b_dev = NO_DEV;
    bp->b_next = bp + 1;
    bp->b_prev = bp - 1;
    bp->bp = NULL;
    bp->b_bytes = 0;
  }
  buf[0].b_prev = NIL_BUF;
  buf[NR_BUFS - 1].b_next = NIL_BUF;
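
  /* Build the hash table.  Every buffer starts out with block number
   * NO_BLOCK, so they all belong on the same hash chain; the loop below
   * simply reuses the LRU links for that one chain.
   */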
  for (bp = &buf[0]; bp < &buf[NR_BUFS]; bp++) bp->b_hash = bp->b_next;
  buf_hash[0] = front;
}