/* Fixed extern declaration from pointer to array
 * minix.git / servers / mfs / cache.c
 * blob 2b53c0e0234a6e267e9f4be2eb8c8c59cc76a2cf
 */
/* The file system maintains a buffer cache to reduce the number of disk
 * accesses needed.  Whenever a read or write to the disk is done, a check is
 * first made to see if the block is in the cache.  This file manages the
 * cache.
 *
 * The entry points into this file are:
 *   get_block:	  request to fetch a block for reading or writing from cache
 *   put_block:	  return a block previously requested with get_block
 *   alloc_zone:  allocate a new zone (to increase the length of a file)
 *   free_zone:	  release a zone (when a file is removed)
 *   invalidate:  remove all the cache blocks on some device
 *
 * Private functions:
 *   rw_block:	  read or write a block from the disk itself
 */
#include "fs.h"
#include <minix/com.h>
#include <minix/u64.h>
#include <string.h>
#include "buf.h"
#include "super.h"
#include "inode.h"

FORWARD _PROTOTYPE( void rm_lru, (struct buf *bp) );
FORWARD _PROTOTYPE( int rw_block, (struct buf *, int) );
28 /*===========================================================================*
29 * get_block *
30 *===========================================================================*/
31 PUBLIC struct buf *get_block(dev, block, only_search)
32 register dev_t dev; /* on which device is the block? */
33 register block_t block; /* which block is wanted? */
34 int only_search; /* if NO_READ, don't read, else act normal */
36 /* Check to see if the requested block is in the block cache. If so, return
37 * a pointer to it. If not, evict some other block and fetch it (unless
38 * 'only_search' is 1). All the blocks in the cache that are not in use
39 * are linked together in a chain, with 'front' pointing to the least recently
40 * used block and 'rear' to the most recently used block. If 'only_search' is
41 * 1, the block being requested will be overwritten in its entirety, so it is
42 * only necessary to see if it is in the cache; if it is not, any free buffer
43 * will do. It is not necessary to actually read the block in from disk.
44 * If 'only_search' is PREFETCH, the block need not be read from the disk,
45 * and the device is not to be marked on the block, so callers can tell if
46 * the block returned is valid.
47 * In addition to the LRU chain, there is also a hash chain to link together
48 * blocks whose block numbers end with the same bit strings, for fast lookup.
51 int b;
52 static struct buf *bp, *prev_ptr;
54 ASSERT(fs_block_size > 0);
56 /* Search the hash chain for (dev, block). Do_read() can use
57 * get_block(NO_DEV ...) to get an unnamed block to fill with zeros when
58 * someone wants to read from a hole in a file, in which case this search
59 * is skipped
61 if (dev != NO_DEV) {
62 b = BUFHASH(block);
63 bp = buf_hash[b];
64 while (bp != NIL_BUF) {
65 if (bp->b_blocknr == block && bp->b_dev == dev) {
66 /* Block needed has been found. */
67 if (bp->b_count == 0) rm_lru(bp);
68 bp->b_count++; /* record that block is in use */
69 ASSERT(bp->b_bytes == fs_block_size);
70 ASSERT(bp->b_dev == dev);
71 ASSERT(bp->b_dev != NO_DEV);
72 ASSERT(bp->bp);
73 return(bp);
74 } else {
75 /* This block is not the one sought. */
76 bp = bp->b_hash; /* move to next block on hash chain */
81 /* Desired block is not on available chain. Take oldest block ('front'). */
82 if ((bp = front) == NIL_BUF) panic(__FILE__,"all buffers in use", NR_BUFS);
84 if(bp->b_bytes < fs_block_size) {
85 ASSERT(!bp->bp);
86 ASSERT(bp->b_bytes == 0);
87 if(!(bp->bp = alloc_contig(fs_block_size, 0, NULL))) {
88 printf("MFS: couldn't allocate a new block.\n");
89 for(bp = front;
90 bp && bp->b_bytes < fs_block_size; bp = bp->b_next)
92 if(!bp) {
93 panic("MFS", "no buffer available", NO_NUM);
95 } else {
96 bp->b_bytes = fs_block_size;
100 ASSERT(bp);
101 ASSERT(bp->bp);
102 ASSERT(bp->b_bytes == fs_block_size);
103 ASSERT(bp->b_count == 0);
105 rm_lru(bp);
107 /* Remove the block that was just taken from its hash chain. */
108 b = BUFHASH(bp->b_blocknr);
109 prev_ptr = buf_hash[b];
110 if (prev_ptr == bp) {
111 buf_hash[b] = bp->b_hash;
112 } else {
113 /* The block just taken is not on the front of its hash chain. */
114 while (prev_ptr->b_hash != NIL_BUF)
115 if (prev_ptr->b_hash == bp) {
116 prev_ptr->b_hash = bp->b_hash; /* found it */
117 break;
118 } else {
119 prev_ptr = prev_ptr->b_hash; /* keep looking */
123 /* If the block taken is dirty, make it clean by writing it to the disk.
124 * Avoid hysteresis by flushing all other dirty blocks for the same device.
126 if (bp->b_dev != NO_DEV) {
127 if (bp->b_dirt == DIRTY) flushall(bp->b_dev);
130 /* Fill in block's parameters and add it to the hash chain where it goes. */
131 bp->b_dev = dev; /* fill in device number */
132 bp->b_blocknr = block; /* fill in block number */
133 bp->b_count++; /* record that block is being used */
134 b = BUFHASH(bp->b_blocknr);
135 bp->b_hash = buf_hash[b];
137 buf_hash[b] = bp; /* add to hash list */
139 /* Go get the requested block unless searching or prefetching. */
140 if (dev != NO_DEV) {
141 if (only_search == PREFETCH) bp->b_dev = NO_DEV;
142 else
143 if (only_search == NORMAL) {
144 rw_block(bp, READING);
148 ASSERT(bp->bp);
150 return(bp); /* return the newly acquired block */
153 /*===========================================================================*
154 * put_block *
155 *===========================================================================*/
156 PUBLIC void put_block(bp, block_type)
157 register struct buf *bp; /* pointer to the buffer to be released */
158 int block_type; /* INODE_BLOCK, DIRECTORY_BLOCK, or whatever */
160 /* Return a block to the list of available blocks. Depending on 'block_type'
161 * it may be put on the front or rear of the LRU chain. Blocks that are
162 * expected to be needed again shortly (e.g., partially full data blocks)
163 * go on the rear; blocks that are unlikely to be needed again shortly
164 * (e.g., full data blocks) go on the front. Blocks whose loss can hurt
165 * the integrity of the file system (e.g., inode blocks) are written to
166 * disk immediately if they are dirty.
168 if (bp == NIL_BUF) return; /* it is easier to check here than in caller */
170 bp->b_count--; /* there is one use fewer now */
171 if (bp->b_count != 0) return; /* block is still in use */
173 bufs_in_use--; /* one fewer block buffers in use */
175 /* Put this block back on the LRU chain. If the ONE_SHOT bit is set in
176 * 'block_type', the block is not likely to be needed again shortly, so put
177 * it on the front of the LRU chain where it will be the first one to be
178 * taken when a free buffer is needed later.
180 if (bp->b_dev == DEV_RAM || (block_type & ONE_SHOT)) {
181 /* Block probably won't be needed quickly. Put it on front of chain.
182 * It will be the next block to be evicted from the cache.
184 bp->b_prev = NIL_BUF;
185 bp->b_next = front;
186 if (front == NIL_BUF)
187 rear = bp; /* LRU chain was empty */
188 else
189 front->b_prev = bp;
190 front = bp;
192 else {
193 /* Block probably will be needed quickly. Put it on rear of chain.
194 * It will not be evicted from the cache for a long time.
196 bp->b_prev = rear;
197 bp->b_next = NIL_BUF;
198 if (rear == NIL_BUF)
199 front = bp;
200 else
201 rear->b_next = bp;
202 rear = bp;
205 /* Some blocks are so important (e.g., inodes, indirect blocks) that they
206 * should be written to the disk immediately to avoid messing up the file
207 * system in the event of a crash.
209 if ((block_type & WRITE_IMMED) && bp->b_dirt==DIRTY && bp->b_dev != NO_DEV) {
210 rw_block(bp, WRITING);
214 /*===========================================================================*
215 * alloc_zone *
216 *===========================================================================*/
217 PUBLIC zone_t alloc_zone(dev, z)
218 dev_t dev; /* device where zone wanted */
219 zone_t z; /* try to allocate new zone near this one */
221 /* Allocate a new zone on the indicated device and return its number. */
223 int major, minor;
224 bit_t b, bit;
225 struct super_block *sp;
227 /* Note that the routine alloc_bit() returns 1 for the lowest possible
228 * zone, which corresponds to sp->s_firstdatazone. To convert a value
229 * between the bit number, 'b', used by alloc_bit() and the zone number, 'z',
230 * stored in the inode, use the formula:
231 * z = b + sp->s_firstdatazone - 1
232 * Alloc_bit() never returns 0, since this is used for NO_BIT (failure).
234 sp = get_super(dev);
236 /* If z is 0, skip initial part of the map known to be fully in use. */
237 if (z == sp->s_firstdatazone) {
238 bit = sp->s_zsearch;
239 } else {
240 bit = (bit_t) z - (sp->s_firstdatazone - 1);
242 b = alloc_bit(sp, ZMAP, bit);
243 if (b == NO_BIT) {
244 err_code = ENOSPC;
245 major = (int) (sp->s_dev >> MAJOR) & BYTE;
246 minor = (int) (sp->s_dev >> MINOR) & BYTE;
247 printf("No space on device %d/%d\n", major, minor);
248 return(NO_ZONE);
250 if (z == sp->s_firstdatazone) sp->s_zsearch = b; /* for next time */
251 return(sp->s_firstdatazone - 1 + (zone_t) b);
254 /*===========================================================================*
255 * free_zone *
256 *===========================================================================*/
257 PUBLIC void free_zone(dev, numb)
258 dev_t dev; /* device where zone located */
259 zone_t numb; /* zone to be returned */
261 /* Return a zone. */
263 register struct super_block *sp;
264 bit_t bit;
266 /* Locate the appropriate super_block and return bit. */
267 sp = get_super(dev);
268 if (numb < sp->s_firstdatazone || numb >= sp->s_zones) return;
269 bit = (bit_t) (numb - (sp->s_firstdatazone - 1));
270 free_bit(sp, ZMAP, bit);
271 if (bit < sp->s_zsearch) sp->s_zsearch = bit;
274 /*===========================================================================*
275 * rw_block *
276 *===========================================================================*/
277 PRIVATE int rw_block(bp, rw_flag)
278 register struct buf *bp; /* buffer pointer */
279 int rw_flag; /* READING or WRITING */
281 /* Read or write a disk block. This is the only routine in which actual disk
282 * I/O is invoked. If an error occurs, a message is printed here, but the error
283 * is not reported to the caller. If the error occurred while purging a block
284 * from the cache, it is not clear what the caller could do about it anyway.
286 int r, op;
287 u64_t pos;
288 dev_t dev;
290 if ( (dev = bp->b_dev) != NO_DEV) {
291 pos = mul64u(bp->b_blocknr, fs_block_size);
292 op = (rw_flag == READING ? MFS_DEV_READ : MFS_DEV_WRITE);
293 r = block_dev_io(op, dev, SELF_E, bp->b_data, pos, fs_block_size, 0);
294 if (r != fs_block_size) {
295 if (r >= 0) r = END_OF_FILE;
296 if (r != END_OF_FILE)
297 printf("MFS(%d) I/O error on device %d/%d, block %ld\n",
298 SELF_E, (dev>>MAJOR)&BYTE, (dev>>MINOR)&BYTE,
299 bp->b_blocknr);
301 bp->b_dev = NO_DEV; /* invalidate block */
303 /* Report read errors to interested parties. */
304 if (rw_flag == READING) rdwt_err = r;
308 bp->b_dirt = CLEAN;
310 return OK;
313 /*===========================================================================*
314 * invalidate *
315 *===========================================================================*/
316 PUBLIC void invalidate(device)
317 dev_t device; /* device whose blocks are to be purged */
319 /* Remove all the blocks belonging to some device from the cache. */
321 register struct buf *bp;
323 for (bp = &buf[0]; bp < &buf[NR_BUFS]; bp++)
324 if (bp->b_dev == device) bp->b_dev = NO_DEV;
327 /*===========================================================================*
328 * flushall *
329 *===========================================================================*/
330 PUBLIC void flushall(dev)
331 dev_t dev; /* device to flush */
333 /* Flush all dirty blocks for one device. */
335 register struct buf *bp;
336 static struct buf **dirty; /* static so it isn't on stack */
337 int ndirty;
339 STATICINIT(dirty, NR_BUFS);
341 for (bp = &buf[0], ndirty = 0; bp < &buf[NR_BUFS]; bp++)
342 if (bp->b_dirt == DIRTY && bp->b_dev == dev) dirty[ndirty++] = bp;
343 rw_scattered(dev, dirty, ndirty, WRITING);
346 /*===========================================================================*
347 * rw_scattered *
348 *===========================================================================*/
349 PUBLIC void rw_scattered(dev, bufq, bufqsize, rw_flag)
350 dev_t dev; /* major-minor device number */
351 struct buf **bufq; /* pointer to array of buffers */
352 int bufqsize; /* number of buffers */
353 int rw_flag; /* READING or WRITING */
355 /* Read or write scattered data from a device. */
357 register struct buf *bp;
358 int gap;
359 register int i;
360 register iovec_t *iop;
361 static iovec_t *iovec = NULL;
362 int j, r;
364 STATICINIT(iovec, NR_IOREQS);
366 /* (Shell) sort buffers on b_blocknr. */
367 gap = 1;
369 gap = 3 * gap + 1;
370 while (gap <= bufqsize);
371 while (gap != 1) {
372 gap /= 3;
373 for (j = gap; j < bufqsize; j++) {
374 for (i = j - gap;
375 i >= 0 && bufq[i]->b_blocknr > bufq[i + gap]->b_blocknr;
376 i -= gap) {
377 bp = bufq[i];
378 bufq[i] = bufq[i + gap];
379 bufq[i + gap] = bp;
384 /* Set up I/O vector and do I/O. The result of dev_io is OK if everything
385 * went fine, otherwise the error code for the first failed transfer.
387 while (bufqsize > 0) {
388 for (j = 0, iop = iovec; j < NR_IOREQS && j < bufqsize; j++, iop++) {
389 bp = bufq[j];
390 if (bp->b_blocknr != bufq[0]->b_blocknr + j) break;
391 iop->iov_addr = (vir_bytes) bp->b_data;
392 iop->iov_size = fs_block_size;
394 r = block_dev_io(rw_flag == WRITING ? MFS_DEV_SCATTER : MFS_DEV_GATHER,
395 dev, SELF_E, iovec,
396 mul64u(bufq[0]->b_blocknr, fs_block_size), j, 0);
398 /* Harvest the results. Dev_io reports the first error it may have
399 * encountered, but we only care if it's the first block that failed.
401 for (i = 0, iop = iovec; i < j; i++, iop++) {
402 bp = bufq[i];
403 if (iop->iov_size != 0) {
404 /* Transfer failed. An error? Do we care? */
405 if (r != OK && i == 0) {
406 printf(
407 "fs: I/O error on device %d/%d, block %lu\n",
408 (dev>>MAJOR)&BYTE, (dev>>MINOR)&BYTE,
409 bp->b_blocknr);
410 bp->b_dev = NO_DEV; /* invalidate block */
412 break;
414 if (rw_flag == READING) {
415 bp->b_dev = dev; /* validate block */
416 put_block(bp, PARTIAL_DATA_BLOCK);
417 } else {
418 bp->b_dirt = CLEAN;
421 bufq += i;
422 bufqsize -= i;
423 if (rw_flag == READING) {
424 /* Don't bother reading more than the device is willing to
425 * give at this time. Don't forget to release those extras.
427 while (bufqsize > 0) {
428 put_block(*bufq++, PARTIAL_DATA_BLOCK);
429 bufqsize--;
432 if (rw_flag == WRITING && i == 0) {
433 /* We're not making progress, this means we might keep
434 * looping. Buffers remain dirty if un-written. Buffers are
435 * lost if invalidate()d or LRU-removed while dirty. This
436 * is better than keeping unwritable blocks around forever..
438 break;
443 /*===========================================================================*
444 * rm_lru *
445 *===========================================================================*/
446 PRIVATE void rm_lru(bp)
447 struct buf *bp;
449 /* Remove a block from its LRU chain. */
450 struct buf *next_ptr, *prev_ptr;
452 bufs_in_use++;
453 next_ptr = bp->b_next; /* successor on LRU chain */
454 prev_ptr = bp->b_prev; /* predecessor on LRU chain */
455 if (prev_ptr != NIL_BUF)
456 prev_ptr->b_next = next_ptr;
457 else
458 front = next_ptr; /* this block was at front of chain */
460 if (next_ptr != NIL_BUF)
461 next_ptr->b_prev = prev_ptr;
462 else
463 rear = prev_ptr; /* this block was at rear of chain */
466 /*===========================================================================*
467 * set_blocksize *
468 *===========================================================================*/
469 PUBLIC void set_blocksize(int blocksize)
471 struct buf *bp;
472 struct inode *rip;
474 ASSERT(blocksize > 0);
476 for (bp = &buf[0]; bp < &buf[NR_BUFS]; bp++)
477 if(bp->b_count != 0)
478 panic("MFS", "change blocksize with buffer in use",
479 NO_NUM);
481 for (rip = &inode[0]; rip < &inode[NR_INODES]; rip++)
482 if (rip->i_count > 0)
483 panic("MFS", "change blocksize with inode in use",
484 NO_NUM);
486 fs_sync();
488 buf_pool();
489 fs_block_size = blocksize;
492 /*===========================================================================*
493 * buf_pool *
494 *===========================================================================*/
495 PUBLIC void buf_pool(void)
497 /* Initialize the buffer pool. */
498 register struct buf *bp;
500 bufs_in_use = 0;
501 front = &buf[0];
502 rear = &buf[NR_BUFS - 1];
504 for (bp = &buf[0]; bp < &buf[NR_BUFS]; bp++) {
505 bp->b_blocknr = NO_BLOCK;
506 bp->b_dev = NO_DEV;
507 bp->b_next = bp + 1;
508 bp->b_prev = bp - 1;
509 bp->bp = NULL;
510 bp->b_bytes = 0;
512 buf[0].b_prev = NIL_BUF;
513 buf[NR_BUFS - 1].b_next = NIL_BUF;
515 for (bp = &buf[0]; bp < &buf[NR_BUFS]; bp++) bp->b_hash = bp->b_next;
516 buf_hash[0] = front;