/*
 *  linux/fs/buffer.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 * 'buffer.c' implements the buffer-cache functions. Race-conditions have
 * been avoided by NEVER letting an interrupt change a buffer (except for the
 * data, of course), but instead letting the caller do it.
 */

/*
 * NOTE! There is one discordant note here: checking floppies for
 * disk change. This is where it fits best, I think, as it should
 * invalidate changed floppy-disk-caches.
 */

#include <stdarg.h>

#include <linux/config.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/string.h>
#include <linux/locks.h>
#include <linux/errno.h>

#include <asm/system.h>
#include <asm/io.h>

#ifdef CONFIG_SCSI
#ifdef CONFIG_BLK_DEV_SR
extern int check_cdrom_media_change(int, int);
#endif
#ifdef CONFIG_BLK_DEV_SD
extern int check_scsidisk_media_change(int, int);
extern int revalidate_scsidisk(int, int);
#endif
#endif
#ifdef CONFIG_CDU31A
extern int check_cdu31a_media_change(int, int);
#endif
#ifdef CONFIG_MCD
extern int check_mcd_media_change(int, int);
#endif

static int grow_buffers(int pri, int size);

static struct buffer_head * hash_table[NR_HASH];
static struct buffer_head * free_list = NULL;
static struct buffer_head * unused_list = NULL;
static struct wait_queue * buffer_wait = NULL;

int nr_buffers = 0;
int buffermem = 0;
int nr_buffer_heads = 0;
static int min_free_pages = 20;	/* nr free pages needed before buffer grows */
extern int *blksize_size[];

/*
 * Rewrote the wait-routines to use the "new" wait-queue functionality,
 * and getting rid of the cli-sti pairs. The wait-queue routines still
 * need cli-sti, but now it's just a couple of 386 instructions or so.
 *
 * Note that the real wait_on_buffer() is an inline function that checks
 * if 'b_wait' is set before calling this, so that the queues aren't set
 * up unnecessarily.
 */
void __wait_on_buffer(struct buffer_head * bh)
{
	struct wait_queue wait = { current, NULL };

	bh->b_count++;
	add_wait_queue(&bh->b_wait, &wait);
repeat:
	current->state = TASK_UNINTERRUPTIBLE;
	if (bh->b_lock) {
		schedule();
		goto repeat;
	}
	remove_wait_queue(&bh->b_wait, &wait);
	bh->b_count--;
	current->state = TASK_RUNNING;
}

/* Call sync_buffers with wait!=0 to ensure that the call does not
   return until all buffer writes have completed.  Sync() may return
   before the writes have finished; fsync() may not. */
static int sync_buffers(dev_t dev, int wait)
{
	int i, retry, pass = 0, err = 0;
	struct buffer_head * bh;

	/* One pass for no-wait, three for wait:
	   0) write out all dirty, unlocked buffers;
	   1) write out all dirty buffers, waiting if locked;
	   2) wait for completion by waiting for all buffers to unlock.
	 */
repeat:
	retry = 0;
	bh = free_list;
	for (i = nr_buffers*2 ; i-- > 0 ; bh = bh->b_next_free) {
		if (dev && bh->b_dev != dev)
			continue;
#if 0 /* Disable bad-block debugging code */
		if (bh->b_req && !bh->b_lock &&
		    !bh->b_dirt && !bh->b_uptodate)
			printk ("Warning (IO error) - orphaned block %08x on %04x\n",
				bh->b_blocknr, bh->b_dev);
#endif
		if (bh->b_lock)
		{
			/* Buffer is locked; skip it unless wait is
			   requested AND pass > 0. */
			if (!wait || !pass) {
				retry = 1;
				continue;
			}
			wait_on_buffer (bh);
		}
		/* If an unlocked buffer is not uptodate, there has been
		   an IO error. Skip it. */
		if (wait && bh->b_req && !bh->b_lock &&
		    !bh->b_dirt && !bh->b_uptodate)
		{
			err = 1;
			continue;
		}
		/* Don't write clean buffers.  Don't write ANY buffers
		   on the third pass. */
		if (!bh->b_dirt || pass>=2)
			continue;
		bh->b_count++;
		ll_rw_block(WRITE, 1, &bh);
		bh->b_count--;
		retry = 1;
	}
	/* If we are waiting for the sync to succeed, and if any dirty
	   blocks were written, then repeat; on the second pass, only
	   wait for buffers being written (do not pass to write any
	   more buffers on the second pass). */
	if (wait && retry && ++pass<=2)
		goto repeat;
	return err;
}
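
/*
 * sync_dev() starts writeback for one device (or for all devices when
 * dev==0) but does not wait for it to finish; fsync_dev() below is the
 * waiting variant, as its final sync_buffers() call passes wait=1.
 */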
void sync_dev(dev_t dev)
{
	sync_buffers(dev, 0);
	sync_supers(dev);
	sync_inodes(dev);
	sync_buffers(dev, 0);
}

int fsync_dev(dev_t dev)
{
	sync_buffers(dev, 0);
	sync_supers(dev);
	sync_inodes(dev);
	return sync_buffers(dev, 1);
}

asmlinkage int sys_sync(void)
{
	sync_dev(0);
	return 0;
}

int file_fsync (struct inode *inode, struct file *filp)
{
	return fsync_dev(inode->i_dev);
}

asmlinkage int sys_fsync(unsigned int fd)
{
	struct file * file;
	struct inode * inode;

	if (fd>=NR_OPEN || !(file=current->filp[fd]) || !(inode=file->f_inode))
		return -EBADF;
	if (!file->f_op || !file->f_op->fsync)
		return -EINVAL;
	if (file->f_op->fsync(inode,file))
		return -EIO;
	return 0;
}
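
/*
 * invalidate_buffers() forgets everything the cache holds for a device:
 * every buffer belonging to dev is marked not uptodate, not dirty and not
 * requested, so stale (even dirty) data for that device is simply dropped.
 */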
void invalidate_buffers(dev_t dev)
{
	int i;
	struct buffer_head * bh;

	bh = free_list;
	for (i = nr_buffers*2 ; --i > 0 ; bh = bh->b_next_free) {
		if (bh->b_dev != dev)
			continue;
		wait_on_buffer(bh);
		if (bh->b_dev == dev)
			bh->b_uptodate = bh->b_dirt = bh->b_req = 0;
	}
}

/*
 * This routine checks whether a floppy has been changed, and
 * invalidates all buffer-cache-entries in that case. This
 * is a relatively slow routine, so we have to try to minimize using
 * it. Thus it is called only upon a 'mount' or 'open'. This
 * is the best way of combining speed and utility, I think.
 * People changing diskettes in the middle of an operation deserve
 * to lose :-)
 *
 * NOTE! Although currently this is only for floppies, the idea is
 * that any additional removable block-device will use this routine,
 * and that mount/open needn't know that floppies/whatever are
 * special.
 */
void check_disk_change(dev_t dev)
{
	int i;
	struct buffer_head * bh;

	switch(MAJOR(dev)){
	case FLOPPY_MAJOR:
		if (!(bh = getblk(dev,0,1024)))
			return;
		i = floppy_change(bh);
		brelse(bh);
		break;

#if defined(CONFIG_BLK_DEV_SD) && defined(CONFIG_SCSI)
	case SCSI_DISK_MAJOR:
		i = check_scsidisk_media_change(dev, 0);
		break;
#endif

#if defined(CONFIG_BLK_DEV_SR) && defined(CONFIG_SCSI)
	case SCSI_CDROM_MAJOR:
		i = check_cdrom_media_change(dev, 0);
		break;
#endif

#if defined(CONFIG_CDU31A)
	case CDU31A_CDROM_MAJOR:
		i = check_cdu31a_media_change(dev, 0);
		break;
#endif

#if defined(CONFIG_MCD)
	case MITSUMI_CDROM_MAJOR:
		i = check_mcd_media_change(dev, 0);
		break;
#endif

	default:
		return;
	}

	if (!i) return;

	printk("VFS: Disk change detected on device %d/%d\n",
					MAJOR(dev), MINOR(dev));
	for (i=0 ; i<NR_SUPER ; i++)
		if (super_blocks[i].s_dev == dev)
			put_super(super_blocks[i].s_dev);
	invalidate_inodes(dev);
	invalidate_buffers(dev);

#if defined(CONFIG_BLK_DEV_SD) && defined(CONFIG_SCSI)
/* This is trickier for a removable hard disk, because we have to invalidate
   all of the partitions that lie on the disk. */
	if (MAJOR(dev) == SCSI_DISK_MAJOR)
		revalidate_scsidisk(dev, 0);
#endif
}
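
/*
 * Buffers are looked up by (dev,block) through a simple open hash table:
 * _hashfn() xors the two values and reduces the result modulo NR_HASH,
 * and each hash bucket is a doubly linked list chained through
 * b_next/b_prev. The free list is a separate, circular list chained
 * through b_next_free/b_prev_free.
 */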
#define _hashfn(dev,block) (((unsigned)(dev^block))%NR_HASH)
#define hash(dev,block) hash_table[_hashfn(dev,block)]

static inline void remove_from_hash_queue(struct buffer_head * bh)
{
	if (bh->b_next)
		bh->b_next->b_prev = bh->b_prev;
	if (bh->b_prev)
		bh->b_prev->b_next = bh->b_next;
	if (hash(bh->b_dev,bh->b_blocknr) == bh)
		hash(bh->b_dev,bh->b_blocknr) = bh->b_next;
	bh->b_next = bh->b_prev = NULL;
}

static inline void remove_from_free_list(struct buffer_head * bh)
{
	if (!(bh->b_prev_free) || !(bh->b_next_free))
		panic("VFS: Free block list corrupted");
	bh->b_prev_free->b_next_free = bh->b_next_free;
	bh->b_next_free->b_prev_free = bh->b_prev_free;
	if (free_list == bh)
		free_list = bh->b_next_free;
	bh->b_next_free = bh->b_prev_free = NULL;
}

static inline void remove_from_queues(struct buffer_head * bh)
{
	remove_from_hash_queue(bh);
	remove_from_free_list(bh);
}

static inline void put_first_free(struct buffer_head * bh)
{
	if (!bh || (bh == free_list))
		return;
	remove_from_free_list(bh);
	/* add to front of free list */
	bh->b_next_free = free_list;
	bh->b_prev_free = free_list->b_prev_free;
	free_list->b_prev_free->b_next_free = bh;
	free_list->b_prev_free = bh;
	free_list = bh;
}

static inline void put_last_free(struct buffer_head * bh)
{
	if (!bh)
		return;
	if (bh == free_list) {
		free_list = bh->b_next_free;
		return;
	}
	remove_from_free_list(bh);
	/* add to back of free list */
	bh->b_next_free = free_list;
	bh->b_prev_free = free_list->b_prev_free;
	free_list->b_prev_free->b_next_free = bh;
	free_list->b_prev_free = bh;
}

static inline void insert_into_queues(struct buffer_head * bh)
{
	/* put at end of free list */
	bh->b_next_free = free_list;
	bh->b_prev_free = free_list->b_prev_free;
	free_list->b_prev_free->b_next_free = bh;
	free_list->b_prev_free = bh;
	/* put the buffer in new hash-queue if it has a device */
	bh->b_prev = NULL;
	bh->b_next = NULL;
	if (!bh->b_dev)
		return;
	bh->b_next = hash(bh->b_dev,bh->b_blocknr);
	hash(bh->b_dev,bh->b_blocknr) = bh;
	if (bh->b_next)
		bh->b_next->b_prev = bh;
}

static struct buffer_head * find_buffer(dev_t dev, int block, int size)
{
	struct buffer_head * tmp;

	for (tmp = hash(dev,block) ; tmp != NULL ; tmp = tmp->b_next)
		if (tmp->b_dev==dev && tmp->b_blocknr==block)
			if (tmp->b_size == size)
				return tmp;
			else {
				printk("VFS: Wrong blocksize on device %d/%d\n",
							MAJOR(dev), MINOR(dev));
				return NULL;
			}
	return NULL;
}

/*
 * Why like this, I hear you say... The reason is race-conditions.
 * As we don't lock buffers (unless we are reading them, that is),
 * something might happen to it while we sleep (ie a read-error
 * will force it bad). This shouldn't really happen currently, but
 * the code is ready.
 */
struct buffer_head * get_hash_table(dev_t dev, int block, int size)
{
	struct buffer_head * bh;

	for (;;) {
		if (!(bh=find_buffer(dev,block,size)))
			return NULL;
		bh->b_count++;
		wait_on_buffer(bh);
		if (bh->b_dev == dev && bh->b_blocknr == block && bh->b_size == size)
			return bh;
		bh->b_count--;
	}
}
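
/*
 * set_blocksize() switches a device to a new soft blocksize. Buffers of
 * the old size are synced and then taken off the hash queues, so that
 * getblk() can never return a buffer with a stale size for this device.
 */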
void set_blocksize(dev_t dev, int size)
{
	int i;
	struct buffer_head * bh, *bhnext;

	if (!blksize_size[MAJOR(dev)])
		return;

	switch(size) {
		default: panic("Invalid blocksize passed to set_blocksize");
		case 512: case 1024: case 2048: case 4096:;
	}

	if (blksize_size[MAJOR(dev)][MINOR(dev)] == 0 && size == BLOCK_SIZE) {
		blksize_size[MAJOR(dev)][MINOR(dev)] = size;
		return;
	}
	if (blksize_size[MAJOR(dev)][MINOR(dev)] == size)
		return;
	sync_buffers(dev, 2);
	blksize_size[MAJOR(dev)][MINOR(dev)] = size;

	/* We need to be quite careful how we do this - we are moving entries
	   around on the free list, and we can get in a loop if we are not careful.*/

	bh = free_list;
	for (i = nr_buffers*2 ; --i > 0 ; bh = bhnext) {
		bhnext = bh->b_next_free;
		if (bh->b_dev != dev)
			continue;
		if (bh->b_size == size)
			continue;

		wait_on_buffer(bh);
		if (bh->b_dev == dev && bh->b_size != size)
			bh->b_uptodate = bh->b_dirt = 0;
		remove_from_hash_queue(bh);
		/* put_first_free(bh); */
	}
}

/*
 * Ok, this is getblk, and it isn't very clear, again to hinder
 * race-conditions. Most of the code is seldom used, (ie repeating),
 * so it should be much more efficient than it looks.
 *
 * The algorithm is changed: hopefully better, and an elusive bug removed.
 *
 * 14.02.92: changed it to sync dirty buffers a bit: better performance
 * when the filesystem starts to get full of dirty blocks (I hope).
 */
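
/*
 * BADNESS() ranks a candidate buffer for reuse: 0 for clean and unlocked,
 * 1 for locked, 2 for dirty, 3 for dirty and locked. getblk() scans the
 * free list for the buffer of the right size with the lowest badness.
 */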
#define BADNESS(bh) (((bh)->b_dirt<<1)+(bh)->b_lock)
struct buffer_head * getblk(dev_t dev, int block, int size)
{
	struct buffer_head * bh, * tmp;
	int buffers;
	static int grow_size = 0;

repeat:
	bh = get_hash_table(dev, block, size);
	if (bh) {
		if (bh->b_uptodate && !bh->b_dirt)
			put_last_free(bh);
		return bh;
	}
	grow_size -= size;
	if (nr_free_pages > min_free_pages && grow_size <= 0) {
		if (grow_buffers(GFP_BUFFER, size))
			grow_size = PAGE_SIZE;
	}
	buffers = nr_buffers;
	bh = NULL;

	for (tmp = free_list; buffers-- > 0 ; tmp = tmp->b_next_free) {
		if (tmp->b_count || tmp->b_size != size)
			continue;
		if (mem_map[MAP_NR((unsigned long) tmp->b_data)] != 1)
			continue;
		if (!bh || BADNESS(tmp)<BADNESS(bh)) {
			bh = tmp;
			if (!BADNESS(tmp))
				break;
		}
#if 0
		if (tmp->b_dirt) {
			tmp->b_count++;
			ll_rw_block(WRITEA, 1, &tmp);
			tmp->b_count--;
		}
#endif
	}

	if (!bh && nr_free_pages > 5) {
		if (grow_buffers(GFP_BUFFER, size))
			goto repeat;
	}

	/* and repeat until we find something good */
	if (!bh) {
		if (!grow_buffers(GFP_ATOMIC, size))
			sleep_on(&buffer_wait);
		goto repeat;
	}
	wait_on_buffer(bh);
	if (bh->b_count || bh->b_size != size)
		goto repeat;
	if (bh->b_dirt) {
		sync_buffers(0,0);
		goto repeat;
	}
	/* NOTE!! While we slept waiting for this block, somebody else might */
	/* already have added "this" block to the cache. check it */
	if (find_buffer(dev,block,size))
		goto repeat;
	/* OK, FINALLY we know that this buffer is the only one of its kind, */
	/* and that it's unused (b_count=0), unlocked (b_lock=0), and clean */
	bh->b_count=1;
	bh->b_dirt=0;
	bh->b_uptodate=0;
	bh->b_req=0;
	remove_from_queues(bh);
	bh->b_dev=dev;
	bh->b_blocknr=block;
	insert_into_queues(bh);
	return bh;
}
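
/*
 * brelse() drops the reference taken by getblk()/bread(). When the count
 * reaches zero, anyone sleeping in getblk() on buffer_wait is woken up.
 */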
void brelse(struct buffer_head * buf)
{
	if (!buf)
		return;
	wait_on_buffer(buf);
	if (buf->b_count) {
		if (--buf->b_count)
			return;
		wake_up(&buffer_wait);
		return;
	}
	printk("VFS: brelse: Trying to free free buffer\n");
}

/*
 * bread() reads a specified block and returns the buffer that contains
 * it. It returns NULL if the block was unreadable.
 */
struct buffer_head * bread(dev_t dev, int block, int size)
{
	struct buffer_head * bh;

	if (!(bh = getblk(dev, block, size))) {
		printk("VFS: bread: READ error on device %d/%d\n",
						MAJOR(dev), MINOR(dev));
		return NULL;
	}
	if (bh->b_uptodate)
		return bh;
	ll_rw_block(READ, 1, &bh);
	wait_on_buffer(bh);
	if (bh->b_uptodate)
		return bh;
	brelse(bh);
	return NULL;
}

/*
 * Ok, breada can be used as bread, but additionally to mark other
 * blocks for reading as well. End the argument list with a negative
 * number.
 */
struct buffer_head * breada(dev_t dev,int first, ...)
{
	va_list args;
	unsigned int blocksize;
	struct buffer_head * bh, *tmp;

	va_start(args,first);

	blocksize = BLOCK_SIZE;
	if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)])
		blocksize = blksize_size[MAJOR(dev)][MINOR(dev)];

	if (!(bh = getblk(dev, first, blocksize))) {
		printk("VFS: breada: READ error on device %d/%d\n",
						MAJOR(dev), MINOR(dev));
		return NULL;
	}
	if (!bh->b_uptodate)
		ll_rw_block(READ, 1, &bh);
	while ((first=va_arg(args,int))>=0) {
		tmp = getblk(dev, first, blocksize);
		if (tmp) {
			if (!tmp->b_uptodate)
				ll_rw_block(READA, 1, &tmp);
			tmp->b_count--;
		}
	}
	va_end(args);
	wait_on_buffer(bh);
	if (bh->b_uptodate)
		return bh;
	brelse(bh);
	return (NULL);
}
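
/*
 * Buffer heads themselves are allocated a page at a time and kept on
 * unused_list; they are recycled through put_unused_buffer_head() and
 * get_unused_buffer_head() below rather than ever being freed back to
 * the page allocator.
 */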
/*
 * See fs/inode.c for the weird use of volatile..
 */
static void put_unused_buffer_head(struct buffer_head * bh)
{
	struct wait_queue * wait;

	wait = ((volatile struct buffer_head *) bh)->b_wait;
	memset((void *) bh,0,sizeof(*bh));
	((volatile struct buffer_head *) bh)->b_wait = wait;
	bh->b_next_free = unused_list;
	unused_list = bh;
}

static void get_more_buffer_heads(void)
{
	int i;
	struct buffer_head * bh;

	if (unused_list)
		return;

	if(! (bh = (struct buffer_head*) get_free_page(GFP_BUFFER)))
		return;

	for (nr_buffer_heads+=i=PAGE_SIZE/sizeof*bh ; i>0; i--) {
		bh->b_next_free = unused_list;	/* only make link */
		unused_list = bh++;
	}
}

static struct buffer_head * get_unused_buffer_head(void)
{
	struct buffer_head * bh;

	get_more_buffer_heads();
	if (!unused_list)
		return NULL;
	bh = unused_list;
	unused_list = bh->b_next_free;
	bh->b_next_free = NULL;
	bh->b_data = NULL;
	bh->b_size = 0;
	bh->b_req = 0;
	return bh;
}

/*
 * Create the appropriate buffers when given a page for data area and
 * the size of each buffer.. Use the bh->b_this_page linked list to
 * follow the buffers created. Return NULL if unable to create more
 * buffers.
 */
static struct buffer_head * create_buffers(unsigned long page, unsigned long size)
{
	struct buffer_head *bh, *head;
	unsigned long offset;

	head = NULL;
	offset = PAGE_SIZE;
	while ((offset -= size) < PAGE_SIZE) {
		bh = get_unused_buffer_head();
		if (!bh)
			goto no_grow;
		bh->b_this_page = head;
		head = bh;
		bh->b_data = (char *) (page+offset);
		bh->b_size = size;
	}
	return head;
/*
 * In case anything failed, we just free everything we got.
 */
no_grow:
	bh = head;
	while (bh) {
		head = bh;
		bh = bh->b_this_page;
		put_unused_buffer_head(head);
	}
	return NULL;
}
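
/*
 * read_buffers() gathers the buffers in bh[] that are not yet uptodate,
 * submits them with a single ll_rw_block(READ, ...) call, and then waits
 * for every buffer in the array.
 */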
static void read_buffers(struct buffer_head * bh[], int nrbuf)
{
	int i;
	int bhnum = 0;
	struct buffer_head * bhr[8];

	for (i = 0 ; i < nrbuf ; i++) {
		if (bh[i] && !bh[i]->b_uptodate)
			bhr[bhnum++] = bh[i];
	}
	if (bhnum)
		ll_rw_block(READ, bhnum, bhr);
	for (i = 0 ; i < nrbuf ; i++) {
		if (bh[i]) {
			wait_on_buffer(bh[i]);
		}
	}
}
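
/*
 * check_aligned() handles the case where the wanted blocks are already in
 * the cache: if they all sit consecutively in one physical page, that page
 * is shared (its mem_map count is raised) and the caller's page is freed.
 */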
static unsigned long check_aligned(struct buffer_head * first, unsigned long address,
	dev_t dev, int *b, int size)
{
	struct buffer_head * bh[8];
	unsigned long page;
	unsigned long offset;
	int block;
	int nrbuf;

	page = (unsigned long) first->b_data;
	if (page & ~PAGE_MASK) {
		brelse(first);
		return 0;
	}
	mem_map[MAP_NR(page)]++;
	bh[0] = first;
	nrbuf = 1;
	for (offset = size ; offset < PAGE_SIZE ; offset += size) {
		block = *++b;
		if (!block)
			goto no_go;
		first = get_hash_table(dev, block, size);
		if (!first)
			goto no_go;
		bh[nrbuf++] = first;
		if (page+offset != (unsigned long) first->b_data)
			goto no_go;
	}
	read_buffers(bh,nrbuf);		/* make sure they are actually read correctly */
	while (nrbuf-- > 0)
		brelse(bh[nrbuf]);
	free_page(address);
	++current->min_flt;
	return page;
no_go:
	while (nrbuf-- > 0)
		brelse(bh[nrbuf]);
	free_page(page);
	return 0;
}
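
/*
 * try_to_load_aligned() is the other half of the sharing trick: when none
 * of the wanted blocks are cached yet, it builds a full page of new buffers
 * over the caller's page, reads the blocks into it and returns that address,
 * or 0 if any block is missing or already cached.
 */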
static unsigned long try_to_load_aligned(unsigned long address,
	dev_t dev, int b[], int size)
{
	struct buffer_head * bh, * tmp, * arr[8];
	unsigned long offset;
	int * p;
	int block;

	bh = create_buffers(address, size);
	if (!bh)
		return 0;
	p = b;
	for (offset = 0 ; offset < PAGE_SIZE ; offset += size) {
		block = *(p++);
		if (!block)
			goto not_aligned;
		tmp = get_hash_table(dev, block, size);
		if (tmp) {
			brelse(tmp);
			goto not_aligned;
		}
	}
	tmp = bh;
	p = b;
	block = 0;
	while (1) {
		arr[block++] = bh;
		bh->b_count = 1;
		bh->b_dirt = 0;
		bh->b_uptodate = 0;
		bh->b_dev = dev;
		bh->b_blocknr = *(p++);
		nr_buffers++;
		insert_into_queues(bh);
		if (bh->b_this_page)
			bh = bh->b_this_page;
		else
			break;
	}
	buffermem += PAGE_SIZE;
	bh->b_this_page = tmp;
	mem_map[MAP_NR(address)]++;
	read_buffers(arr,block);
	while (block-- > 0)
		brelse(arr[block]);
	++current->maj_flt;
	return address;
not_aligned:
	while ((tmp = bh) != NULL) {
		bh = bh->b_this_page;
		put_unused_buffer_head(tmp);
	}
	return 0;
}

/*
 * Try-to-share-buffers tries to minimize memory use by trying to keep
 * both code pages and the buffer area in the same page. This is done by
 * (a) checking if the buffers are already aligned correctly in memory and
 * (b) if none of the buffer heads are in memory at all, trying to load
 * them into memory the way we want them.
 *
 * This doesn't guarantee that the memory is shared, but should under most
 * circumstances work very well indeed (ie >90% sharing of code pages on
 * demand-loadable executables).
 */
static inline unsigned long try_to_share_buffers(unsigned long address,
	dev_t dev, int *b, int size)
{
	struct buffer_head * bh;
	int block;

	block = b[0];
	if (!block)
		return 0;
	bh = get_hash_table(dev, block, size);
	if (bh)
		return check_aligned(bh, address, dev, b, size);
	return try_to_load_aligned(address, dev, b, size);
}
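
/*
 * COPYBLK() copies 'size' bytes with a rep/movsl, so size is assumed to be
 * a multiple of 4 (true for all supported blocksizes).
 */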
#define COPYBLK(size,from,to) \
__asm__ __volatile__("rep ; movsl": \
	:"c" (((unsigned long) size) >> 2),"S" (from),"D" (to) \
	:"cx","di","si")

/*
 * bread_page reads a page's worth of buffers into memory at the desired
 * address. It's a function of its own, as there is some speed to be got
 * by reading them all at the same time, not waiting for one to be read,
 * and then another etc. This also allows us to optimize memory usage by
 * sharing code pages and filesystem buffers..
 */
unsigned long bread_page(unsigned long address, dev_t dev, int b[], int size, int prot)
{
	struct buffer_head * bh[8];
	unsigned long where;
	int i, j;

	if (!(prot & PAGE_RW)) {
		where = try_to_share_buffers(address,dev,b,size);
		if (where)
			return where;
	}
	++current->maj_flt;
	for (i=0, j=0; j<PAGE_SIZE ; i++, j+= size) {
		bh[i] = NULL;
		if (b[i])
			bh[i] = getblk(dev, b[i], size);
	}
	read_buffers(bh,i);
	where = address;
	for (i=0, j=0; j<PAGE_SIZE ; i++, j += size,address += size) {
		if (bh[i]) {
			if (bh[i]->b_uptodate)
				COPYBLK(size, (unsigned long) bh[i]->b_data,address);
			brelse(bh[i]);
		}
	}
	return where;
}

/*
 * Try to increase the number of buffers available: the size argument
 * is used to determine what kind of buffers we want.
 */
static int grow_buffers(int pri, int size)
{
	unsigned long page;
	struct buffer_head *bh, *tmp;

	if ((size & 511) || (size > PAGE_SIZE)) {
		printk("VFS: grow_buffers: size = %d\n",size);
		return 0;
	}
	if(!(page = __get_free_page(pri)))
		return 0;
	bh = create_buffers(page, size);
	if (!bh) {
		free_page(page);
		return 0;
	}
	tmp = bh;
	while (1) {
		if (free_list) {
			tmp->b_next_free = free_list;
			tmp->b_prev_free = free_list->b_prev_free;
			free_list->b_prev_free->b_next_free = tmp;
			free_list->b_prev_free = tmp;
		} else {
			tmp->b_prev_free = tmp;
			tmp->b_next_free = tmp;
		}
		free_list = tmp;
		++nr_buffers;
		if (tmp->b_this_page)
			tmp = tmp->b_this_page;
		else
			break;
	}
	tmp->b_this_page = bh;
	buffermem += PAGE_SIZE;
	return 1;
}

/*
 * try_to_free() checks if all the buffers on this particular page
 * are unused, and frees the page if so.
 */
static int try_to_free(struct buffer_head * bh, struct buffer_head ** bhp)
{
	unsigned long page;
	struct buffer_head * tmp, * p;

	*bhp = bh;
	page = (unsigned long) bh->b_data;
	page &= PAGE_MASK;
	tmp = bh;
	do {
		if (!tmp)
			return 0;
		if (tmp->b_count || tmp->b_dirt || tmp->b_lock)
			return 0;
		tmp = tmp->b_this_page;
	} while (tmp != bh);
	tmp = bh;
	do {
		p = tmp;
		tmp = tmp->b_this_page;
		nr_buffers--;
		if (p == *bhp)
			*bhp = p->b_prev_free;
		remove_from_queues(p);
		put_unused_buffer_head(p);
	} while (tmp != bh);
	buffermem -= PAGE_SIZE;
	free_page(page);
	return !mem_map[MAP_NR(page)];
}

/*
 * Try to free up some pages by shrinking the buffer-cache
 *
 * Priority tells the routine how hard to try to shrink the
 * buffers: 3 means "don't bother too much", while a value
 * of 0 means "we'd better get some free pages now".
 */
int shrink_buffers(unsigned int priority)
{
	struct buffer_head *bh;
	int i;

	if (priority < 2)
		sync_buffers(0,0);
	bh = free_list;
	i = nr_buffers >> priority;
	for ( ; i-- > 0 ; bh = bh->b_next_free) {
		if (bh->b_count || !bh->b_this_page)
			continue;
		if (bh->b_lock)
			if (priority)
				continue;
			else
				wait_on_buffer(bh);
		if (bh->b_dirt) {
			bh->b_count++;
			ll_rw_block(WRITEA, 1, &bh);
			bh->b_count--;
			continue;
		}
		if (try_to_free(bh, &bh))
			return 1;
	}
	return 0;
}

/*
 * This initializes the initial buffer free list. nr_buffers is set
 * to one less than the actual number of buffers, as a sop to backwards
 * compatibility --- the old code did this (I think unintentionally,
 * but I'm not sure), and programs in the ps package expect it.
 *					- TYT 8/30/92
 */
void buffer_init(void)
{
	int i;

	if (high_memory >= 4*1024*1024)
		min_free_pages = 200;
	else
		min_free_pages = 20;
	for (i = 0 ; i < NR_HASH ; i++)
		hash_table[i] = NULL;
	free_list = 0;
	grow_buffers(GFP_KERNEL, BLOCK_SIZE);
	if (!free_list)
		panic("VFS: Unable to initialize buffer free list!");
	return;
}