/*
 *  linux/drivers/block/ll_rw_blk.c
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 1994,      Karl Keyte: Added support for disk statistics
 * Elevator latency, (C) 2000  Andrea Arcangeli <andrea@suse.de> SuSE
 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July 2000
 */

/*
 * This handles all read/write requests to block devices
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/config.h>
#include <linux/locks.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/smp_lock.h>

#include <asm/system.h>
#include <asm/io.h>
#include <linux/blk.h>
#include <linux/highmem.h>
#include <linux/raid/md.h>

#include <linux/module.h>

/*
 * MAC Floppy IWM hooks
 */

#ifdef CONFIG_MAC_FLOPPY_IWM
extern int mac_floppy_init(void);
#endif

extern int lvm_init(void);

/*
 * For the allocated request tables
 */
static kmem_cache_t *request_cachep;

/*
 * The "disk" task queue is used to start the actual requests
 * after a plug
 */
DECLARE_TASK_QUEUE(tq_disk);

/*
 * Protect the request list against multiple users..
 *
 * With this spinlock the Linux block IO subsystem is 100% SMP threaded
 * from the IRQ event side, and almost 100% SMP threaded from the syscall
 * side (we still have to protect against block device array operations, and
 * the do_request() side is casually still unsafe. The kernel lock protects
 * this part currently.).
 *
 * There is a fair chance that things will work just OK if these functions
 * are called with no global kernel lock held ...
 */
spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED;
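
/*
 * Illustrative sketch (not part of this file): external code that pokes at
 * a request queue is expected to take io_request_lock with interrupts
 * disabled first, as blk_get_queue() and generic_unplug_device() below do.
 * Roughly (the queue pointer "q" is hypothetical):
 *
 *	unsigned long flags;
 *
 *	spin_lock_irqsave(&io_request_lock, flags);
 *	if (!list_empty(&q->queue_head))
 *		q->request_fn(q);
 *	spin_unlock_irqrestore(&io_request_lock, flags);
 */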

/* This specifies how many sectors to read ahead on the disk. */

int read_ahead[MAX_BLKDEV];

/* blk_dev_struct is:
 *	*request_fn
 *	*current_request
 */
struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */

/*
 * blk_size contains the size of all block-devices in units of 1024 byte
 * sectors:
 *
 *	blk_size[MAJOR][MINOR]
 *
 * if (!blk_size[MAJOR]) then no minor size checking is done.
 */
int * blk_size[MAX_BLKDEV];

/*
 * blksize_size contains the size of all block-devices:
 *
 *	blksize_size[MAJOR][MINOR]
 *
 * if (!blksize_size[MAJOR]) then 1024 bytes is assumed.
 */
int * blksize_size[MAX_BLKDEV];

/*
 * hardsect_size contains the size of the hardware sector of a device.
 *
 *	hardsect_size[MAJOR][MINOR]
 *
 * if (!hardsect_size[MAJOR])
 *		then 512 bytes is assumed.
 * else
 *		sector_size is hardsect_size[MAJOR][MINOR]
 * This is currently set by some scsi devices and read by the msdos fs driver.
 * Other uses may appear later.
 */
int * hardsect_size[MAX_BLKDEV];

/*
 * The following tunes the read-ahead algorithm in mm/filemap.c
 */
int * max_readahead[MAX_BLKDEV];

/*
 * Max number of sectors per request
 */
int * max_sectors[MAX_BLKDEV];

static inline int get_max_sectors(kdev_t dev)
{
	if (!max_sectors[MAJOR(dev)])
		return MAX_SECTORS;
	return max_sectors[MAJOR(dev)][MINOR(dev)];
}

static inline request_queue_t *__blk_get_queue(kdev_t dev)
{
	struct blk_dev_struct *bdev = blk_dev + MAJOR(dev);

	if (bdev->queue)
		return bdev->queue(dev);
	else
		return &blk_dev[MAJOR(dev)].request_queue;
}

/*
 * NOTE: the device-specific queue() functions
 * have to be atomic!
 */
request_queue_t *blk_get_queue(kdev_t dev)
{
	request_queue_t *ret;
	unsigned long flags;

	spin_lock_irqsave(&io_request_lock, flags);
	ret = __blk_get_queue(dev);
	spin_unlock_irqrestore(&io_request_lock, flags);

	return ret;
}

static int __blk_cleanup_queue(struct list_head *head)
{
	struct list_head *entry;
	struct request *rq;
	int i = 0;

	if (list_empty(head))
		return 0;

	entry = head->next;
	do {
		rq = list_entry(entry, struct request, table);
		entry = entry->next;
		list_del(&rq->table);
		kmem_cache_free(request_cachep, rq);
		i++;
	} while (!list_empty(head));

	return i;
}

/**
 * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed
 * @q:    the request queue to be released
 *
 * Description:
 *     blk_cleanup_queue is the pair to blk_init_queue().  It should
 *     be called when a request queue is being released; typically
 *     when a block device is being de-registered.  Currently, its
 *     primary task is to free all the &struct request structures that
 *     were allocated to the queue.
 * Caveat:
 *     Hopefully the low level driver will have finished any
 *     outstanding requests first...
 **/
void blk_cleanup_queue(request_queue_t * q)
{
	int count = QUEUE_NR_REQUESTS;

	count -= __blk_cleanup_queue(&q->request_freelist[READ]);
	count -= __blk_cleanup_queue(&q->request_freelist[WRITE]);

	if (count)
		printk("blk_cleanup_queue: leaked requests (%d)\n", count);

	memset(q, 0, sizeof(*q));
}

/**
 * blk_queue_headactive - indicate whether head of request queue may be active
 * @q:       The queue which this applies to.
 * @active:  A flag indicating whether the head of the queue is active.
 *
 * Description:
 *    The driver for a block device may choose to leave the currently active
 *    request on the request queue, removing it only when it has completed.
 *    The queue handling routines assume this by default for safety reasons
 *    and will not involve the head of the request queue in any merging or
 *    reordering of requests when the queue is unplugged (and thus may be
 *    working on this particular request).
 *
 *    If a driver removes requests from the queue before processing them, then
 *    it may indicate that it does so, thereby allowing the head of the queue
 *    to be involved in merging and reordering.  This is done by calling
 *    blk_queue_headactive() with an @active flag of %0.
 *
 *    If a driver processes several requests at once, it must remove them (or
 *    at least all but one of them) from the request queue.
 *
 *    When a queue is plugged (see blk_queue_pluggable()) the head will be
 *    assumed to be inactive.
 **/
void blk_queue_headactive(request_queue_t * q, int active)
{
	q->head_active = active;
}
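
/*
 * Illustrative sketch (not part of this file): a driver that dequeues
 * requests itself before working on them would announce that from its
 * initialisation code, roughly (the queue "q" and the request handler
 * mydev_request_fn are hypothetical):
 *
 *	blk_init_queue(q, mydev_request_fn);
 *	blk_queue_headactive(q, 0);
 */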

/**
 * blk_queue_pluggable - define a plugging function for a request queue
 * @q:     the request queue to which the function will apply
 * @plug:  the function to be called to plug a queue
 *
 * Description:
 *   A request queue will be "plugged" if a request is added to it
 *   while it is empty.  This allows a number of requests to be added
 *   before any are processed, thus providing an opportunity for these
 *   requests to be merged or re-ordered.
 *   The default plugging function (generic_plug_device()) sets the
 *   "plugged" flag for the queue and adds a task to the $tq_disk task
 *   queue to unplug the queue and call the request function at a
 *   later time.
 *
 *   A device driver may provide an alternate plugging function by
 *   passing it to blk_queue_pluggable().  This function should set
 *   the "plugged" flag if it wants calls to the request function to be
 *   blocked, and should place a task on $tq_disk which will unplug
 *   the queue.  Alternately it can simply do nothing and thereby
 *   disable plugging of the device.
 **/
void blk_queue_pluggable (request_queue_t * q, plug_device_fn *plug)
{
	q->plug_device_fn = plug;
}
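
/*
 * Illustrative sketch (not part of this file): a driver that never wants
 * its queue plugged could install a plug function that does nothing, which
 * leaves q->plugged clear so the request function keeps being called
 * directly (names are hypothetical):
 *
 *	static void mydev_plug_none(request_queue_t *q, kdev_t dev)
 *	{
 *	}
 *
 *	blk_queue_pluggable(q, mydev_plug_none);
 */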

/**
 * blk_queue_make_request - define an alternate make_request function for a device
 * @q:    the request queue for the device to be affected
 * @mfn:  the alternate make_request function
 *
 * Description:
 *    The normal way for &struct buffer_heads to be passed to a device
 *    driver is for them to be collected into requests on a request
 *    queue, and then to allow the device driver to select requests
 *    off that queue when it is ready.  This works well for many block
 *    devices.  However some block devices (typically virtual devices
 *    such as md or lvm) do not benefit from the processing on the
 *    request queue, and are served best by having the requests passed
 *    directly to them.  This can be achieved by providing a function
 *    to blk_queue_make_request().
 *
 * Caveat:
 *    The driver that does this *must* be able to deal appropriately
 *    with buffers in "highmemory", either by calling bh_kmap() to get
 *    a kernel mapping, or by calling create_bounce() to create a
 *    buffer in normal memory.
 **/
void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
{
	q->make_request_fn = mfn;
}
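
/*
 * Illustrative sketch (not part of this file): a stacking driver (md/lvm
 * style) could bypass request queueing by remapping the buffer head and
 * returning non-zero, so that generic_make_request() resubmits it to the
 * remapped bh->b_rdev (the names and the remapping helpers here are
 * hypothetical):
 *
 *	static int mydev_make_request(request_queue_t *q, int rw,
 *				      struct buffer_head *bh)
 *	{
 *		bh->b_rdev = mydev_lower_device(bh->b_rdev);
 *		bh->b_rsector += mydev_start_sector(bh->b_rdev);
 *		return 1;
 *	}
 *
 *	blk_queue_make_request(q, mydev_make_request);
 */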

static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments)
{
	if (req->nr_segments < max_segments) {
		req->nr_segments++;
		q->elevator.nr_segments++;
		return 1;
	}
	return 0;
}

static int ll_back_merge_fn(request_queue_t *q, struct request *req,
			    struct buffer_head *bh, int max_segments)
{
	if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data)
		return 1;
	return ll_new_segment(q, req, max_segments);
}

static int ll_front_merge_fn(request_queue_t *q, struct request *req,
			     struct buffer_head *bh, int max_segments)
{
	if (bh->b_data + bh->b_size == req->bh->b_data)
		return 1;
	return ll_new_segment(q, req, max_segments);
}

static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
				struct request *next, int max_segments)
{
	int total_segments = req->nr_segments + next->nr_segments;
	int same_segment;

	same_segment = 0;
	if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) {
		total_segments--;
		same_segment = 1;
	}

	if (total_segments > max_segments)
		return 0;

	q->elevator.nr_segments -= same_segment;
	req->nr_segments = total_segments;
	return 1;
}

/*
 * "plug" the device if there are no outstanding requests: this will
 * force the transfer to start only after we have put all the requests
 * on the list.
 *
 * This is called with interrupts off and no requests on the queue.
 * (and with the request spinlock acquired)
 */
static void generic_plug_device(request_queue_t *q, kdev_t dev)
{
	/*
	 * no need to replug device
	 */
	if (!list_empty(&q->queue_head) || q->plugged)
		return;

	q->plugged = 1;
	queue_task(&q->plug_tq, &tq_disk);
}

/*
 * remove the plug and let it rip..
 */
static inline void __generic_unplug_device(request_queue_t *q)
{
	if (q->plugged) {
		q->plugged = 0;
		if (!list_empty(&q->queue_head))
			q->request_fn(q);
	}
}

static void generic_unplug_device(void *data)
{
	request_queue_t *q = (request_queue_t *) data;
	unsigned long flags;

	spin_lock_irqsave(&io_request_lock, flags);
	__generic_unplug_device(q);
	spin_unlock_irqrestore(&io_request_lock, flags);
}
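
/*
 * Illustrative note (not part of this file): the plug_tq entries queued by
 * generic_plug_device() run when somebody flushes the tq_disk task queue,
 * typically via
 *
 *	run_task_queue(&tq_disk);
 *
 * from the buffer-wait paths, which is what eventually invokes
 * generic_unplug_device() above for each plugged queue.
 */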

static void blk_init_free_list(request_queue_t *q)
{
	struct request *rq;
	int i;

	/*
	 * Divide requests in half between read and write. This used to
	 * be a 2/3 advantage for reads, but now reads can steal from
	 * the write free list.
	 */
	for (i = 0; i < QUEUE_NR_REQUESTS; i++) {
		rq = kmem_cache_alloc(request_cachep, SLAB_KERNEL);
		rq->rq_status = RQ_INACTIVE;
		list_add(&rq->table, &q->request_freelist[i & 1]);
	}

	init_waitqueue_head(&q->wait_for_request);
	spin_lock_init(&q->request_lock);
}

static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh);

/**
 * blk_init_queue - prepare a request queue for use with a block device
 * @q:    The &request_queue_t to be initialised
 * @rfn:  The function to be called to process requests that have been
 *        placed on the queue.
 *
 * Description:
 *    If a block device wishes to use the standard request handling procedures,
 *    which sorts requests and coalesces adjacent requests, then it must
 *    call blk_init_queue().  The function @rfn will be called when there
 *    are requests on the queue that need to be processed.  If the device
 *    supports plugging, then @rfn may not be called immediately when requests
 *    are available on the queue, but may be called at some time later instead.
 *    Plugged queues are generally unplugged when a buffer belonging to one
 *    of the requests on the queue is needed, or due to memory pressure.
 *
 *    @rfn is not required, or even expected, to remove all requests off the
 *    queue, but only as many as it can handle at a time.  If it does leave
 *    requests on the queue, it is responsible for arranging that the requests
 *    get dealt with eventually.
 *
 *    A global spin lock $io_request_lock must be held while manipulating the
 *    requests on the request queue.
 *
 *    The request on the head of the queue is by default assumed to be
 *    potentially active, and it is not considered for re-ordering or merging
 *    whenever the given queue is unplugged. This behaviour can be changed with
 *    blk_queue_headactive().
 *
 * Note:
 *    blk_init_queue() must be paired with a blk_cleanup_queue() call
 *    when the block device is deactivated (such as at module unload).
 **/
void blk_init_queue(request_queue_t * q, request_fn_proc * rfn)
{
	INIT_LIST_HEAD(&q->queue_head);
	INIT_LIST_HEAD(&q->request_freelist[READ]);
	INIT_LIST_HEAD(&q->request_freelist[WRITE]);
	elevator_init(&q->elevator, ELEVATOR_LINUS);
	blk_init_free_list(q);
	q->request_fn = rfn;
	q->back_merge_fn = ll_back_merge_fn;
	q->front_merge_fn = ll_front_merge_fn;
	q->merge_requests_fn = ll_merge_requests_fn;
	q->make_request_fn = __make_request;
	q->plug_tq.sync = 0;
	q->plug_tq.routine = &generic_unplug_device;
	q->plug_tq.data = q;
	q->plugged = 0;
	/*
	 * These booleans describe the queue properties.  We set the
	 * default (and most common) values here.  Other drivers can
	 * use the appropriate functions to alter the queue properties
	 * as appropriate.
	 */
	q->plug_device_fn = generic_plug_device;
	q->head_active = 1;
}
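
/*
 * Illustrative sketch (not part of this file): a conventional driver pairs
 * blk_init_queue() with blk_cleanup_queue() and drains its queue from the
 * request function, roughly as below.  The device name, MAJOR_NR handling
 * and the transfer itself are hypothetical and driver-specific:
 *
 *	static void mydev_request_fn(request_queue_t *q)
 *	{
 *		struct request *req;
 *
 *		while (!list_empty(&q->queue_head)) {
 *			req = blkdev_entry_next_request(&q->queue_head);
 *			... start or complete the transfer described by req ...
 *		}
 *	}
 *
 *	int __init mydev_init(void)
 *	{
 *		blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), mydev_request_fn);
 *		return 0;
 *	}
 *
 *	void mydev_exit(void)
 *	{
 *		blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR));
 *	}
 */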

#define blkdev_free_rq(list) list_entry((list)->next, struct request, table);
/*
 * Get a free request. io_request_lock must be held and interrupts
 * disabled on the way in.
 */
static inline struct request *get_request(request_queue_t *q, int rw)
{
	struct list_head *list = &q->request_freelist[rw];
	struct request *rq;

	/*
	 * Reads get preferential treatment and are allowed to steal
	 * from the write free list if necessary.
	 */
	if (!list_empty(list)) {
		rq = blkdev_free_rq(list);
		goto got_rq;
	}

	/*
	 * if the WRITE list is non-empty, we know that rw is READ
	 * and that the READ list is empty. allow reads to 'steal'
	 * from the WRITE list.
	 */
	if (!list_empty(&q->request_freelist[WRITE])) {
		list = &q->request_freelist[WRITE];
		rq = blkdev_free_rq(list);
		goto got_rq;
	}

	return NULL;

got_rq:
	list_del(&rq->table);
	rq->free_list = list;
	rq->rq_status = RQ_ACTIVE;
	rq->special = NULL;
	rq->q = q;
	return rq;
}

/*
 * No available requests for this queue, unplug the device.
 */
static struct request *__get_request_wait(request_queue_t *q, int rw)
{
	register struct request *rq;
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue_exclusive(&q->wait_for_request, &wait);
	for (;;) {
		__set_current_state(TASK_UNINTERRUPTIBLE);
		spin_lock_irq(&io_request_lock);
		rq = get_request(q, rw);
		spin_unlock_irq(&io_request_lock);
		if (rq)
			break;
		generic_unplug_device(q);
		schedule();
	}
	remove_wait_queue(&q->wait_for_request, &wait);
	current->state = TASK_RUNNING;
	return rq;
}

static inline struct request *get_request_wait(request_queue_t *q, int rw)
{
	register struct request *rq;

	spin_lock_irq(&io_request_lock);
	rq = get_request(q, rw);
	spin_unlock_irq(&io_request_lock);
	if (rq)
		return rq;
	return __get_request_wait(q, rw);
}

/* RO fail safe mechanism */

static long ro_bits[MAX_BLKDEV][8];

int is_read_only(kdev_t dev)
{
	int minor,major;

	major = MAJOR(dev);
	minor = MINOR(dev);
	if (major < 0 || major >= MAX_BLKDEV) return 0;
	return ro_bits[major][minor >> 5] & (1 << (minor & 31));
}

void set_device_ro(kdev_t dev,int flag)
{
	int minor,major;

	major = MAJOR(dev);
	minor = MINOR(dev);
	if (major < 0 || major >= MAX_BLKDEV) return;
	if (flag) ro_bits[major][minor >> 5] |= 1 << (minor & 31);
	else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31));
}

inline void drive_stat_acct (kdev_t dev, int rw,
				unsigned long nr_sectors, int new_io)
{
	unsigned int major = MAJOR(dev);
	unsigned int index;

	index = disk_index(dev);
	if ((index >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR))
		return;

	kstat.dk_drive[major][index] += new_io;
	if (rw == READ) {
		kstat.dk_drive_rio[major][index] += new_io;
		kstat.dk_drive_rblk[major][index] += nr_sectors;
	} else if (rw == WRITE) {
		kstat.dk_drive_wio[major][index] += new_io;
		kstat.dk_drive_wblk[major][index] += nr_sectors;
	} else
		printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n");
}

/*
 * add-request adds a request to the linked list.
 * It disables interrupts (acquires the request spinlock) so that it can muck
 * with the request-lists in peace. Thus it should be called with no spinlocks
 * held.
 *
 * By this point, req->cmd is always either READ/WRITE, never READA,
 * which is important for drive_stat_acct() above.
 */
static inline void add_request(request_queue_t * q, struct request * req,
			       struct list_head *head, int lat)
{
	int major;

	drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1);

	/*
	 * let selected elevator insert the request
	 */
	q->elevator.elevator_fn(req, &q->elevator, &q->queue_head, head, lat);

	/*
	 * FIXME(eric) I don't understand why there is a need for this
	 * special case code.  It clearly doesn't fit any more with
	 * the new queueing architecture, and it got added in 2.3.10.
	 * I am leaving this in here until I hear back from the COMPAQ
	 * people.
	 */
	major = MAJOR(req->rq_dev);
	if (major >= COMPAQ_SMART2_MAJOR+0 && major <= COMPAQ_SMART2_MAJOR+7)
		(q->request_fn)(q);
	if (major >= COMPAQ_CISS_MAJOR+0 && major <= COMPAQ_CISS_MAJOR+7)
		(q->request_fn)(q);
	if (major >= DAC960_MAJOR+0 && major <= DAC960_MAJOR+7)
		(q->request_fn)(q);
}

/*
 * Must be called with io_request_lock held and interrupts disabled
 */
void inline blkdev_release_request(struct request *req)
{
	req->rq_status = RQ_INACTIVE;

	/*
	 * Request may not have originated from ll_rw_blk
	 */
	if (req->free_list) {
		list_add(&req->table, req->free_list);
		req->free_list = NULL;
		wake_up(&req->q->wait_for_request);
	}
}

/*
 * Has to be called with the request spinlock acquired
 */
static void attempt_merge(request_queue_t * q,
			  struct request *req,
			  int max_sectors,
			  int max_segments)
{
	struct request *next;

	next = blkdev_next_request(req);
	if (req->sector + req->nr_sectors != next->sector)
		return;
	if (req->cmd != next->cmd
	    || req->rq_dev != next->rq_dev
	    || req->nr_sectors + next->nr_sectors > max_sectors
	    || next->sem)
		return;
	/*
	 * If we are not allowed to merge these requests, then
	 * return.  If we are allowed to merge, then the count
	 * will have been updated to the appropriate number,
	 * and we shouldn't do it here too.
	 */
	if (!(q->merge_requests_fn)(q, req, next, max_segments))
		return;

	req->bhtail->b_reqnext = next->bh;
	req->bhtail = next->bhtail;
	req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
	list_del(&next->queue);
	blkdev_release_request(next);
}

static inline void attempt_back_merge(request_queue_t * q,
				      struct request *req,
				      int max_sectors,
				      int max_segments)
{
	if (&req->queue == q->queue_head.prev)
		return;
	attempt_merge(q, req, max_sectors, max_segments);
}

static inline void attempt_front_merge(request_queue_t * q,
				       struct list_head * head,
				       struct request *req,
				       int max_sectors,
				       int max_segments)
{
	struct list_head * prev;

	prev = req->queue.prev;
	if (head == prev)
		return;
	attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments);
}

static int __make_request(request_queue_t * q, int rw,
			  struct buffer_head * bh)
{
	unsigned int sector, count;
	int max_segments = MAX_SEGMENTS;
	struct request * req = NULL, *freereq = NULL;
	int rw_ahead, max_sectors, el_ret;
	struct list_head *head;
	int latency;
	elevator_t *elevator = &q->elevator;

	count = bh->b_size >> 9;
	sector = bh->b_rsector;

	rw_ahead = 0;	/* normal case; gets changed below for READA */
	switch (rw) {
		case READA:
			rw_ahead = 1;
			rw = READ;	/* drop into READ */
		case READ:
		case WRITE:
			break;
		default:
			BUG();
			goto end_io;
	}

	/* We'd better have a real physical mapping!
	   Check this bit only if the buffer was dirty and just locked
	   down by us so at this point flushpage will block and
	   won't clear the mapped bit under us. */
	if (!buffer_mapped(bh))
		BUG();

	/*
	 * Temporary solution - in 2.5 this will be done by the lowlevel
	 * driver. Create a bounce buffer if the buffer data points into
	 * high memory - keep the original buffer otherwise.
	 */
#if CONFIG_HIGHMEM
	bh = create_bounce(rw, bh);
#endif

/* look for a free request. */
	/*
	 * Try to coalesce the new request with old requests
	 */
	max_sectors = get_max_sectors(bh->b_rdev);

	latency = elevator_request_latency(elevator, rw);

	/*
	 * Now we acquire the request spinlock, we have to be mega careful
	 * not to schedule or do something nonatomic
	 */
again:
	spin_lock_irq(&io_request_lock);

	/*
	 * skip first entry, for devices with active queue head
	 */
	head = &q->queue_head;
	if (q->head_active && !q->plugged)
		head = head->next;

	if (list_empty(head)) {
		q->plug_device_fn(q, bh->b_rdev); /* is atomic */
		goto get_rq;
	}

	el_ret = elevator->elevator_merge_fn(q, &req, bh, rw,
					     &max_sectors, &max_segments);
	switch (el_ret) {

		case ELEVATOR_BACK_MERGE:
			if (!q->back_merge_fn(q, req, bh, max_segments))
				break;
			req->bhtail->b_reqnext = bh;
			req->bhtail = bh;
			req->nr_sectors = req->hard_nr_sectors += count;
			req->e = elevator;
			drive_stat_acct(req->rq_dev, req->cmd, count, 0);
			attempt_back_merge(q, req, max_sectors, max_segments);
			goto out;

		case ELEVATOR_FRONT_MERGE:
			if (!q->front_merge_fn(q, req, bh, max_segments))
				break;
			bh->b_reqnext = req->bh;
			req->bh = bh;
			req->buffer = bh->b_data;
			req->current_nr_sectors = count;
			req->sector = req->hard_sector = sector;
			req->nr_sectors = req->hard_nr_sectors += count;
			req->e = elevator;
			drive_stat_acct(req->rq_dev, req->cmd, count, 0);
			attempt_front_merge(q, head, req, max_sectors, max_segments);
			goto out;

		/*
		 * elevator says don't/can't merge. get new request
		 */
		case ELEVATOR_NO_MERGE:
			break;

		default:
			printk("elevator returned crap (%d)\n", el_ret);
			BUG();
	}

	/*
	 * Grab a free request from the freelist. Reads first try their
	 * own queue - if that is empty, we steal from the write list.
	 * Writes must block if the write list is empty, and read aheads
	 * are not crucial.
	 */
get_rq:
	if (freereq) {
		req = freereq;
		freereq = NULL;
	} else if ((req = get_request(q, rw)) == NULL) {
		spin_unlock_irq(&io_request_lock);
		if (rw_ahead)
			goto end_io;

		freereq = __get_request_wait(q, rw);
		goto again;
	}

	/* fill up the request-info, and add it to the queue */
	req->cmd = rw;
	req->errors = 0;
	req->hard_sector = req->sector = sector;
	req->hard_nr_sectors = req->nr_sectors = count;
	req->current_nr_sectors = count;
	req->nr_segments = 1; /* Always 1 for a new request. */
	req->nr_hw_segments = 1; /* Always 1 for a new request. */
	req->buffer = bh->b_data;
	req->sem = NULL;
	req->bh = bh;
	req->bhtail = bh;
	req->rq_dev = bh->b_rdev;
	req->e = elevator;
	add_request(q, req, head, latency);
out:
	if (!q->plugged)
		(q->request_fn)(q);
	if (freereq)
		blkdev_release_request(freereq);
	spin_unlock_irq(&io_request_lock);
	return 0;
end_io:
	bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
	return 0;
}

/**
 * generic_make_request: hand a buffer head to its device driver for I/O
 * @rw:  READ, WRITE, or READA - what sort of I/O is desired.
 * @bh:  The buffer head describing the location in memory and on the device.
 *
 * generic_make_request() is used to make I/O requests of block
 * devices. It is passed a &struct buffer_head and a &rw value.  The
 * %READ and %WRITE options are (hopefully) obvious in meaning.  The
 * %READA value means that a read is required, but that the driver is
 * free to fail the request if, for example, it cannot get needed
 * resources immediately.
 *
 * generic_make_request() does not return any status.  The
 * success/failure status of the request, along with notification of
 * completion, is delivered asynchronously through the bh->b_end_io
 * function described (one day) elsewhere.
 *
 * The caller of generic_make_request must make sure that b_page,
 * b_addr, b_size are set to describe the memory buffer, that b_rdev
 * and b_rsector are set to describe the device address, and the
 * b_end_io and optionally b_private are set to describe how
 * completion notification should be signaled.  BH_Mapped should also
 * be set (to confirm that b_dev and b_blocknr are valid).
 *
 * generic_make_request and the drivers it calls may use b_reqnext,
 * and may change b_rdev and b_rsector.  So the values of these fields
 * should NOT be depended on after the call to generic_make_request.
 * Because of this, the caller should record the device address
 * information in b_dev and b_blocknr.
 *
 * Apart from those fields mentioned above, no other fields, and in
 * particular, no other flags, are changed by generic_make_request or
 * any lower level drivers.
 * */
void generic_make_request (int rw, struct buffer_head * bh)
{
	int major = MAJOR(bh->b_rdev);
	request_queue_t *q;

	if (!bh->b_end_io) BUG();
	if (blk_size[major]) {
		unsigned long maxsector = (blk_size[major][MINOR(bh->b_rdev)] << 1) + 1;
		unsigned int sector, count;

		count = bh->b_size >> 9;
		sector = bh->b_rsector;

		if (maxsector < count || maxsector - count < sector) {
			bh->b_state &= (1 << BH_Lock) | (1 << BH_Mapped);
			if (blk_size[major][MINOR(bh->b_rdev)]) {

				/* This may well happen - the kernel calls bread()
				   without checking the size of the device, e.g.,
				   when mounting a device. */
				printk(KERN_INFO
				       "attempt to access beyond end of device\n");
				printk(KERN_INFO "%s: rw=%d, want=%d, limit=%d\n",
				       kdevname(bh->b_rdev), rw,
				       (sector + count)>>1,
				       blk_size[major][MINOR(bh->b_rdev)]);
			}
			bh->b_end_io(bh, 0);
			return;
		}
	}

	/*
	 * Resolve the mapping until finished. (drivers are
	 * still free to implement/resolve their own stacking
	 * by explicitly returning 0)
	 */
	/* NOTE: we don't repeat the blk_size check for each new device.
	 * Stacking drivers are expected to know what they are doing.
	 */
	do {
		q = blk_get_queue(bh->b_rdev);
		if (!q) {
			printk(KERN_ERR
			       "generic_make_request: Trying to access nonexistent block-device %s (%ld)\n",
			       kdevname(bh->b_rdev), bh->b_rsector);
			buffer_IO_error(bh);
			break;
		}
	} while (q->make_request_fn(q, rw, bh));
}

/**
 * submit_bh: submit a buffer_head to the block device layer for I/O
 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
 * @bh: The &struct buffer_head which describes the I/O
 *
 * submit_bh() is very similar in purpose to generic_make_request(), and
 * uses that function to do most of the work.
 *
 * The extra functionality provided by submit_bh is to determine
 * b_rsector from b_blocknr and b_size, and to set b_rdev from b_dev.
 * This is appropriate for IO requests that come from the buffer
 * cache and page cache which (currently) always use aligned blocks.
 */
void submit_bh(int rw, struct buffer_head * bh)
{
	if (!test_bit(BH_Lock, &bh->b_state))
		BUG();

	set_bit(BH_Req, &bh->b_state);

	/*
	 * First step, 'identity mapping' - RAID or LVM might
	 * further remap this.
	 */
	bh->b_rdev = bh->b_dev;
	bh->b_rsector = bh->b_blocknr * (bh->b_size>>9);

	generic_make_request(rw, bh);

	switch (rw) {
		case WRITE:
			kstat.pgpgout++;
			break;
		default:
			kstat.pgpgin++;
			break;
	}
}
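
/*
 * Illustrative sketch (not part of this file): a caller that wants its own
 * completion handler bypasses ll_rw_block() and calls submit_bh() on a
 * locked, mapped buffer directly, roughly (my_end_io is hypothetical):
 *
 *	lock_buffer(bh);
 *	bh->b_end_io = my_end_io;
 *	submit_bh(READ, bh);
 */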

/*
 * Default IO end handler, used by "ll_rw_block()".
 */
static void end_buffer_io_sync(struct buffer_head *bh, int uptodate)
{
	mark_buffer_uptodate(bh, uptodate);
	unlock_buffer(bh);
}

/**
 * ll_rw_block: low-level access to block devices
 * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
 * @nr: number of &struct buffer_heads in the array
 * @bhs: array of pointers to &struct buffer_head
 *
 * ll_rw_block() takes an array of pointers to &struct buffer_heads,
 * and requests an I/O operation on them, either a %READ or a %WRITE.
 * The third %READA option is described in the documentation for
 * generic_make_request() which ll_rw_block() calls.
 *
 * This function provides extra functionality that is not in
 * generic_make_request() that is relevant to buffers in the buffer
 * cache or page cache.  In particular it drops any buffer that it
 * cannot get a lock on (with the BH_Lock state bit), any buffer that
 * appears to be clean when doing a write request, and any buffer that
 * appears to be up-to-date when doing a read request.  Further it marks
 * as clean buffers that are processed for writing (the buffer cache
 * won't assume that they are actually clean until the buffer gets
 * unlocked).
 *
 * ll_rw_block sets b_end_io to a simple completion handler that marks
 * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
 * any waiters.  A client that needs a more interesting completion
 * routine should call submit_bh() (or generic_make_request())
 * directly.
 *
 * Caveat:
 *  All of the buffers must be for the same device, and must also be
 *  of the current approved size for the device. */

void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
{
	unsigned int major;
	int correct_size;
	int i;

	major = MAJOR(bhs[0]->b_dev);

	/* Determine correct block size for this device. */
	correct_size = BLOCK_SIZE;
	if (blksize_size[major]) {
		i = blksize_size[major][MINOR(bhs[0]->b_dev)];
		if (i)
			correct_size = i;
	}

	/* Verify requested block sizes. */
	for (i = 0; i < nr; i++) {
		struct buffer_head *bh;
		bh = bhs[i];
		if (bh->b_size != correct_size) {
			printk(KERN_NOTICE "ll_rw_block: device %s: "
			       "only %d-char blocks implemented (%u)\n",
			       kdevname(bhs[0]->b_dev),
			       correct_size, bh->b_size);
			goto sorry;
		}
	}

	if ((rw & WRITE) && is_read_only(bhs[0]->b_dev)) {
		printk(KERN_NOTICE "Can't write to read-only device %s\n",
		       kdevname(bhs[0]->b_dev));
		goto sorry;
	}

	for (i = 0; i < nr; i++) {
		struct buffer_head *bh;
		bh = bhs[i];

		/* Only one thread can actually submit the I/O. */
		if (test_and_set_bit(BH_Lock, &bh->b_state))
			continue;

		/* We have the buffer lock */
		bh->b_end_io = end_buffer_io_sync;

		switch(rw) {
		case WRITE:
			if (!atomic_set_buffer_clean(bh))
				/* Hmmph! Nothing to write */
				goto end_io;
			__mark_buffer_clean(bh);
			break;

		case READA:
		case READ:
			if (buffer_uptodate(bh))
				/* Hmmph! Already have it */
				goto end_io;
			break;
		default:
			BUG();
	end_io:
			bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
			continue;
		}

		submit_bh(rw, bh);
	}
	return;

sorry:
	/* Make sure we don't get infinite dirty retries.. */
	for (i = 0; i < nr; i++)
		mark_buffer_clean(bhs[i]);
}
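
/*
 * Illustrative sketch (not part of this file): a synchronous read through
 * the buffer cache, essentially what bread() does, assuming dev/block/size
 * describe a valid block on the device:
 *
 *	struct buffer_head *bh = getblk(dev, block, size);
 *
 *	if (!buffer_uptodate(bh)) {
 *		ll_rw_block(READ, 1, &bh);
 *		wait_on_buffer(bh);
 *	}
 */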

#ifdef CONFIG_STRAM_SWAP
extern int stram_device_init (void);
#endif

/*
 * First step of what used to be end_request
 *
 * 0 means continue with end_that_request_last,
 * 1 means we are done
 */
int end_that_request_first (struct request *req, int uptodate, char *name)
{
	struct buffer_head * bh;
	int nsect;

	req->errors = 0;
	if (!uptodate)
		printk("end_request: I/O error, dev %s (%s), sector %lu\n",
			kdevname(req->rq_dev), name, req->sector);

	if ((bh = req->bh) != NULL) {
		nsect = bh->b_size >> 9;
		req->bh = bh->b_reqnext;
		bh->b_reqnext = NULL;
		bh->b_end_io(bh, uptodate);
		if ((bh = req->bh) != NULL) {
			req->hard_sector += nsect;
			req->hard_nr_sectors -= nsect;
			req->sector = req->hard_sector;
			req->nr_sectors = req->hard_nr_sectors;

			req->current_nr_sectors = bh->b_size >> 9;
			if (req->nr_sectors < req->current_nr_sectors) {
				req->nr_sectors = req->current_nr_sectors;
				printk("end_request: buffer-list destroyed\n");
			}
			req->buffer = bh->b_data;
			return 1;
		}
	}
	return 0;
}

void end_that_request_last(struct request *req)
{
	if (req->e) {
		printk("end_that_request_last called with non-dequeued req\n");
		BUG();
	}
	if (req->sem != NULL)
		up(req->sem);

	blkdev_release_request(req);
}
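
/*
 * Illustrative sketch (not part of this file): the usual driver completion
 * path, roughly what the end_request() helper in blk.h does, walks the
 * buffer list with end_that_request_first() and only dequeues and finishes
 * the request once that returns 0 (DEVICE_NAME is driver-provided):
 *
 *	if (!end_that_request_first(req, uptodate, DEVICE_NAME)) {
 *		blkdev_dequeue_request(req);
 *		end_that_request_last(req);
 *	}
 */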

int __init blk_dev_init(void)
{
	struct blk_dev_struct *dev;

	request_cachep = kmem_cache_create("blkdev_requests",
					   sizeof(struct request),
					   0, SLAB_HWCACHE_ALIGN, NULL, NULL);

	if (!request_cachep)
		panic("Can't create request pool slab cache\n");

	for (dev = blk_dev + MAX_BLKDEV; dev-- != blk_dev;)
		dev->queue = NULL;

	memset(ro_bits,0,sizeof(ro_bits));
	memset(max_readahead, 0, sizeof(max_readahead));
	memset(max_sectors, 0, sizeof(max_sectors));
#ifdef CONFIG_AMIGA_Z2RAM
	z2_init();
#endif
#ifdef CONFIG_STRAM_SWAP
	stram_device_init();
#endif
#ifdef CONFIG_BLK_DEV_RAM
	rd_init();
#endif
#ifdef CONFIG_BLK_DEV_LOOP
	loop_init();
#endif
#ifdef CONFIG_ISP16_CDI
	isp16_init();
#endif
#if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_IDE)
	ide_init();		/* this MUST precede hd_init */
#endif
#if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_HD)
	hd_init();
#endif
#ifdef CONFIG_BLK_DEV_PS2
	ps2esdi_init();
#endif
#ifdef CONFIG_BLK_DEV_XD
	xd_init();
#endif
#ifdef CONFIG_BLK_DEV_MFM
	mfm_init();
#endif
#ifdef CONFIG_PARIDE
	{ extern void paride_init(void); paride_init(); };
#endif
#ifdef CONFIG_MAC_FLOPPY
	swim3_init();
#endif
#ifdef CONFIG_BLK_DEV_SWIM_IOP
	swimiop_init();
#endif
#ifdef CONFIG_AMIGA_FLOPPY
	amiga_floppy_init();
#endif
#ifdef CONFIG_ATARI_FLOPPY
	atari_floppy_init();
#endif
#ifdef CONFIG_BLK_DEV_FD
	floppy_init();
#else
#if defined(__i386__)	/* Do we even need this? */
	outb_p(0xc, 0x3f2);
#endif
#endif
#ifdef CONFIG_CDU31A
	cdu31a_init();
#endif
#ifdef CONFIG_ATARI_ACSI
	acsi_init();
#endif
#ifdef CONFIG_MCD
	mcd_init();
#endif
#ifdef CONFIG_MCDX
	mcdx_init();
#endif
#ifdef CONFIG_SBPCD
	sbpcd_init();
#endif
#ifdef CONFIG_AZTCD
	aztcd_init();
#endif
#ifdef CONFIG_CDU535
	sony535_init();
#endif
#ifdef CONFIG_GSCD
	gscd_init();
#endif
#ifdef CONFIG_CM206
	cm206_init();
#endif
#ifdef CONFIG_OPTCD
	optcd_init();
#endif
#ifdef CONFIG_SJCD
	sjcd_init();
#endif
#ifdef CONFIG_APBLOCK
	ap_init();
#endif
#ifdef CONFIG_DDV
	ddv_init();
#endif
#ifdef CONFIG_BLK_DEV_NBD
	nbd_init();
#endif
#ifdef CONFIG_MDISK
	mdisk_init();
#endif
#ifdef CONFIG_DASD
	dasd_init();
#endif
#ifdef CONFIG_SUN_JSFLASH
	jsfd_init();
#endif
#ifdef CONFIG_BLK_DEV_LVM
	lvm_init();
#endif
	return 0;
}

EXPORT_SYMBOL(io_request_lock);
EXPORT_SYMBOL(end_that_request_first);
EXPORT_SYMBOL(end_that_request_last);
EXPORT_SYMBOL(blk_init_queue);
EXPORT_SYMBOL(blk_get_queue);
EXPORT_SYMBOL(blk_cleanup_queue);
EXPORT_SYMBOL(blk_queue_headactive);
EXPORT_SYMBOL(blk_queue_pluggable);
EXPORT_SYMBOL(blk_queue_make_request);
EXPORT_SYMBOL(generic_make_request);
EXPORT_SYMBOL(blkdev_release_request);