Automatic merge of rsync://rsync.kernel.org/pub/scm/linux/kernel/git/gregkh/driver...
[linux-2.6/verdex.git] / drivers / block / aoe / aoecmd.c
blobb5be4b7d7b5b3acaab207e9175de16fc0dcbc895
1 /* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */
2 /*
3 * aoecmd.c
4 * Filesystem request handling methods
5 */
7 #include <linux/hdreg.h>
8 #include <linux/blkdev.h>
9 #include <linux/skbuff.h>
10 #include <linux/netdevice.h>
11 #include "aoe.h"
/* Interval, in jiffies, between runs of the retransmit timer. */
#define TIMERTICK	(HZ / 10)
/* Lower and upper clamp, in jiffies, for round-trip time samples. */
#define MINTIMER	(TIMERTICK * 2)
#define MAXTIMER	(HZ * 2)
/* After MAXWAIT seconds, give up and fail dev */
#define MAXWAIT		(3 * 60)
18 static struct sk_buff *
19 new_skb(struct net_device *if_dev, ulong len)
21 struct sk_buff *skb;
23 skb = alloc_skb(len, GFP_ATOMIC);
24 if (skb) {
25 skb->nh.raw = skb->mac.raw = skb->data;
26 skb->dev = if_dev;
27 skb->protocol = __constant_htons(ETH_P_AOE);
28 skb->priority = 0;
29 skb_put(skb, len);
30 skb->next = skb->prev = NULL;
32 /* tell the network layer not to perform IP checksums
33 * or to get the NIC to do it
35 skb->ip_summed = CHECKSUM_NONE;
37 return skb;
40 static struct sk_buff *
41 skb_prepare(struct aoedev *d, struct frame *f)
43 struct sk_buff *skb;
44 char *p;
46 skb = new_skb(d->ifp, f->ndata + f->writedatalen);
47 if (!skb) {
48 printk(KERN_INFO "aoe: skb_prepare: failure to allocate skb\n");
49 return NULL;
52 p = skb->mac.raw;
53 memcpy(p, f->data, f->ndata);
55 if (f->writedatalen) {
56 p += sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
57 memcpy(p, f->bufaddr, f->writedatalen);
60 return skb;
63 static struct frame *
64 getframe(struct aoedev *d, int tag)
66 struct frame *f, *e;
68 f = d->frames;
69 e = f + d->nframes;
70 for (; f<e; f++)
71 if (f->tag == tag)
72 return f;
73 return NULL;
77 * Leave the top bit clear so we have tagspace for userland.
78 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
79 * This driver reserves tag -1 to mean "unused frame."
81 static int
82 newtag(struct aoedev *d)
84 register ulong n;
86 n = jiffies & 0xffff;
87 return n |= (++d->lasttag & 0x7fff) << 16;
90 static int
91 aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
93 u32 host_tag = newtag(d);
95 memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
96 memcpy(h->dst, d->addr, sizeof h->dst);
97 h->type = __constant_cpu_to_be16(ETH_P_AOE);
98 h->verfl = AOE_HVER;
99 h->major = cpu_to_be16(d->aoemajor);
100 h->minor = d->aoeminor;
101 h->cmd = AOECMD_ATA;
102 h->tag = cpu_to_be32(host_tag);
104 return host_tag;
107 static void
108 aoecmd_ata_rw(struct aoedev *d, struct frame *f)
110 struct aoe_hdr *h;
111 struct aoe_atahdr *ah;
112 struct buf *buf;
113 struct sk_buff *skb;
114 ulong bcnt;
115 register sector_t sector;
116 char writebit, extbit;
118 writebit = 0x10;
119 extbit = 0x4;
121 buf = d->inprocess;
123 sector = buf->sector;
124 bcnt = buf->bv_resid;
125 if (bcnt > MAXATADATA)
126 bcnt = MAXATADATA;
128 /* initialize the headers & frame */
129 h = (struct aoe_hdr *) f->data;
130 ah = (struct aoe_atahdr *) (h+1);
131 f->ndata = sizeof *h + sizeof *ah;
132 memset(h, 0, f->ndata);
133 f->tag = aoehdr_atainit(d, h);
134 f->waited = 0;
135 f->buf = buf;
136 f->bufaddr = buf->bufaddr;
138 /* set up ata header */
139 ah->scnt = bcnt >> 9;
140 ah->lba0 = sector;
141 ah->lba1 = sector >>= 8;
142 ah->lba2 = sector >>= 8;
143 ah->lba3 = sector >>= 8;
144 if (d->flags & DEVFL_EXT) {
145 ah->aflags |= AOEAFL_EXT;
146 ah->lba4 = sector >>= 8;
147 ah->lba5 = sector >>= 8;
148 } else {
149 extbit = 0;
150 ah->lba3 &= 0x0f;
151 ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
154 if (bio_data_dir(buf->bio) == WRITE) {
155 ah->aflags |= AOEAFL_WRITE;
156 f->writedatalen = bcnt;
157 } else {
158 writebit = 0;
159 f->writedatalen = 0;
162 ah->cmdstat = WIN_READ | writebit | extbit;
164 /* mark all tracking fields and load out */
165 buf->nframesout += 1;
166 buf->bufaddr += bcnt;
167 buf->bv_resid -= bcnt;
168 /* printk(KERN_INFO "aoe: bv_resid=%ld\n", buf->bv_resid); */
169 buf->resid -= bcnt;
170 buf->sector += bcnt >> 9;
171 if (buf->resid == 0) {
172 d->inprocess = NULL;
173 } else if (buf->bv_resid == 0) {
174 buf->bv++;
175 buf->bv_resid = buf->bv->bv_len;
176 buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
179 skb = skb_prepare(d, f);
180 if (skb) {
181 skb->next = NULL;
182 if (d->sendq_hd)
183 d->sendq_tl->next = skb;
184 else
185 d->sendq_hd = skb;
186 d->sendq_tl = skb;
190 /* enters with d->lock held */
191 void
192 aoecmd_work(struct aoedev *d)
194 struct frame *f;
195 struct buf *buf;
196 loop:
197 f = getframe(d, FREETAG);
198 if (f == NULL)
199 return;
200 if (d->inprocess == NULL) {
201 if (list_empty(&d->bufq))
202 return;
203 buf = container_of(d->bufq.next, struct buf, bufs);
204 list_del(d->bufq.next);
205 /*printk(KERN_INFO "aoecmd_work: bi_size=%ld\n", buf->bio->bi_size); */
206 d->inprocess = buf;
208 aoecmd_ata_rw(d, f);
209 goto loop;
212 static void
213 rexmit(struct aoedev *d, struct frame *f)
215 struct sk_buff *skb;
216 struct aoe_hdr *h;
217 char buf[128];
218 u32 n;
220 n = newtag(d);
222 snprintf(buf, sizeof buf,
223 "%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n",
224 "retransmit",
225 d->aoemajor, d->aoeminor, f->tag, jiffies, n);
226 aoechr_error(buf);
228 h = (struct aoe_hdr *) f->data;
229 f->tag = n;
230 h->tag = cpu_to_be32(n);
232 skb = skb_prepare(d, f);
233 if (skb) {
234 skb->next = NULL;
235 if (d->sendq_hd)
236 d->sendq_tl->next = skb;
237 else
238 d->sendq_hd = skb;
239 d->sendq_tl = skb;
243 static int
244 tsince(int tag)
246 int n;
248 n = jiffies & 0xffff;
249 n -= tag & 0xffff;
250 if (n < 0)
251 n += 1<<16;
252 return n;
255 static void
256 rexmit_timer(ulong vp)
258 struct aoedev *d;
259 struct frame *f, *e;
260 struct sk_buff *sl;
261 register long timeout;
262 ulong flags, n;
264 d = (struct aoedev *) vp;
265 sl = NULL;
267 /* timeout is always ~150% of the moving average */
268 timeout = d->rttavg;
269 timeout += timeout >> 1;
271 spin_lock_irqsave(&d->lock, flags);
273 if (d->flags & DEVFL_TKILL) {
274 tdie: spin_unlock_irqrestore(&d->lock, flags);
275 return;
277 f = d->frames;
278 e = f + d->nframes;
279 for (; f<e; f++) {
280 if (f->tag != FREETAG && tsince(f->tag) >= timeout) {
281 n = f->waited += timeout;
282 n /= HZ;
283 if (n > MAXWAIT) { /* waited too long. device failure. */
284 aoedev_downdev(d);
285 goto tdie;
287 rexmit(d, f);
291 sl = d->sendq_hd;
292 d->sendq_hd = d->sendq_tl = NULL;
293 if (sl) {
294 n = d->rttavg <<= 1;
295 if (n > MAXTIMER)
296 d->rttavg = MAXTIMER;
299 d->timer.expires = jiffies + TIMERTICK;
300 add_timer(&d->timer);
302 spin_unlock_irqrestore(&d->lock, flags);
304 aoenet_xmit(sl);
307 static void
308 ataid_complete(struct aoedev *d, unsigned char *id)
310 u64 ssize;
311 u16 n;
313 /* word 83: command set supported */
314 n = le16_to_cpup((__le16 *) &id[83<<1]);
316 /* word 86: command set/feature enabled */
317 n |= le16_to_cpup((__le16 *) &id[86<<1]);
319 if (n & (1<<10)) { /* bit 10: LBA 48 */
320 d->flags |= DEVFL_EXT;
322 /* word 100: number lba48 sectors */
323 ssize = le64_to_cpup((__le64 *) &id[100<<1]);
325 /* set as in ide-disk.c:init_idedisk_capacity */
326 d->geo.cylinders = ssize;
327 d->geo.cylinders /= (255 * 63);
328 d->geo.heads = 255;
329 d->geo.sectors = 63;
330 } else {
331 d->flags &= ~DEVFL_EXT;
333 /* number lba28 sectors */
334 ssize = le32_to_cpup((__le32 *) &id[60<<1]);
336 /* NOTE: obsolete in ATA 6 */
337 d->geo.cylinders = le16_to_cpup((__le16 *) &id[54<<1]);
338 d->geo.heads = le16_to_cpup((__le16 *) &id[55<<1]);
339 d->geo.sectors = le16_to_cpup((__le16 *) &id[56<<1]);
341 d->ssize = ssize;
342 d->geo.start = 0;
343 if (d->gd != NULL) {
344 d->gd->capacity = ssize;
345 d->flags |= DEVFL_UP;
346 return;
348 if (d->flags & DEVFL_WORKON) {
349 printk(KERN_INFO "aoe: ataid_complete: can't schedule work, it's already on! "
350 "(This really shouldn't happen).\n");
351 return;
353 INIT_WORK(&d->work, aoeblk_gdalloc, d);
354 schedule_work(&d->work);
355 d->flags |= DEVFL_WORKON;
358 static void
359 calc_rttavg(struct aoedev *d, int rtt)
361 register long n;
363 n = rtt;
364 if (n < MINTIMER)
365 n = MINTIMER;
366 else if (n > MAXTIMER)
367 n = MAXTIMER;
369 /* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
370 n -= d->rttavg;
371 d->rttavg += n >> 2;
374 void
375 aoecmd_ata_rsp(struct sk_buff *skb)
377 struct aoedev *d;
378 struct aoe_hdr *hin;
379 struct aoe_atahdr *ahin, *ahout;
380 struct frame *f;
381 struct buf *buf;
382 struct sk_buff *sl;
383 register long n;
384 ulong flags;
385 char ebuf[128];
386 u16 aoemajor;
388 hin = (struct aoe_hdr *) skb->mac.raw;
389 aoemajor = be16_to_cpu(hin->major);
390 d = aoedev_by_aoeaddr(aoemajor, hin->minor);
391 if (d == NULL) {
392 snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
393 "for unknown device %d.%d\n",
394 aoemajor, hin->minor);
395 aoechr_error(ebuf);
396 return;
399 spin_lock_irqsave(&d->lock, flags);
401 f = getframe(d, be32_to_cpu(hin->tag));
402 if (f == NULL) {
403 spin_unlock_irqrestore(&d->lock, flags);
404 snprintf(ebuf, sizeof ebuf,
405 "%15s e%d.%d tag=%08x@%08lx\n",
406 "unexpected rsp",
407 be16_to_cpu(hin->major),
408 hin->minor,
409 be32_to_cpu(hin->tag),
410 jiffies);
411 aoechr_error(ebuf);
412 return;
415 calc_rttavg(d, tsince(f->tag));
417 ahin = (struct aoe_atahdr *) (hin+1);
418 ahout = (struct aoe_atahdr *) (f->data + sizeof(struct aoe_hdr));
419 buf = f->buf;
421 if (ahin->cmdstat & 0xa9) { /* these bits cleared on success */
422 printk(KERN_CRIT "aoe: aoecmd_ata_rsp: ata error cmd=%2.2Xh "
423 "stat=%2.2Xh from e%ld.%ld\n",
424 ahout->cmdstat, ahin->cmdstat,
425 d->aoemajor, d->aoeminor);
426 if (buf)
427 buf->flags |= BUFFL_FAIL;
428 } else {
429 switch (ahout->cmdstat) {
430 case WIN_READ:
431 case WIN_READ_EXT:
432 n = ahout->scnt << 9;
433 if (skb->len - sizeof *hin - sizeof *ahin < n) {
434 printk(KERN_CRIT "aoe: aoecmd_ata_rsp: runt "
435 "ata data size in read. skb->len=%d\n",
436 skb->len);
437 /* fail frame f? just returning will rexmit. */
438 spin_unlock_irqrestore(&d->lock, flags);
439 return;
441 memcpy(f->bufaddr, ahin+1, n);
442 case WIN_WRITE:
443 case WIN_WRITE_EXT:
444 break;
445 case WIN_IDENTIFY:
446 if (skb->len - sizeof *hin - sizeof *ahin < 512) {
447 printk(KERN_INFO "aoe: aoecmd_ata_rsp: runt data size "
448 "in ataid. skb->len=%d\n", skb->len);
449 spin_unlock_irqrestore(&d->lock, flags);
450 return;
452 ataid_complete(d, (char *) (ahin+1));
453 /* d->flags |= DEVFL_WC_UPDATE; */
454 break;
455 default:
456 printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized "
457 "outbound ata command %2.2Xh for %d.%d\n",
458 ahout->cmdstat,
459 be16_to_cpu(hin->major),
460 hin->minor);
464 if (buf) {
465 buf->nframesout -= 1;
466 if (buf->nframesout == 0 && buf->resid == 0) {
467 unsigned long duration = jiffies - buf->start_time;
468 unsigned long n_sect = buf->bio->bi_size >> 9;
469 struct gendisk *disk = d->gd;
471 if (bio_data_dir(buf->bio) == WRITE) {
472 disk_stat_inc(disk, writes);
473 disk_stat_add(disk, write_ticks, duration);
474 disk_stat_add(disk, write_sectors, n_sect);
475 } else {
476 disk_stat_inc(disk, reads);
477 disk_stat_add(disk, read_ticks, duration);
478 disk_stat_add(disk, read_sectors, n_sect);
480 disk_stat_add(disk, io_ticks, duration);
481 n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
482 bio_endio(buf->bio, buf->bio->bi_size, n);
483 mempool_free(buf, d->bufpool);
487 f->buf = NULL;
488 f->tag = FREETAG;
490 aoecmd_work(d);
492 sl = d->sendq_hd;
493 d->sendq_hd = d->sendq_tl = NULL;
495 spin_unlock_irqrestore(&d->lock, flags);
497 aoenet_xmit(sl);
500 void
501 aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
503 struct aoe_hdr *h;
504 struct aoe_cfghdr *ch;
505 struct sk_buff *skb, *sl;
506 struct net_device *ifp;
508 sl = NULL;
510 read_lock(&dev_base_lock);
511 for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) {
512 dev_hold(ifp);
513 if (!is_aoe_netif(ifp))
514 continue;
516 skb = new_skb(ifp, sizeof *h + sizeof *ch);
517 if (skb == NULL) {
518 printk(KERN_INFO "aoe: aoecmd_cfg: skb alloc failure\n");
519 continue;
521 h = (struct aoe_hdr *) skb->mac.raw;
522 memset(h, 0, sizeof *h + sizeof *ch);
524 memset(h->dst, 0xff, sizeof h->dst);
525 memcpy(h->src, ifp->dev_addr, sizeof h->src);
526 h->type = __constant_cpu_to_be16(ETH_P_AOE);
527 h->verfl = AOE_HVER;
528 h->major = cpu_to_be16(aoemajor);
529 h->minor = aoeminor;
530 h->cmd = AOECMD_CFG;
532 skb->next = sl;
533 sl = skb;
535 read_unlock(&dev_base_lock);
537 aoenet_xmit(sl);
541 * Since we only call this in one place (and it only prepares one frame)
542 * we just return the skb. Usually we'd chain it up to the aoedev sendq.
544 static struct sk_buff *
545 aoecmd_ata_id(struct aoedev *d)
547 struct aoe_hdr *h;
548 struct aoe_atahdr *ah;
549 struct frame *f;
550 struct sk_buff *skb;
552 f = getframe(d, FREETAG);
553 if (f == NULL) {
554 printk(KERN_CRIT "aoe: aoecmd_ata_id: can't get a frame. "
555 "This shouldn't happen.\n");
556 return NULL;
559 /* initialize the headers & frame */
560 h = (struct aoe_hdr *) f->data;
561 ah = (struct aoe_atahdr *) (h+1);
562 f->ndata = sizeof *h + sizeof *ah;
563 memset(h, 0, f->ndata);
564 f->tag = aoehdr_atainit(d, h);
565 f->waited = 0;
566 f->writedatalen = 0;
568 /* this message initializes the device, so we reset the rttavg */
569 d->rttavg = MAXTIMER;
571 /* set up ata header */
572 ah->scnt = 1;
573 ah->cmdstat = WIN_IDENTIFY;
574 ah->lba3 = 0xa0;
576 skb = skb_prepare(d, f);
578 /* we now want to start the rexmit tracking */
579 d->flags &= ~DEVFL_TKILL;
580 d->timer.data = (ulong) d;
581 d->timer.function = rexmit_timer;
582 d->timer.expires = jiffies + TIMERTICK;
583 add_timer(&d->timer);
585 return skb;
588 void
589 aoecmd_cfg_rsp(struct sk_buff *skb)
591 struct aoedev *d;
592 struct aoe_hdr *h;
593 struct aoe_cfghdr *ch;
594 ulong flags, sysminor, aoemajor;
595 u16 bufcnt;
596 struct sk_buff *sl;
597 enum { MAXFRAMES = 8 };
599 h = (struct aoe_hdr *) skb->mac.raw;
600 ch = (struct aoe_cfghdr *) (h+1);
603 * Enough people have their dip switches set backwards to
604 * warrant a loud message for this special case.
606 aoemajor = be16_to_cpu(h->major);
607 if (aoemajor == 0xfff) {
608 printk(KERN_CRIT "aoe: aoecmd_cfg_rsp: Warning: shelf "
609 "address is all ones. Check shelf dip switches\n");
610 return;
613 sysminor = SYSMINOR(aoemajor, h->minor);
614 if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
615 printk(KERN_INFO
616 "aoe: e%ld.%d: minor number too large\n",
617 aoemajor, (int) h->minor);
618 return;
621 bufcnt = be16_to_cpu(ch->bufcnt);
622 if (bufcnt > MAXFRAMES) /* keep it reasonable */
623 bufcnt = MAXFRAMES;
625 d = aoedev_set(sysminor, h->src, skb->dev, bufcnt);
626 if (d == NULL) {
627 printk(KERN_INFO "aoe: aoecmd_cfg_rsp: device set failure\n");
628 return;
631 spin_lock_irqsave(&d->lock, flags);
633 if (d->flags & (DEVFL_UP | DEVFL_CLOSEWAIT)) {
634 spin_unlock_irqrestore(&d->lock, flags);
635 return;
638 d->fw_ver = be16_to_cpu(ch->fwver);
640 /* we get here only if the device is new */
641 sl = aoecmd_ata_id(d);
643 spin_unlock_irqrestore(&d->lock, flags);
645 aoenet_xmit(sl);