1 /* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */
4 /* Filesystem request handling methods */
7 #include <linux/hdreg.h>
8 #include <linux/blkdev.h>
9 #include <linux/skbuff.h>
10 #include <linux/netdevice.h>
/*
 * Retransmit-timer tuning constants, expressed through HZ (assumed to be the
 * kernel's ticks-per-second constant -- TODO confirm).
 */
/* Timer period: added to jiffies whenever d->timer is armed in this file. */
13 #define TIMERTICK (HZ / 10)
/* Two timer periods.  NOTE(review): no use is visible in this excerpt. */
14 #define MINTIMER (2 * TIMERTICK)
/* Value d->rttavg is reset to, and the bound tested in calc_rttavg(). */
15 #define MAXTIMER (HZ << 1)
16 #define MAXWAIT (60 * 3) /* After MAXWAIT seconds, give up and fail dev */
/*
 * new_skb - allocate and pre-initialise a socket buffer for an AoE frame.
 * @if_dev: network interface the buffer is meant for.  NOTE(review): no use
 *          of if_dev appears in the lines visible here.
 * @len:    size in bytes passed to alloc_skb().
 *
 * The allocation uses GFP_ATOMIC.  In the visible statements the
 * network-header and MAC-header pointers are both set to skb->data, the
 * protocol is set to ETH_P_AOE via __constant_htons(), the next/prev links
 * are cleared and ip_summed is set to CHECKSUM_NONE.
 *
 * NOTE(review): this view of the function has gaps (braces, the local
 * declaration, any allocation-failure check and the return are not shown),
 * so this comment describes only the statements that are present.
 */
18 static struct sk_buff
*
19 new_skb(struct net_device
*if_dev
, ulong len
)
23 skb
= alloc_skb(len
, GFP_ATOMIC
);
25 skb
->nh
.raw
= skb
->mac
.raw
= skb
->data
;
27 skb
->protocol
= __constant_htons(ETH_P_AOE
);
30 skb
->next
= skb
->prev
= NULL
;
32 /* tell the network layer not to perform IP checksums
33 * or to get the NIC to do it
35 skb
->ip_summed
= CHECKSUM_NONE
;
/*
 * skb_prepare - build the sk_buff that carries frame @f for device @d.
 * @d: AoE device; d->ifp is handed to new_skb().
 * @f: frame supplying the header bytes (f->data, f->ndata) and, for writes,
 *     the payload (f->bufaddr, f->writedatalen).
 *
 * The buffer is requested with a length of f->ndata + f->writedatalen.
 * f->ndata bytes are copied from f->data to p; when f->writedatalen is
 * non-zero, p is advanced past one struct aoe_hdr plus one struct aoe_atahdr
 * and the payload is copied there.  The printk reports an skb allocation
 * failure.
 *
 * NOTE(review): where p initially points, the test guarding the printk and
 * the return statement are not visible in this excerpt.
 */
40 static struct sk_buff
*
41 skb_prepare(struct aoedev
*d
, struct frame
*f
)
46 skb
= new_skb(d
->ifp
, f
->ndata
+ f
->writedatalen
);
48 printk(KERN_INFO
"aoe: skb_prepare: failure to allocate skb\n");
/* header portion of the frame */
53 memcpy(p
, f
->data
, f
->ndata
);
/* write payload, placed directly after the AoE and ATA headers */
55 if (f
->writedatalen
) {
56 p
+= sizeof(struct aoe_hdr
) + sizeof(struct aoe_atahdr
);
57 memcpy(p
, f
->bufaddr
, f
->writedatalen
);
/*
 * getframe - frame lookup on device @d by @tag.
 * @d:   AoE device to search.
 * @tag: tag to look for.
 *
 * NOTE(review): only the parameter list is visible here; the body and the
 * return type are not part of this view.  Callers in this file pass either
 * FREETAG or the tag taken from a received AoE header, and assign the result
 * to a frame pointer.
 */
64 getframe(struct aoedev
*d
, int tag
)
77 /* Leave the top bit clear so we have tagspace for userland.
78 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
79 * This driver reserves tag -1 to mean "unused frame." */
/*
 * newtag - produce the next tag value for device @d.
 * @d: AoE device whose lasttag counter is advanced.
 *
 * d->lasttag is pre-incremented, masked to 15 bits (0x7fff) and shifted left
 * by 16, so the field occupies bits 16..30 and bit 31 stays clear in that
 * field.  The field is OR-ed into n and the combined value is returned.
 *
 * NOTE(review): the declaration and initial value of n are not visible in
 * this view.
 */
82 newtag(struct aoedev
*d
)
87 return n
|= (++d
->lasttag
& 0x7fff) << 16;
/*
 * aoehdr_atainit - fill in the AoE header @h for a command sent to @d.
 * @d: target device; supplies the interface address, the device address and
 *     the AoE major/minor.
 * @h: header to populate.
 *
 * A fresh tag is taken from newtag().  The source address is copied from
 * d->ifp->dev_addr and the destination from d->addr, the type is set to
 * ETH_P_AOE, major and tag are stored big-endian and minor is copied as is.
 *
 * NOTE(review): the return statement is not visible; callers store the
 * result in f->tag, so it presumably returns host_tag -- confirm.
 */
91 aoehdr_atainit(struct aoedev
*d
, struct aoe_hdr
*h
)
93 u32 host_tag
= newtag(d
);
95 memcpy(h
->src
, d
->ifp
->dev_addr
, sizeof h
->src
);
96 memcpy(h
->dst
, d
->addr
, sizeof h
->dst
);
97 h
->type
= __constant_cpu_to_be16(ETH_P_AOE
);
99 h
->major
= cpu_to_be16(d
->aoemajor
);
100 h
->minor
= d
->aoeminor
;
/* the tag goes on the wire big-endian; host_tag keeps the CPU-order copy */
102 h
->tag
= cpu_to_be32(host_tag
);
/*
 * aoecmd_ata_rw - prepare one ATA read/write frame and queue it for sending.
 * @d: AoE device the request is for.
 * @f: frame to fill in.
 *
 * Visible flow:
 *  - take the starting sector and byte count from the current buf
 *    (buf->sector, buf->bv_resid); the count is tested against MAXATADATA;
 *  - zero and initialise the AoE and ATA headers at the start of f->data and
 *    record the value returned by aoehdr_atainit() in f->tag;
 *  - encode the sector count (bcnt >> 9) and the LBA bytes, setting
 *    AOEAFL_EXT when the device has DEVFL_EXT;
 *  - for writes set AOEAFL_WRITE and f->writedatalen;
 *  - advance the buf bookkeeping (nframesout, bufaddr, bv_resid, sector) and,
 *    when bv_resid reaches 0, reload bv_resid and bufaddr from buf->bv;
 *  - build the skb with skb_prepare() and link it after d->sendq_tl.
 *
 * NOTE(review): several statements of this function are not present in this
 * view (declarations of h, buf, bcnt and skb, the assignments to writebit
 * and extbit, and the bodies of some branches), so the list is partial.
 */
108 aoecmd_ata_rw(struct aoedev
*d
, struct frame
*f
)
111 struct aoe_atahdr
*ah
;
115 register sector_t sector
;
116 char writebit
, extbit
;
123 sector
= buf
->sector
;
124 bcnt
= buf
->bv_resid
;
125 if (bcnt
> MAXATADATA
)
128 /* initialize the headers & frame */
129 h
= (struct aoe_hdr
*) f
->data
;
/* the ATA header sits immediately after the AoE header */
130 ah
= (struct aoe_atahdr
*) (h
+1);
131 f
->ndata
= sizeof *h
+ sizeof *ah
;
132 memset(h
, 0, f
->ndata
);
133 f
->tag
= aoehdr_atainit(d
, h
);
136 f
->bufaddr
= buf
->bufaddr
;
138 /* set up ata header */
/* byte count converted to a count of 512-byte units */
139 ah
->scnt
= bcnt
>> 9;
/* each step shifts sector right by 8 and stores the result in the next LBA field */
141 ah
->lba1
= sector
>>= 8;
142 ah
->lba2
= sector
>>= 8;
143 ah
->lba3
= sector
>>= 8;
144 if (d
->flags
& DEVFL_EXT
) {
145 ah
->aflags
|= AOEAFL_EXT
;
146 ah
->lba4
= sector
>>= 8;
147 ah
->lba5
= sector
>>= 8;
151 ah
->lba3
|= 0xe0; /* LBA bit + obsolete 0xa0 */
154 if (bio_data_dir(buf
->bio
) == WRITE
) {
155 ah
->aflags
|= AOEAFL_WRITE
;
156 f
->writedatalen
= bcnt
;
/* command byte is WIN_READ OR-ed with writebit and extbit */
162 ah
->cmdstat
= WIN_READ
| writebit
| extbit
;
164 /* mark all tracking fields and load out */
165 buf
->nframesout
+= 1;
166 buf
->bufaddr
+= bcnt
;
167 buf
->bv_resid
-= bcnt
;
168 /* printk(KERN_INFO "aoe: bv_resid=%ld\n", buf->bv_resid); */
170 buf
->sector
+= bcnt
>> 9;
171 if (buf
->resid
== 0) {
173 } else if (buf
->bv_resid
== 0) {
/* current segment consumed: reload length and address from buf->bv */
175 buf
->bv_resid
= buf
->bv
->bv_len
;
176 buf
->bufaddr
= page_address(buf
->bv
->bv_page
) + buf
->bv
->bv_offset
;
179 skb
= skb_prepare(d
, f
);
/* append to the device send queue */
183 d
->sendq_tl
->next
= skb
;
/*
 * aoecmd_work - pick up pending work for device @d.
 * @d: AoE device.
 *
 * Visible steps: a frame is requested with getframe(d, FREETAG); when
 * d->inprocess is NULL, d->bufq is tested with list_empty(), and the first
 * buf on d->bufq is obtained with container_of() and unlinked from the list.
 *
 * NOTE(review): the rest of the function (what happens on an empty queue,
 * and how the frame and buf are used afterwards) is not present in this view.
 */
190 /* enters with d->lock held */
192 aoecmd_work(struct aoedev
*d
)
197 f
= getframe(d
, FREETAG
);
200 if (d
->inprocess
== NULL
) {
201 if (list_empty(&d
->bufq
))
203 buf
= container_of(d
->bufq
.next
, struct buf
, bufs
);
204 list_del(d
->bufq
.next
);
205 /*printk(KERN_INFO "aoecmd_work: bi_size=%ld\n", buf->bio->bi_size); */
/*
 * rexmit - send frame @f again on device @d under a new tag.
 * @d: AoE device.
 * @f: frame being resent.
 *
 * Formats a trace line containing the device major/minor, the old tag
 * (f->tag), the current jiffies and the new tag n; writes n big-endian into
 * the AoE header at the start of f->data; rebuilds the skb with
 * skb_prepare() and links it after d->sendq_tl.
 *
 * NOTE(review): how n is obtained, the first snprintf argument after the
 * format, what is done with the trace buffer and any NULL handling for skb
 * are not visible in this view.
 */
213 rexmit(struct aoedev
*d
, struct frame
*f
)
222 snprintf(buf
, sizeof buf
,
223 "%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n",
225 d
->aoemajor
, d
->aoeminor
, f
->tag
, jiffies
, n
);
228 h
= (struct aoe_hdr
*) f
->data
;
/* patch the new tag into the already-built header */
230 h
->tag
= cpu_to_be32(n
);
232 skb
= skb_prepare(d
, f
);
236 d
->sendq_tl
->next
= skb
;
248 n
= jiffies
& 0xffff;
/*
 * rexmit_timer - timer callback that checks a device for overdue frames.
 * @vp: the struct aoedev pointer, passed as a ulong.
 *
 * The timeout is increased by half of its own value.  d->lock is taken with
 * spin_lock_irqsave().  If DEVFL_TKILL is set the lock is released at the
 * tdie label.  For a frame f that is in use (tag != FREETAG) and whose age,
 * tsince(f->tag), has reached the timeout, the timeout is added to f->waited;
 * the comment on the MAXWAIT test describes exceeding it as a device
 * failure.  The send queue head and tail are cleared, the timer is re-armed
 * TIMERTICK jiffies ahead and the lock is released.
 *
 * NOTE(review): the initial value of timeout, the loop over frames, the
 * failure handling, the condition under which d->rttavg is set to MAXTIMER
 * and the transmit step are not visible in this view.
 */
256 rexmit_timer(ulong vp
)
261 register long timeout
;
264 d
= (struct aoedev
*) vp
;
267 /* timeout is always ~150% of the moving average */
269 timeout
+= timeout
>> 1;
271 spin_lock_irqsave(&d
->lock
, flags
);
273 if (d
->flags
& DEVFL_TKILL
) {
274 tdie
: spin_unlock_irqrestore(&d
->lock
, flags
);
280 if (f
->tag
!= FREETAG
&& tsince(f
->tag
) >= timeout
) {
/* accumulate how long this frame has been waiting */
281 n
= f
->waited
+= timeout
;
283 if (n
> MAXWAIT
) { /* waited too long. device failure. */
292 d
->sendq_hd
= d
->sendq_tl
= NULL
;
296 d
->rttavg
= MAXTIMER
;
/* re-arm for the next tick */
299 d
->timer
.expires
= jiffies
+ TIMERTICK
;
300 add_timer(&d
->timer
);
302 spin_unlock_irqrestore(&d
->lock
, flags
);
/*
 * ataid_complete - digest ATA identify data for device @d.
 * @d:  AoE device being set up.
 * @id: raw identify data; 16-bit word w is read at byte offset w << 1 as a
 *      little-endian value.
 *
 * Words 83 and 86 are OR-ed together.  If bit 10 (LBA 48) is set, the device
 * is flagged DEVFL_EXT, the sector count is the 64-bit value at word 100 and
 * cylinders are computed as ssize / (255 * 63).  On the other visible path
 * DEVFL_EXT is cleared, the sector count is the 32-bit value at word 60 and
 * cylinders/heads/sectors come from words 54, 55 and 56.  The sector count
 * is stored in d->gd->capacity and DEVFL_UP is set.  If DEVFL_WORKON is
 * already set a message is logged; the visible tail initialises d->work with
 * aoeblk_gdalloc, schedules it and sets DEVFL_WORKON.
 *
 * NOTE(review): declarations, the else keyword, some geometry assignments
 * and any early return after the "already on" message are not visible here.
 * NOTE(review): the __le16/__le32/__le64 pointer casts assume @id is
 * suitably aligned for those loads -- confirm against the caller.
 */
308 ataid_complete(struct aoedev
*d
, unsigned char *id
)
313 /* word 83: command set supported */
314 n
= le16_to_cpup((__le16
*) &id
[83<<1]);
316 /* word 86: command set/feature enabled */
317 n
|= le16_to_cpup((__le16
*) &id
[86<<1]);
319 if (n
& (1<<10)) { /* bit 10: LBA 48 */
320 d
->flags
|= DEVFL_EXT
;
322 /* word 100: number lba48 sectors */
323 ssize
= le64_to_cpup((__le64
*) &id
[100<<1]);
325 /* set as in ide-disk.c:init_idedisk_capacity */
326 d
->geo
.cylinders
= ssize
;
327 d
->geo
.cylinders
/= (255 * 63);
331 d
->flags
&= ~DEVFL_EXT
;
333 /* number lba28 sectors */
334 ssize
= le32_to_cpup((__le32
*) &id
[60<<1]);
336 /* NOTE: obsolete in ATA 6 */
337 d
->geo
.cylinders
= le16_to_cpup((__le16
*) &id
[54<<1]);
338 d
->geo
.heads
= le16_to_cpup((__le16
*) &id
[55<<1]);
339 d
->geo
.sectors
= le16_to_cpup((__le16
*) &id
[56<<1]);
344 d
->gd
->capacity
= ssize
;
345 d
->flags
|= DEVFL_UP
;
348 if (d
->flags
& DEVFL_WORKON
) {
349 printk(KERN_INFO
"aoe: ataid_complete: can't schedule work, it's already on! "
350 "(This really shouldn't happen).\n");
/* hand aoeblk_gdalloc to the work queue with d as its argument */
353 INIT_WORK(&d
->work
, aoeblk_gdalloc
, d
);
354 schedule_work(&d
->work
);
355 d
->flags
|= DEVFL_WORKON
;
/*
 * calc_rttavg - presumably folds the sample @rtt into the device's
 * round-trip average (d->rttavg is used elsewhere in this file) -- confirm.
 * @d:   AoE device.
 * @rtt: new round-trip sample; callers pass tsince(f->tag).
 *
 * NOTE(review): only the upper-bound test of n against MAXTIMER and the
 * comment citing a gain of .25 are visible; the origin of n and the update
 * formula itself are not part of this view.
 */
359 calc_rttavg(struct aoedev
*d
, int rtt
)
366 else if (n
> MAXTIMER
)
369 /* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
/*
 * aoecmd_ata_rsp - handle an ATA response frame received in @skb.
 * @skb: received buffer; the AoE header is read from skb->mac.raw and the
 *       ATA header is taken to follow it directly.
 *
 * Visible flow:
 *  - look the device up with aoedev_by_aoeaddr() using the header's
 *    major/minor; an "unknown device" text is formatted into ebuf;
 *  - take d->lock (irqsave) and look up the frame by the response tag with
 *    getframe(); one path unlocks and formats a trace line containing the
 *    tag (its condition is not visible);
 *  - pass tsince(f->tag) to calc_rttavg();
 *  - ahin is the received ATA header, ahout the one stored in f->data;
 *  - if the response status has any of the bits in 0xa9 set, log the error
 *    and set BUFFL_FAIL on the buf;
 *  - switch on the command that was sent (ahout->cmdstat): the read path
 *    checks the payload length against scnt << 9 and copies it to
 *    f->bufaddr, the identify path checks for 512 bytes and calls
 *    ataid_complete(), and an unrecognized command is logged;
 *  - decrement buf->nframesout; when it and buf->resid are both 0, update
 *    the disk statistics, end the bio with 0 or -EIO and free the buf back
 *    to d->bufpool;
 *  - clear the send queue head/tail and release the lock.
 *
 * NOTE(review): declarations, the case labels, the else keywords and several
 * returns are not present in this view, so branch structure is partly
 * inferred from the log messages.
 * NOTE(review): skb->len - sizeof *hin - sizeof *ahin is evaluated in an
 * unsigned type (sizeof yields size_t), so a frame shorter than the two
 * headers would wrap to a large value and pass both runt checks -- confirm
 * that a minimum length is enforced before this function runs.
 */
375 aoecmd_ata_rsp(struct sk_buff
*skb
)
379 struct aoe_atahdr
*ahin
, *ahout
;
388 hin
= (struct aoe_hdr
*) skb
->mac
.raw
;
389 aoemajor
= be16_to_cpu(hin
->major
);
390 d
= aoedev_by_aoeaddr(aoemajor
, hin
->minor
);
392 snprintf(ebuf
, sizeof ebuf
, "aoecmd_ata_rsp: ata response "
393 "for unknown device %d.%d\n",
394 aoemajor
, hin
->minor
);
399 spin_lock_irqsave(&d
->lock
, flags
);
/* match the response to its frame by tag */
401 f
= getframe(d
, be32_to_cpu(hin
->tag
));
403 spin_unlock_irqrestore(&d
->lock
, flags
);
404 snprintf(ebuf
, sizeof ebuf
,
405 "%15s e%d.%d tag=%08x@%08lx\n",
407 be16_to_cpu(hin
->major
),
409 be32_to_cpu(hin
->tag
),
415 calc_rttavg(d
, tsince(f
->tag
));
/* ahin: ATA header of the response; ahout: ATA header stored in the frame */
417 ahin
= (struct aoe_atahdr
*) (hin
+1);
418 ahout
= (struct aoe_atahdr
*) (f
->data
+ sizeof(struct aoe_hdr
));
421 if (ahin
->cmdstat
& 0xa9) { /* these bits cleared on success */
422 printk(KERN_CRIT
"aoe: aoecmd_ata_rsp: ata error cmd=%2.2Xh "
423 "stat=%2.2Xh from e%ld.%ld\n",
424 ahout
->cmdstat
, ahin
->cmdstat
,
425 d
->aoemajor
, d
->aoeminor
);
427 buf
->flags
|= BUFFL_FAIL
;
429 switch (ahout
->cmdstat
) {
/* expected payload size in bytes: requested sector count times 512 */
432 n
= ahout
->scnt
<< 9;
433 if (skb
->len
- sizeof *hin
- sizeof *ahin
< n
) {
434 printk(KERN_CRIT
"aoe: aoecmd_ata_rsp: runt "
435 "ata data size in read. skb->len=%d\n",
437 /* fail frame f? just returning will rexmit. */
438 spin_unlock_irqrestore(&d
->lock
, flags
);
/* payload starts right after the received ATA header */
441 memcpy(f
->bufaddr
, ahin
+1, n
);
446 if (skb
->len
- sizeof *hin
- sizeof *ahin
< 512) {
447 printk(KERN_INFO
"aoe: aoecmd_ata_rsp: runt data size "
448 "in ataid. skb->len=%d\n", skb
->len
);
449 spin_unlock_irqrestore(&d
->lock
, flags
);
/* NOTE(review): cast is to char * while ataid_complete() takes unsigned char * */
452 ataid_complete(d
, (char *) (ahin
+1));
453 /* d->flags |= DEVFL_WC_UPDATE; */
456 printk(KERN_INFO
"aoe: aoecmd_ata_rsp: unrecognized "
457 "outbound ata command %2.2Xh for %d.%d\n",
459 be16_to_cpu(hin
->major
),
465 buf
->nframesout
-= 1;
/* last outstanding frame of a fully issued buf: account and complete the bio */
466 if (buf
->nframesout
== 0 && buf
->resid
== 0) {
467 unsigned long duration
= jiffies
- buf
->start_time
;
468 unsigned long n_sect
= buf
->bio
->bi_size
>> 9;
469 struct gendisk
*disk
= d
->gd
;
471 if (bio_data_dir(buf
->bio
) == WRITE
) {
472 disk_stat_inc(disk
, writes
);
473 disk_stat_add(disk
, write_ticks
, duration
);
474 disk_stat_add(disk
, write_sectors
, n_sect
);
476 disk_stat_inc(disk
, reads
);
477 disk_stat_add(disk
, read_ticks
, duration
);
478 disk_stat_add(disk
, read_sectors
, n_sect
);
480 disk_stat_add(disk
, io_ticks
, duration
);
481 n
= (buf
->flags
& BUFFL_FAIL
) ? -EIO
: 0;
482 bio_endio(buf
->bio
, buf
->bio
->bi_size
, n
);
483 mempool_free(buf
, d
->bufpool
);
493 d
->sendq_hd
= d
->sendq_tl
= NULL
;
495 spin_unlock_irqrestore(&d
->lock
, flags
);
/*
 * aoecmd_cfg - build AoE frames carrying a struct aoe_cfghdr, one per
 * AoE-enabled network interface (presumably a config query -- confirm).
 * @aoemajor: AoE major address placed, big-endian, in each header.
 * @aoeminor: AoE minor address.  NOTE(review): its use is not visible in
 *            this view.
 *
 * Walks the dev_base list under read_lock(&dev_base_lock).  Interfaces are
 * tested with is_aoe_netif().  An skb of sizeof *h + sizeof *ch bytes is
 * allocated with new_skb(), the headers are zeroed, the destination is
 * filled with 0xff bytes (Ethernet broadcast), the source is the
 * interface's own address and the type is ETH_P_AOE.  A message is logged
 * on skb allocation failure.
 *
 * NOTE(review): dev_put() runs in the loop's increment expression; the
 * matching reference acquisition, the bodies of the skip/failure paths and
 * the code that collects or transmits the skbs are not visible here.
 */
501 aoecmd_cfg(ushort aoemajor
, unsigned char aoeminor
)
504 struct aoe_cfghdr
*ch
;
505 struct sk_buff
*skb
, *sl
;
506 struct net_device
*ifp
;
510 read_lock(&dev_base_lock
);
511 for (ifp
= dev_base
; ifp
; dev_put(ifp
), ifp
= ifp
->next
) {
513 if (!is_aoe_netif(ifp
))
516 skb
= new_skb(ifp
, sizeof *h
+ sizeof *ch
);
518 printk(KERN_INFO
"aoe: aoecmd_cfg: skb alloc failure\n");
521 h
= (struct aoe_hdr
*) skb
->mac
.raw
;
522 memset(h
, 0, sizeof *h
+ sizeof *ch
);
/* all-ones destination address */
524 memset(h
->dst
, 0xff, sizeof h
->dst
);
525 memcpy(h
->src
, ifp
->dev_addr
, sizeof h
->src
);
526 h
->type
= __constant_cpu_to_be16(ETH_P_AOE
);
528 h
->major
= cpu_to_be16(aoemajor
);
535 read_unlock(&dev_base_lock
);
541 /* Since we only call this in one place (and it only prepares one frame)
542 * we just return the skb. Usually we'd chain it up to the aoedev sendq. */
/*
 * aoecmd_ata_id - prepare the ATA identify command frame for device @d and
 * start the retransmit timer.
 * @d: AoE device to identify.
 *
 * Visible flow: request a free frame with getframe(d, FREETAG), logging a
 * KERN_CRIT message when none is available; zero and initialise the AoE and
 * ATA headers at the start of f->data and store the value returned by
 * aoehdr_atainit() in f->tag; reset d->rttavg to MAXTIMER; set the command
 * to WIN_IDENTIFY; build the skb with skb_prepare(); clear DEVFL_TKILL and
 * arm d->timer to call rexmit_timer(d) TIMERTICK jiffies from now.
 *
 * The declared return type is struct sk_buff *.  NOTE(review): the return
 * statements, the failure test after getframe() and some ATA header fields
 * are not visible in this view.
 */
544 static struct sk_buff
*
545 aoecmd_ata_id(struct aoedev
*d
)
548 struct aoe_atahdr
*ah
;
552 f
= getframe(d
, FREETAG
);
554 printk(KERN_CRIT
"aoe: aoecmd_ata_id: can't get a frame. "
555 "This shouldn't happen.\n");
559 /* initialize the headers & frame */
560 h
= (struct aoe_hdr
*) f
->data
;
561 ah
= (struct aoe_atahdr
*) (h
+1);
562 f
->ndata
= sizeof *h
+ sizeof *ah
;
563 memset(h
, 0, f
->ndata
);
564 f
->tag
= aoehdr_atainit(d
, h
);
568 /* this message initializes the device, so we reset the rttavg */
569 d
->rttavg
= MAXTIMER
;
571 /* set up ata header */
573 ah
->cmdstat
= WIN_IDENTIFY
;
576 skb
= skb_prepare(d
, f
);
578 /* we now want to start the rexmit tracking */
579 d
->flags
&= ~DEVFL_TKILL
;
/* the timer callback receives the device pointer as its ulong argument */
580 d
->timer
.data
= (ulong
) d
;
581 d
->timer
.function
= rexmit_timer
;
582 d
->timer
.expires
= jiffies
+ TIMERTICK
;
583 add_timer(&d
->timer
);
/*
 * aoecmd_cfg_rsp - handle a received AoE frame carrying a struct aoe_cfghdr.
 * @skb: received buffer; the AoE header is read from skb->mac.raw and the
 *       config header is taken to follow it directly.
 *
 * Visible flow:
 *  - read the major address; the value 0xfff triggers a loud warning about
 *    the shelf dip switches;
 *  - compute sysminor with SYSMINOR(aoemajor, h->minor) and log "minor
 *    number too large" when sysminor * AOE_PARTITIONS + AOE_PARTITIONS
 *    exceeds MINORMASK;
 *  - read the buffer count and test it against MAXFRAMES (8);
 *  - call aoedev_set() with the sender address and receiving interface,
 *    logging a message on failure;
 *  - under d->lock (irqsave): if the device is already DEVFL_UP or
 *    DEVFL_CLOSEWAIT just unlock; otherwise record the firmware version and
 *    obtain the identify skb from aoecmd_ata_id() before unlocking.
 *
 * NOTE(review): declarations, the early returns, the clamp applied when
 * bufcnt exceeds MAXFRAMES and whatever follows the final unlock are not
 * visible; the function may continue past the end of this view.
 * NOTE(review): the "all ones" test compares against 0xfff although major
 * is read as a 16-bit field -- presumably this matches the shelf switch
 * width; confirm.
 */
589 aoecmd_cfg_rsp(struct sk_buff
*skb
)
593 struct aoe_cfghdr
*ch
;
594 ulong flags
, sysminor
, aoemajor
;
597 enum { MAXFRAMES
= 8 };
599 h
= (struct aoe_hdr
*) skb
->mac
.raw
;
600 ch
= (struct aoe_cfghdr
*) (h
+1);
603 * Enough people have their dip switches set backwards to
604 * warrant a loud message for this special case.
606 aoemajor
= be16_to_cpu(h
->major
);
607 if (aoemajor
== 0xfff) {
608 printk(KERN_CRIT
"aoe: aoecmd_cfg_rsp: Warning: shelf "
609 "address is all ones. Check shelf dip switches\n");
613 sysminor
= SYSMINOR(aoemajor
, h
->minor
);
614 if (sysminor
* AOE_PARTITIONS
+ AOE_PARTITIONS
> MINORMASK
) {
616 "aoe: e%ld.%d: minor number too large\n",
617 aoemajor
, (int) h
->minor
);
621 bufcnt
= be16_to_cpu(ch
->bufcnt
);
622 if (bufcnt
> MAXFRAMES
) /* keep it reasonable */
625 d
= aoedev_set(sysminor
, h
->src
, skb
->dev
, bufcnt
);
627 printk(KERN_INFO
"aoe: aoecmd_cfg_rsp: device set failure\n");
631 spin_lock_irqsave(&d
->lock
, flags
);
/* device already up or waiting for close: nothing more to do here */
633 if (d
->flags
& (DEVFL_UP
| DEVFL_CLOSEWAIT
)) {
634 spin_unlock_irqrestore(&d
->lock
, flags
);
638 d
->fw_ver
= be16_to_cpu(ch
->fwver
);
640 /* we get here only if the device is new */
641 sl
= aoecmd_ata_id(d
);
643 spin_unlock_irqrestore(&d
->lock
, flags
);