1 /* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */
4 * Filesystem request handling methods
7 #include <linux/hdreg.h>
8 #include <linux/blkdev.h>
9 #include <linux/skbuff.h>
10 #include <linux/netdevice.h>
11 #include <linux/genhd.h>
12 #include <asm/unaligned.h>
/* Retransmit-timer tuning, all in jiffies: the timer ticks at HZ/10, the
 * computed round-trip timeout is clamped to [MINTIMER, MAXTIMER], and
 * MAXWAIT is a wall-clock bound in seconds (see comment below). */
15 #define TIMERTICK (HZ / 10)
16 #define MINTIMER (2 * TIMERTICK)
17 #define MAXTIMER (HZ << 1)
18 #define MAXWAIT (60 * 3) /* After MAXWAIT seconds, give up and fail dev */
/* new_skb: allocate an sk_buff of 'len' bytes (GFP_ATOMIC, so callable from
 * interrupt context), point both the network- and MAC-layer header pointers
 * at the start of the buffer, stamp the AoE ethertype, zero the payload,
 * unlink it, and disable IP checksum offload (CHECKSUM_NONE).
 * NOTE(review): this extract is fragmentary — the original lines between the
 * visible ones (allocation-failure check, skb->dev assignment, return) are
 * missing here; confirm against the full source before relying on details.
 */
20 static struct sk_buff
*
21 new_skb(struct net_device
*if_dev
, ulong len
)
25 skb
= alloc_skb(len
, GFP_ATOMIC
);
27 skb
->nh
.raw
= skb
->mac
.raw
= skb
->data
;
29 skb
->protocol
= __constant_htons(ETH_P_AOE
);
32 memset(skb
->head
, 0, len
);
33 skb
->next
= skb
->prev
= NULL
;
35 /* tell the network layer not to perform IP checksums
36 * or to get the NIC to do it
38 skb
->ip_summed
= CHECKSUM_NONE
;
/* skb_prepare: build an outgoing skb for frame 'f' on device 'd'.  Allocates
 * a buffer sized for the frame header data (f->ndata) plus any write payload
 * (f->writedatalen) via new_skb(), logs and bails on allocation failure,
 * copies the prebuilt header bytes, and — for writes — copies the data
 * payload in after the AoE + ATA headers.
 * NOTE(review): fragmentary extract; the destination pointer 'p' setup, the
 * early-return on failure, and the final return of the skb are elided here.
 */
43 static struct sk_buff
*
44 skb_prepare(struct aoedev
*d
, struct frame
*f
)
49 skb
= new_skb(d
->ifp
, f
->ndata
+ f
->writedatalen
);
51 printk(KERN_INFO
"aoe: skb_prepare: failure to allocate skb\n");
56 memcpy(p
, f
->data
, f
->ndata
);
58 if (f
->writedatalen
) {
59 p
+= sizeof(struct aoe_hdr
) + sizeof(struct aoe_atahdr
);
60 memcpy(p
, f
->bufaddr
, f
->writedatalen
);
/* getframe: look up the frame on device 'd' matching 'tag'.  Callers below
 * pass FREETAG to fetch an unused frame and a response tag to match an
 * outstanding command.  NOTE(review): only the signature survives in this
 * extract; the search loop and return are elided.
 */
67 getframe(struct aoedev
*d
, int tag
)
/* newtag: generate the next command tag for device 'd'.  The visible return
 * packs an incremented per-device counter (masked to 15 bits, keeping the
 * top bit clear) into the high half; per the original comment the low 16
 * bits carry the transmit tick used by rexmit/rttavg.  NOTE(review): the
 * assignment initializing 'n' before the return is elided in this extract
 * (cf. the jiffies & 0xffff fragment later in the file).
 */
80 * Leave the top bit clear so we have tagspace for userland.
81 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
82 * This driver reserves tag -1 to mean "unused frame."
85 newtag(struct aoedev
*d
)
90 return n
|= (++d
->lasttag
& 0x7fff) << 16;
/* aoehdr_atainit: fill the common AoE Ethernet header 'h' for an ATA command
 * to device 'd': source MAC from the bound interface, destination MAC from
 * the discovered device address, AoE ethertype, big-endian shelf (major)
 * and slot (minor) addresses, and a freshly generated big-endian tag.
 * NOTE(review): fragmentary — lines setting other header fields (e.g.
 * version/command) and the return of host_tag are elided in this extract.
 */
94 aoehdr_atainit(struct aoedev
*d
, struct aoe_hdr
*h
)
96 u32 host_tag
= newtag(d
);
98 memcpy(h
->src
, d
->ifp
->dev_addr
, sizeof h
->src
);
99 memcpy(h
->dst
, d
->addr
, sizeof h
->dst
);
100 h
->type
= __constant_cpu_to_be16(ETH_P_AOE
);
102 h
->major
= cpu_to_be16(d
->aoemajor
);
103 h
->minor
= d
->aoeminor
;
105 h
->tag
= cpu_to_be32(host_tag
);
/* aoecmd_ata_rw: build and queue one ATA read/write command for the buffer
 * currently in process on device 'd', using free frame 'f'.  Visible steps:
 * take the next chunk (capped at MAXATADATA) of the current bio segment,
 * initialize the AoE + ATA headers, split the 512-byte sector number across
 * the lba byte fields (LBA48 when DEVFL_EXT is set, else the legacy
 * 0xe0-or'd CHS/LBA28 form), mark writes with AOEAFL_WRITE and attach the
 * payload length, advance the buffer's tracking fields (bufaddr, bv_resid,
 * sector, nframesout), step to the next bio_vec when the current segment is
 * exhausted, then prepare an skb and append it to the device send queue.
 * NOTE(review): fragmentary — local declarations, the bcnt clamp body, the
 * read-direction branch, and send-queue head handling are elided here, so
 * WIN_READ | writebit | extbit presumably selects read vs. write via the
 * writebit/extbit locals set in missing lines — confirm against full source.
 */
111 aoecmd_ata_rw(struct aoedev
*d
, struct frame
*f
)
114 struct aoe_atahdr
*ah
;
118 register sector_t sector
;
119 char writebit
, extbit
;
126 sector
= buf
->sector
;
127 bcnt
= buf
->bv_resid
;
128 if (bcnt
> MAXATADATA
)
131 /* initialize the headers & frame */
132 h
= (struct aoe_hdr
*) f
->data
;
133 ah
= (struct aoe_atahdr
*) (h
+1);
134 f
->ndata
= sizeof *h
+ sizeof *ah
;
135 memset(h
, 0, f
->ndata
);
136 f
->tag
= aoehdr_atainit(d
, h
);
139 f
->bufaddr
= buf
->bufaddr
;
141 /* set up ata header */
142 ah
->scnt
= bcnt
>> 9;
144 ah
->lba1
= sector
>>= 8;
145 ah
->lba2
= sector
>>= 8;
146 ah
->lba3
= sector
>>= 8;
147 if (d
->flags
& DEVFL_EXT
) {
148 ah
->aflags
|= AOEAFL_EXT
;
149 ah
->lba4
= sector
>>= 8;
150 ah
->lba5
= sector
>>= 8;
154 ah
->lba3
|= 0xe0; /* LBA bit + obsolete 0xa0 */
157 if (bio_data_dir(buf
->bio
) == WRITE
) {
158 ah
->aflags
|= AOEAFL_WRITE
;
159 f
->writedatalen
= bcnt
;
165 ah
->cmdstat
= WIN_READ
| writebit
| extbit
;
167 /* mark all tracking fields and load out */
168 buf
->nframesout
+= 1;
169 buf
->bufaddr
+= bcnt
;
170 buf
->bv_resid
-= bcnt
;
171 /* printk(KERN_INFO "aoe: bv_resid=%ld\n", buf->bv_resid); */
173 buf
->sector
+= bcnt
>> 9;
174 if (buf
->resid
== 0) {
176 } else if (buf
->bv_resid
== 0) {
178 buf
->bv_resid
= buf
->bv
->bv_len
;
179 buf
->bufaddr
= page_address(buf
->bv
->bv_page
) + buf
->bv
->bv_offset
;
182 skb
= skb_prepare(d
, f
);
186 d
->sendq_tl
->next
= skb
;
/* aoecmd_cfg_pkts: build a chain of AoE config-query broadcast packets, one
 * per AoE-enabled network interface, addressed to shelf 'aoemajor' / slot
 * 'aoeminor'.  Walks the global dev_base interface list under
 * dev_base_lock, skips non-AoE interfaces (is_aoe_netif), and for each one
 * allocates an skb sized for an AoE header plus config header, zeroes it,
 * sets a broadcast destination MAC, the interface's source MAC, the AoE
 * ethertype, and the big-endian major address.  Returns the packet list
 * without transmitting — per the original comment, some callers cannot
 * sleep and transmit later when interrupts are on; 'tail' presumably
 * receives the list tail for queue chaining (elided — confirm).
 * NOTE(review): fragmentary — list chaining via sl/sl_tail, the minor/cmd
 * header fields, and the return are missing from this extract.
 */
193 /* some callers cannot sleep, and they can call this function,
194 * transmitting the packets later, when interrupts are on
196 static struct sk_buff
*
197 aoecmd_cfg_pkts(ushort aoemajor
, unsigned char aoeminor
, struct sk_buff
**tail
)
200 struct aoe_cfghdr
*ch
;
201 struct sk_buff
*skb
, *sl
, *sl_tail
;
202 struct net_device
*ifp
;
206 read_lock(&dev_base_lock
);
207 for (ifp
= dev_base
; ifp
; dev_put(ifp
), ifp
= ifp
->next
) {
209 if (!is_aoe_netif(ifp
))
212 skb
= new_skb(ifp
, sizeof *h
+ sizeof *ch
);
214 printk(KERN_INFO
"aoe: aoecmd_cfg: skb alloc failure\n");
219 h
= (struct aoe_hdr
*) skb
->mac
.raw
;
220 memset(h
, 0, sizeof *h
+ sizeof *ch
);
222 memset(h
->dst
, 0xff, sizeof h
->dst
);
223 memcpy(h
->src
, ifp
->dev_addr
, sizeof h
->src
);
224 h
->type
= __constant_cpu_to_be16(ETH_P_AOE
);
226 h
->major
= cpu_to_be16(aoemajor
);
233 read_unlock(&dev_base_lock
);
/* aoecmd_work: issue as much pending work for device 'd' as frames allow;
 * the caller already holds d->lock (per the original comment).  Visible
 * logic: if the device is paused and idle, queue fresh config packets onto
 * the send queue; otherwise grab a free frame (FREETAG), and when no buffer
 * is in process, pull the next struct buf off d->bufq (via container_of on
 * the list head) to start servicing it.
 * NOTE(review): fragmentary — the loop structure, inprocess assignment, and
 * the call that actually loads frames (presumably aoecmd_ata_rw) are elided.
 */
240 /* enters with d->lock held */
242 aoecmd_work(struct aoedev
*d
)
247 if (d
->flags
& DEVFL_PAUSE
) {
248 if (!aoedev_isbusy(d
))
249 d
->sendq_hd
= aoecmd_cfg_pkts(d
->aoemajor
,
250 d
->aoeminor
, &d
->sendq_tl
);
255 f
= getframe(d
, FREETAG
);
258 if (d
->inprocess
== NULL
) {
259 if (list_empty(&d
->bufq
))
261 buf
= container_of(d
->bufq
.next
, struct buf
, bufs
);
262 list_del(d
->bufq
.next
);
263 /*printk(KERN_INFO "aoecmd_work: bi_size=%ld\n", buf->bio->bi_size); */
/* rexmit: retransmit frame 'f' on device 'd'.  Visible steps: format a
 * diagnostic line (old tag vs. new tag 'n' at the current jiffies), restamp
 * the frame's AoE header with the new big-endian tag and with the device's
 * current destination/source MACs (the device may have migrated interfaces
 * since the first send), rebuild the skb via skb_prepare, and append it to
 * the send queue.
 * NOTE(review): fragmentary — generation of 'n' (presumably newtag), the
 * debug-output call consuming 'buf', f->tag update, and the send-queue head
 * case are elided in this extract.
 */
271 rexmit(struct aoedev
*d
, struct frame
*f
)
280 snprintf(buf
, sizeof buf
,
281 "%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n",
283 d
->aoemajor
, d
->aoeminor
, f
->tag
, jiffies
, n
);
286 h
= (struct aoe_hdr
*) f
->data
;
288 h
->tag
= cpu_to_be32(n
);
289 memcpy(h
->dst
, d
->addr
, sizeof h
->dst
);
290 memcpy(h
->src
, d
->ifp
->dev_addr
, sizeof h
->src
);
292 skb
= skb_prepare(d
, f
);
296 d
->sendq_tl
->next
= skb
;
/* NOTE(review): interior fragment of the elapsed-time helper (tsince, per
 * callers below): takes the current transmit tick from the low 16 bits of
 * jiffies — the same field newtag() packs into a command tag.  The
 * surrounding signature and ticks-elapsed computation are elided here.
 */
308 n
= jiffies
& 0xffff;
/* rexmit_timer: per-device timer callback ('vp' is the struct aoedev *).
 * Visible logic: compute a timeout of ~150% of the round-trip moving
 * average, take d->lock (irq-safe), bail out early if the device is being
 * torn down (DEVFL_TKILL), scan in-use frames whose age (tsince) exceeds
 * the timeout, accumulate waiting time in f->waited, and on exceeding
 * MAXWAIT treat it as device failure — the visible cleanup drops the send
 * queue and resets rttavg to MAXTIMER.  Finally the timer is re-armed one
 * TIMERTICK ahead and the lock released.
 * NOTE(review): fragmentary — the frame-scan loop bounds, the rexmit()
 * call for timed-out frames, failure-flagging, and the actual transmit of
 * the queued skbs are elided in this extract.
 */
316 rexmit_timer(ulong vp
)
321 register long timeout
;
324 d
= (struct aoedev
*) vp
;
327 /* timeout is always ~150% of the moving average */
329 timeout
+= timeout
>> 1;
331 spin_lock_irqsave(&d
->lock
, flags
);
333 if (d
->flags
& DEVFL_TKILL
) {
334 spin_unlock_irqrestore(&d
->lock
, flags
);
340 if (f
->tag
!= FREETAG
&& tsince(f
->tag
) >= timeout
) {
341 n
= f
->waited
+= timeout
;
343 if (n
> MAXWAIT
) { /* waited too long. device failure. */
352 d
->sendq_hd
= d
->sendq_tl
= NULL
;
356 d
->rttavg
= MAXTIMER
;
359 d
->timer
.expires
= jiffies
+ TIMERTICK
;
360 add_timer(&d
->timer
);
362 spin_unlock_irqrestore(&d
->lock
, flags
);
/* aoecmd_sleepwork: workqueue handler for actions deferred until sleeping
 * is allowed ('vp' is the struct aoedev *).  Visible logic: if gendisk
 * allocation was requested (DEVFL_GDALLOC), handle it (body elided); if the
 * device grew (DEVFL_NEWSIZE), propagate the new capacity to the backing
 * block device's inode size — under bd_inode->i_mutex, since i_size_write
 * needs exclusion — converting 512-byte sectors to bytes with the <<9
 * shift; then, under d->lock, mark the device up and clear the NEWSIZE flag.
 * NOTE(review): fragmentary — the GDALLOC branch body, bdget_disk failure
 * handling, and bdput are elided in this extract.
 */
367 /* this function performs work that has been deferred until sleeping is OK
370 aoecmd_sleepwork(void *vp
)
372 struct aoedev
*d
= (struct aoedev
*) vp
;
374 if (d
->flags
& DEVFL_GDALLOC
)
377 if (d
->flags
& DEVFL_NEWSIZE
) {
378 struct block_device
*bd
;
382 ssize
= d
->gd
->capacity
;
383 bd
= bdget_disk(d
->gd
, 0);
386 mutex_lock(&bd
->bd_inode
->i_mutex
);
387 i_size_write(bd
->bd_inode
, (loff_t
)ssize
<<9);
388 mutex_unlock(&bd
->bd_inode
->i_mutex
);
391 spin_lock_irqsave(&d
->lock
, flags
);
392 d
->flags
|= DEVFL_UP
;
393 d
->flags
&= ~DEVFL_NEWSIZE
;
394 spin_unlock_irqrestore(&d
->lock
, flags
);
/* ataid_complete: parse a raw 512-byte ATA IDENTIFY response 'id' (array of
 * little-endian 16-bit words, hence the [word<<1] byte offsets read via
 * get_unaligned) and update device 'd'.  Visible logic: OR words 83
 * (command sets supported) and 86 (command sets enabled); if bit 10 (LBA48)
 * is set, take the 64-bit sector count from word 100 and derive a synthetic
 * cylinder count the same way ide-disk.c does; otherwise clear DEVFL_EXT
 * and use the 28-bit sector count from word 60 plus the legacy (ATA-6
 * obsolete) CHS geometry from words 54-56.  If the size changed, log it,
 * store the new capacity in the gendisk, flag DEVFL_NEWSIZE, and — unless
 * gendisk allocation work is already pending (DEVFL_GDALLOC, in which case
 * a warning is printed) — set the flag and schedule the deferred work
 * (aoecmd_sleepwork) to apply it.
 * NOTE(review): fragmentary — the heads/sectors assignments for the LBA48
 * branch and some geometry lines are elided; the exact branch boundaries
 * cannot be confirmed from this extract.
 */
399 ataid_complete(struct aoedev
*d
, unsigned char *id
)
404 /* word 83: command set supported */
405 n
= le16_to_cpu(get_unaligned((__le16
*) &id
[83<<1]));
407 /* word 86: command set/feature enabled */
408 n
|= le16_to_cpu(get_unaligned((__le16
*) &id
[86<<1]));
410 if (n
& (1<<10)) { /* bit 10: LBA 48 */
411 d
->flags
|= DEVFL_EXT
;
413 /* word 100: number lba48 sectors */
414 ssize
= le64_to_cpu(get_unaligned((__le64
*) &id
[100<<1]));
416 /* set as in ide-disk.c:init_idedisk_capacity */
417 d
->geo
.cylinders
= ssize
;
418 d
->geo
.cylinders
/= (255 * 63);
422 d
->flags
&= ~DEVFL_EXT
;
424 /* number lba28 sectors */
425 ssize
= le32_to_cpu(get_unaligned((__le32
*) &id
[60<<1]));
427 /* NOTE: obsolete in ATA 6 */
428 d
->geo
.cylinders
= le16_to_cpu(get_unaligned((__le16
*) &id
[54<<1]));
429 d
->geo
.heads
= le16_to_cpu(get_unaligned((__le16
*) &id
[55<<1]));
430 d
->geo
.sectors
= le16_to_cpu(get_unaligned((__le16
*) &id
[56<<1]));
433 if (d
->ssize
!= ssize
)
434 printk(KERN_INFO
"aoe: %012llx e%lu.%lu v%04x has %llu "
435 "sectors\n", (unsigned long long)mac_addr(d
->addr
),
436 d
->aoemajor
, d
->aoeminor
,
437 d
->fw_ver
, (long long)ssize
);
441 d
->gd
->capacity
= ssize
;
442 d
->flags
|= DEVFL_NEWSIZE
;
444 if (d
->flags
& DEVFL_GDALLOC
) {
445 printk(KERN_INFO
"aoe: %s: %s e%lu.%lu, %s\n",
447 "can't schedule work for",
448 d
->aoemajor
, d
->aoeminor
,
449 "it's already on! (This really shouldn't happen).\n");
452 d
->flags
|= DEVFL_GDALLOC
;
454 schedule_work(&d
->work
);
/* calc_rttavg: fold a new round-trip-time sample 'rtt' into the device's
 * moving average using gain 0.25, per the cited Jacobson & Karels paper;
 * the visible branch clamps the sample ('n') at MAXTIMER.
 * NOTE(review): fragmentary — the lower clamp, the averaging arithmetic,
 * and the d->rttavg update are elided in this extract.
 */
458 calc_rttavg(struct aoedev
*d
, int rtt
)
465 else if (n
> MAXTIMER
)
468 /* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
/* aoecmd_ata_rsp: handle an incoming AoE ATA response 'skb'.  Visible flow:
 * locate the device by shelf/slot from the response header (logging and
 * presumably bailing when unknown), take d->lock, match the response tag to
 * an outstanding frame via getframe (logging unmatched/late responses),
 * feed the measured round trip into calc_rttavg, then compare the inbound
 * ATA header against the frame's outbound one.  An IDENTIFY completion
 * clears DEVFL_PAUSE.  Error bits (0xa9 in cmdstat) log the failure and
 * mark the buffer BUFFL_FAIL.  Otherwise the outbound command is switched
 * on: for reads, the payload length (scnt << 9) is validated against the
 * skb length (runt packets are logged and the lock dropped — returning lets
 * the rexmit timer retry) before copying data to f->bufaddr; for IDENTIFY,
 * a 512-byte minimum is enforced before calling ataid_complete; unknown
 * outbound commands are logged.  Completion accounting then decrements
 * buf->nframesout, and when no frames remain and the buffer is fully
 * serviced, disk statistics (ios/ticks/sectors/io_ticks) are recorded, the
 * bio is ended with -EIO if BUFFL_FAIL was set, and the buf returned to the
 * mempool.  Finally the send queue is drained (head/tail reset to NULL) and
 * the lock released.
 * NOTE(review): heavily fragmentary — local declarations, several early
 * returns, switch case labels, the WRITE case, and the actual transmit of
 * the drained send queue are elided; verify details against full source.
 */
474 aoecmd_ata_rsp(struct sk_buff
*skb
)
478 struct aoe_atahdr
*ahin
, *ahout
;
487 hin
= (struct aoe_hdr
*) skb
->mac
.raw
;
488 aoemajor
= be16_to_cpu(hin
->major
);
489 d
= aoedev_by_aoeaddr(aoemajor
, hin
->minor
);
491 snprintf(ebuf
, sizeof ebuf
, "aoecmd_ata_rsp: ata response "
492 "for unknown device %d.%d\n",
493 aoemajor
, hin
->minor
);
498 spin_lock_irqsave(&d
->lock
, flags
);
500 f
= getframe(d
, be32_to_cpu(hin
->tag
));
502 spin_unlock_irqrestore(&d
->lock
, flags
);
503 snprintf(ebuf
, sizeof ebuf
,
504 "%15s e%d.%d tag=%08x@%08lx\n",
506 be16_to_cpu(hin
->major
),
508 be32_to_cpu(hin
->tag
),
514 calc_rttavg(d
, tsince(f
->tag
));
516 ahin
= (struct aoe_atahdr
*) (hin
+1);
517 ahout
= (struct aoe_atahdr
*) (f
->data
+ sizeof(struct aoe_hdr
));
520 if (ahout
->cmdstat
== WIN_IDENTIFY
)
521 d
->flags
&= ~DEVFL_PAUSE
;
522 if (ahin
->cmdstat
& 0xa9) { /* these bits cleared on success */
523 printk(KERN_CRIT
"aoe: aoecmd_ata_rsp: ata error cmd=%2.2Xh "
524 "stat=%2.2Xh from e%ld.%ld\n",
525 ahout
->cmdstat
, ahin
->cmdstat
,
526 d
->aoemajor
, d
->aoeminor
);
528 buf
->flags
|= BUFFL_FAIL
;
530 switch (ahout
->cmdstat
) {
533 n
= ahout
->scnt
<< 9;
534 if (skb
->len
- sizeof *hin
- sizeof *ahin
< n
) {
535 printk(KERN_CRIT
"aoe: aoecmd_ata_rsp: runt "
536 "ata data size in read. skb->len=%d\n",
538 /* fail frame f? just returning will rexmit. */
539 spin_unlock_irqrestore(&d
->lock
, flags
);
542 memcpy(f
->bufaddr
, ahin
+1, n
);
547 if (skb
->len
- sizeof *hin
- sizeof *ahin
< 512) {
548 printk(KERN_INFO
"aoe: aoecmd_ata_rsp: runt data size "
549 "in ataid. skb->len=%d\n", skb
->len
);
550 spin_unlock_irqrestore(&d
->lock
, flags
);
553 ataid_complete(d
, (char *) (ahin
+1));
556 printk(KERN_INFO
"aoe: aoecmd_ata_rsp: unrecognized "
557 "outbound ata command %2.2Xh for %d.%d\n",
559 be16_to_cpu(hin
->major
),
565 buf
->nframesout
-= 1;
566 if (buf
->nframesout
== 0 && buf
->resid
== 0) {
567 unsigned long duration
= jiffies
- buf
->start_time
;
568 unsigned long n_sect
= buf
->bio
->bi_size
>> 9;
569 struct gendisk
*disk
= d
->gd
;
570 const int rw
= bio_data_dir(buf
->bio
);
572 disk_stat_inc(disk
, ios
[rw
]);
573 disk_stat_add(disk
, ticks
[rw
], duration
);
574 disk_stat_add(disk
, sectors
[rw
], n_sect
);
575 disk_stat_add(disk
, io_ticks
, duration
);
576 n
= (buf
->flags
& BUFFL_FAIL
) ? -EIO
: 0;
577 bio_endio(buf
->bio
, buf
->bio
->bi_size
, n
);
578 mempool_free(buf
, d
->bufpool
);
587 d
->sendq_hd
= d
->sendq_tl
= NULL
;
589 spin_unlock_irqrestore(&d
->lock
, flags
);
/* aoecmd_cfg: broadcast an AoE config query for the given shelf/slot by
 * building the packet list with aoecmd_cfg_pkts (NULL tail — the caller
 * does not chain onto a device send queue here).
 * NOTE(review): fragmentary — the transmit of the returned list 'sl' is
 * elided in this extract.
 */
594 aoecmd_cfg(ushort aoemajor
, unsigned char aoeminor
)
598 sl
= aoecmd_cfg_pkts(aoemajor
, aoeminor
, NULL
);
/* aoecmd_ata_id: build a single ATA IDENTIFY command frame for device 'd'
 * and return the prepared skb directly — per the original comment, it is
 * called from one place only, so the skb is returned rather than chained
 * onto the device send queue.  Visible steps: grab a free frame (logging
 * the should-never-happen case of none available), initialize the AoE +
 * ATA headers via aoehdr_atainit, set the IDENTIFY command, prepare the
 * skb, reset rttavg to MAXTIMER, and install rexmit_timer as the device's
 * timer callback.
 * NOTE(review): fragmentary — the early return on no-frame, timer data /
 * expiry setup, and the final return of the skb are elided in this extract.
 */
604 * Since we only call this in one place (and it only prepares one frame)
605 * we just return the skb. Usually we'd chain it up to the aoedev sendq.
607 static struct sk_buff
*
608 aoecmd_ata_id(struct aoedev
*d
)
611 struct aoe_atahdr
*ah
;
615 f
= getframe(d
, FREETAG
);
617 printk(KERN_CRIT
"aoe: aoecmd_ata_id: can't get a frame. "
618 "This shouldn't happen.\n");
622 /* initialize the headers & frame */
623 h
= (struct aoe_hdr
*) f
->data
;
624 ah
= (struct aoe_atahdr
*) (h
+1);
625 f
->ndata
= sizeof *h
+ sizeof *ah
;
626 memset(h
, 0, f
->ndata
);
627 f
->tag
= aoehdr_atainit(d
, h
);
631 /* set up ata header */
633 ah
->cmdstat
= WIN_IDENTIFY
;
636 skb
= skb_prepare(d
, f
);
638 d
->rttavg
= MAXTIMER
;
639 d
->timer
.function
= rexmit_timer
;
/* aoecmd_cfg_rsp: handle an AoE config-query response 'skb', discovering or
 * refreshing a device.  Visible flow: parse the AoE and config headers;
 * warn loudly on major address 0xfff (per the original comment, backwards
 * dip switches are common enough to deserve a special-case message);
 * compute the system minor and reject addresses whose partition range would
 * exceed MINORMASK; clamp the advertised buffer count to MAXFRAMES; look up
 * or create the device via aoedev_by_sysminor_m (logging failure).  Under
 * d->lock: record the responder's MAC so the device can migrate interfaces,
 * leave an open, unpaused device alone (don't change users' perspective),
 * otherwise force DEVFL_PAUSE, record the firmware version, and — if no
 * commands are outstanding — build an ATA IDENTIFY (aoecmd_ata_id) to probe
 * the device, then drop the lock.
 * NOTE(review): fragmentary — local declarations, several early returns,
 * the bufcnt clamp body, and the transmit of the IDENTIFY skb 'sl' are
 * elided in this extract.
 */
645 aoecmd_cfg_rsp(struct sk_buff
*skb
)
649 struct aoe_cfghdr
*ch
;
650 ulong flags
, sysminor
, aoemajor
;
653 enum { MAXFRAMES
= 16 };
655 h
= (struct aoe_hdr
*) skb
->mac
.raw
;
656 ch
= (struct aoe_cfghdr
*) (h
+1);
659 * Enough people have their dip switches set backwards to
660 * warrant a loud message for this special case.
662 aoemajor
= be16_to_cpu(h
->major
);
663 if (aoemajor
== 0xfff) {
664 printk(KERN_CRIT
"aoe: aoecmd_cfg_rsp: Warning: shelf "
665 "address is all ones. Check shelf dip switches\n");
669 sysminor
= SYSMINOR(aoemajor
, h
->minor
);
670 if (sysminor
* AOE_PARTITIONS
+ AOE_PARTITIONS
> MINORMASK
) {
672 "aoe: e%ld.%d: minor number too large\n",
673 aoemajor
, (int) h
->minor
);
677 bufcnt
= be16_to_cpu(ch
->bufcnt
);
678 if (bufcnt
> MAXFRAMES
) /* keep it reasonable */
681 d
= aoedev_by_sysminor_m(sysminor
, bufcnt
);
683 printk(KERN_INFO
"aoe: aoecmd_cfg_rsp: device sysminor_m failure\n");
687 spin_lock_irqsave(&d
->lock
, flags
);
689 /* permit device to migrate mac and network interface */
691 memcpy(d
->addr
, h
->src
, sizeof d
->addr
);
693 /* don't change users' perspective */
694 if (d
->nopen
&& !(d
->flags
& DEVFL_PAUSE
)) {
695 spin_unlock_irqrestore(&d
->lock
, flags
);
698 d
->flags
|= DEVFL_PAUSE
; /* force pause */
699 d
->fw_ver
= be16_to_cpu(ch
->fwver
);
701 /* check for already outstanding ataid */
702 sl
= aoedev_isbusy(d
) == 0 ? aoecmd_ata_id(d
) : NULL
;
704 spin_unlock_irqrestore(&d
->lock
, flags
);