/* Copyright (c) 2006 Coraid, Inc.  See COPYING for GPL terms. */
/*
 * Filesystem request handling methods
 */
#include <linux/hdreg.h>
#include <linux/blkdev.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/genhd.h>
#include <net/net_namespace.h>
#include <asm/unaligned.h>
#include "aoe.h"
#define TIMERTICK (HZ / 10)
#define MINTIMER (2 * TIMERTICK)
#define MAXTIMER (HZ << 1)
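/* TIMERTICK is a tenth of a second, so the retransmit timer scans frames
 * roughly ten times per second; rtt-derived timeouts are clamped between
 * MINTIMER (two ticks) and MAXTIMER (two seconds).
 */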
static int aoe_deadsecs = 60 * 3;
module_param(aoe_deadsecs, int, 0644);
MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");
static struct sk_buff *
new_skb(ulong len)
{
	struct sk_buff *skb;

	skb = alloc_skb(len, GFP_ATOMIC);
	if (skb) {
		skb_reset_mac_header(skb);
		skb_reset_network_header(skb);
		skb->protocol = __constant_htons(ETH_P_AOE);
		skb->next = skb->prev = NULL;

		/* tell the network layer not to perform IP checksums
		 * or to get the NIC to do it
		 */
		skb->ip_summed = CHECKSUM_NONE;
	}
	return skb;
}
static struct frame *
getframe(struct aoedev *d, int tag)
{
	struct frame *f, *e;

	f = d->frames;
	e = f + d->nframes;
	for (; f<e; f++)
		if (f->tag == tag)
			return f;
	return NULL;
}
/*
 * Leave the top bit clear so we have tagspace for userland.
 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
 * This driver reserves tag -1 to mean "unused frame."
 */
static int
newtag(struct aoedev *d)
{
	register ulong n;

	n = jiffies & 0xffff;
	return n |= (++d->lasttag & 0x7fff) << 16;
}
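/* Example: a tag minted at xmit tick 0x1234 with lasttag 5 comes out as
 * 0x00051234; tsince() below recovers a frame's age by subtracting the
 * low 16 bits from the current tick, modulo 1<<16.
 */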
static int
aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
{
	u32 host_tag = newtag(d);

	memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
	memcpy(h->dst, d->addr, sizeof h->dst);
	h->type = __constant_cpu_to_be16(ETH_P_AOE);
	h->major = cpu_to_be16(d->aoemajor);
	h->minor = d->aoeminor;
	h->tag = cpu_to_be32(host_tag);

	return host_tag;
}
static inline void
put_lba(struct aoe_atahdr *ah, sector_t lba)
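/* The body is a minimal sketch, assuming struct aoe_atahdr carries the
 * LBA as six byte-wide fields lba0..lba5, least significant byte first
 * (lba3's high nibble doubles as the device/LBA flag bits set elsewhere):
 */
{
	ah->lba0 = lba;
	ah->lba1 = lba >>= 8;
	ah->lba2 = lba >>= 8;
	ah->lba3 = lba >>= 8;
	ah->lba4 = lba >>= 8;
	ah->lba5 = lba >>= 8;
}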
static void
aoecmd_ata_rw(struct aoedev *d, struct frame *f)
{
	struct aoe_hdr *h;
	struct aoe_atahdr *ah;
	struct buf *buf;
	struct sk_buff *skb;
	ulong bcnt;
	register sector_t sector;
	char writebit, extbit;

	writebit = 0x10;
	extbit = 0x4;

	buf = d->inprocess;
	sector = buf->sector;
	bcnt = buf->bv_resid;
	if (bcnt > d->maxbcnt)
		bcnt = d->maxbcnt;
	/* initialize the headers & frame */
	skb = f->skb;
	h = aoe_hdr(skb);
	ah = (struct aoe_atahdr *) (h+1);
	skb_put(skb, sizeof *h + sizeof *ah);
	memset(h, 0, skb->len);
	f->tag = aoehdr_atainit(d, h);
	f->buf = buf;
	f->bufaddr = buf->bufaddr;
	f->bcnt = bcnt;
	f->lba = sector;
	/* set up ata header */
	ah->scnt = bcnt >> 9;
	put_lba(ah, sector);
	if (d->flags & DEVFL_EXT) {
		ah->aflags |= AOEAFL_EXT;
	} else {
		extbit = 0;
		ah->lba3 &= 0x0f;
		ah->lba3 |= 0xe0;	/* LBA bit + obsolete 0xa0 */
	}
	if (bio_data_dir(buf->bio) == WRITE) {
		skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr),
			offset_in_page(f->bufaddr), bcnt);
		ah->aflags |= AOEAFL_WRITE;
		skb->len += bcnt;
		skb->data_len = bcnt;
	} else {
		writebit = 0;
	}
	ah->cmdstat = WIN_READ | writebit | extbit;
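	/* WIN_READ is the base opcode; writebit (0x10) turns it into WIN_WRITE
	 * and extbit (0x04) selects the LBA48 _EXT command variants.
	 */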
	/* mark all tracking fields and load out */
	buf->nframesout += 1;
	buf->bufaddr += bcnt;
	buf->bv_resid -= bcnt;
/* printk(KERN_DEBUG "aoe: bv_resid=%ld\n", buf->bv_resid); */
	buf->resid -= bcnt;
	buf->sector += bcnt >> 9;
	if (buf->resid == 0) {
		d->inprocess = NULL;
	} else if (buf->bv_resid == 0) {
		buf->bv++;
		WARN_ON(buf->bv->bv_len == 0);
		buf->bv_resid = buf->bv->bv_len;
		buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
	}
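	/* A bv_resid of zero means the current bio_vec is exhausted: the code
	 * above advances to the next vector and re-derives the kernel virtual
	 * address of its page.
	 */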
	skb->dev = d->ifp;
	skb = skb_clone(skb, GFP_ATOMIC);
	if (skb == NULL)
		return;
	if (d->sendq_hd)
		d->sendq_tl->next = skb;
	else
		d->sendq_hd = skb;
	d->sendq_tl = skb;
}
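/* Note that only a clone is queued for transmission: the original skb stays
 * attached to the frame for retransmit, and freeframe() will not reuse the
 * frame until the network layer drops its reference to that skb.
 */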
/* some callers cannot sleep, and they can call this function,
 * transmitting the packets later, when interrupts are on
 */
static struct sk_buff *
aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff **tail)
{
	struct aoe_hdr *h;
	struct aoe_cfghdr *ch;
	struct sk_buff *skb, *sl, *sl_tail;
	struct net_device *ifp;
	sl = sl_tail = NULL;

	read_lock(&dev_base_lock);
	for_each_netdev(&init_net, ifp) {
		if (!is_aoe_netif(ifp))
			continue;

		skb = new_skb(sizeof *h + sizeof *ch);
		if (skb == NULL) {
			printk(KERN_INFO "aoe: skb alloc failure\n");
			continue;
		}
		skb_put(skb, sizeof *h + sizeof *ch);
		skb->dev = ifp;
		if (sl_tail == NULL)
			sl_tail = skb;
		h = aoe_hdr(skb);
		memset(h, 0, sizeof *h + sizeof *ch);

		memset(h->dst, 0xff, sizeof h->dst);
		memcpy(h->src, ifp->dev_addr, sizeof h->src);
		h->type = __constant_cpu_to_be16(ETH_P_AOE);
		h->major = cpu_to_be16(aoemajor);

		skb->next = sl;
		sl = skb;
	}
	read_unlock(&dev_base_lock);

	if (tail != NULL)
		*tail = sl_tail;
	return sl;
}
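/* Each query built above is addressed to the Ethernet broadcast address
 * (h->dst is all 0xff), so one pass over the AoE-eligible interfaces
 * discovers every shelf reachable from this host.
 */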
static struct frame *
freeframe(struct aoedev *d)
{
	struct frame *f, *e;
	int n = 0;

	f = d->frames;
	e = f + d->nframes;
	for (; f<e; f++) {
		if (f->tag != FREETAG)
			continue;
		if (atomic_read(&skb_shinfo(f->skb)->dataref) == 1) {
			skb_shinfo(f->skb)->nr_frags = f->skb->data_len = 0;
			return f;
		}
		n++;
	}
	if (n == d->nframes)	/* wait for network layer */
		d->flags |= DEVFL_KICKME;
	return NULL;
}
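/* A frame whose skb is still referenced by the network layer (dataref > 1)
 * cannot be reused yet; when every free-tagged frame is in that state,
 * DEVFL_KICKME asks the retransmit timer to run aoecmd_work() again later.
 */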
/* enters with d->lock held */
void
aoecmd_work(struct aoedev *d)
{
	struct frame *f;
	struct buf *buf;

	if (d->flags & DEVFL_PAUSE) {
		if (!aoedev_isbusy(d))
			d->sendq_hd = aoecmd_cfg_pkts(d->aoemajor,
						d->aoeminor, &d->sendq_tl);
		return;
	}
loop:
	f = freeframe(d);
	if (f == NULL)
		return;
	if (d->inprocess == NULL) {
		if (list_empty(&d->bufq))
			return;
		buf = container_of(d->bufq.next, struct buf, bufs);
		list_del(d->bufq.next);
/*printk(KERN_DEBUG "aoe: bi_size=%ld\n", buf->bio->bi_size); */
		d->inprocess = buf;
	}
	aoecmd_ata_rw(d, f);
	goto loop;
}
static void
rexmit(struct aoedev *d, struct frame *f)
{
	struct sk_buff *skb;
	struct aoe_hdr *h;
	struct aoe_atahdr *ah;
	char buf[128];
	u32 n;

	n = newtag(d);

	snprintf(buf, sizeof buf,
		"%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n",
		"retransmit",
		d->aoemajor, d->aoeminor, f->tag, jiffies, n);
	aoechr_error(buf);
	skb = f->skb;
	h = aoe_hdr(skb);
	ah = (struct aoe_atahdr *) (h+1);
	f->tag = n;
	h->tag = cpu_to_be32(n);
	memcpy(h->dst, d->addr, sizeof h->dst);
	memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
	n = DEFAULTBCNT / 512;
	if (ah->scnt > n) {
		ah->scnt = n;
		if (ah->aflags & AOEAFL_WRITE) {
			skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr),
				offset_in_page(f->bufaddr), DEFAULTBCNT);
			skb->len = sizeof *h + sizeof *ah + DEFAULTBCNT;
			skb->data_len = DEFAULTBCNT;
		}
		if (++d->lostjumbo > (d->nframes << 1)
		&& d->maxbcnt != DEFAULTBCNT) {
			printk(KERN_INFO "aoe: e%ld.%ld: too many lost jumbo on %s - using 1KB frames.\n",
				d->aoemajor, d->aoeminor, d->ifp->name);
			d->maxbcnt = DEFAULTBCNT;
			d->flags |= DEVFL_MAXBCNT;
		}
	}
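	/* Every retransmit shrinks the frame to DEFAULTBCNT bytes of data; once
	 * enough oversized frames have gone unanswered, jumbo frames are assumed
	 * to be dropped along the path and are given up for this device.
	 */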
	skb->dev = d->ifp;
	skb = skb_clone(skb, GFP_ATOMIC);
	if (skb == NULL)
		return;
	if (d->sendq_hd)
		d->sendq_tl->next = skb;
	else
		d->sendq_hd = skb;
	d->sendq_tl = skb;
}
static int
tsince(int tag)
{
	int n;

	n = jiffies & 0xffff;
	n -= tag & 0xffff;
	if (n < 0)
		n += 1<<16;
	return n;
}
static void
rexmit_timer(ulong vp)
{
	struct aoedev *d;
	struct frame *f, *e;
	struct sk_buff *sl;
	register long timeout;
	ulong flags, n;

	d = (struct aoedev *) vp;
	sl = NULL;

	/* timeout is always ~150% of the moving average */
	timeout = d->rttavg;
	timeout += timeout >> 1;

	spin_lock_irqsave(&d->lock, flags);

	if (d->flags & DEVFL_TKILL) {
		spin_unlock_irqrestore(&d->lock, flags);
		return;
	}
	f = d->frames;
	e = f + d->nframes;
	for (; f<e; f++) {
		if (f->tag != FREETAG && tsince(f->tag) >= timeout) {
			n = f->waited += timeout;
			n /= HZ;
			if (n > aoe_deadsecs) { /* waited too long for response */
				aoedev_downdev(d);
				break;
			}
			rexmit(d, f);
		}
	}
	if (d->flags & DEVFL_KICKME) {
		d->flags &= ~DEVFL_KICKME;
		aoecmd_work(d);
	}

	sl = d->sendq_hd;
	d->sendq_hd = d->sendq_tl = NULL;
	if (sl) {
		n = d->rttavg <<= 1;
		if (n > MAXTIMER)
			d->rttavg = MAXTIMER;
	}

	d->timer.expires = jiffies + TIMERTICK;
	add_timer(&d->timer);

	spin_unlock_irqrestore(&d->lock, flags);

	aoenet_xmit(sl);
}
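/* The timer re-arms itself every TIMERTICK for the life of the device, and a
 * frame that has waited longer than aoe_deadsecs in total causes the whole
 * device to be failed rather than retransmitted again.
 */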
/* this function performs work that has been deferred until sleeping is OK
 */
void
aoecmd_sleepwork(struct work_struct *work)
{
	struct aoedev *d = container_of(work, struct aoedev, work);
	if (d->flags & DEVFL_GDALLOC)
		aoeblk_gdalloc(d);

	if (d->flags & DEVFL_NEWSIZE) {
		struct block_device *bd;
		unsigned long flags;
		u64 ssize;

		ssize = d->gd->capacity;
		bd = bdget_disk(d->gd, 0);

		if (bd) {
			mutex_lock(&bd->bd_inode->i_mutex);
			i_size_write(bd->bd_inode, (loff_t)ssize<<9);
			mutex_unlock(&bd->bd_inode->i_mutex);
			bdput(bd);
		}
		spin_lock_irqsave(&d->lock, flags);
		d->flags |= DEVFL_UP;
		d->flags &= ~DEVFL_NEWSIZE;
		spin_unlock_irqrestore(&d->lock, flags);
	}
}
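/* Both jobs above may sleep (gendisk allocation, the i_mutex-protected
 * i_size update), which is why they are deferred to this workqueue handler
 * instead of running in the interrupt-time response path; d->lock is taken
 * only for the final flag update.
 */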
static void
ataid_complete(struct aoedev *d, unsigned char *id)
{
	u64 ssize;
	u16 n;

	/* word 83: command set supported */
	n = le16_to_cpu(get_unaligned((__le16 *) &id[83<<1]));

	/* word 86: command set/feature enabled */
	n |= le16_to_cpu(get_unaligned((__le16 *) &id[86<<1]));

	if (n & (1<<10)) {	/* bit 10: LBA 48 */
		d->flags |= DEVFL_EXT;

		/* word 100: number lba48 sectors */
		ssize = le64_to_cpu(get_unaligned((__le64 *) &id[100<<1]));

		/* set as in ide-disk.c:init_idedisk_capacity */
		d->geo.cylinders = ssize;
		d->geo.cylinders /= (255 * 63);
		d->geo.heads = 255;
		d->geo.sectors = 63;
	} else {
		d->flags &= ~DEVFL_EXT;

		/* number lba28 sectors */
		ssize = le32_to_cpu(get_unaligned((__le32 *) &id[60<<1]));

		/* NOTE: obsolete in ATA 6 */
		d->geo.cylinders = le16_to_cpu(get_unaligned((__le16 *) &id[54<<1]));
		d->geo.heads = le16_to_cpu(get_unaligned((__le16 *) &id[55<<1]));
		d->geo.sectors = le16_to_cpu(get_unaligned((__le16 *) &id[56<<1]));
	}
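	/* IDENTIFY data is an array of 16-bit words, so word W lives at byte
	 * offset W<<1; get_unaligned() is needed because id points into a
	 * received packet with no alignment guarantee.
	 */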
	if (d->ssize != ssize)
		printk(KERN_INFO "aoe: %012llx e%lu.%lu v%04x has %llu sectors\n",
			(unsigned long long)mac_addr(d->addr),
			d->aoemajor, d->aoeminor,
			d->fw_ver, (long long)ssize);
	d->ssize = ssize;
	if (d->gd != NULL) {
		d->gd->capacity = ssize;
		d->flags |= DEVFL_NEWSIZE;
	} else {
		if (d->flags & DEVFL_GDALLOC) {
			printk(KERN_ERR "aoe: can't schedule work for e%lu.%lu, %s\n",
				d->aoemajor, d->aoeminor,
				"it's already on! This shouldn't happen.\n");
			return;
		}
		d->flags |= DEVFL_GDALLOC;
	}
	schedule_work(&d->work);
}
static void
calc_rttavg(struct aoedev *d, int rtt)
{
	register long n;

	n = rtt;
	if (n < 0) {
		n = -rtt;
		if (n < MINTIMER)
			n = MINTIMER;
		else if (n > MAXTIMER)
			n = MAXTIMER;
		d->mintimer += (n - d->mintimer) >> 1;
	} else if (n < d->mintimer)
		n = d->mintimer;
	else if (n > MAXTIMER)
		n = MAXTIMER;

	/* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
	n -= d->rttavg;
	d->rttavg += n >> 2;
}
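/* Callers pass a positive rtt when a response matches a live frame, and a
 * negative value (see aoecmd_ata_rsp) when a response arrives for a tag no
 * longer outstanding; the negative path only tunes mintimer.
 */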
void
aoecmd_ata_rsp(struct sk_buff *skb)
{
	struct aoedev *d;
	struct aoe_hdr *hin, *hout;
	struct aoe_atahdr *ahin, *ahout;
	struct frame *f;
	struct buf *buf;
	struct sk_buff *sl;
	register long n;
	ulong flags;
	char ebuf[128];
	u16 aoemajor;

	hin = aoe_hdr(skb);
	aoemajor = be16_to_cpu(get_unaligned(&hin->major));
	d = aoedev_by_aoeaddr(aoemajor, hin->minor);
	if (d == NULL) {
		snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
			"for unknown device %d.%d\n",
			aoemajor, hin->minor);
		aoechr_error(ebuf);
		return;
	}
	spin_lock_irqsave(&d->lock, flags);

	n = be32_to_cpu(get_unaligned(&hin->tag));
	f = getframe(d, n);
	if (f == NULL) {
		calc_rttavg(d, -tsince(n));
		spin_unlock_irqrestore(&d->lock, flags);
		snprintf(ebuf, sizeof ebuf,
			"%15s e%d.%d tag=%08x@%08lx\n",
			"unexpected rsp",
			be16_to_cpu(get_unaligned(&hin->major)),
			hin->minor,
			be32_to_cpu(get_unaligned(&hin->tag)),
			jiffies);
		aoechr_error(ebuf);
		return;
	}
	calc_rttavg(d, tsince(f->tag));

	ahin = (struct aoe_atahdr *) (hin+1);
	hout = aoe_hdr(f->skb);
	ahout = (struct aoe_atahdr *) (hout+1);
	buf = f->buf;
	if (ahout->cmdstat == WIN_IDENTIFY)
		d->flags &= ~DEVFL_PAUSE;
	if (ahin->cmdstat & 0xa9) {	/* these bits cleared on success */
		printk(KERN_ERR
			"aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%ld\n",
			ahout->cmdstat, ahin->cmdstat,
			d->aoemajor, d->aoeminor);
		if (buf)
			buf->flags |= BUFFL_FAIL;
	} else {
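		/* Success path. The 0xa9 mask above covers the ATA status bits
		 * ERR (0x01), DRQ (0x08), DF (0x20), and BSY (0x80), any of
		 * which indicates the command did not complete cleanly.
		 */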
		n = ahout->scnt << 9;
		switch (ahout->cmdstat) {
		case WIN_READ:
		case WIN_READ_EXT:
			if (skb->len - sizeof *hin - sizeof *ahin < n) {
				printk(KERN_ERR
					"aoe: runt data size in read. skb->len=%d\n",
					skb->len);
				/* fail frame f? just returning will rexmit. */
				spin_unlock_irqrestore(&d->lock, flags);
				return;
			}
			memcpy(f->bufaddr, ahin+1, n);
			/* fall through: reads continue into the write cases'
			 * multi-frame bookkeeping below */
		case WIN_WRITE:
		case WIN_WRITE_EXT:
			if (f->bcnt -= n) {
				skb = f->skb;
				f->bufaddr += n;
				put_lba(ahout, f->lba += ahout->scnt);
				n = f->bcnt;
				if (n > DEFAULTBCNT)
					n = DEFAULTBCNT;
				ahout->scnt = n >> 9;
				if (ahout->aflags & AOEAFL_WRITE) {
					skb_fill_page_desc(skb, 0,
						virt_to_page(f->bufaddr),
						offset_in_page(f->bufaddr), n);
					skb->len = sizeof *hout + sizeof *ahout + n;
					skb->data_len = n;
				}
				f->tag = newtag(d);
				hout->tag = cpu_to_be32(f->tag);
				skb->dev = d->ifp;
				skb = skb_clone(skb, GFP_ATOMIC);
				spin_unlock_irqrestore(&d->lock, flags);
				if (skb)
					aoenet_xmit(skb);
				return;
			}
			break;
		case WIN_IDENTIFY:
			if (skb->len - sizeof *hin - sizeof *ahin < 512) {
				printk(KERN_INFO
					"aoe: runt data size in ataid. skb->len=%d\n",
					skb->len);
				spin_unlock_irqrestore(&d->lock, flags);
				return;
			}
			ataid_complete(d, (char *) (ahin+1));
			break;
636 "aoe: unrecognized ata command %2.2Xh for %d.%d\n",
638 be16_to_cpu(get_unaligned(&hin
->major
)),
	if (buf) {
		buf->nframesout -= 1;
		if (buf->nframesout == 0 && buf->resid == 0) {
			unsigned long duration = jiffies - buf->start_time;
			unsigned long n_sect = buf->bio->bi_size >> 9;
			struct gendisk *disk = d->gd;
			const int rw = bio_data_dir(buf->bio);

			disk_stat_inc(disk, ios[rw]);
			disk_stat_add(disk, ticks[rw], duration);
			disk_stat_add(disk, sectors[rw], n_sect);
			disk_stat_add(disk, io_ticks, duration);
			n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
			bio_endio(buf->bio, n);
			mempool_free(buf, d->bufpool);
		}
	}
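	/* The bio completes only when every frame carved from it has returned
	 * (nframesout reaches zero) and no bytes remain outstanding (resid is
	 * zero); per-request statistics are folded into the gendisk then.
	 */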
	f->buf = NULL;
	f->tag = FREETAG;

	aoecmd_work(d);
	sl = d->sendq_hd;
	d->sendq_hd = d->sendq_tl = NULL;

	spin_unlock_irqrestore(&d->lock, flags);
	aoenet_xmit(sl);
}
void
aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
{
	struct sk_buff *sl;

	sl = aoecmd_cfg_pkts(aoemajor, aoeminor, NULL);

	aoenet_xmit(sl);
}
/*
 * Since we only call this in one place (and it only prepares one frame)
 * we just return the skb.  Usually we'd chain it up to the aoedev sendq.
 */
static struct sk_buff *
aoecmd_ata_id(struct aoedev *d)
{
	struct aoe_hdr *h;
	struct aoe_atahdr *ah;
	struct frame *f;
	struct sk_buff *skb;

	f = freeframe(d);
	if (f == NULL) {
		printk(KERN_ERR "aoe: can't get a frame. This shouldn't happen.\n");
		return NULL;
	}
	/* initialize the headers & frame */
	skb = f->skb;
	h = aoe_hdr(skb);
	ah = (struct aoe_atahdr *) (h+1);
	skb_put(skb, sizeof *h + sizeof *ah);
	memset(h, 0, skb->len);
	f->tag = aoehdr_atainit(d, h);
	/* set up ata header */
	ah->scnt = 1;
	ah->cmdstat = WIN_IDENTIFY;
	skb->dev = d->ifp;

	d->rttavg = MAXTIMER;
	d->timer.function = rexmit_timer;

	return skb_clone(skb, GFP_ATOMIC);
}
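/* As with the rw path, only a clone leaves the driver; the frame keeps the
 * original skb for retransmit, and rttavg is reset to MAXTIMER so the first
 * timeout for the new target is generous.
 */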
void
aoecmd_cfg_rsp(struct sk_buff *skb)
{
	struct aoedev *d;
	struct aoe_hdr *h;
	struct aoe_cfghdr *ch;
	ulong flags, sysminor, aoemajor;
	struct sk_buff *sl;
	enum { MAXFRAMES = 16 };
	u16 n;

	h = aoe_hdr(skb);
	ch = (struct aoe_cfghdr *) (h+1);
	/*
	 * Enough people have their dip switches set backwards to
	 * warrant a loud message for this special case.
	 */
	aoemajor = be16_to_cpu(get_unaligned(&h->major));
	if (aoemajor == 0xfff) {
		printk(KERN_ERR "aoe: Warning: shelf address is all ones. "
			"Check shelf dip switches.\n");
		return;
	}
	sysminor = SYSMINOR(aoemajor, h->minor);
	if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
		printk(KERN_INFO "aoe: e%ld.%d: minor number too large\n",
			aoemajor, (int) h->minor);
		return;
	}
	n = be16_to_cpu(ch->bufcnt);
	if (n > MAXFRAMES)	/* keep it reasonable */
		n = MAXFRAMES;
	d = aoedev_by_sysminor_m(sysminor, n);
	if (d == NULL) {
		printk(KERN_INFO "aoe: device sysminor_m failure\n");
		return;
	}
	spin_lock_irqsave(&d->lock, flags);

	/* permit device to migrate mac and network interface */
	d->ifp = skb->dev;
	memcpy(d->addr, h->src, sizeof d->addr);
	if (!(d->flags & DEVFL_MAXBCNT)) {
		n = d->ifp->mtu;
		n -= sizeof (struct aoe_hdr) + sizeof (struct aoe_atahdr);
		n /= 512;
		if (n > ch->scnt)
			n = ch->scnt;
		n = n ? n * 512 : DEFAULTBCNT;
		if (n != d->maxbcnt) {
			printk(KERN_INFO
				"aoe: e%ld.%ld: setting %d byte data frames on %s\n",
				d->aoemajor, d->aoeminor, n, d->ifp->name);
			d->maxbcnt = n;
		}
	}
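	/* Worked example, assuming the Ethernet+AoE+ATA headers occupy 36
	 * bytes on the wire: a 1500-byte MTU leaves (1500 - 36) / 512 = 2
	 * whole sectors, i.e. 1024-byte (DEFAULTBCNT) data frames, while a
	 * 9000-byte jumbo MTU allows 17 sectors, subject to the target's
	 * advertised ch->scnt limit.
	 */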
	/* don't change users' perspective */
	if (d->nopen && !(d->flags & DEVFL_PAUSE)) {
		spin_unlock_irqrestore(&d->lock, flags);
		return;
	}
	d->flags |= DEVFL_PAUSE;	/* force pause */
	d->mintimer = MINTIMER;
	d->fw_ver = be16_to_cpu(ch->fwver);
	/* check for already outstanding ataid */
	sl = aoedev_isbusy(d) == 0 ? aoecmd_ata_id(d) : NULL;

	spin_unlock_irqrestore(&d->lock, flags);

	aoenet_xmit(sl);
}