/* Copyright (c) 2013 Coraid, Inc.  See COPYING for GPL terms. */
/*
 * AoE device utility functions; maintains device list.
 */
#include <linux/hdreg.h>
#include <linux/blkdev.h>
#include <linux/netdevice.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/bitmap.h>
#include <linux/kdev_t.h>
#include <linux/moduleparam.h>
#include "aoe.h"
static void dummy_timer(ulong);
static void freetgt(struct aoedev *d, struct aoetgt *t);
static void skbpoolfree(struct aoedev *d);
static int aoe_dyndevs = 1;
module_param(aoe_dyndevs, int, 0644);
MODULE_PARM_DESC(aoe_dyndevs, "Use dynamic minor numbers for devices.");
static struct aoedev *devlist;
static DEFINE_SPINLOCK(devlist_lock);
/* Because systems vary in how many partitions, slots per shelf, and
 * shelves they have, we need some flexibility in the way the minor
 * numbers are allocated.  So they are dynamic.
 */
#define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS)
static DEFINE_SPINLOCK(used_minors_lock);
static DECLARE_BITMAP(used_minors, N_DEVS);
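/*
 * Illustrative arithmetic, assuming the usual MINORBITS of 20 and the
 * default AOE_PARTITIONS of 16: N_DEVS is (1 << 20) / 16 = 65536, so
 * used_minors holds one bit per possible AoE device, and each device
 * reserves AOE_PARTITIONS consecutive system minors
 * (sysminor = n * AOE_PARTITIONS).
 */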
static int
minor_get_dyn(ulong *sysminor)
{
	ulong flags, n;
	int error = 0;

	spin_lock_irqsave(&used_minors_lock, flags);
	n = find_first_zero_bit(used_minors, N_DEVS);
	if (n < N_DEVS)
		set_bit(n, used_minors);
	else
		error = -1;
	spin_unlock_irqrestore(&used_minors_lock, flags);

	*sysminor = n * AOE_PARTITIONS;
	return error;
}
static int
minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin)
{
	ulong flags, n;
	int error = 0;
	enum {
		/* for backwards compatibility when !aoe_dyndevs,
		 * a static number of supported slots per shelf */
		NPERSHELF = 16,
	};

	if (aoemin >= NPERSHELF) {
		pr_err("aoe: %s %d slots per shelf\n",
			"static minor device numbers support only",
			NPERSHELF);
		return -1;
	}

	n = aoemaj * NPERSHELF + aoemin;
	if (n >= N_DEVS) {
		pr_err("aoe: %s with e%ld.%d\n",
			"cannot use static minor device numbers",
			aoemaj, aoemin);
		return -1;
	}

	spin_lock_irqsave(&used_minors_lock, flags);
	if (test_bit(n, used_minors)) {
		pr_err("aoe: %s %lu\n",
			"existing device already has static minor number",
			n);
		error = -1;
	} else
		set_bit(n, used_minors);
	spin_unlock_irqrestore(&used_minors_lock, flags);

	*sysminor = n * AOE_PARTITIONS;
	return error;
}
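/*
 * Worked example for the static scheme above, assuming NPERSHELF = 16
 * and AOE_PARTITIONS = 16: device e1.3 (shelf 1, slot 3) maps to
 * n = 1 * 16 + 3 = 19, so its first system minor is 19 * 16 = 304 and
 * minors 304..319 cover its partitions.
 */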
static int
minor_get(ulong *sysminor, ulong aoemaj, int aoemin)
{
	if (aoe_dyndevs)
		return minor_get_dyn(sysminor);
	else
		return minor_get_static(sysminor, aoemaj, aoemin);
}
static void
minor_free(ulong minor)
{
	ulong flags;

	minor /= AOE_PARTITIONS;
	BUG_ON(minor >= N_DEVS);

	spin_lock_irqsave(&used_minors_lock, flags);
	BUG_ON(!test_bit(minor, used_minors));
	clear_bit(minor, used_minors);
	spin_unlock_irqrestore(&used_minors_lock, flags);
}
/*
 * Users who grab a pointer to the device with aoedev_by_aoeaddr
 * automatically get a reference count and must be responsible
 * for performing an aoedev_put.  With the addition of async
 * kthread processing I'm no longer confident that we can
 * guarantee consistency in the face of device flushes.
 *
 * For the time being, we only bother to add extra references for
 * frames sitting on the iocq.  When the kthreads finish processing
 * these frames, they will aoedev_put the device.
 */
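/*
 * Minimal usage sketch (illustrative; the real callers are elsewhere
 * in the driver, e.g. aoecmd.c): a successful lookup must eventually
 * be balanced by aoedev_put(), roughly:
 *
 *	d = aoedev_by_aoeaddr(maj, min, do_alloc);
 *	if (d == NULL)
 *		return;
 *	... work with d, typically under d->lock ...
 *	aoedev_put(d);
 */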
void
aoedev_put(struct aoedev *d)
{
	ulong flags;

	spin_lock_irqsave(&devlist_lock, flags);
	d->ref--;
	spin_unlock_irqrestore(&devlist_lock, flags);
}
static void
dummy_timer(ulong vp)
{
	struct aoedev *d;

	d = (struct aoedev *)vp;
	if (d->flags & DEVFL_TKILL)
		return;
	d->timer.expires = jiffies + HZ;
	add_timer(&d->timer);
}
static void
aoe_failip(struct aoedev *d)
{
	struct request *rq;
	struct bio *bio;
	unsigned long n;

	aoe_failbuf(d, d->ip.buf);

	rq = d->ip.rq;
	if (rq == NULL)
		return;
	while ((bio = d->ip.nxbio)) {
		clear_bit(BIO_UPTODATE, &bio->bi_flags);
		d->ip.nxbio = bio->bi_next;
		n = (unsigned long) rq->special;
		rq->special = (void *) --n;
	}
	if ((unsigned long) rq->special == 0)
		aoe_end_request(d, rq, 0);
}
static void
downdev_frame(struct list_head *pos)
{
	struct frame *f;

	f = list_entry(pos, struct frame, head);
	list_del(pos);
	if (f->buf) {
		f->buf->nframesout--;
		aoe_failbuf(f->t->d, f->buf);
	}
	aoe_freetframe(f);
}
void
aoedev_downdev(struct aoedev *d)
{
	struct aoetgt *t, **tt, **te;
	struct list_head *head, *pos, *nx;
	struct request *rq;
	int i;

	d->flags &= ~DEVFL_UP;

	/* clean out active and to-be-retransmitted buffers */
	for (i = 0; i < NFACTIVE; i++) {
		head = &d->factive[i];
		list_for_each_safe(pos, nx, head)
			downdev_frame(pos);
	}
	head = &d->rexmitq;
	list_for_each_safe(pos, nx, head)
		downdev_frame(pos);

	/* reset window dressings */
	tt = d->targets;
	te = tt + d->ntargets;
	for (; tt < te && (t = *tt); tt++) {
		aoecmd_wreset(t);
		t->nout = 0;
	}

	/* clean out the in-process request (if any) */
	aoe_failip(d);

	/* fast fail all pending I/O */
	if (d->blkq) {
		while ((rq = blk_peek_request(d->blkq))) {
			blk_start_request(rq);
			aoe_end_request(d, rq, 1);
		}
	}

	if (d->gd)
		set_capacity(d->gd, 0);
}
/* return whether the user asked for this particular
 * device to be flushed
 */
static int
user_req(char *s, size_t slen, struct aoedev *d)
{
	const char *p;
	size_t lim;

	if (!d->gd)
		return 0;
	p = strrchr(d->gd->disk_name, '/');
	if (!p)
		p = d->gd->disk_name;
	else
		p += 1;
	lim = sizeof(d->gd->disk_name);
	lim -= p - d->gd->disk_name;
	if (slen < lim)
		lim = slen;

	return !strncmp(s, p, lim);
}
static void
freedev(struct aoedev *d)
{
	struct aoetgt **t, **e;
	int freeing = 0;
	unsigned long flags;

	spin_lock_irqsave(&d->lock, flags);
	if (d->flags & DEVFL_TKILL
	&& !(d->flags & DEVFL_FREEING)) {
		d->flags |= DEVFL_FREEING;
		freeing = 1;
	}
	spin_unlock_irqrestore(&d->lock, flags);
	if (!freeing)
		return;

	del_timer_sync(&d->timer);
	if (d->gd) {
		aoedisk_rm_sysfs(d);
		del_gendisk(d->gd);
		put_disk(d->gd);
		blk_cleanup_queue(d->blkq);
	}
	t = d->targets;
	e = t + d->ntargets;
	for (; t < e && *t; t++)
		freetgt(d, *t);
	if (d->bufpool)
		mempool_destroy(d->bufpool);
	skbpoolfree(d);
	minor_free(d->sysminor);

	spin_lock_irqsave(&d->lock, flags);
	d->flags |= DEVFL_FREED;
	spin_unlock_irqrestore(&d->lock, flags);
}
enum flush_parms {
	NOT_EXITING = 0,
	EXITING = 1,
};

static int
flush(const char __user *str, size_t cnt, int exiting)
{
	ulong flags;
	struct aoedev *d, **dd;
	char buf[16];
	int all = 0;
	int specified = 0;	/* flush a specific device */
	unsigned int skipflags;

	skipflags = DEVFL_GDALLOC | DEVFL_NEWSIZE | DEVFL_TKILL;

	if (!exiting && cnt >= 3) {
		if (cnt > sizeof buf)
			cnt = sizeof buf;
		if (copy_from_user(buf, str, cnt))
			return -EFAULT;
		all = !strncmp(buf, "all", 3);
		if (!all)
			specified = 1;
	}

	flush_scheduled_work();
	/* pass one: without sleeping, do aoedev_downdev */
	spin_lock_irqsave(&devlist_lock, flags);
	for (d = devlist; d; d = d->next) {
		spin_lock(&d->lock);
		if (exiting) {
			/* unconditionally take each device down */
		} else if (specified) {
			if (!user_req(buf, cnt, d))
				goto cont;
		} else if ((!all && (d->flags & DEVFL_UP))
		|| d->flags & skipflags
		|| d->nopen
		|| d->ref)
			goto cont;

		aoedev_downdev(d);
		d->flags |= DEVFL_TKILL;
cont:
		spin_unlock(&d->lock);
	}
	spin_unlock_irqrestore(&devlist_lock, flags);

	/* pass two: call freedev, which might sleep,
	 * for aoedevs marked with DEVFL_TKILL
	 */
restart:
	spin_lock_irqsave(&devlist_lock, flags);
	for (d = devlist; d; d = d->next) {
		spin_lock(&d->lock);
		if (d->flags & DEVFL_TKILL
		&& !(d->flags & DEVFL_FREEING)) {
			spin_unlock(&d->lock);
			spin_unlock_irqrestore(&devlist_lock, flags);
			freedev(d);
			goto restart;
		}
		spin_unlock(&d->lock);
	}

	/* pass three: remove aoedevs marked with DEVFL_FREED */
	for (dd = &devlist, d = *dd; d; d = *dd) {
		struct aoedev *doomed = NULL;

		spin_lock(&d->lock);
		if (d->flags & DEVFL_FREED) {
			*dd = d->next;
			doomed = d;
		} else {
			dd = &d->next;
		}
		spin_unlock(&d->lock);
		if (doomed)
			kfree(doomed->targets);
		kfree(doomed);
	}
	spin_unlock_irqrestore(&devlist_lock, flags);

	return 0;
}
int
aoedev_flush(const char __user *str, size_t cnt)
{
	return flush(str, cnt, NOT_EXITING);
}
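/*
 * Usage note (an assumption about the surrounding driver, not shown in
 * this file): aoedev_flush() is typically reached from the aoe
 * character device when a user writes a device name such as "e1.1", or
 * the word "all", to the flush node (e.g. /dev/etherd/flush).
 * NOT_EXITING keeps flush() from unconditionally taking down devices
 * that are still open or referenced.
 */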
/* This has been confirmed to occur once with Tms=3*1000 due to the
 * driver changing link and not processing its transmit ring.  The
 * problem is hard enough to solve by returning an error that I'm
 * still punting on "solving" this.
 */
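/*
 * Illustrative arithmetic for the wait loop below: Tms / Sms is
 * 30000 / 250 = 120, so skbfree() polls the skb's dataref up to 120
 * times, sleeping Sms (250 ms) per poll, i.e. it waits at most about
 * 30 seconds before giving up and reporting the skb as leaked.
 */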
static void
skbfree(struct sk_buff *skb)
{
	enum { Sms = 250, Tms = 30 * 1000};
	int i = Tms / Sms;

	if (skb == NULL)
		return;
	while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0)
		msleep(Sms);
	if (i < 0) {
		printk(KERN_ERR
			"aoe: %s holds ref: %s\n",
			skb->dev ? skb->dev->name : "netif",
			"cannot free skb -- memory leaked.");
		return;
	}
	skb->truesize -= skb->data_len;
	skb_shinfo(skb)->nr_frags = skb->data_len = 0;
	skb_trim(skb, 0);
	dev_kfree_skb(skb);
}
static void
skbpoolfree(struct aoedev *d)
{
	struct sk_buff *skb, *tmp;

	skb_queue_walk_safe(&d->skbpool, skb, tmp)
		skbfree(skb);

	__skb_queue_head_init(&d->skbpool);
}
/* find it or allocate it */
struct aoedev *
aoedev_by_aoeaddr(ulong maj, int min, int do_alloc)
{
	struct aoedev *d;
	int i;
	ulong flags;
	ulong sysminor = 0;

	spin_lock_irqsave(&devlist_lock, flags);

	for (d = devlist; d; d = d->next)
		if (d->aoemajor == maj && d->aoeminor == min) {
			spin_lock(&d->lock);
			if (d->flags & DEVFL_TKILL) {
				spin_unlock(&d->lock);
				d = NULL;
				goto out;
			}
			d->ref++;
			spin_unlock(&d->lock);
			break;
		}
	if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0)
		goto out;
	d = kcalloc(1, sizeof *d, GFP_ATOMIC);
	if (!d)
		goto out;
	d->targets = kcalloc(NTARGETS, sizeof(*d->targets), GFP_ATOMIC);
	if (!d->targets) {
		kfree(d);
		d = NULL;
		goto out;
	}
	d->ntargets = NTARGETS;
	INIT_WORK(&d->work, aoecmd_sleepwork);
	spin_lock_init(&d->lock);
	skb_queue_head_init(&d->skbpool);
	init_timer(&d->timer);
	d->timer.data = (ulong) d;
	d->timer.function = dummy_timer;
	d->timer.expires = jiffies + HZ;
	add_timer(&d->timer);
	d->bufpool = NULL;	/* defer to aoeblk_gdalloc */
	d->tgt = d->targets;
	d->ref = 1;
	for (i = 0; i < NFACTIVE; i++)
		INIT_LIST_HEAD(&d->factive[i]);
	INIT_LIST_HEAD(&d->rexmitq);
	d->sysminor = sysminor;
	d->aoemajor = maj;
	d->aoeminor = min;
	d->rttavg = RTTAVG_INIT;
	d->rttdev = RTTDEV_INIT;
	d->next = devlist;
	devlist = d;
out:
	spin_unlock_irqrestore(&devlist_lock, flags);
	return d;
}
static void
freetgt(struct aoedev *d, struct aoetgt *t)
{
	struct frame *f;
	struct list_head *pos, *nx, *head;
	struct aoeif *ifp;

	for (ifp = t->ifs; ifp < &t->ifs[NAOEIFS]; ++ifp) {
		if (!ifp->nd)
			break;
		dev_put(ifp->nd);
	}

	head = &t->ffree;
	list_for_each_safe(pos, nx, head) {
		list_del(pos);
		f = list_entry(pos, struct frame, head);
		skbfree(f->skb);
		kfree(f);
	}
	kfree(t);
}
void
aoedev_exit(void)
{
	flush_scheduled_work();
	flush(NULL, 0, EXITING);
}