/* Copyright (c) 2013 Coraid, Inc.  See COPYING for GPL terms. */
/*
 * AoE device utility functions; maintains device list.
 */
#include <linux/hdreg.h>
#include <linux/blk-mq.h>
#include <linux/netdevice.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/bitmap.h>
#include <linux/kdev_t.h>
#include <linux/moduleparam.h>
#include <linux/string.h>
#include "aoe.h"
static void freetgt(struct aoedev *d, struct aoetgt *t);
static void skbpoolfree(struct aoedev *d);
static int aoe_dyndevs = 1;
module_param(aoe_dyndevs, int, 0644);
MODULE_PARM_DESC(aoe_dyndevs, "Use dynamic minor numbers for devices.");
static struct aoedev *devlist;
static DEFINE_SPINLOCK(devlist_lock);
/* Because some systems will have one, many, or no
 *   - partitions,
 *   - slots per shelf,
 *   - or shelves,
 * we need some flexibility in the way the minor numbers
 * are allocated.  So they are dynamic.
 */
#define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS)
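/*
 * Worked example (illustrative; assumes the usual MINORBITS of 20 and
 * AOE_PARTITIONS of 16): N_DEVS is (1U<<20)/16 == 65536, and a device
 * that claims bit n in used_minors owns the AOE_PARTITIONS consecutive
 * system minors starting at n * AOE_PARTITIONS.
 */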
static DEFINE_SPINLOCK(used_minors_lock);
static DECLARE_BITMAP(used_minors, N_DEVS);
static int
minor_get_dyn(ulong *sysminor)
{
	ulong flags;
	ulong n;
	int error = 0;

	spin_lock_irqsave(&used_minors_lock, flags);
	n = find_first_zero_bit(used_minors, N_DEVS);
	if (n < N_DEVS)
		set_bit(n, used_minors);
	else
		error = -1;
	spin_unlock_irqrestore(&used_minors_lock, flags);

	*sysminor = n * AOE_PARTITIONS;
	return error;
}
static int
minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin)
{
	ulong flags;
	ulong n;
	int error = 0;
	enum {
		/* for backwards compatibility when !aoe_dyndevs,
		 * a static number of supported slots per shelf */
		NPERSHELF = 16,
	};

	if (aoemin >= NPERSHELF) {
		pr_err("aoe: %s %d slots per shelf\n",
			"static minor device numbers support only",
			NPERSHELF);
		error = -1;
		goto out;
	}

	n = aoemaj * NPERSHELF + aoemin;
	if (n >= N_DEVS) {
		pr_err("aoe: %s with e%ld.%d\n",
			"cannot use static minor device numbers",
			aoemaj, aoemin);
		error = -1;
		goto out;
	}

	spin_lock_irqsave(&used_minors_lock, flags);
	if (test_bit(n, used_minors)) {
		pr_err("aoe: %s %lu\n",
			"existing device already has static minor number",
			n);
		error = -1;
	} else
		set_bit(n, used_minors);
	spin_unlock_irqrestore(&used_minors_lock, flags);
	*sysminor = n * AOE_PARTITIONS;
out:
	return error;
}
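/*
 * Worked example (illustrative): with the static scheme above, shelf 1
 * slot 2 (device e1.2) gets n = 1 * NPERSHELF + 2 == 18, so its first
 * system minor is 18 * AOE_PARTITIONS.
 */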
static int
minor_get(ulong *sysminor, ulong aoemaj, int aoemin)
{
	if (aoe_dyndevs)
		return minor_get_dyn(sysminor);
	else
		return minor_get_static(sysminor, aoemaj, aoemin);
}
static void
minor_free(ulong minor)
{
	ulong flags;

	minor /= AOE_PARTITIONS;
	BUG_ON(minor >= N_DEVS);

	spin_lock_irqsave(&used_minors_lock, flags);
	BUG_ON(!test_bit(minor, used_minors));
	clear_bit(minor, used_minors);
	spin_unlock_irqrestore(&used_minors_lock, flags);
}
/*
 * Users who grab a pointer to the device with aoedev_by_aoeaddr
 * automatically get a reference count and must be responsible
 * for performing an aoedev_put.  With the addition of async
 * kthread processing I'm no longer confident that we can
 * guarantee consistency in the face of device flushes.
 *
 * For the time being, we only bother to add extra references for
 * frames sitting on the iocq.  When the kthreads finish processing
 * these frames, they will aoedev_put the device.
 */
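/*
 * Illustrative sketch (not part of the driver) of the pairing described
 * above: a caller that looks up a device takes a reference and must
 * drop it when finished.
 *
 *	d = aoedev_by_aoeaddr(maj, min, 0);
 *	if (d) {
 *		use_the_device(d);
 *		aoedev_put(d);
 *	}
 *
 * use_the_device() is a placeholder for whatever work the caller does.
 */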
void
aoedev_put(struct aoedev *d)
{
	ulong flags;

	spin_lock_irqsave(&devlist_lock, flags);
	d->ref--;
	spin_unlock_irqrestore(&devlist_lock, flags);
}
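/* dummy_timer below simply re-arms itself once per second until the
 * device is marked DEVFL_TKILL, presumably as a placeholder until a
 * real timer handler is installed for the device.
 */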
static void
dummy_timer(struct timer_list *t)
{
	struct aoedev *d;

	d = from_timer(d, t, timer);
	if (d->flags & DEVFL_TKILL)
		return;
	d->timer.expires = jiffies + HZ;
	add_timer(&d->timer);
}
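/* aoe_failip fails the device's in-process request: the buf being built
 * is failed via aoe_failbuf, any bios still chained on d->ip are marked
 * BLK_STS_IOERR, and the request is completed through aoe_end_request
 * once no bios remain outstanding.
 */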
static void
aoe_failip(struct aoedev *d)
{
	struct request *rq;
	struct aoe_req *req;
	struct bio *bio;

	aoe_failbuf(d, d->ip.buf);
	rq = d->ip.rq;
	if (rq == NULL)
		return;

	req = blk_mq_rq_to_pdu(rq);
	while ((bio = d->ip.nxbio)) {
		bio->bi_status = BLK_STS_IOERR;
		d->ip.nxbio = bio->bi_next;
		req->nr_bios--;
	}

	if (!req->nr_bios)
		aoe_end_request(d, rq, 0);
}
static void
downdev_frame(struct list_head *pos)
{
	struct frame *f;

	f = list_entry(pos, struct frame, head);
	list_del(pos);
	if (f->buf) {
		f->buf->nframesout--;
		aoe_failbuf(f->t->d, f->buf);
	}
	aoe_freetframe(f);
}
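/* aoedev_downdev takes the device out of service: it clears DEVFL_UP,
 * fails every outstanding and to-be-retransmitted frame, resets each
 * target's window state, fails the in-process request, and drops the
 * reported capacity to zero.
 */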
void
aoedev_downdev(struct aoedev *d)
{
	struct aoetgt *t, **tt, **te;
	struct list_head *head, *pos, *nx;
	int i;

	d->flags &= ~DEVFL_UP;

	/* clean out active and to-be-retransmitted buffers */
	for (i = 0; i < NFACTIVE; i++) {
		head = &d->factive[i];
		list_for_each_safe(pos, nx, head)
			downdev_frame(pos);
	}
	head = &d->rexmitq;
	list_for_each_safe(pos, nx, head)
		downdev_frame(pos);

	/* reset window dressings */
	tt = d->targets;
	te = tt + d->ntargets;
	for (; tt < te && (t = *tt); tt++) {
		aoecmd_wreset(t);
		t->nout = 0;
	}

	/* clean out the in-process request (if any) */
	aoe_failip(d);

	/* fast fail all pending I/O */
	if (d->blkq) {
		/* UP is cleared, freeze+quiesce to ensure all are errored */
		blk_mq_freeze_queue(d->blkq);
		blk_mq_quiesce_queue(d->blkq);
		blk_mq_unquiesce_queue(d->blkq);
		blk_mq_unfreeze_queue(d->blkq);
	}

	if (d->gd)
		set_capacity(d->gd, 0);
}
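/* Note on the fast-fail sequence above: DEVFL_UP is already clear, so
 * the freeze/quiesce round trip drains whatever is still in flight, and
 * any I/O submitted after the unfreeze should be errored immediately by
 * the driver rather than queued against a dead device.
 */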
/* return whether the user asked for this particular
 * device to be flushed
 */
static int
user_req(char *s, size_t slen, struct aoedev *d)
{
	const char *p;
	size_t lim;

	if (!d->gd)
		return 0;
	p = kbasename(d->gd->disk_name);
	lim = sizeof(d->gd->disk_name);
	lim -= p - d->gd->disk_name;
	if (slen < lim)
		lim = slen;

	return !strncmp(s, p, lim);
}
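/*
 * Illustrative example: aoe gendisks are named like "etherd/e1.2", so
 * kbasename() yields "e1.2", and a flush request string of "e1.2"
 * written by the user therefore matches that device.
 */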
static void
freedev(struct aoedev *d)
{
	struct aoetgt **t, **e;
	int freeing = 0;
	unsigned long flags;

	spin_lock_irqsave(&d->lock, flags);
	if (d->flags & DEVFL_TKILL
	&& !(d->flags & DEVFL_FREEING)) {
		d->flags |= DEVFL_FREEING;
		freeing = 1;
	}
	spin_unlock_irqrestore(&d->lock, flags);
	if (!freeing)
		return;

	del_timer_sync(&d->timer);
	if (d->gd) {
		aoedisk_rm_debugfs(d);
		del_gendisk(d->gd);
		put_disk(d->gd);
		blk_mq_free_tag_set(&d->tag_set);
		blk_cleanup_queue(d->blkq);
	}
	t = d->targets;
	e = t + d->ntargets;
	for (; t < e && *t; t++)
		freetgt(d, *t);

	mempool_destroy(d->bufpool);
	skbpoolfree(d);
	minor_free(d->sysminor);

	spin_lock_irqsave(&d->lock, flags);
	d->flags |= DEVFL_FREED;
	spin_unlock_irqrestore(&d->lock, flags);
}
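/*
 * flush() below works in three passes over devlist: pass one calls
 * aoedev_downdev() and marks the selected devices DEVFL_TKILL, pass two
 * calls freedev() for the devices so marked, and pass three unlinks and
 * frees the aoedevs that freedev() flagged DEVFL_FREED.
 */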
enum flush_parms {
	NOT_EXITING = 0,
	EXITING = 1,
};

static int
flush(const char __user *str, size_t cnt, int exiting)
{
	ulong flags;
	struct aoedev *d, **dd;
	char buf[16];
	int all = 0;
	int specified = 0;	/* flush a specific device */
	unsigned int skipflags;

	skipflags = DEVFL_GDALLOC | DEVFL_NEWSIZE | DEVFL_TKILL;

	if (!exiting && cnt >= 3) {
		if (cnt > sizeof buf)
			cnt = sizeof buf;
		if (copy_from_user(buf, str, cnt))
			return -EFAULT;
		all = !strncmp(buf, "all", 3);
		if (!all)
			specified = 1;
	}

	flush_scheduled_work();
	/* pass one: do aoedev_downdev, which might sleep */
restart1:
	spin_lock_irqsave(&devlist_lock, flags);
	for (d = devlist; d; d = d->next) {
		spin_lock(&d->lock);
		if (d->flags & DEVFL_TKILL)
			goto cont;

		if (exiting) {
			/* unconditionally take each device down */
		} else if (specified) {
			if (!user_req(buf, cnt, d))
				goto cont;
		} else if ((!all && (d->flags & DEVFL_UP))
		|| d->flags & skipflags
		|| d->nopen
		|| d->ref)
			goto cont;

		spin_unlock(&d->lock);
		spin_unlock_irqrestore(&devlist_lock, flags);
		aoedev_downdev(d);
		d->flags |= DEVFL_TKILL;
		goto restart1;
cont:
		spin_unlock(&d->lock);
	}
	spin_unlock_irqrestore(&devlist_lock, flags);

	/* pass two: call freedev, which might sleep,
	 * for aoedevs marked with DEVFL_TKILL
	 */
restart2:
	spin_lock_irqsave(&devlist_lock, flags);
	for (d = devlist; d; d = d->next) {
		spin_lock(&d->lock);
		if (d->flags & DEVFL_TKILL
		&& !(d->flags & DEVFL_FREEING)) {
			spin_unlock(&d->lock);
			spin_unlock_irqrestore(&devlist_lock, flags);
			freedev(d);
			goto restart2;
		}
		spin_unlock(&d->lock);
	}

	/* pass three: remove aoedevs marked with DEVFL_FREED */
	for (dd = &devlist, d = *dd; d; d = *dd) {
		struct aoedev *doomed = NULL;

		spin_lock(&d->lock);
		if (d->flags & DEVFL_FREED) {
			*dd = d->next;
			doomed = d;
		} else {
			dd = &d->next;
		}
		spin_unlock(&d->lock);
		if (doomed)
			kfree(doomed->targets);
		kfree(doomed);
	}
	spin_unlock_irqrestore(&devlist_lock, flags);

	return 0;
}
int
aoedev_flush(const char __user *str, size_t cnt)
{
	return flush(str, cnt, NOT_EXITING);
}
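/*
 * aoedev_flush is the entry point used by the aoe character-device
 * interface; userspace typically writes "all" or a specific device name
 * (for example via the aoe-flush utility writing to /dev/etherd/flush)
 * to request a flush.
 */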
/* This has been confirmed to occur once with Tms=3*1000 due to the
 * driver changing link and not processing its transmit ring.  The
 * problem is hard enough to solve by returning an error that I'm
 * still punting on "solving" this.
 */
static void
skbfree(struct sk_buff *skb)
{
	enum { Sms = 250, Tms = 30 * 1000};
	int i = Tms / Sms;

	if (skb == NULL)
		return;
	while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0)
		msleep(Sms);
	if (i < 0) {
		printk(KERN_ERR
			"aoe: %s holds ref: %s\n",
			skb->dev ? skb->dev->name : "netif",
			"cannot free skb -- memory leaked.");
		return;
	}
	skb->truesize -= skb->data_len;
	skb_shinfo(skb)->nr_frags = skb->data_len = 0;
	skb_trim(skb, 0);
	dev_kfree_skb(skb);
}
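/* With Sms == 250 and Tms == 30 * 1000, the loop above polls dataref up
 * to Tms/Sms == 120 times, i.e. it waits at most about 30 seconds before
 * giving up and deliberately leaking the skb.
 */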
static void
skbpoolfree(struct aoedev *d)
{
	struct sk_buff *skb, *tmp;

	skb_queue_walk_safe(&d->skbpool, skb, tmp)
		skbfree(skb);

	__skb_queue_head_init(&d->skbpool);
}
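/* aoedev_by_aoeaddr looks up the aoedev for AoE address maj.min and
 * returns it with its reference count bumped, or NULL.  With do_alloc
 * set (and a system minor available), an unknown address gets a freshly
 * allocated and initialized aoedev instead.
 */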
/* find it or allocate it */
struct aoedev *
aoedev_by_aoeaddr(ulong maj, int min, int do_alloc)
{
	struct aoedev *d;
	int i;
	ulong flags;
	ulong sysminor = 0;

	spin_lock_irqsave(&devlist_lock, flags);

	for (d=devlist; d; d=d->next)
		if (d->aoemajor == maj && d->aoeminor == min) {
			spin_lock(&d->lock);
			if (d->flags & DEVFL_TKILL) {
				spin_unlock(&d->lock);
				d = NULL;
				goto out;
			}
			d->ref++;
			spin_unlock(&d->lock);
			break;
		}
	if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0)
		goto out;
	d = kcalloc(1, sizeof *d, GFP_ATOMIC);
	if (!d)
		goto out;
	d->targets = kcalloc(NTARGETS, sizeof(*d->targets), GFP_ATOMIC);
	if (!d->targets) {
		kfree(d);
		d = NULL;
		goto out;
	}
	d->ntargets = NTARGETS;
	INIT_WORK(&d->work, aoecmd_sleepwork);
	spin_lock_init(&d->lock);
	INIT_LIST_HEAD(&d->rq_list);
	skb_queue_head_init(&d->skbpool);
	timer_setup(&d->timer, dummy_timer, 0);
	d->timer.expires = jiffies + HZ;
	add_timer(&d->timer);
	d->bufpool = NULL;	/* defer to aoeblk_gdalloc */
	d->tgt = d->targets;
	d->ref = 1;
	for (i = 0; i < NFACTIVE; i++)
		INIT_LIST_HEAD(&d->factive[i]);
	INIT_LIST_HEAD(&d->rexmitq);
	d->sysminor = sysminor;
	d->aoemajor = maj;
	d->aoeminor = min;
	d->rttavg = RTTAVG_INIT;
	d->rttdev = RTTDEV_INIT;
	d->next = devlist;
	devlist = d;
 out:
	spin_unlock_irqrestore(&devlist_lock, flags);
	return d;
}
static void
freetgt(struct aoedev *d, struct aoetgt *t)
{
	struct frame *f;
	struct list_head *pos, *nx, *head;
	struct aoeif *ifp;

	for (ifp = t->ifs; ifp < &t->ifs[NAOEIFS]; ++ifp) {
		if (!ifp->nd)
			break;
		dev_put(ifp->nd);
	}

	head = &t->ffree;
	list_for_each_safe(pos, nx, head) {
		list_del(pos);
		f = list_entry(pos, struct frame, head);
		skbfree(f->skb);
		kfree(f);
	}
	kfree(t);
}
void
aoedev_exit(void)
{
	flush_scheduled_work();
	flush(NULL, 0, EXITING);
}