/* Copyright (c) 2013 Coraid, Inc.  See COPYING for GPL terms. */
/*
 * aoedev.c
 * AoE device utility functions; maintains device list.
 */
#include <linux/hdreg.h>
#include <linux/blkdev.h>
#include <linux/netdevice.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/bitmap.h>
#include <linux/kdev_t.h>
#include <linux/moduleparam.h>
#include <linux/string.h>
#include "aoe.h"
static void freetgt(struct aoedev *d, struct aoetgt *t);
static void skbpoolfree(struct aoedev *d);
static int aoe_dyndevs = 1;
module_param(aoe_dyndevs, int, 0644);
MODULE_PARM_DESC(aoe_dyndevs, "Use dynamic minor numbers for devices.");
static struct aoedev *devlist;
static DEFINE_SPINLOCK(devlist_lock);
/* Because some systems will have one, many, or no
 *   - partitions,
 *   - slots per shelf,
 *   - or shelves,
 * we need some flexibility in the way the minor numbers
 * are allocated.  So they are dynamic.
 */
#define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS)
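/* An illustrative example of the dynamic scheme (assuming the default
 * AOE_PARTITIONS of 16): the first device registered takes bit 0 of
 * used_minors and gets sysminor 0, the next takes bit 1 and gets
 * sysminor 16, and so on, independent of each device's shelf and slot
 * address on the wire.
 */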
static DEFINE_SPINLOCK(used_minors_lock);
static DECLARE_BITMAP(used_minors, N_DEVS);
static int
minor_get_dyn(ulong *sysminor)
{
	ulong flags;
	ulong n;
	int error = 0;

	spin_lock_irqsave(&used_minors_lock, flags);
	n = find_first_zero_bit(used_minors, N_DEVS);
	if (n < N_DEVS)
		set_bit(n, used_minors);
	else
		error = -1;
	spin_unlock_irqrestore(&used_minors_lock, flags);

	*sysminor = n * AOE_PARTITIONS;
	return error;
}
static int
minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin)
{
	ulong flags;
	ulong n;
	int error = 0;
	enum {
		/* for backwards compatibility when !aoe_dyndevs,
		 * a static number of supported slots per shelf */
		NPERSHELF = 16,
	};

	if (aoemin >= NPERSHELF) {
		pr_err("aoe: %s %d slots per shelf\n",
			"static minor device numbers support only",
			NPERSHELF);
		error = -1;
		goto out;
	}

	n = aoemaj * NPERSHELF + aoemin;
	if (n >= N_DEVS) {
		pr_err("aoe: %s with e%ld.%d\n",
			"cannot use static minor device numbers",
			aoemaj, aoemin);
		error = -1;
		goto out;
	}

	spin_lock_irqsave(&used_minors_lock, flags);
	if (test_bit(n, used_minors)) {
		pr_err("aoe: %s %lu\n",
			"existing device already has static minor number",
			n);
		error = -1;
	} else
		set_bit(n, used_minors);
	spin_unlock_irqrestore(&used_minors_lock, flags);
	*sysminor = n * AOE_PARTITIONS;
out:
	return error;
}
static int
minor_get(ulong *sysminor, ulong aoemaj, int aoemin)
{
	if (aoe_dyndevs)
		return minor_get_dyn(sysminor);
	else
		return minor_get_static(sysminor, aoemaj, aoemin);
}
static void
minor_free(ulong minor)
{
	ulong flags;

	minor /= AOE_PARTITIONS;
	BUG_ON(minor >= N_DEVS);

	spin_lock_irqsave(&used_minors_lock, flags);
	BUG_ON(!test_bit(minor, used_minors));
	clear_bit(minor, used_minors);
	spin_unlock_irqrestore(&used_minors_lock, flags);
}
/*
 * Users who grab a pointer to the device with aoedev_by_aoeaddr
 * automatically get a reference count and must be responsible
 * for performing an aoedev_put.  With the addition of async
 * kthread processing I'm no longer confident that we can
 * guarantee consistency in the face of device flushes.
 *
 * For the time being, we only bother to add extra references for
 * frames sitting on the iocq.  When the kthreads finish processing
 * these frames, they will aoedev_put the device.
 */
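/* A minimal usage sketch (the calling context below is hypothetical,
 * not part of this file): look the device up, hand off any work that
 * references it, then drop the reference taken on our behalf.
 *
 *	struct aoedev *d;
 *
 *	d = aoedev_by_aoeaddr(maj, min, 0);
 *	if (d) {
 *		...queue frames that reference d to the iocq...
 *		aoedev_put(d);
 *	}
 */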
void
aoedev_put(struct aoedev *d)
{
	ulong flags;

	spin_lock_irqsave(&devlist_lock, flags);
	d->ref--;
	spin_unlock_irqrestore(&devlist_lock, flags);
}
static void
dummy_timer(struct timer_list *t)
{
	struct aoedev *d;

	d = from_timer(d, t, timer);
	if (d->flags & DEVFL_TKILL)
		return;
	d->timer.expires = jiffies + HZ;
	add_timer(&d->timer);
}
static void
aoe_failip(struct aoedev *d)
{
	struct request *rq;
	struct bio *bio;
	unsigned long n;

	aoe_failbuf(d, d->ip.buf);

	rq = d->ip.rq;
	if (rq == NULL)
		return;
	while ((bio = d->ip.nxbio)) {
		bio->bi_status = BLK_STS_IOERR;
		d->ip.nxbio = bio->bi_next;
		n = (unsigned long) rq->special;
		rq->special = (void *) --n;
	}
	if ((unsigned long) rq->special == 0)
		aoe_end_request(d, rq, 0);
}
static void
downdev_frame(struct list_head *pos)
{
	struct frame *f;

	f = list_entry(pos, struct frame, head);
	list_del(pos);
	if (f->buf) {
		f->buf->nframesout--;
		aoe_failbuf(f->t->d, f->buf);
	}
	aoe_freetframe(f);
}
void
aoedev_downdev(struct aoedev *d)
{
	struct aoetgt *t, **tt, **te;
	struct list_head *head, *pos, *nx;
	struct request *rq;
	int i;

	d->flags &= ~DEVFL_UP;

	/* clean out active and to-be-retransmitted buffers */
	for (i = 0; i < NFACTIVE; i++) {
		head = &d->factive[i];
		list_for_each_safe(pos, nx, head)
			downdev_frame(pos);
	}
	head = &d->rexmitq;
	list_for_each_safe(pos, nx, head)
		downdev_frame(pos);

	/* reset window dressings */
	tt = d->targets;
	te = tt + d->ntargets;
	for (; tt < te && (t = *tt); tt++) {
		aoecmd_wreset(t);
		t->nout = 0;
	}

	/* clean out the in-process request (if any) */
	aoe_failip(d);

	/* fast fail all pending I/O */
	if (d->blkq) {
		while ((rq = blk_peek_request(d->blkq))) {
			blk_start_request(rq);
			aoe_end_request(d, rq, 1);
		}
	}

	if (d->gd)
		set_capacity(d->gd, 0);
}
/* return whether the user asked for this particular
 * device to be flushed
 */
static int
user_req(char *s, size_t slen, struct aoedev *d)
{
	const char *p;
	size_t lim;

	if (!d->gd)
		return 0;
	p = kbasename(d->gd->disk_name);
	lim = sizeof(d->gd->disk_name);
	lim -= p - d->gd->disk_name;
	if (slen < lim)
		lim = slen;

	return !strncmp(s, p, lim);
}
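/* For example (illustrative, assuming the usual "etherd/e%ld.%d" disk
 * naming set up in aoeblk.c): a user-supplied string "e1.1" matches
 * the device whose gendisk name is "etherd/e1.1", because kbasename()
 * skips past the "etherd/" directory component before the compare.
 */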
static void
freedev(struct aoedev *d)
{
	struct aoetgt **t, **e;
	int freeing = 0;
	unsigned long flags;

	spin_lock_irqsave(&d->lock, flags);
	if (d->flags & DEVFL_TKILL
	&& !(d->flags & DEVFL_FREEING)) {
		d->flags |= DEVFL_FREEING;
		freeing = 1;
	}
	spin_unlock_irqrestore(&d->lock, flags);
	if (!freeing)
		return;

	del_timer_sync(&d->timer);
	if (d->gd) {
		aoedisk_rm_debugfs(d);
		aoedisk_rm_sysfs(d);
		del_gendisk(d->gd);
		put_disk(d->gd);
		blk_cleanup_queue(d->blkq);
	}
	t = d->targets;
	e = t + d->ntargets;
	for (; t < e && *t; t++)
		freetgt(d, *t);
	if (d->bufpool)
		mempool_destroy(d->bufpool);
	skbpoolfree(d);
	minor_free(d->sysminor);

	spin_lock_irqsave(&d->lock, flags);
	d->flags |= DEVFL_FREED;
	spin_unlock_irqrestore(&d->lock, flags);
}
enum flush_parms {
	NOT_EXITING = 0,
	EXITING = 1,
};

static int
flush(const char __user *str, size_t cnt, int exiting)
{
	ulong flags;
	struct aoedev *d, **dd;
	char buf[16];
	int all = 0;
	int specified = 0;	/* flush a specific device */
	unsigned int skipflags;

	skipflags = DEVFL_GDALLOC | DEVFL_NEWSIZE | DEVFL_TKILL;

	if (!exiting && cnt >= 3) {
		if (cnt > sizeof buf)
			cnt = sizeof buf;
		if (copy_from_user(buf, str, cnt))
			return -EFAULT;
		all = !strncmp(buf, "all", 3);
		if (!all)
			specified = 1;
	}

	flush_scheduled_work();
	/* pass one: without sleeping, do aoedev_downdev */
	spin_lock_irqsave(&devlist_lock, flags);
	for (d = devlist; d; d = d->next) {
		spin_lock(&d->lock);
		if (exiting) {
			/* unconditionally take each device down */
		} else if (specified) {
			if (!user_req(buf, cnt, d))
				goto cont;
		} else if ((!all && (d->flags & DEVFL_UP))
		|| d->flags & skipflags
		|| d->nopen
		|| d->ref)
			goto cont;

		aoedev_downdev(d);
		d->flags |= DEVFL_TKILL;
cont:
		spin_unlock(&d->lock);
	}
	spin_unlock_irqrestore(&devlist_lock, flags);

	/* pass two: call freedev, which might sleep,
	 * for aoedevs marked with DEVFL_TKILL
	 */
restart:
	spin_lock_irqsave(&devlist_lock, flags);
	for (d = devlist; d; d = d->next) {
		spin_lock(&d->lock);
		if (d->flags & DEVFL_TKILL
		&& !(d->flags & DEVFL_FREEING)) {
			spin_unlock(&d->lock);
			spin_unlock_irqrestore(&devlist_lock, flags);
			freedev(d);
			goto restart;
		}
		spin_unlock(&d->lock);
	}

	/* pass three: remove aoedevs marked with DEVFL_FREED */
	for (dd = &devlist, d = *dd; d; d = *dd) {
		struct aoedev *doomed = NULL;

		spin_lock(&d->lock);
		if (d->flags & DEVFL_FREED) {
			*dd = d->next;
			doomed = d;
		} else {
			dd = &d->next;
		}
		spin_unlock(&d->lock);
		if (doomed)
			kfree(doomed->targets);
		kfree(doomed);
	}
	spin_unlock_irqrestore(&devlist_lock, flags);

	return 0;
}
int
aoedev_flush(const char __user *str, size_t cnt)
{
	return flush(str, cnt, NOT_EXITING);
}
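/* Userspace reaches this through the "flush" character device handled
 * in aoechr.c; an illustrative session (node names depend on local
 * udev configuration):
 *
 *	echo e1.1 > /dev/etherd/flush	- flush the named device
 *	echo all > /dev/etherd/flush	- flush all unused devices
 */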
/* This has been confirmed to occur once with Tms=3*1000 due to the
 * driver changing link and not processing its transmit ring.  The
 * problem is hard enough to solve by returning an error that I'm
 * still punting on "solving" this.
 */
static void
skbfree(struct sk_buff *skb)
{
	enum { Sms = 250, Tms = 30 * 1000};
	int i = Tms / Sms;

	if (skb == NULL)
		return;
	while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0)
		msleep(Sms);
	if (i < 0) {
		printk(KERN_ERR
			"aoe: %s holds ref: %s\n",
			skb->dev ? skb->dev->name : "netif",
			"cannot free skb -- memory leaked.");
		return;
	}
	skb->truesize -= skb->data_len;
	skb_shinfo(skb)->nr_frags = skb->data_len = 0;
	skb_trim(skb, 0);
	dev_kfree_skb(skb);
}
static void
skbpoolfree(struct aoedev *d)
{
	struct sk_buff *skb, *tmp;

	skb_queue_walk_safe(&d->skbpool, skb, tmp)
		skbfree(skb);

	__skb_queue_head_init(&d->skbpool);
}
/* find it or allocate it */
struct aoedev *
aoedev_by_aoeaddr(ulong maj, int min, int do_alloc)
{
	struct aoedev *d;
	int i;
	ulong flags;
	ulong sysminor = 0;

	spin_lock_irqsave(&devlist_lock, flags);

	for (d=devlist; d; d=d->next)
		if (d->aoemajor == maj && d->aoeminor == min) {
			spin_lock(&d->lock);
			if (d->flags & DEVFL_TKILL) {
				spin_unlock(&d->lock);
				d = NULL;
				goto out;
			}
			d->ref++;
			spin_unlock(&d->lock);
			break;
		}
	if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0)
		goto out;
	d = kcalloc(1, sizeof *d, GFP_ATOMIC);
	if (!d)
		goto out;
	d->targets = kcalloc(NTARGETS, sizeof(*d->targets), GFP_ATOMIC);
	if (!d->targets) {
		kfree(d);
		d = NULL;
		goto out;
	}
	d->ntargets = NTARGETS;
	INIT_WORK(&d->work, aoecmd_sleepwork);
	spin_lock_init(&d->lock);
	skb_queue_head_init(&d->skbpool);
	timer_setup(&d->timer, dummy_timer, 0);
	d->timer.expires = jiffies + HZ;
	add_timer(&d->timer);
	d->bufpool = NULL;	/* defer to aoeblk_gdalloc */
	d->tgt = d->targets;
	d->ref = 1;
	for (i = 0; i < NFACTIVE; i++)
		INIT_LIST_HEAD(&d->factive[i]);
	INIT_LIST_HEAD(&d->rexmitq);
	d->sysminor = sysminor;
	d->aoemajor = maj;
	d->aoeminor = min;
	d->rttavg = RTTAVG_INIT;
	d->rttdev = RTTDEV_INIT;
	d->next = devlist;
	devlist = d;
 out:
	spin_unlock_irqrestore(&devlist_lock, flags);
	return d;
}
static void
freetgt(struct aoedev *d, struct aoetgt *t)
{
	struct frame *f;
	struct list_head *pos, *nx, *head;
	struct aoeif *ifp;

	for (ifp = t->ifs; ifp < &t->ifs[NAOEIFS]; ++ifp) {
		if (!ifp->nd)
			break;
		dev_put(ifp->nd);
	}

	head = &t->ffree;
	list_for_each_safe(pos, nx, head) {
		list_del(pos);
		f = list_entry(pos, struct frame, head);
		skbfree(f->skb);
		kfree(f);
	}
	kfree(t);
}
void
aoedev_exit(void)
{
	flush_scheduled_work();
	flush(NULL, 0, EXITING);
}