/* Copyright (c) 2013 Coraid, Inc.  See COPYING for GPL terms. */
/*
 * AoE device utility functions; maintains device list.
 */
7 #include <linux/hdreg.h>
8 #include <linux/blkdev.h>
9 #include <linux/netdevice.h>
10 #include <linux/delay.h>
11 #include <linux/slab.h>
12 #include <linux/bitmap.h>
13 #include <linux/kdev_t.h>
14 #include <linux/moduleparam.h>
15 #include <linux/string.h>
/* Forward declarations for static helpers defined further down in this file. */
static void dummy_timer(ulong);
static void freetgt(struct aoedev *d, struct aoetgt *t);
static void skbpoolfree(struct aoedev *d);
22 static int aoe_dyndevs
= 1;
23 module_param(aoe_dyndevs
, int, 0644);
24 MODULE_PARM_DESC(aoe_dyndevs
, "Use dynamic minor numbers for devices.");
/*
 * Head of the list of AoE devices, chained through ->next.
 * The list is walked and modified with devlist_lock held.
 */
static struct aoedev *devlist;
static DEFINE_SPINLOCK(devlist_lock);
/* Because some systems will have one, many, or no
 * partitions, slots per shelf, or shelves,
 * we need some flexibility in the way the minor numbers
 * are allocated.  So they are dynamic.
 */
36 #define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS)
38 static DEFINE_SPINLOCK(used_minors_lock
);
39 static DECLARE_BITMAP(used_minors
, N_DEVS
);
42 minor_get_dyn(ulong
*sysminor
)
48 spin_lock_irqsave(&used_minors_lock
, flags
);
49 n
= find_first_zero_bit(used_minors
, N_DEVS
);
51 set_bit(n
, used_minors
);
54 spin_unlock_irqrestore(&used_minors_lock
, flags
);
56 *sysminor
= n
* AOE_PARTITIONS
;
61 minor_get_static(ulong
*sysminor
, ulong aoemaj
, int aoemin
)
67 /* for backwards compatibility when !aoe_dyndevs,
68 * a static number of supported slots per shelf */
72 if (aoemin
>= NPERSHELF
) {
73 pr_err("aoe: %s %d slots per shelf\n",
74 "static minor device numbers support only",
80 n
= aoemaj
* NPERSHELF
+ aoemin
;
82 pr_err("aoe: %s with e%ld.%d\n",
83 "cannot use static minor device numbers",
89 spin_lock_irqsave(&used_minors_lock
, flags
);
90 if (test_bit(n
, used_minors
)) {
91 pr_err("aoe: %s %lu\n",
92 "existing device already has static minor number",
96 set_bit(n
, used_minors
);
97 spin_unlock_irqrestore(&used_minors_lock
, flags
);
98 *sysminor
= n
* AOE_PARTITIONS
;
104 minor_get(ulong
*sysminor
, ulong aoemaj
, int aoemin
)
107 return minor_get_dyn(sysminor
);
109 return minor_get_static(sysminor
, aoemaj
, aoemin
);
113 minor_free(ulong minor
)
117 minor
/= AOE_PARTITIONS
;
118 BUG_ON(minor
>= N_DEVS
);
120 spin_lock_irqsave(&used_minors_lock
, flags
);
121 BUG_ON(!test_bit(minor
, used_minors
));
122 clear_bit(minor
, used_minors
);
123 spin_unlock_irqrestore(&used_minors_lock
, flags
);
/*
 * Users who grab a pointer to the device with aoedev_by_aoeaddr
 * automatically get a reference count and must be responsible
 * for performing an aoedev_put.  With the addition of async
 * kthread processing I'm no longer confident that we can
 * guarantee consistency in the face of device flushes.
 *
 * For the time being, we only bother to add extra references for
 * frames sitting on the iocq.  When the kthreads finish processing
 * these frames, they will aoedev_put the device.
 */
139 aoedev_put(struct aoedev
*d
)
143 spin_lock_irqsave(&devlist_lock
, flags
);
145 spin_unlock_irqrestore(&devlist_lock
, flags
);
149 dummy_timer(ulong vp
)
153 d
= (struct aoedev
*)vp
;
154 if (d
->flags
& DEVFL_TKILL
)
156 d
->timer
.expires
= jiffies
+ HZ
;
157 add_timer(&d
->timer
);
161 aoe_failip(struct aoedev
*d
)
167 aoe_failbuf(d
, d
->ip
.buf
);
172 while ((bio
= d
->ip
.nxbio
)) {
173 clear_bit(BIO_UPTODATE
, &bio
->bi_flags
);
174 d
->ip
.nxbio
= bio
->bi_next
;
175 n
= (unsigned long) rq
->special
;
176 rq
->special
= (void *) --n
;
178 if ((unsigned long) rq
->special
== 0)
179 aoe_end_request(d
, rq
, 0);
183 downdev_frame(struct list_head
*pos
)
187 f
= list_entry(pos
, struct frame
, head
);
190 f
->buf
->nframesout
--;
191 aoe_failbuf(f
->t
->d
, f
->buf
);
197 aoedev_downdev(struct aoedev
*d
)
199 struct aoetgt
*t
, **tt
, **te
;
200 struct list_head
*head
, *pos
, *nx
;
204 d
->flags
&= ~DEVFL_UP
;
206 /* clean out active and to-be-retransmitted buffers */
207 for (i
= 0; i
< NFACTIVE
; i
++) {
208 head
= &d
->factive
[i
];
209 list_for_each_safe(pos
, nx
, head
)
213 list_for_each_safe(pos
, nx
, head
)
216 /* reset window dressings */
218 te
= tt
+ d
->ntargets
;
219 for (; tt
< te
&& (t
= *tt
); tt
++) {
224 /* clean out the in-process request (if any) */
227 /* fast fail all pending I/O */
229 while ((rq
= blk_peek_request(d
->blkq
))) {
230 blk_start_request(rq
);
231 aoe_end_request(d
, rq
, 1);
236 set_capacity(d
->gd
, 0);
/* return whether the user asked for this particular
 * device to be flushed
 */
243 user_req(char *s
, size_t slen
, struct aoedev
*d
)
250 p
= kbasename(d
->gd
->disk_name
);
251 lim
= sizeof(d
->gd
->disk_name
);
252 lim
-= p
- d
->gd
->disk_name
;
256 return !strncmp(s
, p
, lim
);
260 freedev(struct aoedev
*d
)
262 struct aoetgt
**t
, **e
;
266 spin_lock_irqsave(&d
->lock
, flags
);
267 if (d
->flags
& DEVFL_TKILL
268 && !(d
->flags
& DEVFL_FREEING
)) {
269 d
->flags
|= DEVFL_FREEING
;
272 spin_unlock_irqrestore(&d
->lock
, flags
);
276 del_timer_sync(&d
->timer
);
278 aoedisk_rm_debugfs(d
);
282 blk_cleanup_queue(d
->blkq
);
286 for (; t
< e
&& *t
; t
++)
289 mempool_destroy(d
->bufpool
);
291 minor_free(d
->sysminor
);
293 spin_lock_irqsave(&d
->lock
, flags
);
294 d
->flags
|= DEVFL_FREED
;
295 spin_unlock_irqrestore(&d
->lock
, flags
);
304 flush(const char __user
*str
, size_t cnt
, int exiting
)
307 struct aoedev
*d
, **dd
;
310 int specified
= 0; /* flush a specific device */
311 unsigned int skipflags
;
313 skipflags
= DEVFL_GDALLOC
| DEVFL_NEWSIZE
| DEVFL_TKILL
;
315 if (!exiting
&& cnt
>= 3) {
316 if (cnt
> sizeof buf
)
318 if (copy_from_user(buf
, str
, cnt
))
320 all
= !strncmp(buf
, "all", 3);
325 flush_scheduled_work();
326 /* pass one: without sleeping, do aoedev_downdev */
327 spin_lock_irqsave(&devlist_lock
, flags
);
328 for (d
= devlist
; d
; d
= d
->next
) {
331 /* unconditionally take each device down */
332 } else if (specified
) {
333 if (!user_req(buf
, cnt
, d
))
335 } else if ((!all
&& (d
->flags
& DEVFL_UP
))
336 || d
->flags
& skipflags
342 d
->flags
|= DEVFL_TKILL
;
344 spin_unlock(&d
->lock
);
346 spin_unlock_irqrestore(&devlist_lock
, flags
);
348 /* pass two: call freedev, which might sleep,
349 * for aoedevs marked with DEVFL_TKILL
352 spin_lock_irqsave(&devlist_lock
, flags
);
353 for (d
= devlist
; d
; d
= d
->next
) {
355 if (d
->flags
& DEVFL_TKILL
356 && !(d
->flags
& DEVFL_FREEING
)) {
357 spin_unlock(&d
->lock
);
358 spin_unlock_irqrestore(&devlist_lock
, flags
);
362 spin_unlock(&d
->lock
);
365 /* pass three: remove aoedevs marked with DEVFL_FREED */
366 for (dd
= &devlist
, d
= *dd
; d
; d
= *dd
) {
367 struct aoedev
*doomed
= NULL
;
370 if (d
->flags
& DEVFL_FREED
) {
376 spin_unlock(&d
->lock
);
378 kfree(doomed
->targets
);
381 spin_unlock_irqrestore(&devlist_lock
, flags
);
387 aoedev_flush(const char __user
*str
, size_t cnt
)
389 return flush(str
, cnt
, NOT_EXITING
);
/* This has been confirmed to occur once with Tms=3*1000 due to the
 * driver changing link and not processing its transmit ring.  The
 * problem is hard enough to solve by returning an error that I'm
 * still punting on "solving" this.
 */
398 skbfree(struct sk_buff
*skb
)
400 enum { Sms
= 250, Tms
= 30 * 1000};
405 while (atomic_read(&skb_shinfo(skb
)->dataref
) != 1 && i
-- > 0)
409 "aoe: %s holds ref: %s\n",
410 skb
->dev
? skb
->dev
->name
: "netif",
411 "cannot free skb -- memory leaked.");
414 skb
->truesize
-= skb
->data_len
;
415 skb_shinfo(skb
)->nr_frags
= skb
->data_len
= 0;
421 skbpoolfree(struct aoedev
*d
)
423 struct sk_buff
*skb
, *tmp
;
425 skb_queue_walk_safe(&d
->skbpool
, skb
, tmp
)
428 __skb_queue_head_init(&d
->skbpool
);
431 /* find it or allocate it */
433 aoedev_by_aoeaddr(ulong maj
, int min
, int do_alloc
)
440 spin_lock_irqsave(&devlist_lock
, flags
);
442 for (d
=devlist
; d
; d
=d
->next
)
443 if (d
->aoemajor
== maj
&& d
->aoeminor
== min
) {
445 if (d
->flags
& DEVFL_TKILL
) {
446 spin_unlock(&d
->lock
);
451 spin_unlock(&d
->lock
);
454 if (d
|| !do_alloc
|| minor_get(&sysminor
, maj
, min
) < 0)
456 d
= kcalloc(1, sizeof *d
, GFP_ATOMIC
);
459 d
->targets
= kcalloc(NTARGETS
, sizeof(*d
->targets
), GFP_ATOMIC
);
465 d
->ntargets
= NTARGETS
;
466 INIT_WORK(&d
->work
, aoecmd_sleepwork
);
467 spin_lock_init(&d
->lock
);
468 skb_queue_head_init(&d
->skbpool
);
469 init_timer(&d
->timer
);
470 d
->timer
.data
= (ulong
) d
;
471 d
->timer
.function
= dummy_timer
;
472 d
->timer
.expires
= jiffies
+ HZ
;
473 add_timer(&d
->timer
);
474 d
->bufpool
= NULL
; /* defer to aoeblk_gdalloc */
477 for (i
= 0; i
< NFACTIVE
; i
++)
478 INIT_LIST_HEAD(&d
->factive
[i
]);
479 INIT_LIST_HEAD(&d
->rexmitq
);
480 d
->sysminor
= sysminor
;
483 d
->rttavg
= RTTAVG_INIT
;
484 d
->rttdev
= RTTDEV_INIT
;
488 spin_unlock_irqrestore(&devlist_lock
, flags
);
493 freetgt(struct aoedev
*d
, struct aoetgt
*t
)
496 struct list_head
*pos
, *nx
, *head
;
499 for (ifp
= t
->ifs
; ifp
< &t
->ifs
[NAOEIFS
]; ++ifp
) {
506 list_for_each_safe(pos
, nx
, head
) {
508 f
= list_entry(pos
, struct frame
, head
);
518 flush_scheduled_work();
519 flush(NULL
, 0, EXITING
);