From bcc05910359183b431da92713e98eed478edf83a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 9 Jul 2014 15:57:26 +0200 Subject: [PATCH] IB/srp: Fix deadlock between host removal and multipathd If scsi_remove_host() is invoked after a SCSI device has been blocked, if the fast_io_fail_tmo or dev_loss_tmo work gets scheduled on the workqueue executing srp_remove_work() and if an I/O request is scheduled after the SCSI device had been blocked by e.g. multipathd then the following deadlock can occur: kworker/6:1 D ffff880831f3c460 0 195 2 0x00000000 Call Trace: [] schedule+0x29/0x70 [] schedule_timeout+0x10f/0x2a0 [] msleep+0x2f/0x40 [] __blk_drain_queue+0x4e/0x180 [] blk_cleanup_queue+0x225/0x230 [] __scsi_remove_device+0x62/0xe0 [scsi_mod] [] scsi_forget_host+0x6f/0x80 [scsi_mod] [] scsi_remove_host+0x7a/0x130 [scsi_mod] [] srp_remove_work+0x95/0x180 [ib_srp] [] process_one_work+0x1ea/0x6c0 [] worker_thread+0x11b/0x3a0 [] kthread+0xed/0x110 [] ret_from_fork+0x7c/0xb0 multipathd D ffff880096acc460 0 5340 1 0x00000000 Call Trace: [] schedule+0x29/0x70 [] schedule_timeout+0x10f/0x2a0 [] io_schedule_timeout+0x9b/0xf0 [] wait_for_completion_io_timeout+0xdc/0x110 [] blk_execute_rq+0x9b/0x100 [] sg_io+0x1a5/0x450 [] scsi_cmd_ioctl+0x2a1/0x430 [] scsi_cmd_blk_ioctl+0x42/0x50 [] sd_ioctl+0xbe/0x140 [sd_mod] [] blkdev_ioctl+0x234/0x840 [] block_ioctl+0x41/0x50 [] do_vfs_ioctl+0x300/0x520 [] SyS_ioctl+0x41/0x80 [] tracesys+0xd0/0xd5 Fix this by scheduling removal work on another workqueue than the transport layer timers. Signed-off-by: Bart Van Assche Reviewed-by: Sagi Grimberg Reviewed-by: David Dillow Cc: Sebastian Parschauer Cc: Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 38 +++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index e3c2c5b4297f..767000811cf9 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -130,6 +130,7 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr); static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event); static struct scsi_transport_template *ib_srp_transport_template; +static struct workqueue_struct *srp_remove_wq; static struct ib_client srp_client = { .name = "srp", @@ -731,7 +732,7 @@ static bool srp_queue_remove_work(struct srp_target_port *target) spin_unlock_irq(&target->lock); if (changed) - queue_work(system_long_wq, &target->remove_work); + queue_work(srp_remove_wq, &target->remove_work); return changed; } @@ -3261,9 +3262,10 @@ static void srp_remove_one(struct ib_device *device) spin_unlock(&host->target_lock); /* - * Wait for target port removal tasks. + * Wait for tl_err and target port removal tasks. */ flush_workqueue(system_long_wq); + flush_workqueue(srp_remove_wq); kfree(host); } @@ -3313,16 +3315,22 @@ static int __init srp_init_module(void) indirect_sg_entries = cmd_sg_entries; } + srp_remove_wq = create_workqueue("srp_remove"); + if (IS_ERR(srp_remove_wq)) { + ret = PTR_ERR(srp_remove_wq); + goto out; + } + + ret = -ENOMEM; ib_srp_transport_template = srp_attach_transport(&ib_srp_transport_functions); if (!ib_srp_transport_template) - return -ENOMEM; + goto destroy_wq; ret = class_register(&srp_class); if (ret) { pr_err("couldn't register class infiniband_srp\n"); - srp_release_transport(ib_srp_transport_template); - return ret; + goto release_tr; } ib_sa_register_client(&srp_sa_client); @@ -3330,13 +3338,22 @@ static int __init srp_init_module(void) ret = ib_register_client(&srp_client); if (ret) { pr_err("couldn't register IB client\n"); - srp_release_transport(ib_srp_transport_template); - ib_sa_unregister_client(&srp_sa_client); - class_unregister(&srp_class); - return ret; + goto unreg_sa; } - return 0; +out: + return ret; + +unreg_sa: + ib_sa_unregister_client(&srp_sa_client); + class_unregister(&srp_class); + +release_tr: + srp_release_transport(ib_srp_transport_template); + +destroy_wq: + destroy_workqueue(srp_remove_wq); + goto out; } static void __exit srp_cleanup_module(void) @@ -3345,6 +3362,7 @@ static void __exit srp_cleanup_module(void) ib_sa_unregister_client(&srp_sa_client); class_unregister(&srp_class); srp_release_transport(ib_srp_transport_template); + destroy_workqueue(srp_remove_wq); } module_init(srp_init_module); -- 2.11.4.GIT