Merge tag 'regmap-fix-v5.11-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux/fpc-iii.git] / drivers / scsi / device_handler / scsi_dh_alua.c
blobea436a14087f1ee1416680db87b29674b2b48404
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Generic SCSI-3 ALUA SCSI Device Handler
5 * Copyright (C) 2007-2010 Hannes Reinecke, SUSE Linux Products GmbH.
6 * All rights reserved.
7 */
8 #include <linux/slab.h>
9 #include <linux/delay.h>
10 #include <linux/module.h>
11 #include <asm/unaligned.h>
12 #include <scsi/scsi.h>
13 #include <scsi/scsi_proto.h>
14 #include <scsi/scsi_dbg.h>
15 #include <scsi/scsi_eh.h>
16 #include <scsi/scsi_dh.h>
/* Handler name (used as log prefix and registration name) and version. */
#define ALUA_DH_NAME "alua"
#define ALUA_DH_VER "2.0"

/*
 * Bits tested against alua_port_group::valid_states, which is taken from
 * byte 1 of an RTPG target port group descriptor.
 */
#define TPGS_SUPPORT_NONE		0x00
#define TPGS_SUPPORT_OPTIMIZED		0x01
#define TPGS_SUPPORT_NONOPTIMIZED	0x02
#define TPGS_SUPPORT_STANDBY		0x04
#define TPGS_SUPPORT_UNAVAILABLE	0x08
#define TPGS_SUPPORT_LBA_DEPENDENT	0x10
#define TPGS_SUPPORT_OFFLINE		0x40
#define TPGS_SUPPORT_TRANSITION		0x80
#define TPGS_SUPPORT_ALL		0xdf

/* Format field in byte 4 of the RTPG response; selects the extended header. */
#define RTPG_FMT_MASK			0x70
#define RTPG_FMT_EXT_HDR		0x10

/* Values compared against the result of scsi_device_tpgs(). */
#define TPGS_MODE_UNINITIALIZED		-1
#define TPGS_MODE_NONE			0x0
#define TPGS_MODE_IMPLICIT		0x1
#define TPGS_MODE_EXPLICIT		0x2

#define ALUA_RTPG_SIZE			128	/* initial RTPG buffer, bytes */
#define ALUA_FAILOVER_TIMEOUT		60	/* seconds (multiplied by HZ) */
#define ALUA_FAILOVER_RETRIES		5	/* retries passed to the SCSI layer */
#define ALUA_RTPG_DELAY_MSECS		5	/* delay before the first RTPG work */
#define ALUA_RTPG_RETRY_DELAY		2	/* seconds between RTPG retries */

/* device handler flags */
#define ALUA_OPTIMIZE_STPG		0x01
#define ALUA_RTPG_EXT_HDR_UNSUPP	0x02
/* State machine flags */
#define ALUA_PG_RUN_RTPG		0x10
#define ALUA_PG_RUN_STPG		0x20
#define ALUA_PG_RUNNING			0x40
53 static uint optimize_stpg;
54 module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR);
55 MODULE_PARM_DESC(optimize_stpg, "Allow use of a non-optimized path, rather than sending a STPG, when implicit TPGS is supported (0=No,1=Yes). Default is 0.");
57 static LIST_HEAD(port_group_list);
58 static DEFINE_SPINLOCK(port_group_lock);
59 static struct workqueue_struct *kaluad_wq;
61 struct alua_port_group {
62 struct kref kref;
63 struct rcu_head rcu;
64 struct list_head node;
65 struct list_head dh_list;
66 unsigned char device_id_str[256];
67 int device_id_len;
68 int group_id;
69 int tpgs;
70 int state;
71 int pref;
72 int valid_states;
73 unsigned flags; /* used for optimizing STPG */
74 unsigned char transition_tmo;
75 unsigned long expiry;
76 unsigned long interval;
77 struct delayed_work rtpg_work;
78 spinlock_t lock;
79 struct list_head rtpg_list;
80 struct scsi_device *rtpg_sdev;
83 struct alua_dh_data {
84 struct list_head node;
85 struct alua_port_group __rcu *pg;
86 int group_id;
87 spinlock_t pg_lock;
88 struct scsi_device *sdev;
89 int init_error;
90 struct mutex init_mutex;
93 struct alua_queue_data {
94 struct list_head entry;
95 activate_complete callback_fn;
96 void *callback_data;
99 #define ALUA_POLICY_SWITCH_CURRENT 0
100 #define ALUA_POLICY_SWITCH_ALL 1
102 static void alua_rtpg_work(struct work_struct *work);
103 static bool alua_rtpg_queue(struct alua_port_group *pg,
104 struct scsi_device *sdev,
105 struct alua_queue_data *qdata, bool force);
106 static void alua_check(struct scsi_device *sdev, bool force);
108 static void release_port_group(struct kref *kref)
110 struct alua_port_group *pg;
112 pg = container_of(kref, struct alua_port_group, kref);
113 if (pg->rtpg_sdev)
114 flush_delayed_work(&pg->rtpg_work);
115 spin_lock(&port_group_lock);
116 list_del(&pg->node);
117 spin_unlock(&port_group_lock);
118 kfree_rcu(pg, rcu);
122 * submit_rtpg - Issue a REPORT TARGET GROUP STATES command
123 * @sdev: sdev the command should be sent to
125 static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff,
126 int bufflen, struct scsi_sense_hdr *sshdr, int flags)
128 u8 cdb[MAX_COMMAND_SIZE];
129 int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
130 REQ_FAILFAST_DRIVER;
132 /* Prepare the command. */
133 memset(cdb, 0x0, MAX_COMMAND_SIZE);
134 cdb[0] = MAINTENANCE_IN;
135 if (!(flags & ALUA_RTPG_EXT_HDR_UNSUPP))
136 cdb[1] = MI_REPORT_TARGET_PGS | MI_EXT_HDR_PARAM_FMT;
137 else
138 cdb[1] = MI_REPORT_TARGET_PGS;
139 put_unaligned_be32(bufflen, &cdb[6]);
141 return scsi_execute(sdev, cdb, DMA_FROM_DEVICE, buff, bufflen, NULL,
142 sshdr, ALUA_FAILOVER_TIMEOUT * HZ,
143 ALUA_FAILOVER_RETRIES, req_flags, 0, NULL);
147 * submit_stpg - Issue a SET TARGET PORT GROUP command
149 * Currently we're only setting the current target port group state
150 * to 'active/optimized' and let the array firmware figure out
151 * the states of the remaining groups.
153 static int submit_stpg(struct scsi_device *sdev, int group_id,
154 struct scsi_sense_hdr *sshdr)
156 u8 cdb[MAX_COMMAND_SIZE];
157 unsigned char stpg_data[8];
158 int stpg_len = 8;
159 int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
160 REQ_FAILFAST_DRIVER;
162 /* Prepare the data buffer */
163 memset(stpg_data, 0, stpg_len);
164 stpg_data[4] = SCSI_ACCESS_STATE_OPTIMAL;
165 put_unaligned_be16(group_id, &stpg_data[6]);
167 /* Prepare the command. */
168 memset(cdb, 0x0, MAX_COMMAND_SIZE);
169 cdb[0] = MAINTENANCE_OUT;
170 cdb[1] = MO_SET_TARGET_PGS;
171 put_unaligned_be32(stpg_len, &cdb[6]);
173 return scsi_execute(sdev, cdb, DMA_TO_DEVICE, stpg_data, stpg_len, NULL,
174 sshdr, ALUA_FAILOVER_TIMEOUT * HZ,
175 ALUA_FAILOVER_RETRIES, req_flags, 0, NULL);
178 static struct alua_port_group *alua_find_get_pg(char *id_str, size_t id_size,
179 int group_id)
181 struct alua_port_group *pg;
183 if (!id_str || !id_size || !strlen(id_str))
184 return NULL;
186 list_for_each_entry(pg, &port_group_list, node) {
187 if (pg->group_id != group_id)
188 continue;
189 if (!pg->device_id_len || pg->device_id_len != id_size)
190 continue;
191 if (strncmp(pg->device_id_str, id_str, id_size))
192 continue;
193 if (!kref_get_unless_zero(&pg->kref))
194 continue;
195 return pg;
198 return NULL;
202 * alua_alloc_pg - Allocate a new port_group structure
203 * @sdev: scsi device
204 * @group_id: port group id
205 * @tpgs: target port group settings
207 * Allocate a new port_group structure for a given
208 * device.
210 static struct alua_port_group *alua_alloc_pg(struct scsi_device *sdev,
211 int group_id, int tpgs)
213 struct alua_port_group *pg, *tmp_pg;
215 pg = kzalloc(sizeof(struct alua_port_group), GFP_KERNEL);
216 if (!pg)
217 return ERR_PTR(-ENOMEM);
219 pg->device_id_len = scsi_vpd_lun_id(sdev, pg->device_id_str,
220 sizeof(pg->device_id_str));
221 if (pg->device_id_len <= 0) {
223 * TPGS supported but no device identification found.
224 * Generate private device identification.
226 sdev_printk(KERN_INFO, sdev,
227 "%s: No device descriptors found\n",
228 ALUA_DH_NAME);
229 pg->device_id_str[0] = '\0';
230 pg->device_id_len = 0;
232 pg->group_id = group_id;
233 pg->tpgs = tpgs;
234 pg->state = SCSI_ACCESS_STATE_OPTIMAL;
235 pg->valid_states = TPGS_SUPPORT_ALL;
236 if (optimize_stpg)
237 pg->flags |= ALUA_OPTIMIZE_STPG;
238 kref_init(&pg->kref);
239 INIT_DELAYED_WORK(&pg->rtpg_work, alua_rtpg_work);
240 INIT_LIST_HEAD(&pg->rtpg_list);
241 INIT_LIST_HEAD(&pg->node);
242 INIT_LIST_HEAD(&pg->dh_list);
243 spin_lock_init(&pg->lock);
245 spin_lock(&port_group_lock);
246 tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len,
247 group_id);
248 if (tmp_pg) {
249 spin_unlock(&port_group_lock);
250 kfree(pg);
251 return tmp_pg;
254 list_add(&pg->node, &port_group_list);
255 spin_unlock(&port_group_lock);
257 return pg;
261 * alua_check_tpgs - Evaluate TPGS setting
262 * @sdev: device to be checked
264 * Examine the TPGS setting of the sdev to find out if ALUA
265 * is supported.
267 static int alua_check_tpgs(struct scsi_device *sdev)
269 int tpgs = TPGS_MODE_NONE;
272 * ALUA support for non-disk devices is fraught with
273 * difficulties, so disable it for now.
275 if (sdev->type != TYPE_DISK) {
276 sdev_printk(KERN_INFO, sdev,
277 "%s: disable for non-disk devices\n",
278 ALUA_DH_NAME);
279 return tpgs;
282 tpgs = scsi_device_tpgs(sdev);
283 switch (tpgs) {
284 case TPGS_MODE_EXPLICIT|TPGS_MODE_IMPLICIT:
285 sdev_printk(KERN_INFO, sdev,
286 "%s: supports implicit and explicit TPGS\n",
287 ALUA_DH_NAME);
288 break;
289 case TPGS_MODE_EXPLICIT:
290 sdev_printk(KERN_INFO, sdev, "%s: supports explicit TPGS\n",
291 ALUA_DH_NAME);
292 break;
293 case TPGS_MODE_IMPLICIT:
294 sdev_printk(KERN_INFO, sdev, "%s: supports implicit TPGS\n",
295 ALUA_DH_NAME);
296 break;
297 case TPGS_MODE_NONE:
298 sdev_printk(KERN_INFO, sdev, "%s: not supported\n",
299 ALUA_DH_NAME);
300 break;
301 default:
302 sdev_printk(KERN_INFO, sdev,
303 "%s: unsupported TPGS setting %d\n",
304 ALUA_DH_NAME, tpgs);
305 tpgs = TPGS_MODE_NONE;
306 break;
309 return tpgs;
313 * alua_check_vpd - Evaluate INQUIRY vpd page 0x83
314 * @sdev: device to be checked
316 * Extract the relative target port and the target port group
317 * descriptor from the list of identificators.
319 static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h,
320 int tpgs)
322 int rel_port = -1, group_id;
323 struct alua_port_group *pg, *old_pg = NULL;
324 bool pg_updated = false;
325 unsigned long flags;
327 group_id = scsi_vpd_tpg_id(sdev, &rel_port);
328 if (group_id < 0) {
330 * Internal error; TPGS supported but required
331 * VPD identification descriptors not present.
332 * Disable ALUA support
334 sdev_printk(KERN_INFO, sdev,
335 "%s: No target port descriptors found\n",
336 ALUA_DH_NAME);
337 return SCSI_DH_DEV_UNSUPP;
340 pg = alua_alloc_pg(sdev, group_id, tpgs);
341 if (IS_ERR(pg)) {
342 if (PTR_ERR(pg) == -ENOMEM)
343 return SCSI_DH_NOMEM;
344 return SCSI_DH_DEV_UNSUPP;
346 if (pg->device_id_len)
347 sdev_printk(KERN_INFO, sdev,
348 "%s: device %s port group %x rel port %x\n",
349 ALUA_DH_NAME, pg->device_id_str,
350 group_id, rel_port);
351 else
352 sdev_printk(KERN_INFO, sdev,
353 "%s: port group %x rel port %x\n",
354 ALUA_DH_NAME, group_id, rel_port);
356 /* Check for existing port group references */
357 spin_lock(&h->pg_lock);
358 old_pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock));
359 if (old_pg != pg) {
360 /* port group has changed. Update to new port group */
361 if (h->pg) {
362 spin_lock_irqsave(&old_pg->lock, flags);
363 list_del_rcu(&h->node);
364 spin_unlock_irqrestore(&old_pg->lock, flags);
366 rcu_assign_pointer(h->pg, pg);
367 pg_updated = true;
370 spin_lock_irqsave(&pg->lock, flags);
371 if (pg_updated)
372 list_add_rcu(&h->node, &pg->dh_list);
373 spin_unlock_irqrestore(&pg->lock, flags);
375 alua_rtpg_queue(rcu_dereference_protected(h->pg,
376 lockdep_is_held(&h->pg_lock)),
377 sdev, NULL, true);
378 spin_unlock(&h->pg_lock);
380 if (old_pg)
381 kref_put(&old_pg->kref, release_port_group);
383 return SCSI_DH_OK;
386 static char print_alua_state(unsigned char state)
388 switch (state) {
389 case SCSI_ACCESS_STATE_OPTIMAL:
390 return 'A';
391 case SCSI_ACCESS_STATE_ACTIVE:
392 return 'N';
393 case SCSI_ACCESS_STATE_STANDBY:
394 return 'S';
395 case SCSI_ACCESS_STATE_UNAVAILABLE:
396 return 'U';
397 case SCSI_ACCESS_STATE_LBA:
398 return 'L';
399 case SCSI_ACCESS_STATE_OFFLINE:
400 return 'O';
401 case SCSI_ACCESS_STATE_TRANSITIONING:
402 return 'T';
403 default:
404 return 'X';
408 static int alua_check_sense(struct scsi_device *sdev,
409 struct scsi_sense_hdr *sense_hdr)
411 struct alua_dh_data *h = sdev->handler_data;
412 struct alua_port_group *pg;
414 switch (sense_hdr->sense_key) {
415 case NOT_READY:
416 if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) {
418 * LUN Not Accessible - ALUA state transition
420 rcu_read_lock();
421 pg = rcu_dereference(h->pg);
422 if (pg)
423 pg->state = SCSI_ACCESS_STATE_TRANSITIONING;
424 rcu_read_unlock();
425 alua_check(sdev, false);
426 return NEEDS_RETRY;
428 break;
429 case UNIT_ATTENTION:
430 if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) {
432 * Power On, Reset, or Bus Device Reset.
433 * Might have obscured a state transition,
434 * so schedule a recheck.
436 alua_check(sdev, true);
437 return ADD_TO_MLQUEUE;
439 if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x04)
441 * Device internal reset
443 return ADD_TO_MLQUEUE;
444 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x01)
446 * Mode Parameters Changed
448 return ADD_TO_MLQUEUE;
449 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) {
451 * ALUA state changed
453 alua_check(sdev, true);
454 return ADD_TO_MLQUEUE;
456 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) {
458 * Implicit ALUA state transition failed
460 alua_check(sdev, true);
461 return ADD_TO_MLQUEUE;
463 if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x03)
465 * Inquiry data has changed
467 return ADD_TO_MLQUEUE;
468 if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x0e)
470 * REPORTED_LUNS_DATA_HAS_CHANGED is reported
471 * when switching controllers on targets like
472 * Intel Multi-Flex. We can just retry.
474 return ADD_TO_MLQUEUE;
475 break;
478 return SCSI_RETURN_NOT_HANDLED;
482 * alua_tur - Send a TEST UNIT READY
483 * @sdev: device to which the TEST UNIT READY command should be send
485 * Send a TEST UNIT READY to @sdev to figure out the device state
486 * Returns SCSI_DH_RETRY if the sense code is NOT READY/ALUA TRANSITIONING,
487 * SCSI_DH_OK if no error occurred, and SCSI_DH_IO otherwise.
489 static int alua_tur(struct scsi_device *sdev)
491 struct scsi_sense_hdr sense_hdr;
492 int retval;
494 retval = scsi_test_unit_ready(sdev, ALUA_FAILOVER_TIMEOUT * HZ,
495 ALUA_FAILOVER_RETRIES, &sense_hdr);
496 if (sense_hdr.sense_key == NOT_READY &&
497 sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a)
498 return SCSI_DH_RETRY;
499 else if (retval)
500 return SCSI_DH_IO;
501 else
502 return SCSI_DH_OK;
506 * alua_rtpg - Evaluate REPORT TARGET GROUP STATES
507 * @sdev: the device to be evaluated.
509 * Evaluate the Target Port Group State.
510 * Returns SCSI_DH_DEV_OFFLINED if the path is
511 * found to be unusable.
513 static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
515 struct scsi_sense_hdr sense_hdr;
516 struct alua_port_group *tmp_pg;
517 int len, k, off, bufflen = ALUA_RTPG_SIZE;
518 unsigned char *desc, *buff;
519 unsigned err, retval;
520 unsigned int tpg_desc_tbl_off;
521 unsigned char orig_transition_tmo;
522 unsigned long flags;
523 bool transitioning_sense = false;
525 if (!pg->expiry) {
526 unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ;
528 if (pg->transition_tmo)
529 transition_tmo = pg->transition_tmo * HZ;
531 pg->expiry = round_jiffies_up(jiffies + transition_tmo);
534 buff = kzalloc(bufflen, GFP_KERNEL);
535 if (!buff)
536 return SCSI_DH_DEV_TEMP_BUSY;
538 retry:
539 err = 0;
540 retval = submit_rtpg(sdev, buff, bufflen, &sense_hdr, pg->flags);
542 if (retval) {
544 * Some (broken) implementations have a habit of returning
545 * an error during things like firmware update etc.
546 * But if the target only supports active/optimized there's
547 * not much we can do; it's not that we can switch paths
548 * or anything.
549 * So ignore any errors to avoid spurious failures during
550 * path failover.
552 if ((pg->valid_states & ~TPGS_SUPPORT_OPTIMIZED) == 0) {
553 sdev_printk(KERN_INFO, sdev,
554 "%s: ignoring rtpg result %d\n",
555 ALUA_DH_NAME, retval);
556 kfree(buff);
557 return SCSI_DH_OK;
559 if (!scsi_sense_valid(&sense_hdr)) {
560 sdev_printk(KERN_INFO, sdev,
561 "%s: rtpg failed, result %d\n",
562 ALUA_DH_NAME, retval);
563 kfree(buff);
564 if (driver_byte(retval) == DRIVER_ERROR)
565 return SCSI_DH_DEV_TEMP_BUSY;
566 return SCSI_DH_IO;
570 * submit_rtpg() has failed on existing arrays
571 * when requesting extended header info, and
572 * the array doesn't support extended headers,
573 * even though it shouldn't according to T10.
574 * The retry without rtpg_ext_hdr_req set
575 * handles this.
577 if (!(pg->flags & ALUA_RTPG_EXT_HDR_UNSUPP) &&
578 sense_hdr.sense_key == ILLEGAL_REQUEST &&
579 sense_hdr.asc == 0x24 && sense_hdr.ascq == 0) {
580 pg->flags |= ALUA_RTPG_EXT_HDR_UNSUPP;
581 goto retry;
584 * If the array returns with 'ALUA state transition'
585 * sense code here it cannot return RTPG data during
586 * transition. So set the state to 'transitioning' directly.
588 if (sense_hdr.sense_key == NOT_READY &&
589 sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) {
590 transitioning_sense = true;
591 goto skip_rtpg;
594 * Retry on any other UNIT ATTENTION occurred.
596 if (sense_hdr.sense_key == UNIT_ATTENTION)
597 err = SCSI_DH_RETRY;
598 if (err == SCSI_DH_RETRY &&
599 pg->expiry != 0 && time_before(jiffies, pg->expiry)) {
600 sdev_printk(KERN_ERR, sdev, "%s: rtpg retry\n",
601 ALUA_DH_NAME);
602 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
603 kfree(buff);
604 return err;
606 sdev_printk(KERN_ERR, sdev, "%s: rtpg failed\n",
607 ALUA_DH_NAME);
608 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
609 kfree(buff);
610 pg->expiry = 0;
611 return SCSI_DH_IO;
614 len = get_unaligned_be32(&buff[0]) + 4;
616 if (len > bufflen) {
617 /* Resubmit with the correct length */
618 kfree(buff);
619 bufflen = len;
620 buff = kmalloc(bufflen, GFP_KERNEL);
621 if (!buff) {
622 sdev_printk(KERN_WARNING, sdev,
623 "%s: kmalloc buffer failed\n",__func__);
624 /* Temporary failure, bypass */
625 pg->expiry = 0;
626 return SCSI_DH_DEV_TEMP_BUSY;
628 goto retry;
631 orig_transition_tmo = pg->transition_tmo;
632 if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR && buff[5] != 0)
633 pg->transition_tmo = buff[5];
634 else
635 pg->transition_tmo = ALUA_FAILOVER_TIMEOUT;
637 if (orig_transition_tmo != pg->transition_tmo) {
638 sdev_printk(KERN_INFO, sdev,
639 "%s: transition timeout set to %d seconds\n",
640 ALUA_DH_NAME, pg->transition_tmo);
641 pg->expiry = jiffies + pg->transition_tmo * HZ;
644 if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR)
645 tpg_desc_tbl_off = 8;
646 else
647 tpg_desc_tbl_off = 4;
649 for (k = tpg_desc_tbl_off, desc = buff + tpg_desc_tbl_off;
650 k < len;
651 k += off, desc += off) {
652 u16 group_id = get_unaligned_be16(&desc[2]);
654 spin_lock_irqsave(&port_group_lock, flags);
655 tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len,
656 group_id);
657 spin_unlock_irqrestore(&port_group_lock, flags);
658 if (tmp_pg) {
659 if (spin_trylock_irqsave(&tmp_pg->lock, flags)) {
660 if ((tmp_pg == pg) ||
661 !(tmp_pg->flags & ALUA_PG_RUNNING)) {
662 struct alua_dh_data *h;
664 tmp_pg->state = desc[0] & 0x0f;
665 tmp_pg->pref = desc[0] >> 7;
666 rcu_read_lock();
667 list_for_each_entry_rcu(h,
668 &tmp_pg->dh_list, node) {
669 if (!h->sdev)
670 continue;
671 h->sdev->access_state = desc[0];
673 rcu_read_unlock();
675 if (tmp_pg == pg)
676 tmp_pg->valid_states = desc[1];
677 spin_unlock_irqrestore(&tmp_pg->lock, flags);
679 kref_put(&tmp_pg->kref, release_port_group);
681 off = 8 + (desc[7] * 4);
684 skip_rtpg:
685 spin_lock_irqsave(&pg->lock, flags);
686 if (transitioning_sense)
687 pg->state = SCSI_ACCESS_STATE_TRANSITIONING;
689 sdev_printk(KERN_INFO, sdev,
690 "%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n",
691 ALUA_DH_NAME, pg->group_id, print_alua_state(pg->state),
692 pg->pref ? "preferred" : "non-preferred",
693 pg->valid_states&TPGS_SUPPORT_TRANSITION?'T':'t',
694 pg->valid_states&TPGS_SUPPORT_OFFLINE?'O':'o',
695 pg->valid_states&TPGS_SUPPORT_LBA_DEPENDENT?'L':'l',
696 pg->valid_states&TPGS_SUPPORT_UNAVAILABLE?'U':'u',
697 pg->valid_states&TPGS_SUPPORT_STANDBY?'S':'s',
698 pg->valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n',
699 pg->valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a');
701 switch (pg->state) {
702 case SCSI_ACCESS_STATE_TRANSITIONING:
703 if (time_before(jiffies, pg->expiry)) {
704 /* State transition, retry */
705 pg->interval = ALUA_RTPG_RETRY_DELAY;
706 err = SCSI_DH_RETRY;
707 } else {
708 struct alua_dh_data *h;
710 /* Transitioning time exceeded, set port to standby */
711 err = SCSI_DH_IO;
712 pg->state = SCSI_ACCESS_STATE_STANDBY;
713 pg->expiry = 0;
714 rcu_read_lock();
715 list_for_each_entry_rcu(h, &pg->dh_list, node) {
716 if (!h->sdev)
717 continue;
718 h->sdev->access_state =
719 (pg->state & SCSI_ACCESS_STATE_MASK);
720 if (pg->pref)
721 h->sdev->access_state |=
722 SCSI_ACCESS_STATE_PREFERRED;
724 rcu_read_unlock();
726 break;
727 case SCSI_ACCESS_STATE_OFFLINE:
728 /* Path unusable */
729 err = SCSI_DH_DEV_OFFLINED;
730 pg->expiry = 0;
731 break;
732 default:
733 /* Useable path if active */
734 err = SCSI_DH_OK;
735 pg->expiry = 0;
736 break;
738 spin_unlock_irqrestore(&pg->lock, flags);
739 kfree(buff);
740 return err;
744 * alua_stpg - Issue a SET TARGET PORT GROUP command
746 * Issue a SET TARGET PORT GROUP command and evaluate the
747 * response. Returns SCSI_DH_RETRY per default to trigger
748 * a re-evaluation of the target group state or SCSI_DH_OK
749 * if no further action needs to be taken.
751 static unsigned alua_stpg(struct scsi_device *sdev, struct alua_port_group *pg)
753 int retval;
754 struct scsi_sense_hdr sense_hdr;
756 if (!(pg->tpgs & TPGS_MODE_EXPLICIT)) {
757 /* Only implicit ALUA supported, retry */
758 return SCSI_DH_RETRY;
760 switch (pg->state) {
761 case SCSI_ACCESS_STATE_OPTIMAL:
762 return SCSI_DH_OK;
763 case SCSI_ACCESS_STATE_ACTIVE:
764 if ((pg->flags & ALUA_OPTIMIZE_STPG) &&
765 !pg->pref &&
766 (pg->tpgs & TPGS_MODE_IMPLICIT))
767 return SCSI_DH_OK;
768 break;
769 case SCSI_ACCESS_STATE_STANDBY:
770 case SCSI_ACCESS_STATE_UNAVAILABLE:
771 break;
772 case SCSI_ACCESS_STATE_OFFLINE:
773 return SCSI_DH_IO;
774 case SCSI_ACCESS_STATE_TRANSITIONING:
775 break;
776 default:
777 sdev_printk(KERN_INFO, sdev,
778 "%s: stpg failed, unhandled TPGS state %d",
779 ALUA_DH_NAME, pg->state);
780 return SCSI_DH_NOSYS;
782 retval = submit_stpg(sdev, pg->group_id, &sense_hdr);
784 if (retval) {
785 if (!scsi_sense_valid(&sense_hdr)) {
786 sdev_printk(KERN_INFO, sdev,
787 "%s: stpg failed, result %d",
788 ALUA_DH_NAME, retval);
789 if (driver_byte(retval) == DRIVER_ERROR)
790 return SCSI_DH_DEV_TEMP_BUSY;
791 } else {
792 sdev_printk(KERN_INFO, sdev, "%s: stpg failed\n",
793 ALUA_DH_NAME);
794 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
797 /* Retry RTPG */
798 return SCSI_DH_RETRY;
801 static void alua_rtpg_work(struct work_struct *work)
803 struct alua_port_group *pg =
804 container_of(work, struct alua_port_group, rtpg_work.work);
805 struct scsi_device *sdev;
806 LIST_HEAD(qdata_list);
807 int err = SCSI_DH_OK;
808 struct alua_queue_data *qdata, *tmp;
809 unsigned long flags;
811 spin_lock_irqsave(&pg->lock, flags);
812 sdev = pg->rtpg_sdev;
813 if (!sdev) {
814 WARN_ON(pg->flags & ALUA_PG_RUN_RTPG);
815 WARN_ON(pg->flags & ALUA_PG_RUN_STPG);
816 spin_unlock_irqrestore(&pg->lock, flags);
817 kref_put(&pg->kref, release_port_group);
818 return;
820 pg->flags |= ALUA_PG_RUNNING;
821 if (pg->flags & ALUA_PG_RUN_RTPG) {
822 int state = pg->state;
824 pg->flags &= ~ALUA_PG_RUN_RTPG;
825 spin_unlock_irqrestore(&pg->lock, flags);
826 if (state == SCSI_ACCESS_STATE_TRANSITIONING) {
827 if (alua_tur(sdev) == SCSI_DH_RETRY) {
828 spin_lock_irqsave(&pg->lock, flags);
829 pg->flags &= ~ALUA_PG_RUNNING;
830 pg->flags |= ALUA_PG_RUN_RTPG;
831 if (!pg->interval)
832 pg->interval = ALUA_RTPG_RETRY_DELAY;
833 spin_unlock_irqrestore(&pg->lock, flags);
834 queue_delayed_work(kaluad_wq, &pg->rtpg_work,
835 pg->interval * HZ);
836 return;
838 /* Send RTPG on failure or if TUR indicates SUCCESS */
840 err = alua_rtpg(sdev, pg);
841 spin_lock_irqsave(&pg->lock, flags);
842 if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) {
843 pg->flags &= ~ALUA_PG_RUNNING;
844 if (!pg->interval && !(pg->flags & ALUA_PG_RUN_RTPG))
845 pg->interval = ALUA_RTPG_RETRY_DELAY;
846 pg->flags |= ALUA_PG_RUN_RTPG;
847 spin_unlock_irqrestore(&pg->lock, flags);
848 queue_delayed_work(kaluad_wq, &pg->rtpg_work,
849 pg->interval * HZ);
850 return;
852 if (err != SCSI_DH_OK)
853 pg->flags &= ~ALUA_PG_RUN_STPG;
855 if (pg->flags & ALUA_PG_RUN_STPG) {
856 pg->flags &= ~ALUA_PG_RUN_STPG;
857 spin_unlock_irqrestore(&pg->lock, flags);
858 err = alua_stpg(sdev, pg);
859 spin_lock_irqsave(&pg->lock, flags);
860 if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) {
861 pg->flags |= ALUA_PG_RUN_RTPG;
862 pg->interval = 0;
863 pg->flags &= ~ALUA_PG_RUNNING;
864 spin_unlock_irqrestore(&pg->lock, flags);
865 queue_delayed_work(kaluad_wq, &pg->rtpg_work,
866 pg->interval * HZ);
867 return;
871 list_splice_init(&pg->rtpg_list, &qdata_list);
872 pg->rtpg_sdev = NULL;
873 spin_unlock_irqrestore(&pg->lock, flags);
875 list_for_each_entry_safe(qdata, tmp, &qdata_list, entry) {
876 list_del(&qdata->entry);
877 if (qdata->callback_fn)
878 qdata->callback_fn(qdata->callback_data, err);
879 kfree(qdata);
881 spin_lock_irqsave(&pg->lock, flags);
882 pg->flags &= ~ALUA_PG_RUNNING;
883 spin_unlock_irqrestore(&pg->lock, flags);
884 scsi_device_put(sdev);
885 kref_put(&pg->kref, release_port_group);
889 * alua_rtpg_queue() - cause RTPG to be submitted asynchronously
890 * @pg: ALUA port group associated with @sdev.
891 * @sdev: SCSI device for which to submit an RTPG.
892 * @qdata: Information about the callback to invoke after the RTPG.
893 * @force: Whether or not to submit an RTPG if a work item that will submit an
894 * RTPG already has been scheduled.
896 * Returns true if and only if alua_rtpg_work() will be called asynchronously.
897 * That function is responsible for calling @qdata->fn().
899 static bool alua_rtpg_queue(struct alua_port_group *pg,
900 struct scsi_device *sdev,
901 struct alua_queue_data *qdata, bool force)
903 int start_queue = 0;
904 unsigned long flags;
905 if (WARN_ON_ONCE(!pg) || scsi_device_get(sdev))
906 return false;
908 spin_lock_irqsave(&pg->lock, flags);
909 if (qdata) {
910 list_add_tail(&qdata->entry, &pg->rtpg_list);
911 pg->flags |= ALUA_PG_RUN_STPG;
912 force = true;
914 if (pg->rtpg_sdev == NULL) {
915 pg->interval = 0;
916 pg->flags |= ALUA_PG_RUN_RTPG;
917 kref_get(&pg->kref);
918 pg->rtpg_sdev = sdev;
919 start_queue = 1;
920 } else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) {
921 pg->flags |= ALUA_PG_RUN_RTPG;
922 /* Do not queue if the worker is already running */
923 if (!(pg->flags & ALUA_PG_RUNNING)) {
924 kref_get(&pg->kref);
925 start_queue = 1;
929 spin_unlock_irqrestore(&pg->lock, flags);
931 if (start_queue) {
932 if (queue_delayed_work(kaluad_wq, &pg->rtpg_work,
933 msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS)))
934 sdev = NULL;
935 else
936 kref_put(&pg->kref, release_port_group);
938 if (sdev)
939 scsi_device_put(sdev);
941 return true;
945 * alua_initialize - Initialize ALUA state
946 * @sdev: the device to be initialized
948 * For the prep_fn to work correctly we have
949 * to initialize the ALUA state for the device.
951 static int alua_initialize(struct scsi_device *sdev, struct alua_dh_data *h)
953 int err = SCSI_DH_DEV_UNSUPP, tpgs;
955 mutex_lock(&h->init_mutex);
956 tpgs = alua_check_tpgs(sdev);
957 if (tpgs != TPGS_MODE_NONE)
958 err = alua_check_vpd(sdev, h, tpgs);
959 h->init_error = err;
960 mutex_unlock(&h->init_mutex);
961 return err;
964 * alua_set_params - set/unset the optimize flag
965 * @sdev: device on the path to be activated
966 * params - parameters in the following format
967 * "no_of_params\0param1\0param2\0param3\0...\0"
968 * For example, to set the flag pass the following parameters
969 * from multipath.conf
970 * hardware_handler "2 alua 1"
972 static int alua_set_params(struct scsi_device *sdev, const char *params)
974 struct alua_dh_data *h = sdev->handler_data;
975 struct alua_port_group *pg = NULL;
976 unsigned int optimize = 0, argc;
977 const char *p = params;
978 int result = SCSI_DH_OK;
979 unsigned long flags;
981 if ((sscanf(params, "%u", &argc) != 1) || (argc != 1))
982 return -EINVAL;
984 while (*p++)
986 if ((sscanf(p, "%u", &optimize) != 1) || (optimize > 1))
987 return -EINVAL;
989 rcu_read_lock();
990 pg = rcu_dereference(h->pg);
991 if (!pg) {
992 rcu_read_unlock();
993 return -ENXIO;
995 spin_lock_irqsave(&pg->lock, flags);
996 if (optimize)
997 pg->flags |= ALUA_OPTIMIZE_STPG;
998 else
999 pg->flags &= ~ALUA_OPTIMIZE_STPG;
1000 spin_unlock_irqrestore(&pg->lock, flags);
1001 rcu_read_unlock();
1003 return result;
1007 * alua_activate - activate a path
1008 * @sdev: device on the path to be activated
1010 * We're currently switching the port group to be activated only and
1011 * let the array figure out the rest.
1012 * There may be other arrays which require us to switch all port groups
1013 * based on a certain policy. But until we actually encounter them it
1014 * should be okay.
1016 static int alua_activate(struct scsi_device *sdev,
1017 activate_complete fn, void *data)
1019 struct alua_dh_data *h = sdev->handler_data;
1020 int err = SCSI_DH_OK;
1021 struct alua_queue_data *qdata;
1022 struct alua_port_group *pg;
1024 qdata = kzalloc(sizeof(*qdata), GFP_KERNEL);
1025 if (!qdata) {
1026 err = SCSI_DH_RES_TEMP_UNAVAIL;
1027 goto out;
1029 qdata->callback_fn = fn;
1030 qdata->callback_data = data;
1032 mutex_lock(&h->init_mutex);
1033 rcu_read_lock();
1034 pg = rcu_dereference(h->pg);
1035 if (!pg || !kref_get_unless_zero(&pg->kref)) {
1036 rcu_read_unlock();
1037 kfree(qdata);
1038 err = h->init_error;
1039 mutex_unlock(&h->init_mutex);
1040 goto out;
1042 rcu_read_unlock();
1043 mutex_unlock(&h->init_mutex);
1045 if (alua_rtpg_queue(pg, sdev, qdata, true))
1046 fn = NULL;
1047 else
1048 err = SCSI_DH_DEV_OFFLINED;
1049 kref_put(&pg->kref, release_port_group);
1050 out:
1051 if (fn)
1052 fn(data, err);
1053 return 0;
1057 * alua_check - check path status
1058 * @sdev: device on the path to be checked
1060 * Check the device status
1062 static void alua_check(struct scsi_device *sdev, bool force)
1064 struct alua_dh_data *h = sdev->handler_data;
1065 struct alua_port_group *pg;
1067 rcu_read_lock();
1068 pg = rcu_dereference(h->pg);
1069 if (!pg || !kref_get_unless_zero(&pg->kref)) {
1070 rcu_read_unlock();
1071 return;
1073 rcu_read_unlock();
1075 alua_rtpg_queue(pg, sdev, NULL, force);
1076 kref_put(&pg->kref, release_port_group);
1080 * alua_prep_fn - request callback
1082 * Fail I/O to all paths not in state
1083 * active/optimized or active/non-optimized.
1085 static blk_status_t alua_prep_fn(struct scsi_device *sdev, struct request *req)
1087 struct alua_dh_data *h = sdev->handler_data;
1088 struct alua_port_group *pg;
1089 unsigned char state = SCSI_ACCESS_STATE_OPTIMAL;
1091 rcu_read_lock();
1092 pg = rcu_dereference(h->pg);
1093 if (pg)
1094 state = pg->state;
1095 rcu_read_unlock();
1097 switch (state) {
1098 case SCSI_ACCESS_STATE_OPTIMAL:
1099 case SCSI_ACCESS_STATE_ACTIVE:
1100 case SCSI_ACCESS_STATE_LBA:
1101 return BLK_STS_OK;
1102 case SCSI_ACCESS_STATE_TRANSITIONING:
1103 return BLK_STS_AGAIN;
1104 default:
1105 req->rq_flags |= RQF_QUIET;
1106 return BLK_STS_IOERR;
1110 static void alua_rescan(struct scsi_device *sdev)
1112 struct alua_dh_data *h = sdev->handler_data;
1114 alua_initialize(sdev, h);
1118 * alua_bus_attach - Attach device handler
1119 * @sdev: device to be attached to
1121 static int alua_bus_attach(struct scsi_device *sdev)
1123 struct alua_dh_data *h;
1124 int err;
1126 h = kzalloc(sizeof(*h) , GFP_KERNEL);
1127 if (!h)
1128 return SCSI_DH_NOMEM;
1129 spin_lock_init(&h->pg_lock);
1130 rcu_assign_pointer(h->pg, NULL);
1131 h->init_error = SCSI_DH_OK;
1132 h->sdev = sdev;
1133 INIT_LIST_HEAD(&h->node);
1135 mutex_init(&h->init_mutex);
1136 err = alua_initialize(sdev, h);
1137 if (err != SCSI_DH_OK && err != SCSI_DH_DEV_OFFLINED)
1138 goto failed;
1140 sdev->handler_data = h;
1141 return SCSI_DH_OK;
1142 failed:
1143 kfree(h);
1144 return err;
1148 * alua_bus_detach - Detach device handler
1149 * @sdev: device to be detached from
1151 static void alua_bus_detach(struct scsi_device *sdev)
1153 struct alua_dh_data *h = sdev->handler_data;
1154 struct alua_port_group *pg;
1156 spin_lock(&h->pg_lock);
1157 pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock));
1158 rcu_assign_pointer(h->pg, NULL);
1159 spin_unlock(&h->pg_lock);
1160 if (pg) {
1161 spin_lock_irq(&pg->lock);
1162 list_del_rcu(&h->node);
1163 spin_unlock_irq(&pg->lock);
1164 kref_put(&pg->kref, release_port_group);
1166 sdev->handler_data = NULL;
1167 synchronize_rcu();
1168 kfree(h);
1171 static struct scsi_device_handler alua_dh = {
1172 .name = ALUA_DH_NAME,
1173 .module = THIS_MODULE,
1174 .attach = alua_bus_attach,
1175 .detach = alua_bus_detach,
1176 .prep_fn = alua_prep_fn,
1177 .check_sense = alua_check_sense,
1178 .activate = alua_activate,
1179 .rescan = alua_rescan,
1180 .set_params = alua_set_params,
1183 static int __init alua_init(void)
1185 int r;
1187 kaluad_wq = alloc_workqueue("kaluad", WQ_MEM_RECLAIM, 0);
1188 if (!kaluad_wq)
1189 return -ENOMEM;
1191 r = scsi_register_device_handler(&alua_dh);
1192 if (r != 0) {
1193 printk(KERN_ERR "%s: Failed to register scsi device handler",
1194 ALUA_DH_NAME);
1195 destroy_workqueue(kaluad_wq);
1197 return r;
1200 static void __exit alua_exit(void)
1202 scsi_unregister_device_handler(&alua_dh);
1203 destroy_workqueue(kaluad_wq);
1206 module_init(alua_init);
1207 module_exit(alua_exit);
1209 MODULE_DESCRIPTION("DM Multipath ALUA support");
1210 MODULE_AUTHOR("Hannes Reinecke <hare@suse.de>");
1211 MODULE_LICENSE("GPL");
1212 MODULE_VERSION(ALUA_DH_VER);