ARC: Cache arc_c value during arc_evict()
[zfs.git] / cmd / zed / zed.d / deadman-slot_off.sh
blob7b339b3add01fcdfbc9d2b132755c90c0747ac00
#!/bin/sh
# shellcheck disable=SC2154,SC2034
#
# Turn off disk's enclosure slot if an I/O is hung triggering the deadman.
#
# It's possible for outstanding I/O to a misbehaving SCSI disk to neither
# promptly complete nor return an error.  This can occur due to retry and
# recovery actions taken by the SCSI layer, driver, or disk.  When it occurs
# the pool will be unresponsive even though there may be sufficient redundancy
# configured to proceed without this single disk.
#
# When a hung I/O is detected by the kmods it will be posted as a deadman
# event.  By default an I/O is considered to be hung after 5 minutes.  This
# value can be changed with the zfs_deadman_ziotime_ms module parameter.
# If ZED_POWER_OFF_ENCLOSURE_SLOT_ON_DEADMAN is set the disk's enclosure
# slot will be powered off causing the outstanding I/O to fail.  The ZED
# will then handle this like a normal disk failure and FAULT the vdev.
#
# We assume the user will be responsible for turning the slot back on
# after replacing the disk.
#
# Note that this script requires that your enclosure be supported by the
# Linux SCSI Enclosure Services (SES) driver.  The script will do nothing
# if you have no enclosure, or if your enclosure isn't supported.
#
# Environment read by this script:
#   ZED_ZEDLET_DIR                           directory holding zed.rc and
#                                            zed-functions.sh
#   ZED_POWER_OFF_ENCLOSURE_SLOT_ON_DEADMAN  must be "1" to enable
#   ZEVENT_POOL_FAILMODE                     must be "wait"
#   ZEVENT_VDEV_ENC_SYSFS_PATH               sysfs directory of the slot
#   ZEVENT_VDEV_PATH                         used only in the log message
#
# Exit codes:
#   0: slot successfully powered off
#   1: enclosure not available
#   2: ZED_POWER_OFF_ENCLOSURE_SLOT_ON_DEADMAN disabled
#   3: System not configured to wait on deadman
#   4: The enclosure sysfs path passed from ZFS does not exist
#   5: Enclosure slot didn't actually turn off after we told it to

[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
. "${ZED_ZEDLET_DIR}/zed-functions.sh"

if [ ! -d /sys/class/enclosure ] ; then
	# No JBOD enclosure or NVMe slots
	exit 1
fi

if [ "${ZED_POWER_OFF_ENCLOSURE_SLOT_ON_DEADMAN}" != "1" ] ; then
	exit 2
fi

if [ "$ZEVENT_POOL_FAILMODE" != "wait" ] ; then
	exit 3
fi

# sysfs attribute that both controls and reports the slot's power state.
power_status="$ZEVENT_VDEV_ENC_SYSFS_PATH/power_status"

if [ ! -f "$power_status" ] ; then
	exit 4
fi

# Turn off the slot and wait for sysfs to report that the slot is off.
# It can take ~400ms on some enclosures and multiple retries may be needed.
# Each of the (up to) 20 attempts writes "off" once and then polls the
# attribute up to 5 times, 0.1s apart, before writing again.
for i in $(seq 1 20) ; do
	echo "off" | tee "$power_status"

	for j in $(seq 1 5) ; do
		# Use POSIX '=' (not '==') since this runs under /bin/sh.
		if [ "$(cat "$power_status")" = "off" ] ; then
			# Slot is off: leave both the poll and retry loops.
			break 2
		fi
		sleep 0.1
	done
done

# Re-read the state so that exhausting every retry is reported as a failure.
if [ "$(cat "$power_status")" != "off" ] ; then
	exit 5
fi

zed_log_msg "powered down slot $ZEVENT_VDEV_ENC_SYSFS_PATH for $ZEVENT_VDEV_PATH"