zed.d/history_event-zfs-list-cacher.sh.in: parallelise, simplify
[zfs.git] / scripts / zloop.sh
blob546e7001776db967bb613edebf2792f278bf62e4
#!/usr/bin/env bash

#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#

#
# Copyright (c) 2015 by Delphix. All rights reserved.
# Copyright (C) 2016 Lawrence Livermore National Security, LLC.
# Copyright (c) 2017, Intel Corporation.
#
# Load the shared helper script that lives next to this script. Nothing
# below can work without it, so a missing helper is fatal.
BASE_DIR=$(dirname "$0")
SCRIPT_COMMON=common.sh
if [ -f "${BASE_DIR}/${SCRIPT_COMMON}" ]; then
	. "${BASE_DIR}/${SCRIPT_COMMON}"
else
	# Exit unconditionally: chaining "echo && exit" would let the script
	# carry on without its helpers if the echo itself failed.
	echo "Missing helper script ${SCRIPT_COMMON}" >&2
	exit 1
fi
# Script name; not referenced in this file, hence the shellcheck exemption.
# shellcheck disable=SC2034
PROG=zloop.sh
# Debugger used to inspect core files; override through the environment.
GDB=${GDB:-gdb}

# Defaults for the ztest working directory (-f) and the crash output
# directory (-c).
DEFAULTWORKDIR=/var/tmp
DEFAULTCOREDIR=/var/tmp/zloop
#
# Print the help text to stderr.
#
# Reads:   $0 (script name), DEFAULTCOREDIR (shown as the default output
#          directory).
# Outputs: the usage message on stderr; nothing on stdout.
#
function usage
{
	cat >&2 <<EOF

$0 [-t <timeout>] [-s <vdev size>] [-f <work directory>]
    [-c <dump directory>] [-m <max cores>] [-l] [-h]
    [ -- [extra ztest parameters]]

  This script runs ztest repeatedly with randomized arguments.
  If a crash is encountered, the ztest logs, any associated
  vdev files, and core file (if one exists) are moved to the
  output directory ($DEFAULTCOREDIR by default). Any options
  after the -- end-of-options marker will be passed to ztest.

  Options:
    -t  Total time to loop for, in seconds. If not provided,
        zloop runs forever.
    -s  Size of vdev devices.
    -f  Specify working directory for ztest vdev files.
    -c  Specify a core dump directory to use.
    -m  Max number of core dumps to allow before exiting.
    -l  Create 'ztest.core.N' symlink to core directory.
    -h  Print this help message.

EOF
}
#
# Run a command and terminate the whole script if it fails.
#
# Arguments: the command and its arguments.
# Outputs:   "Command failed: <command>" on stdout when the command fails.
# Exits:     with status 1 when the command returns non-zero.
#
# NOTE: the arguments are deliberately expanded unquoted, so they undergo
# word splitting and pathname expansion again before the command runs.
# store_core relies on this by passing a quoted glob ("$workdir/ztest*")
# that is only expanded here. Do not change this to "$@" without fixing
# that caller first.
#
function or_die
{
	# shellcheck disable=SC2068
	if ! $@; then
		echo "Command failed: $*"
		exit 1
	fi
}
# Work out the glob (coreglob) that matches core files on this platform.
case $(uname) in
FreeBSD)
	coreglob="z*.core"
	;;
Linux)
	# core file helpers
	# Remember the current pattern so it can be restored when zloop ends.
	origcorepattern="$(cat /proc/sys/kernel/core_pattern)"
	# Keep only the leading part of the pattern, up to the first '|',
	# '%' or whitespace, and turn it into a glob.
	coreglob="$(grep -E -o '^([^|%[:space:]]*)' /proc/sys/kernel/core_pattern)*"

	# A bare "*" means the pattern had no usable literal prefix (for
	# example it starts with '|' or '%'); switch to plain "core" files.
	if [[ $coreglob = "*" ]]; then
		echo "Setting core file pattern..."
		echo "core" > /proc/sys/kernel/core_pattern
		coreglob="$(grep -E -o '^([^|%[:space:]]*)' \
		    /proc/sys/kernel/core_pattern)*"
	fi
	;;
*)
	# Say why we are giving up instead of exiting silently.
	echo "Unsupported platform: $(uname)" >&2
	exit 1
	;;
esac
#
# Print the name of the oldest file matching $coreglob, or nothing when no
# file matches.
#
# Reads:   coreglob (expanded unquoted on purpose so the glob takes effect).
# Outputs: at most one file name on stdout; ls errors are discarded.
#
function core_file
{
	# shellcheck disable=SC2012,SC2086
	ls -tr1 $coreglob 2>/dev/null | head -n 1
}
#
# Print the program that should be handed to gdb for a given core file.
#
# Arguments: $1 - path to the core file.
# Reads:     GDB, ZTEST, ZDB.
# Outputs:   $ZDB if gdb's "Core was generated by" line mentions zdb,
#            otherwise $ZTEST (no trailing newline).
#
function core_prog
{
	local prog=$ZTEST
	local core_id

	# Closing quotes are turned into spaces so that a bare "zdb'" with
	# no arguments still matches the "zdb " test below.
	core_id=$($GDB --batch -c "$1" | grep "Core was generated by" | \
	    tr \' ' ')
	if [[ "$core_id" == *"zdb "* ]]; then
		prog=$ZDB
	fi
	printf "%s" "$prog"
}
#
# After a ztest run, detect a crash and archive everything related to it.
#
# A crash is a non-zero ztest exit status ($ztrc) or the presence of a core
# file. On a crash the zdb output, ztest logs, vdev files, pool cache and
# core file are moved to $coredir/zloop-<timestamp>, and gdb backtraces are
# saved alongside them.
#
# Reads:   ztrc, workdir, coredir, symlink, coremax, ZDB, GDB.
# Writes:  foundcrashes (incremented once per crash).
# Exits:   with status 1 once $coremax crashes have been collected (when
#          coremax is greater than 0), or when any move/mkdir step fails.
#
function store_core
{
	local core coreid dest zdbcmd zdbdebug coreprog coredebug

	core="$(core_file)"
	if [[ $ztrc -ne 0 ]] || [[ -f "$core" ]]; then
		df -h "$workdir" >>ztest.out
		coreid=$(date "+zloop-%y%m%d-%H%M%S")
		foundcrashes=$((foundcrashes + 1))

		# zdb debugging
		zdbcmd="$ZDB -U $workdir/zpool.cache -dddMmDDG ztest"
		zdbdebug=$($zdbcmd 2>&1)
		echo -e "$zdbcmd\n" >>ztest.zdb
		echo "$zdbdebug" >>ztest.zdb

		dest=$coredir/$coreid
		or_die mkdir -p "$dest"
		or_die mkdir -p "$dest/vdev"

		if [[ $symlink -ne 0 ]]; then
			or_die ln -sf "$dest" ztest.core.$foundcrashes
		fi

		echo "*** ztest crash found - moving logs to $dest"

		or_die mv ztest.history "$dest/"
		or_die mv ztest.zdb "$dest/"
		or_die mv ztest.out "$dest/"
		# Expand the glob here rather than passing it quoted and
		# relying on or_die to re-expand its arguments.
		or_die mv "$workdir/"ztest* "$dest/vdev/"

		if [[ -e "$workdir/zpool.cache" ]]; then
			or_die mv "$workdir/zpool.cache" "$dest/vdev/"
		fi

		# check for core
		if [[ -f "$core" ]]; then
			coreprog=$(core_prog "$core")
			coredebug=$($GDB --batch --quiet \
			    -ex "set print thread-events off" \
			    -ex "printf \"*\n* Backtrace \n*\n\"" \
			    -ex "bt" \
			    -ex "printf \"*\n* Libraries \n*\n\"" \
			    -ex "info sharedlib" \
			    -ex "printf \"*\n* Threads (full) \n*\n\"" \
			    -ex "info threads" \
			    -ex "printf \"*\n* Backtraces \n*\n\"" \
			    -ex "thread apply all bt" \
			    -ex "printf \"*\n* Backtraces (full) \n*\n\"" \
			    -ex "thread apply all bt full" \
			    -ex "quit" "$coreprog" "$core" 2>&1 | \
			    grep -v "New LWP")

			# Dump core + logs to stored directory
			echo "$coredebug" >>"$dest/ztest.gdb"
			or_die mv "$core" "$dest/"

			# Record info in cores logfile. Only the file name is
			# appended: the core now lives directly in $dest even
			# if the glob matched it through a directory prefix.
			echo "*** core @ $coredir/$coreid/${core##*/}:" | \
			    tee -a ztest.cores
		fi

		if [[ $coremax -gt 0 ]] &&
		   [[ $foundcrashes -ge $coremax ]]; then
			echo "exiting... max $coremax allowed cores"
			exit 1
		else
			echo "continuing..."
		fi
	fi
}
# parse arguments
# expected format: zloop [-t timeout] [-s size] [-f workdir] [-c coredir]
#                        [-m coremax] [-l] [-h] [-- extra ztest args]
coredir=$DEFAULTCOREDIR
basedir=$DEFAULTWORKDIR
rundir="zloop-run"
timeout=0
size="512m"
coremax=0
symlink=0
# The leading ':' selects silent error reporting: unknown options and
# missing option arguments both end up in the '*' branch below.
while getopts ":ht:m:s:c:f:l" opt; do
	case $opt in
		# -t and -m are only accepted when greater than zero;
		# anything else leaves the default in place.
		t ) [[ $OPTARG -gt 0 ]] && timeout=$OPTARG ;;
		m ) [[ $OPTARG -gt 0 ]] && coremax=$OPTARG ;;
		s ) [[ $OPTARG ]] && size=$OPTARG ;;
		c ) [[ $OPTARG ]] && coredir=$OPTARG ;;
		f ) [[ $OPTARG ]] && basedir=$(readlink -f "$OPTARG") ;;
		l ) symlink=1 ;;
		h ) usage
		    exit 2
		    ;;
		* ) echo "Invalid argument: -$OPTARG";
		    usage
		    exit 1
		    ;;
	esac
done
# pass remaining arguments on to ztest
shift $((OPTIND - 1))
# enable core dumps
ulimit -c unlimited
export ASAN_OPTIONS=abort_on_error=1:disable_coredump=0

# Refuse to start while an older core file is lying around: store_core
# would otherwise attribute it to the first ztest run.
if [[ -f "$(core_file)" ]]; then
	echo -n "There's a core dump here you might want to look at first... "
	core_file
	echo
	exit 1
fi

if [[ ! -d $coredir ]]; then
	echo "core dump directory ($coredir) does not exist, creating it."
	or_die mkdir -p "$coredir"
fi

if [[ ! -w $coredir ]]; then
	echo "core dump directory ($coredir) is not writable."
	exit 1
fi

# Start with fresh log files in the current directory.
or_die rm -f ztest.history
or_die rm -f ztest.zdb
or_die rm -f ztest.cores

ztrc=0		# ztest return value
foundcrashes=0	# number of crashes found so far
starttime=$(date +%s)
curtime=$starttime
# if no timeout was specified, loop forever.
while [[ $timeout -eq 0 ]] || [[ $curtime -le $((starttime + timeout)) ]]; do
	zopt="-G -VVVVV"

	# start each run with an empty directory
	workdir="$basedir/$rundir"
	or_die rm -rf "$workdir"
	or_die mkdir "$workdir"

	# switch between three types of configs
	# 1/3 basic, 1/3 raidz mix, and 1/3 draid mix
	choice=$((RANDOM % 3))

	# ashift is either 9 or 12
	align=$(((RANDOM % 2) * 3 + 9))

	# randomly use special classes
	class="special=random"

	if [[ $choice -eq 0 ]]; then
		# basic mirror only
		parity=1
		mirrors=2
		draid_data=0
		draid_spares=0
		raid_children=0
		vdevs=2
		raid_type="raidz"
	elif [[ $choice -eq 1 ]]; then
		# fully randomized mirror/raidz (sans dRAID)
		parity=$(((RANDOM % 3) + 1))
		mirrors=$(((RANDOM % 3) * 1))
		draid_data=0
		draid_spares=0
		raid_children=$((((RANDOM % 9) + parity + 1) * (RANDOM % 2)))
		vdevs=$(((RANDOM % 3) + 3))
		raid_type="raidz"
	else
		# fully randomized dRAID (sans mirror/raidz)
		parity=$(((RANDOM % 3) + 1))
		mirrors=0
		draid_data=$(((RANDOM % 8) + 3))
		draid_spares=$(((RANDOM % 2) + parity))
		stripe=$((draid_data + parity))
		extra=$((draid_spares + (RANDOM % 4)))
		raid_children=$(((((RANDOM % 4) + 1) * stripe) + extra))
		vdevs=$((RANDOM % 3))
		raid_type="draid"
	fi

	# run from 30 to 120 seconds
	runtime=$(((RANDOM % 90) + 30))
	passtime=$((RANDOM % (runtime / 3 + 1) + 10))

	zopt="$zopt -K $raid_type"
	zopt="$zopt -m $mirrors"
	zopt="$zopt -r $raid_children"
	zopt="$zopt -D $draid_data"
	zopt="$zopt -S $draid_spares"
	zopt="$zopt -R $parity"
	zopt="$zopt -v $vdevs"
	zopt="$zopt -a $align"
	zopt="$zopt -C $class"
	zopt="$zopt -T $runtime"
	zopt="$zopt -P $passtime"
	zopt="$zopt -s $size"
	zopt="$zopt -f $workdir"

	# Any arguments left after option parsing are appended verbatim.
	cmd="$ZTEST $zopt $*"
	desc="$(date '+%m/%d %T') $cmd"
	echo "$desc" | tee -a ztest.history
	echo "$desc" >>ztest.out
	$cmd >>ztest.out 2>&1
	ztrc=$?
	grep -E '===|WARNING' ztest.out >>ztest.history

	# Archive logs and core if this run crashed (may exit the script).
	store_core

	curtime=$(date +%s)
done
echo "zloop finished, $foundcrashes crashes found"

# restore core pattern.
case $(uname) in
Linux)
	# Put back the pattern that was saved at startup.
	echo "$origcorepattern" > /proc/sys/kernel/core_pattern
	;;
*)
	;;
esac

uptime >>ztest.out

# Report failure to the caller if any run crashed.
if [[ $foundcrashes -gt 0 ]]; then
	exit 1
fi