# SPDX-License-Identifier: GPL-2.0-only
# Copyright 2015, Daniel Axtens, IBM Corporation
# do we have ./getscom, ./putscom?
#
# Locate the xscom tools: prefer executables in the current directory, then
# fall back to $PATH. On success GETSCOM and PUTSCOM hold the commands to
# run; if either tool is missing a hint is printed to stderr and the script
# exits with status 1.
if [ -x ./getscom ] && [ -x ./putscom ]; then
	GETSCOM=./getscom
	PUTSCOM=./putscom
elif command -v getscom > /dev/null && command -v putscom > /dev/null; then
	# command -v is the POSIX way to resolve a command name; both tools are
	# checked so that PUTSCOM can never end up empty.
	GETSCOM=$(command -v getscom)
	PUTSCOM=$(command -v putscom)
else
	cat >&2 <<EOF
Can't find getscom/putscom in . or \$PATH.
See https://github.com/open-power/skiboot.
The tool is in external/xscom-utils
EOF
	exit 1
fi
# We will get 8 HMI events per injection
# todo: deal with things being offline
expected_hmis=8

# Print the number of kernel log lines that report a harmless HMI.
# grep -c always prints a count (0 when nothing matches), so callers can
# capture the output and compare it numerically.
COUNT_HMIS() {
	dmesg | grep -c 'Harmless Hypervisor Maintenance interrupt'
}
# massively expand snooze delay, allowing injection on all cores
ppc64_cpu --smt-snooze-delay=1000000000

# when we exit, restore it
restore_snooze_delay() {
	ppc64_cpu --smt-snooze-delay=100
}
trap restore_snooze_delay EXIT
# Turn fatal signals into a normal exit so the EXIT handler above runs
# exactly once and the script stops, instead of restoring the delay and
# carrying on (HUP) or being killed with the huge delay still set.
trap 'exit 1' HUP INT TERM
# for each chip+core combination
#
# Every "OCC: Chip <chip> Core <core>" entry found in the OPAL message log
# gets one recoverable error injected through that core's FIR, after which
# the loop waits for the resulting HMIs to appear in the kernel log.
#
# Uses GETSCOM, PUTSCOM, expected_hmis and COUNT_HMIS, which are expected to
# be set up before this loop runs. Every failure path prints a diagnostic
# and exits with status 1.
#
# todo - less fragile parsing
grep -E -o 'OCC: Chip [0-9a-f]+ Core [0-9a-f]' < /sys/firmware/opal/msglog |
while read -r _tag _chip_label chip _core_label core; do
	# grep -o emits exactly "OCC: Chip <chip> Core <core>", so the third and
	# fifth whitespace-separated fields are the chip and core ids.
	fir="0x1${core}013100"

	# verify that Core FIR is zero as expected
	if [ "$("$GETSCOM" -c "0x${chip}" "$fir")" != 0 ]; then
		echo "FIR was not zero before injection for chip $chip, core $core. Aborting!"
		echo "Result of $GETSCOM -c 0x${chip} $fir:"
		"$GETSCOM" -c "0x${chip}" "$fir"
		echo "If you get a -5 error, the core may be in idle state. Try stress-ng."
		echo "Otherwise, try $PUTSCOM -c 0x${chip} $fir 0"
		exit 1
	fi

	# keep track of the number of HMIs handled
	old_hmis=$(COUNT_HMIS)

	# do injection, adding a marker to dmesg for clarity
	echo "Injecting HMI on core $core, chip $chip" | tee /dev/kmsg
	# inject a RegFile recoverable error
	if ! "$PUTSCOM" -c "0x${chip}" "$fir" 2000000000000000 > /dev/null; then
		echo "Error injecting. Aborting!"
		exit 1
	fi

	# now we want to wait for all the HMIs to be processed
	# we expect one per thread on the core
	# poll at most 12 times, 5 seconds apart (the "1 min" quoted below)
	target_hmis=$((old_hmis + expected_hmis))
	i=0
	new_hmis=$(COUNT_HMIS)
	while [ "$new_hmis" -lt "$target_hmis" ] && [ "$i" -lt 12 ]; do
		echo "Seen $((new_hmis - old_hmis)) HMI(s) out of $expected_hmis expected, sleeping"
		sleep 5
		i=$((i + 1))
		new_hmis=$(COUNT_HMIS)
	done

	# Judge by the final count rather than the retry counter, so events that
	# arrive during the last sleep are not reported as a timeout.
	if [ "$new_hmis" -lt "$target_hmis" ]; then
		echo "Haven't seen expected $expected_hmis recoveries after 1 min. Aborting."
		exit 1
	fi
	echo "Processed $expected_hmis events; presumed success. Check dmesg."
done