2 # Miscellaneous Intel PT testing (exclusive)
3 # SPDX-License-Identifier: GPL-2.0
# Bail out with status 2 when perf does not list an intel_pt PMU: none of the
# tests below can run without it.
perf list pmu | grep -q 'intel_pt//' || exit 2
# Directory holding this script; the helper library is looked up relative to it.
shelldir=$(dirname "$0")
# shellcheck source=lib/waiting.sh
. "${shelldir}"/lib/waiting.sh
# Private scratch directory for everything the tests create. The template
# prefix matters: the clean-up code compares the first 26 bytes of this path
# against "/tmp/perf-test-intel-pt-sh".
# Stop right away if the directory cannot be created, otherwise every path
# below would resolve to the filesystem root.
temp_dir=$(mktemp -d /tmp/perf-test-intel-pt-sh.XXXXXXXXXX) || exit 1

# Files used by the individual tests, all located inside ${temp_dir}.
tmpfile="${temp_dir}/tmp-perf.data"
perfdatafile="${temp_dir}/test-perf.data"
outfile="${temp_dir}/test-out.txt"
errfile="${temp_dir}/test-err.txt"
workload="${temp_dir}/workload"
awkscript="${temp_dir}/awkscript"
jitdump_workload="${temp_dir}/jitdump_workload"
maxbrstack="${temp_dir}/maxbrstack.py"
32 sane
=$
(echo "${temp_dir}" | cut
-b 1-26)
33 if [ "${sane}" = "/tmp/perf-test-intel-pt-sh" ] ; then
34 echo "--- Cleaning up ---"
46 trap trap_cleanup EXIT TERM INT
# perf record for testing without decoding
#
# Arguments: passed through unchanged to "perf record".
# Returns:   the exit status of "perf record".
perf_record_no_decode()
{
	# Options to speed up recording: no post-processing, no build-id cache update,
	# and no BPF events.
	perf record -B -N --no-bpf-event "$@"
}
56 # perf record for testing should not need BPF events
59 # Options for no BPF events
60 perf record
--no-bpf-event "$@"
64 cat << _end_of_file_ | /usr/bin/cc -o "${workload}" -xc - -pthread && have_workload=true
69 struct timespec tm = {
74 /* Run for about 30 seconds */
75 for (i = 0; i < 30000; i++)
79 void *threadfunc(void *arg) {
87 pthread_create(&th, NULL, threadfunc, NULL);
89 pthread_join(th, NULL);
96 echo "Checking for CPU-wide recording on CPU $1"
97 if ! perf_record_no_decode
-o "${tmpfile}" -e dummy
:u
-C "$1" true
>/dev
/null
2>&1 ; then
105 test_system_wide_side_band
()
107 echo "--- Test system-wide sideband ---"
109 # Need CPU 0 and CPU 1
110 can_cpu_wide
0 ||
return $?
111 can_cpu_wide
1 ||
return $?
113 # Record on CPU 0 a task running on CPU 1
114 perf_record_no_decode
-o "${perfdatafile}" -e intel_pt
//u
-C 0 -- taskset
--cpu-list 1 uname
116 # Should get MMAP events from CPU 1 because they can be needed to decode
117 mmap_cnt
=$
(perf
script -i "${perfdatafile}" --no-itrace --show-mmap-events -C 1 2>/dev
/null |
grep -c MMAP
)
119 if [ "${mmap_cnt}" -gt 0 ] ; then
124 echo "Failed to record MMAP events on CPU 1 when tracing CPU 0"
130 if [ -z "${can_kernel_trace}" ] ; then
132 perf_record_no_decode
-o "${tmpfile}" -e dummy
:k true
>/dev
/null
2>&1 && can_kernel_trace
=1
134 if [ ${can_kernel_trace} -eq 0 ] ; then
135 echo "SKIP: no kernel tracing"
146 echo "--- Test per-thread ${desc}recording ---"
148 if ! $have_workload ; then
149 echo "No workload, so skipping"
153 if [ "${k}" = "k" ] ; then
154 can_kernel ||
return 2
157 cat <<- "_end_of_file_" > "${awkscript}"
162 x = s"[0-9a-fA-FxX]+"s
163 mmapping = "idx"u": mmapping fd"u
164 set_output = "idx"u": set output fd"u"->"u
165 perf_event_open = "sys_perf_event_open
: pid
"d"cpu
"d"group_fd
"d"flags
"x"="u
168 /perf record opening and mmapping events/ {
173 /perf record done opening and mmapping events/ {
178 $0 ~ perf_event_open && active {
179 match($0, perf_event_open)
180 $0 = substr($0, RSTART, RLENGTH)
184 print "pid
" pid " cpu
" cpu " fd
" fd " : " $0
190 $0 ~ mmapping && active {
192 $0 = substr($0, RSTART, RLENGTH)
194 print "fd
" fd " : " $0
195 if (fd in fd_array) {
198 print "Unknown fd
" fd
203 $0 ~ set_output && active {
204 match($0, set_output)
205 $0 = substr($0, RSTART, RLENGTH)
208 print "fd
" fd " fd_to
" fd_to " : " $0
209 if (fd in fd_array) {
210 if (fd_to in fd_array) {
211 set_output_array[fd] = fd_to
213 print "Unknown fd
" fd_to
217 print "Unknown fd
" fd
223 print "Checking
" length(fd_array) " fds
"
224 for (fd in fd_array) {
225 if (fd in mmap_array) {
229 print "More than
1 mmap
for PID
" pid
237 print "More than
1 mmap
for CPU
" cpu
242 } else if (!(fd in set_output_array)) {
243 print "No mmap
for fd
" fd
248 if (n != thread_cnt) {
249 print "Expected
" thread_cnt " per-thread mmaps
- found
" n
259 echo "Workload PIDs are
$w1 and
$w2"
260 wait_for_threads ${w1} 2
261 wait_for_threads ${w2} 2
263 perf_record_no_decode -o "${perfdatafile}" -e intel_pt//u"${k}" -vvv --per-thread -p "${w1},${w2}" 2>"${errfile}" >"${outfile}" &
265 echo "perf PID is
$ppid"
266 wait_for_perf_to_start ${ppid} "${errfile}" || return 1
269 wait_for_process_to_exit ${w1} || return 1
270 is_running ${ppid} || return 1
273 wait_for_process_to_exit ${w2} || return 1
274 wait_for_process_to_exit ${ppid} || return 1
276 awk -v thread_cnt=4 -f "${awkscript}" "${errfile}" || return 1
284 echo "--- Test tracing self-modifying code that uses jitdump
---"
286 script_path=$(realpath "$0")
287 script_dir=$(dirname "$script_path")
288 jitdump_incl_dir="${script_dir}/..
/..
/util
"
289 jitdump_h="${jitdump_incl_dir}/jitdump.h
"
291 if [ ! -e "${jitdump_h}" ] ; then
292 echo "SKIP
: Include
file jitdump.h not found
"
296 if [ -z "${have_jitdump_workload}" ] ; then
297 have_jitdump_workload=false
298 # Create a workload that uses self-modifying code and generates its own jitdump file
299 cat <<- "_end_of_file_
" | /usr/bin/cc -o "${jitdump_workload}" -I "${jitdump_incl_dir}" -xc - -pthread && have_jitdump_workload=true
301 #include <sys/mman.h>
302 #include <sys/types.h>
311 #define CHK_BYTE 0x5a
313 static inline uint64_t rdtsc(void)
315 unsigned int low, high;
317 asm volatile("rdtsc
" : "=a
" (low), "=d
" (high));
319 return low | ((uint64_t)high) << 32;
322 static FILE *open_jitdump(void)
324 struct jitheader header = {
325 .magic = JITHEADER_MAGIC,
326 .version = JITHEADER_VERSION,
327 .total_size = sizeof(header),
329 .timestamp = rdtsc(),
330 .flags = JITDUMP_FLAGS_ARCH_TIMESTAMP,
336 snprintf(filename, sizeof(filename), "jit-
%d.dump
", getpid());
337 f = fopen(filename, "w
+");
340 /* Create an MMAP event for the jitdump file. That is how perf tool finds it. */
341 m = mmap(0, 4096, PROT_READ | PROT_EXEC, MAP_PRIVATE, fileno(f), 0);
345 if (fwrite(&header,sizeof(header),1,f) != 1)
355 static int write_jitdump(FILE *f, void *addr, const uint8_t *dat, size_t sz, uint64_t *idx)
357 struct jr_code_load rec = {
358 .p.id = JIT_CODE_LOAD,
359 .p.total_size = sizeof(rec) + sz,
360 .p.timestamp = rdtsc(),
363 .vma = (unsigned long)addr,
364 .code_addr = (unsigned long)addr,
366 .code_index = ++*idx,
369 if (fwrite(&rec,sizeof(rec),1,f) != 1 ||
370 fwrite(dat, sz, 1, f) != 1)
375 static void close_jitdump(FILE *f)
382 /* Get a memory page to store executable code */
383 void *addr = mmap(0, 4096, PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
384 /* Code to execute: mov CHK_BYTE, %eax ; ret */
385 uint8_t dat[] = {0xb8, CHK_BYTE, 0x00, 0x00, 0x00, 0xc3};
386 FILE *f = open_jitdump();
392 /* Copy executable code to executable memory page */
393 memcpy(addr, dat, sizeof(dat));
394 /* Record it in the jitdump file */
395 if (write_jitdump(f, addr, dat, sizeof(dat), &idx))
398 ret = ((int (*)(void))addr)() - CHK_BYTE;
406 if ! $have_jitdump_workload ; then
407 echo "SKIP
: No jitdump workload
"
411 # Change to temp_dir so jitdump collateral files go there
413 perf_record_no_bpf -o "${tmpfile}" -e intel_pt//u "${jitdump_workload}"
414 perf inject -i "${tmpfile}" -o "${perfdatafile}" --jit
415 decode_br_cnt=$(perf script -i "${perfdatafile}" --itrace=b | wc -l)
416 # Note that overflow and lost errors are suppressed for the error count
417 decode_err_cnt=$(perf script -i "${perfdatafile}" --itrace=e-o-l | grep -ci error)
419 # Should be thousands of branches
420 if [ "${decode_br_cnt}" -lt 1000 ] ; then
421 echo "Decode failed
, only
${decode_br_cnt} branches
"
424 # Should be no errors
425 if [ "${decode_err_cnt}" -ne 0 ] ; then
426 echo "Decode failed
, ${decode_err_cnt} errors
"
427 perf script -i "${perfdatafile}" --itrace=e-o-l --show-mmap-events | cat
437 echo "--- Test with MTC and TSC disabled
---"
438 # Disable MTC and TSC
439 perf_record_no_decode -o "${perfdatafile}" -e intel_pt/mtc=0,tsc=0/u uname
440 # Should not get MTC packet
441 mtc_cnt=$(perf script -i "${perfdatafile}" -D 2>/dev/null | grep -c "MTC
0x
")
442 if [ "${mtc_cnt}" -ne 0 ] ; then
443 echo "Failed to filter with mtc
=0"
# Should not get TSC packet
447 tsc_cnt=$(perf script -i "${perfdatafile}" -D 2>/dev/null | grep -c "TSC
0x
")
448 if [ "${tsc_cnt}" -ne 0 ] ; then
449 echo "Failed to filter with tsc
=0"
456 test_disable_branch()
458 echo "--- Test with branches disabled
---"
460 perf_record_no_decode -o "${perfdatafile}" -e intel_pt/branch=0/u uname
461 # Should not get branch related packets
462 tnt_cnt=$(perf script -i "${perfdatafile}" -D 2>/dev/null | grep -c "TNT
0x
")
463 tip_cnt=$(perf script -i "${perfdatafile}" -D 2>/dev/null | grep -c "TIP
0x
")
464 fup_cnt=$(perf script -i "${perfdatafile}" -D 2>/dev/null | grep -c "FUP
0x
")
465 if [ "${tnt_cnt}" -ne 0 ] || [ "${tip_cnt}" -ne 0 ] || [ "${fup_cnt}" -ne 0 ] ; then
466 echo "Failed to disable branches
"
475 echo "--- Test with
/without CYC
---"
476 # Check if CYC is supported
477 cyc=$(cat /sys/bus/event_source/devices/intel_pt/caps/psb_cyc)
478 if [ "${cyc}" != "1" ] ; then
479 echo "SKIP
: CYC is not supported
"
483 perf_record_no_decode -o "${perfdatafile}" -e intel_pt/cyc/u uname
484 # should get CYC packets
485 cyc_cnt=$(perf script -i "${perfdatafile}" -D 2>/dev/null | grep -c "CYC
0x
")
486 if [ "${cyc_cnt}" = "0" ] ; then
487 echo "Failed to get CYC packet
"
491 perf_record_no_decode -o "${perfdatafile}" -e intel_pt//u uname
492 # Should not get CYC packets
493 cyc_cnt=$(perf script -i "${perfdatafile}" -D 2>/dev/null | grep -c "CYC
0x
")
494 if [ "${cyc_cnt}" -gt 0 ] ; then
495 echo "Still get CYC packet without cyc
"
504 echo "--- Test recording with sample mode
---"
505 # Check if recording with sample mode is working
506 if ! perf_record_no_decode -o "${perfdatafile}" --aux-sample=8192 -e '{intel_pt//u,branch-misses:u}' uname ; then
507 echo "perf record failed with
--aux-sample"
510 # Check with event with PMU name
511 if perf_record_no_decode -o "${perfdatafile}" -e br_misp_retired.all_branches:u uname ; then
512 if ! perf_record_no_decode -o "${perfdatafile}" -e '{intel_pt//,br_misp_retired.all_branches/aux-sample-size=8192/}:u' uname ; then
513 echo "perf record failed with
--aux-sample-size"
523 echo "--- Test with kernel trace
---"
524 # Check if recording with kernel trace is working
525 can_kernel || return 2
526 if ! perf_record_no_decode -o "${perfdatafile}" -e intel_pt//k -m1,128 uname ; then
527 echo "perf record failed with intel_pt
//k
"
536 echo "--- Test virtual LBR
---"
537 # Check if python script is supported
538 libpython=$(perf version --build-options | grep python | grep -cv OFF)
539 if [ "${libpython}" != "1" ] ; then
540 echo "SKIP
: python scripting is not supported
"
544 # Python script to determine the maximum size of branch stacks
545 cat << "_end_of_file_
" > "${maxbrstack}"
546 from __future__ import print_function
550 def process_event(param_dict):
551 if "brstack
" in param_dict:
552 brstack = param_dict["brstack
"]
559 print("max brstack
", bmax)
562 # Check if virtual lbr is working
563 perf_record_no_bpf -o "${perfdatafile}" --aux-sample -e '{intel_pt//,cycles}:u' uname
564 times_val=$(perf script -i "${perfdatafile}" --itrace=L -s "${maxbrstack}" 2>/dev/null | grep "max brstack
" | cut -d " " -f 3)
565 case "${times_val}" in
569 if [ "${times_val}" -lt 2 ] ; then
570 echo "Failed with virtual lbr
"
579 echo "--- Test power events
---"
580 # Check if power events are supported
581 power_event=$(cat /sys/bus/event_source/devices/intel_pt/caps/power_event_trace)
582 if [ "${power_event}" != "1" ] ; then
583 echo "SKIP
: power_event_trace is not supported
"
586 if ! perf_record_no_decode -o "${perfdatafile}" -a -e intel_pt/pwr_evt/u uname ; then
587 echo "perf record failed with pwr_evt
"
596 echo "--- Test with TNT packets disabled
---"
597 # Check if TNT disable is supported
598 notnt=$(cat /sys/bus/event_source/devices/intel_pt/caps/tnt_disable)
599 if [ "${notnt}" != "1" ] ; then
600 echo "SKIP
: tnt_disable is not supported
"
603 perf_record_no_decode -o "${perfdatafile}" -e intel_pt/notnt/u uname
604 # Should be no TNT packets
605 tnt_cnt=$(perf script -i "${perfdatafile}" -D | grep -c TNT)
606 if [ "${tnt_cnt}" -ne 0 ] ; then
607 echo "TNT packets still there after notnt
"
616 echo "--- Test with event_trace
---"
617 # Check if event_trace is supported
618 event_trace=$(cat /sys/bus/event_source/devices/intel_pt/caps/event_trace)
619 if [ "${event_trace}" != 1 ] ; then
620 echo "SKIP
: event_trace is not supported
"
623 if ! perf_record_no_decode -o "${perfdatafile}" -e intel_pt/event/u uname ; then
624 echo "perf record failed with event trace
"
633 echo "--- Test with pipe mode
---"
634 # Check if it works with pipe
635 if ! perf_record_no_bpf -o- -e intel_pt//u uname | perf report -q -i- --itrace=i10000 ; then
636 echo "perf record
+ report failed with pipe mode
"
639 if ! perf_record_no_bpf -o- -e intel_pt//u uname | perf inject -b > /dev/null ; then
640 echo "perf record
+ inject failed with pipe mode
"
649 if [ "$1" -eq 2 ] ; then
650 skip_cnt=$((skip_cnt + 1))
653 if [ "$1" -eq 0 ] ; then
654 ok_cnt=$((ok_cnt + 1))
657 err_cnt=$((err_cnt + 1))
# Run every test in turn. A failing test leaves its exit status in ret, which
# is handed to count_result and then cleared so the next test starts from 0.
# ret is initialised here so the first passing test is reported as status 0
# even if nothing set it earlier.
ret=0
test_system_wide_side_band || ret=$? ; count_result "${ret}" ; ret=0
test_per_thread "" "" || ret=$? ; count_result "${ret}" ; ret=0
# First argument selects kernel tracing, second is the text inserted into the
# test's banner line.
test_per_thread "k" "(incl. kernel) " || ret=$? ; count_result "${ret}" ; ret=0
test_jitdump || ret=$? ; count_result "${ret}" ; ret=0
test_packet_filter || ret=$? ; count_result "${ret}" ; ret=0
test_disable_branch || ret=$? ; count_result "${ret}" ; ret=0
test_time_cyc || ret=$? ; count_result "${ret}" ; ret=0
test_sample || ret=$? ; count_result "${ret}" ; ret=0
test_kernel_trace || ret=$? ; count_result "${ret}" ; ret=0
test_virtual_lbr || ret=$? ; count_result "${ret}" ; ret=0
test_power_event || ret=$? ; count_result "${ret}" ; ret=0
test_no_tnt || ret=$? ; count_result "${ret}" ; ret=0
test_event_trace || ret=$? ; count_result "${ret}" ; ret=0
test_pipe || ret=$? ; count_result "${ret}" ; ret=0
680 if [ ${err_cnt} -gt 0 ] ; then
684 if [ ${ok_cnt} -gt 0 ] ; then