1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright IBM Corp. 2018
4 * Auxtrace support for s390 CPU-Measurement Sampling Facility
6 * Author(s): Thomas Richter <tmricht@linux.ibm.com>
8 * Auxiliary traces are collected during 'perf record' using rbd000 event.
9 * Several PERF_RECORD_XXX are generated during recording:
12 * Records that new data landed in the AUX buffer part.
13 * PERF_RECORD_AUXTRACE:
14 * Defines auxtrace data. Followed by the actual data. The contents of
15 * the auxtrace data is dependent on the event and the CPU.
16 * This record is generated by perf record command. For details
17 * see Documentation/perf.data-file-format.txt.
18 * PERF_RECORD_AUXTRACE_INFO:
19 * Defines a table of contents for PERF_RECORD_AUXTRACE records. This
20 * record is generated during 'perf record' command. Each record contains
21 * up to 256 entries describing offset and size of the AUXTRACE data in the
23 * PERF_RECORD_AUXTRACE_ERROR:
24 * Indicates an error during AUXTRACE collection such as buffer overflow.
25 * PERF_RECORD_FINISHED_ROUND:
26 * Perf events are not necessarily in time stamp order, as they can be
27 * collected in parallel on different CPUs. If the events should be
28 * processed in time order they need to be sorted first.
29 * Perf report guarantees that there is no reordering over a
30 * PERF_RECORD_FINISHED_ROUND boundary event. All perf records with a
31 * time stamp lower than this record are processed (and displayed) before
32 * the succeeding perf record are processed.
34 * These records are evaluated during perf report command.
36 * 1. PERF_RECORD_AUXTRACE_INFO is used to set up the infrastructure for
37 * auxiliary trace data processing. See s390_cpumsf_process_auxtrace_info()
39 * Auxiliary trace data is collected per CPU. To merge the data into the report
40 * an auxtrace_queue is created for each CPU. It is assumed that the auxtrace
41 * data is in ascending order.
43 * Each queue has a double linked list of auxtrace_buffers. This list contains
44 * the offset and size of a CPU's auxtrace data. During auxtrace processing
45 * the data portion is mmap()'ed.
47 * To sort the queues in chronological order, all queue access is controlled
48 * by the auxtrace_heap. This is basically a stack, each stack element has two
49 * entries, the queue number and a time stamp. However the stack is sorted by
50 * the time stamps. The highest time stamp is at the bottom, the lowest
51 * (nearest) time stamp is at the top. That sort order is maintained at all
54 * After the auxtrace infrastructure has been setup, the auxtrace queues are
55 * filled with data (offset/size pairs) and the auxtrace_heap is populated.
57 * 2. PERF_RECORD_XXX processing triggers access to the auxtrace_queues.
58 * Each record is handled by s390_cpumsf_process_event(). The time stamp of
59 * the perf record is compared with the time stamp located on the auxtrace_heap
60 * top element. If that time stamp is lower than the time stamp from the
61 * record sample, the auxtrace queues will be processed. As auxtrace queues
62 * control many auxtrace_buffers and each buffer can be quite large, the
63 * auxtrace buffer might be processed only partially. In this case the
64 * position in the auxtrace_buffer of that queue is remembered and the time
65 * stamp of the last processed entry of the auxtrace_buffer replaces the
66 * current auxtrace_heap top.
68 * 3. Auxtrace_queues might run out of data and are fed by the
69 * PERF_RECORD_AUXTRACE handling, see s390_cpumsf_process_auxtrace_event().
72 * Each sampling-data entry in the auxiliary trace data generates a perf sample.
73 * This sample is filled
74 * with data from the auxtrace such as PID/TID, instruction address, CPU state,
75 * etc. This sample is processed with perf_session__deliver_synth_event() to
76 * be included into the GUI.
78 * 4. PERF_RECORD_FINISHED_ROUND event is used to process all the remaining
79 * auxiliary trace entries until the time stamp of this record is reached
80 * auxtrace_heap top. This is triggered by ordered_event->deliver().
83 * Perf event processing.
84 * Event processing of PERF_RECORD_XXX entries relies on time stamp entries.
85 * This is the function call sequence:
89 * perf_session__process_events()
91 * __perf_session__process_events()
93 * perf_session__process_event()
94 * | This function splits the PERF_RECORD_XXX records.
95 * | - Those generated by perf record command (type number equal or higher
96 * | than PERF_RECORD_USER_TYPE_START) are handled by
97 * | perf_session__process_user_event(see below)
98 * | - Those generated by the kernel are handled by
99 * | perf_evlist__parse_sample_timestamp()
101 * perf_evlist__parse_sample_timestamp()
102 * | Extract time stamp from sample data.
104 * perf_session__queue_event()
105 * | If timestamp is positive the sample is entered into an ordered_event
106 * | list, sort order is the timestamp. The event processing is deferred until
107 * | later (see perf_session__process_user_event()).
108 * | Other timestamps (0 or -1) are handled immediately by
109 * | perf_session__deliver_event(). These are events generated at start up
110 * | of command perf record. They create PERF_RECORD_COMM and PERF_RECORD_MMAP*
111 * | records. They are needed to create a list of running processes and their
112 * | memory mappings and layout. They are needed at the beginning to enable
113 * | command perf report to create process trees and memory mappings.
115 * perf_session__deliver_event()
116 * | Delivers a PERF_RECORD_XXX entry for handling.
118 * auxtrace__process_event()
119 * | The timestamp of the PERF_RECORD_XXX entry is taken to correlate with
120 * | time stamps from the auxiliary trace buffers. This enables
121 * | synchronization between auxiliary trace data and the events on the
124 * machine__deliver_event()
125 * | Handles the PERF_RECORD_XXX event. This depends on the record type.
126 * It might update the process tree, update a process memory map or enter
127 * a sample with IP and call back chain data into GUI data pool.
130 * Deferred processing determined by perf_session__process_user_event() is
131 * finally processed when a PERF_RECORD_FINISHED_ROUND is encountered. These
132 * are generated during command perf record.
133 * The timestamp of PERF_RECORD_FINISHED_ROUND event is taken to process all
134 * PERF_RECORD_XXX entries stored in the ordered_event list. This list was
135 * built up while reading the perf.data file.
136 * Each event is now processed by calling perf_session__deliver_event().
137 * This enables time synchronization between the data in the perf.data file and
138 * the data in the auxiliary trace buffers.
143 #include <byteswap.h>
144 #include <inttypes.h>
145 #include <linux/kernel.h>
146 #include <linux/types.h>
147 #include <linux/bitops.h>
148 #include <linux/log2.h>
149 #include <linux/zalloc.h>
151 #include <sys/stat.h>
152 #include <sys/types.h>
161 #include "auxtrace.h"
162 #include "s390-cpumsf.h"
163 #include "s390-cpumsf-kernel.h"
164 #include "s390-cpumcf-kernel.h"
168 struct auxtrace auxtrace
;
169 struct auxtrace_queues queues
;
170 struct auxtrace_heap heap
;
171 struct perf_session
*session
;
172 struct machine
*machine
;
181 struct s390_cpumsf_queue
{
182 struct s390_cpumsf
*sf
;
183 unsigned int queue_nr
;
184 struct auxtrace_buffer
*buffer
;
190 /* Check if the raw data should be dumped to file. If this is the case and
191 * the file to dump to has not been opened for writing, do so.
193 * Return 0 on success and greater than zero on error so processing continues.
195 static int s390_cpumcf_dumpctr(struct s390_cpumsf
*sf
,
196 struct perf_sample
*sample
)
198 struct s390_cpumsf_queue
*sfq
;
199 struct auxtrace_queue
*q
;
202 if (!sf
->use_logfile
|| sf
->queues
.nr_queues
<= sample
->cpu
)
205 q
= &sf
->queues
.queue_array
[sample
->cpu
];
207 if (!sfq
) /* Queue not yet allocated */
210 if (!sfq
->logfile_ctr
) {
214 ? asprintf(&name
, "%s/aux.ctr.%02x",
215 sf
->logdir
, sample
->cpu
)
216 : asprintf(&name
, "aux.ctr.%02x", sample
->cpu
);
218 sfq
->logfile_ctr
= fopen(name
, "w");
219 if (sfq
->logfile_ctr
== NULL
) {
220 pr_err("Failed to open counter set log file %s, "
221 "continue...\n", name
);
227 if (sfq
->logfile_ctr
) {
228 /* See comment above for -4 */
229 size_t n
= fwrite(sample
->raw_data
, sample
->raw_size
- 4, 1,
232 pr_err("Failed to write counter set data\n");
239 /* Display s390 CPU measurement facility basic-sampling data entry
240 * Data written on s390 in big endian byte order and contains bit
241 * fields across byte boundaries.
243 static bool s390_cpumsf_basic_show(const char *color
, size_t pos
,
244 struct hws_basic_entry
*basicp
)
246 struct hws_basic_entry
*basic
= basicp
;
247 #if __BYTE_ORDER == __LITTLE_ENDIAN
248 struct hws_basic_entry local
;
249 unsigned long long word
= be64toh(*(unsigned long long *)basicp
);
251 memset(&local
, 0, sizeof(local
));
252 local
.def
= be16toh(basicp
->def
);
253 local
.prim_asn
= word
& 0xffff;
254 local
.CL
= word
>> 30 & 0x3;
255 local
.I
= word
>> 32 & 0x1;
256 local
.AS
= word
>> 33 & 0x3;
257 local
.P
= word
>> 35 & 0x1;
258 local
.W
= word
>> 36 & 0x1;
259 local
.T
= word
>> 37 & 0x1;
260 local
.U
= word
>> 40 & 0xf;
261 local
.ia
= be64toh(basicp
->ia
);
262 local
.gpp
= be64toh(basicp
->gpp
);
263 local
.hpp
= be64toh(basicp
->hpp
);
266 if (basic
->def
!= 1) {
267 pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos
);
270 color_fprintf(stdout
, color
, " [%#08zx] Basic Def:%04x Inst:%#04x"
271 " %c%c%c%c AS:%d ASN:%#04x IA:%#018llx\n"
272 "\t\tCL:%d HPP:%#018llx GPP:%#018llx\n",
273 pos
, basic
->def
, basic
->U
,
274 basic
->T
? 'T' : ' ',
275 basic
->W
? 'W' : ' ',
276 basic
->P
? 'P' : ' ',
277 basic
->I
? 'I' : ' ',
278 basic
->AS
, basic
->prim_asn
, basic
->ia
, basic
->CL
,
279 basic
->hpp
, basic
->gpp
);
283 /* Display s390 CPU measurement facility diagnostic-sampling data entry.
284 * Data written on s390 in big endian byte order and contains bit
285 * fields across byte boundaries.
287 static bool s390_cpumsf_diag_show(const char *color
, size_t pos
,
288 struct hws_diag_entry
*diagp
)
290 struct hws_diag_entry
*diag
= diagp
;
291 #if __BYTE_ORDER == __LITTLE_ENDIAN
292 struct hws_diag_entry local
;
293 unsigned long long word
= be64toh(*(unsigned long long *)diagp
);
295 local
.def
= be16toh(diagp
->def
);
296 local
.I
= word
>> 32 & 0x1;
299 if (diag
->def
< S390_CPUMSF_DIAG_DEF_FIRST
) {
300 pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos
);
303 color_fprintf(stdout
, color
, " [%#08zx] Diag Def:%04x %c\n",
304 pos
, diag
->def
, diag
->I
? 'I' : ' ');
308 /* Return TOD timestamp contained in a trailer entry */
309 static unsigned long long trailer_timestamp(struct hws_trailer_entry
*te
,
312 /* te->t set: TOD in STCKE format, bytes 8-15
313 * te->t not set: TOD in STCK format, bytes 0-7
315 unsigned long long ts
;
317 memcpy(&ts
, &te
->timestamp
[idx
], sizeof(ts
));
321 /* Display s390 CPU measurement facility trailer entry */
322 static bool s390_cpumsf_trailer_show(const char *color
, size_t pos
,
323 struct hws_trailer_entry
*te
)
325 #if __BYTE_ORDER == __LITTLE_ENDIAN
326 struct hws_trailer_entry local
;
327 const unsigned long long flags
= be64toh(te
->flags
);
329 memset(&local
, 0, sizeof(local
));
330 local
.f
= flags
>> 63 & 0x1;
331 local
.a
= flags
>> 62 & 0x1;
332 local
.t
= flags
>> 61 & 0x1;
333 local
.bsdes
= be16toh((flags
>> 16 & 0xffff));
334 local
.dsdes
= be16toh((flags
& 0xffff));
335 memcpy(&local
.timestamp
, te
->timestamp
, sizeof(te
->timestamp
));
336 local
.overflow
= be64toh(te
->overflow
);
337 local
.clock_base
= be64toh(te
->progusage
[0]) >> 63 & 1;
338 local
.progusage2
= be64toh(te
->progusage2
);
341 if (te
->bsdes
!= sizeof(struct hws_basic_entry
)) {
342 pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos
);
345 color_fprintf(stdout
, color
, " [%#08zx] Trailer %c%c%c bsdes:%d"
346 " dsdes:%d Overflow:%lld Time:%#llx\n"
347 "\t\tC:%d TOD:%#lx\n",
352 te
->bsdes
, te
->dsdes
, te
->overflow
,
353 trailer_timestamp(te
, te
->clock_base
),
354 te
->clock_base
, te
->progusage2
);
358 /* Test a sample data block. It must be 4KB or a multiple thereof in size and
359 * 4KB page aligned. Each sample data page has a trailer entry at the
360 * end which contains the sample entry data sizes.
362 * Return true if the sample data block passes the checks and set the
363 * basic set entry size and diagnostic set entry size.
365 * Return false on failure.
367 * Note: Old hardware does not set the basic or diagnostic entry sizes
368 * in the trailer entry. Use the type number instead.
370 static bool s390_cpumsf_validate(int machine_type
,
371 unsigned char *buf
, size_t len
,
372 unsigned short *bsdes
,
373 unsigned short *dsdes
)
375 struct hws_basic_entry
*basic
= (struct hws_basic_entry
*)buf
;
376 struct hws_trailer_entry
*te
;
379 if (len
& (S390_CPUMSF_PAGESZ
- 1)) /* Illegal size */
381 if (be16toh(basic
->def
) != 1) /* No basic set entry, must be first */
383 /* Check for trailer entry at end of SDB */
384 te
= (struct hws_trailer_entry
*)(buf
+ S390_CPUMSF_PAGESZ
386 *bsdes
= be16toh(te
->bsdes
);
387 *dsdes
= be16toh(te
->dsdes
);
388 if (!te
->bsdes
&& !te
->dsdes
) {
389 /* Very old hardware, use CPUID */
390 switch (machine_type
) {
412 /* Illegal trailer entry */
419 /* Return true if there is room for another entry */
420 static bool s390_cpumsf_reached_trailer(size_t entry_sz
, size_t pos
)
422 size_t payload
= S390_CPUMSF_PAGESZ
- sizeof(struct hws_trailer_entry
);
424 if (payload
- (pos
& (S390_CPUMSF_PAGESZ
- 1)) < entry_sz
)
429 /* Dump an auxiliary buffer. These buffers are multiple of
432 static void s390_cpumsf_dump(struct s390_cpumsf
*sf
,
433 unsigned char *buf
, size_t len
)
435 const char *color
= PERF_COLOR_BLUE
;
436 struct hws_basic_entry
*basic
;
437 struct hws_diag_entry
*diag
;
438 unsigned short bsdes
, dsdes
;
441 color_fprintf(stdout
, color
,
442 ". ... s390 AUX data: size %zu bytes\n",
445 if (!s390_cpumsf_validate(sf
->machine_type
, buf
, len
, &bsdes
,
447 pr_err("Invalid AUX trace data block size:%zu"
448 " (type:%d bsdes:%hd dsdes:%hd)\n",
449 len
, sf
->machine_type
, bsdes
, dsdes
);
453 /* s390 kernel always returns 4KB blocks fully occupied,
454 * no partially filled SDBs.
457 /* Handle Basic entry */
458 basic
= (struct hws_basic_entry
*)(buf
+ pos
);
459 if (s390_cpumsf_basic_show(color
, pos
, basic
))
464 /* Handle Diagnostic entry */
465 diag
= (struct hws_diag_entry
*)(buf
+ pos
);
466 if (s390_cpumsf_diag_show(color
, pos
, diag
))
471 /* Check for trailer entry */
472 if (!s390_cpumsf_reached_trailer(bsdes
+ dsdes
, pos
)) {
473 /* Show trailer entry */
474 struct hws_trailer_entry te
;
476 pos
= (pos
+ S390_CPUMSF_PAGESZ
)
477 & ~(S390_CPUMSF_PAGESZ
- 1);
479 memcpy(&te
, buf
+ pos
, sizeof(te
));
480 /* Set descriptor sizes in case of old hardware
481 * where these values are not set.
485 if (s390_cpumsf_trailer_show(color
, pos
, &te
))
493 static void s390_cpumsf_dump_event(struct s390_cpumsf
*sf
, unsigned char *buf
,
497 s390_cpumsf_dump(sf
, buf
, len
);
500 #define S390_LPP_PID_MASK 0xffffffff
502 static bool s390_cpumsf_make_event(size_t pos
,
503 struct hws_basic_entry
*basic
,
504 struct s390_cpumsf_queue
*sfq
)
506 struct perf_sample sample
= {
508 .pid
= basic
->hpp
& S390_LPP_PID_MASK
,
509 .tid
= basic
->hpp
& S390_LPP_PID_MASK
,
510 .cpumode
= PERF_RECORD_MISC_CPUMODE_UNKNOWN
,
514 union perf_event event
;
516 memset(&event
, 0, sizeof(event
));
517 if (basic
->CL
== 1) /* Native LPAR mode */
518 sample
.cpumode
= basic
->P
? PERF_RECORD_MISC_USER
519 : PERF_RECORD_MISC_KERNEL
;
520 else if (basic
->CL
== 2) /* Guest kernel/user space */
521 sample
.cpumode
= basic
->P
? PERF_RECORD_MISC_GUEST_USER
522 : PERF_RECORD_MISC_GUEST_KERNEL
;
523 else if (basic
->gpp
|| basic
->prim_asn
!= 0xffff)
524 /* Use heuristics on old hardware */
525 sample
.cpumode
= basic
->P
? PERF_RECORD_MISC_GUEST_USER
526 : PERF_RECORD_MISC_GUEST_KERNEL
;
528 sample
.cpumode
= basic
->P
? PERF_RECORD_MISC_USER
529 : PERF_RECORD_MISC_KERNEL
;
531 event
.sample
.header
.type
= PERF_RECORD_SAMPLE
;
532 event
.sample
.header
.misc
= sample
.cpumode
;
533 event
.sample
.header
.size
= sizeof(struct perf_event_header
);
535 pr_debug4("%s pos:%#zx ip:%#" PRIx64
" P:%d CL:%d pid:%d.%d cpumode:%d cpu:%d\n",
536 __func__
, pos
, sample
.ip
, basic
->P
, basic
->CL
, sample
.pid
,
537 sample
.tid
, sample
.cpumode
, sample
.cpu
);
538 if (perf_session__deliver_synth_event(sfq
->sf
->session
, &event
,
540 pr_err("s390 Auxiliary Trace: failed to deliver event\n");
546 static unsigned long long get_trailer_time(const unsigned char *buf
)
548 struct hws_trailer_entry
*te
;
549 unsigned long long aux_time
, progusage2
;
552 te
= (struct hws_trailer_entry
*)(buf
+ S390_CPUMSF_PAGESZ
555 #if __BYTE_ORDER == __LITTLE_ENDIAN
556 clock_base
= be64toh(te
->progusage
[0]) >> 63 & 0x1;
557 progusage2
= be64toh(te
->progusage
[1]);
559 clock_base
= te
->clock_base
;
560 progusage2
= te
->progusage2
;
562 if (!clock_base
) /* TOD_CLOCK_BASE value missing */
565 /* Correct calculation to convert time stamp in trailer entry to
566 * nano seconds (taken from arch/s390 function tod_to_ns()).
567 * TOD_CLOCK_BASE is stored in trailer entry member progusage2.
569 aux_time
= trailer_timestamp(te
, clock_base
) - progusage2
;
570 aux_time
= (aux_time
>> 9) * 125 + (((aux_time
& 0x1ff) * 125) >> 9);
574 /* Process the data samples of a single queue. The first parameter is a
575 * pointer to the queue, the second parameter is the time stamp. This
577 * - of the event that triggered this processing.
578 * - or the time stamp when the last processing of this queue stopped.
579 * In this case it stopped at a 4KB page boundary and recorded the
580 * position on where to continue processing on the next invocation
581 * (see buffer->use_data and buffer->use_size).
583 * When this function returns the second parameter is updated to
584 * reflect the time stamp of the last processed auxiliary data entry
585 * (taken from the trailer entry of that page). The caller uses this
586 * returned time stamp to record the last processed entry in this
589 * The function returns:
590 * 0: Processing successful. The second parameter returns the
591 * time stamp from the trailer entry until which position
592 * processing took place. Subsequent calls resume from this
594 * <0: An error occurred during processing. The second parameter
595 * returns the maximum time stamp.
596 * >0: Done on this queue. The second parameter returns the
597 * maximum time stamp.
599 static int s390_cpumsf_samples(struct s390_cpumsf_queue
*sfq
, u64
*ts
)
601 struct s390_cpumsf
*sf
= sfq
->sf
;
602 unsigned char *buf
= sfq
->buffer
->use_data
;
603 size_t len
= sfq
->buffer
->use_size
;
604 struct hws_basic_entry
*basic
;
605 unsigned short bsdes
, dsdes
;
610 if (!s390_cpumsf_validate(sf
->machine_type
, buf
, len
, &bsdes
,
616 /* Get trailer entry time stamp and check if entries in
617 * this auxiliary page are ready for processing. If the
618 * time stamp of the first entry is too high, whole buffer
619 * can be skipped. In this case return time stamp.
621 aux_ts
= get_trailer_time(buf
);
623 pr_err("[%#08" PRIx64
"] Invalid AUX trailer entry TOD clock base\n",
624 (s64
)sfq
->buffer
->data_offset
);
634 /* Handle Basic entry */
635 basic
= (struct hws_basic_entry
*)(buf
+ pos
);
636 if (s390_cpumsf_make_event(pos
, basic
, sfq
))
643 pos
+= dsdes
; /* Skip diagnostic entry */
645 /* Check for trailer entry */
646 if (!s390_cpumsf_reached_trailer(bsdes
+ dsdes
, pos
)) {
647 pos
= (pos
+ S390_CPUMSF_PAGESZ
)
648 & ~(S390_CPUMSF_PAGESZ
- 1);
649 /* Check existence of next page */
652 aux_ts
= get_trailer_time(buf
+ pos
);
659 sfq
->buffer
->use_data
+= pos
;
660 sfq
->buffer
->use_size
-= pos
;
667 sfq
->buffer
->use_size
= 0;
668 sfq
->buffer
->use_data
= NULL
;
669 return err
; /* Buffer completely scanned or error */
672 /* Run the s390 auxiliary trace decoder.
673 * Select the queue buffer to operate on, the caller already selected
674 * the proper queue, depending on second parameter 'ts'.
675 * This is the time stamp until which the auxiliary entries should
676 * be processed. This value is updated by called functions and
677 * returned to the caller.
679 * Resume processing in the current buffer. If there is no buffer
680 * get a new buffer from the queue and setup start position for
682 * When a buffer is completely processed remove it from the queue
685 * This function returns
686 * 1: When the queue is empty. Second parameter will be set to
687 * maximum time stamp.
688 * 0: Normal processing done.
689 * <0: Error during queue buffer setup. This causes the caller
690 * to stop processing completely.
692 static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue
*sfq
,
696 struct auxtrace_buffer
*buffer
;
697 struct auxtrace_queue
*queue
;
700 queue
= &sfq
->sf
->queues
.queue_array
[sfq
->queue_nr
];
702 /* Get buffer and last position in buffer to resume
703 * decoding the auxiliary entries. One buffer might be large
704 * and decoding might stop in between. This depends on the time
705 * stamp of the trailer entry in each page of the auxiliary
706 * data and the time stamp of the event triggering the decoding.
708 if (sfq
->buffer
== NULL
) {
709 sfq
->buffer
= buffer
= auxtrace_buffer__next(queue
,
713 return 1; /* Processing done on this queue */
715 /* Start with a new buffer on this queue */
717 buffer
->use_size
= buffer
->size
;
718 buffer
->use_data
= buffer
->data
;
720 if (sfq
->logfile
) { /* Write into log file */
721 size_t rc
= fwrite(buffer
->data
, buffer
->size
, 1,
724 pr_err("Failed to write auxiliary data\n");
727 buffer
= sfq
->buffer
;
730 int fd
= perf_data__fd(sfq
->sf
->session
->data
);
732 buffer
->data
= auxtrace_buffer__get_data(buffer
, fd
);
735 buffer
->use_size
= buffer
->size
;
736 buffer
->use_data
= buffer
->data
;
738 if (sfq
->logfile
) { /* Write into log file */
739 size_t rc
= fwrite(buffer
->data
, buffer
->size
, 1,
742 pr_err("Failed to write auxiliary data\n");
745 pr_debug4("%s queue_nr:%d buffer:%" PRId64
" offset:%#" PRIx64
" size:%#zx rest:%#zx\n",
746 __func__
, sfq
->queue_nr
, buffer
->buffer_nr
, buffer
->offset
,
747 buffer
->size
, buffer
->use_size
);
748 err
= s390_cpumsf_samples(sfq
, ts
);
750 /* If non-zero, there is either an error (err < 0) or the buffer is
751 * completely done (err > 0). The error is unrecoverable, usually
752 * some descriptors could not be read successfully, so continue with
754 * In both cases the parameter 'ts' has been updated.
758 list_del_init(&buffer
->list
);
759 auxtrace_buffer__free(buffer
);
760 if (err
> 0) /* Buffer done, no error */
766 static struct s390_cpumsf_queue
*
767 s390_cpumsf_alloc_queue(struct s390_cpumsf
*sf
, unsigned int queue_nr
)
769 struct s390_cpumsf_queue
*sfq
;
771 sfq
= zalloc(sizeof(struct s390_cpumsf_queue
));
776 sfq
->queue_nr
= queue_nr
;
778 if (sf
->use_logfile
) {
783 ? asprintf(&name
, "%s/aux.smp.%02x",
784 sf
->logdir
, queue_nr
)
785 : asprintf(&name
, "aux.smp.%02x", queue_nr
);
787 sfq
->logfile
= fopen(name
, "w");
788 if (sfq
->logfile
== NULL
) {
789 pr_err("Failed to open auxiliary log file %s,"
790 "continue...\n", name
);
791 sf
->use_logfile
= false;
798 static int s390_cpumsf_setup_queue(struct s390_cpumsf
*sf
,
799 struct auxtrace_queue
*queue
,
800 unsigned int queue_nr
, u64 ts
)
802 struct s390_cpumsf_queue
*sfq
= queue
->priv
;
804 if (list_empty(&queue
->head
))
808 sfq
= s390_cpumsf_alloc_queue(sf
, queue_nr
);
813 if (queue
->cpu
!= -1)
814 sfq
->cpu
= queue
->cpu
;
816 return auxtrace_heap__add(&sf
->heap
, queue_nr
, ts
);
819 static int s390_cpumsf_setup_queues(struct s390_cpumsf
*sf
, u64 ts
)
824 for (i
= 0; i
< sf
->queues
.nr_queues
; i
++) {
825 ret
= s390_cpumsf_setup_queue(sf
, &sf
->queues
.queue_array
[i
],
833 static int s390_cpumsf_update_queues(struct s390_cpumsf
*sf
, u64 ts
)
835 if (!sf
->queues
.new_data
)
838 sf
->queues
.new_data
= false;
839 return s390_cpumsf_setup_queues(sf
, ts
);
842 static int s390_cpumsf_process_queues(struct s390_cpumsf
*sf
, u64 timestamp
)
844 unsigned int queue_nr
;
849 struct auxtrace_queue
*queue
;
850 struct s390_cpumsf_queue
*sfq
;
852 if (!sf
->heap
.heap_cnt
)
855 if (sf
->heap
.heap_array
[0].ordinal
>= timestamp
)
858 queue_nr
= sf
->heap
.heap_array
[0].queue_nr
;
859 queue
= &sf
->queues
.queue_array
[queue_nr
];
862 auxtrace_heap__pop(&sf
->heap
);
863 if (sf
->heap
.heap_cnt
) {
864 ts
= sf
->heap
.heap_array
[0].ordinal
+ 1;
871 ret
= s390_cpumsf_run_decoder(sfq
, &ts
);
873 auxtrace_heap__add(&sf
->heap
, queue_nr
, ts
);
877 ret
= auxtrace_heap__add(&sf
->heap
, queue_nr
, ts
);
885 static int s390_cpumsf_synth_error(struct s390_cpumsf
*sf
, int code
, int cpu
,
886 pid_t pid
, pid_t tid
, u64 ip
, u64 timestamp
)
888 char msg
[MAX_AUXTRACE_ERROR_MSG
];
889 union perf_event event
;
892 strncpy(msg
, "Lost Auxiliary Trace Buffer", sizeof(msg
) - 1);
893 auxtrace_synth_error(&event
.auxtrace_error
, PERF_AUXTRACE_ERROR_ITRACE
,
894 code
, cpu
, pid
, tid
, ip
, msg
, timestamp
);
896 err
= perf_session__deliver_synth_event(sf
->session
, &event
, NULL
);
898 pr_err("s390 Auxiliary Trace: failed to deliver error event,"
903 static int s390_cpumsf_lost(struct s390_cpumsf
*sf
, struct perf_sample
*sample
)
905 return s390_cpumsf_synth_error(sf
, 1, sample
->cpu
,
906 sample
->pid
, sample
->tid
, 0,
911 s390_cpumsf_process_event(struct perf_session
*session
,
912 union perf_event
*event
,
913 struct perf_sample
*sample
,
914 struct perf_tool
*tool
)
916 struct s390_cpumsf
*sf
= container_of(session
->auxtrace
,
919 u64 timestamp
= sample
->time
;
920 struct evsel
*ev_bc000
;
927 if (!tool
->ordered_events
) {
928 pr_err("s390 Auxiliary Trace requires ordered events\n");
932 if (event
->header
.type
== PERF_RECORD_SAMPLE
&&
934 /* Handle event with raw data */
935 ev_bc000
= perf_evlist__event2evsel(session
->evlist
, event
);
937 ev_bc000
->core
.attr
.config
== PERF_EVENT_CPUM_CF_DIAG
)
938 err
= s390_cpumcf_dumpctr(sf
, sample
);
942 if (event
->header
.type
== PERF_RECORD_AUX
&&
943 event
->aux
.flags
& PERF_AUX_FLAG_TRUNCATED
)
944 return s390_cpumsf_lost(sf
, sample
);
947 err
= s390_cpumsf_update_queues(sf
, timestamp
);
949 err
= s390_cpumsf_process_queues(sf
, timestamp
);
954 struct s390_cpumsf_synth
{
955 struct perf_tool cpumsf_tool
;
956 struct perf_session
*session
;
960 s390_cpumsf_process_auxtrace_event(struct perf_session
*session
,
961 union perf_event
*event __maybe_unused
,
962 struct perf_tool
*tool __maybe_unused
)
964 struct s390_cpumsf
*sf
= container_of(session
->auxtrace
,
968 int fd
= perf_data__fd(session
->data
);
969 struct auxtrace_buffer
*buffer
;
976 if (perf_data__is_pipe(session
->data
)) {
979 data_offset
= lseek(fd
, 0, SEEK_CUR
);
980 if (data_offset
== -1)
984 err
= auxtrace_queues__add_event(&sf
->queues
, session
, event
,
985 data_offset
, &buffer
);
989 /* Dump here after copying piped trace out of the pipe */
991 if (auxtrace_buffer__get_data(buffer
, fd
)) {
992 s390_cpumsf_dump_event(sf
, buffer
->data
,
994 auxtrace_buffer__put_data(buffer
);
1000 static void s390_cpumsf_free_events(struct perf_session
*session __maybe_unused
)
1004 static int s390_cpumsf_flush(struct perf_session
*session __maybe_unused
,
1005 struct perf_tool
*tool __maybe_unused
)
1010 static void s390_cpumsf_free_queues(struct perf_session
*session
)
1012 struct s390_cpumsf
*sf
= container_of(session
->auxtrace
,
1015 struct auxtrace_queues
*queues
= &sf
->queues
;
1018 for (i
= 0; i
< queues
->nr_queues
; i
++) {
1019 struct s390_cpumsf_queue
*sfq
= (struct s390_cpumsf_queue
*)
1020 queues
->queue_array
[i
].priv
;
1024 fclose(sfq
->logfile
);
1025 sfq
->logfile
= NULL
;
1027 if (sfq
->logfile_ctr
) {
1028 fclose(sfq
->logfile_ctr
);
1029 sfq
->logfile_ctr
= NULL
;
1032 zfree(&queues
->queue_array
[i
].priv
);
1034 auxtrace_queues__free(queues
);
1037 static void s390_cpumsf_free(struct perf_session
*session
)
1039 struct s390_cpumsf
*sf
= container_of(session
->auxtrace
,
1043 auxtrace_heap__free(&sf
->heap
);
1044 s390_cpumsf_free_queues(session
);
1045 session
->auxtrace
= NULL
;
1051 s390_cpumsf_evsel_is_auxtrace(struct perf_session
*session __maybe_unused
,
1052 struct evsel
*evsel
)
1054 return evsel
->core
.attr
.type
== PERF_TYPE_RAW
&&
1055 evsel
->core
.attr
.config
== PERF_EVENT_CPUM_SF_DIAG
;
/* Extract the machine type number from a CPU identification string.
 *
 * The string is scanned as comma separated fields: the first field is
 * skipped and the second field is parsed as an unsigned decimal number.
 *
 * Return the parsed number, or 0 when cpuid is NULL or when no number
 * could be parsed from the second field.
 */
static int s390_cpumsf_get_type(const char *cpuid)
{
	/* %u requires an unsigned int destination, not an int. */
	unsigned int family = 0;

	if (!cpuid)		/* No CPU identification available */
		return 0;

	if (sscanf(cpuid, "%*[^,],%u", &family) != 1)
		return 0;

	return (int)family;
}
1066 /* Check itrace options set on perf report command.
1067 * Return true, if none are set or all options specified can be
1068 * handled on s390 (currently only option 'd' for logging).
1069 * Return false otherwise.
1071 static bool check_auxtrace_itrace(struct itrace_synth_opts
*itops
)
1075 if (!itops
|| !itops
->set
)
1077 ison
= itops
->inject
|| itops
->instructions
|| itops
->branches
||
1078 itops
->transactions
|| itops
->ptwrites
||
1079 itops
->pwr_events
|| itops
->errors
||
1080 itops
->dont_decode
|| itops
->calls
|| itops
->returns
||
1081 itops
->callchain
|| itops
->thread_stack
||
1082 itops
->last_branch
|| itops
->add_callchain
||
1083 itops
->add_last_branch
;
1086 pr_err("Unsupported --itrace options specified\n");
1090 /* Check for AUXTRACE dump directory if it is needed.
1091 * On failure print an error message but continue.
1092 * Return 0 on wrong keyword in config file and 1 otherwise.
1094 static int s390_cpumsf__config(const char *var
, const char *value
, void *cb
)
1096 struct s390_cpumsf
*sf
= cb
;
1100 if (strcmp(var
, "auxtrace.dumpdir"))
1102 sf
->logdir
= strdup(value
);
1103 if (sf
->logdir
== NULL
) {
1104 pr_err("Failed to find auxtrace log directory %s,"
1105 " continue with current directory...\n", value
);
1108 rc
= stat(sf
->logdir
, &stbuf
);
1109 if (rc
== -1 || !S_ISDIR(stbuf
.st_mode
)) {
1110 pr_err("Missing auxtrace log directory %s,"
1111 " continue with current directory...\n", value
);
1117 int s390_cpumsf_process_auxtrace_info(union perf_event
*event
,
1118 struct perf_session
*session
)
1120 struct perf_record_auxtrace_info
*auxtrace_info
= &event
->auxtrace_info
;
1121 struct s390_cpumsf
*sf
;
1124 if (auxtrace_info
->header
.size
< sizeof(struct perf_record_auxtrace_info
))
1127 sf
= zalloc(sizeof(struct s390_cpumsf
));
1131 if (!check_auxtrace_itrace(session
->itrace_synth_opts
)) {
1135 sf
->use_logfile
= session
->itrace_synth_opts
->log
;
1136 if (sf
->use_logfile
)
1137 perf_config(s390_cpumsf__config
, sf
);
1139 err
= auxtrace_queues__init(&sf
->queues
);
1143 sf
->session
= session
;
1144 sf
->machine
= &session
->machines
.host
; /* No kvm support */
1145 sf
->auxtrace_type
= auxtrace_info
->type
;
1146 sf
->pmu_type
= PERF_TYPE_RAW
;
1147 sf
->machine_type
= s390_cpumsf_get_type(session
->evlist
->env
->cpuid
);
1149 sf
->auxtrace
.process_event
= s390_cpumsf_process_event
;
1150 sf
->auxtrace
.process_auxtrace_event
= s390_cpumsf_process_auxtrace_event
;
1151 sf
->auxtrace
.flush_events
= s390_cpumsf_flush
;
1152 sf
->auxtrace
.free_events
= s390_cpumsf_free_events
;
1153 sf
->auxtrace
.free
= s390_cpumsf_free
;
1154 sf
->auxtrace
.evsel_is_auxtrace
= s390_cpumsf_evsel_is_auxtrace
;
1155 session
->auxtrace
= &sf
->auxtrace
;
1160 err
= auxtrace_queues__process_index(&sf
->queues
, session
);
1162 goto err_free_queues
;
1164 if (sf
->queues
.populated
)
1165 sf
->data_queued
= true;
1170 auxtrace_queues__free(&sf
->queues
);
1171 session
->auxtrace
= NULL
;