Linux 5.1.15
[linux/fpc-iii.git] / tools / perf / util / s390-cpumsf.c
blobc215704931dc990481060fae256fdb6e475da587
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright IBM Corp. 2018
4 * Auxtrace support for s390 CPU-Measurement Sampling Facility
6 * Author(s): Thomas Richter <tmricht@linux.ibm.com>
8 * Auxiliary traces are collected during 'perf record' using rbd000 event.
9 * Several PERF_RECORD_XXX are generated during recording:
11 * PERF_RECORD_AUX:
12 * Records that new data landed in the AUX buffer part.
13 * PERF_RECORD_AUXTRACE:
14 * Defines auxtrace data. Followed by the actual data. The contents of
15 * the auxtrace data is dependent on the event and the CPU.
16 * This record is generated by perf record command. For details
17 * see Documentation/perf.data-file-format.txt.
18 * PERF_RECORD_AUXTRACE_INFO:
19 * Defines a table of contains for PERF_RECORD_AUXTRACE records. This
20 * record is generated during 'perf record' command. Each record contains up
21 * to 256 entries describing offset and size of the AUXTRACE data in the
22 * perf.data file.
23 * PERF_RECORD_AUXTRACE_ERROR:
24 * Indicates an error during AUXTRACE collection such as buffer overflow.
25 * PERF_RECORD_FINISHED_ROUND:
26 * Perf events are not necessarily in time stamp order, as they can be
27 * collected in parallel on different CPUs. If the events should be
28 * processed in time order they need to be sorted first.
29 * Perf report guarantees that there is no reordering over a
30 * PERF_RECORD_FINISHED_ROUND boundary event. All perf records with a
31 * time stamp lower than this record are processed (and displayed) before
32 * the succeeding perf record are processed.
34 * These records are evaluated during perf report command.
36 * 1. PERF_RECORD_AUXTRACE_INFO is used to set up the infrastructure for
37 * auxiliary trace data processing. See s390_cpumsf_process_auxtrace_info()
38 * below.
39 * Auxiliary trace data is collected per CPU. To merge the data into the report
40 * an auxtrace_queue is created for each CPU. It is assumed that the auxtrace
41 * data is in ascending order.
43 * Each queue has a double linked list of auxtrace_buffers. This list contains
44 * the offset and size of a CPU's auxtrace data. During auxtrace processing
45 * the data portion is mmap()'ed.
47 * To sort the queues in chronological order, all queue access is controlled
48 * by the auxtrace_heap. This is basicly a stack, each stack element has two
49 * entries, the queue number and a time stamp. However the stack is sorted by
50 * the time stamps. The highest time stamp is at the bottom the lowest
51 * (nearest) time stamp is at the top. That sort order is maintained at all
52 * times!
54 * After the auxtrace infrastructure has been setup, the auxtrace queues are
55 * filled with data (offset/size pairs) and the auxtrace_heap is populated.
57 * 2. PERF_RECORD_XXX processing triggers access to the auxtrace_queues.
58 * Each record is handled by s390_cpumsf_process_event(). The time stamp of
59 * the perf record is compared with the time stamp located on the auxtrace_heap
60 * top element. If that time stamp is lower than the time stamp from the
61 * record sample, the auxtrace queues will be processed. As auxtrace queues
62 * control many auxtrace_buffers and each buffer can be quite large, the
63 * auxtrace buffer might be processed only partially. In this case the
64 * position in the auxtrace_buffer of that queue is remembered and the time
65 * stamp of the last processed entry of the auxtrace_buffer replaces the
66 * current auxtrace_heap top.
68 * 3. Auxtrace_queues might run of out data and are feeded by the
69 * PERF_RECORD_AUXTRACE handling, see s390_cpumsf_process_auxtrace_event().
71 * Event Generation
72 * Each sampling-data entry in the auxilary trace data generates a perf sample.
73 * This sample is filled
74 * with data from the auxtrace such as PID/TID, instruction address, CPU state,
75 * etc. This sample is processed with perf_session__deliver_synth_event() to
76 * be included into the GUI.
78 * 4. PERF_RECORD_FINISHED_ROUND event is used to process all the remaining
79 * auxiliary traces entries until the time stamp of this record is reached
80 * auxtrace_heap top. This is triggered by ordered_event->deliver().
83 * Perf event processing.
84 * Event processing of PERF_RECORD_XXX entries relies on time stamp entries.
85 * This is the function call sequence:
87 * __cmd_report()
88 * |
89 * perf_session__process_events()
90 * |
91 * __perf_session__process_events()
92 * |
93 * perf_session__process_event()
94 * | This functions splits the PERF_RECORD_XXX records.
95 * | - Those generated by perf record command (type number equal or higher
96 * | than PERF_RECORD_USER_TYPE_START) are handled by
97 * | perf_session__process_user_event(see below)
98 * | - Those generated by the kernel are handled by
99 * | perf_evlist__parse_sample_timestamp()
101 * perf_evlist__parse_sample_timestamp()
102 * | Extract time stamp from sample data.
104 * perf_session__queue_event()
105 * | If timestamp is positive the sample is entered into an ordered_event
106 * | list, sort order is the timestamp. The event processing is deferred until
107 * | later (see perf_session__process_user_event()).
108 * | Other timestamps (0 or -1) are handled immediately by
109 * | perf_session__deliver_event(). These are events generated at start up
110 * | of command perf record. They create PERF_RECORD_COMM and PERF_RECORD_MMAP*
111 * | records. They are needed to create a list of running processes and its
112 * | memory mappings and layout. They are needed at the beginning to enable
113 * | command perf report to create process trees and memory mappings.
115 * perf_session__deliver_event()
116 * | Delivers a PERF_RECORD_XXX entry for handling.
118 * auxtrace__process_event()
119 * | The timestamp of the PERF_RECORD_XXX entry is taken to correlate with
120 * | time stamps from the auxiliary trace buffers. This enables
121 * | synchronization between auxiliary trace data and the events on the
122 * | perf.data file.
124 * machine__deliver_event()
125 * | Handles the PERF_RECORD_XXX event. This depends on the record type.
126 * It might update the process tree, update a process memory map or enter
127 * a sample with IP and call back chain data into GUI data pool.
130 * Deferred processing determined by perf_session__process_user_event() is
131 * finally processed when a PERF_RECORD_FINISHED_ROUND is encountered. These
132 * are generated during command perf record.
133 * The timestamp of PERF_RECORD_FINISHED_ROUND event is taken to process all
134 * PERF_RECORD_XXX entries stored in the ordered_event list. This list was
135 * built up while reading the perf.data file.
136 * Each event is now processed by calling perf_session__deliver_event().
137 * This enables time synchronization between the data in the perf.data file and
138 * the data in the auxiliary trace buffers.
141 #include <endian.h>
142 #include <errno.h>
143 #include <byteswap.h>
144 #include <inttypes.h>
145 #include <linux/kernel.h>
146 #include <linux/types.h>
147 #include <linux/bitops.h>
148 #include <linux/log2.h>
150 #include <sys/stat.h>
151 #include <sys/types.h>
153 #include "cpumap.h"
154 #include "color.h"
155 #include "evsel.h"
156 #include "evlist.h"
157 #include "machine.h"
158 #include "session.h"
159 #include "util.h"
160 #include "thread.h"
161 #include "debug.h"
162 #include "auxtrace.h"
163 #include "s390-cpumsf.h"
164 #include "s390-cpumsf-kernel.h"
165 #include "s390-cpumcf-kernel.h"
166 #include "config.h"
168 struct s390_cpumsf {
169 struct auxtrace auxtrace;
170 struct auxtrace_queues queues;
171 struct auxtrace_heap heap;
172 struct perf_session *session;
173 struct machine *machine;
174 u32 auxtrace_type;
175 u32 pmu_type;
176 u16 machine_type;
177 bool data_queued;
178 bool use_logfile;
179 char *logdir;
182 struct s390_cpumsf_queue {
183 struct s390_cpumsf *sf;
184 unsigned int queue_nr;
185 struct auxtrace_buffer *buffer;
186 int cpu;
187 FILE *logfile;
188 FILE *logfile_ctr;
191 /* Check if the raw data should be dumped to file. If this is the case and
192 * the file to dump to has not been opened for writing, do so.
194 * Return 0 on success and greater zero on error so processing continues.
196 static int s390_cpumcf_dumpctr(struct s390_cpumsf *sf,
197 struct perf_sample *sample)
199 struct s390_cpumsf_queue *sfq;
200 struct auxtrace_queue *q;
201 int rc = 0;
203 if (!sf->use_logfile || sf->queues.nr_queues <= sample->cpu)
204 return rc;
206 q = &sf->queues.queue_array[sample->cpu];
207 sfq = q->priv;
208 if (!sfq) /* Queue not yet allocated */
209 return rc;
211 if (!sfq->logfile_ctr) {
212 char *name;
214 rc = (sf->logdir)
215 ? asprintf(&name, "%s/aux.ctr.%02x",
216 sf->logdir, sample->cpu)
217 : asprintf(&name, "aux.ctr.%02x", sample->cpu);
218 if (rc > 0)
219 sfq->logfile_ctr = fopen(name, "w");
220 if (sfq->logfile_ctr == NULL) {
221 pr_err("Failed to open counter set log file %s, "
222 "continue...\n", name);
223 rc = 1;
225 free(name);
228 if (sfq->logfile_ctr) {
229 /* See comment above for -4 */
230 size_t n = fwrite(sample->raw_data, sample->raw_size - 4, 1,
231 sfq->logfile_ctr);
232 if (n != 1) {
233 pr_err("Failed to write counter set data\n");
234 rc = 1;
237 return rc;
240 /* Display s390 CPU measurement facility basic-sampling data entry */
241 static bool s390_cpumsf_basic_show(const char *color, size_t pos,
242 struct hws_basic_entry *basic)
244 if (basic->def != 1) {
245 pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos);
246 return false;
248 color_fprintf(stdout, color, " [%#08zx] Basic Def:%04x Inst:%#04x"
249 " %c%c%c%c AS:%d ASN:%#04x IA:%#018llx\n"
250 "\t\tCL:%d HPP:%#018llx GPP:%#018llx\n",
251 pos, basic->def, basic->U,
252 basic->T ? 'T' : ' ',
253 basic->W ? 'W' : ' ',
254 basic->P ? 'P' : ' ',
255 basic->I ? 'I' : ' ',
256 basic->AS, basic->prim_asn, basic->ia, basic->CL,
257 basic->hpp, basic->gpp);
258 return true;
261 /* Display s390 CPU measurement facility diagnostic-sampling data entry */
262 static bool s390_cpumsf_diag_show(const char *color, size_t pos,
263 struct hws_diag_entry *diag)
265 if (diag->def < S390_CPUMSF_DIAG_DEF_FIRST) {
266 pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos);
267 return false;
269 color_fprintf(stdout, color, " [%#08zx] Diag Def:%04x %c\n",
270 pos, diag->def, diag->I ? 'I' : ' ');
271 return true;
274 /* Return TOD timestamp contained in an trailer entry */
275 static unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
277 /* te->t set: TOD in STCKE format, bytes 8-15
278 * to->t not set: TOD in STCK format, bytes 0-7
280 unsigned long long ts;
282 memcpy(&ts, &te->timestamp[te->t], sizeof(ts));
283 return ts;
286 /* Display s390 CPU measurement facility trailer entry */
287 static bool s390_cpumsf_trailer_show(const char *color, size_t pos,
288 struct hws_trailer_entry *te)
290 if (te->bsdes != sizeof(struct hws_basic_entry)) {
291 pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos);
292 return false;
294 color_fprintf(stdout, color, " [%#08zx] Trailer %c%c%c bsdes:%d"
295 " dsdes:%d Overflow:%lld Time:%#llx\n"
296 "\t\tC:%d TOD:%#lx 1:%#llx 2:%#llx\n",
297 pos,
298 te->f ? 'F' : ' ',
299 te->a ? 'A' : ' ',
300 te->t ? 'T' : ' ',
301 te->bsdes, te->dsdes, te->overflow,
302 trailer_timestamp(te), te->clock_base, te->progusage2,
303 te->progusage[0], te->progusage[1]);
304 return true;
307 /* Test a sample data block. It must be 4KB or a multiple thereof in size and
308 * 4KB page aligned. Each sample data page has a trailer entry at the
309 * end which contains the sample entry data sizes.
311 * Return true if the sample data block passes the checks and set the
312 * basic set entry size and diagnostic set entry size.
314 * Return false on failure.
316 * Note: Old hardware does not set the basic or diagnostic entry sizes
317 * in the trailer entry. Use the type number instead.
319 static bool s390_cpumsf_validate(int machine_type,
320 unsigned char *buf, size_t len,
321 unsigned short *bsdes,
322 unsigned short *dsdes)
324 struct hws_basic_entry *basic = (struct hws_basic_entry *)buf;
325 struct hws_trailer_entry *te;
327 *dsdes = *bsdes = 0;
328 if (len & (S390_CPUMSF_PAGESZ - 1)) /* Illegal size */
329 return false;
330 if (basic->def != 1) /* No basic set entry, must be first */
331 return false;
332 /* Check for trailer entry at end of SDB */
333 te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
334 - sizeof(*te));
335 *bsdes = te->bsdes;
336 *dsdes = te->dsdes;
337 if (!te->bsdes && !te->dsdes) {
338 /* Very old hardware, use CPUID */
339 switch (machine_type) {
340 case 2097:
341 case 2098:
342 *dsdes = 64;
343 *bsdes = 32;
344 break;
345 case 2817:
346 case 2818:
347 *dsdes = 74;
348 *bsdes = 32;
349 break;
350 case 2827:
351 case 2828:
352 *dsdes = 85;
353 *bsdes = 32;
354 break;
355 case 2964:
356 case 2965:
357 *dsdes = 112;
358 *bsdes = 32;
359 break;
360 default:
361 /* Illegal trailer entry */
362 return false;
365 return true;
368 /* Return true if there is room for another entry */
369 static bool s390_cpumsf_reached_trailer(size_t entry_sz, size_t pos)
371 size_t payload = S390_CPUMSF_PAGESZ - sizeof(struct hws_trailer_entry);
373 if (payload - (pos & (S390_CPUMSF_PAGESZ - 1)) < entry_sz)
374 return false;
375 return true;
378 /* Dump an auxiliary buffer. These buffers are multiple of
379 * 4KB SDB pages.
381 static void s390_cpumsf_dump(struct s390_cpumsf *sf,
382 unsigned char *buf, size_t len)
384 const char *color = PERF_COLOR_BLUE;
385 struct hws_basic_entry *basic;
386 struct hws_diag_entry *diag;
387 unsigned short bsdes, dsdes;
388 size_t pos = 0;
390 color_fprintf(stdout, color,
391 ". ... s390 AUX data: size %zu bytes\n",
392 len);
394 if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes,
395 &dsdes)) {
396 pr_err("Invalid AUX trace data block size:%zu"
397 " (type:%d bsdes:%hd dsdes:%hd)\n",
398 len, sf->machine_type, bsdes, dsdes);
399 return;
402 /* s390 kernel always returns 4KB blocks fully occupied,
403 * no partially filled SDBs.
405 while (pos < len) {
406 /* Handle Basic entry */
407 basic = (struct hws_basic_entry *)(buf + pos);
408 if (s390_cpumsf_basic_show(color, pos, basic))
409 pos += bsdes;
410 else
411 return;
413 /* Handle Diagnostic entry */
414 diag = (struct hws_diag_entry *)(buf + pos);
415 if (s390_cpumsf_diag_show(color, pos, diag))
416 pos += dsdes;
417 else
418 return;
420 /* Check for trailer entry */
421 if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
422 /* Show trailer entry */
423 struct hws_trailer_entry te;
425 pos = (pos + S390_CPUMSF_PAGESZ)
426 & ~(S390_CPUMSF_PAGESZ - 1);
427 pos -= sizeof(te);
428 memcpy(&te, buf + pos, sizeof(te));
429 /* Set descriptor sizes in case of old hardware
430 * where these values are not set.
432 te.bsdes = bsdes;
433 te.dsdes = dsdes;
434 if (s390_cpumsf_trailer_show(color, pos, &te))
435 pos += sizeof(te);
436 else
437 return;
442 static void s390_cpumsf_dump_event(struct s390_cpumsf *sf, unsigned char *buf,
443 size_t len)
445 printf(".\n");
446 s390_cpumsf_dump(sf, buf, len);
449 #define S390_LPP_PID_MASK 0xffffffff
451 static bool s390_cpumsf_make_event(size_t pos,
452 struct hws_basic_entry *basic,
453 struct s390_cpumsf_queue *sfq)
455 struct perf_sample sample = {
456 .ip = basic->ia,
457 .pid = basic->hpp & S390_LPP_PID_MASK,
458 .tid = basic->hpp & S390_LPP_PID_MASK,
459 .cpumode = PERF_RECORD_MISC_CPUMODE_UNKNOWN,
460 .cpu = sfq->cpu,
461 .period = 1
463 union perf_event event;
465 memset(&event, 0, sizeof(event));
466 if (basic->CL == 1) /* Native LPAR mode */
467 sample.cpumode = basic->P ? PERF_RECORD_MISC_USER
468 : PERF_RECORD_MISC_KERNEL;
469 else if (basic->CL == 2) /* Guest kernel/user space */
470 sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER
471 : PERF_RECORD_MISC_GUEST_KERNEL;
472 else if (basic->gpp || basic->prim_asn != 0xffff)
473 /* Use heuristics on old hardware */
474 sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER
475 : PERF_RECORD_MISC_GUEST_KERNEL;
476 else
477 sample.cpumode = basic->P ? PERF_RECORD_MISC_USER
478 : PERF_RECORD_MISC_KERNEL;
480 event.sample.header.type = PERF_RECORD_SAMPLE;
481 event.sample.header.misc = sample.cpumode;
482 event.sample.header.size = sizeof(struct perf_event_header);
484 pr_debug4("%s pos:%#zx ip:%#" PRIx64 " P:%d CL:%d pid:%d.%d cpumode:%d cpu:%d\n",
485 __func__, pos, sample.ip, basic->P, basic->CL, sample.pid,
486 sample.tid, sample.cpumode, sample.cpu);
487 if (perf_session__deliver_synth_event(sfq->sf->session, &event,
488 &sample)) {
489 pr_err("s390 Auxiliary Trace: failed to deliver event\n");
490 return false;
492 return true;
495 static unsigned long long get_trailer_time(const unsigned char *buf)
497 struct hws_trailer_entry *te;
498 unsigned long long aux_time;
500 te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
501 - sizeof(*te));
503 if (!te->clock_base) /* TOD_CLOCK_BASE value missing */
504 return 0;
506 /* Correct calculation to convert time stamp in trailer entry to
507 * nano seconds (taken from arch/s390 function tod_to_ns()).
508 * TOD_CLOCK_BASE is stored in trailer entry member progusage2.
510 aux_time = trailer_timestamp(te) - te->progusage2;
511 aux_time = (aux_time >> 9) * 125 + (((aux_time & 0x1ff) * 125) >> 9);
512 return aux_time;
515 /* Process the data samples of a single queue. The first parameter is a
516 * pointer to the queue, the second parameter is the time stamp. This
517 * is the time stamp:
518 * - of the event that triggered this processing.
519 * - or the time stamp when the last proccesing of this queue stopped.
520 * In this case it stopped at a 4KB page boundary and record the
521 * position on where to continue processing on the next invocation
522 * (see buffer->use_data and buffer->use_size).
524 * When this function returns the second parameter is updated to
525 * reflect the time stamp of the last processed auxiliary data entry
526 * (taken from the trailer entry of that page). The caller uses this
527 * returned time stamp to record the last processed entry in this
528 * queue.
530 * The function returns:
531 * 0: Processing successful. The second parameter returns the
532 * time stamp from the trailer entry until which position
533 * processing took place. Subsequent calls resume from this
534 * position.
535 * <0: An error occurred during processing. The second parameter
536 * returns the maximum time stamp.
537 * >0: Done on this queue. The second parameter returns the
538 * maximum time stamp.
540 static int s390_cpumsf_samples(struct s390_cpumsf_queue *sfq, u64 *ts)
542 struct s390_cpumsf *sf = sfq->sf;
543 unsigned char *buf = sfq->buffer->use_data;
544 size_t len = sfq->buffer->use_size;
545 struct hws_basic_entry *basic;
546 unsigned short bsdes, dsdes;
547 size_t pos = 0;
548 int err = 1;
549 u64 aux_ts;
551 if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes,
552 &dsdes)) {
553 *ts = ~0ULL;
554 return -1;
557 /* Get trailer entry time stamp and check if entries in
558 * this auxiliary page are ready for processing. If the
559 * time stamp of the first entry is too high, whole buffer
560 * can be skipped. In this case return time stamp.
562 aux_ts = get_trailer_time(buf);
563 if (!aux_ts) {
564 pr_err("[%#08" PRIx64 "] Invalid AUX trailer entry TOD clock base\n",
565 (s64)sfq->buffer->data_offset);
566 aux_ts = ~0ULL;
567 goto out;
569 if (aux_ts > *ts) {
570 *ts = aux_ts;
571 return 0;
574 while (pos < len) {
575 /* Handle Basic entry */
576 basic = (struct hws_basic_entry *)(buf + pos);
577 if (s390_cpumsf_make_event(pos, basic, sfq))
578 pos += bsdes;
579 else {
580 err = -EBADF;
581 goto out;
584 pos += dsdes; /* Skip diagnositic entry */
586 /* Check for trailer entry */
587 if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
588 pos = (pos + S390_CPUMSF_PAGESZ)
589 & ~(S390_CPUMSF_PAGESZ - 1);
590 /* Check existence of next page */
591 if (pos >= len)
592 break;
593 aux_ts = get_trailer_time(buf + pos);
594 if (!aux_ts) {
595 aux_ts = ~0ULL;
596 goto out;
598 if (aux_ts > *ts) {
599 *ts = aux_ts;
600 sfq->buffer->use_data += pos;
601 sfq->buffer->use_size -= pos;
602 return 0;
606 out:
607 *ts = aux_ts;
608 sfq->buffer->use_size = 0;
609 sfq->buffer->use_data = NULL;
610 return err; /* Buffer completely scanned or error */
613 /* Run the s390 auxiliary trace decoder.
614 * Select the queue buffer to operate on, the caller already selected
615 * the proper queue, depending on second parameter 'ts'.
616 * This is the time stamp until which the auxiliary entries should
617 * be processed. This value is updated by called functions and
618 * returned to the caller.
620 * Resume processing in the current buffer. If there is no buffer
621 * get a new buffer from the queue and setup start position for
622 * processing.
623 * When a buffer is completely processed remove it from the queue
624 * before returning.
626 * This function returns
627 * 1: When the queue is empty. Second parameter will be set to
628 * maximum time stamp.
629 * 0: Normal processing done.
630 * <0: Error during queue buffer setup. This causes the caller
631 * to stop processing completely.
633 static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue *sfq,
634 u64 *ts)
637 struct auxtrace_buffer *buffer;
638 struct auxtrace_queue *queue;
639 int err;
641 queue = &sfq->sf->queues.queue_array[sfq->queue_nr];
643 /* Get buffer and last position in buffer to resume
644 * decoding the auxiliary entries. One buffer might be large
645 * and decoding might stop in between. This depends on the time
646 * stamp of the trailer entry in each page of the auxiliary
647 * data and the time stamp of the event triggering the decoding.
649 if (sfq->buffer == NULL) {
650 sfq->buffer = buffer = auxtrace_buffer__next(queue,
651 sfq->buffer);
652 if (!buffer) {
653 *ts = ~0ULL;
654 return 1; /* Processing done on this queue */
656 /* Start with a new buffer on this queue */
657 if (buffer->data) {
658 buffer->use_size = buffer->size;
659 buffer->use_data = buffer->data;
661 if (sfq->logfile) { /* Write into log file */
662 size_t rc = fwrite(buffer->data, buffer->size, 1,
663 sfq->logfile);
664 if (rc != 1)
665 pr_err("Failed to write auxiliary data\n");
667 } else
668 buffer = sfq->buffer;
670 if (!buffer->data) {
671 int fd = perf_data__fd(sfq->sf->session->data);
673 buffer->data = auxtrace_buffer__get_data(buffer, fd);
674 if (!buffer->data)
675 return -ENOMEM;
676 buffer->use_size = buffer->size;
677 buffer->use_data = buffer->data;
679 if (sfq->logfile) { /* Write into log file */
680 size_t rc = fwrite(buffer->data, buffer->size, 1,
681 sfq->logfile);
682 if (rc != 1)
683 pr_err("Failed to write auxiliary data\n");
686 pr_debug4("%s queue_nr:%d buffer:%" PRId64 " offset:%#" PRIx64 " size:%#zx rest:%#zx\n",
687 __func__, sfq->queue_nr, buffer->buffer_nr, buffer->offset,
688 buffer->size, buffer->use_size);
689 err = s390_cpumsf_samples(sfq, ts);
691 /* If non-zero, there is either an error (err < 0) or the buffer is
692 * completely done (err > 0). The error is unrecoverable, usually
693 * some descriptors could not be read successfully, so continue with
694 * the next buffer.
695 * In both cases the parameter 'ts' has been updated.
697 if (err) {
698 sfq->buffer = NULL;
699 list_del(&buffer->list);
700 auxtrace_buffer__free(buffer);
701 if (err > 0) /* Buffer done, no error */
702 err = 0;
704 return err;
707 static struct s390_cpumsf_queue *
708 s390_cpumsf_alloc_queue(struct s390_cpumsf *sf, unsigned int queue_nr)
710 struct s390_cpumsf_queue *sfq;
712 sfq = zalloc(sizeof(struct s390_cpumsf_queue));
713 if (sfq == NULL)
714 return NULL;
716 sfq->sf = sf;
717 sfq->queue_nr = queue_nr;
718 sfq->cpu = -1;
719 if (sf->use_logfile) {
720 char *name;
721 int rc;
723 rc = (sf->logdir)
724 ? asprintf(&name, "%s/aux.smp.%02x",
725 sf->logdir, queue_nr)
726 : asprintf(&name, "aux.smp.%02x", queue_nr);
727 if (rc > 0)
728 sfq->logfile = fopen(name, "w");
729 if (sfq->logfile == NULL) {
730 pr_err("Failed to open auxiliary log file %s,"
731 "continue...\n", name);
732 sf->use_logfile = false;
734 free(name);
736 return sfq;
739 static int s390_cpumsf_setup_queue(struct s390_cpumsf *sf,
740 struct auxtrace_queue *queue,
741 unsigned int queue_nr, u64 ts)
743 struct s390_cpumsf_queue *sfq = queue->priv;
745 if (list_empty(&queue->head))
746 return 0;
748 if (sfq == NULL) {
749 sfq = s390_cpumsf_alloc_queue(sf, queue_nr);
750 if (!sfq)
751 return -ENOMEM;
752 queue->priv = sfq;
754 if (queue->cpu != -1)
755 sfq->cpu = queue->cpu;
757 return auxtrace_heap__add(&sf->heap, queue_nr, ts);
760 static int s390_cpumsf_setup_queues(struct s390_cpumsf *sf, u64 ts)
762 unsigned int i;
763 int ret = 0;
765 for (i = 0; i < sf->queues.nr_queues; i++) {
766 ret = s390_cpumsf_setup_queue(sf, &sf->queues.queue_array[i],
767 i, ts);
768 if (ret)
769 break;
771 return ret;
774 static int s390_cpumsf_update_queues(struct s390_cpumsf *sf, u64 ts)
776 if (!sf->queues.new_data)
777 return 0;
779 sf->queues.new_data = false;
780 return s390_cpumsf_setup_queues(sf, ts);
783 static int s390_cpumsf_process_queues(struct s390_cpumsf *sf, u64 timestamp)
785 unsigned int queue_nr;
786 u64 ts;
787 int ret;
789 while (1) {
790 struct auxtrace_queue *queue;
791 struct s390_cpumsf_queue *sfq;
793 if (!sf->heap.heap_cnt)
794 return 0;
796 if (sf->heap.heap_array[0].ordinal >= timestamp)
797 return 0;
799 queue_nr = sf->heap.heap_array[0].queue_nr;
800 queue = &sf->queues.queue_array[queue_nr];
801 sfq = queue->priv;
803 auxtrace_heap__pop(&sf->heap);
804 if (sf->heap.heap_cnt) {
805 ts = sf->heap.heap_array[0].ordinal + 1;
806 if (ts > timestamp)
807 ts = timestamp;
808 } else {
809 ts = timestamp;
812 ret = s390_cpumsf_run_decoder(sfq, &ts);
813 if (ret < 0) {
814 auxtrace_heap__add(&sf->heap, queue_nr, ts);
815 return ret;
817 if (!ret) {
818 ret = auxtrace_heap__add(&sf->heap, queue_nr, ts);
819 if (ret < 0)
820 return ret;
823 return 0;
826 static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu,
827 pid_t pid, pid_t tid, u64 ip, u64 timestamp)
829 char msg[MAX_AUXTRACE_ERROR_MSG];
830 union perf_event event;
831 int err;
833 strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1);
834 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
835 code, cpu, pid, tid, ip, msg, timestamp);
837 err = perf_session__deliver_synth_event(sf->session, &event, NULL);
838 if (err)
839 pr_err("s390 Auxiliary Trace: failed to deliver error event,"
840 "error %d\n", err);
841 return err;
844 static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample)
846 return s390_cpumsf_synth_error(sf, 1, sample->cpu,
847 sample->pid, sample->tid, 0,
848 sample->time);
851 static int
852 s390_cpumsf_process_event(struct perf_session *session,
853 union perf_event *event,
854 struct perf_sample *sample,
855 struct perf_tool *tool)
857 struct s390_cpumsf *sf = container_of(session->auxtrace,
858 struct s390_cpumsf,
859 auxtrace);
860 u64 timestamp = sample->time;
861 struct perf_evsel *ev_bc000;
863 int err = 0;
865 if (dump_trace)
866 return 0;
868 if (!tool->ordered_events) {
869 pr_err("s390 Auxiliary Trace requires ordered events\n");
870 return -EINVAL;
873 if (event->header.type == PERF_RECORD_SAMPLE &&
874 sample->raw_size) {
875 /* Handle event with raw data */
876 ev_bc000 = perf_evlist__event2evsel(session->evlist, event);
877 if (ev_bc000 &&
878 ev_bc000->attr.config == PERF_EVENT_CPUM_CF_DIAG)
879 err = s390_cpumcf_dumpctr(sf, sample);
880 return err;
883 if (event->header.type == PERF_RECORD_AUX &&
884 event->aux.flags & PERF_AUX_FLAG_TRUNCATED)
885 return s390_cpumsf_lost(sf, sample);
887 if (timestamp) {
888 err = s390_cpumsf_update_queues(sf, timestamp);
889 if (!err)
890 err = s390_cpumsf_process_queues(sf, timestamp);
892 return err;
895 struct s390_cpumsf_synth {
896 struct perf_tool cpumsf_tool;
897 struct perf_session *session;
900 static int
901 s390_cpumsf_process_auxtrace_event(struct perf_session *session,
902 union perf_event *event __maybe_unused,
903 struct perf_tool *tool __maybe_unused)
905 struct s390_cpumsf *sf = container_of(session->auxtrace,
906 struct s390_cpumsf,
907 auxtrace);
909 int fd = perf_data__fd(session->data);
910 struct auxtrace_buffer *buffer;
911 off_t data_offset;
912 int err;
914 if (sf->data_queued)
915 return 0;
917 if (perf_data__is_pipe(session->data)) {
918 data_offset = 0;
919 } else {
920 data_offset = lseek(fd, 0, SEEK_CUR);
921 if (data_offset == -1)
922 return -errno;
925 err = auxtrace_queues__add_event(&sf->queues, session, event,
926 data_offset, &buffer);
927 if (err)
928 return err;
930 /* Dump here after copying piped trace out of the pipe */
931 if (dump_trace) {
932 if (auxtrace_buffer__get_data(buffer, fd)) {
933 s390_cpumsf_dump_event(sf, buffer->data,
934 buffer->size);
935 auxtrace_buffer__put_data(buffer);
938 return 0;
941 static void s390_cpumsf_free_events(struct perf_session *session __maybe_unused)
945 static int s390_cpumsf_flush(struct perf_session *session __maybe_unused,
946 struct perf_tool *tool __maybe_unused)
948 return 0;
951 static void s390_cpumsf_free_queues(struct perf_session *session)
953 struct s390_cpumsf *sf = container_of(session->auxtrace,
954 struct s390_cpumsf,
955 auxtrace);
956 struct auxtrace_queues *queues = &sf->queues;
957 unsigned int i;
959 for (i = 0; i < queues->nr_queues; i++) {
960 struct s390_cpumsf_queue *sfq = (struct s390_cpumsf_queue *)
961 queues->queue_array[i].priv;
963 if (sfq != NULL) {
964 if (sfq->logfile) {
965 fclose(sfq->logfile);
966 sfq->logfile = NULL;
968 if (sfq->logfile_ctr) {
969 fclose(sfq->logfile_ctr);
970 sfq->logfile_ctr = NULL;
973 zfree(&queues->queue_array[i].priv);
975 auxtrace_queues__free(queues);
978 static void s390_cpumsf_free(struct perf_session *session)
980 struct s390_cpumsf *sf = container_of(session->auxtrace,
981 struct s390_cpumsf,
982 auxtrace);
984 auxtrace_heap__free(&sf->heap);
985 s390_cpumsf_free_queues(session);
986 session->auxtrace = NULL;
987 free(sf->logdir);
988 free(sf);
991 static int s390_cpumsf_get_type(const char *cpuid)
993 int ret, family = 0;
995 ret = sscanf(cpuid, "%*[^,],%u", &family);
996 return (ret == 1) ? family : 0;
999 /* Check itrace options set on perf report command.
1000 * Return true, if none are set or all options specified can be
1001 * handled on s390 (currently only option 'd' for logging.
1002 * Return false otherwise.
1004 static bool check_auxtrace_itrace(struct itrace_synth_opts *itops)
1006 bool ison = false;
1008 if (!itops || !itops->set)
1009 return true;
1010 ison = itops->inject || itops->instructions || itops->branches ||
1011 itops->transactions || itops->ptwrites ||
1012 itops->pwr_events || itops->errors ||
1013 itops->dont_decode || itops->calls || itops->returns ||
1014 itops->callchain || itops->thread_stack ||
1015 itops->last_branch;
1016 if (!ison)
1017 return true;
1018 pr_err("Unsupported --itrace options specified\n");
1019 return false;
1022 /* Check for AUXTRACE dump directory if it is needed.
1023 * On failure print an error message but continue.
1024 * Return 0 on wrong keyword in config file and 1 otherwise.
1026 static int s390_cpumsf__config(const char *var, const char *value, void *cb)
1028 struct s390_cpumsf *sf = cb;
1029 struct stat stbuf;
1030 int rc;
1032 if (strcmp(var, "auxtrace.dumpdir"))
1033 return 0;
1034 sf->logdir = strdup(value);
1035 if (sf->logdir == NULL) {
1036 pr_err("Failed to find auxtrace log directory %s,"
1037 " continue with current directory...\n", value);
1038 return 1;
1040 rc = stat(sf->logdir, &stbuf);
1041 if (rc == -1 || !S_ISDIR(stbuf.st_mode)) {
1042 pr_err("Missing auxtrace log directory %s,"
1043 " continue with current directory...\n", value);
1044 free(sf->logdir);
1045 sf->logdir = NULL;
1047 return 1;
1050 int s390_cpumsf_process_auxtrace_info(union perf_event *event,
1051 struct perf_session *session)
1053 struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
1054 struct s390_cpumsf *sf;
1055 int err;
1057 if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event))
1058 return -EINVAL;
1060 sf = zalloc(sizeof(struct s390_cpumsf));
1061 if (sf == NULL)
1062 return -ENOMEM;
1064 if (!check_auxtrace_itrace(session->itrace_synth_opts)) {
1065 err = -EINVAL;
1066 goto err_free;
1068 sf->use_logfile = session->itrace_synth_opts->log;
1069 if (sf->use_logfile)
1070 perf_config(s390_cpumsf__config, sf);
1072 err = auxtrace_queues__init(&sf->queues);
1073 if (err)
1074 goto err_free;
1076 sf->session = session;
1077 sf->machine = &session->machines.host; /* No kvm support */
1078 sf->auxtrace_type = auxtrace_info->type;
1079 sf->pmu_type = PERF_TYPE_RAW;
1080 sf->machine_type = s390_cpumsf_get_type(session->evlist->env->cpuid);
1082 sf->auxtrace.process_event = s390_cpumsf_process_event;
1083 sf->auxtrace.process_auxtrace_event = s390_cpumsf_process_auxtrace_event;
1084 sf->auxtrace.flush_events = s390_cpumsf_flush;
1085 sf->auxtrace.free_events = s390_cpumsf_free_events;
1086 sf->auxtrace.free = s390_cpumsf_free;
1087 session->auxtrace = &sf->auxtrace;
1089 if (dump_trace)
1090 return 0;
1092 err = auxtrace_queues__process_index(&sf->queues, session);
1093 if (err)
1094 goto err_free_queues;
1096 if (sf->queues.populated)
1097 sf->data_queued = true;
1099 return 0;
1101 err_free_queues:
1102 auxtrace_queues__free(&sf->queues);
1103 session->auxtrace = NULL;
1104 err_free:
1105 free(sf->logdir);
1106 free(sf);
1107 return err;