1 // SPDX-License-Identifier: GPL-2.0
11 #include <linux/kernel.h>
12 #include <linux/time64.h>
13 #include <linux/list.h>
14 #include <linux/err.h>
15 #include <linux/zalloc.h>
16 #include <internal/lib.h>
17 #include <subcmd/parse-options.h>
20 #include "util/data.h"
21 #include "util/stat.h"
22 #include "util/debug.h"
23 #include "util/symbol.h"
24 #include "util/session.h"
25 #include "util/build-id.h"
26 #include "util/sample.h"
27 #include "util/synthetic-events.h"
/* Device major number stored in the maj field of every synthesized MMAP2 event. */
29 #define MMAP_DEV_MAJOR 8
/* The DSO table is allocated with this many entries per requested mmap event. */
30 #define DSO_MMAP_RATIO 4
/* Benchmark tunables, settable through the -i, -m and -n options. */
32 static unsigned int iterations
= 100;
33 static unsigned int nr_mmaps
= 100;
34 static unsigned int nr_samples
= 100; /* samples per mmap */
/*
 * Sample type bits and sample id header size (in bytes) used by the
 * synthesize_*() helpers; both are assigned in do_inject_loops().
 */
36 static u64 bench_sample_type
;
37 static u16 bench_id_hdr_size
;
/* NOTE(review): member of a struct whose opening line is not shown here - presumably struct bench_dso; confirm. */
47 struct list_head list
;
/* Table of candidate DSOs: allocated in collect_dso(), filled in by add_dso(). */
53 static struct bench_dso
*dsos
;
/* Entry point of the inject command; called directly from setup_injection(). */
55 extern int cmd_inject(int argc
, const char *argv
[]);
/*
 * Command-line options of this benchmark. Each numeric option writes
 * straight into the file-scope tunable of the same name; -v bumps the
 * global verbose level.
 */
57 static const struct option options
[] = {
58 OPT_UINTEGER('i', "iterations", &iterations
,
59 "Number of iterations used to compute average (default: 100)"),
60 OPT_UINTEGER('m', "nr-mmaps", &nr_mmaps
,
61 "Number of mmap events for each iteration (default: 100)"),
62 OPT_UINTEGER('n', "nr-samples", &nr_samples
,
63 "Number of sample events per mmap event (default: 100)"),
64 OPT_INCR('v', "verbose", &verbose
,
65 "be more verbose (show iteration count, DSO name, etc)"),
/* Usage text passed to parse_options() and usage_with_options(). */
69 static const char *const bench_usage
[] = {
70 "perf bench internals inject-build-id <options>",
75 * Helper for collect_dso() that adds the given file to the dsos table
76 * if it contains a build-id. Stops once DSO_MMAP_RATIO (4) times as many
77 * DSOs as requested mmaps have been collected (for MMAP2 events).
/*
 * nftw() callback installed by collect_dso(): examines one visited path
 * and records it in the dsos table.
 *
 * @fpath:    path of the visited entry
 * @sb:       unused
 * @typeflag: nftw() entry type
 * @ftwbuf:   unused
 *
 * NOTE(review): the return statements of the early exits are not shown
 * here; presumably a non-zero return is what ends the nftw() walk once
 * the table is full - confirm.
 */
79 static int add_dso(const char *fpath
, const struct stat
*sb __maybe_unused
,
80 int typeflag
, struct FTW
*ftwbuf __maybe_unused
)
/* candidate slot: the table entry at index nr_dsos */
82 struct bench_dso
*dso
= &dsos
[nr_dsos
];
/* directories and symbolic links are singled out before any file access */
85 if (typeflag
== FTW_D
|| typeflag
== FTW_SL
)
/* a negative return means no build-id could be read from the file */
88 if (filename__read_build_id(fpath
, &bid
) < 0)
/* realpath() with a NULL buffer returns a malloc()ed string owned by dso->name */
91 dso
->name
= realpath(fpath
, NULL
);
92 if (dso
->name
== NULL
)
96 pr_debug2(" Adding DSO: %s\n", fpath
);
98 /* stop if we collected enough DSOs */
/* the limit equals the number of entries allocated in collect_dso() */
99 if ((unsigned int)nr_dsos
== DSO_MMAP_RATIO
* nr_mmaps
)
/*
 * Allocate the zero-initialized dsos table (nr_mmaps * DSO_MMAP_RATIO
 * entries) and fill it by walking /usr/lib/ with add_dso() as the
 * nftw() callback.
 */
105 static void collect_dso(void)
107 dsos
= calloc(nr_mmaps
* DSO_MMAP_RATIO
, sizeof(*dsos
));
109 printf(" Memory allocation failed\n");
/* FTW_PHYS: do not follow symbolic links; the walk may keep up to 10 directories open */
113 if (nftw("/usr/lib/", add_dso
, 10, FTW_PHYS
) < 0)
116 pr_debug(" Collected %d DSOs\n", nr_dsos
);
/*
 * Visit every collected entry of the dsos table (indices 0 .. nr_dsos - 1).
 *
 * NOTE(review): the per-entry work is not shown here - presumably it
 * releases the string stored in dso->name by add_dso(); confirm.
 */
119 static void release_dso(void)
123 for (i
= 0; i
< nr_dsos
; i
++) {
124 struct bench_dso
*dso
= &dsos
[i
];
131 /* Fake address used by mmap and sample events */
/*
 * Derived from the DSO's ino value: base 0x400000 plus 8192 bytes per ino,
 * so entries with different ino values get different addresses.
 */
132 static u64
dso_map_addr(struct bench_dso
*dso
)
134 return 0x400000ULL
+ dso
->ino
* 8192ULL;
/*
 * Write one PERF_RECORD_HEADER_ATTR event to the write end of
 * data->input_pipe. The attribute describes a software task-clock event
 * that excludes the kernel, has sample_id_all set and uses
 * bench_sample_type.
 *
 * Returns the result of writen().
 */
137 static ssize_t
synthesize_attr(struct bench_data
*data
)
139 union perf_event event
;
/*
 * Only the bytes that get written are cleared: the attr record plus one
 * trailing u64 (presumably a single event id slot - confirm).
 */
141 memset(&event
, 0, sizeof(event
.attr
) + sizeof(u64
));
143 event
.header
.type
= PERF_RECORD_HEADER_ATTR
;
144 event
.header
.size
= sizeof(event
.attr
) + sizeof(u64
);
146 event
.attr
.attr
.type
= PERF_TYPE_SOFTWARE
;
147 event
.attr
.attr
.config
= PERF_COUNT_SW_TASK_CLOCK
;
148 event
.attr
.attr
.exclude_kernel
= 1;
149 event
.attr
.attr
.sample_id_all
= 1;
150 event
.attr
.attr
.sample_type
= bench_sample_type
;
152 return writen(data
->input_pipe
[1], &event
, event
.header
.size
);
/*
 * Write one PERF_RECORD_FORK event (misc = PERF_RECORD_MISC_FORK_EXEC)
 * to the write end of data->input_pipe, using data->pid as both pid and
 * tid. The event is followed by a zero-filled sample id header of
 * bench_id_hdr_size bytes.
 *
 * Returns the result of writen().
 */
155 static ssize_t
synthesize_fork(struct bench_data
*data
)
157 union perf_event event
;
/* clear the fork record and the sample id header that trails it */
159 memset(&event
, 0, sizeof(event
.fork
) + bench_id_hdr_size
);
161 event
.header
.type
= PERF_RECORD_FORK
;
162 event
.header
.misc
= PERF_RECORD_MISC_FORK_EXEC
;
163 event
.header
.size
= sizeof(event
.fork
) + bench_id_hdr_size
;
167 event
.fork
.pid
= data
->pid
;
168 event
.fork
.tid
= data
->pid
;
170 return writen(data
->input_pipe
[1], &event
, event
.header
.size
);
/*
 * Write one PERF_RECORD_MMAP2 event for @dso, followed by a sample id
 * header carrying @timestamp, to the write end of data->input_pipe.
 *
 * @data:      benchmark state; data->pid is used as pid and tid
 * @dso:       DSO whose name, ino and fake address fill the event
 * @timestamp: value stored in the second-to-last u64 of the id header
 *
 * When the event plus id header does not fit in event.mmap2, the two
 * parts are written separately and the start of the event buffer is
 * reused as scratch space for the id header.
 */
173 static ssize_t
synthesize_mmap(struct bench_data
*data
, struct bench_dso
*dso
, u64 timestamp
)
175 union perf_event event
;
/* size of the fixed part of the record, up to the filename field */
176 size_t len
= offsetof(struct perf_record_mmap2
, filename
);
/* u64 view of the event buffer, used to place the timestamp */
177 u64
*id_hdr_ptr
= (void *)&event
;
/* add the NUL-terminated name padded to a multiple of 8, plus the id header */
180 len
+= roundup(strlen(dso
->name
) + 1, 8) + bench_id_hdr_size
;
/* never clear beyond the mmap2 member, even if len is larger */
182 memset(&event
, 0, min(len
, sizeof(event
.mmap2
)));
184 event
.header
.type
= PERF_RECORD_MMAP2
;
185 event
.header
.misc
= PERF_RECORD_MISC_USER
;
186 event
.header
.size
= len
;
188 event
.mmap2
.pid
= data
->pid
;
189 event
.mmap2
.tid
= data
->pid
;
190 event
.mmap2
.maj
= MMAP_DEV_MAJOR
;
191 event
.mmap2
.ino
= dso
->ino
;
193 strcpy(event
.mmap2
.filename
, dso
->name
);
195 event
.mmap2
.start
= dso_map_addr(dso
);
196 event
.mmap2
.len
= 4096;
197 event
.mmap2
.prot
= PROT_EXEC
;
/* the id header would land past the end of event.mmap2: write in two parts */
199 if (len
> sizeof(event
.mmap2
)) {
200 /* write mmap2 event first */
201 if (writen(data
->input_pipe
[1], &event
, len
- bench_id_hdr_size
) < 0)
203 /* zero-fill sample id header */
204 memset(id_hdr_ptr
, 0, bench_id_hdr_size
);
205 /* put timestamp in the right position */
206 ts_idx
= (bench_id_hdr_size
/ sizeof(u64
)) - 2;
207 id_hdr_ptr
[ts_idx
] = timestamp
;
208 if (writen(data
->input_pipe
[1], id_hdr_ptr
, bench_id_hdr_size
) < 0)
/* single-write case: the timestamp slot is the second-to-last u64 of the whole event */
214 ts_idx
= (len
/ sizeof(u64
)) - 2;
215 id_hdr_ptr
[ts_idx
] = timestamp
;
216 return writen(data
->input_pipe
[1], &event
, len
);
/*
 * Write one PERF_RECORD_SAMPLE event whose ip is the fake address of
 * @dso to the write end of data->input_pipe.
 *
 * The event size and payload are produced by
 * perf_event__sample_event_size() and perf_event__synthesize_sample()
 * from bench_sample_type.
 *
 * NOTE(review): only the .ip initializer of the sample is shown here;
 * @timestamp and data->pid are presumably used by the other
 * initializers - confirm.
 *
 * Returns the result of writen().
 */
219 static ssize_t
synthesize_sample(struct bench_data
*data
, struct bench_dso
*dso
, u64 timestamp
)
221 union perf_event event
;
222 struct perf_sample sample
= {
225 .ip
= dso_map_addr(dso
),
229 event
.header
.type
= PERF_RECORD_SAMPLE
;
230 event
.header
.misc
= PERF_RECORD_MISC_USER
;
231 event
.header
.size
= perf_event__sample_event_size(&sample
, bench_sample_type
, 0);
233 perf_event__synthesize_sample(&event
, bench_sample_type
, 0, &sample
);
235 return writen(data
->input_pipe
[1], &event
, event
.header
.size
);
/*
 * Write a header-only PERF_RECORD_FINISHED_ROUND event to the write end
 * of data->input_pipe.
 *
 * Returns the result of writen().
 */
238 static ssize_t
synthesize_flush(struct bench_data
*data
)
240 struct perf_event_header header
= {
241 .size
= sizeof(header
),
242 .type
= PERF_RECORD_FINISHED_ROUND
,
245 return writen(data
->input_pipe
[1], &header
, header
.size
);
/*
 * Thread function started by setup_injection(): reads from the read end
 * of data->output_pipe and closes that descriptor when done.
 *
 * @arg: the struct bench_data of the current iteration
 */
248 static void *data_reader(void *arg
)
250 struct bench_data
*data
= arg
;
/* switch the read end to non-blocking mode, keeping its other flags */
255 flag
= fcntl(data
->output_pipe
[0], F_GETFL
);
256 fcntl(data
->output_pipe
[0], F_SETFL
, flag
| O_NONBLOCK
);
258 /* read out data from child */
260 n
= read(data
->output_pipe
[0], buf
, sizeof(buf
));
/* EINTR and EAGAIN are told apart from every other errno value */
266 if (errno
!= EINTR
&& errno
!= EAGAIN
)
272 close(data
->output_pipe
[0]);
/*
 * Prepare one benchmark iteration: create the pipes, start the process
 * that runs cmd_inject() with its stdin/stdout connected to them, start
 * the data_reader() thread and wait for the ready signal.
 *
 * @data:         receives the pipe descriptors, the pid and the thread
 * @build_id_all: selects the build-id-all variant of the run
 *
 * NOTE(review): the call that sets data->pid is not shown here -
 * presumably fork(), with the pid == 0 branch being the child; confirm.
 */
276 static int setup_injection(struct bench_data
*data
, bool build_id_all
)
/* ready_pipe carries the ready signal; input/output pipes carry event data */
282 if (pipe(ready_pipe
) < 0)
285 if (pipe(data
->input_pipe
) < 0)
288 if (pipe(data
->output_pipe
) < 0)
295 if (data
->pid
== 0) {
296 const char **inject_argv
;
/* drop the pipe ends that belong to the other side */
299 close(data
->input_pipe
[1]);
300 close(data
->output_pipe
[0]);
301 close(ready_pipe
[0]);
/* events arrive on stdin, injected output leaves on stdout */
303 dup2(data
->input_pipe
[0], STDIN_FILENO
);
304 close(data
->input_pipe
[0]);
305 dup2(data
->output_pipe
[1], STDOUT_FILENO
);
306 close(data
->output_pipe
[1]);
/* anything written to stderr is discarded */
308 dev_null_fd
= open("/dev/null", O_WRONLY
);
312 dup2(dev_null_fd
, STDERR_FILENO
);
/* one extra slot keeps the argv array NULL-terminated (calloc zero-fills) */
317 inject_argv
= calloc(inject_argc
+ 1, sizeof(*inject_argv
));
318 if (inject_argv
== NULL
)
321 inject_argv
[0] = strdup("inject");
322 inject_argv
[1] = strdup("-b");
/* NOTE(review): presumably only added when build_id_all is set - confirm */
324 inject_argv
[2] = strdup("--buildid-all");
/* closing the write end ends the one-byte read in the waiting process */
327 close(ready_pipe
[1]);
329 cmd_inject(inject_argc
, inject_argv
);
/* the reader thread reads from output_pipe[0] (see data_reader()) */
334 pthread_create(&data
->th
, NULL
, data_reader
, data
);
336 close(ready_pipe
[1]);
337 close(data
->input_pipe
[0]);
338 close(data
->output_pipe
[1]);
340 /* wait for child ready */
341 if (read(ready_pipe
[0], &buf
, 1) < 0)
343 close(ready_pipe
[0]);
/*
 * Feed one iteration's worth of synthesized events into the injection
 * process and wait for it to exit.
 *
 * Order of the stream: pipe header, attr event, fork event, then for
 * each of nr_mmaps randomly picked DSOs one MMAP2 event followed by
 * nr_samples sample events, with a FINISHED_ROUND event after every
 * tenth mmap.
 *
 * @data:    pipes and pid set up by setup_injection()
 * @max_rss: receives ru_maxrss as reported by wait4() for the child
 */
348 static int inject_build_id(struct bench_data
*data
, u64
*max_rss
)
352 struct rusage rusage
;
354 /* this lets the child start running */
355 if (perf_header__write_pipe(data
->input_pipe
[1]) < 0)
358 if (synthesize_attr(data
) < 0)
361 if (synthesize_fork(data
) < 0)
364 for (i
= 0; i
< nr_mmaps
; i
++) {
/* pick a random collected DSO; requires nr_dsos > 0 */
365 int idx
= rand() % nr_dsos
;
366 struct bench_dso
*dso
= &dsos
[idx
];
367 u64 timestamp
= rand() % 1000000;
369 pr_debug2(" [%d] injecting: %s\n", i
+1, dso
->name
);
370 if (synthesize_mmap(data
, dso
, timestamp
) < 0)
/* samples follow their mmap, spaced 1000 timestamp units apart */
373 for (k
= 0; k
< nr_samples
; k
++) {
374 if (synthesize_sample(data
, dso
, timestamp
+ k
* 1000) < 0)
/* emit a FINISHED_ROUND event after every tenth mmap */
378 if ((i
+ 1) % 10 == 0) {
379 if (synthesize_flush(data
) < 0)
384 /* this lets the child finish */
385 close(data
->input_pipe
[1]);
/* reap the child and collect its resource usage */
387 wait4(data
->pid
, &status
, 0, &rusage
);
388 *max_rss
= rusage
.ru_maxrss
;
390 pr_debug(" Child %d exited with %d\n", data
->pid
, status
);
/*
 * Run the injection benchmark `iterations` times for one mode and print
 * the average and standard deviation of the elapsed time (total and per
 * event) and of the child's maximum RSS.
 *
 * @data:         per-iteration state reused across iterations
 * @build_id_all: selects the "-all" variant; also used in the output text
 */
395 static void do_inject_loop(struct bench_data
*data
, bool build_id_all
)
398 struct stats time_stats
, mem_stats
;
399 double time_average
, time_stddev
;
400 double mem_average
, mem_stddev
;
402 init_stats(&time_stats
);
403 init_stats(&mem_stats
);
405 pr_debug(" Build-id%s injection benchmark\n", build_id_all
? "-all" : "");
407 for (i
= 0; i
< iterations
; i
++) {
408 struct timeval start
, end
, diff
;
409 u64 runtime_us
, max_rss
;
411 pr_debug(" Iteration #%d\n", i
+1);
/* setup happens before the clock starts, so it is not part of the measured time */
413 if (setup_injection(data
, build_id_all
) < 0) {
414 printf(" Build-id injection setup failed\n");
418 gettimeofday(&start
, NULL
);
419 if (inject_build_id(data
, &max_rss
) < 0) {
420 printf(" Build-id injection failed\n");
424 gettimeofday(&end
, NULL
);
425 timersub(&end
, &start
, &diff
);
/* elapsed wall-clock time of this iteration in microseconds */
426 runtime_us
= diff
.tv_sec
* USEC_PER_SEC
+ diff
.tv_usec
;
427 update_stats(&time_stats
, runtime_us
);
428 update_stats(&mem_stats
, max_rss
);
/* wait for the data_reader() thread started by setup_injection() */
430 pthread_join(data
->th
, NULL
);
/* samples are in microseconds; report the totals in milliseconds */
433 time_average
= avg_stats(&time_stats
) / USEC_PER_MSEC
;
434 time_stddev
= stddev_stats(&time_stats
) / USEC_PER_MSEC
;
435 printf(" Average build-id%s injection took: %.3f msec (+- %.3f msec)\n",
436 build_id_all
? "-all" : "", time_average
, time_stddev
);
438 /* each iteration, it processes MMAP2 + BUILD_ID + nr_samples * SAMPLE */
439 time_average
= avg_stats(&time_stats
) / (nr_mmaps
* (nr_samples
+ 2));
440 time_stddev
= stddev_stats(&time_stats
) / (nr_mmaps
* (nr_samples
+ 2));
441 printf(" Average time per event: %.3f usec (+- %.3f usec)\n",
442 time_average
, time_stddev
);
444 mem_average
= avg_stats(&mem_stats
);
445 mem_stddev
= stddev_stats(&mem_stats
);
446 printf(" Average memory usage: %.0f KB (+- %.0f KB)\n",
447 mem_average
, mem_stddev
);
/*
 * Set the sample layout shared by all synthesized events, then run the
 * benchmark twice: first without and then with build_id_all.
 *
 * @data: per-iteration state handed to do_inject_loop()
 */
450 static int do_inject_loops(struct bench_data
*data
)
456 bench_sample_type
= PERF_SAMPLE_IDENTIFIER
| PERF_SAMPLE_IP
;
457 bench_sample_type
|= PERF_SAMPLE_TID
| PERF_SAMPLE_TIME
;
/*
 * 32 bytes = four u64 slots; synthesize_mmap() stores the timestamp in
 * the second-to-last one.
 */
458 bench_id_hdr_size
= 32;
462 printf(" Cannot collect DSOs for injection\n");
466 do_inject_loop(data
, false);
467 do_inject_loop(data
, true);
/*
 * Entry point of the benchmark: parse the command line against `options`
 * and run the injection loops.
 *
 * @argc: argument count
 * @argv: argument vector
 *
 * Returns the result of do_inject_loops().
 */
473 int bench_inject_build_id(int argc
, const char **argv
)
475 struct bench_data data
;
/* parse_options() returns the number of arguments it did not consume */
477 argc
= parse_options(argc
, argv
, options
, bench_usage
, 0);
479 usage_with_options(bench_usage
, options
);
483 return do_inject_loops(&data
);