1 // SPDX-License-Identifier: GPL-2.0
2 #include <sys/sysmacros.h>
15 #include <linux/stringify.h>
21 #include "namespaces.h"
32 #include <linux/ctype.h>
33 #include <linux/zalloc.h>
36 struct perf_data
*output
;
37 struct perf_session
*session
;
38 struct machine
*machine
;
40 union jr_entry
*entry
;
45 bool needs_bswap
; /* handles cross-endianness */
46 bool use_arch_timestamp
;
49 uint64_t unwinding_size
;
50 uint64_t unwinding_mapped_size
;
51 uint64_t eh_frame_hdr_size
;
52 size_t nr_debug_entries
;
53 uint32_t code_load_count
;
55 struct rb_root code_root
;
60 struct perf_tool tool
;
61 struct perf_data output
;
62 struct perf_data input
;
/*
 * hmax(a, b): evaluates to the larger of a and b.
 * Each argument may be evaluated more than once, so do not pass
 * expressions with side effects.
 */
#define hmax(a, b) ((a) > (b) ? (a) : (b))

/*
 * get_jit_tool(t): given t, a pointer to the 'tool' member embedded in a
 * struct jit_tool, yield a pointer to the enclosing struct jit_tool.
 *
 * The macro argument is now actually used; previously the expansion
 * referenced a variable literally named 'tool' regardless of what was
 * passed in, so it only worked when the caller's variable had that name.
 */
#define get_jit_tool(t) (container_of((t), struct jit_tool, tool))
70 jit_emit_elf(struct jit_buf_desc
*jd
,
79 uint32_t unwinding_header_size
,
80 uint32_t unwinding_size
)
82 int ret
, fd
, saved_errno
;
86 fprintf(stderr
, "write ELF image %s\n", filename
);
88 nsinfo__mountns_enter(jd
->nsi
, &nsc
);
89 fd
= open(filename
, O_CREAT
|O_TRUNC
|O_WRONLY
, 0644);
91 nsinfo__mountns_exit(&nsc
);
93 pr_warning("cannot create jit ELF %s: %s\n", filename
, strerror(saved_errno
));
97 ret
= jit_write_elf(fd
, code_addr
, sym
, (const void *)code
, csize
, debug
, nr_debug_entries
,
98 unwinding
, unwinding_header_size
, unwinding_size
);
103 nsinfo__mountns_enter(jd
->nsi
, &nsc
);
105 nsinfo__mountns_exit(&nsc
);
112 jit_close(struct jit_buf_desc
*jd
)
122 jit_validate_events(struct perf_session
*session
)
127 * check that all events use CLOCK_MONOTONIC
129 evlist__for_each_entry(session
->evlist
, evsel
) {
130 if (evsel
->core
.attr
.use_clockid
== 0 || evsel
->core
.attr
.clockid
!= CLOCK_MONOTONIC
)
137 jit_open(struct jit_buf_desc
*jd
, const char *name
)
139 struct jitheader header
;
141 struct jr_prefix
*prefix
;
143 void *n
, *buf
= NULL
;
144 int ret
, retval
= -1;
146 nsinfo__mountns_enter(jd
->nsi
, &nsc
);
147 jd
->in
= fopen(name
, "r");
148 nsinfo__mountns_exit(&nsc
);
152 bsz
= hmax(sizeof(header
), sizeof(*prefix
));
159 * protect from writer modifying the file while we are reading it
163 ret
= fread(buf
, sizeof(header
), 1, jd
->in
);
167 memcpy(&header
, buf
, sizeof(header
));
169 if (header
.magic
!= JITHEADER_MAGIC
) {
170 if (header
.magic
!= JITHEADER_MAGIC_SW
)
172 jd
->needs_bswap
= true;
175 if (jd
->needs_bswap
) {
176 header
.version
= bswap_32(header
.version
);
177 header
.total_size
= bswap_32(header
.total_size
);
178 header
.pid
= bswap_32(header
.pid
);
179 header
.elf_mach
= bswap_32(header
.elf_mach
);
180 header
.timestamp
= bswap_64(header
.timestamp
);
181 header
.flags
= bswap_64(header
.flags
);
184 jd
->use_arch_timestamp
= header
.flags
& JITDUMP_FLAGS_ARCH_TIMESTAMP
;
187 pr_debug("version=%u\nhdr.size=%u\nts=0x%llx\npid=%d\nelf_mach=%d\nuse_arch_timestamp=%d\n",
190 (unsigned long long)header
.timestamp
,
193 jd
->use_arch_timestamp
);
195 if (header
.version
> JITHEADER_VERSION
) {
196 pr_err("wrong jitdump version %u, expected " __stringify(JITHEADER_VERSION
),
201 if (header
.flags
& JITDUMP_FLAGS_RESERVED
) {
202 pr_err("jitdump file contains invalid or unsupported flags 0x%llx\n",
203 (unsigned long long)header
.flags
& JITDUMP_FLAGS_RESERVED
);
207 if (jd
->use_arch_timestamp
&& !jd
->session
->time_conv
.time_mult
) {
208 pr_err("jitdump file uses arch timestamps but there is no timestamp conversion\n");
213 * validate event is using the correct clockid
215 if (!jd
->use_arch_timestamp
&& jit_validate_events(jd
->session
)) {
216 pr_err("error, jitted code must be sampled with perf record -k 1\n");
220 bs
= header
.total_size
- sizeof(header
);
223 n
= realloc(buf
, bs
);
228 /* read extra we do not know about */
229 ret
= fread(buf
, bs
- bsz
, 1, jd
->in
);
234 * keep dirname for generating files and mmap records
236 strcpy(jd
->dir
, name
);
248 static union jr_entry
*
249 jit_get_next_entry(struct jit_buf_desc
*jd
)
251 struct jr_prefix
*prefix
;
260 if (jd
->buf
== NULL
) {
261 size_t sz
= getpagesize();
262 if (sz
< sizeof(*prefix
))
263 sz
= sizeof(*prefix
);
265 jd
->buf
= malloc(sz
);
275 * file is still locked at this point
277 ret
= fread(prefix
, sizeof(*prefix
), 1, jd
->in
);
281 if (jd
->needs_bswap
) {
282 prefix
->id
= bswap_32(prefix
->id
);
283 prefix
->total_size
= bswap_32(prefix
->total_size
);
284 prefix
->timestamp
= bswap_64(prefix
->timestamp
);
287 size
= prefix
->total_size
;
290 if (bs
< sizeof(*prefix
))
293 if (id
>= JIT_CODE_MAX
) {
294 pr_warning("next_entry: unknown record type %d, skipping\n", id
);
296 if (bs
> jd
->bufsize
) {
298 n
= realloc(jd
->buf
, bs
);
305 addr
= ((void *)jd
->buf
) + sizeof(*prefix
);
307 ret
= fread(addr
, bs
- sizeof(*prefix
), 1, jd
->in
);
311 jr
= (union jr_entry
*)jd
->buf
;
314 case JIT_CODE_DEBUG_INFO
:
315 if (jd
->needs_bswap
) {
317 jr
->info
.code_addr
= bswap_64(jr
->info
.code_addr
);
318 jr
->info
.nr_entry
= bswap_64(jr
->info
.nr_entry
);
319 for (n
= 0 ; n
< jr
->info
.nr_entry
; n
++) {
320 jr
->info
.entries
[n
].addr
= bswap_64(jr
->info
.entries
[n
].addr
);
321 jr
->info
.entries
[n
].lineno
= bswap_32(jr
->info
.entries
[n
].lineno
);
322 jr
->info
.entries
[n
].discrim
= bswap_32(jr
->info
.entries
[n
].discrim
);
326 case JIT_CODE_UNWINDING_INFO
:
327 if (jd
->needs_bswap
) {
328 jr
->unwinding
.unwinding_size
= bswap_64(jr
->unwinding
.unwinding_size
);
329 jr
->unwinding
.eh_frame_hdr_size
= bswap_64(jr
->unwinding
.eh_frame_hdr_size
);
330 jr
->unwinding
.mapped_size
= bswap_64(jr
->unwinding
.mapped_size
);
336 if (jd
->needs_bswap
) {
337 jr
->load
.pid
= bswap_32(jr
->load
.pid
);
338 jr
->load
.tid
= bswap_32(jr
->load
.tid
);
339 jr
->load
.vma
= bswap_64(jr
->load
.vma
);
340 jr
->load
.code_addr
= bswap_64(jr
->load
.code_addr
);
341 jr
->load
.code_size
= bswap_64(jr
->load
.code_size
);
342 jr
->load
.code_index
= bswap_64(jr
->load
.code_index
);
344 jd
->code_load_count
++;
347 if (jd
->needs_bswap
) {
348 jr
->move
.pid
= bswap_32(jr
->move
.pid
);
349 jr
->move
.tid
= bswap_32(jr
->move
.tid
);
350 jr
->move
.vma
= bswap_64(jr
->move
.vma
);
351 jr
->move
.old_code_addr
= bswap_64(jr
->move
.old_code_addr
);
352 jr
->move
.new_code_addr
= bswap_64(jr
->move
.new_code_addr
);
353 jr
->move
.code_size
= bswap_64(jr
->move
.code_size
);
354 jr
->move
.code_index
= bswap_64(jr
->move
.code_index
);
359 /* skip unknown record (we have read them) */
366 jit_inject_event(struct jit_buf_desc
*jd
, union perf_event
*event
)
370 size
= perf_data__write(jd
->output
, event
, event
->header
.size
);
374 jd
->bytes_written
+= size
;
378 static pid_t
jr_entry_pid(struct jit_buf_desc
*jd
, union jr_entry
*jr
)
380 if (jd
->nsi
&& nsinfo__in_pidns(jd
->nsi
))
381 return nsinfo__tgid(jd
->nsi
);
385 static pid_t
jr_entry_tid(struct jit_buf_desc
*jd
, union jr_entry
*jr
)
387 if (jd
->nsi
&& nsinfo__in_pidns(jd
->nsi
))
388 return nsinfo__pid(jd
->nsi
);
392 static uint64_t convert_timestamp(struct jit_buf_desc
*jd
, uint64_t timestamp
)
394 struct perf_tsc_conversion tc
= { .time_shift
= 0, };
395 struct perf_record_time_conv
*time_conv
= &jd
->session
->time_conv
;
397 if (!jd
->use_arch_timestamp
)
400 tc
.time_shift
= time_conv
->time_shift
;
401 tc
.time_mult
= time_conv
->time_mult
;
402 tc
.time_zero
= time_conv
->time_zero
;
405 * The TIME_CONV event was extended with the fields starting at
406 * "time_cycles" when cap_user_time_short became supported. For backward
407 * compatibility, check the event size and assign these extended fields
408 * only if they are contained in the event.
410 if (event_contains(*time_conv
, time_cycles
)) {
411 tc
.time_cycles
= time_conv
->time_cycles
;
412 tc
.time_mask
= time_conv
->time_mask
;
413 tc
.cap_user_time_zero
= time_conv
->cap_user_time_zero
;
414 tc
.cap_user_time_short
= time_conv
->cap_user_time_short
;
416 if (!tc
.cap_user_time_zero
)
420 return tsc_to_perf_time(timestamp
, &tc
);
423 static int jit_repipe_code_load(struct jit_buf_desc
*jd
, union jr_entry
*jr
)
425 struct perf_sample sample
;
426 union perf_event
*event
;
427 const struct perf_tool
*tool
= jd
->session
->tool
;
436 int ret
, csize
, usize
;
437 pid_t nspid
, pid
, tid
;
443 nspid
= jr
->load
.pid
;
444 pid
= jr_entry_pid(jd
, jr
);
445 tid
= jr_entry_tid(jd
, jr
);
446 csize
= jr
->load
.code_size
;
447 usize
= jd
->unwinding_mapped_size
;
448 addr
= jr
->load
.code_addr
;
449 sym
= (void *)((unsigned long)jr
+ sizeof(jr
->load
));
450 code
= (unsigned long)jr
+ jr
->load
.p
.total_size
- csize
;
451 count
= jr
->load
.code_index
;
452 idr_size
= jd
->machine
->id_hdr_size
;
454 event
= calloc(1, sizeof(*event
) + idr_size
);
458 filename
= event
->mmap2
.filename
;
459 size
= snprintf(filename
, PATH_MAX
, "%s/jitted-%d-%" PRIu64
".so",
466 size
= PERF_ALIGN(size
, sizeof(u64
));
467 uaddr
= (uintptr_t)code
;
468 ret
= jit_emit_elf(jd
, filename
, sym
, addr
, (const void *)uaddr
, csize
, jd
->debug_data
, jd
->nr_debug_entries
,
469 jd
->unwinding_data
, jd
->eh_frame_hdr_size
, jd
->unwinding_size
);
471 if (jd
->debug_data
&& jd
->nr_debug_entries
) {
472 zfree(&jd
->debug_data
);
473 jd
->nr_debug_entries
= 0;
476 if (jd
->unwinding_data
&& jd
->eh_frame_hdr_size
) {
477 zfree(&jd
->unwinding_data
);
478 jd
->eh_frame_hdr_size
= 0;
479 jd
->unwinding_mapped_size
= 0;
480 jd
->unwinding_size
= 0;
487 if (nsinfo__stat(filename
, &st
, jd
->nsi
))
488 memset(&st
, 0, sizeof(st
));
490 event
->mmap2
.header
.type
= PERF_RECORD_MMAP2
;
491 event
->mmap2
.header
.misc
= PERF_RECORD_MISC_USER
;
492 event
->mmap2
.header
.size
= (sizeof(event
->mmap2
) -
493 (sizeof(event
->mmap2
.filename
) - size
) + idr_size
);
495 event
->mmap2
.pgoff
= GEN_ELF_TEXT_OFFSET
;
496 event
->mmap2
.start
= addr
;
497 event
->mmap2
.len
= usize
? ALIGN_8(csize
) + usize
: csize
;
498 event
->mmap2
.pid
= pid
;
499 event
->mmap2
.tid
= tid
;
500 event
->mmap2
.ino
= st
.st_ino
;
501 event
->mmap2
.maj
= major(st
.st_dev
);
502 event
->mmap2
.min
= minor(st
.st_dev
);
503 event
->mmap2
.prot
= st
.st_mode
;
504 event
->mmap2
.flags
= MAP_SHARED
;
505 event
->mmap2
.ino_generation
= 1;
507 id
= (void *)((unsigned long)event
+ event
->mmap
.header
.size
- idr_size
);
508 if (jd
->sample_type
& PERF_SAMPLE_TID
) {
512 if (jd
->sample_type
& PERF_SAMPLE_TIME
)
513 id
->time
= convert_timestamp(jd
, jr
->load
.p
.timestamp
);
516 * create pseudo sample to induce dso hit increment
517 * use first address as sample address
519 memset(&sample
, 0, sizeof(sample
));
520 sample
.cpumode
= PERF_RECORD_MISC_USER
;
523 sample
.time
= id
->time
;
526 ret
= perf_event__process_mmap2(tool
, event
, &sample
, jd
->machine
);
530 ret
= jit_inject_event(jd
, event
);
532 * mark dso as used to generate buildid in the header
535 build_id__mark_dso_hit(tool
, event
, &sample
, NULL
, jd
->machine
);
542 static int jit_repipe_code_move(struct jit_buf_desc
*jd
, union jr_entry
*jr
)
544 struct perf_sample sample
;
545 union perf_event
*event
;
546 const struct perf_tool
*tool
= jd
->session
->tool
;
553 pid_t nspid
, pid
, tid
;
559 nspid
= jr
->load
.pid
;
560 pid
= jr_entry_pid(jd
, jr
);
561 tid
= jr_entry_tid(jd
, jr
);
562 usize
= jd
->unwinding_mapped_size
;
563 idr_size
= jd
->machine
->id_hdr_size
;
566 * +16 to account for sample_id_all (hack)
568 event
= calloc(1, sizeof(*event
) + 16);
572 filename
= event
->mmap2
.filename
;
573 size
= snprintf(filename
, PATH_MAX
, "%s/jitted-%d-%" PRIu64
".so",
576 jr
->move
.code_index
);
580 if (nsinfo__stat(filename
, &st
, jd
->nsi
))
581 memset(&st
, 0, sizeof(st
));
583 size
= PERF_ALIGN(size
, sizeof(u64
));
585 event
->mmap2
.header
.type
= PERF_RECORD_MMAP2
;
586 event
->mmap2
.header
.misc
= PERF_RECORD_MISC_USER
;
587 event
->mmap2
.header
.size
= (sizeof(event
->mmap2
) -
588 (sizeof(event
->mmap2
.filename
) - size
) + idr_size
);
589 event
->mmap2
.pgoff
= GEN_ELF_TEXT_OFFSET
;
590 event
->mmap2
.start
= jr
->move
.new_code_addr
;
591 event
->mmap2
.len
= usize
? ALIGN_8(jr
->move
.code_size
) + usize
592 : jr
->move
.code_size
;
593 event
->mmap2
.pid
= pid
;
594 event
->mmap2
.tid
= tid
;
595 event
->mmap2
.ino
= st
.st_ino
;
596 event
->mmap2
.maj
= major(st
.st_dev
);
597 event
->mmap2
.min
= minor(st
.st_dev
);
598 event
->mmap2
.prot
= st
.st_mode
;
599 event
->mmap2
.flags
= MAP_SHARED
;
600 event
->mmap2
.ino_generation
= 1;
602 id
= (void *)((unsigned long)event
+ event
->mmap
.header
.size
- idr_size
);
603 if (jd
->sample_type
& PERF_SAMPLE_TID
) {
607 if (jd
->sample_type
& PERF_SAMPLE_TIME
)
608 id
->time
= convert_timestamp(jd
, jr
->load
.p
.timestamp
);
611 * create pseudo sample to induce dso hit increment
612 * use first address as sample address
614 memset(&sample
, 0, sizeof(sample
));
615 sample
.cpumode
= PERF_RECORD_MISC_USER
;
618 sample
.time
= id
->time
;
619 sample
.ip
= jr
->move
.new_code_addr
;
621 ret
= perf_event__process_mmap2(tool
, event
, &sample
, jd
->machine
);
625 ret
= jit_inject_event(jd
, event
);
627 build_id__mark_dso_hit(tool
, event
, &sample
, NULL
, jd
->machine
);
632 static int jit_repipe_debug_info(struct jit_buf_desc
*jd
, union jr_entry
*jr
)
640 sz
= jr
->prefix
.total_size
- sizeof(jr
->info
);
645 memcpy(data
, &jr
->info
.entries
, sz
);
647 jd
->debug_data
= data
;
650 * we must use nr_entry instead of size here because
651 * we cannot distinguish actual entry from padding otherwise
653 jd
->nr_debug_entries
= jr
->info
.nr_entry
;
659 jit_repipe_unwinding_info(struct jit_buf_desc
*jd
, union jr_entry
*jr
)
661 void *unwinding_data
;
662 uint32_t unwinding_data_size
;
667 unwinding_data_size
= jr
->prefix
.total_size
- sizeof(jr
->unwinding
);
668 unwinding_data
= malloc(unwinding_data_size
);
672 memcpy(unwinding_data
, &jr
->unwinding
.unwinding_data
,
673 unwinding_data_size
);
675 jd
->eh_frame_hdr_size
= jr
->unwinding
.eh_frame_hdr_size
;
676 jd
->unwinding_size
= jr
->unwinding
.unwinding_size
;
677 jd
->unwinding_mapped_size
= jr
->unwinding
.mapped_size
;
678 free(jd
->unwinding_data
);
679 jd
->unwinding_data
= unwinding_data
;
685 jit_process_dump(struct jit_buf_desc
*jd
)
690 while ((jr
= jit_get_next_entry(jd
))) {
691 switch(jr
->prefix
.id
) {
693 ret
= jit_repipe_code_load(jd
, jr
);
696 ret
= jit_repipe_code_move(jd
, jr
);
698 case JIT_CODE_DEBUG_INFO
:
699 ret
= jit_repipe_debug_info(jd
, jr
);
701 case JIT_CODE_UNWINDING_INFO
:
702 ret
= jit_repipe_unwinding_info(jd
, jr
);
713 jit_inject(struct jit_buf_desc
*jd
, const char *path
)
718 fprintf(stderr
, "injecting: %s\n", path
);
720 ret
= jit_open(jd
, path
);
724 ret
= jit_process_dump(jd
);
729 fprintf(stderr
, "injected: %s (%d)\n", path
, ret
);
735 * File name must match the pattern .../jit-XXXX.dump,
736 * where XXXX is the PID of the process which did the mmap(),
737 * as captured in the RECORD_MMAP record
740 jit_detect(const char *mmap_name
, pid_t pid
, struct nsinfo
*nsi
)
747 fprintf(stderr
, "jit marker trying : %s\n", mmap_name
);
751 p
= strrchr(mmap_name
, '/');
758 if (strncmp(p
, "/jit-", 5))
767 * must be followed by a pid
772 pid2
= (int)strtol(p
, &end
, 10);
777 * pid does not match mmap pid
778 * pid==0 in system-wide mode (synthesized)
780 if (pid
&& pid2
!= nsinfo__nstgid(nsi
))
785 if (strcmp(end
, ".dump"))
789 fprintf(stderr
, "jit marker found: %s\n", mmap_name
);
794 static void jit_add_pid(struct machine
*machine
, pid_t pid
)
796 struct thread
*thread
= machine__findnew_thread(machine
, pid
, pid
);
799 pr_err("%s: thread %d not found or created\n", __func__
, pid
);
803 thread__set_priv(thread
, (void *)true);
807 static bool jit_has_pid(struct machine
*machine
, pid_t pid
)
809 struct thread
*thread
= machine__find_thread(machine
, pid
, pid
);
815 priv
= thread__priv(thread
);
821 jit_process(struct perf_session
*session
,
822 struct perf_data
*output
,
823 struct machine
*machine
,
824 const char *filename
,
829 struct thread
*thread
;
832 struct jit_buf_desc jd
;
835 thread
= machine__findnew_thread(machine
, pid
, tid
);
836 if (thread
== NULL
) {
837 pr_err("problem processing JIT mmap event, skipping it.\n");
841 nsi
= nsinfo__get(thread__nsinfo(thread
));
845 * first, detect marker mmap (i.e., the jitdump mmap)
847 if (jit_detect(filename
, pid
, nsi
)) {
851 * Strip //anon*, [anon:* and /memfd:* mmaps if we processed a jitdump for this pid
853 if (jit_has_pid(machine
, pid
) &&
854 ((strncmp(filename
, "//anon", 6) == 0) ||
855 (strncmp(filename
, "[anon:", 6) == 0) ||
856 (strncmp(filename
, "/memfd:", 7) == 0)))
862 memset(&jd
, 0, sizeof(jd
));
864 jd
.session
= session
;
866 jd
.machine
= machine
;
870 * track sample_type to compute id_all layout
871 * perf sets the same sample type to all events as of now
873 first
= evlist__first(session
->evlist
);
874 jd
.sample_type
= first
->core
.attr
.sample_type
;
878 ret
= jit_inject(&jd
, filename
);
880 jit_add_pid(machine
, pid
);
881 *nbytes
= jd
.bytes_written
;