1 //===-- PerfContextSwitchDecoder.cpp --======------------------------------===//
2 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3 // See https://llvm.org/LICENSE.txt for license information.
4 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //===----------------------------------------------------------------------===//
8 #include "PerfContextSwitchDecoder.h"
12 using namespace lldb_private
;
13 using namespace lldb_private::trace_intel_pt
;
/// Copied from <linux/perf_event.h> to avoid depending on perf_event.h on
/// non-linux platforms.

/// Bit of perf_event_header::misc that is set when a context switch record
/// describes a switch out (as opposed to a switch in).
#define PERF_RECORD_MISC_SWITCH_OUT (1 << 13)

/// Record types (perf_event_header::type) that this decoder inspects.
#define PERF_RECORD_LOST 2
#define PERF_RECORD_THROTTLE 5
#define PERF_RECORD_UNTHROTTLE 6
#define PERF_RECORD_LOST_SAMPLES 13
#define PERF_RECORD_SWITCH_CPU_WIDE 15
/// Used only as an upper bound when sanity-checking record types.
#define PERF_RECORD_MAX 19
28 struct perf_event_header
{
34 /// An \a llvm::Error if the record looks obviously wrong, or \a
35 /// llvm::Error::success() otherwise.
36 Error
SanityCheck() const {
37 // The following checks are based on visual inspection of the records and
39 // https://elixir.bootlin.com/linux/v4.8/source/include/uapi/linux/perf_event.h
40 // See PERF_RECORD_MAX, PERF_RECORD_SWITCH and the data similar records
43 // A record of too many uint64_t's or more should mean that the data is
45 const uint64_t max_valid_size_bytes
= 8000;
46 if (size
== 0 || size
> max_valid_size_bytes
)
47 return createStringError(
48 inconvertibleErrorCode(),
49 formatv("A record of {0} bytes was found.", size
));
51 // We add some numbers to PERF_RECORD_MAX because some systems might have
52 // custom records. In any case, we are looking only for abnormal data.
53 if (type
>= PERF_RECORD_MAX
+ 100)
54 return createStringError(
55 inconvertibleErrorCode(),
56 formatv("Invalid record type {0} was found.", type
));
57 return Error::success();
60 bool IsContextSwitchRecord() const {
61 return type
== PERF_RECORD_SWITCH_CPU_WIDE
;
64 bool IsErrorRecord() const {
65 return type
== PERF_RECORD_LOST
|| type
== PERF_RECORD_THROTTLE
||
66 type
== PERF_RECORD_UNTHROTTLE
|| type
== PERF_RECORD_LOST_SAMPLES
;
71 /// Record found in the perf_event context switch traces. It might contain
72 /// additional fields in memory, but header.size should have the actual size
74 struct PerfContextSwitchRecord
{
75 struct perf_event_header header
;
76 uint32_t next_prev_pid
;
77 uint32_t next_prev_tid
;
79 uint64_t time_in_nanos
;
81 bool IsOut() const { return header
.misc
& PERF_RECORD_MISC_SWITCH_OUT
; }
84 /// Record produced after parsing the raw context switch trace produce by
85 /// perf_event. A major difference between this struct and
86 /// PerfContextSwitchRecord is that this one uses tsc instead of nanos.
87 struct ContextSwitchRecord
{
89 /// Whether the switch is in or out
91 /// pid = 0 and tid = 0 indicate the swapper or idle process, which normally
92 /// runs after a context switch out of a normal user thread.
96 bool IsOut() const { return is_out
; }
98 bool IsIn() const { return !is_out
; }
101 uint64_t ThreadContinuousExecution::GetLowestKnownTSC() const {
103 case Variant::Complete
:
104 return tscs
.complete
.start
;
105 case Variant::OnlyStart
:
106 return tscs
.only_start
.start
;
107 case Variant::OnlyEnd
:
108 return tscs
.only_end
.end
;
109 case Variant::HintedEnd
:
110 return tscs
.hinted_end
.start
;
111 case Variant::HintedStart
:
112 return tscs
.hinted_start
.end
;
116 uint64_t ThreadContinuousExecution::GetStartTSC() const {
118 case Variant::Complete
:
119 return tscs
.complete
.start
;
120 case Variant::OnlyStart
:
121 return tscs
.only_start
.start
;
122 case Variant::OnlyEnd
:
124 case Variant::HintedEnd
:
125 return tscs
.hinted_end
.start
;
126 case Variant::HintedStart
:
127 return tscs
.hinted_start
.hinted_start
;
131 uint64_t ThreadContinuousExecution::GetEndTSC() const {
133 case Variant::Complete
:
134 return tscs
.complete
.end
;
135 case Variant::OnlyStart
:
136 return std::numeric_limits
<uint64_t>::max();
137 case Variant::OnlyEnd
:
138 return tscs
.only_end
.end
;
139 case Variant::HintedEnd
:
140 return tscs
.hinted_end
.hinted_end
;
141 case Variant::HintedStart
:
142 return tscs
.hinted_start
.end
;
146 ThreadContinuousExecution
ThreadContinuousExecution::CreateCompleteExecution(
147 lldb::cpu_id_t cpu_id
, lldb::tid_t tid
, lldb::pid_t pid
, uint64_t start
,
149 ThreadContinuousExecution
o(cpu_id
, tid
, pid
);
150 o
.variant
= Variant::Complete
;
151 o
.tscs
.complete
.start
= start
;
152 o
.tscs
.complete
.end
= end
;
156 ThreadContinuousExecution
ThreadContinuousExecution::CreateHintedStartExecution(
157 lldb::cpu_id_t cpu_id
, lldb::tid_t tid
, lldb::pid_t pid
,
158 uint64_t hinted_start
, uint64_t end
) {
159 ThreadContinuousExecution
o(cpu_id
, tid
, pid
);
160 o
.variant
= Variant::HintedStart
;
161 o
.tscs
.hinted_start
.hinted_start
= hinted_start
;
162 o
.tscs
.hinted_start
.end
= end
;
166 ThreadContinuousExecution
ThreadContinuousExecution::CreateHintedEndExecution(
167 lldb::cpu_id_t cpu_id
, lldb::tid_t tid
, lldb::pid_t pid
, uint64_t start
,
168 uint64_t hinted_end
) {
169 ThreadContinuousExecution
o(cpu_id
, tid
, pid
);
170 o
.variant
= Variant::HintedEnd
;
171 o
.tscs
.hinted_end
.start
= start
;
172 o
.tscs
.hinted_end
.hinted_end
= hinted_end
;
176 ThreadContinuousExecution
ThreadContinuousExecution::CreateOnlyEndExecution(
177 lldb::cpu_id_t cpu_id
, lldb::tid_t tid
, lldb::pid_t pid
, uint64_t end
) {
178 ThreadContinuousExecution
o(cpu_id
, tid
, pid
);
179 o
.variant
= Variant::OnlyEnd
;
180 o
.tscs
.only_end
.end
= end
;
184 ThreadContinuousExecution
ThreadContinuousExecution::CreateOnlyStartExecution(
185 lldb::cpu_id_t cpu_id
, lldb::tid_t tid
, lldb::pid_t pid
, uint64_t start
) {
186 ThreadContinuousExecution
o(cpu_id
, tid
, pid
);
187 o
.variant
= Variant::OnlyStart
;
188 o
.tscs
.only_start
.start
= start
;
192 static Error
RecoverExecutionsFromConsecutiveRecords(
193 cpu_id_t cpu_id
, const LinuxPerfZeroTscConversion
&tsc_conversion
,
194 const ContextSwitchRecord
¤t_record
,
195 const std::optional
<ContextSwitchRecord
> &prev_record
,
196 std::function
<void(const ThreadContinuousExecution
&execution
)>
199 if (current_record
.IsOut()) {
200 on_new_execution(ThreadContinuousExecution::CreateOnlyEndExecution(
201 cpu_id
, current_record
.tid
, current_record
.pid
, current_record
.tsc
));
203 // The 'in' case will be handled later when we try to look for its end
204 return Error::success();
207 const ContextSwitchRecord
&prev
= *prev_record
;
208 if (prev
.tsc
>= current_record
.tsc
)
209 return createStringError(
210 inconvertibleErrorCode(),
211 formatv("A context switch record doesn't happen after the previous "
212 "record. Previous TSC= {0}, current TSC = {1}.",
213 prev
.tsc
, current_record
.tsc
));
215 if (current_record
.IsIn() && prev
.IsIn()) {
216 // We found two consecutive ins, which means that we didn't capture
217 // the end of the previous execution.
218 on_new_execution(ThreadContinuousExecution::CreateHintedEndExecution(
219 cpu_id
, prev
.tid
, prev
.pid
, prev
.tsc
, current_record
.tsc
- 1));
220 } else if (current_record
.IsOut() && prev
.IsOut()) {
221 // We found two consecutive outs, that means that we didn't capture
222 // the beginning of the current execution.
223 on_new_execution(ThreadContinuousExecution::CreateHintedStartExecution(
224 cpu_id
, current_record
.tid
, current_record
.pid
, prev
.tsc
+ 1,
225 current_record
.tsc
));
226 } else if (current_record
.IsOut() && prev
.IsIn()) {
227 if (current_record
.pid
== prev
.pid
&& current_record
.tid
== prev
.tid
) {
228 /// A complete execution
229 on_new_execution(ThreadContinuousExecution::CreateCompleteExecution(
230 cpu_id
, current_record
.tid
, current_record
.pid
, prev
.tsc
,
231 current_record
.tsc
));
233 // An out after the in of a different thread. The first one doesn't
234 // have an end, and the second one doesn't have a start.
235 on_new_execution(ThreadContinuousExecution::CreateHintedEndExecution(
236 cpu_id
, prev
.tid
, prev
.pid
, prev
.tsc
, current_record
.tsc
- 1));
237 on_new_execution(ThreadContinuousExecution::CreateHintedStartExecution(
238 cpu_id
, current_record
.tid
, current_record
.pid
, prev
.tsc
+ 1,
239 current_record
.tsc
));
242 return Error::success();
245 Expected
<std::vector
<ThreadContinuousExecution
>>
246 lldb_private::trace_intel_pt::DecodePerfContextSwitchTrace(
247 ArrayRef
<uint8_t> data
, cpu_id_t cpu_id
,
248 const LinuxPerfZeroTscConversion
&tsc_conversion
) {
250 std::vector
<ThreadContinuousExecution
> executions
;
252 // This offset is used to create the error message in case of failures.
255 auto do_decode
= [&]() -> Error
{
256 std::optional
<ContextSwitchRecord
> prev_record
;
257 while (offset
< data
.size()) {
258 const perf_event_header
&perf_record
=
259 *reinterpret_cast<const perf_event_header
*>(data
.data() + offset
);
260 if (Error err
= perf_record
.SanityCheck())
263 if (perf_record
.IsContextSwitchRecord()) {
264 const PerfContextSwitchRecord
&context_switch_record
=
265 *reinterpret_cast<const PerfContextSwitchRecord
*>(data
.data() +
267 ContextSwitchRecord record
{
268 tsc_conversion
.ToTSC(context_switch_record
.time_in_nanos
),
269 context_switch_record
.IsOut(),
270 static_cast<lldb::pid_t
>(context_switch_record
.pid
),
271 static_cast<lldb::tid_t
>(context_switch_record
.tid
)};
273 if (Error err
= RecoverExecutionsFromConsecutiveRecords(
274 cpu_id
, tsc_conversion
, record
, prev_record
,
275 [&](const ThreadContinuousExecution
&execution
) {
276 executions
.push_back(execution
);
280 prev_record
= record
;
282 offset
+= perf_record
.size
;
285 // We might have an incomplete last record
286 if (prev_record
&& prev_record
->IsIn())
287 executions
.push_back(ThreadContinuousExecution::CreateOnlyStartExecution(
288 cpu_id
, prev_record
->tid
, prev_record
->pid
, prev_record
->tsc
));
289 return Error::success();
292 if (Error err
= do_decode())
293 return createStringError(inconvertibleErrorCode(),
294 formatv("Malformed perf context switch trace for "
295 "cpu {0} at offset {1}. {2}",
296 cpu_id
, offset
, toString(std::move(err
))));
301 Expected
<std::vector
<uint8_t>>
302 lldb_private::trace_intel_pt::FilterProcessesFromContextSwitchTrace(
303 llvm::ArrayRef
<uint8_t> data
, const std::set
<lldb::pid_t
> &pids
) {
305 std::vector
<uint8_t> out_data
;
307 while (offset
< data
.size()) {
308 const perf_event_header
&perf_record
=
309 *reinterpret_cast<const perf_event_header
*>(data
.data() + offset
);
310 if (Error err
= perf_record
.SanityCheck())
311 return std::move(err
);
312 bool should_copy
= false;
313 if (perf_record
.IsContextSwitchRecord()) {
314 const PerfContextSwitchRecord
&context_switch_record
=
315 *reinterpret_cast<const PerfContextSwitchRecord
*>(data
.data() +
317 if (pids
.count(context_switch_record
.pid
))
319 } else if (perf_record
.IsErrorRecord()) {
324 for (size_t i
= 0; i
< perf_record
.size
; i
++) {
325 out_data
.push_back(data
[offset
+ i
]);
329 offset
+= perf_record
.size
;