//===-- DecodedThread.cpp -------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "DecodedThread.h"
#include "TraceCursorIntelPT.h"
#include <intel-pt.h>

using namespace lldb;
using namespace lldb_private;
using namespace lldb_private::trace_intel_pt;
using namespace llvm;

char IntelPTError::ID;

IntelPTError::IntelPTError(int libipt_error_code, lldb::addr_t address)
    : m_libipt_error_code(libipt_error_code), m_address(address) {
  assert(libipt_error_code < 0);
}

void IntelPTError::log(llvm::raw_ostream &OS) const {
  OS << pt_errstr(pt_errcode(m_libipt_error_code));
  if (m_address != LLDB_INVALID_ADDRESS && m_address > 0)
    OS << formatv(": {0:x+16}", m_address);
}

bool DecodedThread::TSCRange::InRange(uint64_t item_index) const {
  return item_index >= first_item_index &&
         item_index < first_item_index + items_count;
}

bool DecodedThread::NanosecondsRange::InRange(uint64_t item_index) const {
  return item_index >= first_item_index &&
         item_index < first_item_index + items_count;
}

double DecodedThread::NanosecondsRange::GetInterpolatedTime(
    uint64_t item_index, uint64_t begin_of_time_nanos,
    const LinuxPerfZeroTscConversion &tsc_conversion) const {
  uint64_t items_since_last_tsc = item_index - first_item_index;

  auto interpolate = [&](uint64_t next_range_start_ns) {
    if (next_range_start_ns == nanos) {
      // If the resolution of the conversion formula is bad enough to consider
      // these two timestamps as equal, then we just increase the next one by 1
      // for correction.
      next_range_start_ns++;
    }
    long double item_duration =
        static_cast<long double>(next_range_start_ns - nanos) / items_count;
    return (nanos - begin_of_time_nanos) + items_since_last_tsc * item_duration;
  };

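  // Illustration with made-up numbers (not taken from any real trace): if this
  // range starts at nanos = 1000 with items_count = 4 and the next range
  // starts at 1100 ns, each item is assumed to last (1100 - 1000) / 4 = 25 ns,
  // so the item at first_item_index + 2 is reported at
  // (1000 - begin_of_time_nanos) + 2 * 25 nanoseconds.
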
  if (!next_range) {
    // This is the last TSC range, so we have to extrapolate. In this case, we
    // assume that each instruction took one TSC, which is what an instruction
    // would take if no parallelism is achieved and the frequency multiplier
    // is 1.
    return interpolate(tsc_conversion.ToNanos(tsc + items_count));
  }
  if (items_count < (next_range->tsc - tsc)) {
    // If the number of items in this range is less than the total TSC duration
    // of this range, i.e. each instruction took longer than 1 TSC, then we can
    // assume that something else happened between these TSCs (e.g. a context
    // switch, change to kernel, decoding errors, etc). In this case, we also
    // assume that each instruction took 1 TSC. A proper way to improve this
    // would be to analyze the next events in the trace looking for context
    // switches or trace disablement events, but for now, as we only want an
    // approximation, we keep it simple. We are also guaranteed that the time
    // in nanos of the next range is different from the current one, just
    // because of the definition of a NanosecondsRange.
    return interpolate(std::min(tsc_conversion.ToNanos(tsc + items_count),
                                next_range->nanos));
  }

  // In this case, each item took less than 1 TSC, so some parallelism was
  // achieved, which is an indication that we didn't suffer from any kind of
  // interruption.
  return interpolate(next_range->nanos);
}

uint64_t DecodedThread::GetItemsCount() const { return m_item_kinds.size(); }

lldb::addr_t
DecodedThread::GetInstructionLoadAddress(uint64_t item_index) const {
  return m_item_data[item_index].load_address;
}

lldb::addr_t
DecodedThread::GetSyncPointOffsetByIndex(uint64_t item_index) const {
  return m_psb_offsets.find(item_index)->second;
}

ThreadSP DecodedThread::GetThread() { return m_thread_sp; }

DecodedThread::TraceItemStorage &
DecodedThread::CreateNewTraceItem(lldb::TraceItemKind kind) {
  m_item_kinds.push_back(kind);
  m_item_data.emplace_back();
  if (m_last_tsc)
    (*m_last_tsc)->second.items_count++;
  if (m_last_nanoseconds)
    (*m_last_nanoseconds)->second.items_count++;
  return m_item_data.back();
}

void DecodedThread::NotifySyncPoint(lldb::addr_t psb_offset) {
  m_psb_offsets.try_emplace(GetItemsCount(), psb_offset);
  AppendEvent(lldb::eTraceEventSyncPoint);
}

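// Sketch of how the timing ranges evolve, using hypothetical values purely for
// illustration: NotifyTsc(100) with no items decoded yet opens
// TSCRange{tsc=100, items_count=0, first_item_index=0}; the HWClockTick event
// it appends plus, say, three instructions appended afterwards grow that
// range's items_count to 4, and a later NotifyTsc(250) opens a new range
// starting at the then-current item index.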
void DecodedThread::NotifyTsc(TSC tsc) {
  if (m_last_tsc && (*m_last_tsc)->second.tsc == tsc)
    return;
  if (m_last_tsc)
    assert(tsc >= (*m_last_tsc)->second.tsc &&
           "We can't have decreasing times");

  m_last_tsc =
      m_tscs.emplace(GetItemsCount(), TSCRange{tsc, 0, GetItemsCount()}).first;

  if (m_tsc_conversion) {
    uint64_t nanos = m_tsc_conversion->ToNanos(tsc);
    if (!m_last_nanoseconds || (*m_last_nanoseconds)->second.nanos != nanos) {
      m_last_nanoseconds =
          m_nanoseconds
              .emplace(GetItemsCount(), NanosecondsRange{nanos, tsc, nullptr, 0,
                                                         GetItemsCount()})
              .first;
      if (*m_last_nanoseconds != m_nanoseconds.begin()) {
        auto prev_range = prev(*m_last_nanoseconds);
        prev_range->second.next_range = &(*m_last_nanoseconds)->second;
      }
    }
  }
  AppendEvent(lldb::eTraceEventHWClockTick);
}

void DecodedThread::NotifyCPU(lldb::cpu_id_t cpu_id) {
  if (!m_last_cpu || *m_last_cpu != cpu_id) {
    m_cpus.emplace(GetItemsCount(), cpu_id);
    m_last_cpu = cpu_id;
    AppendEvent(lldb::eTraceEventCPUChanged);
  }
}

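// The index-based lookups below map an item index to the entry recorded at or
// before it. For example, with hypothetical contents m_cpus = {0 -> 2, 100 ->
// 5}, GetCPUByIndex(40) returns cpu 2 and GetCPUByIndex(150) returns cpu 5,
// while an empty map yields LLDB_INVALID_CPU_ID.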
lldb::cpu_id_t DecodedThread::GetCPUByIndex(uint64_t item_index) const {
  auto it = m_cpus.upper_bound(item_index);
  return it == m_cpus.begin() ? LLDB_INVALID_CPU_ID : prev(it)->second;
}

std::optional<DecodedThread::TSCRange>
DecodedThread::GetTSCRangeByIndex(uint64_t item_index) const {
  auto next_it = m_tscs.upper_bound(item_index);
  if (next_it == m_tscs.begin())
    return std::nullopt;
  return prev(next_it)->second;
}

std::optional<DecodedThread::NanosecondsRange>
DecodedThread::GetNanosecondsRangeByIndex(uint64_t item_index) {
  auto next_it = m_nanoseconds.upper_bound(item_index);
  if (next_it == m_nanoseconds.begin())
    return std::nullopt;
  return prev(next_it)->second;
}

uint64_t DecodedThread::GetTotalInstructionCount() const {
  return m_insn_count;
}

void DecodedThread::AppendEvent(lldb::TraceEvent event) {
  CreateNewTraceItem(lldb::eTraceItemKindEvent).event = event;
  m_events_stats.RecordEvent(event);
}

void DecodedThread::AppendInstruction(const pt_insn &insn) {
  CreateNewTraceItem(lldb::eTraceItemKindInstruction).load_address = insn.ip;
  m_insn_count++;
}

void DecodedThread::AppendError(const IntelPTError &error) {
  CreateNewTraceItem(lldb::eTraceItemKindError).error = error.message();
  m_error_stats.RecordError(/*fatal=*/false);
}

void DecodedThread::AppendCustomError(StringRef err, bool fatal) {
  CreateNewTraceItem(lldb::eTraceItemKindError).error = err.str();
  m_error_stats.RecordError(fatal);
}

lldb::TraceEvent DecodedThread::GetEventByIndex(int item_index) const {
  return m_item_data[item_index].event;
}

const DecodedThread::EventsStats &DecodedThread::GetEventsStats() const {
  return m_events_stats;
}

void DecodedThread::EventsStats::RecordEvent(lldb::TraceEvent event) {
  events_counts[event]++;
  total_count++;
}

uint64_t DecodedThread::ErrorStats::GetTotalCount() const {
  uint64_t total = 0;
  for (const auto &[kind, count] : libipt_errors)
    total += count;

  return total + other_errors + fatal_errors;
}

void DecodedThread::ErrorStats::RecordError(bool fatal) {
  if (fatal)
    fatal_errors++;
  else
    other_errors++;
}

void DecodedThread::ErrorStats::RecordError(int libipt_error_code) {
  libipt_errors[pt_errstr(pt_errcode(libipt_error_code))]++;
}

const DecodedThread::ErrorStats &DecodedThread::GetErrorStats() const {
  return m_error_stats;
}

lldb::TraceItemKind
DecodedThread::GetItemKindByIndex(uint64_t item_index) const {
  return static_cast<lldb::TraceItemKind>(m_item_kinds[item_index]);
}

llvm::StringRef DecodedThread::GetErrorByIndex(uint64_t item_index) const {
  if (item_index >= m_item_data.size())
    return llvm::StringRef();
  return m_item_data[item_index].error;
}

DecodedThread::DecodedThread(
    ThreadSP thread_sp,
    const std::optional<LinuxPerfZeroTscConversion> &tsc_conversion)
    : m_thread_sp(thread_sp), m_tsc_conversion(tsc_conversion) {}

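// A rough estimate only: this sums the per-element sizes of the item storage,
// the item kinds, and the key/value pairs of the timing and CPU maps. It does
// not attempt to account for container overhead or for heap allocations made
// by error messages.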
size_t DecodedThread::CalculateApproximateMemoryUsage() const {
  return sizeof(TraceItemStorage) * m_item_data.size() +
         sizeof(uint8_t) * m_item_kinds.size() +
         (sizeof(uint64_t) + sizeof(TSC)) * m_tscs.size() +
         (sizeof(uint64_t) + sizeof(uint64_t)) * m_nanoseconds.size() +
         (sizeof(uint64_t) + sizeof(lldb::cpu_id_t)) * m_cpus.size();
}