lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp

   1 //===-- DecodedThread.cpp -------------------------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #include "DecodedThread.h"
  10 #include "TraceCursorIntelPT.h"
  11 #include <intel-pt.h>
  12 #include <memory>
  13 #include <optional>
  14
  15 using namespace lldb;
  16 using namespace lldb_private;
  17 using namespace lldb_private::trace_intel_pt;
  18 using namespace llvm;
  19
// Out-of-line definition of the static member IntelPTError::ID.
// NOTE(review): presumably this is the type identifier required by
// llvm::ErrorInfo -- confirm against the class declaration in the header.
char IntelPTError::ID;
  21
  22 IntelPTError::IntelPTError(int libipt_error_code, lldb::addr_t address)
  23     : m_libipt_error_code(libipt_error_code), m_address(address) {
  24   assert(libipt_error_code < 0);
  25 }
  26
  27 void IntelPTError::log(llvm::raw_ostream &OS) const {
  28   OS << pt_errstr(pt_errcode(m_libipt_error_code));
  29   if (m_address != LLDB_INVALID_ADDRESS && m_address > 0)
  30     OS << formatv(": {0:x+16}", m_address);
  31 }
  32
  33 bool DecodedThread::TSCRange::InRange(uint64_t item_index) const {
  34   return item_index >= first_item_index &&
  35          item_index < first_item_index + items_count;
  36 }
  37
  38 bool DecodedThread::NanosecondsRange::InRange(uint64_t item_index) const {
  39   return item_index >= first_item_index &&
  40          item_index < first_item_index + items_count;
  41 }
  42
  43 double DecodedThread::NanosecondsRange::GetInterpolatedTime(
  44     uint64_t item_index, uint64_t begin_of_time_nanos,
  45     const LinuxPerfZeroTscConversion &tsc_conversion) const {
  46   uint64_t items_since_last_tsc = item_index - first_item_index;
  47
  48   auto interpolate = [&](uint64_t next_range_start_ns) {
  49     if (next_range_start_ns == nanos) {
  50       // If the resolution of the conversion formula is bad enough to consider
  51       // these two timestamps as equal, then we just increase the next one by 1
  52       // for correction
  53       next_range_start_ns++;
  54     }
  55     long double item_duration =
  56         static_cast<long double>(items_count) / (next_range_start_ns - nanos);
  57     return (nanos - begin_of_time_nanos) + items_since_last_tsc * item_duration;
  58   };
  59
  60   if (!next_range) {
  61     // If this is the last TSC range, so we have to extrapolate. In this case,
  62     // we assume that each instruction took one TSC, which is what an
  63     // instruction would take if no parallelism is achieved and the frequency
  64     // multiplier is 1.
  65     return interpolate(tsc_conversion.ToNanos(tsc + items_count));
  66   }
  67   if (items_count < (next_range->tsc - tsc)) {
  68     // If the numbers of items in this range is less than the total TSC duration
  69     // of this range, i.e. each instruction taking longer than 1 TSC, then we
  70     // can assume that something else happened between these TSCs (e.g. a
  71     // context switch, change to kernel, decoding errors, etc). In this case, we
  72     // also assume that each instruction took 1 TSC. A proper way to improve
  73     // this would be to analize the next events in the trace looking for context
  74     // switches or trace disablement events, but for now, as we only want an
  75     // approximation, we keep it simple. We are also guaranteed that the time in
  76     // nanos of the next range is different to the current one, just because of
  77     // the definition of a NanosecondsRange.
  78     return interpolate(
  79         std::min(tsc_conversion.ToNanos(tsc + items_count), next_range->nanos));
  80   }
  81
  82   // In this case, each item took less than 1 TSC, so some parallelism was
  83   // achieved, which is an indication that we didn't suffered of any kind of
  84   // interruption.
  85   return interpolate(next_range->nanos);
  86 }
  87
  88 uint64_t DecodedThread::GetItemsCount() const { return m_item_kinds.size(); }
  89
  90 lldb::addr_t
  91 DecodedThread::GetInstructionLoadAddress(uint64_t item_index) const {
  92   return m_item_data[item_index].load_address;
  93 }
  94
  95 lldb::addr_t
  96 DecodedThread::GetSyncPointOffsetByIndex(uint64_t item_index) const {
  97   return m_psb_offsets.find(item_index)->second;
  98 }
  99
 100 ThreadSP DecodedThread::GetThread() { return m_thread_sp; }
 101
 102 DecodedThread::TraceItemStorage &
 103 DecodedThread::CreateNewTraceItem(lldb::TraceItemKind kind) {
 104   m_item_kinds.push_back(kind);
 105   m_item_data.emplace_back();
 106   if (m_last_tsc)
 107     (*m_last_tsc)->second.items_count++;
 108   if (m_last_nanoseconds)
 109     (*m_last_nanoseconds)->second.items_count++;
 110   return m_item_data.back();
 111 }
 112
 113 void DecodedThread::NotifySyncPoint(lldb::addr_t psb_offset) {
 114   m_psb_offsets.try_emplace(GetItemsCount(), psb_offset);
 115   AppendEvent(lldb::eTraceEventSyncPoint);
 116 }
 117
 118 void DecodedThread::NotifyTsc(TSC tsc) {
 119   if (m_last_tsc && (*m_last_tsc)->second.tsc == tsc)
 120     return;
 121   if (m_last_tsc)
 122     assert(tsc >= (*m_last_tsc)->second.tsc &&
 123            "We can't have decreasing times");
 124
 125   m_last_tsc =
 126       m_tscs.emplace(GetItemsCount(), TSCRange{tsc, 0, GetItemsCount()}).first;
 127
 128   if (m_tsc_conversion) {
 129     uint64_t nanos = m_tsc_conversion->ToNanos(tsc);
 130     if (!m_last_nanoseconds || (*m_last_nanoseconds)->second.nanos != nanos) {
 131       m_last_nanoseconds =
 132           m_nanoseconds
 133               .emplace(GetItemsCount(), NanosecondsRange{nanos, tsc, nullptr, 0,
 134                                                          GetItemsCount()})
 135               .first;
 136       if (*m_last_nanoseconds != m_nanoseconds.begin()) {
 137         auto prev_range = prev(*m_last_nanoseconds);
 138         prev_range->second.next_range = &(*m_last_nanoseconds)->second;
 139       }
 140     }
 141   }
 142   AppendEvent(lldb::eTraceEventHWClockTick);
 143 }
 144
 145 void DecodedThread::NotifyCPU(lldb::cpu_id_t cpu_id) {
 146   if (!m_last_cpu || *m_last_cpu != cpu_id) {
 147     m_cpus.emplace(GetItemsCount(), cpu_id);
 148     m_last_cpu = cpu_id;
 149     AppendEvent(lldb::eTraceEventCPUChanged);
 150   }
 151 }
 152
 153 lldb::cpu_id_t DecodedThread::GetCPUByIndex(uint64_t item_index) const {
 154   auto it = m_cpus.upper_bound(item_index);
 155   return it == m_cpus.begin() ? LLDB_INVALID_CPU_ID : prev(it)->second;
 156 }
 157
 158 std::optional<DecodedThread::TSCRange>
 159 DecodedThread::GetTSCRangeByIndex(uint64_t item_index) const {
 160   auto next_it = m_tscs.upper_bound(item_index);
 161   if (next_it == m_tscs.begin())
 162     return std::nullopt;
 163   return prev(next_it)->second;
 164 }
 165
 166 std::optional<DecodedThread::NanosecondsRange>
 167 DecodedThread::GetNanosecondsRangeByIndex(uint64_t item_index) {
 168   auto next_it = m_nanoseconds.upper_bound(item_index);
 169   if (next_it == m_nanoseconds.begin())
 170     return std::nullopt;
 171   return prev(next_it)->second;
 172 }
 173
 174 uint64_t DecodedThread::GetTotalInstructionCount() const {
 175   return m_insn_count;
 176 }
 177
 178 void DecodedThread::AppendEvent(lldb::TraceEvent event) {
 179   CreateNewTraceItem(lldb::eTraceItemKindEvent).event = event;
 180   m_events_stats.RecordEvent(event);
 181 }
 182
 183 void DecodedThread::AppendInstruction(const pt_insn &insn) {
 184   CreateNewTraceItem(lldb::eTraceItemKindInstruction).load_address = insn.ip;
 185   m_insn_count++;
 186 }
 187
 188 void DecodedThread::AppendError(const IntelPTError &error) {
 189   CreateNewTraceItem(lldb::eTraceItemKindError).error = error.message();
 190   m_error_stats.RecordError(/*fatal=*/false);
 191 }
 192
 193 void DecodedThread::AppendCustomError(StringRef err, bool fatal) {
 194   CreateNewTraceItem(lldb::eTraceItemKindError).error = err.str();
 195   m_error_stats.RecordError(fatal);
 196 }
 197
 198 lldb::TraceEvent DecodedThread::GetEventByIndex(int item_index) const {
 199   return m_item_data[item_index].event;
 200 }
 201
 202 const DecodedThread::EventsStats &DecodedThread::GetEventsStats() const {
 203   return m_events_stats;
 204 }
 205
 206 void DecodedThread::EventsStats::RecordEvent(lldb::TraceEvent event) {
 207   events_counts[event]++;
 208   total_count++;
 209 }
 210
 211 uint64_t DecodedThread::ErrorStats::GetTotalCount() const {
 212   uint64_t total = 0;
 213   for (const auto &[kind, count] : libipt_errors)
 214     total += count;
 215
 216   return total + other_errors + fatal_errors;
 217 }
 218
 219 void DecodedThread::ErrorStats::RecordError(bool fatal) {
 220   if (fatal)
 221     fatal_errors++;
 222   else
 223     other_errors++;
 224 }
 225
 226 void DecodedThread::ErrorStats::RecordError(int libipt_error_code) {
 227   libipt_errors[pt_errstr(pt_errcode(libipt_error_code))]++;
 228 }
 229
 230 const DecodedThread::ErrorStats &DecodedThread::GetErrorStats() const {
 231   return m_error_stats;
 232 }
 233
 234 lldb::TraceItemKind
 235 DecodedThread::GetItemKindByIndex(uint64_t item_index) const {
 236   return static_cast<lldb::TraceItemKind>(m_item_kinds[item_index]);
 237 }
 238
 239 llvm::StringRef DecodedThread::GetErrorByIndex(uint64_t item_index) const {
 240   if (item_index >= m_item_data.size())
 241     return llvm::StringRef();
 242   return m_item_data[item_index].error;
 243 }
 244
 245 DecodedThread::DecodedThread(
 246     ThreadSP thread_sp,
 247     const std::optional<LinuxPerfZeroTscConversion> &tsc_conversion)
 248     : m_thread_sp(thread_sp), m_tsc_conversion(tsc_conversion) {}
 249
 250 size_t DecodedThread::CalculateApproximateMemoryUsage() const {
 251   return sizeof(TraceItemStorage) * m_item_data.size() +
 252          sizeof(uint8_t) * m_item_kinds.size() +
 253          (sizeof(uint64_t) + sizeof(TSC)) * m_tscs.size() +
 254          (sizeof(uint64_t) + sizeof(uint64_t)) * m_nanoseconds.size() +
 255          (sizeof(uint64_t) + sizeof(lldb::cpu_id_t)) * m_cpus.size();
 256 }