[memprof] Move YAML traits to MemProf.h (NFC) (#118668)
[llvm-project.git] / lldb / source / Plugins / ObjectFile / wasm / ObjectFileWasm.cpp
blob06eb6ff9cafb5d50b560c174c8131eba26a565e9
1 //===-- ObjectFileWasm.cpp ------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "ObjectFileWasm.h"
10 #include "lldb/Core/Module.h"
11 #include "lldb/Core/ModuleSpec.h"
12 #include "lldb/Core/PluginManager.h"
13 #include "lldb/Core/Section.h"
14 #include "lldb/Target/Process.h"
15 #include "lldb/Target/SectionLoadList.h"
16 #include "lldb/Target/Target.h"
17 #include "lldb/Utility/DataBufferHeap.h"
18 #include "lldb/Utility/LLDBLog.h"
19 #include "lldb/Utility/Log.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/BinaryFormat/Magic.h"
24 #include "llvm/BinaryFormat/Wasm.h"
25 #include "llvm/Support/Endian.h"
26 #include "llvm/Support/Format.h"
27 #include <optional>
29 using namespace lldb;
30 using namespace lldb_private;
31 using namespace lldb_private::wasm;
33 LLDB_PLUGIN_DEFINE(ObjectFileWasm)
35 static const uint32_t kWasmHeaderSize =
36 sizeof(llvm::wasm::WasmMagic) + sizeof(llvm::wasm::WasmVersion);
38 /// Checks whether the data buffer starts with a valid Wasm module header.
39 static bool ValidateModuleHeader(const DataBufferSP &data_sp) {
40 if (!data_sp || data_sp->GetByteSize() < kWasmHeaderSize)
41 return false;
43 if (llvm::identify_magic(toStringRef(data_sp->GetData())) !=
44 llvm::file_magic::wasm_object)
45 return false;
47 const uint8_t *Ptr = data_sp->GetBytes() + sizeof(llvm::wasm::WasmMagic);
49 uint32_t version = llvm::support::endian::read32le(Ptr);
50 return version == llvm::wasm::WasmVersion;
53 static std::optional<ConstString>
54 GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
55 // A Wasm string is encoded as a vector of UTF-8 codes.
56 // Vectors are encoded with their u32 length followed by the element
57 // sequence.
58 uint64_t len = data.getULEB128(c);
59 if (!c) {
60 consumeError(c.takeError());
61 return std::nullopt;
64 if (len >= (uint64_t(1) << 32)) {
65 return std::nullopt;
68 llvm::SmallVector<uint8_t, 32> str_storage;
69 data.getU8(c, str_storage, len);
70 if (!c) {
71 consumeError(c.takeError());
72 return std::nullopt;
75 llvm::StringRef str = toStringRef(llvm::ArrayRef(str_storage));
76 return ConstString(str);
79 char ObjectFileWasm::ID;
81 void ObjectFileWasm::Initialize() {
82 PluginManager::RegisterPlugin(GetPluginNameStatic(),
83 GetPluginDescriptionStatic(), CreateInstance,
84 CreateMemoryInstance, GetModuleSpecifications);
87 void ObjectFileWasm::Terminate() {
88 PluginManager::UnregisterPlugin(CreateInstance);
91 ObjectFile *
92 ObjectFileWasm::CreateInstance(const ModuleSP &module_sp, DataBufferSP data_sp,
93 offset_t data_offset, const FileSpec *file,
94 offset_t file_offset, offset_t length) {
95 Log *log = GetLog(LLDBLog::Object);
97 if (!data_sp) {
98 data_sp = MapFileData(*file, length, file_offset);
99 if (!data_sp) {
100 LLDB_LOGF(log, "Failed to create ObjectFileWasm instance for file %s",
101 file->GetPath().c_str());
102 return nullptr;
104 data_offset = 0;
107 assert(data_sp);
108 if (!ValidateModuleHeader(data_sp)) {
109 LLDB_LOGF(log,
110 "Failed to create ObjectFileWasm instance: invalid Wasm header");
111 return nullptr;
114 // Update the data to contain the entire file if it doesn't contain it
115 // already.
116 if (data_sp->GetByteSize() < length) {
117 data_sp = MapFileData(*file, length, file_offset);
118 if (!data_sp) {
119 LLDB_LOGF(log,
120 "Failed to create ObjectFileWasm instance: cannot read file %s",
121 file->GetPath().c_str());
122 return nullptr;
124 data_offset = 0;
127 std::unique_ptr<ObjectFileWasm> objfile_up(new ObjectFileWasm(
128 module_sp, data_sp, data_offset, file, file_offset, length));
129 ArchSpec spec = objfile_up->GetArchitecture();
130 if (spec && objfile_up->SetModulesArchitecture(spec)) {
131 LLDB_LOGF(log,
132 "%p ObjectFileWasm::CreateInstance() module = %p (%s), file = %s",
133 static_cast<void *>(objfile_up.get()),
134 static_cast<void *>(objfile_up->GetModule().get()),
135 objfile_up->GetModule()->GetSpecificationDescription().c_str(),
136 file ? file->GetPath().c_str() : "<NULL>");
137 return objfile_up.release();
140 LLDB_LOGF(log, "Failed to create ObjectFileWasm instance");
141 return nullptr;
144 ObjectFile *ObjectFileWasm::CreateMemoryInstance(const ModuleSP &module_sp,
145 WritableDataBufferSP data_sp,
146 const ProcessSP &process_sp,
147 addr_t header_addr) {
148 if (!ValidateModuleHeader(data_sp))
149 return nullptr;
151 std::unique_ptr<ObjectFileWasm> objfile_up(
152 new ObjectFileWasm(module_sp, data_sp, process_sp, header_addr));
153 ArchSpec spec = objfile_up->GetArchitecture();
154 if (spec && objfile_up->SetModulesArchitecture(spec))
155 return objfile_up.release();
156 return nullptr;
159 bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {
160 // Buffer sufficient to read a section header and find the pointer to the next
161 // section.
162 const uint32_t kBufferSize = 1024;
163 DataExtractor section_header_data = ReadImageData(*offset_ptr, kBufferSize);
165 llvm::DataExtractor data = section_header_data.GetAsLLVM();
166 llvm::DataExtractor::Cursor c(0);
168 // Each section consists of:
169 // - a one-byte section id,
170 // - the u32 size of the contents, in bytes,
171 // - the actual contents.
172 uint8_t section_id = data.getU8(c);
173 uint64_t payload_len = data.getULEB128(c);
174 if (!c)
175 return !llvm::errorToBool(c.takeError());
177 if (payload_len >= (uint64_t(1) << 32))
178 return false;
180 if (section_id == llvm::wasm::WASM_SEC_CUSTOM) {
181 // Custom sections have the id 0. Their contents consist of a name
182 // identifying the custom section, followed by an uninterpreted sequence
183 // of bytes.
184 lldb::offset_t prev_offset = c.tell();
185 std::optional<ConstString> sect_name = GetWasmString(data, c);
186 if (!sect_name)
187 return false;
189 if (payload_len < c.tell() - prev_offset)
190 return false;
192 uint32_t section_length = payload_len - (c.tell() - prev_offset);
193 m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), section_length,
194 section_id, *sect_name});
195 *offset_ptr += (c.tell() + section_length);
196 } else if (section_id <= llvm::wasm::WASM_SEC_LAST_KNOWN) {
197 m_sect_infos.push_back(section_info{*offset_ptr + c.tell(),
198 static_cast<uint32_t>(payload_len),
199 section_id, ConstString()});
200 *offset_ptr += (c.tell() + payload_len);
201 } else {
202 // Invalid section id.
203 return false;
205 return true;
208 bool ObjectFileWasm::DecodeSections() {
209 lldb::offset_t offset = kWasmHeaderSize;
210 if (IsInMemory()) {
211 offset += m_memory_addr;
214 while (DecodeNextSection(&offset))
216 return true;
219 size_t ObjectFileWasm::GetModuleSpecifications(
220 const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset,
221 offset_t file_offset, offset_t length, ModuleSpecList &specs) {
222 if (!ValidateModuleHeader(data_sp)) {
223 return 0;
226 ModuleSpec spec(file, ArchSpec("wasm32-unknown-unknown-wasm"));
227 specs.Append(spec);
228 return 1;
231 ObjectFileWasm::ObjectFileWasm(const ModuleSP &module_sp, DataBufferSP data_sp,
232 offset_t data_offset, const FileSpec *file,
233 offset_t offset, offset_t length)
234 : ObjectFile(module_sp, file, offset, length, data_sp, data_offset),
235 m_arch("wasm32-unknown-unknown-wasm") {
236 m_data.SetAddressByteSize(4);
239 ObjectFileWasm::ObjectFileWasm(const lldb::ModuleSP &module_sp,
240 lldb::WritableDataBufferSP header_data_sp,
241 const lldb::ProcessSP &process_sp,
242 lldb::addr_t header_addr)
243 : ObjectFile(module_sp, process_sp, header_addr, header_data_sp),
244 m_arch("wasm32-unknown-unknown-wasm") {}
246 bool ObjectFileWasm::ParseHeader() {
247 // We already parsed the header during initialization.
248 return true;
251 void ObjectFileWasm::ParseSymtab(Symtab &symtab) {}
253 static SectionType GetSectionTypeFromName(llvm::StringRef Name) {
254 if (Name.consume_front(".debug_") || Name.consume_front(".zdebug_")) {
255 return llvm::StringSwitch<SectionType>(Name)
256 .Case("abbrev", eSectionTypeDWARFDebugAbbrev)
257 .Case("abbrev.dwo", eSectionTypeDWARFDebugAbbrevDwo)
258 .Case("addr", eSectionTypeDWARFDebugAddr)
259 .Case("aranges", eSectionTypeDWARFDebugAranges)
260 .Case("cu_index", eSectionTypeDWARFDebugCuIndex)
261 .Case("frame", eSectionTypeDWARFDebugFrame)
262 .Case("info", eSectionTypeDWARFDebugInfo)
263 .Case("info.dwo", eSectionTypeDWARFDebugInfoDwo)
264 .Cases("line", "line.dwo", eSectionTypeDWARFDebugLine)
265 .Cases("line_str", "line_str.dwo", eSectionTypeDWARFDebugLineStr)
266 .Case("loc", eSectionTypeDWARFDebugLoc)
267 .Case("loc.dwo", eSectionTypeDWARFDebugLocDwo)
268 .Case("loclists", eSectionTypeDWARFDebugLocLists)
269 .Case("loclists.dwo", eSectionTypeDWARFDebugLocListsDwo)
270 .Case("macinfo", eSectionTypeDWARFDebugMacInfo)
271 .Cases("macro", "macro.dwo", eSectionTypeDWARFDebugMacro)
272 .Case("names", eSectionTypeDWARFDebugNames)
273 .Case("pubnames", eSectionTypeDWARFDebugPubNames)
274 .Case("pubtypes", eSectionTypeDWARFDebugPubTypes)
275 .Case("ranges", eSectionTypeDWARFDebugRanges)
276 .Case("rnglists", eSectionTypeDWARFDebugRngLists)
277 .Case("rnglists.dwo", eSectionTypeDWARFDebugRngListsDwo)
278 .Case("str", eSectionTypeDWARFDebugStr)
279 .Case("str.dwo", eSectionTypeDWARFDebugStrDwo)
280 .Case("str_offsets", eSectionTypeDWARFDebugStrOffsets)
281 .Case("str_offsets.dwo", eSectionTypeDWARFDebugStrOffsetsDwo)
282 .Case("tu_index", eSectionTypeDWARFDebugTuIndex)
283 .Case("types", eSectionTypeDWARFDebugTypes)
284 .Case("types.dwo", eSectionTypeDWARFDebugTypesDwo)
285 .Default(eSectionTypeOther);
287 return eSectionTypeOther;
290 void ObjectFileWasm::CreateSections(SectionList &unified_section_list) {
291 if (m_sections_up)
292 return;
294 m_sections_up = std::make_unique<SectionList>();
296 if (m_sect_infos.empty()) {
297 DecodeSections();
300 for (const section_info &sect_info : m_sect_infos) {
301 SectionType section_type = eSectionTypeOther;
302 ConstString section_name;
303 offset_t file_offset = sect_info.offset & 0xffffffff;
304 addr_t vm_addr = file_offset;
305 size_t vm_size = sect_info.size;
307 if (llvm::wasm::WASM_SEC_CODE == sect_info.id) {
308 section_type = eSectionTypeCode;
309 section_name = ConstString("code");
311 // A code address in DWARF for WebAssembly is the offset of an
312 // instruction relative within the Code section of the WebAssembly file.
313 // For this reason Section::GetFileAddress() must return zero for the
314 // Code section.
315 vm_addr = 0;
316 } else {
317 section_type = GetSectionTypeFromName(sect_info.name.GetStringRef());
318 if (section_type == eSectionTypeOther)
319 continue;
320 section_name = sect_info.name;
321 if (!IsInMemory()) {
322 vm_size = 0;
323 vm_addr = 0;
327 SectionSP section_sp(
328 new Section(GetModule(), // Module to which this section belongs.
329 this, // ObjectFile to which this section belongs and
330 // should read section data from.
331 section_type, // Section ID.
332 section_name, // Section name.
333 section_type, // Section type.
334 vm_addr, // VM address.
335 vm_size, // VM size in bytes of this section.
336 file_offset, // Offset of this section in the file.
337 sect_info.size, // Size of the section as found in the file.
338 0, // Alignment of the section
339 0, // Flags for this section.
340 1)); // Number of host bytes per target byte
341 m_sections_up->AddSection(section_sp);
342 unified_section_list.AddSection(section_sp);
346 bool ObjectFileWasm::SetLoadAddress(Target &target, lldb::addr_t load_address,
347 bool value_is_offset) {
348 /// In WebAssembly, linear memory is disjointed from code space. The VM can
349 /// load multiple instances of a module, which logically share the same code.
350 /// We represent a wasm32 code address with 64-bits, like:
351 /// 63 32 31 0
352 /// +---------------+---------------+
353 /// + module_id | offset |
354 /// +---------------+---------------+
355 /// where the lower 32 bits represent a module offset (relative to the module
356 /// start not to the beginning of the code section) and the higher 32 bits
357 /// uniquely identify the module in the WebAssembly VM.
358 /// In other words, we assume that each WebAssembly module is loaded by the
359 /// engine at a 64-bit address that starts at the boundary of 4GB pages, like
360 /// 0x0000000400000000 for module_id == 4.
361 /// These 64-bit addresses will be used to request code ranges for a specific
362 /// module from the WebAssembly engine.
364 assert(m_memory_addr == LLDB_INVALID_ADDRESS ||
365 m_memory_addr == load_address);
367 ModuleSP module_sp = GetModule();
368 if (!module_sp)
369 return false;
371 DecodeSections();
373 size_t num_loaded_sections = 0;
374 SectionList *section_list = GetSectionList();
375 if (!section_list)
376 return false;
378 const size_t num_sections = section_list->GetSize();
379 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) {
380 SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx));
381 if (target.SetSectionLoadAddress(
382 section_sp, load_address | section_sp->GetFileOffset())) {
383 ++num_loaded_sections;
387 return num_loaded_sections > 0;
390 DataExtractor ObjectFileWasm::ReadImageData(offset_t offset, uint32_t size) {
391 DataExtractor data;
392 if (m_file) {
393 if (offset < GetByteSize()) {
394 size = std::min(static_cast<uint64_t>(size), GetByteSize() - offset);
395 auto buffer_sp = MapFileData(m_file, size, offset);
396 return DataExtractor(buffer_sp, GetByteOrder(), GetAddressByteSize());
398 } else {
399 ProcessSP process_sp(m_process_wp.lock());
400 if (process_sp) {
401 auto data_up = std::make_unique<DataBufferHeap>(size, 0);
402 Status readmem_error;
403 size_t bytes_read = process_sp->ReadMemory(
404 offset, data_up->GetBytes(), data_up->GetByteSize(), readmem_error);
405 if (bytes_read > 0) {
406 DataBufferSP buffer_sp(data_up.release());
407 data.SetData(buffer_sp, 0, buffer_sp->GetByteSize());
412 data.SetByteOrder(GetByteOrder());
413 return data;
416 std::optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() {
417 static ConstString g_sect_name_external_debug_info("external_debug_info");
419 for (const section_info &sect_info : m_sect_infos) {
420 if (g_sect_name_external_debug_info == sect_info.name) {
421 const uint32_t kBufferSize = 1024;
422 DataExtractor section_header_data =
423 ReadImageData(sect_info.offset, kBufferSize);
424 llvm::DataExtractor data = section_header_data.GetAsLLVM();
425 llvm::DataExtractor::Cursor c(0);
426 std::optional<ConstString> symbols_url = GetWasmString(data, c);
427 if (symbols_url)
428 return FileSpec(symbols_url->GetStringRef());
431 return std::nullopt;
434 void ObjectFileWasm::Dump(Stream *s) {
435 ModuleSP module_sp(GetModule());
436 if (!module_sp)
437 return;
439 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
441 llvm::raw_ostream &ostream = s->AsRawOstream();
442 ostream << static_cast<void *>(this) << ": ";
443 s->Indent();
444 ostream << "ObjectFileWasm, file = '";
445 m_file.Dump(ostream);
446 ostream << "', arch = ";
447 ostream << GetArchitecture().GetArchitectureName() << "\n";
449 SectionList *sections = GetSectionList();
450 if (sections) {
451 sections->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true,
452 UINT32_MAX);
454 ostream << "\n";
455 DumpSectionHeaders(ostream);
456 ostream << "\n";
459 void ObjectFileWasm::DumpSectionHeader(llvm::raw_ostream &ostream,
460 const section_info_t &sh) {
461 ostream << llvm::left_justify(sh.name.GetStringRef(), 16) << " "
462 << llvm::format_hex(sh.offset, 10) << " "
463 << llvm::format_hex(sh.size, 10) << " " << llvm::format_hex(sh.id, 6)
464 << "\n";
467 void ObjectFileWasm::DumpSectionHeaders(llvm::raw_ostream &ostream) {
468 ostream << "Section Headers\n";
469 ostream << "IDX name addr size id\n";
470 ostream << "==== ---------------- ---------- ---------- ------\n";
472 uint32_t idx = 0;
473 for (auto pos = m_sect_infos.begin(); pos != m_sect_infos.end();
474 ++pos, ++idx) {
475 ostream << "[" << llvm::format_decimal(idx, 2) << "] ";
476 ObjectFileWasm::DumpSectionHeader(ostream, *pos);