//===-- ObjectFileWasm.cpp ------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ObjectFileWasm.h"
#include "lldb/Core/Module.h"
#include "lldb/Core/ModuleSpec.h"
#include "lldb/Core/PluginManager.h"
#include "lldb/Core/Section.h"
#include "lldb/Target/Process.h"
#include "lldb/Target/SectionLoadList.h"
#include "lldb/Target/Target.h"
#include "lldb/Utility/DataBufferHeap.h"
#include "lldb/Utility/LLDBLog.h"
#include "lldb/Utility/Log.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Format.h"
#include <optional>

30 using namespace lldb_private
;
31 using namespace lldb_private::wasm
;
33 LLDB_PLUGIN_DEFINE(ObjectFileWasm
)
35 static const uint32_t kWasmHeaderSize
=
36 sizeof(llvm::wasm::WasmMagic
) + sizeof(llvm::wasm::WasmVersion
);
38 /// Checks whether the data buffer starts with a valid Wasm module header.
39 static bool ValidateModuleHeader(const DataBufferSP
&data_sp
) {
40 if (!data_sp
|| data_sp
->GetByteSize() < kWasmHeaderSize
)
43 if (llvm::identify_magic(toStringRef(data_sp
->GetData())) !=
44 llvm::file_magic::wasm_object
)
47 const uint8_t *Ptr
= data_sp
->GetBytes() + sizeof(llvm::wasm::WasmMagic
);
49 uint32_t version
= llvm::support::endian::read32le(Ptr
);
50 return version
== llvm::wasm::WasmVersion
;
53 static std::optional
<ConstString
>
54 GetWasmString(llvm::DataExtractor
&data
, llvm::DataExtractor::Cursor
&c
) {
55 // A Wasm string is encoded as a vector of UTF-8 codes.
56 // Vectors are encoded with their u32 length followed by the element
58 uint64_t len
= data
.getULEB128(c
);
60 consumeError(c
.takeError());
64 if (len
>= (uint64_t(1) << 32)) {
68 llvm::SmallVector
<uint8_t, 32> str_storage
;
69 data
.getU8(c
, str_storage
, len
);
71 consumeError(c
.takeError());
75 llvm::StringRef str
= toStringRef(llvm::ArrayRef(str_storage
));
76 return ConstString(str
);
79 char ObjectFileWasm::ID
;
81 void ObjectFileWasm::Initialize() {
82 PluginManager::RegisterPlugin(GetPluginNameStatic(),
83 GetPluginDescriptionStatic(), CreateInstance
,
84 CreateMemoryInstance
, GetModuleSpecifications
);
87 void ObjectFileWasm::Terminate() {
88 PluginManager::UnregisterPlugin(CreateInstance
);
92 ObjectFileWasm::CreateInstance(const ModuleSP
&module_sp
, DataBufferSP data_sp
,
93 offset_t data_offset
, const FileSpec
*file
,
94 offset_t file_offset
, offset_t length
) {
95 Log
*log
= GetLog(LLDBLog::Object
);
98 data_sp
= MapFileData(*file
, length
, file_offset
);
100 LLDB_LOGF(log
, "Failed to create ObjectFileWasm instance for file %s",
101 file
->GetPath().c_str());
108 if (!ValidateModuleHeader(data_sp
)) {
110 "Failed to create ObjectFileWasm instance: invalid Wasm header");
114 // Update the data to contain the entire file if it doesn't contain it
116 if (data_sp
->GetByteSize() < length
) {
117 data_sp
= MapFileData(*file
, length
, file_offset
);
120 "Failed to create ObjectFileWasm instance: cannot read file %s",
121 file
->GetPath().c_str());
127 std::unique_ptr
<ObjectFileWasm
> objfile_up(new ObjectFileWasm(
128 module_sp
, data_sp
, data_offset
, file
, file_offset
, length
));
129 ArchSpec spec
= objfile_up
->GetArchitecture();
130 if (spec
&& objfile_up
->SetModulesArchitecture(spec
)) {
132 "%p ObjectFileWasm::CreateInstance() module = %p (%s), file = %s",
133 static_cast<void *>(objfile_up
.get()),
134 static_cast<void *>(objfile_up
->GetModule().get()),
135 objfile_up
->GetModule()->GetSpecificationDescription().c_str(),
136 file
? file
->GetPath().c_str() : "<NULL>");
137 return objfile_up
.release();
140 LLDB_LOGF(log
, "Failed to create ObjectFileWasm instance");
144 ObjectFile
*ObjectFileWasm::CreateMemoryInstance(const ModuleSP
&module_sp
,
145 WritableDataBufferSP data_sp
,
146 const ProcessSP
&process_sp
,
147 addr_t header_addr
) {
148 if (!ValidateModuleHeader(data_sp
))
151 std::unique_ptr
<ObjectFileWasm
> objfile_up(
152 new ObjectFileWasm(module_sp
, data_sp
, process_sp
, header_addr
));
153 ArchSpec spec
= objfile_up
->GetArchitecture();
154 if (spec
&& objfile_up
->SetModulesArchitecture(spec
))
155 return objfile_up
.release();
159 bool ObjectFileWasm::DecodeNextSection(lldb::offset_t
*offset_ptr
) {
160 // Buffer sufficient to read a section header and find the pointer to the next
162 const uint32_t kBufferSize
= 1024;
163 DataExtractor section_header_data
= ReadImageData(*offset_ptr
, kBufferSize
);
165 llvm::DataExtractor data
= section_header_data
.GetAsLLVM();
166 llvm::DataExtractor::Cursor
c(0);
168 // Each section consists of:
169 // - a one-byte section id,
170 // - the u32 size of the contents, in bytes,
171 // - the actual contents.
172 uint8_t section_id
= data
.getU8(c
);
173 uint64_t payload_len
= data
.getULEB128(c
);
175 return !llvm::errorToBool(c
.takeError());
177 if (payload_len
>= (uint64_t(1) << 32))
180 if (section_id
== llvm::wasm::WASM_SEC_CUSTOM
) {
181 // Custom sections have the id 0. Their contents consist of a name
182 // identifying the custom section, followed by an uninterpreted sequence
184 lldb::offset_t prev_offset
= c
.tell();
185 std::optional
<ConstString
> sect_name
= GetWasmString(data
, c
);
189 if (payload_len
< c
.tell() - prev_offset
)
192 uint32_t section_length
= payload_len
- (c
.tell() - prev_offset
);
193 m_sect_infos
.push_back(section_info
{*offset_ptr
+ c
.tell(), section_length
,
194 section_id
, *sect_name
});
195 *offset_ptr
+= (c
.tell() + section_length
);
196 } else if (section_id
<= llvm::wasm::WASM_SEC_LAST_KNOWN
) {
197 m_sect_infos
.push_back(section_info
{*offset_ptr
+ c
.tell(),
198 static_cast<uint32_t>(payload_len
),
199 section_id
, ConstString()});
200 *offset_ptr
+= (c
.tell() + payload_len
);
202 // Invalid section id.
208 bool ObjectFileWasm::DecodeSections() {
209 lldb::offset_t offset
= kWasmHeaderSize
;
211 offset
+= m_memory_addr
;
214 while (DecodeNextSection(&offset
))
219 size_t ObjectFileWasm::GetModuleSpecifications(
220 const FileSpec
&file
, DataBufferSP
&data_sp
, offset_t data_offset
,
221 offset_t file_offset
, offset_t length
, ModuleSpecList
&specs
) {
222 if (!ValidateModuleHeader(data_sp
)) {
226 ModuleSpec
spec(file
, ArchSpec("wasm32-unknown-unknown-wasm"));
231 ObjectFileWasm::ObjectFileWasm(const ModuleSP
&module_sp
, DataBufferSP data_sp
,
232 offset_t data_offset
, const FileSpec
*file
,
233 offset_t offset
, offset_t length
)
234 : ObjectFile(module_sp
, file
, offset
, length
, data_sp
, data_offset
),
235 m_arch("wasm32-unknown-unknown-wasm") {
236 m_data
.SetAddressByteSize(4);
239 ObjectFileWasm::ObjectFileWasm(const lldb::ModuleSP
&module_sp
,
240 lldb::WritableDataBufferSP header_data_sp
,
241 const lldb::ProcessSP
&process_sp
,
242 lldb::addr_t header_addr
)
243 : ObjectFile(module_sp
, process_sp
, header_addr
, header_data_sp
),
244 m_arch("wasm32-unknown-unknown-wasm") {}
246 bool ObjectFileWasm::ParseHeader() {
247 // We already parsed the header during initialization.
251 void ObjectFileWasm::ParseSymtab(Symtab
&symtab
) {}
253 static SectionType
GetSectionTypeFromName(llvm::StringRef Name
) {
254 if (Name
.consume_front(".debug_") || Name
.consume_front(".zdebug_")) {
255 return llvm::StringSwitch
<SectionType
>(Name
)
256 .Case("abbrev", eSectionTypeDWARFDebugAbbrev
)
257 .Case("abbrev.dwo", eSectionTypeDWARFDebugAbbrevDwo
)
258 .Case("addr", eSectionTypeDWARFDebugAddr
)
259 .Case("aranges", eSectionTypeDWARFDebugAranges
)
260 .Case("cu_index", eSectionTypeDWARFDebugCuIndex
)
261 .Case("frame", eSectionTypeDWARFDebugFrame
)
262 .Case("info", eSectionTypeDWARFDebugInfo
)
263 .Case("info.dwo", eSectionTypeDWARFDebugInfoDwo
)
264 .Cases("line", "line.dwo", eSectionTypeDWARFDebugLine
)
265 .Cases("line_str", "line_str.dwo", eSectionTypeDWARFDebugLineStr
)
266 .Case("loc", eSectionTypeDWARFDebugLoc
)
267 .Case("loc.dwo", eSectionTypeDWARFDebugLocDwo
)
268 .Case("loclists", eSectionTypeDWARFDebugLocLists
)
269 .Case("loclists.dwo", eSectionTypeDWARFDebugLocListsDwo
)
270 .Case("macinfo", eSectionTypeDWARFDebugMacInfo
)
271 .Cases("macro", "macro.dwo", eSectionTypeDWARFDebugMacro
)
272 .Case("names", eSectionTypeDWARFDebugNames
)
273 .Case("pubnames", eSectionTypeDWARFDebugPubNames
)
274 .Case("pubtypes", eSectionTypeDWARFDebugPubTypes
)
275 .Case("ranges", eSectionTypeDWARFDebugRanges
)
276 .Case("rnglists", eSectionTypeDWARFDebugRngLists
)
277 .Case("rnglists.dwo", eSectionTypeDWARFDebugRngListsDwo
)
278 .Case("str", eSectionTypeDWARFDebugStr
)
279 .Case("str.dwo", eSectionTypeDWARFDebugStrDwo
)
280 .Case("str_offsets", eSectionTypeDWARFDebugStrOffsets
)
281 .Case("str_offsets.dwo", eSectionTypeDWARFDebugStrOffsetsDwo
)
282 .Case("tu_index", eSectionTypeDWARFDebugTuIndex
)
283 .Case("types", eSectionTypeDWARFDebugTypes
)
284 .Case("types.dwo", eSectionTypeDWARFDebugTypesDwo
)
285 .Default(eSectionTypeOther
);
287 return eSectionTypeOther
;
290 void ObjectFileWasm::CreateSections(SectionList
&unified_section_list
) {
294 m_sections_up
= std::make_unique
<SectionList
>();
296 if (m_sect_infos
.empty()) {
300 for (const section_info
§_info
: m_sect_infos
) {
301 SectionType section_type
= eSectionTypeOther
;
302 ConstString section_name
;
303 offset_t file_offset
= sect_info
.offset
& 0xffffffff;
304 addr_t vm_addr
= file_offset
;
305 size_t vm_size
= sect_info
.size
;
307 if (llvm::wasm::WASM_SEC_CODE
== sect_info
.id
) {
308 section_type
= eSectionTypeCode
;
309 section_name
= ConstString("code");
311 // A code address in DWARF for WebAssembly is the offset of an
312 // instruction relative within the Code section of the WebAssembly file.
313 // For this reason Section::GetFileAddress() must return zero for the
317 section_type
= GetSectionTypeFromName(sect_info
.name
.GetStringRef());
318 if (section_type
== eSectionTypeOther
)
320 section_name
= sect_info
.name
;
327 SectionSP
section_sp(
328 new Section(GetModule(), // Module to which this section belongs.
329 this, // ObjectFile to which this section belongs and
330 // should read section data from.
331 section_type
, // Section ID.
332 section_name
, // Section name.
333 section_type
, // Section type.
334 vm_addr
, // VM address.
335 vm_size
, // VM size in bytes of this section.
336 file_offset
, // Offset of this section in the file.
337 sect_info
.size
, // Size of the section as found in the file.
338 0, // Alignment of the section
339 0, // Flags for this section.
340 1)); // Number of host bytes per target byte
341 m_sections_up
->AddSection(section_sp
);
342 unified_section_list
.AddSection(section_sp
);
346 bool ObjectFileWasm::SetLoadAddress(Target
&target
, lldb::addr_t load_address
,
347 bool value_is_offset
) {
348 /// In WebAssembly, linear memory is disjointed from code space. The VM can
349 /// load multiple instances of a module, which logically share the same code.
350 /// We represent a wasm32 code address with 64-bits, like:
352 /// +---------------+---------------+
353 /// + module_id | offset |
354 /// +---------------+---------------+
355 /// where the lower 32 bits represent a module offset (relative to the module
356 /// start not to the beginning of the code section) and the higher 32 bits
357 /// uniquely identify the module in the WebAssembly VM.
358 /// In other words, we assume that each WebAssembly module is loaded by the
359 /// engine at a 64-bit address that starts at the boundary of 4GB pages, like
360 /// 0x0000000400000000 for module_id == 4.
361 /// These 64-bit addresses will be used to request code ranges for a specific
362 /// module from the WebAssembly engine.
364 assert(m_memory_addr
== LLDB_INVALID_ADDRESS
||
365 m_memory_addr
== load_address
);
367 ModuleSP module_sp
= GetModule();
373 size_t num_loaded_sections
= 0;
374 SectionList
*section_list
= GetSectionList();
378 const size_t num_sections
= section_list
->GetSize();
379 for (size_t sect_idx
= 0; sect_idx
< num_sections
; ++sect_idx
) {
380 SectionSP
section_sp(section_list
->GetSectionAtIndex(sect_idx
));
381 if (target
.SetSectionLoadAddress(
382 section_sp
, load_address
| section_sp
->GetFileOffset())) {
383 ++num_loaded_sections
;
387 return num_loaded_sections
> 0;
390 DataExtractor
ObjectFileWasm::ReadImageData(offset_t offset
, uint32_t size
) {
393 if (offset
< GetByteSize()) {
394 size
= std::min(static_cast<uint64_t>(size
), GetByteSize() - offset
);
395 auto buffer_sp
= MapFileData(m_file
, size
, offset
);
396 return DataExtractor(buffer_sp
, GetByteOrder(), GetAddressByteSize());
399 ProcessSP
process_sp(m_process_wp
.lock());
401 auto data_up
= std::make_unique
<DataBufferHeap
>(size
, 0);
402 Status readmem_error
;
403 size_t bytes_read
= process_sp
->ReadMemory(
404 offset
, data_up
->GetBytes(), data_up
->GetByteSize(), readmem_error
);
405 if (bytes_read
> 0) {
406 DataBufferSP
buffer_sp(data_up
.release());
407 data
.SetData(buffer_sp
, 0, buffer_sp
->GetByteSize());
412 data
.SetByteOrder(GetByteOrder());
416 std::optional
<FileSpec
> ObjectFileWasm::GetExternalDebugInfoFileSpec() {
417 static ConstString
g_sect_name_external_debug_info("external_debug_info");
419 for (const section_info
§_info
: m_sect_infos
) {
420 if (g_sect_name_external_debug_info
== sect_info
.name
) {
421 const uint32_t kBufferSize
= 1024;
422 DataExtractor section_header_data
=
423 ReadImageData(sect_info
.offset
, kBufferSize
);
424 llvm::DataExtractor data
= section_header_data
.GetAsLLVM();
425 llvm::DataExtractor::Cursor
c(0);
426 std::optional
<ConstString
> symbols_url
= GetWasmString(data
, c
);
428 return FileSpec(symbols_url
->GetStringRef());
434 void ObjectFileWasm::Dump(Stream
*s
) {
435 ModuleSP
module_sp(GetModule());
439 std::lock_guard
<std::recursive_mutex
> guard(module_sp
->GetMutex());
441 llvm::raw_ostream
&ostream
= s
->AsRawOstream();
442 ostream
<< static_cast<void *>(this) << ": ";
444 ostream
<< "ObjectFileWasm, file = '";
445 m_file
.Dump(ostream
);
446 ostream
<< "', arch = ";
447 ostream
<< GetArchitecture().GetArchitectureName() << "\n";
449 SectionList
*sections
= GetSectionList();
451 sections
->Dump(s
->AsRawOstream(), s
->GetIndentLevel(), nullptr, true,
455 DumpSectionHeaders(ostream
);
459 void ObjectFileWasm::DumpSectionHeader(llvm::raw_ostream
&ostream
,
460 const section_info_t
&sh
) {
461 ostream
<< llvm::left_justify(sh
.name
.GetStringRef(), 16) << " "
462 << llvm::format_hex(sh
.offset
, 10) << " "
463 << llvm::format_hex(sh
.size
, 10) << " " << llvm::format_hex(sh
.id
, 6)
467 void ObjectFileWasm::DumpSectionHeaders(llvm::raw_ostream
&ostream
) {
468 ostream
<< "Section Headers\n";
469 ostream
<< "IDX name addr size id\n";
470 ostream
<< "==== ---------------- ---------- ---------- ------\n";
473 for (auto pos
= m_sect_infos
.begin(); pos
!= m_sect_infos
.end();
475 ostream
<< "[" << llvm::format_decimal(idx
, 2) << "] ";
476 ObjectFileWasm::DumpSectionHeader(ostream
, *pos
);