1 //===-- DisassemblerLLVMC.cpp ---------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "DisassemblerLLVMC.h"
11 #include "llvm-c/Disassembler.h"
12 #include "llvm/ADT/SmallString.h"
13 #include "llvm/ADT/StringExtras.h"
14 #include "llvm/MC/MCAsmInfo.h"
15 #include "llvm/MC/MCContext.h"
16 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
17 #include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h"
18 #include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
19 #include "llvm/MC/MCInst.h"
20 #include "llvm/MC/MCInstPrinter.h"
21 #include "llvm/MC/MCInstrAnalysis.h"
22 #include "llvm/MC/MCInstrInfo.h"
23 #include "llvm/MC/MCRegisterInfo.h"
24 #include "llvm/MC/MCSubtargetInfo.h"
25 #include "llvm/MC/MCTargetOptions.h"
26 #include "llvm/MC/TargetRegistry.h"
27 #include "llvm/Support/ErrorHandling.h"
28 #include "llvm/Support/ScopedPrinter.h"
29 #include "llvm/Support/TargetSelect.h"
30 #include "llvm/TargetParser/AArch64TargetParser.h"
32 #include "lldb/Core/Address.h"
33 #include "lldb/Core/Module.h"
34 #include "lldb/Symbol/SymbolContext.h"
35 #include "lldb/Target/ExecutionContext.h"
36 #include "lldb/Target/Process.h"
37 #include "lldb/Target/RegisterContext.h"
38 #include "lldb/Target/SectionLoadList.h"
39 #include "lldb/Target/StackFrame.h"
40 #include "lldb/Target/Target.h"
41 #include "lldb/Utility/DataExtractor.h"
42 #include "lldb/Utility/LLDBLog.h"
43 #include "lldb/Utility/Log.h"
44 #include "lldb/Utility/RegularExpression.h"
45 #include "lldb/Utility/Stream.h"
49 using namespace lldb_private
;
51 LLDB_PLUGIN_DEFINE(DisassemblerLLVMC
)
53 class DisassemblerLLVMC::MCDisasmInstance
{
55 static std::unique_ptr
<MCDisasmInstance
>
56 Create(const char *triple
, const char *cpu
, const char *features_str
,
57 unsigned flavor
, DisassemblerLLVMC
&owner
);
59 ~MCDisasmInstance() = default;
61 uint64_t GetMCInst(const uint8_t *opcode_data
, size_t opcode_data_len
,
62 lldb::addr_t pc
, llvm::MCInst
&mc_inst
) const;
63 void PrintMCInst(llvm::MCInst
&mc_inst
, lldb::addr_t pc
,
64 std::string
&inst_string
, std::string
&comments_string
);
65 void SetStyle(bool use_hex_immed
, HexImmediateStyle hex_style
);
66 void SetUseColor(bool use_color
);
67 bool GetUseColor() const;
68 bool CanBranch(llvm::MCInst
&mc_inst
) const;
69 bool HasDelaySlot(llvm::MCInst
&mc_inst
) const;
70 bool IsCall(llvm::MCInst
&mc_inst
) const;
71 bool IsLoad(llvm::MCInst
&mc_inst
) const;
72 bool IsAuthenticated(llvm::MCInst
&mc_inst
) const;
75 MCDisasmInstance(std::unique_ptr
<llvm::MCInstrInfo
> &&instr_info_up
,
76 std::unique_ptr
<llvm::MCRegisterInfo
> &®_info_up
,
77 std::unique_ptr
<llvm::MCSubtargetInfo
> &&subtarget_info_up
,
78 std::unique_ptr
<llvm::MCAsmInfo
> &&asm_info_up
,
79 std::unique_ptr
<llvm::MCContext
> &&context_up
,
80 std::unique_ptr
<llvm::MCDisassembler
> &&disasm_up
,
81 std::unique_ptr
<llvm::MCInstPrinter
> &&instr_printer_up
,
82 std::unique_ptr
<llvm::MCInstrAnalysis
> &&instr_analysis_up
);
84 std::unique_ptr
<llvm::MCInstrInfo
> m_instr_info_up
;
85 std::unique_ptr
<llvm::MCRegisterInfo
> m_reg_info_up
;
86 std::unique_ptr
<llvm::MCSubtargetInfo
> m_subtarget_info_up
;
87 std::unique_ptr
<llvm::MCAsmInfo
> m_asm_info_up
;
88 std::unique_ptr
<llvm::MCContext
> m_context_up
;
89 std::unique_ptr
<llvm::MCDisassembler
> m_disasm_up
;
90 std::unique_ptr
<llvm::MCInstPrinter
> m_instr_printer_up
;
91 std::unique_ptr
<llvm::MCInstrAnalysis
> m_instr_analysis_up
;
/// These are the three values deciding instruction control flow kind.
/// The InstructionLengthDecode function decodes an instruction and returns
/// this struct.
///
/// primary_opcode
///    Primary opcode of the instruction.
///    For one-byte opcode instruction, it's the first byte after prefix.
///    For two- and three-byte opcodes, it's the second byte.
///
/// opcode_len
///    The length of opcode in bytes. Valid opcode lengths are 1, 2, or 3.
///
/// modrm
///    ModR/M byte of the instruction.
///    Bits[7:6] indicate MOD. Bits[5:3] specify a register and R/M bits[2:0]
///    may contain a register or specify an addressing mode, depending on MOD.
struct InstructionOpcodeAndModrm {
  uint8_t primary_opcode;
  uint8_t opcode_len;
  uint8_t modrm;
};
/// Determine the InstructionControlFlowKind based on opcode and modrm bytes.
/// Refer to http://ref.x86asm.net/coder.html for the full list of opcodes and
/// instructions.
///
/// \param[in] opcode_and_modrm
///    Contains primary_opcode byte, its length, and ModR/M byte.
///    Refer to the struct InstructionOpcodeAndModrm for details.
///
/// \return
///    The control flow kind of the instruction or
///    eInstructionControlFlowKindOther if the instruction doesn't affect
///    the control flow of the program.
129 lldb::InstructionControlFlowKind
130 MapOpcodeIntoControlFlowKind(InstructionOpcodeAndModrm opcode_and_modrm
) {
131 uint8_t opcode
= opcode_and_modrm
.primary_opcode
;
132 uint8_t opcode_len
= opcode_and_modrm
.opcode_len
;
133 uint8_t modrm
= opcode_and_modrm
.modrm
;
136 return lldb::eInstructionControlFlowKindOther
;
138 if (opcode
>= 0x70 && opcode
<= 0x7F) {
140 return lldb::eInstructionControlFlowKindCondJump
;
142 return lldb::eInstructionControlFlowKindOther
;
145 if (opcode
>= 0x80 && opcode
<= 0x8F) {
147 return lldb::eInstructionControlFlowKindCondJump
;
149 return lldb::eInstructionControlFlowKindOther
;
155 return lldb::eInstructionControlFlowKindFarCall
;
158 if (opcode_len
== 1) {
159 uint8_t modrm_reg
= (modrm
>> 3) & 7;
161 return lldb::eInstructionControlFlowKindCall
;
162 else if (modrm_reg
== 3)
163 return lldb::eInstructionControlFlowKindFarCall
;
164 else if (modrm_reg
== 4)
165 return lldb::eInstructionControlFlowKindJump
;
166 else if (modrm_reg
== 5)
167 return lldb::eInstructionControlFlowKindFarJump
;
172 return lldb::eInstructionControlFlowKindCall
;
179 return lldb::eInstructionControlFlowKindFarCall
;
183 return lldb::eInstructionControlFlowKindFarReturn
;
188 return lldb::eInstructionControlFlowKindJump
;
192 return lldb::eInstructionControlFlowKindFarJump
;
199 return lldb::eInstructionControlFlowKindCondJump
;
204 return lldb::eInstructionControlFlowKindReturn
;
209 return lldb::eInstructionControlFlowKindFarReturn
;
214 return lldb::eInstructionControlFlowKindFarCall
;
219 return lldb::eInstructionControlFlowKindFarReturn
;
222 if (opcode_len
== 2) {
225 return lldb::eInstructionControlFlowKindFarCall
;
228 return lldb::eInstructionControlFlowKindFarReturn
;
238 return lldb::eInstructionControlFlowKindOther
;
/// Decode an instruction into opcode, modrm and opcode_len.
/// Refer to http://ref.x86asm.net/coder.html for the instruction bytes layout.
/// Opcodes in x86 are generally the first byte of an instruction, though
/// two-byte instructions and prefixes exist. ModR/M is the byte following the
/// opcode and adds additional information for how the instruction is executed.
///
/// \param[in] inst_bytes
///    Raw bytes of the instruction
///
/// \param[in] bytes_len
///    The length of the inst_bytes array.
///
/// \param[in] is_exec_mode_64b
///    If true, the execution mode is 64 bit.
///
/// \return
///    Returns decoded instruction as struct InstructionOpcodeAndModrm, holding
///    primary_opcode, opcode_len and modrm byte. Refer to the struct definition
///    for more details.
///    Otherwise if the given instruction is invalid, returns std::nullopt.
262 std::optional
<InstructionOpcodeAndModrm
>
263 InstructionLengthDecode(const uint8_t *inst_bytes
, int bytes_len
,
264 bool is_exec_mode_64b
) {
266 bool prefix_done
= false;
267 InstructionOpcodeAndModrm ret
= {0, 0, 0};
269 // In most cases, the primary_opcode is the first byte of the instruction
270 // but some instructions have a prefix to be skipped for these calculations.
271 // The following mapping is inspired from libipt's instruction decoding logic
273 while (!prefix_done
) {
274 if (op_idx
>= bytes_len
)
277 ret
.primary_opcode
= inst_bytes
[op_idx
];
278 switch (ret
.primary_opcode
) {
286 // prefix_osz, prefix_asz
289 // prefix_lock, prefix_f2, prefix_f3
313 if (is_exec_mode_64b
)
321 if (!is_exec_mode_64b
&& (inst_bytes
[op_idx
+ 1] & 0xc0) != 0xc0) {
327 ret
.primary_opcode
= inst_bytes
[op_idx
+ 2];
328 ret
.modrm
= inst_bytes
[op_idx
+ 3];
332 if (!is_exec_mode_64b
&& (inst_bytes
[op_idx
+ 1] & 0xc0) != 0xc0) {
336 ret
.opcode_len
= inst_bytes
[op_idx
+ 1] & 0x1f;
337 ret
.primary_opcode
= inst_bytes
[op_idx
+ 3];
338 ret
.modrm
= inst_bytes
[op_idx
+ 4];
343 if (!is_exec_mode_64b
&& (inst_bytes
[op_idx
+ 1] & 0xc0) != 0xc0) {
347 ret
.opcode_len
= inst_bytes
[op_idx
+ 1] & 0x03;
348 ret
.primary_opcode
= inst_bytes
[op_idx
+ 4];
349 ret
.modrm
= inst_bytes
[op_idx
+ 5];
358 ret
.primary_opcode
= inst_bytes
[op_idx
];
359 ret
.modrm
= inst_bytes
[op_idx
+ 1];
362 // If the first opcode is 0F, it's two- or three- byte opcodes.
363 if (ret
.primary_opcode
== 0x0F) {
364 ret
.primary_opcode
= inst_bytes
[++op_idx
]; // get the next byte
366 if (ret
.primary_opcode
== 0x38) {
368 ret
.primary_opcode
= inst_bytes
[++op_idx
]; // get the next byte
369 ret
.modrm
= inst_bytes
[op_idx
+ 1];
370 } else if (ret
.primary_opcode
== 0x3A) {
372 ret
.primary_opcode
= inst_bytes
[++op_idx
];
373 ret
.modrm
= inst_bytes
[op_idx
+ 1];
374 } else if ((ret
.primary_opcode
& 0xf8) == 0x38) {
376 ret
.primary_opcode
= inst_bytes
[++op_idx
];
377 ret
.modrm
= inst_bytes
[op_idx
+ 1];
378 } else if (ret
.primary_opcode
== 0x0F) {
380 // opcode is 0x0F, no needs to update
381 ret
.modrm
= inst_bytes
[op_idx
+ 1];
384 ret
.modrm
= inst_bytes
[op_idx
+ 1];
391 lldb::InstructionControlFlowKind
GetControlFlowKind(bool is_exec_mode_64b
,
393 std::optional
<InstructionOpcodeAndModrm
> ret
;
395 if (m_opcode
.GetOpcodeBytes() == nullptr || m_opcode
.GetByteSize() <= 0) {
396 // x86_64 and i386 instructions are categorized as Opcode::Type::eTypeBytes
397 return lldb::eInstructionControlFlowKindUnknown
;
400 // Opcode bytes will be decoded into primary_opcode, modrm and opcode length.
401 // These are the three values deciding instruction control flow kind.
402 ret
= InstructionLengthDecode((const uint8_t *)m_opcode
.GetOpcodeBytes(),
403 m_opcode
.GetByteSize(), is_exec_mode_64b
);
405 return lldb::eInstructionControlFlowKindUnknown
;
407 return MapOpcodeIntoControlFlowKind(*ret
);
412 class InstructionLLVMC
: public lldb_private::Instruction
{
/// Create an instruction at \p address with address class \p addr_class.
///
/// Only a weak reference to \p disasm is stored (m_disasm_wp). It is taken
/// from disasm.shared_from_this() and cast down to DisassemblerLLVMC, so the
/// instruction does not keep the disassembler alive.
InstructionLLVMC(DisassemblerLLVMC &disasm,
                 const lldb_private::Address &address,
                 AddressClass addr_class)
    : Instruction(address, addr_class),
      m_disasm_wp(std::static_pointer_cast<DisassemblerLLVMC>(
          disasm.shared_from_this())) {}
/// Nothing to release explicitly; members clean up after themselves.
~InstructionLLVMC() override = default;
423 bool DoesBranch() override
{
425 return m_does_branch
;
428 bool HasDelaySlot() override
{
430 return m_has_delay_slot
;
433 bool IsLoad() override
{
438 bool IsAuthenticated() override
{
440 return m_is_authenticated
;
443 DisassemblerLLVMC::MCDisasmInstance
*GetDisasmToUse(bool &is_alternate_isa
) {
444 DisassemblerScope
disasm(*this);
445 return GetDisasmToUse(is_alternate_isa
, disasm
);
448 size_t Decode(const lldb_private::Disassembler
&disassembler
,
449 const lldb_private::DataExtractor
&data
,
450 lldb::offset_t data_offset
) override
{
451 // All we have to do is read the opcode which can be easy for some
454 DisassemblerScope
disasm(*this);
456 const ArchSpec
&arch
= disasm
->GetArchitecture();
457 const lldb::ByteOrder byte_order
= data
.GetByteOrder();
459 const uint32_t min_op_byte_size
= arch
.GetMinimumOpcodeByteSize();
460 const uint32_t max_op_byte_size
= arch
.GetMaximumOpcodeByteSize();
461 if (min_op_byte_size
== max_op_byte_size
) {
462 // Fixed size instructions, just read that amount of data.
463 if (!data
.ValidOffsetForDataOfSize(data_offset
, min_op_byte_size
))
466 switch (min_op_byte_size
) {
468 m_opcode
.SetOpcode8(data
.GetU8(&data_offset
), byte_order
);
473 m_opcode
.SetOpcode16(data
.GetU16(&data_offset
), byte_order
);
478 m_opcode
.SetOpcode32(data
.GetU32(&data_offset
), byte_order
);
483 m_opcode
.SetOpcode64(data
.GetU64(&data_offset
), byte_order
);
488 m_opcode
.SetOpcodeBytes(data
.PeekData(data_offset
, min_op_byte_size
),
495 bool is_alternate_isa
= false;
496 DisassemblerLLVMC::MCDisasmInstance
*mc_disasm_ptr
=
497 GetDisasmToUse(is_alternate_isa
, disasm
);
499 const llvm::Triple::ArchType machine
= arch
.GetMachine();
500 if (machine
== llvm::Triple::arm
|| machine
== llvm::Triple::thumb
) {
501 if (machine
== llvm::Triple::thumb
|| is_alternate_isa
) {
502 uint32_t thumb_opcode
= data
.GetU16(&data_offset
);
503 if ((thumb_opcode
& 0xe000) != 0xe000 ||
504 ((thumb_opcode
& 0x1800u
) == 0)) {
505 m_opcode
.SetOpcode16(thumb_opcode
, byte_order
);
509 thumb_opcode
|= data
.GetU16(&data_offset
);
510 m_opcode
.SetOpcode16_2(thumb_opcode
, byte_order
);
514 m_opcode
.SetOpcode32(data
.GetU32(&data_offset
), byte_order
);
518 // The opcode isn't evenly sized, so we need to actually use the llvm
519 // disassembler to parse it and get the size.
520 uint8_t *opcode_data
=
521 const_cast<uint8_t *>(data
.PeekData(data_offset
, 1));
522 const size_t opcode_data_len
= data
.BytesLeft(data_offset
);
523 const addr_t pc
= m_address
.GetFileAddress();
526 const size_t inst_size
=
527 mc_disasm_ptr
->GetMCInst(opcode_data
, opcode_data_len
, pc
, inst
);
531 m_opcode
.SetOpcodeBytes(opcode_data
, inst_size
);
536 return m_opcode
.GetByteSize();
541 void AppendComment(std::string
&description
) {
542 if (m_comment
.empty())
543 m_comment
.swap(description
);
545 m_comment
.append(", ");
546 m_comment
.append(description
);
550 lldb::InstructionControlFlowKind
551 GetControlFlowKind(const lldb_private::ExecutionContext
*exe_ctx
) override
{
552 DisassemblerScope
disasm(*this, exe_ctx
);
554 if (disasm
->GetArchitecture().GetMachine() == llvm::Triple::x86
)
555 return x86::GetControlFlowKind(/*is_64b=*/false, m_opcode
);
556 else if (disasm
->GetArchitecture().GetMachine() == llvm::Triple::x86_64
)
557 return x86::GetControlFlowKind(/*is_64b=*/true, m_opcode
);
560 return eInstructionControlFlowKindUnknown
;
563 void CalculateMnemonicOperandsAndComment(
564 const lldb_private::ExecutionContext
*exe_ctx
) override
{
566 const AddressClass address_class
= GetAddressClass();
568 if (m_opcode
.GetData(data
)) {
569 std::string out_string
;
570 std::string markup_out_string
;
571 std::string comment_string
;
572 std::string markup_comment_string
;
574 DisassemblerScope
disasm(*this, exe_ctx
);
576 DisassemblerLLVMC::MCDisasmInstance
*mc_disasm_ptr
;
578 if (address_class
== AddressClass::eCodeAlternateISA
)
579 mc_disasm_ptr
= disasm
->m_alternate_disasm_up
.get();
581 mc_disasm_ptr
= disasm
->m_disasm_up
.get();
583 lldb::addr_t pc
= m_address
.GetFileAddress();
584 m_using_file_addr
= true;
586 const bool data_from_file
= disasm
->m_data_from_file
;
587 bool use_hex_immediates
= true;
588 Disassembler::HexImmediateStyle hex_style
= Disassembler::eHexStyleC
;
591 Target
*target
= exe_ctx
->GetTargetPtr();
593 use_hex_immediates
= target
->GetUseHexImmediates();
594 hex_style
= target
->GetHexImmediateStyle();
596 if (!data_from_file
) {
597 const lldb::addr_t load_addr
= m_address
.GetLoadAddress(target
);
598 if (load_addr
!= LLDB_INVALID_ADDRESS
) {
600 m_using_file_addr
= false;
606 const uint8_t *opcode_data
= data
.GetDataStart();
607 const size_t opcode_data_len
= data
.GetByteSize();
610 mc_disasm_ptr
->GetMCInst(opcode_data
, opcode_data_len
, pc
, inst
);
613 mc_disasm_ptr
->SetStyle(use_hex_immediates
, hex_style
);
615 const bool saved_use_color
= mc_disasm_ptr
->GetUseColor();
616 mc_disasm_ptr
->SetUseColor(false);
617 mc_disasm_ptr
->PrintMCInst(inst
, pc
, out_string
, comment_string
);
618 mc_disasm_ptr
->SetUseColor(true);
619 mc_disasm_ptr
->PrintMCInst(inst
, pc
, markup_out_string
,
620 markup_comment_string
);
621 mc_disasm_ptr
->SetUseColor(saved_use_color
);
623 if (!comment_string
.empty()) {
624 AppendComment(comment_string
);
628 if (inst_size
== 0) {
629 m_comment
.assign("unknown opcode");
630 inst_size
= m_opcode
.GetByteSize();
631 StreamString mnemonic_strm
;
632 lldb::offset_t offset
= 0;
633 lldb::ByteOrder byte_order
= data
.GetByteOrder();
636 const uint8_t uval8
= data
.GetU8(&offset
);
637 m_opcode
.SetOpcode8(uval8
, byte_order
);
638 m_opcode_name
.assign(".byte");
639 mnemonic_strm
.Printf("0x%2.2x", uval8
);
642 const uint16_t uval16
= data
.GetU16(&offset
);
643 m_opcode
.SetOpcode16(uval16
, byte_order
);
644 m_opcode_name
.assign(".short");
645 mnemonic_strm
.Printf("0x%4.4x", uval16
);
648 const uint32_t uval32
= data
.GetU32(&offset
);
649 m_opcode
.SetOpcode32(uval32
, byte_order
);
650 m_opcode_name
.assign(".long");
651 mnemonic_strm
.Printf("0x%8.8x", uval32
);
654 const uint64_t uval64
= data
.GetU64(&offset
);
655 m_opcode
.SetOpcode64(uval64
, byte_order
);
656 m_opcode_name
.assign(".quad");
657 mnemonic_strm
.Printf("0x%16.16" PRIx64
, uval64
);
663 const uint8_t *bytes
= data
.PeekData(offset
, inst_size
);
664 if (bytes
== nullptr)
666 m_opcode_name
.assign(".byte");
667 m_opcode
.SetOpcodeBytes(bytes
, inst_size
);
668 mnemonic_strm
.Printf("0x%2.2x", bytes
[0]);
669 for (uint32_t i
= 1; i
< inst_size
; ++i
)
670 mnemonic_strm
.Printf(" 0x%2.2x", bytes
[i
]);
674 m_mnemonics
= std::string(mnemonic_strm
.GetString());
678 static RegularExpression
s_regex(
679 llvm::StringRef("[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?"));
681 llvm::SmallVector
<llvm::StringRef
, 4> matches
;
682 if (s_regex
.Execute(out_string
, &matches
)) {
683 m_opcode_name
= matches
[1].str();
684 m_mnemonics
= matches
[2].str();
687 if (s_regex
.Execute(markup_out_string
, &matches
)) {
688 m_markup_opcode_name
= matches
[1].str();
689 m_markup_mnemonics
= matches
[2].str();
/// Returns the m_is_valid flag, which is initialized to false.
bool IsValid() const { return m_is_valid; }
/// Returns m_using_file_addr. CalculateMnemonicOperandsAndComment() sets it to
/// true when it starts from the file address and clears it once a valid load
/// address is available.
bool UsingFileAddress() const { return m_using_file_addr; }
/// Returns the byte size of the opcode currently stored in m_opcode.
size_t GetByteSize() const { return m_opcode.GetByteSize(); }
700 /// Grants exclusive access to the disassembler and initializes it with the
701 /// given InstructionLLVMC and an optional ExecutionContext.
702 class DisassemblerScope
{
703 std::shared_ptr
<DisassemblerLLVMC
> m_disasm
;
706 explicit DisassemblerScope(
708 const lldb_private::ExecutionContext
*exe_ctx
= nullptr)
709 : m_disasm(i
.m_disasm_wp
.lock()) {
710 m_disasm
->m_mutex
.lock();
711 m_disasm
->m_inst
= &i
;
712 m_disasm
->m_exe_ctx
= exe_ctx
;
714 ~DisassemblerScope() { m_disasm
->m_mutex
.unlock(); }
716 /// Evaluates to true if this scope contains a valid disassembler.
717 operator bool() const { return static_cast<bool>(m_disasm
); }
719 std::shared_ptr
<DisassemblerLLVMC
> operator->() { return m_disasm
; }
722 static llvm::StringRef::const_iterator
723 ConsumeWhitespace(llvm::StringRef::const_iterator osi
,
724 llvm::StringRef::const_iterator ose
) {
739 static std::pair
<bool, llvm::StringRef::const_iterator
>
740 ConsumeChar(llvm::StringRef::const_iterator osi
, const char c
,
741 llvm::StringRef::const_iterator ose
) {
744 osi
= ConsumeWhitespace(osi
, ose
);
745 if (osi
!= ose
&& *osi
== c
) {
750 return std::make_pair(found
, osi
);
753 static std::pair
<Operand
, llvm::StringRef::const_iterator
>
754 ParseRegisterName(llvm::StringRef::const_iterator osi
,
755 llvm::StringRef::const_iterator ose
) {
757 ret
.m_type
= Operand::Type::Register
;
760 osi
= ConsumeWhitespace(osi
, ose
);
763 if (*osi
>= '0' && *osi
<= '9') {
765 return std::make_pair(Operand(), osi
);
769 } else if (*osi
>= 'a' && *osi
<= 'z') {
775 return std::make_pair(Operand(), osi
);
777 ret
.m_register
= ConstString(str
);
778 return std::make_pair(ret
, osi
);
782 return std::make_pair(Operand(), osi
);
790 ret
.m_register
= ConstString(str
);
791 return std::make_pair(ret
, osi
);
794 static std::pair
<Operand
, llvm::StringRef::const_iterator
>
795 ParseImmediate(llvm::StringRef::const_iterator osi
,
796 llvm::StringRef::const_iterator ose
) {
798 ret
.m_type
= Operand::Type::Immediate
;
802 osi
= ConsumeWhitespace(osi
, ose
);
805 if (*osi
>= '0' && *osi
<= '9') {
807 } else if (*osi
>= 'a' && *osi
<= 'f') {
811 return std::make_pair(Operand(), osi
);
817 return std::make_pair(Operand(), osi
);
819 ret
.m_immediate
= strtoull(str
.c_str(), nullptr, 0);
820 return std::make_pair(ret
, osi
);
823 if (!str
.compare("0")) {
827 return std::make_pair(Operand(), osi
);
833 return std::make_pair(Operand(), osi
);
838 ret
.m_negative
= true;
840 return std::make_pair(Operand(), osi
);
847 ret
.m_immediate
= strtoull(str
.c_str(), nullptr, 0);
848 return std::make_pair(ret
, osi
);
852 static std::pair
<Operand
, llvm::StringRef::const_iterator
>
853 ParseIntelIndexedAccess(llvm::StringRef::const_iterator osi
,
854 llvm::StringRef::const_iterator ose
) {
855 std::pair
<Operand
, llvm::StringRef::const_iterator
> offset_and_iterator
=
856 ParseImmediate(osi
, ose
);
857 if (offset_and_iterator
.first
.IsValid()) {
858 osi
= offset_and_iterator
.second
;
862 std::tie(found
, osi
) = ConsumeChar(osi
, '(', ose
);
864 return std::make_pair(Operand(), osi
);
867 std::pair
<Operand
, llvm::StringRef::const_iterator
> base_and_iterator
=
868 ParseRegisterName(osi
, ose
);
869 if (base_and_iterator
.first
.IsValid()) {
870 osi
= base_and_iterator
.second
;
872 return std::make_pair(Operand(), osi
);
875 std::tie(found
, osi
) = ConsumeChar(osi
, ',', ose
);
877 return std::make_pair(Operand(), osi
);
880 std::pair
<Operand
, llvm::StringRef::const_iterator
> index_and_iterator
=
881 ParseRegisterName(osi
, ose
);
882 if (index_and_iterator
.first
.IsValid()) {
883 osi
= index_and_iterator
.second
;
885 return std::make_pair(Operand(), osi
);
888 std::tie(found
, osi
) = ConsumeChar(osi
, ',', ose
);
890 return std::make_pair(Operand(), osi
);
893 std::pair
<Operand
, llvm::StringRef::const_iterator
>
894 multiplier_and_iterator
= ParseImmediate(osi
, ose
);
895 if (index_and_iterator
.first
.IsValid()) {
896 osi
= index_and_iterator
.second
;
898 return std::make_pair(Operand(), osi
);
901 std::tie(found
, osi
) = ConsumeChar(osi
, ')', ose
);
903 return std::make_pair(Operand(), osi
);
907 product
.m_type
= Operand::Type::Product
;
908 product
.m_children
.push_back(index_and_iterator
.first
);
909 product
.m_children
.push_back(multiplier_and_iterator
.first
);
912 index
.m_type
= Operand::Type::Sum
;
913 index
.m_children
.push_back(base_and_iterator
.first
);
914 index
.m_children
.push_back(product
);
916 if (offset_and_iterator
.first
.IsValid()) {
918 offset
.m_type
= Operand::Type::Sum
;
919 offset
.m_children
.push_back(offset_and_iterator
.first
);
920 offset
.m_children
.push_back(index
);
923 deref
.m_type
= Operand::Type::Dereference
;
924 deref
.m_children
.push_back(offset
);
925 return std::make_pair(deref
, osi
);
928 deref
.m_type
= Operand::Type::Dereference
;
929 deref
.m_children
.push_back(index
);
930 return std::make_pair(deref
, osi
);
935 static std::pair
<Operand
, llvm::StringRef::const_iterator
>
936 ParseIntelDerefAccess(llvm::StringRef::const_iterator osi
,
937 llvm::StringRef::const_iterator ose
) {
938 std::pair
<Operand
, llvm::StringRef::const_iterator
> offset_and_iterator
=
939 ParseImmediate(osi
, ose
);
940 if (offset_and_iterator
.first
.IsValid()) {
941 osi
= offset_and_iterator
.second
;
945 std::tie(found
, osi
) = ConsumeChar(osi
, '(', ose
);
947 return std::make_pair(Operand(), osi
);
950 std::pair
<Operand
, llvm::StringRef::const_iterator
> base_and_iterator
=
951 ParseRegisterName(osi
, ose
);
952 if (base_and_iterator
.first
.IsValid()) {
953 osi
= base_and_iterator
.second
;
955 return std::make_pair(Operand(), osi
);
958 std::tie(found
, osi
) = ConsumeChar(osi
, ')', ose
);
960 return std::make_pair(Operand(), osi
);
963 if (offset_and_iterator
.first
.IsValid()) {
965 offset
.m_type
= Operand::Type::Sum
;
966 offset
.m_children
.push_back(offset_and_iterator
.first
);
967 offset
.m_children
.push_back(base_and_iterator
.first
);
970 deref
.m_type
= Operand::Type::Dereference
;
971 deref
.m_children
.push_back(offset
);
972 return std::make_pair(deref
, osi
);
975 deref
.m_type
= Operand::Type::Dereference
;
976 deref
.m_children
.push_back(base_and_iterator
.first
);
977 return std::make_pair(deref
, osi
);
982 static std::pair
<Operand
, llvm::StringRef::const_iterator
>
983 ParseARMOffsetAccess(llvm::StringRef::const_iterator osi
,
984 llvm::StringRef::const_iterator ose
) {
986 std::tie(found
, osi
) = ConsumeChar(osi
, '[', ose
);
988 return std::make_pair(Operand(), osi
);
991 std::pair
<Operand
, llvm::StringRef::const_iterator
> base_and_iterator
=
992 ParseRegisterName(osi
, ose
);
993 if (base_and_iterator
.first
.IsValid()) {
994 osi
= base_and_iterator
.second
;
996 return std::make_pair(Operand(), osi
);
999 std::tie(found
, osi
) = ConsumeChar(osi
, ',', ose
);
1001 return std::make_pair(Operand(), osi
);
1004 std::pair
<Operand
, llvm::StringRef::const_iterator
> offset_and_iterator
=
1005 ParseImmediate(osi
, ose
);
1006 if (offset_and_iterator
.first
.IsValid()) {
1007 osi
= offset_and_iterator
.second
;
1010 std::tie(found
, osi
) = ConsumeChar(osi
, ']', ose
);
1012 return std::make_pair(Operand(), osi
);
1016 offset
.m_type
= Operand::Type::Sum
;
1017 offset
.m_children
.push_back(offset_and_iterator
.first
);
1018 offset
.m_children
.push_back(base_and_iterator
.first
);
1021 deref
.m_type
= Operand::Type::Dereference
;
1022 deref
.m_children
.push_back(offset
);
1023 return std::make_pair(deref
, osi
);
1027 static std::pair
<Operand
, llvm::StringRef::const_iterator
>
1028 ParseARMDerefAccess(llvm::StringRef::const_iterator osi
,
1029 llvm::StringRef::const_iterator ose
) {
1031 std::tie(found
, osi
) = ConsumeChar(osi
, '[', ose
);
1033 return std::make_pair(Operand(), osi
);
1036 std::pair
<Operand
, llvm::StringRef::const_iterator
> base_and_iterator
=
1037 ParseRegisterName(osi
, ose
);
1038 if (base_and_iterator
.first
.IsValid()) {
1039 osi
= base_and_iterator
.second
;
1041 return std::make_pair(Operand(), osi
);
1044 std::tie(found
, osi
) = ConsumeChar(osi
, ']', ose
);
1046 return std::make_pair(Operand(), osi
);
1050 deref
.m_type
= Operand::Type::Dereference
;
1051 deref
.m_children
.push_back(base_and_iterator
.first
);
1052 return std::make_pair(deref
, osi
);
1055 static void DumpOperand(const Operand
&op
, Stream
&s
) {
1056 switch (op
.m_type
) {
1057 case Operand::Type::Dereference
:
1059 DumpOperand(op
.m_children
[0], s
);
1061 case Operand::Type::Immediate
:
1062 if (op
.m_negative
) {
1065 s
.PutCString(llvm::to_string(op
.m_immediate
));
1067 case Operand::Type::Invalid
:
1068 s
.PutCString("Invalid");
1070 case Operand::Type::Product
:
1072 DumpOperand(op
.m_children
[0], s
);
1074 DumpOperand(op
.m_children
[1], s
);
1077 case Operand::Type::Register
:
1078 s
.PutCString(op
.m_register
.GetStringRef());
1080 case Operand::Type::Sum
:
1082 DumpOperand(op
.m_children
[0], s
);
1084 DumpOperand(op
.m_children
[1], s
);
1091 llvm::SmallVectorImpl
<Instruction::Operand
> &operands
) override
{
1092 const char *operands_string
= GetOperands(nullptr);
1094 if (!operands_string
) {
1098 llvm::StringRef
operands_ref(operands_string
);
1100 llvm::StringRef::const_iterator osi
= operands_ref
.begin();
1101 llvm::StringRef::const_iterator ose
= operands_ref
.end();
1103 while (osi
!= ose
) {
1105 llvm::StringRef::const_iterator iter
;
1107 if ((std::tie(operand
, iter
) = ParseIntelIndexedAccess(osi
, ose
),
1108 operand
.IsValid()) ||
1109 (std::tie(operand
, iter
) = ParseIntelDerefAccess(osi
, ose
),
1110 operand
.IsValid()) ||
1111 (std::tie(operand
, iter
) = ParseARMOffsetAccess(osi
, ose
),
1112 operand
.IsValid()) ||
1113 (std::tie(operand
, iter
) = ParseARMDerefAccess(osi
, ose
),
1114 operand
.IsValid()) ||
1115 (std::tie(operand
, iter
) = ParseRegisterName(osi
, ose
),
1116 operand
.IsValid()) ||
1117 (std::tie(operand
, iter
) = ParseImmediate(osi
, ose
),
1118 operand
.IsValid())) {
1120 operands
.push_back(operand
);
1125 std::pair
<bool, llvm::StringRef::const_iterator
> found_and_iter
=
1126 ConsumeChar(osi
, ',', ose
);
1127 if (found_and_iter
.first
) {
1128 osi
= found_and_iter
.second
;
1131 osi
= ConsumeWhitespace(osi
, ose
);
1134 DisassemblerSP disasm_sp
= m_disasm_wp
.lock();
1136 if (disasm_sp
&& operands
.size() > 1) {
1137 // TODO tie this into the MC Disassembler's notion of clobbers.
1138 switch (disasm_sp
->GetArchitecture().GetMachine()) {
1141 case llvm::Triple::x86
:
1142 case llvm::Triple::x86_64
:
1143 operands
[operands
.size() - 1].m_clobbered
= true;
1145 case llvm::Triple::arm
:
1146 operands
[0].m_clobbered
= true;
1151 if (Log
*log
= GetLog(LLDBLog::Process
)) {
1154 ss
.Printf("[%s] expands to %zu operands:\n", operands_string
,
1156 for (const Operand
&operand
: operands
) {
1158 DumpOperand(operand
, ss
);
1159 ss
.PutCString("\n");
1162 log
->PutString(ss
.GetString());
1168 bool IsCall() override
{
1174 std::weak_ptr
<DisassemblerLLVMC
> m_disasm_wp
;
1176 bool m_is_valid
= false;
1177 bool m_using_file_addr
= false;
1178 bool m_has_visited_instruction
= false;
// Be conservative. If we didn't understand the instruction, say it:
//   - Might branch
//   - Does not have a delay slot
//   - Is not a call
//   - Is not a load
//   - Is not an authenticated instruction
1186 bool m_does_branch
= true;
1187 bool m_has_delay_slot
= false;
1188 bool m_is_call
= false;
1189 bool m_is_load
= false;
1190 bool m_is_authenticated
= false;
1192 void VisitInstruction() {
1193 if (m_has_visited_instruction
)
1196 DisassemblerScope
disasm(*this);
1201 if (!m_opcode
.GetData(data
))
1204 bool is_alternate_isa
;
1205 lldb::addr_t pc
= m_address
.GetFileAddress();
1206 DisassemblerLLVMC::MCDisasmInstance
*mc_disasm_ptr
=
1207 GetDisasmToUse(is_alternate_isa
, disasm
);
1208 const uint8_t *opcode_data
= data
.GetDataStart();
1209 const size_t opcode_data_len
= data
.GetByteSize();
1211 const size_t inst_size
=
1212 mc_disasm_ptr
->GetMCInst(opcode_data
, opcode_data_len
, pc
, inst
);
1216 m_has_visited_instruction
= true;
1217 m_does_branch
= mc_disasm_ptr
->CanBranch(inst
);
1218 m_has_delay_slot
= mc_disasm_ptr
->HasDelaySlot(inst
);
1219 m_is_call
= mc_disasm_ptr
->IsCall(inst
);
1220 m_is_load
= mc_disasm_ptr
->IsLoad(inst
);
1221 m_is_authenticated
= mc_disasm_ptr
->IsAuthenticated(inst
);
1225 DisassemblerLLVMC::MCDisasmInstance
*
1226 GetDisasmToUse(bool &is_alternate_isa
, DisassemblerScope
&disasm
) {
1227 is_alternate_isa
= false;
1229 if (disasm
->m_alternate_disasm_up
) {
1230 const AddressClass address_class
= GetAddressClass();
1232 if (address_class
== AddressClass::eCodeAlternateISA
) {
1233 is_alternate_isa
= true;
1234 return disasm
->m_alternate_disasm_up
.get();
1237 return disasm
->m_disasm_up
.get();
1243 std::unique_ptr
<DisassemblerLLVMC::MCDisasmInstance
>
1244 DisassemblerLLVMC::MCDisasmInstance::Create(const char *triple
, const char *cpu
,
1245 const char *features_str
,
1247 DisassemblerLLVMC
&owner
) {
1248 using Instance
= std::unique_ptr
<DisassemblerLLVMC::MCDisasmInstance
>;
1251 const llvm::Target
*curr_target
=
1252 llvm::TargetRegistry::lookupTarget(triple
, Status
);
1256 std::unique_ptr
<llvm::MCInstrInfo
> instr_info_up(
1257 curr_target
->createMCInstrInfo());
1261 std::unique_ptr
<llvm::MCRegisterInfo
> reg_info_up(
1262 curr_target
->createMCRegInfo(triple
));
1266 std::unique_ptr
<llvm::MCSubtargetInfo
> subtarget_info_up(
1267 curr_target
->createMCSubtargetInfo(triple
, cpu
, features_str
));
1268 if (!subtarget_info_up
)
1271 llvm::MCTargetOptions MCOptions
;
1272 std::unique_ptr
<llvm::MCAsmInfo
> asm_info_up(
1273 curr_target
->createMCAsmInfo(*reg_info_up
, triple
, MCOptions
));
1277 std::unique_ptr
<llvm::MCContext
> context_up(
1278 new llvm::MCContext(llvm::Triple(triple
), asm_info_up
.get(),
1279 reg_info_up
.get(), subtarget_info_up
.get()));
1283 std::unique_ptr
<llvm::MCDisassembler
> disasm_up(
1284 curr_target
->createMCDisassembler(*subtarget_info_up
, *context_up
));
1288 std::unique_ptr
<llvm::MCRelocationInfo
> rel_info_up(
1289 curr_target
->createMCRelocationInfo(triple
, *context_up
));
1293 std::unique_ptr
<llvm::MCSymbolizer
> symbolizer_up(
1294 curr_target
->createMCSymbolizer(
1295 triple
, nullptr, DisassemblerLLVMC::SymbolLookupCallback
, &owner
,
1296 context_up
.get(), std::move(rel_info_up
)));
1297 disasm_up
->setSymbolizer(std::move(symbolizer_up
));
1299 unsigned asm_printer_variant
=
1300 flavor
== ~0U ? asm_info_up
->getAssemblerDialect() : flavor
;
1302 std::unique_ptr
<llvm::MCInstPrinter
> instr_printer_up(
1303 curr_target
->createMCInstPrinter(llvm::Triple
{triple
},
1304 asm_printer_variant
, *asm_info_up
,
1305 *instr_info_up
, *reg_info_up
));
1306 if (!instr_printer_up
)
1309 instr_printer_up
->setPrintBranchImmAsAddress(true);
1311 // Not all targets may have registered createMCInstrAnalysis().
1312 std::unique_ptr
<llvm::MCInstrAnalysis
> instr_analysis_up(
1313 curr_target
->createMCInstrAnalysis(instr_info_up
.get()));
1315 return Instance(new MCDisasmInstance(
1316 std::move(instr_info_up
), std::move(reg_info_up
),
1317 std::move(subtarget_info_up
), std::move(asm_info_up
),
1318 std::move(context_up
), std::move(disasm_up
), std::move(instr_printer_up
),
1319 std::move(instr_analysis_up
)));
1322 DisassemblerLLVMC::MCDisasmInstance::MCDisasmInstance(
1323 std::unique_ptr
<llvm::MCInstrInfo
> &&instr_info_up
,
1324 std::unique_ptr
<llvm::MCRegisterInfo
> &®_info_up
,
1325 std::unique_ptr
<llvm::MCSubtargetInfo
> &&subtarget_info_up
,
1326 std::unique_ptr
<llvm::MCAsmInfo
> &&asm_info_up
,
1327 std::unique_ptr
<llvm::MCContext
> &&context_up
,
1328 std::unique_ptr
<llvm::MCDisassembler
> &&disasm_up
,
1329 std::unique_ptr
<llvm::MCInstPrinter
> &&instr_printer_up
,
1330 std::unique_ptr
<llvm::MCInstrAnalysis
> &&instr_analysis_up
)
1331 : m_instr_info_up(std::move(instr_info_up
)),
1332 m_reg_info_up(std::move(reg_info_up
)),
1333 m_subtarget_info_up(std::move(subtarget_info_up
)),
1334 m_asm_info_up(std::move(asm_info_up
)),
1335 m_context_up(std::move(context_up
)), m_disasm_up(std::move(disasm_up
)),
1336 m_instr_printer_up(std::move(instr_printer_up
)),
1337 m_instr_analysis_up(std::move(instr_analysis_up
)) {
1338 assert(m_instr_info_up
&& m_reg_info_up
&& m_subtarget_info_up
&&
1339 m_asm_info_up
&& m_context_up
&& m_disasm_up
&& m_instr_printer_up
);
1342 uint64_t DisassemblerLLVMC::MCDisasmInstance::GetMCInst(
1343 const uint8_t *opcode_data
, size_t opcode_data_len
, lldb::addr_t pc
,
1344 llvm::MCInst
&mc_inst
) const {
1345 llvm::ArrayRef
<uint8_t> data(opcode_data
, opcode_data_len
);
1346 llvm::MCDisassembler::DecodeStatus status
;
1348 uint64_t new_inst_size
;
1349 status
= m_disasm_up
->getInstruction(mc_inst
, new_inst_size
, data
, pc
,
1351 if (status
== llvm::MCDisassembler::Success
)
1352 return new_inst_size
;
1357 void DisassemblerLLVMC::MCDisasmInstance::PrintMCInst(
1358 llvm::MCInst
&mc_inst
, lldb::addr_t pc
, std::string
&inst_string
,
1359 std::string
&comments_string
) {
1360 llvm::raw_string_ostream
inst_stream(inst_string
);
1361 llvm::raw_string_ostream
comments_stream(comments_string
);
1363 inst_stream
.enable_colors(m_instr_printer_up
->getUseColor());
1364 m_instr_printer_up
->setCommentStream(comments_stream
);
1365 m_instr_printer_up
->printInst(&mc_inst
, pc
, llvm::StringRef(),
1366 *m_subtarget_info_up
, inst_stream
);
1367 m_instr_printer_up
->setCommentStream(llvm::nulls());
1369 comments_stream
.flush();
1371 static std::string
g_newlines("\r\n");
1373 for (size_t newline_pos
= 0;
1374 (newline_pos
= comments_string
.find_first_of(g_newlines
, newline_pos
)) !=
1375 comments_string
.npos
;
1377 comments_string
.replace(comments_string
.begin() + newline_pos
,
1378 comments_string
.begin() + newline_pos
+ 1, 1, ' ');
1382 void DisassemblerLLVMC::MCDisasmInstance::SetStyle(
1383 bool use_hex_immed
, HexImmediateStyle hex_style
) {
1384 m_instr_printer_up
->setPrintImmHex(use_hex_immed
);
1385 switch (hex_style
) {
1387 m_instr_printer_up
->setPrintHexStyle(llvm::HexStyle::C
);
1390 m_instr_printer_up
->setPrintHexStyle(llvm::HexStyle::Asm
);
1395 void DisassemblerLLVMC::MCDisasmInstance::SetUseColor(bool use_color
) {
1396 m_instr_printer_up
->setUseColor(use_color
);
1399 bool DisassemblerLLVMC::MCDisasmInstance::GetUseColor() const {
1400 return m_instr_printer_up
->getUseColor();
1403 bool DisassemblerLLVMC::MCDisasmInstance::CanBranch(
1404 llvm::MCInst
&mc_inst
) const {
1405 if (m_instr_analysis_up
)
1406 return m_instr_analysis_up
->mayAffectControlFlow(mc_inst
, *m_reg_info_up
);
1407 return m_instr_info_up
->get(mc_inst
.getOpcode())
1408 .mayAffectControlFlow(mc_inst
, *m_reg_info_up
);
1411 bool DisassemblerLLVMC::MCDisasmInstance::HasDelaySlot(
1412 llvm::MCInst
&mc_inst
) const {
1413 return m_instr_info_up
->get(mc_inst
.getOpcode()).hasDelaySlot();
1416 bool DisassemblerLLVMC::MCDisasmInstance::IsCall(llvm::MCInst
&mc_inst
) const {
1417 if (m_instr_analysis_up
)
1418 return m_instr_analysis_up
->isCall(mc_inst
);
1419 return m_instr_info_up
->get(mc_inst
.getOpcode()).isCall();
1422 bool DisassemblerLLVMC::MCDisasmInstance::IsLoad(llvm::MCInst
&mc_inst
) const {
1423 return m_instr_info_up
->get(mc_inst
.getOpcode()).mayLoad();
1426 bool DisassemblerLLVMC::MCDisasmInstance::IsAuthenticated(
1427 llvm::MCInst
&mc_inst
) const {
1428 const auto &InstrDesc
= m_instr_info_up
->get(mc_inst
.getOpcode());
1430 // Treat software auth traps (brk 0xc470 + aut key, where 0x70 == 'p', 0xc4
1431 // == 'a' + 'c') as authenticated instructions for reporting purposes, in
1432 // addition to the standard authenticated instructions specified in ARMv8.3.
1433 bool IsBrkC47x
= false;
1434 if (InstrDesc
.isTrap() && mc_inst
.getNumOperands() == 1) {
1435 const llvm::MCOperand
&Op0
= mc_inst
.getOperand(0);
1436 if (Op0
.isImm() && Op0
.getImm() >= 0xc470 && Op0
.getImm() <= 0xc474)
1440 return InstrDesc
.isAuthenticated() || IsBrkC47x
;
1443 DisassemblerLLVMC::DisassemblerLLVMC(const ArchSpec
&arch
,
1444 const char *flavor_string
)
1445 : Disassembler(arch
, flavor_string
), m_exe_ctx(nullptr), m_inst(nullptr),
1446 m_data_from_file(false), m_adrp_address(LLDB_INVALID_ADDRESS
),
1448 if (!FlavorValidForArchSpec(arch
, m_flavor
.c_str())) {
1449 m_flavor
.assign("default");
1452 unsigned flavor
= ~0U;
1453 llvm::Triple triple
= arch
.GetTriple();
1455 // So far the only supported flavor is "intel" on x86. The base class will
1456 // set this correctly coming in.
1457 if (triple
.getArch() == llvm::Triple::x86
||
1458 triple
.getArch() == llvm::Triple::x86_64
) {
1459 if (m_flavor
== "intel") {
1461 } else if (m_flavor
== "att") {
1466 ArchSpec
thumb_arch(arch
);
1467 if (triple
.getArch() == llvm::Triple::arm
) {
1468 std::string
thumb_arch_name(thumb_arch
.GetTriple().getArchName().str());
1469 // Replace "arm" with "thumb" so we get all thumb variants correct
1470 if (thumb_arch_name
.size() > 3) {
1471 thumb_arch_name
.erase(0, 3);
1472 thumb_arch_name
.insert(0, "thumb");
1474 thumb_arch_name
= "thumbv9.3a";
1476 thumb_arch
.GetTriple().setArchName(llvm::StringRef(thumb_arch_name
));
1479 // If no sub architecture specified then use the most recent arm architecture
1480 // so the disassembler will return all instructions. Without it we will see a
1481 // lot of unknown opcodes if the code uses instructions which are not
1482 // available in the oldest arm version (which is used when no sub architecture
1484 if (triple
.getArch() == llvm::Triple::arm
&&
1485 triple
.getSubArch() == llvm::Triple::NoSubArch
)
1486 triple
.setArchName("armv9.3a");
1488 std::string features_str
;
1489 const char *triple_str
= triple
.getTriple().c_str();
1491 // ARM Cortex M0-M7 devices only execute thumb instructions
1492 if (arch
.IsAlwaysThumbInstructions()) {
1493 triple_str
= thumb_arch
.GetTriple().getTriple().c_str();
1494 features_str
+= "+fp-armv8,";
1497 const char *cpu
= "";
1499 switch (arch
.GetCore()) {
1500 case ArchSpec::eCore_mips32
:
1501 case ArchSpec::eCore_mips32el
:
1504 case ArchSpec::eCore_mips32r2
:
1505 case ArchSpec::eCore_mips32r2el
:
1508 case ArchSpec::eCore_mips32r3
:
1509 case ArchSpec::eCore_mips32r3el
:
1512 case ArchSpec::eCore_mips32r5
:
1513 case ArchSpec::eCore_mips32r5el
:
1516 case ArchSpec::eCore_mips32r6
:
1517 case ArchSpec::eCore_mips32r6el
:
1520 case ArchSpec::eCore_mips64
:
1521 case ArchSpec::eCore_mips64el
:
1524 case ArchSpec::eCore_mips64r2
:
1525 case ArchSpec::eCore_mips64r2el
:
1528 case ArchSpec::eCore_mips64r3
:
1529 case ArchSpec::eCore_mips64r3el
:
1532 case ArchSpec::eCore_mips64r5
:
1533 case ArchSpec::eCore_mips64r5el
:
1536 case ArchSpec::eCore_mips64r6
:
1537 case ArchSpec::eCore_mips64r6el
:
1545 if (arch
.IsMIPS()) {
1546 uint32_t arch_flags
= arch
.GetFlags();
1547 if (arch_flags
& ArchSpec::eMIPSAse_msa
)
1548 features_str
+= "+msa,";
1549 if (arch_flags
& ArchSpec::eMIPSAse_dsp
)
1550 features_str
+= "+dsp,";
1551 if (arch_flags
& ArchSpec::eMIPSAse_dspr2
)
1552 features_str
+= "+dspr2,";
1555 // If any AArch64 variant, enable latest ISA with all extensions.
1556 if (triple
.isAArch64()) {
1557 features_str
+= "+all,";
1559 if (triple
.getVendor() == llvm::Triple::Apple
)
1560 cpu
= "apple-latest";
1563 if (triple
.isRISCV()) {
1564 uint32_t arch_flags
= arch
.GetFlags();
1565 if (arch_flags
& ArchSpec::eRISCV_rvc
)
1566 features_str
+= "+c,";
1567 if (arch_flags
& ArchSpec::eRISCV_rve
)
1568 features_str
+= "+e,";
1569 if ((arch_flags
& ArchSpec::eRISCV_float_abi_single
) ==
1570 ArchSpec::eRISCV_float_abi_single
)
1571 features_str
+= "+f,";
1572 if ((arch_flags
& ArchSpec::eRISCV_float_abi_double
) ==
1573 ArchSpec::eRISCV_float_abi_double
)
1574 features_str
+= "+f,+d,";
1575 if ((arch_flags
& ArchSpec::eRISCV_float_abi_quad
) ==
1576 ArchSpec::eRISCV_float_abi_quad
)
1577 features_str
+= "+f,+d,+q,";
1578 // FIXME: how do we detect features such as `+a`, `+m`?
1579 // Turn them on by default now, since everyone seems to use them
1580 features_str
+= "+a,+m,";
1583 // We use m_disasm_up.get() to tell whether we are valid or not, so if this
1584 // isn't good for some reason, we won't be valid and FindPlugin will fail and
1585 // we won't get used.
1586 m_disasm_up
= MCDisasmInstance::Create(triple_str
, cpu
, features_str
.c_str(),
1589 llvm::Triple::ArchType llvm_arch
= triple
.getArch();
1591 // For arm CPUs that can execute arm or thumb instructions, also create a
1592 // thumb instruction disassembler.
1593 if (llvm_arch
== llvm::Triple::arm
) {
1594 std::string
thumb_triple(thumb_arch
.GetTriple().getTriple());
1595 m_alternate_disasm_up
=
1596 MCDisasmInstance::Create(thumb_triple
.c_str(), "", features_str
.c_str(),
1598 if (!m_alternate_disasm_up
)
1599 m_disasm_up
.reset();
1601 } else if (arch
.IsMIPS()) {
1602 /* Create alternate disassembler for MIPS16 and microMIPS */
1603 uint32_t arch_flags
= arch
.GetFlags();
1604 if (arch_flags
& ArchSpec::eMIPSAse_mips16
)
1605 features_str
+= "+mips16,";
1606 else if (arch_flags
& ArchSpec::eMIPSAse_micromips
)
1607 features_str
+= "+micromips,";
1609 m_alternate_disasm_up
= MCDisasmInstance::Create(
1610 triple_str
, cpu
, features_str
.c_str(), flavor
, *this);
1611 if (!m_alternate_disasm_up
)
1612 m_disasm_up
.reset();
// Defaulted destructor, defined out of line in this file.
1616 DisassemblerLLVMC::~DisassemblerLLVMC() = default;
1618 lldb::DisassemblerSP
DisassemblerLLVMC::CreateInstance(const ArchSpec
&arch
,
1619 const char *flavor
) {
1620 if (arch
.GetTriple().getArch() != llvm::Triple::UnknownArch
) {
1621 auto disasm_sp
= std::make_shared
<DisassemblerLLVMC
>(arch
, flavor
);
1622 if (disasm_sp
&& disasm_sp
->IsValid())
1625 return lldb::DisassemblerSP();
1628 size_t DisassemblerLLVMC::DecodeInstructions(const Address
&base_addr
,
1629 const DataExtractor
&data
,
1630 lldb::offset_t data_offset
,
1631 size_t num_instructions
,
1632 bool append
, bool data_from_file
) {
1634 m_instruction_list
.Clear();
1639 m_data_from_file
= data_from_file
;
1640 uint32_t data_cursor
= data_offset
;
1641 const size_t data_byte_size
= data
.GetByteSize();
1642 uint32_t instructions_parsed
= 0;
1643 Address
inst_addr(base_addr
);
1645 while (data_cursor
< data_byte_size
&&
1646 instructions_parsed
< num_instructions
) {
1648 AddressClass address_class
= AddressClass::eCode
;
1650 if (m_alternate_disasm_up
)
1651 address_class
= inst_addr
.GetAddressClass();
1653 InstructionSP
inst_sp(
1654 new InstructionLLVMC(*this, inst_addr
, address_class
));
1659 uint32_t inst_size
= inst_sp
->Decode(*this, data
, data_cursor
);
1664 m_instruction_list
.Append(inst_sp
);
1665 data_cursor
+= inst_size
;
1666 inst_addr
.Slide(inst_size
);
1667 instructions_parsed
++;
1670 return data_cursor
- data_offset
;
1673 void DisassemblerLLVMC::Initialize() {
1674 PluginManager::RegisterPlugin(GetPluginNameStatic(),
1675 "Disassembler that uses LLVM MC to disassemble "
1676 "i386, x86_64, ARM, and ARM64.",
1679 llvm::InitializeAllTargetInfos();
1680 llvm::InitializeAllTargetMCs();
1681 llvm::InitializeAllAsmParsers();
1682 llvm::InitializeAllDisassemblers();
// Plugin teardown: hands the CreateInstance factory to
// PluginManager::UnregisterPlugin.
1685 void DisassemblerLLVMC::Terminate() {
1686 PluginManager::UnregisterPlugin(CreateInstance
);
1689 int DisassemblerLLVMC::OpInfoCallback(void *disassembler
, uint64_t pc
,
1690 uint64_t offset
, uint64_t size
,
1691 int tag_type
, void *tag_bug
) {
1692 return static_cast<DisassemblerLLVMC
*>(disassembler
)
1693 ->OpInfo(pc
, offset
, size
, tag_type
, tag_bug
);
1696 const char *DisassemblerLLVMC::SymbolLookupCallback(void *disassembler
,
1698 uint64_t *type
, uint64_t pc
,
1699 const char **name
) {
1700 return static_cast<DisassemblerLLVMC
*>(disassembler
)
1701 ->SymbolLookup(value
, type
, pc
, name
);
1704 bool DisassemblerLLVMC::FlavorValidForArchSpec(
1705 const lldb_private::ArchSpec
&arch
, const char *flavor
) {
1706 llvm::Triple triple
= arch
.GetTriple();
1707 if (flavor
== nullptr || strcmp(flavor
, "default") == 0)
1710 if (triple
.getArch() == llvm::Triple::x86
||
1711 triple
.getArch() == llvm::Triple::x86_64
) {
1712 return strcmp(flavor
, "intel") == 0 || strcmp(flavor
, "att") == 0;
1717 bool DisassemblerLLVMC::IsValid() const { return m_disasm_up
.operator bool(); }
1719 int DisassemblerLLVMC::OpInfo(uint64_t PC
, uint64_t Offset
, uint64_t Size
,
1720 int tag_type
, void *tag_bug
) {
1725 memset(tag_bug
, 0, sizeof(::LLVMOpInfo1
));
1731 const char *DisassemblerLLVMC::SymbolLookup(uint64_t value
, uint64_t *type_ptr
,
1732 uint64_t pc
, const char **name
) {
1734 if (m_exe_ctx
&& m_inst
) {
1735 // std::string remove_this_prior_to_checkin;
1736 Target
*target
= m_exe_ctx
? m_exe_ctx
->GetTargetPtr() : nullptr;
1737 Address value_so_addr
;
1739 if (target
->GetArchitecture().GetMachine() == llvm::Triple::aarch64
||
1740 target
->GetArchitecture().GetMachine() == llvm::Triple::aarch64_be
||
1741 target
->GetArchitecture().GetMachine() == llvm::Triple::aarch64_32
) {
1742 if (*type_ptr
== LLVMDisassembler_ReferenceType_In_ARM64_ADRP
) {
1743 m_adrp_address
= pc
;
1744 m_adrp_insn
= value
;
1746 *type_ptr
= LLVMDisassembler_ReferenceType_InOut_None
;
1749 // If this instruction is an ADD and
1750 // the previous instruction was an ADRP and
1751 // the ADRP's register and this ADD's register are the same,
1752 // then this is a pc-relative address calculation.
1753 if (*type_ptr
== LLVMDisassembler_ReferenceType_In_ARM64_ADDXri
&&
1754 m_adrp_insn
&& m_adrp_address
== pc
- 4 &&
1755 (*m_adrp_insn
& 0x1f) == ((value
>> 5) & 0x1f)) {
1756 uint32_t addxri_inst
;
1757 uint64_t adrp_imm
, addxri_imm
;
1758 // Get immlo and immhi bits, OR them together to get the ADRP imm
1761 ((*m_adrp_insn
& 0x00ffffe0) >> 3) | ((*m_adrp_insn
>> 29) & 0x3);
1762 // if high bit of immhi after right-shifting set, sign extend
1763 if (adrp_imm
& (1ULL << 20))
1764 adrp_imm
|= ~((1ULL << 21) - 1);
1766 addxri_inst
= value
;
1767 addxri_imm
= (addxri_inst
>> 10) & 0xfff;
1768 // check if 'sh' bit is set, shift imm value up if so
1769 // (this would make no sense, ADRP already gave us this part)
1770 if ((addxri_inst
>> (12 + 5 + 5)) & 1)
1772 value
= (m_adrp_address
& 0xfffffffffffff000LL
) + (adrp_imm
<< 12) +
1775 m_adrp_address
= LLDB_INVALID_ADDRESS
;
1776 m_adrp_insn
.reset();
1779 if (m_inst
->UsingFileAddress()) {
1780 ModuleSP
module_sp(m_inst
->GetAddress().GetModule());
1782 module_sp
->ResolveFileAddress(value
, value_so_addr
);
1783 module_sp
->ResolveFileAddress(pc
, pc_so_addr
);
1785 } else if (target
&& !target
->GetSectionLoadList().IsEmpty()) {
1786 target
->GetSectionLoadList().ResolveLoadAddress(value
, value_so_addr
);
1787 target
->GetSectionLoadList().ResolveLoadAddress(pc
, pc_so_addr
);
1790 SymbolContext sym_ctx
;
1791 const SymbolContextItem resolve_scope
=
1792 eSymbolContextFunction
| eSymbolContextSymbol
;
1793 if (pc_so_addr
.IsValid() && pc_so_addr
.GetModule()) {
1794 pc_so_addr
.GetModule()->ResolveSymbolContextForAddress(
1795 pc_so_addr
, resolve_scope
, sym_ctx
);
1798 if (value_so_addr
.IsValid() && value_so_addr
.GetSection()) {
1801 bool format_omitting_current_func_name
= false;
1802 if (sym_ctx
.symbol
|| sym_ctx
.function
) {
1804 if (sym_ctx
.GetAddressRange(resolve_scope
, 0, false, range
) &&
1805 range
.GetBaseAddress().IsValid() &&
1806 range
.ContainsLoadAddress(value_so_addr
, target
)) {
1807 format_omitting_current_func_name
= true;
1811 // If the "value" address (the target address we're symbolicating) is
1812 // inside the same SymbolContext as the current instruction pc
1813 // (pc_so_addr), don't print the full function name - just print it
1814 // with DumpStyleNoFunctionName style, e.g. "<+36>".
1815 if (format_omitting_current_func_name
) {
1816 value_so_addr
.Dump(&ss
, target
, Address::DumpStyleNoFunctionName
,
1817 Address::DumpStyleSectionNameOffset
);
1821 Address::DumpStyleResolvedDescriptionNoFunctionArguments
,
1822 Address::DumpStyleSectionNameOffset
);
1825 if (!ss
.GetString().empty()) {
1826 // If Address::Dump returned a multi-line description, most commonly
1827 // seen when we have multiple levels of inlined functions at an
1828 // address, only show the first line.
1829 std::string str
= std::string(ss
.GetString());
1830 size_t first_eol_char
= str
.find_first_of("\r\n");
1831 if (first_eol_char
!= std::string::npos
) {
1832 str
.erase(first_eol_char
);
1834 m_inst
->AppendComment(str
);
1840 // TODO: llvm-objdump sets the type_ptr to the
1841 // LLVMDisassembler_ReferenceType_Out_* values
1842 // based on where value_so_addr is pointing, with
1843 // Mach-O specific augmentations in MachODump.cpp. e.g.
1844 // see what AArch64ExternalSymbolizer::tryAddingSymbolicOperand
1846 *type_ptr
= LLVMDisassembler_ReferenceType_InOut_None
;