1 //===-- DisassemblerLLVMC.cpp ---------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "DisassemblerLLVMC.h"
11 #include "llvm-c/Disassembler.h"
12 #include "llvm/ADT/SmallString.h"
13 #include "llvm/ADT/StringExtras.h"
14 #include "llvm/MC/MCAsmInfo.h"
15 #include "llvm/MC/MCContext.h"
16 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
17 #include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h"
18 #include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
19 #include "llvm/MC/MCInst.h"
20 #include "llvm/MC/MCInstPrinter.h"
21 #include "llvm/MC/MCInstrAnalysis.h"
22 #include "llvm/MC/MCInstrInfo.h"
23 #include "llvm/MC/MCRegisterInfo.h"
24 #include "llvm/MC/MCSubtargetInfo.h"
25 #include "llvm/MC/MCTargetOptions.h"
26 #include "llvm/MC/TargetRegistry.h"
27 #include "llvm/Support/ErrorHandling.h"
28 #include "llvm/Support/ScopedPrinter.h"
29 #include "llvm/Support/TargetSelect.h"
30 #include "llvm/TargetParser/AArch64TargetParser.h"
32 #include "lldb/Core/Address.h"
33 #include "lldb/Core/Module.h"
34 #include "lldb/Symbol/SymbolContext.h"
35 #include "lldb/Target/ExecutionContext.h"
36 #include "lldb/Target/Process.h"
37 #include "lldb/Target/RegisterContext.h"
38 #include "lldb/Target/SectionLoadList.h"
39 #include "lldb/Target/StackFrame.h"
40 #include "lldb/Target/Target.h"
41 #include "lldb/Utility/DataExtractor.h"
42 #include "lldb/Utility/LLDBLog.h"
43 #include "lldb/Utility/Log.h"
44 #include "lldb/Utility/RegularExpression.h"
45 #include "lldb/Utility/Stream.h"
49 using namespace lldb_private
;
51 LLDB_PLUGIN_DEFINE(DisassemblerLLVMC
)
53 class DisassemblerLLVMC::MCDisasmInstance
{
55 static std::unique_ptr
<MCDisasmInstance
>
56 Create(const char *triple
, const char *cpu
, const char *features_str
,
57 unsigned flavor
, DisassemblerLLVMC
&owner
);
59 ~MCDisasmInstance() = default;
61 uint64_t GetMCInst(const uint8_t *opcode_data
, size_t opcode_data_len
,
62 lldb::addr_t pc
, llvm::MCInst
&mc_inst
) const;
63 void PrintMCInst(llvm::MCInst
&mc_inst
, lldb::addr_t pc
,
64 std::string
&inst_string
, std::string
&comments_string
);
65 void SetStyle(bool use_hex_immed
, HexImmediateStyle hex_style
);
66 void SetUseColor(bool use_color
);
67 bool GetUseColor() const;
68 bool CanBranch(llvm::MCInst
&mc_inst
) const;
69 bool HasDelaySlot(llvm::MCInst
&mc_inst
) const;
70 bool IsCall(llvm::MCInst
&mc_inst
) const;
71 bool IsLoad(llvm::MCInst
&mc_inst
) const;
72 bool IsAuthenticated(llvm::MCInst
&mc_inst
) const;
75 MCDisasmInstance(std::unique_ptr
<llvm::MCInstrInfo
> &&instr_info_up
,
76 std::unique_ptr
<llvm::MCRegisterInfo
> &®_info_up
,
77 std::unique_ptr
<llvm::MCSubtargetInfo
> &&subtarget_info_up
,
78 std::unique_ptr
<llvm::MCAsmInfo
> &&asm_info_up
,
79 std::unique_ptr
<llvm::MCContext
> &&context_up
,
80 std::unique_ptr
<llvm::MCDisassembler
> &&disasm_up
,
81 std::unique_ptr
<llvm::MCInstPrinter
> &&instr_printer_up
,
82 std::unique_ptr
<llvm::MCInstrAnalysis
> &&instr_analysis_up
);
84 std::unique_ptr
<llvm::MCInstrInfo
> m_instr_info_up
;
85 std::unique_ptr
<llvm::MCRegisterInfo
> m_reg_info_up
;
86 std::unique_ptr
<llvm::MCSubtargetInfo
> m_subtarget_info_up
;
87 std::unique_ptr
<llvm::MCAsmInfo
> m_asm_info_up
;
88 std::unique_ptr
<llvm::MCContext
> m_context_up
;
89 std::unique_ptr
<llvm::MCDisassembler
> m_disasm_up
;
90 std::unique_ptr
<llvm::MCInstPrinter
> m_instr_printer_up
;
91 std::unique_ptr
<llvm::MCInstrAnalysis
> m_instr_analysis_up
;
96 /// These are the three values deciding instruction control flow kind.
/// The InstructionLengthDecode function decodes an instruction and returns this struct.
100 /// Primary opcode of the instruction.
101 /// For one-byte opcode instruction, it's the first byte after prefix.
102 /// For two- and three-byte opcodes, it's the second byte.
105 /// The length of opcode in bytes. Valid opcode lengths are 1, 2, or 3.
108 /// ModR/M byte of the instruction.
109 /// Bits[7:6] indicate MOD. Bits[5:3] specify a register and R/M bits[2:0]
110 /// may contain a register or specify an addressing mode, depending on MOD.
111 struct InstructionOpcodeAndModrm
{
112 uint8_t primary_opcode
;
117 /// Determine the InstructionControlFlowKind based on opcode and modrm bytes.
118 /// Refer to http://ref.x86asm.net/coder.html for the full list of opcode and
121 /// \param[in] opcode_and_modrm
122 /// Contains primary_opcode byte, its length, and ModR/M byte.
123 /// Refer to the struct InstructionOpcodeAndModrm for details.
126 /// The control flow kind of the instruction or
127 /// eInstructionControlFlowKindOther if the instruction doesn't affect
128 /// the control flow of the program.
129 lldb::InstructionControlFlowKind
130 MapOpcodeIntoControlFlowKind(InstructionOpcodeAndModrm opcode_and_modrm
) {
131 uint8_t opcode
= opcode_and_modrm
.primary_opcode
;
132 uint8_t opcode_len
= opcode_and_modrm
.opcode_len
;
133 uint8_t modrm
= opcode_and_modrm
.modrm
;
136 return lldb::eInstructionControlFlowKindOther
;
138 if (opcode
>= 0x70 && opcode
<= 0x7F) {
140 return lldb::eInstructionControlFlowKindCondJump
;
142 return lldb::eInstructionControlFlowKindOther
;
145 if (opcode
>= 0x80 && opcode
<= 0x8F) {
147 return lldb::eInstructionControlFlowKindCondJump
;
149 return lldb::eInstructionControlFlowKindOther
;
155 return lldb::eInstructionControlFlowKindFarCall
;
158 if (opcode_len
== 1) {
159 uint8_t modrm_reg
= (modrm
>> 3) & 7;
161 return lldb::eInstructionControlFlowKindCall
;
162 else if (modrm_reg
== 3)
163 return lldb::eInstructionControlFlowKindFarCall
;
164 else if (modrm_reg
== 4)
165 return lldb::eInstructionControlFlowKindJump
;
166 else if (modrm_reg
== 5)
167 return lldb::eInstructionControlFlowKindFarJump
;
172 return lldb::eInstructionControlFlowKindCall
;
179 return lldb::eInstructionControlFlowKindFarCall
;
183 return lldb::eInstructionControlFlowKindFarReturn
;
188 return lldb::eInstructionControlFlowKindJump
;
192 return lldb::eInstructionControlFlowKindFarJump
;
199 return lldb::eInstructionControlFlowKindCondJump
;
204 return lldb::eInstructionControlFlowKindReturn
;
209 return lldb::eInstructionControlFlowKindFarReturn
;
214 return lldb::eInstructionControlFlowKindFarCall
;
219 return lldb::eInstructionControlFlowKindFarReturn
;
222 if (opcode_len
== 2) {
225 return lldb::eInstructionControlFlowKindFarCall
;
228 return lldb::eInstructionControlFlowKindFarReturn
;
238 return lldb::eInstructionControlFlowKindOther
;
241 /// Decode an instruction into opcode, modrm and opcode_len.
242 /// Refer to http://ref.x86asm.net/coder.html for the instruction bytes layout.
243 /// Opcodes in x86 are generally the first byte of instruction, though two-byte
244 /// instructions and prefixes exist. ModR/M is the byte following the opcode
245 /// and adds additional information for how the instruction is executed.
247 /// \param[in] inst_bytes
248 /// Raw bytes of the instruction
251 /// \param[in] bytes_len
252 /// The length of the inst_bytes array.
254 /// \param[in] is_exec_mode_64b
255 /// If true, the execution mode is 64 bit.
258 /// Returns decoded instruction as struct InstructionOpcodeAndModrm, holding
259 /// primary_opcode, opcode_len and modrm byte. Refer to the struct definition
260 /// for more details.
261 /// Otherwise if the given instruction is invalid, returns std::nullopt.
262 std::optional
<InstructionOpcodeAndModrm
>
263 InstructionLengthDecode(const uint8_t *inst_bytes
, int bytes_len
,
264 bool is_exec_mode_64b
) {
266 bool prefix_done
= false;
267 InstructionOpcodeAndModrm ret
= {0, 0, 0};
269 // In most cases, the primary_opcode is the first byte of the instruction
270 // but some instructions have a prefix to be skipped for these calculations.
271 // The following mapping is inspired from libipt's instruction decoding logic
273 while (!prefix_done
) {
274 if (op_idx
>= bytes_len
)
277 ret
.primary_opcode
= inst_bytes
[op_idx
];
278 switch (ret
.primary_opcode
) {
286 // prefix_osz, prefix_asz
289 // prefix_lock, prefix_f2, prefix_f3
313 if (is_exec_mode_64b
)
321 if (!is_exec_mode_64b
&& (inst_bytes
[op_idx
+ 1] & 0xc0) != 0xc0) {
327 ret
.primary_opcode
= inst_bytes
[op_idx
+ 2];
328 ret
.modrm
= inst_bytes
[op_idx
+ 3];
332 if (!is_exec_mode_64b
&& (inst_bytes
[op_idx
+ 1] & 0xc0) != 0xc0) {
336 ret
.opcode_len
= inst_bytes
[op_idx
+ 1] & 0x1f;
337 ret
.primary_opcode
= inst_bytes
[op_idx
+ 3];
338 ret
.modrm
= inst_bytes
[op_idx
+ 4];
343 if (!is_exec_mode_64b
&& (inst_bytes
[op_idx
+ 1] & 0xc0) != 0xc0) {
347 ret
.opcode_len
= inst_bytes
[op_idx
+ 1] & 0x03;
348 ret
.primary_opcode
= inst_bytes
[op_idx
+ 4];
349 ret
.modrm
= inst_bytes
[op_idx
+ 5];
358 ret
.primary_opcode
= inst_bytes
[op_idx
];
359 ret
.modrm
= inst_bytes
[op_idx
+ 1];
362 // If the first opcode is 0F, it's two- or three- byte opcodes.
363 if (ret
.primary_opcode
== 0x0F) {
364 ret
.primary_opcode
= inst_bytes
[++op_idx
]; // get the next byte
366 if (ret
.primary_opcode
== 0x38) {
368 ret
.primary_opcode
= inst_bytes
[++op_idx
]; // get the next byte
369 ret
.modrm
= inst_bytes
[op_idx
+ 1];
370 } else if (ret
.primary_opcode
== 0x3A) {
372 ret
.primary_opcode
= inst_bytes
[++op_idx
];
373 ret
.modrm
= inst_bytes
[op_idx
+ 1];
374 } else if ((ret
.primary_opcode
& 0xf8) == 0x38) {
376 ret
.primary_opcode
= inst_bytes
[++op_idx
];
377 ret
.modrm
= inst_bytes
[op_idx
+ 1];
378 } else if (ret
.primary_opcode
== 0x0F) {
// opcode is 0x0F, no need to update
381 ret
.modrm
= inst_bytes
[op_idx
+ 1];
384 ret
.modrm
= inst_bytes
[op_idx
+ 1];
391 lldb::InstructionControlFlowKind
GetControlFlowKind(bool is_exec_mode_64b
,
393 std::optional
<InstructionOpcodeAndModrm
> ret
;
395 if (m_opcode
.GetOpcodeBytes() == nullptr || m_opcode
.GetByteSize() <= 0) {
396 // x86_64 and i386 instructions are categorized as Opcode::Type::eTypeBytes
397 return lldb::eInstructionControlFlowKindUnknown
;
400 // Opcode bytes will be decoded into primary_opcode, modrm and opcode length.
401 // These are the three values deciding instruction control flow kind.
402 ret
= InstructionLengthDecode((const uint8_t *)m_opcode
.GetOpcodeBytes(),
403 m_opcode
.GetByteSize(), is_exec_mode_64b
);
405 return lldb::eInstructionControlFlowKindUnknown
;
407 return MapOpcodeIntoControlFlowKind(*ret
);
412 class InstructionLLVMC
: public lldb_private::Instruction
{
414 InstructionLLVMC(DisassemblerLLVMC
&disasm
,
415 const lldb_private::Address
&address
,
416 AddressClass addr_class
)
417 : Instruction(address
, addr_class
),
418 m_disasm_wp(std::static_pointer_cast
<DisassemblerLLVMC
>(
419 disasm
.shared_from_this())) {}
/// Defaulted destructor; this class adds no cleanup of its own.
~InstructionLLVMC() override = default;
423 bool DoesBranch() override
{
425 return m_does_branch
;
428 bool HasDelaySlot() override
{
430 return m_has_delay_slot
;
433 bool IsLoad() override
{
438 bool IsAuthenticated() override
{
440 return m_is_authenticated
;
443 DisassemblerLLVMC::MCDisasmInstance
*GetDisasmToUse(bool &is_alternate_isa
) {
444 DisassemblerScope
disasm(*this);
445 return GetDisasmToUse(is_alternate_isa
, disasm
);
448 size_t Decode(const lldb_private::Disassembler
&disassembler
,
449 const lldb_private::DataExtractor
&data
,
450 lldb::offset_t data_offset
) override
{
451 // All we have to do is read the opcode which can be easy for some
454 DisassemblerScope
disasm(*this);
456 const ArchSpec
&arch
= disasm
->GetArchitecture();
457 const lldb::ByteOrder byte_order
= data
.GetByteOrder();
459 const uint32_t min_op_byte_size
= arch
.GetMinimumOpcodeByteSize();
460 const uint32_t max_op_byte_size
= arch
.GetMaximumOpcodeByteSize();
461 if (min_op_byte_size
== max_op_byte_size
) {
462 // Fixed size instructions, just read that amount of data.
463 if (!data
.ValidOffsetForDataOfSize(data_offset
, min_op_byte_size
))
466 switch (min_op_byte_size
) {
468 m_opcode
.SetOpcode8(data
.GetU8(&data_offset
), byte_order
);
473 m_opcode
.SetOpcode16(data
.GetU16(&data_offset
), byte_order
);
478 m_opcode
.SetOpcode32(data
.GetU32(&data_offset
), byte_order
);
483 m_opcode
.SetOpcode64(data
.GetU64(&data_offset
), byte_order
);
488 m_opcode
.SetOpcodeBytes(data
.PeekData(data_offset
, min_op_byte_size
),
495 bool is_alternate_isa
= false;
496 DisassemblerLLVMC::MCDisasmInstance
*mc_disasm_ptr
=
497 GetDisasmToUse(is_alternate_isa
, disasm
);
499 const llvm::Triple::ArchType machine
= arch
.GetMachine();
500 if (machine
== llvm::Triple::arm
|| machine
== llvm::Triple::thumb
) {
501 if (machine
== llvm::Triple::thumb
|| is_alternate_isa
) {
502 uint32_t thumb_opcode
= data
.GetU16(&data_offset
);
503 if ((thumb_opcode
& 0xe000) != 0xe000 ||
504 ((thumb_opcode
& 0x1800u
) == 0)) {
505 m_opcode
.SetOpcode16(thumb_opcode
, byte_order
);
509 thumb_opcode
|= data
.GetU16(&data_offset
);
510 m_opcode
.SetOpcode16_2(thumb_opcode
, byte_order
);
514 m_opcode
.SetOpcode32(data
.GetU32(&data_offset
), byte_order
);
518 // The opcode isn't evenly sized, so we need to actually use the llvm
519 // disassembler to parse it and get the size.
520 uint8_t *opcode_data
=
521 const_cast<uint8_t *>(data
.PeekData(data_offset
, 1));
522 const size_t opcode_data_len
= data
.BytesLeft(data_offset
);
523 const addr_t pc
= m_address
.GetFileAddress();
526 const size_t inst_size
=
527 mc_disasm_ptr
->GetMCInst(opcode_data
, opcode_data_len
, pc
, inst
);
531 m_opcode
.SetOpcodeBytes(opcode_data
, inst_size
);
536 return m_opcode
.GetByteSize();
541 void AppendComment(std::string
&description
) {
542 if (m_comment
.empty())
543 m_comment
.swap(description
);
545 m_comment
.append(", ");
546 m_comment
.append(description
);
550 lldb::InstructionControlFlowKind
551 GetControlFlowKind(const lldb_private::ExecutionContext
*exe_ctx
) override
{
552 DisassemblerScope
disasm(*this, exe_ctx
);
554 if (disasm
->GetArchitecture().GetMachine() == llvm::Triple::x86
)
555 return x86::GetControlFlowKind(/*is_64b=*/false, m_opcode
);
556 else if (disasm
->GetArchitecture().GetMachine() == llvm::Triple::x86_64
)
557 return x86::GetControlFlowKind(/*is_64b=*/true, m_opcode
);
560 return eInstructionControlFlowKindUnknown
;
563 void CalculateMnemonicOperandsAndComment(
564 const lldb_private::ExecutionContext
*exe_ctx
) override
{
566 const AddressClass address_class
= GetAddressClass();
568 if (m_opcode
.GetData(data
)) {
569 std::string out_string
;
570 std::string markup_out_string
;
571 std::string comment_string
;
572 std::string markup_comment_string
;
574 DisassemblerScope
disasm(*this, exe_ctx
);
576 DisassemblerLLVMC::MCDisasmInstance
*mc_disasm_ptr
;
578 if (address_class
== AddressClass::eCodeAlternateISA
)
579 mc_disasm_ptr
= disasm
->m_alternate_disasm_up
.get();
581 mc_disasm_ptr
= disasm
->m_disasm_up
.get();
583 lldb::addr_t pc
= m_address
.GetFileAddress();
584 m_using_file_addr
= true;
586 bool use_hex_immediates
= true;
587 Disassembler::HexImmediateStyle hex_style
= Disassembler::eHexStyleC
;
590 Target
*target
= exe_ctx
->GetTargetPtr();
592 use_hex_immediates
= target
->GetUseHexImmediates();
593 hex_style
= target
->GetHexImmediateStyle();
595 const lldb::addr_t load_addr
= m_address
.GetLoadAddress(target
);
596 if (load_addr
!= LLDB_INVALID_ADDRESS
) {
598 m_using_file_addr
= false;
603 const uint8_t *opcode_data
= data
.GetDataStart();
604 const size_t opcode_data_len
= data
.GetByteSize();
607 mc_disasm_ptr
->GetMCInst(opcode_data
, opcode_data_len
, pc
, inst
);
610 mc_disasm_ptr
->SetStyle(use_hex_immediates
, hex_style
);
612 const bool saved_use_color
= mc_disasm_ptr
->GetUseColor();
613 mc_disasm_ptr
->SetUseColor(false);
614 mc_disasm_ptr
->PrintMCInst(inst
, pc
, out_string
, comment_string
);
615 mc_disasm_ptr
->SetUseColor(true);
616 mc_disasm_ptr
->PrintMCInst(inst
, pc
, markup_out_string
,
617 markup_comment_string
);
618 mc_disasm_ptr
->SetUseColor(saved_use_color
);
620 if (!comment_string
.empty()) {
621 AppendComment(comment_string
);
625 if (inst_size
== 0) {
626 m_comment
.assign("unknown opcode");
627 inst_size
= m_opcode
.GetByteSize();
628 StreamString mnemonic_strm
;
629 lldb::offset_t offset
= 0;
630 lldb::ByteOrder byte_order
= data
.GetByteOrder();
633 const uint8_t uval8
= data
.GetU8(&offset
);
634 m_opcode
.SetOpcode8(uval8
, byte_order
);
635 m_opcode_name
.assign(".byte");
636 mnemonic_strm
.Printf("0x%2.2x", uval8
);
639 const uint16_t uval16
= data
.GetU16(&offset
);
640 m_opcode
.SetOpcode16(uval16
, byte_order
);
641 m_opcode_name
.assign(".short");
642 mnemonic_strm
.Printf("0x%4.4x", uval16
);
645 const uint32_t uval32
= data
.GetU32(&offset
);
646 m_opcode
.SetOpcode32(uval32
, byte_order
);
647 m_opcode_name
.assign(".long");
648 mnemonic_strm
.Printf("0x%8.8x", uval32
);
651 const uint64_t uval64
= data
.GetU64(&offset
);
652 m_opcode
.SetOpcode64(uval64
, byte_order
);
653 m_opcode_name
.assign(".quad");
654 mnemonic_strm
.Printf("0x%16.16" PRIx64
, uval64
);
660 const uint8_t *bytes
= data
.PeekData(offset
, inst_size
);
661 if (bytes
== nullptr)
663 m_opcode_name
.assign(".byte");
664 m_opcode
.SetOpcodeBytes(bytes
, inst_size
);
665 mnemonic_strm
.Printf("0x%2.2x", bytes
[0]);
666 for (uint32_t i
= 1; i
< inst_size
; ++i
)
667 mnemonic_strm
.Printf(" 0x%2.2x", bytes
[i
]);
671 m_mnemonics
= std::string(mnemonic_strm
.GetString());
675 static RegularExpression
s_regex(
676 llvm::StringRef("[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?"));
678 llvm::SmallVector
<llvm::StringRef
, 4> matches
;
679 if (s_regex
.Execute(out_string
, &matches
)) {
680 m_opcode_name
= matches
[1].str();
681 m_mnemonics
= matches
[2].str();
684 if (s_regex
.Execute(markup_out_string
, &matches
)) {
685 m_markup_opcode_name
= matches
[1].str();
686 m_markup_mnemonics
= matches
[2].str();
692 bool IsValid() const { return m_is_valid
; }
694 bool UsingFileAddress() const { return m_using_file_addr
; }
695 size_t GetByteSize() const { return m_opcode
.GetByteSize(); }
697 /// Grants exclusive access to the disassembler and initializes it with the
698 /// given InstructionLLVMC and an optional ExecutionContext.
699 class DisassemblerScope
{
700 std::shared_ptr
<DisassemblerLLVMC
> m_disasm
;
703 explicit DisassemblerScope(
705 const lldb_private::ExecutionContext
*exe_ctx
= nullptr)
706 : m_disasm(i
.m_disasm_wp
.lock()) {
707 m_disasm
->m_mutex
.lock();
708 m_disasm
->m_inst
= &i
;
709 m_disasm
->m_exe_ctx
= exe_ctx
;
711 ~DisassemblerScope() { m_disasm
->m_mutex
.unlock(); }
713 /// Evaluates to true if this scope contains a valid disassembler.
714 operator bool() const { return static_cast<bool>(m_disasm
); }
716 std::shared_ptr
<DisassemblerLLVMC
> operator->() { return m_disasm
; }
719 static llvm::StringRef::const_iterator
720 ConsumeWhitespace(llvm::StringRef::const_iterator osi
,
721 llvm::StringRef::const_iterator ose
) {
736 static std::pair
<bool, llvm::StringRef::const_iterator
>
737 ConsumeChar(llvm::StringRef::const_iterator osi
, const char c
,
738 llvm::StringRef::const_iterator ose
) {
741 osi
= ConsumeWhitespace(osi
, ose
);
742 if (osi
!= ose
&& *osi
== c
) {
747 return std::make_pair(found
, osi
);
750 static std::pair
<Operand
, llvm::StringRef::const_iterator
>
751 ParseRegisterName(llvm::StringRef::const_iterator osi
,
752 llvm::StringRef::const_iterator ose
) {
754 ret
.m_type
= Operand::Type::Register
;
757 osi
= ConsumeWhitespace(osi
, ose
);
760 if (*osi
>= '0' && *osi
<= '9') {
762 return std::make_pair(Operand(), osi
);
766 } else if (*osi
>= 'a' && *osi
<= 'z') {
772 return std::make_pair(Operand(), osi
);
774 ret
.m_register
= ConstString(str
);
775 return std::make_pair(ret
, osi
);
779 return std::make_pair(Operand(), osi
);
787 ret
.m_register
= ConstString(str
);
788 return std::make_pair(ret
, osi
);
791 static std::pair
<Operand
, llvm::StringRef::const_iterator
>
792 ParseImmediate(llvm::StringRef::const_iterator osi
,
793 llvm::StringRef::const_iterator ose
) {
795 ret
.m_type
= Operand::Type::Immediate
;
799 osi
= ConsumeWhitespace(osi
, ose
);
802 if (*osi
>= '0' && *osi
<= '9') {
804 } else if (*osi
>= 'a' && *osi
<= 'f') {
808 return std::make_pair(Operand(), osi
);
814 return std::make_pair(Operand(), osi
);
816 ret
.m_immediate
= strtoull(str
.c_str(), nullptr, 0);
817 return std::make_pair(ret
, osi
);
820 if (!str
.compare("0")) {
824 return std::make_pair(Operand(), osi
);
830 return std::make_pair(Operand(), osi
);
835 ret
.m_negative
= true;
837 return std::make_pair(Operand(), osi
);
844 ret
.m_immediate
= strtoull(str
.c_str(), nullptr, 0);
845 return std::make_pair(ret
, osi
);
849 static std::pair
<Operand
, llvm::StringRef::const_iterator
>
850 ParseIntelIndexedAccess(llvm::StringRef::const_iterator osi
,
851 llvm::StringRef::const_iterator ose
) {
852 std::pair
<Operand
, llvm::StringRef::const_iterator
> offset_and_iterator
=
853 ParseImmediate(osi
, ose
);
854 if (offset_and_iterator
.first
.IsValid()) {
855 osi
= offset_and_iterator
.second
;
859 std::tie(found
, osi
) = ConsumeChar(osi
, '(', ose
);
861 return std::make_pair(Operand(), osi
);
864 std::pair
<Operand
, llvm::StringRef::const_iterator
> base_and_iterator
=
865 ParseRegisterName(osi
, ose
);
866 if (base_and_iterator
.first
.IsValid()) {
867 osi
= base_and_iterator
.second
;
869 return std::make_pair(Operand(), osi
);
872 std::tie(found
, osi
) = ConsumeChar(osi
, ',', ose
);
874 return std::make_pair(Operand(), osi
);
877 std::pair
<Operand
, llvm::StringRef::const_iterator
> index_and_iterator
=
878 ParseRegisterName(osi
, ose
);
879 if (index_and_iterator
.first
.IsValid()) {
880 osi
= index_and_iterator
.second
;
882 return std::make_pair(Operand(), osi
);
885 std::tie(found
, osi
) = ConsumeChar(osi
, ',', ose
);
887 return std::make_pair(Operand(), osi
);
890 std::pair
<Operand
, llvm::StringRef::const_iterator
>
891 multiplier_and_iterator
= ParseImmediate(osi
, ose
);
892 if (index_and_iterator
.first
.IsValid()) {
893 osi
= index_and_iterator
.second
;
895 return std::make_pair(Operand(), osi
);
898 std::tie(found
, osi
) = ConsumeChar(osi
, ')', ose
);
900 return std::make_pair(Operand(), osi
);
904 product
.m_type
= Operand::Type::Product
;
905 product
.m_children
.push_back(index_and_iterator
.first
);
906 product
.m_children
.push_back(multiplier_and_iterator
.first
);
909 index
.m_type
= Operand::Type::Sum
;
910 index
.m_children
.push_back(base_and_iterator
.first
);
911 index
.m_children
.push_back(product
);
913 if (offset_and_iterator
.first
.IsValid()) {
915 offset
.m_type
= Operand::Type::Sum
;
916 offset
.m_children
.push_back(offset_and_iterator
.first
);
917 offset
.m_children
.push_back(index
);
920 deref
.m_type
= Operand::Type::Dereference
;
921 deref
.m_children
.push_back(offset
);
922 return std::make_pair(deref
, osi
);
925 deref
.m_type
= Operand::Type::Dereference
;
926 deref
.m_children
.push_back(index
);
927 return std::make_pair(deref
, osi
);
932 static std::pair
<Operand
, llvm::StringRef::const_iterator
>
933 ParseIntelDerefAccess(llvm::StringRef::const_iterator osi
,
934 llvm::StringRef::const_iterator ose
) {
935 std::pair
<Operand
, llvm::StringRef::const_iterator
> offset_and_iterator
=
936 ParseImmediate(osi
, ose
);
937 if (offset_and_iterator
.first
.IsValid()) {
938 osi
= offset_and_iterator
.second
;
942 std::tie(found
, osi
) = ConsumeChar(osi
, '(', ose
);
944 return std::make_pair(Operand(), osi
);
947 std::pair
<Operand
, llvm::StringRef::const_iterator
> base_and_iterator
=
948 ParseRegisterName(osi
, ose
);
949 if (base_and_iterator
.first
.IsValid()) {
950 osi
= base_and_iterator
.second
;
952 return std::make_pair(Operand(), osi
);
955 std::tie(found
, osi
) = ConsumeChar(osi
, ')', ose
);
957 return std::make_pair(Operand(), osi
);
960 if (offset_and_iterator
.first
.IsValid()) {
962 offset
.m_type
= Operand::Type::Sum
;
963 offset
.m_children
.push_back(offset_and_iterator
.first
);
964 offset
.m_children
.push_back(base_and_iterator
.first
);
967 deref
.m_type
= Operand::Type::Dereference
;
968 deref
.m_children
.push_back(offset
);
969 return std::make_pair(deref
, osi
);
972 deref
.m_type
= Operand::Type::Dereference
;
973 deref
.m_children
.push_back(base_and_iterator
.first
);
974 return std::make_pair(deref
, osi
);
979 static std::pair
<Operand
, llvm::StringRef::const_iterator
>
980 ParseARMOffsetAccess(llvm::StringRef::const_iterator osi
,
981 llvm::StringRef::const_iterator ose
) {
983 std::tie(found
, osi
) = ConsumeChar(osi
, '[', ose
);
985 return std::make_pair(Operand(), osi
);
988 std::pair
<Operand
, llvm::StringRef::const_iterator
> base_and_iterator
=
989 ParseRegisterName(osi
, ose
);
990 if (base_and_iterator
.first
.IsValid()) {
991 osi
= base_and_iterator
.second
;
993 return std::make_pair(Operand(), osi
);
996 std::tie(found
, osi
) = ConsumeChar(osi
, ',', ose
);
998 return std::make_pair(Operand(), osi
);
1001 std::pair
<Operand
, llvm::StringRef::const_iterator
> offset_and_iterator
=
1002 ParseImmediate(osi
, ose
);
1003 if (offset_and_iterator
.first
.IsValid()) {
1004 osi
= offset_and_iterator
.second
;
1007 std::tie(found
, osi
) = ConsumeChar(osi
, ']', ose
);
1009 return std::make_pair(Operand(), osi
);
1013 offset
.m_type
= Operand::Type::Sum
;
1014 offset
.m_children
.push_back(offset_and_iterator
.first
);
1015 offset
.m_children
.push_back(base_and_iterator
.first
);
1018 deref
.m_type
= Operand::Type::Dereference
;
1019 deref
.m_children
.push_back(offset
);
1020 return std::make_pair(deref
, osi
);
1024 static std::pair
<Operand
, llvm::StringRef::const_iterator
>
1025 ParseARMDerefAccess(llvm::StringRef::const_iterator osi
,
1026 llvm::StringRef::const_iterator ose
) {
1028 std::tie(found
, osi
) = ConsumeChar(osi
, '[', ose
);
1030 return std::make_pair(Operand(), osi
);
1033 std::pair
<Operand
, llvm::StringRef::const_iterator
> base_and_iterator
=
1034 ParseRegisterName(osi
, ose
);
1035 if (base_and_iterator
.first
.IsValid()) {
1036 osi
= base_and_iterator
.second
;
1038 return std::make_pair(Operand(), osi
);
1041 std::tie(found
, osi
) = ConsumeChar(osi
, ']', ose
);
1043 return std::make_pair(Operand(), osi
);
1047 deref
.m_type
= Operand::Type::Dereference
;
1048 deref
.m_children
.push_back(base_and_iterator
.first
);
1049 return std::make_pair(deref
, osi
);
1052 static void DumpOperand(const Operand
&op
, Stream
&s
) {
1053 switch (op
.m_type
) {
1054 case Operand::Type::Dereference
:
1056 DumpOperand(op
.m_children
[0], s
);
1058 case Operand::Type::Immediate
:
1059 if (op
.m_negative
) {
1062 s
.PutCString(llvm::to_string(op
.m_immediate
));
1064 case Operand::Type::Invalid
:
1065 s
.PutCString("Invalid");
1067 case Operand::Type::Product
:
1069 DumpOperand(op
.m_children
[0], s
);
1071 DumpOperand(op
.m_children
[1], s
);
1074 case Operand::Type::Register
:
1075 s
.PutCString(op
.m_register
.GetStringRef());
1077 case Operand::Type::Sum
:
1079 DumpOperand(op
.m_children
[0], s
);
1081 DumpOperand(op
.m_children
[1], s
);
1088 llvm::SmallVectorImpl
<Instruction::Operand
> &operands
) override
{
1089 const char *operands_string
= GetOperands(nullptr);
1091 if (!operands_string
) {
1095 llvm::StringRef
operands_ref(operands_string
);
1097 llvm::StringRef::const_iterator osi
= operands_ref
.begin();
1098 llvm::StringRef::const_iterator ose
= operands_ref
.end();
1100 while (osi
!= ose
) {
1102 llvm::StringRef::const_iterator iter
;
1104 if ((std::tie(operand
, iter
) = ParseIntelIndexedAccess(osi
, ose
),
1105 operand
.IsValid()) ||
1106 (std::tie(operand
, iter
) = ParseIntelDerefAccess(osi
, ose
),
1107 operand
.IsValid()) ||
1108 (std::tie(operand
, iter
) = ParseARMOffsetAccess(osi
, ose
),
1109 operand
.IsValid()) ||
1110 (std::tie(operand
, iter
) = ParseARMDerefAccess(osi
, ose
),
1111 operand
.IsValid()) ||
1112 (std::tie(operand
, iter
) = ParseRegisterName(osi
, ose
),
1113 operand
.IsValid()) ||
1114 (std::tie(operand
, iter
) = ParseImmediate(osi
, ose
),
1115 operand
.IsValid())) {
1117 operands
.push_back(operand
);
1122 std::pair
<bool, llvm::StringRef::const_iterator
> found_and_iter
=
1123 ConsumeChar(osi
, ',', ose
);
1124 if (found_and_iter
.first
) {
1125 osi
= found_and_iter
.second
;
1128 osi
= ConsumeWhitespace(osi
, ose
);
1131 DisassemblerSP disasm_sp
= m_disasm_wp
.lock();
1133 if (disasm_sp
&& operands
.size() > 1) {
1134 // TODO tie this into the MC Disassembler's notion of clobbers.
1135 switch (disasm_sp
->GetArchitecture().GetMachine()) {
1138 case llvm::Triple::x86
:
1139 case llvm::Triple::x86_64
:
1140 operands
[operands
.size() - 1].m_clobbered
= true;
1142 case llvm::Triple::arm
:
1143 operands
[0].m_clobbered
= true;
1148 if (Log
*log
= GetLog(LLDBLog::Process
)) {
1151 ss
.Printf("[%s] expands to %zu operands:\n", operands_string
,
1153 for (const Operand
&operand
: operands
) {
1155 DumpOperand(operand
, ss
);
1156 ss
.PutCString("\n");
1159 log
->PutString(ss
.GetString());
1165 bool IsCall() override
{
1171 std::weak_ptr
<DisassemblerLLVMC
> m_disasm_wp
;
1173 bool m_is_valid
= false;
1174 bool m_using_file_addr
= false;
1175 bool m_has_visited_instruction
= false;
1177 // Be conservative. If we didn't understand the instruction, say it:
1179 // - Does not have a delay slot
1182 // - Is not an authenticated instruction
1183 bool m_does_branch
= true;
1184 bool m_has_delay_slot
= false;
1185 bool m_is_call
= false;
1186 bool m_is_load
= false;
1187 bool m_is_authenticated
= false;
1189 void VisitInstruction() {
1190 if (m_has_visited_instruction
)
1193 DisassemblerScope
disasm(*this);
1198 if (!m_opcode
.GetData(data
))
1201 bool is_alternate_isa
;
1202 lldb::addr_t pc
= m_address
.GetFileAddress();
1203 DisassemblerLLVMC::MCDisasmInstance
*mc_disasm_ptr
=
1204 GetDisasmToUse(is_alternate_isa
, disasm
);
1205 const uint8_t *opcode_data
= data
.GetDataStart();
1206 const size_t opcode_data_len
= data
.GetByteSize();
1208 const size_t inst_size
=
1209 mc_disasm_ptr
->GetMCInst(opcode_data
, opcode_data_len
, pc
, inst
);
1213 m_has_visited_instruction
= true;
1214 m_does_branch
= mc_disasm_ptr
->CanBranch(inst
);
1215 m_has_delay_slot
= mc_disasm_ptr
->HasDelaySlot(inst
);
1216 m_is_call
= mc_disasm_ptr
->IsCall(inst
);
1217 m_is_load
= mc_disasm_ptr
->IsLoad(inst
);
1218 m_is_authenticated
= mc_disasm_ptr
->IsAuthenticated(inst
);
1222 DisassemblerLLVMC::MCDisasmInstance
*
1223 GetDisasmToUse(bool &is_alternate_isa
, DisassemblerScope
&disasm
) {
1224 is_alternate_isa
= false;
1226 if (disasm
->m_alternate_disasm_up
) {
1227 const AddressClass address_class
= GetAddressClass();
1229 if (address_class
== AddressClass::eCodeAlternateISA
) {
1230 is_alternate_isa
= true;
1231 return disasm
->m_alternate_disasm_up
.get();
1234 return disasm
->m_disasm_up
.get();
// Factory for MCDisasmInstance. Looks `triple` up in llvm::TargetRegistry and
// then creates, in dependency order, the MCInstrInfo, MCRegisterInfo,
// MCSubtargetInfo (from `triple`, `cpu` and `features_str`), MCAsmInfo,
// MCContext, MCDisassembler, MCRelocationInfo, MCSymbolizer, MCInstPrinter and
// MCInstrAnalysis objects, finally handing them to the MCDisasmInstance
// constructor. The symbolizer is given DisassemblerLLVMC::SymbolLookupCallback
// with `&owner` as its opaque context pointer.
// NOTE(review): `flavor` and `Status` are used below but their declarations,
// and the statements guarded by the null checks, are not visible in this
// listing -- confirm against the full file.
1240 std::unique_ptr
<DisassemblerLLVMC::MCDisasmInstance
>
1241 DisassemblerLLVMC::MCDisasmInstance::Create(const char *triple
, const char *cpu
,
1242 const char *features_str
,
1244 DisassemblerLLVMC
&owner
) {
1245 using Instance
= std::unique_ptr
<DisassemblerLLVMC::MCDisasmInstance
>;
1248 const llvm::Target
*curr_target
=
1249 llvm::TargetRegistry::lookupTarget(triple
, Status
);
1253 std::unique_ptr
<llvm::MCInstrInfo
> instr_info_up(
1254 curr_target
->createMCInstrInfo());
1258 std::unique_ptr
<llvm::MCRegisterInfo
> reg_info_up(
1259 curr_target
->createMCRegInfo(triple
));
1263 std::unique_ptr
<llvm::MCSubtargetInfo
> subtarget_info_up(
1264 curr_target
->createMCSubtargetInfo(triple
, cpu
, features_str
));
1265 if (!subtarget_info_up
)
1268 llvm::MCTargetOptions MCOptions
;
1269 std::unique_ptr
<llvm::MCAsmInfo
> asm_info_up(
1270 curr_target
->createMCAsmInfo(*reg_info_up
, triple
, MCOptions
));
// The context borrows raw pointers to the asm, register and subtarget info
// objects created above.
1274 std::unique_ptr
<llvm::MCContext
> context_up(
1275 new llvm::MCContext(llvm::Triple(triple
), asm_info_up
.get(),
1276 reg_info_up
.get(), subtarget_info_up
.get()));
1280 std::unique_ptr
<llvm::MCDisassembler
> disasm_up(
1281 curr_target
->createMCDisassembler(*subtarget_info_up
, *context_up
));
1285 std::unique_ptr
<llvm::MCRelocationInfo
> rel_info_up(
1286 curr_target
->createMCRelocationInfo(triple
, *context_up
));
// Ownership of rel_info_up moves into the symbolizer, and ownership of the
// symbolizer moves into the disassembler.
1290 std::unique_ptr
<llvm::MCSymbolizer
> symbolizer_up(
1291 curr_target
->createMCSymbolizer(
1292 triple
, nullptr, DisassemblerLLVMC::SymbolLookupCallback
, &owner
,
1293 context_up
.get(), std::move(rel_info_up
)));
1294 disasm_up
->setSymbolizer(std::move(symbolizer_up
));
// A flavor of ~0U selects the assembler dialect reported by the MCAsmInfo;
// any other value is used as the printer variant directly.
1296 unsigned asm_printer_variant
=
1297 flavor
== ~0U ? asm_info_up
->getAssemblerDialect() : flavor
;
1299 std::unique_ptr
<llvm::MCInstPrinter
> instr_printer_up(
1300 curr_target
->createMCInstPrinter(llvm::Triple
{triple
},
1301 asm_printer_variant
, *asm_info_up
,
1302 *instr_info_up
, *reg_info_up
));
1303 if (!instr_printer_up
)
1306 instr_printer_up
->setPrintBranchImmAsAddress(true);
1308 // Not all targets may have registered createMCInstrAnalysis().
1309 std::unique_ptr
<llvm::MCInstrAnalysis
> instr_analysis_up(
1310 curr_target
->createMCInstrAnalysis(instr_info_up
.get()));
1312 return Instance(new MCDisasmInstance(
1313 std::move(instr_info_up
), std::move(reg_info_up
),
1314 std::move(subtarget_info_up
), std::move(asm_info_up
),
1315 std::move(context_up
), std::move(disasm_up
), std::move(instr_printer_up
),
1316 std::move(instr_analysis_up
)));
1319 DisassemblerLLVMC::MCDisasmInstance::MCDisasmInstance(
1320 std::unique_ptr
<llvm::MCInstrInfo
> &&instr_info_up
,
1321 std::unique_ptr
<llvm::MCRegisterInfo
> &®_info_up
,
1322 std::unique_ptr
<llvm::MCSubtargetInfo
> &&subtarget_info_up
,
1323 std::unique_ptr
<llvm::MCAsmInfo
> &&asm_info_up
,
1324 std::unique_ptr
<llvm::MCContext
> &&context_up
,
1325 std::unique_ptr
<llvm::MCDisassembler
> &&disasm_up
,
1326 std::unique_ptr
<llvm::MCInstPrinter
> &&instr_printer_up
,
1327 std::unique_ptr
<llvm::MCInstrAnalysis
> &&instr_analysis_up
)
1328 : m_instr_info_up(std::move(instr_info_up
)),
1329 m_reg_info_up(std::move(reg_info_up
)),
1330 m_subtarget_info_up(std::move(subtarget_info_up
)),
1331 m_asm_info_up(std::move(asm_info_up
)),
1332 m_context_up(std::move(context_up
)), m_disasm_up(std::move(disasm_up
)),
1333 m_instr_printer_up(std::move(instr_printer_up
)),
1334 m_instr_analysis_up(std::move(instr_analysis_up
)) {
1335 assert(m_instr_info_up
&& m_reg_info_up
&& m_subtarget_info_up
&&
1336 m_asm_info_up
&& m_context_up
&& m_disasm_up
&& m_instr_printer_up
);
// Decodes a single instruction from the `opcode_data_len` bytes at
// `opcode_data`, treating it as located at address `pc`, and stores the result
// in `mc_inst`. Returns the decoded instruction's size when the disassembler
// reports llvm::MCDisassembler::Success.
// NOTE(review): the final argument of getInstruction() and the return value
// for a non-success status are not visible in this listing.
1339 uint64_t DisassemblerLLVMC::MCDisasmInstance::GetMCInst(
1340 const uint8_t *opcode_data
, size_t opcode_data_len
, lldb::addr_t pc
,
1341 llvm::MCInst
&mc_inst
) const {
1342 llvm::ArrayRef
<uint8_t> data(opcode_data
, opcode_data_len
);
1343 llvm::MCDisassembler::DecodeStatus status
;
1345 uint64_t new_inst_size
;
1346 status
= m_disasm_up
->getInstruction(mc_inst
, new_inst_size
, data
, pc
,
1348 if (status
== llvm::MCDisassembler::Success
)
1349 return new_inst_size
;
// Renders `mc_inst` (located at `pc`) as text. The instruction text is written
// into `inst_string` and any comments emitted by the printer into
// `comments_string`; afterwards each '\r' or '\n' found in the comments is
// replaced by a single space.
// NOTE(review): part of the for-loop header below is not visible in this
// listing.
1354 void DisassemblerLLVMC::MCDisasmInstance::PrintMCInst(
1355 llvm::MCInst
&mc_inst
, lldb::addr_t pc
, std::string
&inst_string
,
1356 std::string
&comments_string
) {
1357 llvm::raw_string_ostream
inst_stream(inst_string
);
1358 llvm::raw_string_ostream
comments_stream(comments_string
);
// Color output on the instruction stream follows the printer's own setting.
1360 inst_stream
.enable_colors(m_instr_printer_up
->getUseColor());
1361 m_instr_printer_up
->setCommentStream(comments_stream
);
1362 m_instr_printer_up
->printInst(&mc_inst
, pc
, llvm::StringRef(),
1363 *m_subtarget_info_up
, inst_stream
);
// Point the printer back at the null stream; presumably so it does not keep
// referring to the local comments_stream after this function returns --
// confirm.
1364 m_instr_printer_up
->setCommentStream(llvm::nulls());
1366 static std::string
g_newlines("\r\n");
// Replace every line-break character with ' ', one character at a time.
1368 for (size_t newline_pos
= 0;
1369 (newline_pos
= comments_string
.find_first_of(g_newlines
, newline_pos
)) !=
1370 comments_string
.npos
;
1372 comments_string
.replace(comments_string
.begin() + newline_pos
,
1373 comments_string
.begin() + newline_pos
+ 1, 1, ' ');
// Configures how the instruction printer renders immediates: `use_hex_immed`
// turns hexadecimal output on or off, and `hex_style` selects between
// llvm::HexStyle::C and llvm::HexStyle::Asm.
// NOTE(review): the case labels of the switch are not visible in this
// listing, so which HexImmediateStyle value maps to which llvm::HexStyle
// cannot be confirmed from here.
1377 void DisassemblerLLVMC::MCDisasmInstance::SetStyle(
1378 bool use_hex_immed
, HexImmediateStyle hex_style
) {
1379 m_instr_printer_up
->setPrintImmHex(use_hex_immed
);
1380 switch (hex_style
) {
1382 m_instr_printer_up
->setPrintHexStyle(llvm::HexStyle::C
);
1385 m_instr_printer_up
->setPrintHexStyle(llvm::HexStyle::Asm
);
1390 void DisassemblerLLVMC::MCDisasmInstance::SetUseColor(bool use_color
) {
1391 m_instr_printer_up
->setUseColor(use_color
);
1394 bool DisassemblerLLVMC::MCDisasmInstance::GetUseColor() const {
1395 return m_instr_printer_up
->getUseColor();
1398 bool DisassemblerLLVMC::MCDisasmInstance::CanBranch(
1399 llvm::MCInst
&mc_inst
) const {
1400 if (m_instr_analysis_up
)
1401 return m_instr_analysis_up
->mayAffectControlFlow(mc_inst
, *m_reg_info_up
);
1402 return m_instr_info_up
->get(mc_inst
.getOpcode())
1403 .mayAffectControlFlow(mc_inst
, *m_reg_info_up
);
1406 bool DisassemblerLLVMC::MCDisasmInstance::HasDelaySlot(
1407 llvm::MCInst
&mc_inst
) const {
1408 return m_instr_info_up
->get(mc_inst
.getOpcode()).hasDelaySlot();
1411 bool DisassemblerLLVMC::MCDisasmInstance::IsCall(llvm::MCInst
&mc_inst
) const {
1412 if (m_instr_analysis_up
)
1413 return m_instr_analysis_up
->isCall(mc_inst
);
1414 return m_instr_info_up
->get(mc_inst
.getOpcode()).isCall();
1417 bool DisassemblerLLVMC::MCDisasmInstance::IsLoad(llvm::MCInst
&mc_inst
) const {
1418 return m_instr_info_up
->get(mc_inst
.getOpcode()).mayLoad();
1421 bool DisassemblerLLVMC::MCDisasmInstance::IsAuthenticated(
1422 llvm::MCInst
&mc_inst
) const {
1423 const auto &InstrDesc
= m_instr_info_up
->get(mc_inst
.getOpcode());
1425 // Treat software auth traps (brk 0xc470 + aut key, where 0x70 == 'p', 0xc4
1426 // == 'a' + 'c') as authenticated instructions for reporting purposes, in
1427 // addition to the standard authenticated instructions specified in ARMv8.3.
1428 bool IsBrkC47x
= false;
1429 if (InstrDesc
.isTrap() && mc_inst
.getNumOperands() == 1) {
1430 const llvm::MCOperand
&Op0
= mc_inst
.getOperand(0);
1431 if (Op0
.isImm() && Op0
.getImm() >= 0xc470 && Op0
.getImm() <= 0xc474)
1435 return InstrDesc
.isAuthenticated() || IsBrkC47x
;
// Constructs the disassembler plugin for `arch`.
//
// Falls back to the "default" flavor when the requested one is not valid for
// the architecture, then works out the triple, CPU and feature string to hand
// to MCDisasmInstance::Create, applying per-architecture adjustments for
// ARM/Thumb, MIPS, AArch64 and RISC-V. Most of those adjustments are skipped
// when the caller passed an explicit `cpu_string` or `features_string`.
// For ARM and MIPS an alternate MCDisasmInstance is created as well; if that
// fails, the primary instance is reset too.
// NOTE(review): several statements (for example the x86 flavor assignments,
// the per-core MIPS cpu selections, and the trailing arguments of two
// Create() calls) are not visible in this listing.
1438 DisassemblerLLVMC::DisassemblerLLVMC(const ArchSpec
&arch
,
1439 const char *flavor_string
,
1440 const char *cpu_string
,
1441 const char *features_string
)
1442 : Disassembler(arch
, flavor_string
), m_exe_ctx(nullptr), m_inst(nullptr),
1443 m_data_from_file(false), m_adrp_address(LLDB_INVALID_ADDRESS
),
1445 if (!FlavorValidForArchSpec(arch
, m_flavor
.c_str())) {
1446 m_flavor
.assign("default");
// True when the caller supplied an explicit CPU or feature string; the
// blocks below that test this flag then leave cpu/features untouched.
1449 const bool cpu_or_features_overriden
= cpu_string
|| features_string
;
1450 unsigned flavor
= ~0U;
1451 llvm::Triple triple
= arch
.GetTriple();
1453 // So far the only supported flavor is "intel" on x86. The base class will
1454 // set this correctly coming in.
1455 if (triple
.getArch() == llvm::Triple::x86
||
1456 triple
.getArch() == llvm::Triple::x86_64
) {
1457 if (m_flavor
== "intel") {
1459 } else if (m_flavor
== "att") {
// thumb_arch starts as a copy of arch; for ARM its arch name is rewritten
// below to the matching "thumb..." spelling.
1464 ArchSpec
thumb_arch(arch
);
1465 if (triple
.getArch() == llvm::Triple::arm
) {
1466 std::string
thumb_arch_name(thumb_arch
.GetTriple().getArchName().str());
1467 // Replace "arm" with "thumb" so we get all thumb variants correct
1468 if (thumb_arch_name
.size() > 3) {
1469 thumb_arch_name
.erase(0, 3);
1470 thumb_arch_name
.insert(0, "thumb");
1472 thumb_arch_name
= "thumbv9.3a";
1474 thumb_arch
.GetTriple().setArchName(llvm::StringRef(thumb_arch_name
));
1477 // If no sub architecture specified then use the most recent arm architecture
1478 // so the disassembler will return all instructions. Without it we will see a
1479 // lot of unknown opcodes if the code uses instructions which are not
1480 // available in the oldest arm version (which is used when no sub architecture
1482 if (triple
.getArch() == llvm::Triple::arm
&&
1483 triple
.getSubArch() == llvm::Triple::NoSubArch
)
1484 triple
.setArchName("armv9.3a");
1486 std::string features_str
=
1487 features_string
? std::string(features_string
) : "";
// triple_str points into storage owned by `triple` (or by thumb_arch's
// triple when reassigned below).
1488 const char *triple_str
= triple
.getTriple().c_str();
1490 // ARM Cortex M0-M7 devices only execute thumb instructions
1491 if (arch
.IsAlwaysThumbInstructions()) {
1492 triple_str
= thumb_arch
.GetTriple().getTriple().c_str();
1493 if (!features_string
)
1494 features_str
+= "+fp-armv8,";
1497 const char *cpu
= cpu_string
;
1499 if (!cpu_or_features_overriden
) {
1500 switch (arch
.GetCore()) {
1501 case ArchSpec::eCore_mips32
:
1502 case ArchSpec::eCore_mips32el
:
1505 case ArchSpec::eCore_mips32r2
:
1506 case ArchSpec::eCore_mips32r2el
:
1509 case ArchSpec::eCore_mips32r3
:
1510 case ArchSpec::eCore_mips32r3el
:
1513 case ArchSpec::eCore_mips32r5
:
1514 case ArchSpec::eCore_mips32r5el
:
1517 case ArchSpec::eCore_mips32r6
:
1518 case ArchSpec::eCore_mips32r6el
:
1521 case ArchSpec::eCore_mips64
:
1522 case ArchSpec::eCore_mips64el
:
1525 case ArchSpec::eCore_mips64r2
:
1526 case ArchSpec::eCore_mips64r2el
:
1529 case ArchSpec::eCore_mips64r3
:
1530 case ArchSpec::eCore_mips64r3el
:
1533 case ArchSpec::eCore_mips64r5
:
1534 case ArchSpec::eCore_mips64r5el
:
1537 case ArchSpec::eCore_mips64r6
:
1538 case ArchSpec::eCore_mips64r6el
:
// MIPS: translate the ASE bits in the ArchSpec flags into feature strings.
1547 if (arch
.IsMIPS() && !cpu_or_features_overriden
) {
1548 uint32_t arch_flags
= arch
.GetFlags();
1549 if (arch_flags
& ArchSpec::eMIPSAse_msa
)
1550 features_str
+= "+msa,";
1551 if (arch_flags
& ArchSpec::eMIPSAse_dsp
)
1552 features_str
+= "+dsp,";
1553 if (arch_flags
& ArchSpec::eMIPSAse_dspr2
)
1554 features_str
+= "+dspr2,";
1557 // If any AArch64 variant, enable latest ISA with all extensions unless the
1558 // CPU or features were overridden.
1559 if (triple
.isAArch64() && !cpu_or_features_overriden
) {
1560 features_str
+= "+all,";
1561 if (triple
.getVendor() == llvm::Triple::Apple
)
1562 cpu
= "apple-latest";
// RISC-V: translate the ArchSpec flags into extension feature strings.
1565 if (triple
.isRISCV() && !cpu_or_features_overriden
) {
1566 uint32_t arch_flags
= arch
.GetFlags();
1567 if (arch_flags
& ArchSpec::eRISCV_rvc
)
1568 features_str
+= "+c,";
1569 if (arch_flags
& ArchSpec::eRISCV_rve
)
1570 features_str
+= "+e,";
1571 if ((arch_flags
& ArchSpec::eRISCV_float_abi_single
) ==
1572 ArchSpec::eRISCV_float_abi_single
)
1573 features_str
+= "+f,";
1574 if ((arch_flags
& ArchSpec::eRISCV_float_abi_double
) ==
1575 ArchSpec::eRISCV_float_abi_double
)
1576 features_str
+= "+f,+d,";
1577 if ((arch_flags
& ArchSpec::eRISCV_float_abi_quad
) ==
1578 ArchSpec::eRISCV_float_abi_quad
)
1579 features_str
+= "+f,+d,+q,";
1580 // FIXME: how do we detect features such as `+a`, `+m`?
1581 // Turn them on by default now, since everyone seems to use them
1582 features_str
+= "+a,+m,";
1585 // We use m_disasm_up.get() to tell whether we are valid or not, so if this
1586 // isn't good for some reason, we won't be valid and FindPlugin will fail and
1587 // we won't get used.
1588 m_disasm_up
= MCDisasmInstance::Create(triple_str
, cpu
, features_str
.c_str(),
1591 llvm::Triple::ArchType llvm_arch
= triple
.getArch();
1593 // For arm CPUs that can execute arm or thumb instructions, also create a
1594 // thumb instruction disassembler.
1595 if (llvm_arch
== llvm::Triple::arm
) {
1596 std::string
thumb_triple(thumb_arch
.GetTriple().getTriple());
1597 m_alternate_disasm_up
=
1598 MCDisasmInstance::Create(thumb_triple
.c_str(), "", features_str
.c_str(),
// A failed alternate instance also drops the primary one, so the plugin as a
// whole ends up without a usable m_disasm_up.
1600 if (!m_alternate_disasm_up
)
1601 m_disasm_up
.reset();
1603 } else if (arch
.IsMIPS()) {
1604 /* Create alternate disassembler for MIPS16 and microMIPS */
1605 uint32_t arch_flags
= arch
.GetFlags();
1606 if (arch_flags
& ArchSpec::eMIPSAse_mips16
)
1607 features_str
+= "+mips16,";
1608 else if (arch_flags
& ArchSpec::eMIPSAse_micromips
)
1609 features_str
+= "+micromips,";
1611 m_alternate_disasm_up
= MCDisasmInstance::Create(
1612 triple_str
, cpu
, features_str
.c_str(), flavor
, *this);
1613 if (!m_alternate_disasm_up
)
1614 m_disasm_up
.reset();
// Nothing is cleaned up by hand here; the compiler-generated destructor is
// used.
1618 DisassemblerLLVMC::~DisassemblerLLVMC() = default;
// Plugin factory. When `arch`'s triple names a known architecture, a
// DisassemblerLLVMC is built with std::make_shared and checked with IsValid();
// an empty DisassemblerSP is returned at the end otherwise.
// NOTE(review): the `flavor` and `cpu` parameters, the declaration of
// `disasm_sp`, and the statement guarded by the IsValid() check are not
// visible in this listing.
1620 lldb::DisassemblerSP
DisassemblerLLVMC::CreateInstance(const ArchSpec
&arch
,
1623 const char *features
) {
1624 if (arch
.GetTriple().getArch() != llvm::Triple::UnknownArch
) {
1626 std::make_shared
<DisassemblerLLVMC
>(arch
, flavor
, cpu
, features
);
1627 if (disasm_sp
&& disasm_sp
->IsValid())
1630 return lldb::DisassemblerSP();
// Decodes instructions from `data`, beginning at `data_offset` and treating
// the first instruction as located at `base_addr`. Decoding stops once
// `num_instructions` have been parsed or the cursor reaches the end of the
// data. Each decoded instruction is appended to m_instruction_list, and the
// address and cursor advance by the decoded size.
// Returns the number of bytes consumed (data_cursor - data_offset).
// NOTE(review): how `append` is used, and what happens between Decode() and
// Append() (e.g. on a failed decode), is not visible in this listing.
1633 size_t DisassemblerLLVMC::DecodeInstructions(const Address
&base_addr
,
1634 const DataExtractor
&data
,
1635 lldb::offset_t data_offset
,
1636 size_t num_instructions
,
1637 bool append
, bool data_from_file
) {
1639 m_instruction_list
.Clear();
1644 m_data_from_file
= data_from_file
;
// NOTE(review): data_cursor is uint32_t while data_offset is lldb::offset_t;
// if offset_t is wider than 32 bits this narrows -- confirm it is intended.
1645 uint32_t data_cursor
= data_offset
;
1646 const size_t data_byte_size
= data
.GetByteSize();
1647 uint32_t instructions_parsed
= 0;
1648 Address
inst_addr(base_addr
);
1650 while (data_cursor
< data_byte_size
&&
1651 instructions_parsed
< num_instructions
) {
// The address class is only looked up when an alternate disassembler exists;
// otherwise every instruction is treated as eCode.
1653 AddressClass address_class
= AddressClass::eCode
;
1655 if (m_alternate_disasm_up
)
1656 address_class
= inst_addr
.GetAddressClass();
1658 InstructionSP
inst_sp(
1659 new InstructionLLVMC(*this, inst_addr
, address_class
));
1664 uint32_t inst_size
= inst_sp
->Decode(*this, data
, data_cursor
);
1669 m_instruction_list
.Append(inst_sp
);
1670 data_cursor
+= inst_size
;
1671 inst_addr
.Slide(inst_size
);
1672 instructions_parsed
++;
1675 return data_cursor
- data_offset
;
// Registers this plugin with the PluginManager and then initializes LLVM's
// target infos, target MCs, asm parsers and disassemblers for all targets.
// NOTE(review): the last argument of RegisterPlugin() is not visible in this
// listing.
1678 void DisassemblerLLVMC::Initialize() {
1679 PluginManager::RegisterPlugin(GetPluginNameStatic(),
1680 "Disassembler that uses LLVM MC to disassemble "
1681 "i386, x86_64, ARM, and ARM64.",
1684 llvm::InitializeAllTargetInfos();
1685 llvm::InitializeAllTargetMCs();
1686 llvm::InitializeAllAsmParsers();
1687 llvm::InitializeAllDisassemblers();
1690 void DisassemblerLLVMC::Terminate() {
1691 PluginManager::UnregisterPlugin(CreateInstance
);
1694 int DisassemblerLLVMC::OpInfoCallback(void *disassembler
, uint64_t pc
,
1695 uint64_t offset
, uint64_t size
,
1696 int tag_type
, void *tag_bug
) {
1697 return static_cast<DisassemblerLLVMC
*>(disassembler
)
1698 ->OpInfo(pc
, offset
, size
, tag_type
, tag_bug
);
1701 const char *DisassemblerLLVMC::SymbolLookupCallback(void *disassembler
,
1703 uint64_t *type
, uint64_t pc
,
1704 const char **name
) {
1705 return static_cast<DisassemblerLLVMC
*>(disassembler
)
1706 ->SymbolLookup(value
, type
, pc
, name
);
// Reports whether `flavor` is an acceptable disassembly flavor for `arch`.
// A null flavor and the string "default" are handled first as a special case;
// on x86 and x86_64 the flavors "intel" and "att" are accepted.
// NOTE(review): the results returned for the null/"default" case and for
// other architectures are not visible in this listing.
1709 bool DisassemblerLLVMC::FlavorValidForArchSpec(
1710 const lldb_private::ArchSpec
&arch
, const char *flavor
) {
1711 llvm::Triple triple
= arch
.GetTriple();
1712 if (flavor
== nullptr || strcmp(flavor
, "default") == 0)
1715 if (triple
.getArch() == llvm::Triple::x86
||
1716 triple
.getArch() == llvm::Triple::x86_64
) {
1717 return strcmp(flavor
, "intel") == 0 || strcmp(flavor
, "att") == 0;
1722 bool DisassemblerLLVMC::IsValid() const { return m_disasm_up
.operator bool(); }
// Operand-info hook reached through OpInfoCallback(). The visible code zeroes
// sizeof(::LLVMOpInfo1) bytes at `tag_bug`.
// NOTE(review): the surrounding control flow and the return value are not
// visible in this listing. `tag_bug` is presumably a misspelling of
// `tag_buf` -- confirm before renaming.
1724 int DisassemblerLLVMC::OpInfo(uint64_t PC
, uint64_t Offset
, uint64_t Size
,
1725 int tag_type
, void *tag_bug
) {
1730 memset(tag_bug
, 0, sizeof(::LLVMOpInfo1
));
1736 const char *DisassemblerLLVMC::SymbolLookup(uint64_t value
, uint64_t *type_ptr
,
1737 uint64_t pc
, const char **name
) {
1739 if (m_exe_ctx
&& m_inst
) {
1740 // std::string remove_this_prior_to_checkin;
1741 Target
*target
= m_exe_ctx
? m_exe_ctx
->GetTargetPtr() : nullptr;
1742 Address value_so_addr
;
1744 if (target
->GetArchitecture().GetMachine() == llvm::Triple::aarch64
||
1745 target
->GetArchitecture().GetMachine() == llvm::Triple::aarch64_be
||
1746 target
->GetArchitecture().GetMachine() == llvm::Triple::aarch64_32
) {
1747 if (*type_ptr
== LLVMDisassembler_ReferenceType_In_ARM64_ADRP
) {
1748 m_adrp_address
= pc
;
1749 m_adrp_insn
= value
;
1751 *type_ptr
= LLVMDisassembler_ReferenceType_InOut_None
;
1754 // If this instruction is an ADD and
1755 // the previous instruction was an ADRP and
1756 // the ADRP's register and this ADD's register are the same,
1757 // then this is a pc-relative address calculation.
1758 if (*type_ptr
== LLVMDisassembler_ReferenceType_In_ARM64_ADDXri
&&
1759 m_adrp_insn
&& m_adrp_address
== pc
- 4 &&
1760 (*m_adrp_insn
& 0x1f) == ((value
>> 5) & 0x1f)) {
1761 uint32_t addxri_inst
;
1762 uint64_t adrp_imm
, addxri_imm
;
1763 // Get immlo and immhi bits, OR them together to get the ADRP imm
1766 ((*m_adrp_insn
& 0x00ffffe0) >> 3) | ((*m_adrp_insn
>> 29) & 0x3);
1767 // if high bit of immhi after right-shifting set, sign extend
1768 if (adrp_imm
& (1ULL << 20))
1769 adrp_imm
|= ~((1ULL << 21) - 1);
1771 addxri_inst
= value
;
1772 addxri_imm
= (addxri_inst
>> 10) & 0xfff;
1773 // check if 'sh' bit is set, shift imm value up if so
1774 // (this would make no sense, ADRP already gave us this part)
1775 if ((addxri_inst
>> (12 + 5 + 5)) & 1)
1777 value
= (m_adrp_address
& 0xfffffffffffff000LL
) + (adrp_imm
<< 12) +
1780 m_adrp_address
= LLDB_INVALID_ADDRESS
;
1781 m_adrp_insn
.reset();
1784 if (m_inst
->UsingFileAddress()) {
1785 ModuleSP
module_sp(m_inst
->GetAddress().GetModule());
1787 module_sp
->ResolveFileAddress(value
, value_so_addr
);
1788 module_sp
->ResolveFileAddress(pc
, pc_so_addr
);
1790 } else if (target
&& !target
->GetSectionLoadList().IsEmpty()) {
1791 target
->GetSectionLoadList().ResolveLoadAddress(value
, value_so_addr
);
1792 target
->GetSectionLoadList().ResolveLoadAddress(pc
, pc_so_addr
);
1795 SymbolContext sym_ctx
;
1796 const SymbolContextItem resolve_scope
=
1797 eSymbolContextFunction
| eSymbolContextSymbol
;
1798 if (pc_so_addr
.IsValid() && pc_so_addr
.GetModule()) {
1799 pc_so_addr
.GetModule()->ResolveSymbolContextForAddress(
1800 pc_so_addr
, resolve_scope
, sym_ctx
);
1803 if (value_so_addr
.IsValid() && value_so_addr
.GetSection()) {
1806 bool format_omitting_current_func_name
= false;
1807 if (sym_ctx
.symbol
|| sym_ctx
.function
) {
1809 if (sym_ctx
.GetAddressRange(resolve_scope
, 0, false, range
) &&
1810 range
.GetBaseAddress().IsValid() &&
1811 range
.ContainsLoadAddress(value_so_addr
, target
)) {
1812 format_omitting_current_func_name
= true;
1816 // If the "value" address (the target address we're symbolicating) is
1817 // inside the same SymbolContext as the current instruction pc
1818 // (pc_so_addr), don't print the full function name - just print it
1819 // with DumpStyleNoFunctionName style, e.g. "<+36>".
1820 if (format_omitting_current_func_name
) {
1821 value_so_addr
.Dump(&ss
, target
, Address::DumpStyleNoFunctionName
,
1822 Address::DumpStyleSectionNameOffset
);
1826 Address::DumpStyleResolvedDescriptionNoFunctionArguments
,
1827 Address::DumpStyleSectionNameOffset
);
1830 if (!ss
.GetString().empty()) {
1831 // If Address::Dump returned a multi-line description, most commonly
1832 // seen when we have multiple levels of inlined functions at an
1833 // address, only show the first line.
1834 std::string str
= std::string(ss
.GetString());
1835 size_t first_eol_char
= str
.find_first_of("\r\n");
1836 if (first_eol_char
!= std::string::npos
) {
1837 str
.erase(first_eol_char
);
1839 m_inst
->AppendComment(str
);
1845 // TODO: llvm-objdump sets the type_ptr to the
1846 // LLVMDisassembler_ReferenceType_Out_* values
1847 // based on where value_so_addr is pointing, with
1848 // Mach-O specific augmentations in MachODump.cpp. e.g.
1849 // see what AArch64ExternalSymbolizer::tryAddingSymbolicOperand
1851 *type_ptr
= LLVMDisassembler_ReferenceType_InOut_None
;