1 //===----- RISCVMergeBaseOffset.cpp - Optimise address calculations ------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Merge the offset of address calculation into the offset field
10 // of instructions in a global address lowering sequence.
12 //===----------------------------------------------------------------------===//
#include "RISCV.h"
#include "RISCVTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
#include <optional>

using namespace llvm;
24 #define DEBUG_TYPE "riscv-merge-base-offset"
25 #define RISCV_MERGE_BASE_OFFSET_NAME "RISC-V Merge Base Offset"
28 class RISCVMergeBaseOffsetOpt
: public MachineFunctionPass
{
29 const RISCVSubtarget
*ST
= nullptr;
30 MachineRegisterInfo
*MRI
;
34 bool runOnMachineFunction(MachineFunction
&Fn
) override
;
35 bool detectFoldable(MachineInstr
&Hi
, MachineInstr
*&Lo
);
37 bool detectAndFoldOffset(MachineInstr
&Hi
, MachineInstr
&Lo
);
38 void foldOffset(MachineInstr
&Hi
, MachineInstr
&Lo
, MachineInstr
&Tail
,
40 bool foldLargeOffset(MachineInstr
&Hi
, MachineInstr
&Lo
,
41 MachineInstr
&TailAdd
, Register GSReg
);
42 bool foldShiftedOffset(MachineInstr
&Hi
, MachineInstr
&Lo
,
43 MachineInstr
&TailShXAdd
, Register GSReg
);
45 bool foldIntoMemoryOps(MachineInstr
&Hi
, MachineInstr
&Lo
);
47 RISCVMergeBaseOffsetOpt() : MachineFunctionPass(ID
) {}
49 MachineFunctionProperties
getRequiredProperties() const override
{
50 return MachineFunctionProperties().set(
51 MachineFunctionProperties::Property::IsSSA
);
54 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
56 MachineFunctionPass::getAnalysisUsage(AU
);
59 StringRef
getPassName() const override
{
60 return RISCV_MERGE_BASE_OFFSET_NAME
;
63 } // end anonymous namespace
65 char RISCVMergeBaseOffsetOpt::ID
= 0;
66 INITIALIZE_PASS(RISCVMergeBaseOffsetOpt
, DEBUG_TYPE
,
67 RISCV_MERGE_BASE_OFFSET_NAME
, false, false)
69 // Detect either of the patterns:
71 // 1. (medlow pattern):
73 // addi vreg2, vreg1, %lo(s)
75 // 2. (medany pattern):
77 // auipc vreg1, %pcrel_hi(s)
78 // addi vreg2, vreg1, %pcrel_lo(.Lpcrel_hi1)
80 // The pattern is only accepted if:
81 // 1) The first instruction has only one use, which is the ADDI.
82 // 2) The address operands have the appropriate type, reflecting the
83 // lowering of a global address or constant pool using medlow or medany.
84 // 3) The offset value in the Global Address or Constant Pool is 0.
85 bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr
&Hi
,
87 if (Hi
.getOpcode() != RISCV::LUI
&& Hi
.getOpcode() != RISCV::AUIPC
)
90 const MachineOperand
&HiOp1
= Hi
.getOperand(1);
91 unsigned ExpectedFlags
=
92 Hi
.getOpcode() == RISCV::AUIPC
? RISCVII::MO_PCREL_HI
: RISCVII::MO_HI
;
93 if (HiOp1
.getTargetFlags() != ExpectedFlags
)
96 if (!(HiOp1
.isGlobal() || HiOp1
.isCPI() || HiOp1
.isBlockAddress()) ||
97 HiOp1
.getOffset() != 0)
100 Register HiDestReg
= Hi
.getOperand(0).getReg();
101 if (!MRI
->hasOneUse(HiDestReg
))
104 Lo
= &*MRI
->use_instr_begin(HiDestReg
);
105 if (Lo
->getOpcode() != RISCV::ADDI
)
108 const MachineOperand
&LoOp2
= Lo
->getOperand(2);
109 if (Hi
.getOpcode() == RISCV::LUI
) {
110 if (LoOp2
.getTargetFlags() != RISCVII::MO_LO
||
111 !(LoOp2
.isGlobal() || LoOp2
.isCPI() || LoOp2
.isBlockAddress()) ||
112 LoOp2
.getOffset() != 0)
115 assert(Hi
.getOpcode() == RISCV::AUIPC
);
116 if (LoOp2
.getTargetFlags() != RISCVII::MO_PCREL_LO
||
117 LoOp2
.getType() != MachineOperand::MO_MCSymbol
)
121 if (HiOp1
.isGlobal()) {
122 LLVM_DEBUG(dbgs() << " Found lowered global address: "
123 << *HiOp1
.getGlobal() << "\n");
124 } else if (HiOp1
.isBlockAddress()) {
125 LLVM_DEBUG(dbgs() << " Found lowered basic address: "
126 << *HiOp1
.getBlockAddress() << "\n");
127 } else if (HiOp1
.isCPI()) {
128 LLVM_DEBUG(dbgs() << " Found lowered constant pool: " << HiOp1
.getIndex()
135 // Update the offset in Hi and Lo instructions.
136 // Delete the tail instruction and update all the uses to use the
138 void RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr
&Hi
, MachineInstr
&Lo
,
139 MachineInstr
&Tail
, int64_t Offset
) {
140 assert(isInt
<32>(Offset
) && "Unexpected offset");
141 // Put the offset back in Hi and the Lo
142 Hi
.getOperand(1).setOffset(Offset
);
143 if (Hi
.getOpcode() != RISCV::AUIPC
)
144 Lo
.getOperand(2).setOffset(Offset
);
145 // Delete the tail instruction.
146 MRI
->constrainRegClass(Lo
.getOperand(0).getReg(),
147 MRI
->getRegClass(Tail
.getOperand(0).getReg()));
148 MRI
->replaceRegWith(Tail
.getOperand(0).getReg(), Lo
.getOperand(0).getReg());
149 Tail
.eraseFromParent();
150 LLVM_DEBUG(dbgs() << " Merged offset " << Offset
<< " into base.\n"
151 << " " << Hi
<< " " << Lo
;);
154 // Detect patterns for large offsets that are passed into an ADD instruction.
155 // If the pattern is found, updates the offset in Hi and Lo instructions
156 // and deletes TailAdd and the instructions that produced the offset.
158 // Base address lowering is of the form:
159 // Hi: lui vreg1, %hi(s)
160 // Lo: addi vreg2, vreg1, %lo(s)
164 // / The large offset can be of two forms: \
165 // 1) Offset that has non zero bits in lower 2) Offset that has non zero
166 // 12 bits and upper 20 bits bits in upper 20 bits only
167 // OffseLUI: lui vreg3, 4
168 // OffsetTail: addi voff, vreg3, 188 OffsetTail: lui voff, 128
173 // TailAdd: add vreg4, vreg2, voff
174 bool RISCVMergeBaseOffsetOpt::foldLargeOffset(MachineInstr
&Hi
,
176 MachineInstr
&TailAdd
,
178 assert((TailAdd
.getOpcode() == RISCV::ADD
) && "Expected ADD instruction!");
179 Register Rs
= TailAdd
.getOperand(1).getReg();
180 Register Rt
= TailAdd
.getOperand(2).getReg();
181 Register Reg
= Rs
== GAReg
? Rt
: Rs
;
183 // Can't fold if the register has more than one use.
184 if (!Reg
.isVirtual() || !MRI
->hasOneUse(Reg
))
186 // This can point to an ADDI(W) or a LUI:
187 MachineInstr
&OffsetTail
= *MRI
->getVRegDef(Reg
);
188 if (OffsetTail
.getOpcode() == RISCV::ADDI
||
189 OffsetTail
.getOpcode() == RISCV::ADDIW
) {
190 // The offset value has non zero bits in both %hi and %lo parts.
191 // Detect an ADDI that feeds from a LUI instruction.
192 MachineOperand
&AddiImmOp
= OffsetTail
.getOperand(2);
193 if (AddiImmOp
.getTargetFlags() != RISCVII::MO_None
)
195 Register AddiReg
= OffsetTail
.getOperand(1).getReg();
196 int64_t OffLo
= AddiImmOp
.getImm();
198 // Handle rs1 of ADDI is X0.
199 if (AddiReg
== RISCV::X0
) {
200 LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail
);
201 foldOffset(Hi
, Lo
, TailAdd
, OffLo
);
202 OffsetTail
.eraseFromParent();
206 MachineInstr
&OffsetLui
= *MRI
->getVRegDef(AddiReg
);
207 MachineOperand
&LuiImmOp
= OffsetLui
.getOperand(1);
208 if (OffsetLui
.getOpcode() != RISCV::LUI
||
209 LuiImmOp
.getTargetFlags() != RISCVII::MO_None
||
210 !MRI
->hasOneUse(OffsetLui
.getOperand(0).getReg()))
212 int64_t Offset
= SignExtend64
<32>(LuiImmOp
.getImm() << 12);
214 // RV32 ignores the upper 32 bits. ADDIW sign extends the result.
215 if (!ST
->is64Bit() || OffsetTail
.getOpcode() == RISCV::ADDIW
)
216 Offset
= SignExtend64
<32>(Offset
);
217 // We can only fold simm32 offsets.
218 if (!isInt
<32>(Offset
))
220 LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail
221 << " " << OffsetLui
);
222 foldOffset(Hi
, Lo
, TailAdd
, Offset
);
223 OffsetTail
.eraseFromParent();
224 OffsetLui
.eraseFromParent();
226 } else if (OffsetTail
.getOpcode() == RISCV::LUI
) {
227 // The offset value has all zero bits in the lower 12 bits. Only LUI
229 LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail
);
230 int64_t Offset
= SignExtend64
<32>(OffsetTail
.getOperand(1).getImm() << 12);
231 foldOffset(Hi
, Lo
, TailAdd
, Offset
);
232 OffsetTail
.eraseFromParent();
238 // Detect patterns for offsets that are passed into a SHXADD instruction.
239 // The offset has 1, 2, or 3 trailing zeros and fits in simm13, simm14, simm15.
240 // The constant is created with addi voff, x0, C, and shXadd is used to
241 // fill insert the trailing zeros and do the addition.
242 // If the pattern is found, updates the offset in Hi and Lo instructions
243 // and deletes TailShXAdd and the instructions that produced the offset.
245 // Hi: lui vreg1, %hi(s)
246 // Lo: addi vreg2, vreg1, %lo(s)
247 // OffsetTail: addi voff, x0, C
248 // TailAdd: shXadd vreg4, voff, vreg2
249 bool RISCVMergeBaseOffsetOpt::foldShiftedOffset(MachineInstr
&Hi
,
251 MachineInstr
&TailShXAdd
,
253 assert((TailShXAdd
.getOpcode() == RISCV::SH1ADD
||
254 TailShXAdd
.getOpcode() == RISCV::SH2ADD
||
255 TailShXAdd
.getOpcode() == RISCV::SH3ADD
) &&
256 "Expected SHXADD instruction!");
258 if (GAReg
!= TailShXAdd
.getOperand(2).getReg())
261 // The first source is the shifted operand.
262 Register Rs1
= TailShXAdd
.getOperand(1).getReg();
264 // Can't fold if the register has more than one use.
265 if (!Rs1
.isVirtual() || !MRI
->hasOneUse(Rs1
))
267 // This can point to an ADDI X0, C.
268 MachineInstr
&OffsetTail
= *MRI
->getVRegDef(Rs1
);
269 if (OffsetTail
.getOpcode() != RISCV::ADDI
)
271 if (!OffsetTail
.getOperand(1).isReg() ||
272 OffsetTail
.getOperand(1).getReg() != RISCV::X0
||
273 !OffsetTail
.getOperand(2).isImm())
276 int64_t Offset
= OffsetTail
.getOperand(2).getImm();
277 assert(isInt
<12>(Offset
) && "Unexpected offset");
280 switch (TailShXAdd
.getOpcode()) {
281 default: llvm_unreachable("Unexpected opcode");
282 case RISCV::SH1ADD
: ShAmt
= 1; break;
283 case RISCV::SH2ADD
: ShAmt
= 2; break;
284 case RISCV::SH3ADD
: ShAmt
= 3; break;
287 Offset
= (uint64_t)Offset
<< ShAmt
;
289 LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail
);
290 foldOffset(Hi
, Lo
, TailShXAdd
, Offset
);
291 OffsetTail
.eraseFromParent();
295 bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr
&Hi
,
297 Register DestReg
= Lo
.getOperand(0).getReg();
299 // Look for arithmetic instructions we can get an offset from.
300 // We might be able to remove the arithmetic instructions by folding the
301 // offset into the LUI+ADDI.
302 if (!MRI
->hasOneUse(DestReg
))
305 // Lo has only one use.
306 MachineInstr
&Tail
= *MRI
->use_instr_begin(DestReg
);
307 switch (Tail
.getOpcode()) {
309 LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
313 // Offset is simply an immediate operand.
314 int64_t Offset
= Tail
.getOperand(2).getImm();
316 // We might have two ADDIs in a row.
317 Register TailDestReg
= Tail
.getOperand(0).getReg();
318 if (MRI
->hasOneUse(TailDestReg
)) {
319 MachineInstr
&TailTail
= *MRI
->use_instr_begin(TailDestReg
);
320 if (TailTail
.getOpcode() == RISCV::ADDI
) {
321 Offset
+= TailTail
.getOperand(2).getImm();
322 LLVM_DEBUG(dbgs() << " Offset Instrs: " << Tail
<< TailTail
);
323 foldOffset(Hi
, Lo
, TailTail
, Offset
);
324 Tail
.eraseFromParent();
329 LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail
);
330 foldOffset(Hi
, Lo
, Tail
, Offset
);
334 // The offset is too large to fit in the immediate field of ADDI.
335 // This can be in two forms:
336 // 1) LUI hi_Offset followed by:
338 // This happens in case the offset has non zero bits in
339 // both hi 20 and lo 12 bits.
341 // This happens in case the lower 12 bits of the offset are zeros.
342 return foldLargeOffset(Hi
, Lo
, Tail
, DestReg
);
346 // The offset is too large to fit in the immediate field of ADDI.
347 // It may be encoded as (SH2ADD (ADDI X0, C), DestReg) or
348 // (SH3ADD (ADDI X0, C), DestReg).
349 return foldShiftedOffset(Hi
, Lo
, Tail
, DestReg
);
355 bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr
&Hi
,
357 Register DestReg
= Lo
.getOperand(0).getReg();
359 // If all the uses are memory ops with the same offset, we can transform:
361 // 1. (medlow pattern):
362 // Hi: lui vreg1, %hi(foo) ---> lui vreg1, %hi(foo+8)
363 // Lo: addi vreg2, vreg1, %lo(foo) ---> lw vreg3, lo(foo+8)(vreg1)
364 // Tail: lw vreg3, 8(vreg2)
366 // 2. (medany pattern):
367 // Hi: 1:auipc vreg1, %pcrel_hi(s) ---> auipc vreg1, %pcrel_hi(foo+8)
368 // Lo: addi vreg2, vreg1, %pcrel_lo(1b) ---> lw vreg3, %pcrel_lo(1b)(vreg1)
369 // Tail: lw vreg3, 8(vreg2)
371 std::optional
<int64_t> CommonOffset
;
372 DenseMap
<const MachineInstr
*, SmallVector
<unsigned>>
373 InlineAsmMemoryOpIndexesMap
;
374 for (const MachineInstr
&UseMI
: MRI
->use_instructions(DestReg
)) {
375 switch (UseMI
.getOpcode()) {
377 LLVM_DEBUG(dbgs() << "Not a load or store instruction: " << UseMI
);
396 if (UseMI
.getOperand(1).isFI())
398 // Register defined by Lo should not be the value register.
399 if (DestReg
== UseMI
.getOperand(0).getReg())
401 assert(DestReg
== UseMI
.getOperand(1).getReg() &&
402 "Expected base address use");
403 // All load/store instructions must use the same offset.
404 int64_t Offset
= UseMI
.getOperand(2).getImm();
405 if (CommonOffset
&& Offset
!= CommonOffset
)
407 CommonOffset
= Offset
;
410 case RISCV::INLINEASM
:
411 case RISCV::INLINEASM_BR
: {
412 SmallVector
<unsigned> InlineAsmMemoryOpIndexes
;
414 for (unsigned I
= InlineAsm::MIOp_FirstOperand
;
415 I
< UseMI
.getNumOperands(); I
+= 1 + NumOps
) {
416 const MachineOperand
&FlagsMO
= UseMI
.getOperand(I
);
418 if (!FlagsMO
.isImm())
421 const InlineAsm::Flag
Flags(FlagsMO
.getImm());
422 NumOps
= Flags
.getNumOperandRegisters();
424 // Memory constraints have two operands.
425 if (NumOps
!= 2 || !Flags
.isMemKind())
428 // We can't do this for constraint A because AMO instructions don't have
429 // an immediate offset field.
430 if (Flags
.getMemoryConstraintID() == InlineAsm::ConstraintCode::A
)
433 const MachineOperand
&AddrMO
= UseMI
.getOperand(I
+ 1);
434 if (!AddrMO
.isReg() || AddrMO
.getReg() != DestReg
)
437 const MachineOperand
&OffsetMO
= UseMI
.getOperand(I
+ 2);
438 if (!OffsetMO
.isImm())
441 // All inline asm memory operands must use the same offset.
442 int64_t Offset
= OffsetMO
.getImm();
443 if (CommonOffset
&& Offset
!= CommonOffset
)
445 CommonOffset
= Offset
;
446 InlineAsmMemoryOpIndexes
.push_back(I
+ 1);
448 InlineAsmMemoryOpIndexesMap
.insert(
449 std::make_pair(&UseMI
, InlineAsmMemoryOpIndexes
));
455 // We found a common offset.
456 // Update the offsets in global address lowering.
457 // We may have already folded some arithmetic so we need to add to any
459 int64_t NewOffset
= Hi
.getOperand(1).getOffset() + *CommonOffset
;
460 // RV32 ignores the upper 32 bits.
462 NewOffset
= SignExtend64
<32>(NewOffset
);
463 // We can only fold simm32 offsets.
464 if (!isInt
<32>(NewOffset
))
467 Hi
.getOperand(1).setOffset(NewOffset
);
468 MachineOperand
&ImmOp
= Lo
.getOperand(2);
469 if (Hi
.getOpcode() != RISCV::AUIPC
)
470 ImmOp
.setOffset(NewOffset
);
472 // Update the immediate in the load/store instructions to add the offset.
473 for (MachineInstr
&UseMI
:
474 llvm::make_early_inc_range(MRI
->use_instructions(DestReg
))) {
475 if (UseMI
.getOpcode() == RISCV::INLINEASM
||
476 UseMI
.getOpcode() == RISCV::INLINEASM_BR
) {
477 auto &InlineAsmMemoryOpIndexes
= InlineAsmMemoryOpIndexesMap
[&UseMI
];
478 for (unsigned I
: InlineAsmMemoryOpIndexes
) {
479 MachineOperand
&MO
= UseMI
.getOperand(I
+ 1);
480 switch (ImmOp
.getType()) {
481 case MachineOperand::MO_GlobalAddress
:
482 MO
.ChangeToGA(ImmOp
.getGlobal(), ImmOp
.getOffset(),
483 ImmOp
.getTargetFlags());
485 case MachineOperand::MO_MCSymbol
:
486 MO
.ChangeToMCSymbol(ImmOp
.getMCSymbol(), ImmOp
.getTargetFlags());
487 MO
.setOffset(ImmOp
.getOffset());
489 case MachineOperand::MO_BlockAddress
:
490 MO
.ChangeToBA(ImmOp
.getBlockAddress(), ImmOp
.getOffset(),
491 ImmOp
.getTargetFlags());
494 report_fatal_error("unsupported machine operand type");
499 UseMI
.removeOperand(2);
500 UseMI
.addOperand(ImmOp
);
504 MRI
->replaceRegWith(Lo
.getOperand(0).getReg(), Hi
.getOperand(0).getReg());
505 Lo
.eraseFromParent();
509 bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction
&Fn
) {
510 if (skipFunction(Fn
.getFunction()))
513 ST
= &Fn
.getSubtarget
<RISCVSubtarget
>();
515 bool MadeChange
= false;
516 MRI
= &Fn
.getRegInfo();
517 for (MachineBasicBlock
&MBB
: Fn
) {
518 LLVM_DEBUG(dbgs() << "MBB: " << MBB
.getName() << "\n");
519 for (MachineInstr
&Hi
: MBB
) {
520 MachineInstr
*Lo
= nullptr;
521 if (!detectFoldable(Hi
, Lo
))
523 MadeChange
|= detectAndFoldOffset(Hi
, *Lo
);
524 MadeChange
|= foldIntoMemoryOps(Hi
, *Lo
);
531 /// Returns an instance of the Merge Base Offset Optimization pass.
532 FunctionPass
*llvm::createRISCVMergeBaseOffsetOptPass() {
533 return new RISCVMergeBaseOffsetOpt();