//===----- RISCVMergeBaseOffset.cpp - Optimise address calculations ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Merge the offset of address calculation into the offset field
// of instructions in a global address lowering sequence.
//
//===----------------------------------------------------------------------===//
#include "RISCVTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
#include <optional>
24 #define DEBUG_TYPE "riscv-merge-base-offset"
25 #define RISCV_MERGE_BASE_OFFSET_NAME "RISC-V Merge Base Offset"
28 class RISCVMergeBaseOffsetOpt
: public MachineFunctionPass
{
29 const RISCVSubtarget
*ST
= nullptr;
30 MachineRegisterInfo
*MRI
;
34 bool runOnMachineFunction(MachineFunction
&Fn
) override
;
35 bool detectFoldable(MachineInstr
&Hi
, MachineInstr
*&Lo
);
37 bool detectAndFoldOffset(MachineInstr
&Hi
, MachineInstr
&Lo
);
38 void foldOffset(MachineInstr
&Hi
, MachineInstr
&Lo
, MachineInstr
&Tail
,
40 bool foldLargeOffset(MachineInstr
&Hi
, MachineInstr
&Lo
,
41 MachineInstr
&TailAdd
, Register GSReg
);
42 bool foldShiftedOffset(MachineInstr
&Hi
, MachineInstr
&Lo
,
43 MachineInstr
&TailShXAdd
, Register GSReg
);
45 bool foldIntoMemoryOps(MachineInstr
&Hi
, MachineInstr
&Lo
);
47 RISCVMergeBaseOffsetOpt() : MachineFunctionPass(ID
) {}
49 MachineFunctionProperties
getRequiredProperties() const override
{
50 return MachineFunctionProperties().set(
51 MachineFunctionProperties::Property::IsSSA
);
54 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
56 MachineFunctionPass::getAnalysisUsage(AU
);
59 StringRef
getPassName() const override
{
60 return RISCV_MERGE_BASE_OFFSET_NAME
;
63 } // end anonymous namespace
65 char RISCVMergeBaseOffsetOpt::ID
= 0;
66 INITIALIZE_PASS(RISCVMergeBaseOffsetOpt
, DEBUG_TYPE
,
67 RISCV_MERGE_BASE_OFFSET_NAME
, false, false)
69 // Detect either of the patterns:
71 // 1. (medlow pattern):
73 // addi vreg2, vreg1, %lo(s)
75 // 2. (medany pattern):
77 // auipc vreg1, %pcrel_hi(s)
78 // addi vreg2, vreg1, %pcrel_lo(.Lpcrel_hi1)
80 // The pattern is only accepted if:
81 // 1) The first instruction has only one use, which is the ADDI.
82 // 2) The address operands have the appropriate type, reflecting the
83 // lowering of a global address or constant pool using medlow or medany.
84 // 3) The offset value in the Global Address or Constant Pool is 0.
85 bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr
&Hi
,
87 if (Hi
.getOpcode() != RISCV::LUI
&& Hi
.getOpcode() != RISCV::AUIPC
&&
88 Hi
.getOpcode() != RISCV::PseudoMovAddr
)
91 const MachineOperand
&HiOp1
= Hi
.getOperand(1);
92 unsigned ExpectedFlags
=
93 Hi
.getOpcode() == RISCV::AUIPC
? RISCVII::MO_PCREL_HI
: RISCVII::MO_HI
;
94 if (HiOp1
.getTargetFlags() != ExpectedFlags
)
97 if (!(HiOp1
.isGlobal() || HiOp1
.isCPI() || HiOp1
.isBlockAddress()) ||
98 HiOp1
.getOffset() != 0)
101 if (Hi
.getOpcode() == RISCV::PseudoMovAddr
) {
102 // Most of the code should handle it correctly without modification by
103 // setting Lo and Hi both point to PseudoMovAddr
106 Register HiDestReg
= Hi
.getOperand(0).getReg();
107 if (!MRI
->hasOneUse(HiDestReg
))
110 Lo
= &*MRI
->use_instr_begin(HiDestReg
);
111 if (Lo
->getOpcode() != RISCV::ADDI
)
115 const MachineOperand
&LoOp2
= Lo
->getOperand(2);
116 if (Hi
.getOpcode() == RISCV::LUI
|| Hi
.getOpcode() == RISCV::PseudoMovAddr
) {
117 if (LoOp2
.getTargetFlags() != RISCVII::MO_LO
||
118 !(LoOp2
.isGlobal() || LoOp2
.isCPI() || LoOp2
.isBlockAddress()) ||
119 LoOp2
.getOffset() != 0)
122 assert(Hi
.getOpcode() == RISCV::AUIPC
);
123 if (LoOp2
.getTargetFlags() != RISCVII::MO_PCREL_LO
||
124 LoOp2
.getType() != MachineOperand::MO_MCSymbol
)
128 if (HiOp1
.isGlobal()) {
129 LLVM_DEBUG(dbgs() << " Found lowered global address: "
130 << *HiOp1
.getGlobal() << "\n");
131 } else if (HiOp1
.isBlockAddress()) {
132 LLVM_DEBUG(dbgs() << " Found lowered basic address: "
133 << *HiOp1
.getBlockAddress() << "\n");
134 } else if (HiOp1
.isCPI()) {
135 LLVM_DEBUG(dbgs() << " Found lowered constant pool: " << HiOp1
.getIndex()
142 // Update the offset in Hi and Lo instructions.
143 // Delete the tail instruction and update all the uses to use the
145 void RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr
&Hi
, MachineInstr
&Lo
,
146 MachineInstr
&Tail
, int64_t Offset
) {
147 assert(isInt
<32>(Offset
) && "Unexpected offset");
148 // Put the offset back in Hi and the Lo
149 Hi
.getOperand(1).setOffset(Offset
);
150 if (Hi
.getOpcode() != RISCV::AUIPC
)
151 Lo
.getOperand(2).setOffset(Offset
);
152 // Delete the tail instruction.
153 MRI
->constrainRegClass(Lo
.getOperand(0).getReg(),
154 MRI
->getRegClass(Tail
.getOperand(0).getReg()));
155 MRI
->replaceRegWith(Tail
.getOperand(0).getReg(), Lo
.getOperand(0).getReg());
156 Tail
.eraseFromParent();
157 LLVM_DEBUG(dbgs() << " Merged offset " << Offset
<< " into base.\n"
158 << " " << Hi
<< " " << Lo
;);
161 // Detect patterns for large offsets that are passed into an ADD instruction.
162 // If the pattern is found, updates the offset in Hi and Lo instructions
163 // and deletes TailAdd and the instructions that produced the offset.
165 // Base address lowering is of the form:
166 // Hi: lui vreg1, %hi(s)
167 // Lo: addi vreg2, vreg1, %lo(s)
171 // / The large offset can be of two forms: \
172 // 1) Offset that has non zero bits in lower 2) Offset that has non zero
173 // 12 bits and upper 20 bits bits in upper 20 bits only
174 // OffseLUI: lui vreg3, 4
175 // OffsetTail: addi voff, vreg3, 188 OffsetTail: lui voff, 128
180 // TailAdd: add vreg4, vreg2, voff
181 bool RISCVMergeBaseOffsetOpt::foldLargeOffset(MachineInstr
&Hi
,
183 MachineInstr
&TailAdd
,
185 assert((TailAdd
.getOpcode() == RISCV::ADD
) && "Expected ADD instruction!");
186 Register Rs
= TailAdd
.getOperand(1).getReg();
187 Register Rt
= TailAdd
.getOperand(2).getReg();
188 Register Reg
= Rs
== GAReg
? Rt
: Rs
;
190 // Can't fold if the register has more than one use.
191 if (!Reg
.isVirtual() || !MRI
->hasOneUse(Reg
))
193 // This can point to an ADDI(W) or a LUI:
194 MachineInstr
&OffsetTail
= *MRI
->getVRegDef(Reg
);
195 if (OffsetTail
.getOpcode() == RISCV::ADDI
||
196 OffsetTail
.getOpcode() == RISCV::ADDIW
) {
197 // The offset value has non zero bits in both %hi and %lo parts.
198 // Detect an ADDI that feeds from a LUI instruction.
199 MachineOperand
&AddiImmOp
= OffsetTail
.getOperand(2);
200 if (AddiImmOp
.getTargetFlags() != RISCVII::MO_None
)
202 Register AddiReg
= OffsetTail
.getOperand(1).getReg();
203 int64_t OffLo
= AddiImmOp
.getImm();
205 // Handle rs1 of ADDI is X0.
206 if (AddiReg
== RISCV::X0
) {
207 LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail
);
208 foldOffset(Hi
, Lo
, TailAdd
, OffLo
);
209 OffsetTail
.eraseFromParent();
213 MachineInstr
&OffsetLui
= *MRI
->getVRegDef(AddiReg
);
214 MachineOperand
&LuiImmOp
= OffsetLui
.getOperand(1);
215 if (OffsetLui
.getOpcode() != RISCV::LUI
||
216 LuiImmOp
.getTargetFlags() != RISCVII::MO_None
||
217 !MRI
->hasOneUse(OffsetLui
.getOperand(0).getReg()))
219 int64_t Offset
= SignExtend64
<32>(LuiImmOp
.getImm() << 12);
221 // RV32 ignores the upper 32 bits. ADDIW sign extends the result.
222 if (!ST
->is64Bit() || OffsetTail
.getOpcode() == RISCV::ADDIW
)
223 Offset
= SignExtend64
<32>(Offset
);
224 // We can only fold simm32 offsets.
225 if (!isInt
<32>(Offset
))
227 LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail
228 << " " << OffsetLui
);
229 foldOffset(Hi
, Lo
, TailAdd
, Offset
);
230 OffsetTail
.eraseFromParent();
231 OffsetLui
.eraseFromParent();
233 } else if (OffsetTail
.getOpcode() == RISCV::LUI
) {
234 // The offset value has all zero bits in the lower 12 bits. Only LUI
236 LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail
);
237 int64_t Offset
= SignExtend64
<32>(OffsetTail
.getOperand(1).getImm() << 12);
238 foldOffset(Hi
, Lo
, TailAdd
, Offset
);
239 OffsetTail
.eraseFromParent();
245 // Detect patterns for offsets that are passed into a SHXADD instruction.
246 // The offset has 1, 2, or 3 trailing zeros and fits in simm13, simm14, simm15.
247 // The constant is created with addi voff, x0, C, and shXadd is used to
248 // fill insert the trailing zeros and do the addition.
249 // If the pattern is found, updates the offset in Hi and Lo instructions
250 // and deletes TailShXAdd and the instructions that produced the offset.
252 // Hi: lui vreg1, %hi(s)
253 // Lo: addi vreg2, vreg1, %lo(s)
254 // OffsetTail: addi voff, x0, C
255 // TailAdd: shXadd vreg4, voff, vreg2
256 bool RISCVMergeBaseOffsetOpt::foldShiftedOffset(MachineInstr
&Hi
,
258 MachineInstr
&TailShXAdd
,
260 assert((TailShXAdd
.getOpcode() == RISCV::SH1ADD
||
261 TailShXAdd
.getOpcode() == RISCV::SH2ADD
||
262 TailShXAdd
.getOpcode() == RISCV::SH3ADD
) &&
263 "Expected SHXADD instruction!");
265 if (GAReg
!= TailShXAdd
.getOperand(2).getReg())
268 // The first source is the shifted operand.
269 Register Rs1
= TailShXAdd
.getOperand(1).getReg();
271 // Can't fold if the register has more than one use.
272 if (!Rs1
.isVirtual() || !MRI
->hasOneUse(Rs1
))
274 // This can point to an ADDI X0, C.
275 MachineInstr
&OffsetTail
= *MRI
->getVRegDef(Rs1
);
276 if (OffsetTail
.getOpcode() != RISCV::ADDI
)
278 if (!OffsetTail
.getOperand(1).isReg() ||
279 OffsetTail
.getOperand(1).getReg() != RISCV::X0
||
280 !OffsetTail
.getOperand(2).isImm())
283 int64_t Offset
= OffsetTail
.getOperand(2).getImm();
284 assert(isInt
<12>(Offset
) && "Unexpected offset");
287 switch (TailShXAdd
.getOpcode()) {
288 default: llvm_unreachable("Unexpected opcode");
289 case RISCV::SH1ADD
: ShAmt
= 1; break;
290 case RISCV::SH2ADD
: ShAmt
= 2; break;
291 case RISCV::SH3ADD
: ShAmt
= 3; break;
294 Offset
= (uint64_t)Offset
<< ShAmt
;
296 LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail
);
297 foldOffset(Hi
, Lo
, TailShXAdd
, Offset
);
298 OffsetTail
.eraseFromParent();
302 bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr
&Hi
,
304 Register DestReg
= Lo
.getOperand(0).getReg();
306 // Look for arithmetic instructions we can get an offset from.
307 // We might be able to remove the arithmetic instructions by folding the
308 // offset into the LUI+ADDI.
309 if (!MRI
->hasOneUse(DestReg
))
312 // Lo has only one use.
313 MachineInstr
&Tail
= *MRI
->use_instr_begin(DestReg
);
314 switch (Tail
.getOpcode()) {
316 LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
320 // Offset is simply an immediate operand.
321 int64_t Offset
= Tail
.getOperand(2).getImm();
323 // We might have two ADDIs in a row.
324 Register TailDestReg
= Tail
.getOperand(0).getReg();
325 if (MRI
->hasOneUse(TailDestReg
)) {
326 MachineInstr
&TailTail
= *MRI
->use_instr_begin(TailDestReg
);
327 if (TailTail
.getOpcode() == RISCV::ADDI
) {
328 Offset
+= TailTail
.getOperand(2).getImm();
329 LLVM_DEBUG(dbgs() << " Offset Instrs: " << Tail
<< TailTail
);
330 foldOffset(Hi
, Lo
, TailTail
, Offset
);
331 Tail
.eraseFromParent();
336 LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail
);
337 foldOffset(Hi
, Lo
, Tail
, Offset
);
341 // The offset is too large to fit in the immediate field of ADDI.
342 // This can be in two forms:
343 // 1) LUI hi_Offset followed by:
345 // This happens in case the offset has non zero bits in
346 // both hi 20 and lo 12 bits.
348 // This happens in case the lower 12 bits of the offset are zeros.
349 return foldLargeOffset(Hi
, Lo
, Tail
, DestReg
);
353 // The offset is too large to fit in the immediate field of ADDI.
354 // It may be encoded as (SH2ADD (ADDI X0, C), DestReg) or
355 // (SH3ADD (ADDI X0, C), DestReg).
356 return foldShiftedOffset(Hi
, Lo
, Tail
, DestReg
);
362 bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr
&Hi
,
364 Register DestReg
= Lo
.getOperand(0).getReg();
366 // If all the uses are memory ops with the same offset, we can transform:
368 // 1. (medlow pattern):
369 // Hi: lui vreg1, %hi(foo) ---> lui vreg1, %hi(foo+8)
370 // Lo: addi vreg2, vreg1, %lo(foo) ---> lw vreg3, lo(foo+8)(vreg1)
371 // Tail: lw vreg3, 8(vreg2)
373 // 2. (medany pattern):
374 // Hi: 1:auipc vreg1, %pcrel_hi(s) ---> auipc vreg1, %pcrel_hi(foo+8)
375 // Lo: addi vreg2, vreg1, %pcrel_lo(1b) ---> lw vreg3, %pcrel_lo(1b)(vreg1)
376 // Tail: lw vreg3, 8(vreg2)
378 std::optional
<int64_t> CommonOffset
;
379 DenseMap
<const MachineInstr
*, SmallVector
<unsigned>>
380 InlineAsmMemoryOpIndexesMap
;
381 for (const MachineInstr
&UseMI
: MRI
->use_instructions(DestReg
)) {
382 switch (UseMI
.getOpcode()) {
384 LLVM_DEBUG(dbgs() << "Not a load or store instruction: " << UseMI
);
407 if (UseMI
.getOperand(1).isFI())
409 // Register defined by Lo should not be the value register.
410 if (DestReg
== UseMI
.getOperand(0).getReg())
412 assert(DestReg
== UseMI
.getOperand(1).getReg() &&
413 "Expected base address use");
414 // All load/store instructions must use the same offset.
415 int64_t Offset
= UseMI
.getOperand(2).getImm();
416 if (CommonOffset
&& Offset
!= CommonOffset
)
418 CommonOffset
= Offset
;
421 case RISCV::INLINEASM
:
422 case RISCV::INLINEASM_BR
: {
423 SmallVector
<unsigned> InlineAsmMemoryOpIndexes
;
425 for (unsigned I
= InlineAsm::MIOp_FirstOperand
;
426 I
< UseMI
.getNumOperands(); I
+= 1 + NumOps
) {
427 const MachineOperand
&FlagsMO
= UseMI
.getOperand(I
);
429 if (!FlagsMO
.isImm())
432 const InlineAsm::Flag
Flags(FlagsMO
.getImm());
433 NumOps
= Flags
.getNumOperandRegisters();
435 // Memory constraints have two operands.
436 if (NumOps
!= 2 || !Flags
.isMemKind()) {
437 // If the register is used by something other than a memory contraint,
438 // we should not fold.
439 for (unsigned J
= 0; J
< NumOps
; ++J
) {
440 const MachineOperand
&MO
= UseMI
.getOperand(I
+ 1 + J
);
441 if (MO
.isReg() && MO
.getReg() == DestReg
)
447 // We can't do this for constraint A because AMO instructions don't have
448 // an immediate offset field.
449 if (Flags
.getMemoryConstraintID() == InlineAsm::ConstraintCode::A
)
452 const MachineOperand
&AddrMO
= UseMI
.getOperand(I
+ 1);
453 if (!AddrMO
.isReg() || AddrMO
.getReg() != DestReg
)
456 const MachineOperand
&OffsetMO
= UseMI
.getOperand(I
+ 2);
457 if (!OffsetMO
.isImm())
460 // All inline asm memory operands must use the same offset.
461 int64_t Offset
= OffsetMO
.getImm();
462 if (CommonOffset
&& Offset
!= CommonOffset
)
464 CommonOffset
= Offset
;
465 InlineAsmMemoryOpIndexes
.push_back(I
+ 1);
467 InlineAsmMemoryOpIndexesMap
.insert(
468 std::make_pair(&UseMI
, InlineAsmMemoryOpIndexes
));
474 // We found a common offset.
475 // Update the offsets in global address lowering.
476 // We may have already folded some arithmetic so we need to add to any
478 int64_t NewOffset
= Hi
.getOperand(1).getOffset() + *CommonOffset
;
479 // RV32 ignores the upper 32 bits.
481 NewOffset
= SignExtend64
<32>(NewOffset
);
482 // We can only fold simm32 offsets.
483 if (!isInt
<32>(NewOffset
))
486 Hi
.getOperand(1).setOffset(NewOffset
);
487 MachineOperand
&ImmOp
= Lo
.getOperand(2);
488 // Expand PseudoMovAddr into LUI
489 if (Hi
.getOpcode() == RISCV::PseudoMovAddr
) {
490 auto *TII
= ST
->getInstrInfo();
491 Hi
.setDesc(TII
->get(RISCV::LUI
));
495 if (Hi
.getOpcode() != RISCV::AUIPC
)
496 ImmOp
.setOffset(NewOffset
);
498 // Update the immediate in the load/store instructions to add the offset.
499 for (MachineInstr
&UseMI
:
500 llvm::make_early_inc_range(MRI
->use_instructions(DestReg
))) {
501 if (UseMI
.getOpcode() == RISCV::INLINEASM
||
502 UseMI
.getOpcode() == RISCV::INLINEASM_BR
) {
503 auto &InlineAsmMemoryOpIndexes
= InlineAsmMemoryOpIndexesMap
[&UseMI
];
504 for (unsigned I
: InlineAsmMemoryOpIndexes
) {
505 MachineOperand
&MO
= UseMI
.getOperand(I
+ 1);
506 switch (ImmOp
.getType()) {
507 case MachineOperand::MO_GlobalAddress
:
508 MO
.ChangeToGA(ImmOp
.getGlobal(), ImmOp
.getOffset(),
509 ImmOp
.getTargetFlags());
511 case MachineOperand::MO_MCSymbol
:
512 MO
.ChangeToMCSymbol(ImmOp
.getMCSymbol(), ImmOp
.getTargetFlags());
513 MO
.setOffset(ImmOp
.getOffset());
515 case MachineOperand::MO_BlockAddress
:
516 MO
.ChangeToBA(ImmOp
.getBlockAddress(), ImmOp
.getOffset(),
517 ImmOp
.getTargetFlags());
520 report_fatal_error("unsupported machine operand type");
525 UseMI
.removeOperand(2);
526 UseMI
.addOperand(ImmOp
);
530 // Prevent Lo (originally PseudoMovAddr, which is also pointed by Hi) from
535 MRI
->replaceRegWith(Lo
.getOperand(0).getReg(), Hi
.getOperand(0).getReg());
536 Lo
.eraseFromParent();
540 bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction
&Fn
) {
541 if (skipFunction(Fn
.getFunction()))
544 ST
= &Fn
.getSubtarget
<RISCVSubtarget
>();
546 bool MadeChange
= false;
547 MRI
= &Fn
.getRegInfo();
548 for (MachineBasicBlock
&MBB
: Fn
) {
549 LLVM_DEBUG(dbgs() << "MBB: " << MBB
.getName() << "\n");
550 for (MachineInstr
&Hi
: MBB
) {
551 MachineInstr
*Lo
= nullptr;
552 if (!detectFoldable(Hi
, Lo
))
554 MadeChange
|= detectAndFoldOffset(Hi
, *Lo
);
555 MadeChange
|= foldIntoMemoryOps(Hi
, *Lo
);
562 /// Returns an instance of the Merge Base Offset Optimization pass.
563 FunctionPass
*llvm::createRISCVMergeBaseOffsetOptPass() {
564 return new RISCVMergeBaseOffsetOpt();