//===-- RISCVInstrInfo.cpp - RISC-V Instruction Information -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the RISC-V implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "RISCVInstrInfo.h"
#include "MCTargetDesc/RISCVBaseInfo.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"

#define GEN_CHECK_COMPRESS_INSTR
#include "RISCVGenCompressInstEmitter.inc"

#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRINFO_NAMED_OPS
#include "RISCVGenInstrInfo.inc"
static cl::opt<bool> PreferWholeRegisterMove(
    "riscv-prefer-whole-register-move", cl::init(false), cl::Hidden,
    cl::desc("Prefer whole register move for vector registers."));

static cl::opt<MachineTraceStrategy> ForceMachineCombinerStrategy(
    "riscv-force-machine-combiner-strategy", cl::Hidden,
    cl::desc("Force machine combiner to use a specific strategy for machine "
             "trace metrics evaluation."),
    cl::init(MachineTraceStrategy::TS_NumStrategies),
    cl::values(clEnumValN(MachineTraceStrategy::TS_Local, "local",
                          "Local strategy."),
               clEnumValN(MachineTraceStrategy::TS_MinInstrCount, "min-instr",
                          "MinInstrCount strategy.")));
namespace llvm::RISCVVPseudosTable {

using namespace RISCV;

#define GET_RISCVVPseudosTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace llvm::RISCVVPseudosTable

namespace llvm::RISCV {

#define GET_RISCVMaskedPseudosTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // end namespace llvm::RISCV
RISCVInstrInfo::RISCVInstrInfo(RISCVSubtarget &STI)
    : RISCVGenInstrInfo(RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP),
      STI(STI) {}

MCInst RISCVInstrInfo::getNop() const {
  if (STI.hasStdExtCOrZca())
    return MCInstBuilder(RISCV::C_NOP);
  return MCInstBuilder(RISCV::ADDI)
      .addReg(RISCV::X0)
      .addReg(RISCV::X0)
      .addImm(0);
}
Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                             int &FrameIndex) const {
  unsigned Dummy;
  return isLoadFromStackSlot(MI, FrameIndex, Dummy);
}

Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                             int &FrameIndex,
                                             unsigned &MemBytes) const {
  switch (MI.getOpcode()) {
  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
      MI.getOperand(2).getImm() == 0) {
    FrameIndex = MI.getOperand(1).getIndex();
    return MI.getOperand(0).getReg();
  }

Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                            int &FrameIndex) const {
  unsigned Dummy;
  return isStoreToStackSlot(MI, FrameIndex, Dummy);
}

Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                            int &FrameIndex,
                                            unsigned &MemBytes) const {
  switch (MI.getOpcode()) {
  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
      MI.getOperand(2).getImm() == 0) {
    FrameIndex = MI.getOperand(1).getIndex();
    return MI.getOperand(0).getReg();
  }
bool RISCVInstrInfo::isReallyTriviallyReMaterializable(
    const MachineInstr &MI) const {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  case RISCV::VFMV_V_F:
  case RISCV::VFMV_S_F:
    return MI.getOperand(1).isUndef();
  default:
    return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
  }
}
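
// Returns true if copying NumRegs consecutive registers from SrcReg to DstReg
// in increasing order would overwrite source registers that have not been
// copied yet, i.e. the destination range starts inside the source range.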
static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  return DstReg > SrcReg && (DstReg - SrcReg) < NumRegs;
}
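
// Decide whether a whole-register vector COPY can be lowered to vmv.v.v by
// walking backwards from the COPY to the instruction defining the source
// register and checking that the intervening vsetvli/vtype state is
// compatible with the copied register class (same LMUL/SEW, tail-agnostic,
// VL unchanged). On success, DefMBBI points at the defining instruction.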
static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,
                                   const MachineBasicBlock &MBB,
                                   MachineBasicBlock::const_iterator MBBI,
                                   MachineBasicBlock::const_iterator &DefMBBI,
                                   RISCVII::VLMUL LMul) {
  if (PreferWholeRegisterMove)
    return false;

  assert(MBBI->getOpcode() == TargetOpcode::COPY &&
         "Unexpected COPY instruction.");
  Register SrcReg = MBBI->getOperand(1).getReg();
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();

  bool FoundDef = false;
  bool FirstVSetVLI = false;
  unsigned FirstSEW = 0;
  while (MBBI != MBB.begin()) {
    --MBBI;
    if (MBBI->isMetaInstruction())
      continue;

    if (MBBI->getOpcode() == RISCV::PseudoVSETVLI ||
        MBBI->getOpcode() == RISCV::PseudoVSETVLIX0 ||
        MBBI->getOpcode() == RISCV::PseudoVSETIVLI) {
      // There is a vsetvli between COPY and source define instruction.
      // vy = def_vop ...  (producing instruction)
      unsigned FirstVType = MBBI->getOperand(2).getImm();
      RISCVII::VLMUL FirstLMul = RISCVVType::getVLMUL(FirstVType);
      FirstSEW = RISCVVType::getSEW(FirstVType);
      // The first encountered vsetvli must have the same LMUL as the
      // register class of COPY.
      if (FirstLMul != LMul)
        return false;

      // Only permit `vsetvli x0, x0, vtype` between COPY and the source
      // define instruction.
      if (MBBI->getOperand(0).getReg() != RISCV::X0)
        return false;
      if (MBBI->getOperand(1).isImm())
        return false;
      if (MBBI->getOperand(1).getReg() != RISCV::X0)
        return false;

      // MBBI is the first vsetvli before the producing instruction.
      unsigned VType = MBBI->getOperand(2).getImm();
      // If there is a vsetvli between COPY and the producing instruction.
      // If SEW is different, return false.
      if (RISCVVType::getSEW(VType) != FirstSEW)
        return false;

      // If the vsetvli is tail undisturbed, keep the whole register move.
      if (!RISCVVType::isTailAgnostic(VType))
        return false;

      // The checking is conservative. We only have register classes for
      // LMUL = 1/2/4/8. We should be able to convert vmv1r.v to vmv.v.v
      // for fractional LMUL operations. However, we could not use the vsetvli
      // lmul for widening operations. The result of widening operation is
      // larger than the original LMUL.
      return LMul == RISCVVType::getVLMUL(VType);
    } else if (MBBI->isInlineAsm() || MBBI->isCall()) {
      return false;
    } else if (MBBI->getNumDefs()) {
      // Check all the instructions which will change VL.
      // For example, vleff has implicit def VL.
      if (MBBI->modifiesRegister(RISCV::VL, /*TRI=*/nullptr))
        return false;

      // Only converting whole register copies to vmv.v.v when the defining
      // value appears in the explicit operands.
      for (const MachineOperand &MO : MBBI->explicit_operands()) {
        if (!MO.isReg() || !MO.isDef())
          continue;
        if (!FoundDef && TRI->regsOverlap(MO.getReg(), SrcReg)) {
          // We only permit the source of COPY to have the same LMUL as the
          // defined operand.
          // There are cases we need to keep the whole register copy if the LMUL
          // is different. For example:
          //   $x0 = PseudoVSETIVLI 4, 73   // vsetivli zero, 4, e16,m2,ta,m
          //   $v28m4 = PseudoVWADD_VV_M2 $v26m2, $v8m2
          //   # The COPY may be created by vlmul_trunc intrinsic.
          //   $v26m2 = COPY renamable $v28m2, implicit killed $v28m4
          //
          // After widening, the valid value will be 4 x e32 elements. If we
          // convert the COPY to vmv.v.v, it will only copy 4 x e16 elements.
          // FIXME: The COPY of subregister of Zvlsseg register will not be able
          // to convert to vmv.v.[v|i] under the constraint.
          if (MO.getReg() != SrcReg)
            return false;

          // In widening reduction instructions with LMUL_1 input vector case,
          // only checking the LMUL is insufficient because the reduction
          // result is always LMUL_1. For example:
          //   $x11 = PseudoVSETIVLI 1, 64 // vsetivli a1, 1, e8, m1, ta, mu
          //   $v8m1 = PseudoVWREDSUM_VS_M1 $v26, $v27
          //   $v26 = COPY killed renamable $v8
          // After widening, the valid value will be 1 x e16 elements. If we
          // convert the COPY to vmv.v.v, it will only copy 1 x e8 elements.
          uint64_t TSFlags = MBBI->getDesc().TSFlags;
          if (RISCVII::isRVVWideningReduction(TSFlags))
            return false;

          // If the producing instruction does not depend on vsetvli, do not
          // convert COPY to vmv.v.v. For example, VL1R_V or PseudoVRELOAD.
          if (!RISCVII::hasSEWOp(TSFlags) || !RISCVII::hasVLOp(TSFlags))
            return false;

          // Found the definition.
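
// Copy a (possibly segmented) vector register group. The copy is split into
// the largest legal whole-register or vmv.v.v chunks (LMUL 8/4/2/1), and is
// performed in reverse register order when the source and destination tuples
// overlap.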
void RISCVInstrInfo::copyPhysRegVector(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, bool KillSrc,
    const TargetRegisterClass *RegClass) const {
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
  RISCVII::VLMUL LMul = RISCVRI::getLMul(RegClass->TSFlags);
  unsigned NF = RISCVRI::getNF(RegClass->TSFlags);

  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
  uint16_t DstEncoding = TRI->getEncodingValue(DstReg);
  auto [LMulVal, Fractional] = RISCVVType::decodeVLMUL(LMul);
  assert(!Fractional && "It is impossible be fractional lmul here.");
  unsigned NumRegs = NF * LMulVal;
  bool ReversedCopy =
      forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NumRegs);
  if (ReversedCopy) {
    // If the src and dest overlap when copying a tuple, we need to copy the
    // registers in reverse.
    SrcEncoding += NumRegs - 1;
    DstEncoding += NumRegs - 1;
  }
  unsigned I = 0;
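
  // GetCopyInfo picks the widest chunk (LMUL 8, 4, 2 or 1) that is still legal
  // for the current encodings and returns the matching register class together
  // with the whole-register move and vmv.v.v/vmv.v.i opcodes for that width.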
  auto GetCopyInfo = [&](uint16_t SrcEncoding, uint16_t DstEncoding)
      -> std::tuple<RISCVII::VLMUL, const TargetRegisterClass &, unsigned,
                    unsigned, unsigned> {
    if (ReversedCopy) {
      // For reversed copying, if there are enough aligned registers(8/4/2), we
      // can do a larger copy(LMUL8/4/2).
      // Besides, we have already known that DstEncoding is larger than
      // SrcEncoding in forwardCopyWillClobberTuple, so the difference between
      // DstEncoding and SrcEncoding should be >= LMUL value we try to use to
      // copy.
      uint16_t Diff = DstEncoding - SrcEncoding;
      if (I + 8 <= NumRegs && Diff >= 8 && SrcEncoding % 8 == 7 &&
          DstEncoding % 8 == 7)
        return {RISCVII::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V,
                RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8};
      if (I + 4 <= NumRegs && Diff >= 4 && SrcEncoding % 4 == 3 &&
          DstEncoding % 4 == 3)
        return {RISCVII::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V,
                RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4};
      if (I + 2 <= NumRegs && Diff >= 2 && SrcEncoding % 2 == 1 &&
          DstEncoding % 2 == 1)
        return {RISCVII::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V,
                RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2};
      // Or we should do LMUL1 copying.
      return {RISCVII::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
              RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
    }

    // For forward copying, if source register encoding and destination register
    // encoding are aligned to 8/4/2, we can do a LMUL8/4/2 copying.
    if (I + 8 <= NumRegs && SrcEncoding % 8 == 0 && DstEncoding % 8 == 0)
      return {RISCVII::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V,
              RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8};
    if (I + 4 <= NumRegs && SrcEncoding % 4 == 0 && DstEncoding % 4 == 0)
      return {RISCVII::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V,
              RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4};
    if (I + 2 <= NumRegs && SrcEncoding % 2 == 0 && DstEncoding % 2 == 0)
      return {RISCVII::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V,
              RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2};
    // Or we should do LMUL1 copying.
    return {RISCVII::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
            RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
  };
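
  // FindRegWithEncoding maps a raw vector register encoding back to the
  // physical register of the requested class (a plain VR register for LMUL 1,
  // otherwise the containing VRM2/VRM4/VRM8 super-register).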
  auto FindRegWithEncoding = [TRI](const TargetRegisterClass &RegClass,
                                   uint16_t Encoding) {
    MCRegister Reg = RISCV::V0 + Encoding;
    if (RISCVRI::getLMul(RegClass.TSFlags) == RISCVII::LMUL_1)
      return Reg;
    return TRI->getMatchingSuperReg(Reg, RISCV::sub_vrm1_0, &RegClass);
  };
  while (I != NumRegs) {
    // For non-segment copying, we only do this once as the registers are
    // always aligned.
    // For segment copying, we may do this several times. If the registers are
    // aligned to larger LMUL, we can eliminate some copyings.
    auto [LMulCopied, RegClass, Opc, VVOpc, VIOpc] =
        GetCopyInfo(SrcEncoding, DstEncoding);
    auto [NumCopied, _] = RISCVVType::decodeVLMUL(LMulCopied);

    MachineBasicBlock::const_iterator DefMBBI;
    if (LMul == LMulCopied &&
        isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) {
      Opc = VVOpc;
      if (DefMBBI->getOpcode() == VIOpc)
        Opc = VIOpc;
    }

    // Emit actual copying.
    // For reversed copying, the encoding should be decreased.
    MCRegister ActualSrcReg = FindRegWithEncoding(
        RegClass, ReversedCopy ? (SrcEncoding - NumCopied + 1) : SrcEncoding);
    MCRegister ActualDstReg = FindRegWithEncoding(
        RegClass, ReversedCopy ? (DstEncoding - NumCopied + 1) : DstEncoding);

    auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), ActualDstReg);
    bool UseVMV_V_I = RISCV::getRVVMCOpcode(Opc) == RISCV::VMV_V_I;
    bool UseVMV = UseVMV_V_I || RISCV::getRVVMCOpcode(Opc) == RISCV::VMV_V_V;
    if (UseVMV)
      MIB.addReg(ActualDstReg, RegState::Undef);
    if (UseVMV_V_I)
      MIB = MIB.add(DefMBBI->getOperand(2));
    else
      MIB = MIB.addReg(ActualSrcReg, getKillRegState(KillSrc));
    if (UseVMV) {
      const MCInstrDesc &Desc = DefMBBI->getDesc();
      MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL
      unsigned Log2SEW =
          DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm();
      MIB.addImm(Log2SEW ? Log2SEW : 3);                        // SEW
      MIB.addImm(0);                                            // tu, mu
      MIB.addReg(RISCV::VL, RegState::Implicit);
      MIB.addReg(RISCV::VTYPE, RegState::Implicit);
    }

    // If we are copying reversely, we should decrease the encoding.
    SrcEncoding += (ReversedCopy ? -NumCopied : NumCopied);
    DstEncoding += (ReversedCopy ? -NumCopied : NumCopied);
    I += NumCopied;
  }
}
void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 const DebugLoc &DL, MCRegister DstReg,
                                 MCRegister SrcReg, bool KillSrc,
                                 bool RenamableDest, bool RenamableSrc) const {
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();

  if (RISCV::GPRRegClass.contains(DstReg, SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg)
        .addReg(SrcReg,
                getKillRegState(KillSrc) | getRenamableRegState(RenamableSrc))
        .addImm(0);
    return;
  }

  if (RISCV::GPRF16RegClass.contains(DstReg, SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::PseudoMV_FPR16INX), DstReg)
        .addReg(SrcReg,
                getKillRegState(KillSrc) | getRenamableRegState(RenamableSrc));
    return;
  }

  if (RISCV::GPRF32RegClass.contains(DstReg, SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::PseudoMV_FPR32INX), DstReg)
        .addReg(SrcReg,
                getKillRegState(KillSrc) | getRenamableRegState(RenamableSrc));
    return;
  }

  if (RISCV::GPRPairRegClass.contains(DstReg, SrcReg)) {
    // Emit an ADDI for both parts of GPRPair.
    BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
            TRI->getSubReg(DstReg, RISCV::sub_gpr_even))
        .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_even),
                getKillRegState(KillSrc))
        .addImm(0);
    BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
            TRI->getSubReg(DstReg, RISCV::sub_gpr_odd))
        .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_odd),
                getKillRegState(KillSrc))
        .addImm(0);
    return;
  }

  // Handle copy from csr
  if (RISCV::VCSRRegClass.contains(SrcReg) &&
      RISCV::GPRRegClass.contains(DstReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::CSRRS), DstReg)
        .addImm(RISCVSysReg::lookupSysRegByName(TRI->getName(SrcReg))->Encoding)
        .addReg(RISCV::X0);
    return;
  }

  if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) {
    unsigned Opc;
    if (STI.hasStdExtZfh()) {
      Opc = RISCV::FSGNJ_H;
    } else {
      assert(STI.hasStdExtF() &&
             (STI.hasStdExtZfhmin() || STI.hasStdExtZfbfmin()) &&
             "Unexpected extensions");
      // Zfhmin/Zfbfmin doesn't have FSGNJ_H, replace FSGNJ_H with FSGNJ_S.
      DstReg = TRI->getMatchingSuperReg(DstReg, RISCV::sub_16,
                                        &RISCV::FPR32RegClass);
      SrcReg = TRI->getMatchingSuperReg(SrcReg, RISCV::sub_16,
                                        &RISCV::FPR32RegClass);
      Opc = RISCV::FSGNJ_S;
    }
    BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_S), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::FPR64RegClass.contains(DstReg, SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_D), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::FPR32RegClass.contains(DstReg) &&
      RISCV::GPRRegClass.contains(SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::FMV_W_X), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::GPRRegClass.contains(DstReg) &&
      RISCV::FPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_W), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::FPR64RegClass.contains(DstReg) &&
      RISCV::GPRRegClass.contains(SrcReg)) {
    assert(STI.getXLen() == 64 && "Unexpected GPR size");
    BuildMI(MBB, MBBI, DL, get(RISCV::FMV_D_X), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::GPRRegClass.contains(DstReg) &&
      RISCV::FPR64RegClass.contains(SrcReg)) {
    assert(STI.getXLen() == 64 && "Unexpected GPR size");
    BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_D), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  const TargetRegisterClass *RegClass =
      TRI->getCommonMinimalPhysRegClass(SrcReg, DstReg);
  if (RISCVRegisterInfo::isRVVRegClass(RegClass)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RegClass);
    return;
  }

  llvm_unreachable("Impossible reg-to-reg copy");
}
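
// Spill SrcReg to stack slot FI. Scalar register classes use ordinary store
// instructions; vector classes use whole-register stores or segment spill
// pseudos and mark the slot as a scalable-vector stack object.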
void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator I,
                                         Register SrcReg, bool IsKill, int FI,
                                         const TargetRegisterClass *RC,
                                         const TargetRegisterInfo *TRI,
                                         Register VReg) const {
  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();

  unsigned Opcode;
  bool IsScalableVector = true;
  if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
    Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
             RISCV::SW : RISCV::SD;
    IsScalableVector = false;
  } else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::SH_INX;
    IsScalableVector = false;
  } else if (RISCV::GPRF32RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::SW_INX;
    IsScalableVector = false;
  } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::PseudoRV32ZdinxSD;
    IsScalableVector = false;
  } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FSH;
    IsScalableVector = false;
  } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FSW;
    IsScalableVector = false;
  } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FSD;
    IsScalableVector = false;
  } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VS1R_V;
  } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VS2R_V;
  } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VS4R_V;
  } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VS8R_V;
  } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL2_M1;
  else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL2_M2;
  else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL2_M4;
  else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL3_M1;
  else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL3_M2;
  else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL4_M1;
  else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL4_M2;
  else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL5_M1;
  else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL6_M1;
  else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL7_M1;
  else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL8_M1;
  else
    llvm_unreachable("Can't store this register to stack slot");

  if (IsScalableVector) {
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
        LocationSize::beforeOrAfterPointer(), MFI.getObjectAlign(FI));

    MFI.setStackID(FI, TargetStackID::ScalableVector);
    BuildMI(MBB, I, DebugLoc(), get(Opcode))
        .addReg(SrcReg, getKillRegState(IsKill))
        .addFrameIndex(FI)
        .addMemOperand(MMO);
  } else {
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));

    BuildMI(MBB, I, DebugLoc(), get(Opcode))
        .addReg(SrcReg, getKillRegState(IsKill))
        .addFrameIndex(FI)
        .addImm(0)
        .addMemOperand(MMO);
  }
}
void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator I,
                                          Register DstReg, int FI,
                                          const TargetRegisterClass *RC,
                                          const TargetRegisterInfo *TRI,
                                          Register VReg) const {
  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();

  unsigned Opcode;
  bool IsScalableVector = true;
  if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
    Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
             RISCV::LW : RISCV::LD;
    IsScalableVector = false;
  } else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::LH_INX;
    IsScalableVector = false;
  } else if (RISCV::GPRF32RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::LW_INX;
    IsScalableVector = false;
  } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::PseudoRV32ZdinxLD;
    IsScalableVector = false;
  } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FLH;
    IsScalableVector = false;
  } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FLW;
    IsScalableVector = false;
  } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FLD;
    IsScalableVector = false;
  } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VL1RE8_V;
  } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VL2RE8_V;
  } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VL4RE8_V;
  } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VL8RE8_V;
  } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD2_M1;
  else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD2_M2;
  else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD2_M4;
  else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD3_M1;
  else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD3_M2;
  else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD4_M1;
  else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD4_M2;
  else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD5_M1;
  else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD6_M1;
  else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD7_M1;
  else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD8_M1;
  else
    llvm_unreachable("Can't load this register from stack slot");

  if (IsScalableVector) {
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
        LocationSize::beforeOrAfterPointer(), MFI.getObjectAlign(FI));

    MFI.setStackID(FI, TargetStackID::ScalableVector);
    BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)
        .addFrameIndex(FI)
        .addMemOperand(MMO);
  } else {
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));

    BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)
        .addFrameIndex(FI)
        .addImm(0)
        .addMemOperand(MMO);
  }
}
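
// Try to fold a load from a stack slot into MI, e.g. turning a sign/zero
// extension or a vmv.x.s/vfmv.f.s of a reloaded value into a single load of
// the appropriate width directly from the frame index.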
MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
    MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS,
    VirtRegMap *VRM) const {
  // The below optimizations narrow the load so they are only valid for little
  // endian.
  // TODO: Support big endian by adding an offset into the frame object?
  if (MF.getDataLayout().isBigEndian())
    return nullptr;

  // Fold load from stack followed by sext.b/sext.h/sext.w/zext.b/zext.h/zext.w.
  if (Ops.size() != 1 || Ops[0] != 1)
    return nullptr;

  unsigned LoadOpc;
  switch (MI.getOpcode()) {
  default:
    if (RISCV::isSEXT_W(MI)) {
      LoadOpc = RISCV::LW;
      break;
    }
    if (RISCV::isZEXT_W(MI)) {
      LoadOpc = RISCV::LWU;
      break;
    }
    if (RISCV::isZEXT_B(MI)) {
      LoadOpc = RISCV::LBU;
      break;
    }
    if (RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::VMV_X_S) {
      unsigned Log2SEW =
          MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
      if (STI.getXLen() < (1U << Log2SEW))
        return nullptr;
      llvm_unreachable("Unexpected SEW");
    }
    if (RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::VFMV_F_S) {
      unsigned Log2SEW =
          MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
      LoadOpc = RISCV::FLH;
      LoadOpc = RISCV::FLW;
      LoadOpc = RISCV::FLD;
      llvm_unreachable("Unexpected SEW");
    }
    return nullptr;
  case RISCV::ZEXT_H_RV32:
  case RISCV::ZEXT_H_RV64:
    LoadOpc = RISCV::LHU;
    break;
  }

  Register DstReg = MI.getOperand(0).getReg();
  return BuildMI(*MI.getParent(), InsertPt, MI.getDebugLoc(), get(LoadOpc),
                 DstReg)
      .addFrameIndex(FrameIndex)
      .addImm(0);
}
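
// Materialize the immediate Val into DstReg using the instruction sequence
// computed by RISCVMatInt::generateInstSeq.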
void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            const DebugLoc &DL, Register DstReg, uint64_t Val,
                            MachineInstr::MIFlag Flag, bool DstRenamable,
                            bool DstIsDead) const {
  Register SrcReg = RISCV::X0;

  // For RV32, allow a sign or unsigned 32 bit value.
  if (!STI.is64Bit() && !isInt<32>(Val)) {
    // If we have a uimm32 it will still fit in a register so we can allow it.
    if (!isUInt<32>(Val))
      report_fatal_error("Should only materialize 32-bit constants for RV32");

    // Sign extend for generateInstSeq.
    Val = SignExtend64<32>(Val);
  }

  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, STI);
  assert(!Seq.empty());

  bool SrcRenamable = false;
  unsigned Num = 0;

  for (const RISCVMatInt::Inst &Inst : Seq) {
    bool LastItem = ++Num == Seq.size();
    unsigned DstRegState = getDeadRegState(DstIsDead && LastItem) |
                           getRenamableRegState(DstRenamable);
    unsigned SrcRegState = getKillRegState(SrcReg != RISCV::X0) |
                           getRenamableRegState(SrcRenamable);
    switch (Inst.getOpndKind()) {
    case RISCVMatInt::Imm:
      BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
          .addReg(DstReg, RegState::Define | DstRegState)
          .addImm(Inst.getImm())
          .setMIFlag(Flag);
      break;
    case RISCVMatInt::RegX0:
      BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
          .addReg(DstReg, RegState::Define | DstRegState)
          .addReg(SrcReg, SrcRegState)
          .addReg(RISCV::X0)
          .setMIFlag(Flag);
      break;
    case RISCVMatInt::RegReg:
      BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
          .addReg(DstReg, RegState::Define | DstRegState)
          .addReg(SrcReg, SrcRegState)
          .addReg(SrcReg, SrcRegState)
          .setMIFlag(Flag);
      break;
    case RISCVMatInt::RegImm:
      BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
          .addReg(DstReg, RegState::Define | DstRegState)
          .addReg(SrcReg, SrcRegState)
          .addImm(Inst.getImm())
          .setMIFlag(Flag);
      break;
    }

    // Only the first instruction has X0 as its source.
    SrcReg = DstReg;
    SrcRenamable = DstRenamable;
  }
}
static RISCVCC::CondCode getCondFromBranchOpc(unsigned Opc) {
  switch (Opc) {
  default:
    return RISCVCC::COND_INVALID;
  case RISCV::CV_BEQIMM:
    return RISCVCC::COND_EQ;
  case RISCV::CV_BNEIMM:
    return RISCVCC::COND_NE;
  case RISCV::BEQ:
    return RISCVCC::COND_EQ;
  case RISCV::BNE:
    return RISCVCC::COND_NE;
  case RISCV::BLT:
    return RISCVCC::COND_LT;
  case RISCV::BGE:
    return RISCVCC::COND_GE;
  case RISCV::BLTU:
    return RISCVCC::COND_LTU;
  case RISCV::BGEU:
    return RISCVCC::COND_GEU;
  }
}

// The contents of values added to Cond are not examined outside of
// RISCVInstrInfo, giving us flexibility in what to push to it. For RISC-V, we
// push BranchOpcode, Reg1, Reg2.
static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  assert(LastInst.getDesc().isConditionalBranch() &&
         "Unknown conditional branch");
  Target = LastInst.getOperand(2).getMBB();
  unsigned CC = getCondFromBranchOpc(LastInst.getOpcode());
  Cond.push_back(MachineOperand::CreateImm(CC));
  Cond.push_back(LastInst.getOperand(0));
  Cond.push_back(LastInst.getOperand(1));
}
unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC, bool Imm) {
  switch (CC) {
  default:
    llvm_unreachable("Unknown condition code!");
  case RISCVCC::COND_EQ:
    return Imm ? RISCV::CV_BEQIMM : RISCV::BEQ;
  case RISCVCC::COND_NE:
    return Imm ? RISCV::CV_BNEIMM : RISCV::BNE;
  case RISCVCC::COND_LT:
    return RISCV::BLT;
  case RISCVCC::COND_GE:
    return RISCV::BGE;
  case RISCVCC::COND_LTU:
    return RISCV::BLTU;
  case RISCVCC::COND_GEU:
    return RISCV::BGEU;
  }
}

const MCInstrDesc &RISCVInstrInfo::getBrCond(RISCVCC::CondCode CC,
                                             bool Imm) const {
  return get(RISCVCC::getBrCond(CC, Imm));
}
RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unrecognized conditional branch");
  case RISCVCC::COND_EQ:
    return RISCVCC::COND_NE;
  case RISCVCC::COND_NE:
    return RISCVCC::COND_EQ;
  case RISCVCC::COND_LT:
    return RISCVCC::COND_GE;
  case RISCVCC::COND_GE:
    return RISCVCC::COND_LT;
  case RISCVCC::COND_LTU:
    return RISCVCC::COND_GEU;
  case RISCVCC::COND_GEU:
    return RISCVCC::COND_LTU;
  }
}
bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                   MachineBasicBlock *&TBB,
                                   MachineBasicBlock *&FBB,
                                   SmallVectorImpl<MachineOperand> &Cond,
                                   bool AllowModify) const {
  TBB = FBB = nullptr;
  Cond.clear();

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end() || !isUnpredicatedTerminator(*I))
    return false;

  // Count the number of terminators and find the first unconditional or
  // indirect branch.
  MachineBasicBlock::iterator FirstUncondOrIndirectBr = MBB.end();
  int NumTerminators = 0;
  for (auto J = I.getReverse(); J != MBB.rend() && isUnpredicatedTerminator(*J);
       J++) {
    NumTerminators++;
    if (J->getDesc().isUnconditionalBranch() ||
        J->getDesc().isIndirectBranch()) {
      FirstUncondOrIndirectBr = J.getReverse();
    }
  }

  // If AllowModify is true, we can erase any terminators after
  // FirstUncondOrIndirectBR.
  if (AllowModify && FirstUncondOrIndirectBr != MBB.end()) {
    while (std::next(FirstUncondOrIndirectBr) != MBB.end()) {
      std::next(FirstUncondOrIndirectBr)->eraseFromParent();
      NumTerminators--;
    }
    I = FirstUncondOrIndirectBr;
  }

  // We can't handle blocks that end in an indirect branch.
  if (I->getDesc().isIndirectBranch())
    return true;

  // We can't handle Generic branch opcodes from Global ISel.
  if (I->isPreISelOpcode())
    return true;

  // We can't handle blocks with more than 2 terminators.
  if (NumTerminators > 2)
    return true;

  // Handle a single unconditional branch.
  if (NumTerminators == 1 && I->getDesc().isUnconditionalBranch()) {
    TBB = getBranchDestBlock(*I);
    return false;
  }

  // Handle a single conditional branch.
  if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) {
    parseCondBranch(*I, TBB, Cond);
    return false;
  }

  // Handle a conditional branch followed by an unconditional branch.
  if (NumTerminators == 2 && std::prev(I)->getDesc().isConditionalBranch() &&
      I->getDesc().isUnconditionalBranch()) {
    parseCondBranch(*std::prev(I), TBB, Cond);
    FBB = getBranchDestBlock(*I);
    return false;
  }

  // Otherwise, we can't handle this.
  return true;
}
unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB,
                                      int *BytesRemoved) const {
  if (BytesRemoved)
    *BytesRemoved = 0;
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!I->getDesc().isUnconditionalBranch() &&
      !I->getDesc().isConditionalBranch())
    return 0;

  // Remove the branch.
  if (BytesRemoved)
    *BytesRemoved += getInstSizeInBytes(*I);
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin())
    return 1;
  --I;
  if (!I->getDesc().isConditionalBranch())
    return 1;

  // Remove the branch.
  if (BytesRemoved)
    *BytesRemoved += getInstSizeInBytes(*I);
  I->eraseFromParent();
  return 2;
}
// Inserts a branch into the end of the specific MachineBasicBlock, returning
// the number of instructions inserted.
unsigned RISCVInstrInfo::insertBranch(
    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
  if (BytesAdded)
    *BytesAdded = 0;

  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 3 || Cond.size() == 0) &&
         "RISC-V branch conditions have two components!");

  // Unconditional branch.
  if (Cond.empty()) {
    MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(TBB);
    if (BytesAdded)
      *BytesAdded += getInstSizeInBytes(MI);
    return 1;
  }

  // Either a one or two-way conditional branch.
  auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
  MachineInstr &CondMI = *BuildMI(&MBB, DL, getBrCond(CC, Cond[2].isImm()))
                              .add(Cond[1])
                              .add(Cond[2])
                              .addMBB(TBB);
  if (BytesAdded)
    *BytesAdded += getInstSizeInBytes(CondMI);

  // One-way conditional branch.
  if (!FBB)
    return 1;

  // Two-way conditional branch.
  MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(FBB);
  if (BytesAdded)
    *BytesAdded += getInstSizeInBytes(MI);
  return 2;
}
&MBB
,
1135 MachineBasicBlock
&DestBB
,
1136 MachineBasicBlock
&RestoreBB
,
1137 const DebugLoc
&DL
, int64_t BrOffset
,
1138 RegScavenger
*RS
) const {
1139 assert(RS
&& "RegScavenger required for long branching");
1140 assert(MBB
.empty() &&
1141 "new block should be inserted for expanding unconditional branch");
1142 assert(MBB
.pred_size() == 1);
1143 assert(RestoreBB
.empty() &&
1144 "restore block should be inserted for restoring clobbered registers");
1146 MachineFunction
*MF
= MBB
.getParent();
1147 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
1148 RISCVMachineFunctionInfo
*RVFI
= MF
->getInfo
<RISCVMachineFunctionInfo
>();
1149 const TargetRegisterInfo
*TRI
= MF
->getSubtarget().getRegisterInfo();
1151 if (!isInt
<32>(BrOffset
))
1153 "Branch offsets outside of the signed 32-bit range not supported");
1155 // FIXME: A virtual register must be used initially, as the register
1156 // scavenger won't work with empty blocks (SIInstrInfo::insertIndirectBranch
1157 // uses the same workaround).
1158 Register ScratchReg
= MRI
.createVirtualRegister(&RISCV::GPRJALRRegClass
);
1159 auto II
= MBB
.end();
1160 // We may also update the jump target to RestoreBB later.
1161 MachineInstr
&MI
= *BuildMI(MBB
, II
, DL
, get(RISCV::PseudoJump
))
1162 .addReg(ScratchReg
, RegState::Define
| RegState::Dead
)
1163 .addMBB(&DestBB
, RISCVII::MO_CALL
);
1165 RS
->enterBasicBlockEnd(MBB
);
1167 RS
->scavengeRegisterBackwards(RISCV::GPRRegClass
, MI
.getIterator(),
1168 /*RestoreAfter=*/false, /*SpAdj=*/0,
1169 /*AllowSpill=*/false);
1170 if (TmpGPR
!= RISCV::NoRegister
)
1171 RS
->setRegUsed(TmpGPR
);
1173 // The case when there is no scavenged register needs special handling.
1175 // Pick s11 because it doesn't make a difference.
1176 TmpGPR
= RISCV::X27
;
1178 int FrameIndex
= RVFI
->getBranchRelaxationScratchFrameIndex();
1179 if (FrameIndex
== -1)
1180 report_fatal_error("underestimated function size");
1182 storeRegToStackSlot(MBB
, MI
, TmpGPR
, /*IsKill=*/true, FrameIndex
,
1183 &RISCV::GPRRegClass
, TRI
, Register());
1184 TRI
->eliminateFrameIndex(std::prev(MI
.getIterator()),
1185 /*SpAdj=*/0, /*FIOperandNum=*/1);
1187 MI
.getOperand(1).setMBB(&RestoreBB
);
1189 loadRegFromStackSlot(RestoreBB
, RestoreBB
.end(), TmpGPR
, FrameIndex
,
1190 &RISCV::GPRRegClass
, TRI
, Register());
1191 TRI
->eliminateFrameIndex(RestoreBB
.back(),
1192 /*SpAdj=*/0, /*FIOperandNum=*/1);
1195 MRI
.replaceRegWith(ScratchReg
, TmpGPR
);
1196 MRI
.clearVirtRegs();
bool RISCVInstrInfo::reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  assert((Cond.size() == 3) && "Invalid branch condition!");
  auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
  Cond[0].setImm(getOppositeBranchCondition(CC));
  return false;
}
bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
  MachineBasicBlock *MBB = MI.getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  MachineBasicBlock *TBB, *FBB;
  SmallVector<MachineOperand, 3> Cond;
  if (analyzeBranch(*MBB, TBB, FBB, Cond, /*AllowModify=*/false))
    return false;

  RISCVCC::CondCode CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
  assert(CC != RISCVCC::COND_INVALID);

  if (CC == RISCVCC::COND_EQ || CC == RISCVCC::COND_NE)
    return false;

  // For two constants C0 and C1 from
  // 1. if C1 = C0 + 1
  //    (a) blt Y, X -> bge X, Z
  //    (b) bge Y, X -> blt X, Z
  // 2. if C1 = C0 - 1
  //    (a) blt X, Y -> bge Z, X
  //    (b) bge X, Y -> blt Z, X
  // To make sure this optimization is really beneficial, we only
  // optimize for cases where Y had only one use (i.e. only used by the branch).

  // Right now we only care about LI (i.e. ADDI x0, imm)
  auto isLoadImm = [](const MachineInstr *MI, int64_t &Imm) -> bool {
    if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
        MI->getOperand(1).getReg() == RISCV::X0) {
      Imm = MI->getOperand(2).getImm();
      return true;
    }
    return false;
  };
  // Either a load from immediate instruction or X0.
  auto isFromLoadImm = [&](const MachineOperand &Op, int64_t &Imm) -> bool {
    if (!Op.isReg())
      return false;
    Register Reg = Op.getReg();
    return Reg.isVirtual() && isLoadImm(MRI.getVRegDef(Reg), Imm);
  };

  MachineOperand &LHS = MI.getOperand(0);
  MachineOperand &RHS = MI.getOperand(1);
  // Try to find the register for constant Z; return
  // invalid register otherwise.
  auto searchConst = [&](int64_t C1) -> Register {
    MachineBasicBlock::reverse_iterator II(&MI), E = MBB->rend();
    auto DefC1 = std::find_if(++II, E, [&](const MachineInstr &I) -> bool {
      int64_t Imm;
      return isLoadImm(&I, Imm) && Imm == C1 &&
             I.getOperand(0).getReg().isVirtual();
    });
    if (DefC1 != E)
      return DefC1->getOperand(0).getReg();
    return Register();
  };

  bool Modify = false;
  int64_t C0;
  if (isFromLoadImm(LHS, C0) && MRI.hasOneUse(LHS.getReg())) {
    // Signed integer overflow is UB. (UINT64_MAX is bigger so we don't need
    // to worry about unsigned overflow here)
    if (Register RegZ = searchConst(C0 + 1)) {
      reverseBranchCondition(Cond);
      Cond[1] = MachineOperand::CreateReg(RHS.getReg(), /*isDef=*/false);
      Cond[2] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
      // We might extend the live range of Z, clear its kill flag to
      // account for this.
      MRI.clearKillFlags(RegZ);
      Modify = true;
    }
  } else if (isFromLoadImm(RHS, C0) && MRI.hasOneUse(RHS.getReg())) {
    // For unsigned cases, we don't want C1 to wrap back to UINT64_MAX
    // when C0 is zero.
    if ((CC == RISCVCC::COND_GE || CC == RISCVCC::COND_LT) || C0)
      if (Register RegZ = searchConst(C0 - 1)) {
        reverseBranchCondition(Cond);
        Cond[1] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
        Cond[2] = MachineOperand::CreateReg(LHS.getReg(), /*isDef=*/false);
        // We might extend the live range of Z, clear its kill flag to
        // account for this.
        MRI.clearKillFlags(RegZ);
        Modify = true;
      }
  }

  if (!Modify)
    return false;

  // Build the new branch and remove the old one.
  BuildMI(*MBB, MI, MI.getDebugLoc(),
          getBrCond(static_cast<RISCVCC::CondCode>(Cond[0].getImm())))
      .add(Cond[1])
      .add(Cond[2])
      .addMBB(TBB);
  MI.eraseFromParent();
  return true;
}
MachineBasicBlock *
RISCVInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
  assert(MI.getDesc().isBranch() && "Unexpected opcode!");
  // The branch target is always the last operand.
  int NumOp = MI.getNumExplicitOperands();
  return MI.getOperand(NumOp - 1).getMBB();
}
bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                           int64_t BrOffset) const {
  unsigned XLen = STI.getXLen();
  // Ideally we could determine the supported branch offset from the
  // RISCVII::FormMask, but this can't be used for Pseudo instructions like
  // PseudoBR.
  switch (BranchOp) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case RISCV::CV_BEQIMM:
  case RISCV::CV_BNEIMM:
    return isIntN(13, BrOffset);
  case RISCV::PseudoBR:
    return isIntN(21, BrOffset);
  case RISCV::PseudoJump:
    return isIntN(32, SignExtend64(BrOffset + 0x800, XLen));
  }
}
// If the operation has a predicated pseudo instruction, return the pseudo
// instruction opcode. Otherwise, return RISCV::INSTRUCTION_LIST_END.
// TODO: Support more operations.
unsigned getPredicatedOpcode(unsigned Opcode) {
  switch (Opcode) {
  case RISCV::ADD:   return RISCV::PseudoCCADD;   break;
  case RISCV::SUB:   return RISCV::PseudoCCSUB;   break;
  case RISCV::SLL:   return RISCV::PseudoCCSLL;   break;
  case RISCV::SRL:   return RISCV::PseudoCCSRL;   break;
  case RISCV::SRA:   return RISCV::PseudoCCSRA;   break;
  case RISCV::AND:   return RISCV::PseudoCCAND;   break;
  case RISCV::OR:    return RISCV::PseudoCCOR;    break;
  case RISCV::XOR:   return RISCV::PseudoCCXOR;   break;

  case RISCV::ADDI:  return RISCV::PseudoCCADDI;  break;
  case RISCV::SLLI:  return RISCV::PseudoCCSLLI;  break;
  case RISCV::SRLI:  return RISCV::PseudoCCSRLI;  break;
  case RISCV::SRAI:  return RISCV::PseudoCCSRAI;  break;
  case RISCV::ANDI:  return RISCV::PseudoCCANDI;  break;
  case RISCV::ORI:   return RISCV::PseudoCCORI;   break;
  case RISCV::XORI:  return RISCV::PseudoCCXORI;  break;

  case RISCV::ADDW:  return RISCV::PseudoCCADDW;  break;
  case RISCV::SUBW:  return RISCV::PseudoCCSUBW;  break;
  case RISCV::SLLW:  return RISCV::PseudoCCSLLW;  break;
  case RISCV::SRLW:  return RISCV::PseudoCCSRLW;  break;
  case RISCV::SRAW:  return RISCV::PseudoCCSRAW;  break;

  case RISCV::ADDIW: return RISCV::PseudoCCADDIW; break;
  case RISCV::SLLIW: return RISCV::PseudoCCSLLIW; break;
  case RISCV::SRLIW: return RISCV::PseudoCCSRLIW; break;
  case RISCV::SRAIW: return RISCV::PseudoCCSRAIW; break;

  case RISCV::ANDN:  return RISCV::PseudoCCANDN;  break;
  case RISCV::ORN:   return RISCV::PseudoCCORN;   break;
  case RISCV::XNOR:  return RISCV::PseudoCCXNOR;  break;
  }

  return RISCV::INSTRUCTION_LIST_END;
}
/// Identify instructions that can be folded into a CCMOV instruction, and
/// return the defining instruction.
static MachineInstr *canFoldAsPredicatedOp(Register Reg,
                                           const MachineRegisterInfo &MRI,
                                           const TargetInstrInfo *TII) {
  if (!Reg.isVirtual())
    return nullptr;
  if (!MRI.hasOneNonDBGUse(Reg))
    return nullptr;
  MachineInstr *MI = MRI.getVRegDef(Reg);

  // Check if MI can be predicated and folded into the CCMOV.
  if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END)
    return nullptr;
  // Don't predicate li idiom.
  if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
      MI->getOperand(1).getReg() == RISCV::X0)
    return nullptr;
  // Check if MI has any other defs or physreg uses.
  for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
    // Reject frame index operands, PEI can't handle the predicated pseudos.
    if (MO.isFI() || MO.isCPI() || MO.isJTI())
      return nullptr;
    if (!MO.isReg())
      continue;
    // MI can't have any tied operands, that would conflict with predication.
    if (MO.isTied())
      return nullptr;
    if (MO.isDef())
      return nullptr;
    // Allow constant physregs.
    if (MO.getReg().isPhysical() && !MRI.isConstantPhysReg(MO.getReg()))
      return nullptr;
  }
  bool DontMoveAcrossStores = true;
  if (!MI->isSafeToMove(DontMoveAcrossStores))
    return nullptr;
  return MI;
}
bool RISCVInstrInfo::analyzeSelect(const MachineInstr &MI,
                                   SmallVectorImpl<MachineOperand> &Cond,
                                   unsigned &TrueOp, unsigned &FalseOp,
                                   bool &Optimizable) const {
  assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
         "Unknown select instruction");
  // 1: LHS of compare.
  // 2: RHS of compare.
  // 3: Condition code.
  Cond.push_back(MI.getOperand(1));
  Cond.push_back(MI.getOperand(2));
  Cond.push_back(MI.getOperand(3));
  // We can only fold when we support short forward branch opt.
  Optimizable = STI.hasShortForwardBranchOpt();
  return false;
}
MachineInstr *
RISCVInstrInfo::optimizeSelect(MachineInstr &MI,
                               SmallPtrSetImpl<MachineInstr *> &SeenMIs,
                               bool PreferFalse) const {
  assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
         "Unknown select instruction");
  if (!STI.hasShortForwardBranchOpt())
    return nullptr;

  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  MachineInstr *DefMI =
      canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this);
  bool Invert = !DefMI;
  if (!DefMI)
    DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this);
  if (!DefMI)
    return nullptr;

  // Find new register class to use.
  MachineOperand FalseReg = MI.getOperand(Invert ? 5 : 4);
  Register DestReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
  if (!MRI.constrainRegClass(DestReg, PreviousClass))
    return nullptr;

  unsigned PredOpc = getPredicatedOpcode(DefMI->getOpcode());
  assert(PredOpc != RISCV::INSTRUCTION_LIST_END && "Unexpected opcode!");

  // Create a new predicated version of DefMI.
  MachineInstrBuilder NewMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(PredOpc), DestReg);

  // Copy the condition portion.
  NewMI.add(MI.getOperand(1));
  NewMI.add(MI.getOperand(2));

  // Add condition code, inverting if necessary.
  auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
  if (Invert)
    CC = RISCVCC::getOppositeBranchCondition(CC);
  NewMI.addImm(CC);

  // Copy the false register.
  NewMI.add(FalseReg);

  // Copy all the DefMI operands.
  const MCInstrDesc &DefDesc = DefMI->getDesc();
  for (unsigned i = 1, e = DefDesc.getNumOperands(); i != e; ++i)
    NewMI.add(DefMI->getOperand(i));

  // Update SeenMIs set: register newly created MI and erase removed DefMI.
  SeenMIs.insert(NewMI);
  SeenMIs.erase(DefMI);

  // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
  // DefMI would be invalid when transferred inside the loop. Checking for a
  // loop is expensive, but at least remove kill flags if they are in different
  // BBs.
  if (DefMI->getParent() != MI.getParent())
    NewMI->clearKillInfo();

  // The caller will erase MI, but not DefMI.
  DefMI->eraseFromParent();
  return NewMI;
}
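
// Return the size of MI in bytes, accounting for compressed (RVC) forms,
// inline assembly, bundles, and patchable/stackmap pseudo instructions.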
unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  if (MI.isMetaInstruction())
    return 0;

  unsigned Opcode = MI.getOpcode();

  if (Opcode == TargetOpcode::INLINEASM ||
      Opcode == TargetOpcode::INLINEASM_BR) {
    const MachineFunction &MF = *MI.getParent()->getParent();
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(),
                              *MF.getTarget().getMCAsmInfo());
  }

  if (!MI.memoperands_empty()) {
    MachineMemOperand *MMO = *(MI.memoperands_begin());
    if (STI.hasStdExtZihintntl() && MMO->isNonTemporal()) {
      if (STI.hasStdExtCOrZca() && STI.enableRVCHintInstrs()) {
        if (isCompressibleInst(MI, STI))
          return 4; // c.ntl.all + c.load/c.store
        return 6;   // c.ntl.all + load/store
      }
      return 8; // ntl.all + load/store
    }
  }

  if (Opcode == TargetOpcode::BUNDLE)
    return getInstBundleLength(MI);

  if (MI.getParent() && MI.getParent()->getParent()) {
    if (isCompressibleInst(MI, STI))
      return 2;
  }

  switch (Opcode) {
  case RISCV::PseudoMV_FPR16INX:
  case RISCV::PseudoMV_FPR32INX:
    // MV is always compressible to either c.mv or c.li rd, 0.
    return STI.hasStdExtCOrZca() ? 2 : 4;
  case TargetOpcode::STACKMAP:
    // The upper bound for a stackmap intrinsic is the full length of its shadow
    return StackMapOpers(&MI).getNumPatchBytes();
  case TargetOpcode::PATCHPOINT:
    // The size of the patchpoint intrinsic is the number of bytes requested
    return PatchPointOpers(&MI).getNumPatchBytes();
  case TargetOpcode::STATEPOINT: {
    // The size of the statepoint intrinsic is the number of bytes requested
    unsigned NumBytes = StatepointOpers(&MI).getNumPatchBytes();
    // No patch bytes means at most a PseudoCall is emitted
    return std::max(NumBytes, 8U);
  }
  case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
  case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
  case TargetOpcode::PATCHABLE_TAIL_CALL: {
    const MachineFunction &MF = *MI.getParent()->getParent();
    const Function &F = MF.getFunction();
    if (Opcode == TargetOpcode::PATCHABLE_FUNCTION_ENTER &&
        F.hasFnAttribute("patchable-function-entry")) {
      unsigned Num;
      if (F.getFnAttribute("patchable-function-entry")
              .getValueAsString()
              .getAsInteger(10, Num))
        return get(Opcode).getSize();

      // Number of C.NOP or NOP
      return (STI.hasStdExtCOrZca() ? 2 : 4) * Num;
    }
    // XRay uses C.JAL + 21 or 33 C.NOP for each sled in RV32 and RV64,
    // respectively.
    return STI.is64Bit() ? 68 : 44;
  }
  default:
    return get(Opcode).getSize();
  }
}
unsigned RISCVInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += getInstSizeInBytes(*I);
  }
  return Size;
}
bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  const unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    break;
  case RISCV::FSGNJ_D:
  case RISCV::FSGNJ_S:
  case RISCV::FSGNJ_H:
  case RISCV::FSGNJ_D_INX:
  case RISCV::FSGNJ_D_IN32X:
  case RISCV::FSGNJ_S_INX:
  case RISCV::FSGNJ_H_INX:
    // The canonical floating-point move is fsgnj rd, rs, rs.
    return MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
           MI.getOperand(1).getReg() == MI.getOperand(2).getReg();
  case RISCV::ADDI:
  case RISCV::ORI:
  case RISCV::XORI:
    return (MI.getOperand(1).isReg() &&
            MI.getOperand(1).getReg() == RISCV::X0) ||
           (MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0);
  }
  return MI.isAsCheapAsAMove();
}
std::optional<DestSourcePair>
RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
  if (MI.isMoveReg())
    return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
  switch (MI.getOpcode()) {
  default:
    break;
  case RISCV::ADDI:
    // Operand 1 can be a frameindex but callers expect registers
    if (MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
        MI.getOperand(2).getImm() == 0)
      return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
    break;
  case RISCV::FSGNJ_D:
  case RISCV::FSGNJ_S:
  case RISCV::FSGNJ_H:
  case RISCV::FSGNJ_D_INX:
  case RISCV::FSGNJ_D_IN32X:
  case RISCV::FSGNJ_S_INX:
  case RISCV::FSGNJ_H_INX:
    // The canonical floating-point move is fsgnj rd, rs, rs.
    if (MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
        MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
      return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
    break;
  }
  return std::nullopt;
}
MachineTraceStrategy RISCVInstrInfo::getMachineCombinerTraceStrategy() const {
  if (ForceMachineCombinerStrategy.getNumOccurrences() == 0) {
    // The option is unused. Choose Local strategy only for in-order cores. When
    // the scheduling model is unspecified, use MinInstrCount strategy as it is
    // more generic.
    const auto &SchedModel = STI.getSchedModel();
    return (!SchedModel.hasInstrSchedModel() || SchedModel.isOutOfOrder())
               ? MachineTraceStrategy::TS_MinInstrCount
               : MachineTraceStrategy::TS_Local;
  }
  // The strategy was forced by the option.
  return ForceMachineCombinerStrategy;
}
void RISCVInstrInfo::finalizeInsInstrs(
    MachineInstr &Root, unsigned &Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs) const {
  int16_t FrmOpIdx =
      RISCV::getNamedOperandIdx(Root.getOpcode(), RISCV::OpName::frm);
  if (FrmOpIdx < 0) {
    assert(all_of(InsInstrs,
                  [](MachineInstr *MI) {
                    return RISCV::getNamedOperandIdx(MI->getOpcode(),
                                                     RISCV::OpName::frm) < 0;
                  }) &&
           "New instructions require FRM whereas the old one does not have it");
    return;
  }

  const MachineOperand &FRM = Root.getOperand(FrmOpIdx);
  MachineFunction &MF = *Root.getMF();

  for (auto *NewMI : InsInstrs) {
    // We'd already added the FRM operand.
    if (static_cast<unsigned>(RISCV::getNamedOperandIdx(
            NewMI->getOpcode(), RISCV::OpName::frm)) != NewMI->getNumOperands())
      continue;
    MachineInstrBuilder MIB(MF, NewMI);
    MIB.add(FRM);
    if (FRM.getImm() == RISCVFPRndMode::DYN)
      MIB.addUse(RISCV::FRM, RegState::Implicit);
  }
}
static bool isFADD(unsigned Opc) {

static bool isFSUB(unsigned Opc) {

static bool isFMUL(unsigned Opc) {
bool RISCVInstrInfo::isVectorAssociativeAndCommutative(const MachineInstr &Inst,
                                                       bool Invert) const {
#define OPCODE_LMUL_CASE(OPC)                                                  \
  case RISCV::OPC##_M1:                                                        \
  case RISCV::OPC##_M2:                                                        \
  case RISCV::OPC##_M4:                                                        \
  case RISCV::OPC##_M8:                                                        \
  case RISCV::OPC##_MF2:                                                       \
  case RISCV::OPC##_MF4:                                                       \
  case RISCV::OPC##_MF8

#define OPCODE_LMUL_MASK_CASE(OPC)                                             \
  case RISCV::OPC##_M1_MASK:                                                   \
  case RISCV::OPC##_M2_MASK:                                                   \
  case RISCV::OPC##_M4_MASK:                                                   \
  case RISCV::OPC##_M8_MASK:                                                   \
  case RISCV::OPC##_MF2_MASK:                                                  \
  case RISCV::OPC##_MF4_MASK:                                                  \
  case RISCV::OPC##_MF8_MASK

  unsigned Opcode = Inst.getOpcode();
  if (Invert) {
    if (auto InvOpcode = getInverseOpcode(Opcode))
      Opcode = *InvOpcode;
    else
      return false;
  }

  switch (Opcode) {
  default:
    return false;
    OPCODE_LMUL_CASE(PseudoVADD_VV):
    OPCODE_LMUL_MASK_CASE(PseudoVADD_VV):
    OPCODE_LMUL_CASE(PseudoVMUL_VV):
    OPCODE_LMUL_MASK_CASE(PseudoVMUL_VV):
    return true;
  }

#undef OPCODE_LMUL_MASK_CASE
#undef OPCODE_LMUL_CASE
}
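
// Two RVV pseudos are only reassociable if, besides having the same (or
// inverse) opcode, they agree on the passthru, SEW, mask (V0 definition),
// policy, VL and rounding-mode operands.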
bool RISCVInstrInfo::areRVVInstsReassociable(const MachineInstr &Root,
                                             const MachineInstr &Prev) const {
  if (!areOpcodesEqualOrInverse(Root.getOpcode(), Prev.getOpcode()))
    return false;

  assert(Root.getMF() == Prev.getMF());
  const MachineRegisterInfo *MRI = &Root.getMF()->getRegInfo();
  const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();

  // Make sure vtype operands are also the same.
  const MCInstrDesc &Desc = get(Root.getOpcode());
  const uint64_t TSFlags = Desc.TSFlags;

  auto checkImmOperand = [&](unsigned OpIdx) {
    return Root.getOperand(OpIdx).getImm() == Prev.getOperand(OpIdx).getImm();
  };

  auto checkRegOperand = [&](unsigned OpIdx) {
    return Root.getOperand(OpIdx).getReg() == Prev.getOperand(OpIdx).getReg();
  };

  // TODO: Potentially we can loosen the condition to consider Root to be
  // associable with Prev if Root has NoReg as passthru. In which case we
  // also need to loosen the condition on vector policies between these.
  if (!checkRegOperand(1))
    return false;

  if (RISCVII::hasSEWOp(TSFlags) &&
      !checkImmOperand(RISCVII::getSEWOpNum(Desc)))
    return false;

  if (RISCVII::usesMaskPolicy(TSFlags)) {
    const MachineBasicBlock *MBB = Root.getParent();
    const MachineBasicBlock::const_reverse_iterator It1(&Root);
    const MachineBasicBlock::const_reverse_iterator It2(&Prev);
    Register MI1VReg;

    bool SeenMI2 = false;
    for (auto End = MBB->rend(), It = It1; It != End; ++It) {
      if (It == It2) {
        SeenMI2 = true;
        if (!MI1VReg.isValid())
          // There is no V0 def between Root and Prev; they're sharing the
          // same V0.
          break;
      }

      if (It->modifiesRegister(RISCV::V0, TRI)) {
        Register SrcReg = It->getOperand(1).getReg();
        // If it's not VReg it'll be more difficult to track its defs, so
        // bailing out here just to be safe.
        if (!SrcReg.isVirtual())
          return false;

        if (!MI1VReg.isValid()) {
          // This is the V0 def for Root.
          MI1VReg = SrcReg;
          continue;
        }

        // Some random mask updates.
        if (!SeenMI2)
          continue;

        // This is the V0 def for Prev; check if it's the same as that of
        // Root.
        if (MI1VReg != SrcReg)
          return false;
      }
    }

    // If we haven't encountered Prev, it's likely that this function was
    // called in a wrong way (e.g. Root is before Prev).
    assert(SeenMI2 && "Prev is expected to appear before Root");
  }

  // Tail / Mask policies
  if (RISCVII::hasVecPolicyOp(TSFlags) &&
      !checkImmOperand(RISCVII::getVecPolicyOpNum(Desc)))
    return false;

  if (RISCVII::hasVLOp(TSFlags)) {
    unsigned OpIdx = RISCVII::getVLOpNum(Desc);
    const MachineOperand &Op1 = Root.getOperand(OpIdx);
    const MachineOperand &Op2 = Prev.getOperand(OpIdx);
    if (Op1.getType() != Op2.getType())
      return false;
    switch (Op1.getType()) {
    case MachineOperand::MO_Register:
      if (Op1.getReg() != Op2.getReg())
        return false;
      break;
    case MachineOperand::MO_Immediate:
      if (Op1.getImm() != Op2.getImm())
        return false;
      break;
    default:
      llvm_unreachable("Unrecognized VL operand type");
    }
  }

  if (RISCVII::hasRoundModeOp(TSFlags) &&
      !checkImmOperand(RISCVII::getVLOpNum(Desc) - 1))
    return false;

  return true;
}
// Most of our RVV pseudos have passthru operand, so the real operands
// start from index = 2.
bool RISCVInstrInfo::hasReassociableVectorSibling(const MachineInstr &Inst,
                                                  bool &Commuted) const {
  const MachineBasicBlock *MBB = Inst.getParent();
  const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
  assert(RISCVII::isFirstDefTiedToFirstUse(get(Inst.getOpcode())) &&
         "Expect the present of passthrough operand.");
  MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg());
  MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(3).getReg());

  // If only one operand has the same or inverse opcode and it's the second
  // source operand, the operands must be commuted.
  Commuted = !areRVVInstsReassociable(Inst, *MI1) &&
             areRVVInstsReassociable(Inst, *MI2);
  if (Commuted)
    std::swap(MI1, MI2);

  return areRVVInstsReassociable(Inst, *MI1) &&
         (isVectorAssociativeAndCommutative(*MI1) ||
          isVectorAssociativeAndCommutative(*MI1, /* Invert */ true)) &&
         hasReassociableOperands(*MI1, MBB) &&
         MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg());
}

bool RISCVInstrInfo::hasReassociableOperands(
    const MachineInstr &Inst, const MachineBasicBlock *MBB) const {
  if (!isVectorAssociativeAndCommutative(Inst) &&
      !isVectorAssociativeAndCommutative(Inst, /*Invert=*/true))
    return TargetInstrInfo::hasReassociableOperands(Inst, MBB);

  const MachineOperand &Op1 = Inst.getOperand(2);
  const MachineOperand &Op2 = Inst.getOperand(3);
  const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  // We need virtual register definitions for the operands that we will
  // reassociate.
  MachineInstr *MI1 = nullptr;
  MachineInstr *MI2 = nullptr;
  if (Op1.isReg() && Op1.getReg().isVirtual())
    MI1 = MRI.getUniqueVRegDef(Op1.getReg());
  if (Op2.isReg() && Op2.getReg().isVirtual())
    MI2 = MRI.getUniqueVRegDef(Op2.getReg());

  // And at least one operand must be defined in MBB.
  return MI1 && MI2 && (MI1->getParent() == MBB || MI2->getParent() == MBB);
}
1949 void RISCVInstrInfo::getReassociateOperandIndices(
1950 const MachineInstr
&Root
, unsigned Pattern
,
1951 std::array
<unsigned, 5> &OperandIndices
) const {
1952 TargetInstrInfo::getReassociateOperandIndices(Root
, Pattern
, OperandIndices
);
1953 if (RISCV::getRVVMCOpcode(Root
.getOpcode())) {
1954 // Skip the passthrough operand, so increment all indices by one.
1955 for (unsigned I
= 0; I
< 5; ++I
)
1956 ++OperandIndices
[I
];
1960 bool RISCVInstrInfo::hasReassociableSibling(const MachineInstr
&Inst
,
1961 bool &Commuted
) const {
1962 if (isVectorAssociativeAndCommutative(Inst
) ||
1963 isVectorAssociativeAndCommutative(Inst
, /*Invert=*/true))
1964 return hasReassociableVectorSibling(Inst
, Commuted
);
1966 if (!TargetInstrInfo::hasReassociableSibling(Inst
, Commuted
))
1969 const MachineRegisterInfo
&MRI
= Inst
.getMF()->getRegInfo();
1970 unsigned OperandIdx
= Commuted
? 2 : 1;
1971 const MachineInstr
&Sibling
=
1972 *MRI
.getVRegDef(Inst
.getOperand(OperandIdx
).getReg());
1974 int16_t InstFrmOpIdx
=
1975 RISCV::getNamedOperandIdx(Inst
.getOpcode(), RISCV::OpName::frm
);
1976 int16_t SiblingFrmOpIdx
=
1977 RISCV::getNamedOperandIdx(Sibling
.getOpcode(), RISCV::OpName::frm
);
1979 return (InstFrmOpIdx
< 0 && SiblingFrmOpIdx
< 0) ||
1980 RISCV::hasEqualFRM(Inst
, Sibling
);
1983 bool RISCVInstrInfo::isAssociativeAndCommutative(const MachineInstr
&Inst
,
1984 bool Invert
) const {
1985 if (isVectorAssociativeAndCommutative(Inst
, Invert
))
1988 unsigned Opc
= Inst
.getOpcode();
1990 auto InverseOpcode
= getInverseOpcode(Opc
);
1993 Opc
= *InverseOpcode
;
1996 if (isFADD(Opc
) || isFMUL(Opc
))
1997 return Inst
.getFlag(MachineInstr::MIFlag::FmReassoc
) &&
1998 Inst
.getFlag(MachineInstr::MIFlag::FmNsz
);
2008 // From RISC-V ISA spec, if both the high and low bits of the same product
2009 // are required, then the recommended code sequence is:
2011 // MULH[[S]U] rdh, rs1, rs2
2012 // MUL rdl, rs1, rs2
2013 // (source register specifiers must be in same order and rdh cannot be the
2014 // same as rs1 or rs2)
2016 // Microarchitectures can then fuse these into a single multiply operation
2017 // instead of performing two separate multiplies.
2018 // MachineCombiner may reassociate MUL operands and lose the fusion
2038 std::optional
<unsigned>
2039 RISCVInstrInfo::getInverseOpcode(unsigned Opcode
) const {
2040 #define RVV_OPC_LMUL_CASE(OPC, INV) \
2041 case RISCV::OPC##_M1: \
2042 return RISCV::INV##_M1; \
2043 case RISCV::OPC##_M2: \
2044 return RISCV::INV##_M2; \
2045 case RISCV::OPC##_M4: \
2046 return RISCV::INV##_M4; \
2047 case RISCV::OPC##_M8: \
2048 return RISCV::INV##_M8; \
2049 case RISCV::OPC##_MF2: \
2050 return RISCV::INV##_MF2; \
2051 case RISCV::OPC##_MF4: \
2052 return RISCV::INV##_MF4; \
2053 case RISCV::OPC##_MF8: \
2054 return RISCV::INV##_MF8
2056 #define RVV_OPC_LMUL_MASK_CASE(OPC, INV) \
2057 case RISCV::OPC##_M1_MASK: \
2058 return RISCV::INV##_M1_MASK; \
2059 case RISCV::OPC##_M2_MASK: \
2060 return RISCV::INV##_M2_MASK; \
2061 case RISCV::OPC##_M4_MASK: \
2062 return RISCV::INV##_M4_MASK; \
2063 case RISCV::OPC##_M8_MASK: \
2064 return RISCV::INV##_M8_MASK; \
2065 case RISCV::OPC##_MF2_MASK: \
2066 return RISCV::INV##_MF2_MASK; \
2067 case RISCV::OPC##_MF4_MASK: \
2068 return RISCV::INV##_MF4_MASK; \
2069 case RISCV::OPC##_MF8_MASK: \
2070 return RISCV::INV##_MF8_MASK
2074 return std::nullopt
;
2076 return RISCV::FSUB_H
;
2078 return RISCV::FSUB_S
;
2080 return RISCV::FSUB_D
;
2082 return RISCV::FADD_H
;
2084 return RISCV::FADD_S
;
2086 return RISCV::FADD_D
;
2096 RVV_OPC_LMUL_CASE(PseudoVADD_VV
, PseudoVSUB_VV
);
2097 RVV_OPC_LMUL_MASK_CASE(PseudoVADD_VV
, PseudoVSUB_VV
);
2098 RVV_OPC_LMUL_CASE(PseudoVSUB_VV
, PseudoVADD_VV
);
2099 RVV_OPC_LMUL_MASK_CASE(PseudoVSUB_VV
, PseudoVADD_VV
);
2103 #undef RVV_OPC_LMUL_MASK_CASE
2104 #undef RVV_OPC_LMUL_CASE
2107 static bool canCombineFPFusedMultiply(const MachineInstr
&Root
,
2108 const MachineOperand
&MO
,
2109 bool DoRegPressureReduce
) {
2110 if (!MO
.isReg() || !MO
.getReg().isVirtual())
2112 const MachineRegisterInfo
&MRI
= Root
.getMF()->getRegInfo();
2113 MachineInstr
*MI
= MRI
.getVRegDef(MO
.getReg());
2114 if (!MI
|| !isFMUL(MI
->getOpcode()))
2117 if (!Root
.getFlag(MachineInstr::MIFlag::FmContract
) ||
2118 !MI
->getFlag(MachineInstr::MIFlag::FmContract
))
2121 // Try combining even if fmul has more than one use as it eliminates
2122 // dependency between fadd(fsub) and fmul. However, it can extend liveranges
2123 // for fmul operands, so reject the transformation in register pressure
2125 if (DoRegPressureReduce
&& !MRI
.hasOneNonDBGUse(MI
->getOperand(0).getReg()))
2128 // Do not combine instructions from different basic blocks.
2129 if (Root
.getParent() != MI
->getParent())
2131 return RISCV::hasEqualFRM(Root
, *MI
);
2134 static bool getFPFusedMultiplyPatterns(MachineInstr
&Root
,
2135 SmallVectorImpl
<unsigned> &Patterns
,
2136 bool DoRegPressureReduce
) {
2137 unsigned Opc
= Root
.getOpcode();
2138 bool IsFAdd
= isFADD(Opc
);
2139 if (!IsFAdd
&& !isFSUB(Opc
))
2142 if (canCombineFPFusedMultiply(Root
, Root
.getOperand(1),
2143 DoRegPressureReduce
)) {
2144 Patterns
.push_back(IsFAdd
? RISCVMachineCombinerPattern::FMADD_AX
2145 : RISCVMachineCombinerPattern::FMSUB
);
2148 if (canCombineFPFusedMultiply(Root
, Root
.getOperand(2),
2149 DoRegPressureReduce
)) {
2150 Patterns
.push_back(IsFAdd
? RISCVMachineCombinerPattern::FMADD_XA
2151 : RISCVMachineCombinerPattern::FNMSUB
);
2157 static bool getFPPatterns(MachineInstr
&Root
,
2158 SmallVectorImpl
<unsigned> &Patterns
,
2159 bool DoRegPressureReduce
) {
2160 return getFPFusedMultiplyPatterns(Root
, Patterns
, DoRegPressureReduce
);
2163 /// Utility routine that checks if \param MO is defined by an
2164 /// \param CombineOpc instruction in the basic block \param MBB
2165 static const MachineInstr
*canCombine(const MachineBasicBlock
&MBB
,
2166 const MachineOperand
&MO
,
2167 unsigned CombineOpc
) {
2168 const MachineRegisterInfo
&MRI
= MBB
.getParent()->getRegInfo();
2169 const MachineInstr
*MI
= nullptr;
2171 if (MO
.isReg() && MO
.getReg().isVirtual())
2172 MI
= MRI
.getUniqueVRegDef(MO
.getReg());
2173 // And it needs to be in the trace (otherwise, it won't have a depth).
2174 if (!MI
|| MI
->getParent() != &MBB
|| MI
->getOpcode() != CombineOpc
)
2176 // Must only used by the user we combine with.
2177 if (!MRI
.hasOneNonDBGUse(MI
->getOperand(0).getReg()))
2183 /// Utility routine that checks if \param MO is defined by a SLLI in \param
2184 /// MBB that can be combined by splitting across 2 SHXADD instructions. The
2185 /// first SHXADD shift amount is given by \param OuterShiftAmt.
2186 static bool canCombineShiftIntoShXAdd(const MachineBasicBlock
&MBB
,
2187 const MachineOperand
&MO
,
2188 unsigned OuterShiftAmt
) {
2189 const MachineInstr
*ShiftMI
= canCombine(MBB
, MO
, RISCV::SLLI
);
2193 unsigned InnerShiftAmt
= ShiftMI
->getOperand(2).getImm();
2194 if (InnerShiftAmt
< OuterShiftAmt
|| (InnerShiftAmt
- OuterShiftAmt
) > 3)
2200 // Returns the shift amount from a SHXADD instruction. Returns 0 if the
2201 // instruction is not a SHXADD.
2202 static unsigned getSHXADDShiftAmount(unsigned Opc
) {
2215 // Look for opportunities to combine (sh3add Z, (add X, (slli Y, 5))) into
2216 // (sh3add (sh2add Y, Z), X).
2217 static bool getSHXADDPatterns(const MachineInstr
&Root
,
2218 SmallVectorImpl
<unsigned> &Patterns
) {
2219 unsigned ShiftAmt
= getSHXADDShiftAmount(Root
.getOpcode());
2223 const MachineBasicBlock
&MBB
= *Root
.getParent();
2225 const MachineInstr
*AddMI
= canCombine(MBB
, Root
.getOperand(2), RISCV::ADD
);
2230 if (canCombineShiftIntoShXAdd(MBB
, AddMI
->getOperand(1), ShiftAmt
)) {
2231 Patterns
.push_back(RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP1
);
2234 if (canCombineShiftIntoShXAdd(MBB
, AddMI
->getOperand(2), ShiftAmt
)) {
2235 Patterns
.push_back(RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP2
);
2242 CombinerObjective
RISCVInstrInfo::getCombinerObjective(unsigned Pattern
) const {
2244 case RISCVMachineCombinerPattern::FMADD_AX
:
2245 case RISCVMachineCombinerPattern::FMADD_XA
:
2246 case RISCVMachineCombinerPattern::FMSUB
:
2247 case RISCVMachineCombinerPattern::FNMSUB
:
2248 return CombinerObjective::MustReduceDepth
;
2250 return TargetInstrInfo::getCombinerObjective(Pattern
);
2254 bool RISCVInstrInfo::getMachineCombinerPatterns(
2255 MachineInstr
&Root
, SmallVectorImpl
<unsigned> &Patterns
,
2256 bool DoRegPressureReduce
) const {
2258 if (getFPPatterns(Root
, Patterns
, DoRegPressureReduce
))
2261 if (getSHXADDPatterns(Root
, Patterns
))
2264 return TargetInstrInfo::getMachineCombinerPatterns(Root
, Patterns
,
2265 DoRegPressureReduce
);
2268 static unsigned getFPFusedMultiplyOpcode(unsigned RootOpc
, unsigned Pattern
) {
2271 llvm_unreachable("Unexpected opcode");
2273 return RISCV::FMADD_H
;
2275 return RISCV::FMADD_S
;
2277 return RISCV::FMADD_D
;
2279 return Pattern
== RISCVMachineCombinerPattern::FMSUB
? RISCV::FMSUB_H
2282 return Pattern
== RISCVMachineCombinerPattern::FMSUB
? RISCV::FMSUB_S
2285 return Pattern
== RISCVMachineCombinerPattern::FMSUB
? RISCV::FMSUB_D
2290 static unsigned getAddendOperandIdx(unsigned Pattern
) {
2293 llvm_unreachable("Unexpected pattern");
2294 case RISCVMachineCombinerPattern::FMADD_AX
:
2295 case RISCVMachineCombinerPattern::FMSUB
:
2297 case RISCVMachineCombinerPattern::FMADD_XA
:
2298 case RISCVMachineCombinerPattern::FNMSUB
:
2303 static void combineFPFusedMultiply(MachineInstr
&Root
, MachineInstr
&Prev
,
2305 SmallVectorImpl
<MachineInstr
*> &InsInstrs
,
2306 SmallVectorImpl
<MachineInstr
*> &DelInstrs
) {
2307 MachineFunction
*MF
= Root
.getMF();
2308 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
2309 const TargetInstrInfo
*TII
= MF
->getSubtarget().getInstrInfo();
2311 MachineOperand
&Mul1
= Prev
.getOperand(1);
2312 MachineOperand
&Mul2
= Prev
.getOperand(2);
2313 MachineOperand
&Dst
= Root
.getOperand(0);
2314 MachineOperand
&Addend
= Root
.getOperand(getAddendOperandIdx(Pattern
));
2316 Register DstReg
= Dst
.getReg();
2317 unsigned FusedOpc
= getFPFusedMultiplyOpcode(Root
.getOpcode(), Pattern
);
2318 uint32_t IntersectedFlags
= Root
.getFlags() & Prev
.getFlags();
2319 DebugLoc MergedLoc
=
2320 DILocation::getMergedLocation(Root
.getDebugLoc(), Prev
.getDebugLoc());
2322 bool Mul1IsKill
= Mul1
.isKill();
2323 bool Mul2IsKill
= Mul2
.isKill();
2324 bool AddendIsKill
= Addend
.isKill();
2326 // We need to clear kill flags since we may be extending the live range past
2327 // a kill. If the mul had kill flags, we can preserve those since we know
2328 // where the previous range stopped.
2329 MRI
.clearKillFlags(Mul1
.getReg());
2330 MRI
.clearKillFlags(Mul2
.getReg());
2332 MachineInstrBuilder MIB
=
2333 BuildMI(*MF
, MergedLoc
, TII
->get(FusedOpc
), DstReg
)
2334 .addReg(Mul1
.getReg(), getKillRegState(Mul1IsKill
))
2335 .addReg(Mul2
.getReg(), getKillRegState(Mul2IsKill
))
2336 .addReg(Addend
.getReg(), getKillRegState(AddendIsKill
))
2337 .setMIFlags(IntersectedFlags
);
2339 InsInstrs
.push_back(MIB
);
2340 if (MRI
.hasOneNonDBGUse(Prev
.getOperand(0).getReg()))
2341 DelInstrs
.push_back(&Prev
);
2342 DelInstrs
.push_back(&Root
);
2345 // Combine patterns like (sh3add Z, (add X, (slli Y, 5))) to
2346 // (sh3add (sh2add Y, Z), X) if the shift amount can be split across two
2347 // shXadd instructions. The outer shXadd keeps its original opcode.
2349 genShXAddAddShift(MachineInstr
&Root
, unsigned AddOpIdx
,
2350 SmallVectorImpl
<MachineInstr
*> &InsInstrs
,
2351 SmallVectorImpl
<MachineInstr
*> &DelInstrs
,
2352 DenseMap
<unsigned, unsigned> &InstrIdxForVirtReg
) {
2353 MachineFunction
*MF
= Root
.getMF();
2354 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
2355 const TargetInstrInfo
*TII
= MF
->getSubtarget().getInstrInfo();
2357 unsigned OuterShiftAmt
= getSHXADDShiftAmount(Root
.getOpcode());
2358 assert(OuterShiftAmt
!= 0 && "Unexpected opcode");
2360 MachineInstr
*AddMI
= MRI
.getUniqueVRegDef(Root
.getOperand(2).getReg());
2361 MachineInstr
*ShiftMI
=
2362 MRI
.getUniqueVRegDef(AddMI
->getOperand(AddOpIdx
).getReg());
2364 unsigned InnerShiftAmt
= ShiftMI
->getOperand(2).getImm();
2365 assert(InnerShiftAmt
>= OuterShiftAmt
&& "Unexpected shift amount");
2368 switch (InnerShiftAmt
- OuterShiftAmt
) {
2370 llvm_unreachable("Unexpected shift amount");
2372 InnerOpc
= RISCV::ADD
;
2375 InnerOpc
= RISCV::SH1ADD
;
2378 InnerOpc
= RISCV::SH2ADD
;
2381 InnerOpc
= RISCV::SH3ADD
;
2385 const MachineOperand
&X
= AddMI
->getOperand(3 - AddOpIdx
);
2386 const MachineOperand
&Y
= ShiftMI
->getOperand(1);
2387 const MachineOperand
&Z
= Root
.getOperand(1);
2389 Register NewVR
= MRI
.createVirtualRegister(&RISCV::GPRRegClass
);
2391 auto MIB1
= BuildMI(*MF
, MIMetadata(Root
), TII
->get(InnerOpc
), NewVR
)
2392 .addReg(Y
.getReg(), getKillRegState(Y
.isKill()))
2393 .addReg(Z
.getReg(), getKillRegState(Z
.isKill()));
2394 auto MIB2
= BuildMI(*MF
, MIMetadata(Root
), TII
->get(Root
.getOpcode()),
2395 Root
.getOperand(0).getReg())
2396 .addReg(NewVR
, RegState::Kill
)
2397 .addReg(X
.getReg(), getKillRegState(X
.isKill()));
2399 InstrIdxForVirtReg
.insert(std::make_pair(NewVR
, 0));
2400 InsInstrs
.push_back(MIB1
);
2401 InsInstrs
.push_back(MIB2
);
2402 DelInstrs
.push_back(ShiftMI
);
2403 DelInstrs
.push_back(AddMI
);
2404 DelInstrs
.push_back(&Root
);
2407 void RISCVInstrInfo::genAlternativeCodeSequence(
2408 MachineInstr
&Root
, unsigned Pattern
,
2409 SmallVectorImpl
<MachineInstr
*> &InsInstrs
,
2410 SmallVectorImpl
<MachineInstr
*> &DelInstrs
,
2411 DenseMap
<unsigned, unsigned> &InstrIdxForVirtReg
) const {
2412 MachineRegisterInfo
&MRI
= Root
.getMF()->getRegInfo();
2415 TargetInstrInfo::genAlternativeCodeSequence(Root
, Pattern
, InsInstrs
,
2416 DelInstrs
, InstrIdxForVirtReg
);
2418 case RISCVMachineCombinerPattern::FMADD_AX
:
2419 case RISCVMachineCombinerPattern::FMSUB
: {
2420 MachineInstr
&Prev
= *MRI
.getVRegDef(Root
.getOperand(1).getReg());
2421 combineFPFusedMultiply(Root
, Prev
, Pattern
, InsInstrs
, DelInstrs
);
2424 case RISCVMachineCombinerPattern::FMADD_XA
:
2425 case RISCVMachineCombinerPattern::FNMSUB
: {
2426 MachineInstr
&Prev
= *MRI
.getVRegDef(Root
.getOperand(2).getReg());
2427 combineFPFusedMultiply(Root
, Prev
, Pattern
, InsInstrs
, DelInstrs
);
2430 case RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP1
:
2431 genShXAddAddShift(Root
, 1, InsInstrs
, DelInstrs
, InstrIdxForVirtReg
);
2433 case RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP2
:
2434 genShXAddAddShift(Root
, 2, InsInstrs
, DelInstrs
, InstrIdxForVirtReg
);
2439 bool RISCVInstrInfo::verifyInstruction(const MachineInstr
&MI
,
2440 StringRef
&ErrInfo
) const {
2441 MCInstrDesc
const &Desc
= MI
.getDesc();
2443 for (const auto &[Index
, Operand
] : enumerate(Desc
.operands())) {
2444 unsigned OpType
= Operand
.OperandType
;
2445 if (OpType
>= RISCVOp::OPERAND_FIRST_RISCV_IMM
&&
2446 OpType
<= RISCVOp::OPERAND_LAST_RISCV_IMM
) {
2447 const MachineOperand
&MO
= MI
.getOperand(Index
);
2449 ErrInfo
= "Expected a non-register operand.";
2453 int64_t Imm
= MO
.getImm();
2457 llvm_unreachable("Unexpected operand type");
2460 #define CASE_OPERAND_UIMM(NUM) \
2461 case RISCVOp::OPERAND_UIMM##NUM: \
2462 Ok = isUInt<NUM>(Imm); \
2464 #define CASE_OPERAND_SIMM(NUM) \
2465 case RISCVOp::OPERAND_SIMM##NUM: \
2466 Ok = isInt<NUM>(Imm); \
2468 CASE_OPERAND_UIMM(1)
2469 CASE_OPERAND_UIMM(2)
2470 CASE_OPERAND_UIMM(3)
2471 CASE_OPERAND_UIMM(4)
2472 CASE_OPERAND_UIMM(5)
2473 CASE_OPERAND_UIMM(6)
2474 CASE_OPERAND_UIMM(7)
2475 CASE_OPERAND_UIMM(8)
2476 CASE_OPERAND_UIMM(12)
2477 CASE_OPERAND_UIMM(20)
2479 case RISCVOp::OPERAND_UIMM2_LSB0
:
2480 Ok
= isShiftedUInt
<1, 1>(Imm
);
2482 case RISCVOp::OPERAND_UIMM5_LSB0
:
2483 Ok
= isShiftedUInt
<4, 1>(Imm
);
2485 case RISCVOp::OPERAND_UIMM6_LSB0
:
2486 Ok
= isShiftedUInt
<5, 1>(Imm
);
2488 case RISCVOp::OPERAND_UIMM7_LSB00
:
2489 Ok
= isShiftedUInt
<5, 2>(Imm
);
2491 case RISCVOp::OPERAND_UIMM8_LSB00
:
2492 Ok
= isShiftedUInt
<6, 2>(Imm
);
2494 case RISCVOp::OPERAND_UIMM8_LSB000
:
2495 Ok
= isShiftedUInt
<5, 3>(Imm
);
2497 case RISCVOp::OPERAND_UIMM8_GE32
:
2498 Ok
= isUInt
<8>(Imm
) && Imm
>= 32;
2500 case RISCVOp::OPERAND_UIMM9_LSB000
:
2501 Ok
= isShiftedUInt
<6, 3>(Imm
);
2503 case RISCVOp::OPERAND_SIMM10_LSB0000_NONZERO
:
2504 Ok
= isShiftedInt
<6, 4>(Imm
) && (Imm
!= 0);
2506 case RISCVOp::OPERAND_UIMM10_LSB00_NONZERO
:
2507 Ok
= isShiftedUInt
<8, 2>(Imm
) && (Imm
!= 0);
2509 case RISCVOp::OPERAND_ZERO
:
2513 CASE_OPERAND_SIMM(5)
2514 CASE_OPERAND_SIMM(6)
2515 CASE_OPERAND_SIMM(12)
2517 case RISCVOp::OPERAND_SIMM5_PLUS1
:
2518 Ok
= (isInt
<5>(Imm
) && Imm
!= -16) || Imm
== 16;
2520 case RISCVOp::OPERAND_SIMM6_NONZERO
:
2521 Ok
= Imm
!= 0 && isInt
<6>(Imm
);
2523 case RISCVOp::OPERAND_VTYPEI10
:
2524 Ok
= isUInt
<10>(Imm
);
2526 case RISCVOp::OPERAND_VTYPEI11
:
2527 Ok
= isUInt
<11>(Imm
);
2529 case RISCVOp::OPERAND_SIMM12_LSB00000
:
2530 Ok
= isShiftedInt
<7, 5>(Imm
);
2532 case RISCVOp::OPERAND_UIMMLOG2XLEN
:
2533 Ok
= STI
.is64Bit() ? isUInt
<6>(Imm
) : isUInt
<5>(Imm
);
2535 case RISCVOp::OPERAND_UIMMLOG2XLEN_NONZERO
:
2536 Ok
= STI
.is64Bit() ? isUInt
<6>(Imm
) : isUInt
<5>(Imm
);
2537 Ok
= Ok
&& Imm
!= 0;
2539 case RISCVOp::OPERAND_CLUI_IMM
:
2540 Ok
= (isUInt
<5>(Imm
) && Imm
!= 0) ||
2541 (Imm
>= 0xfffe0 && Imm
<= 0xfffff);
2543 case RISCVOp::OPERAND_RVKRNUM
:
2544 Ok
= Imm
>= 0 && Imm
<= 10;
2546 case RISCVOp::OPERAND_RVKRNUM_0_7
:
2547 Ok
= Imm
>= 0 && Imm
<= 7;
2549 case RISCVOp::OPERAND_RVKRNUM_1_10
:
2550 Ok
= Imm
>= 1 && Imm
<= 10;
2552 case RISCVOp::OPERAND_RVKRNUM_2_14
:
2553 Ok
= Imm
>= 2 && Imm
<= 14;
2555 case RISCVOp::OPERAND_SPIMM
:
2556 Ok
= (Imm
& 0xf) == 0;
2558 case RISCVOp::OPERAND_FRMARG
:
2559 Ok
= RISCVFPRndMode::isValidRoundingMode(Imm
);
2561 case RISCVOp::OPERAND_RTZARG
:
2562 Ok
= Imm
== RISCVFPRndMode::RTZ
;
2564 case RISCVOp::OPERAND_COND_CODE
:
2565 Ok
= Imm
>= 0 && Imm
< RISCVCC::COND_INVALID
;
2567 case RISCVOp::OPERAND_VEC_POLICY
:
2568 Ok
= (Imm
& (RISCVII::TAIL_AGNOSTIC
| RISCVII::MASK_AGNOSTIC
)) == Imm
;
2570 case RISCVOp::OPERAND_SEW
:
2571 Ok
= (isUInt
<5>(Imm
) && RISCVVType::isValidSEW(1 << Imm
));
2573 case RISCVOp::OPERAND_SEW_MASK
:
2576 case RISCVOp::OPERAND_VEC_RM
:
2577 assert(RISCVII::hasRoundModeOp(Desc
.TSFlags
));
2578 if (RISCVII::usesVXRM(Desc
.TSFlags
))
2579 Ok
= isUInt
<2>(Imm
);
2581 Ok
= RISCVFPRndMode::isValidRoundingMode(Imm
);
2585 ErrInfo
= "Invalid immediate";
2592 const uint64_t TSFlags
= Desc
.TSFlags
;
2593 if (RISCVII::hasVLOp(TSFlags
)) {
2594 const MachineOperand
&Op
= MI
.getOperand(RISCVII::getVLOpNum(Desc
));
2595 if (!Op
.isImm() && !Op
.isReg()) {
2596 ErrInfo
= "Invalid operand type for VL operand";
2599 if (Op
.isReg() && Op
.getReg() != RISCV::NoRegister
) {
2600 const MachineRegisterInfo
&MRI
= MI
.getParent()->getParent()->getRegInfo();
2601 auto *RC
= MRI
.getRegClass(Op
.getReg());
2602 if (!RISCV::GPRRegClass
.hasSubClassEq(RC
)) {
2603 ErrInfo
= "Invalid register class for VL operand";
2607 if (!RISCVII::hasSEWOp(TSFlags
)) {
2608 ErrInfo
= "VL operand w/o SEW operand?";
2612 if (RISCVII::hasSEWOp(TSFlags
)) {
2613 unsigned OpIdx
= RISCVII::getSEWOpNum(Desc
);
2614 if (!MI
.getOperand(OpIdx
).isImm()) {
2615 ErrInfo
= "SEW value expected to be an immediate";
2618 uint64_t Log2SEW
= MI
.getOperand(OpIdx
).getImm();
2620 ErrInfo
= "Unexpected SEW value";
2623 unsigned SEW
= Log2SEW
? 1 << Log2SEW
: 8;
2624 if (!RISCVVType::isValidSEW(SEW
)) {
2625 ErrInfo
= "Unexpected SEW value";
2629 if (RISCVII::hasVecPolicyOp(TSFlags
)) {
2630 unsigned OpIdx
= RISCVII::getVecPolicyOpNum(Desc
);
2631 if (!MI
.getOperand(OpIdx
).isImm()) {
2632 ErrInfo
= "Policy operand expected to be an immediate";
2635 uint64_t Policy
= MI
.getOperand(OpIdx
).getImm();
2636 if (Policy
> (RISCVII::TAIL_AGNOSTIC
| RISCVII::MASK_AGNOSTIC
)) {
2637 ErrInfo
= "Invalid Policy Value";
2640 if (!RISCVII::hasVLOp(TSFlags
)) {
2641 ErrInfo
= "policy operand w/o VL operand?";
2645 // VecPolicy operands can only exist on instructions with passthru/merge
2646 // arguments. Note that not all arguments with passthru have vec policy
2647 // operands- some instructions have implicit policies.
2649 if (!MI
.isRegTiedToUseOperand(0, &UseOpIdx
)) {
2650 ErrInfo
= "policy operand w/o tied operand?";
2655 if (int Idx
= RISCVII::getFRMOpNum(Desc
);
2656 Idx
>= 0 && MI
.getOperand(Idx
).getImm() == RISCVFPRndMode::DYN
&&
2657 !MI
.readsRegister(RISCV::FRM
, /*TRI=*/nullptr)) {
2658 ErrInfo
= "dynamic rounding mode should read FRM";
2665 bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr
&MemI
, Register Reg
,
2666 const MachineInstr
&AddrI
,
2667 ExtAddrMode
&AM
) const {
2668 switch (MemI
.getOpcode()) {
2695 if (MemI
.getOperand(0).getReg() == Reg
)
2698 if (AddrI
.getOpcode() != RISCV::ADDI
|| !AddrI
.getOperand(1).isReg() ||
2699 !AddrI
.getOperand(2).isImm())
2702 int64_t OldOffset
= MemI
.getOperand(2).getImm();
2703 int64_t Disp
= AddrI
.getOperand(2).getImm();
2704 int64_t NewOffset
= OldOffset
+ Disp
;
2706 NewOffset
= SignExtend64
<32>(NewOffset
);
2708 if (!isInt
<12>(NewOffset
))
2711 AM
.BaseReg
= AddrI
.getOperand(1).getReg();
2714 AM
.Displacement
= NewOffset
;
2715 AM
.Form
= ExtAddrMode::Formula::Basic
;
2719 MachineInstr
*RISCVInstrInfo::emitLdStWithAddr(MachineInstr
&MemI
,
2720 const ExtAddrMode
&AM
) const {
2722 const DebugLoc
&DL
= MemI
.getDebugLoc();
2723 MachineBasicBlock
&MBB
= *MemI
.getParent();
2725 assert(AM
.ScaledReg
== 0 && AM
.Scale
== 0 &&
2726 "Addressing mode not supported for folding");
2728 return BuildMI(MBB
, MemI
, DL
, get(MemI
.getOpcode()))
2729 .addReg(MemI
.getOperand(0).getReg(),
2730 MemI
.mayLoad() ? RegState::Define
: 0)
2732 .addImm(AM
.Displacement
)
2733 .setMemRefs(MemI
.memoperands())
2734 .setMIFlags(MemI
.getFlags());
2737 bool RISCVInstrInfo::getMemOperandsWithOffsetWidth(
2738 const MachineInstr
&LdSt
, SmallVectorImpl
<const MachineOperand
*> &BaseOps
,
2739 int64_t &Offset
, bool &OffsetIsScalable
, LocationSize
&Width
,
2740 const TargetRegisterInfo
*TRI
) const {
2741 if (!LdSt
.mayLoadOrStore())
2744 // Conservatively, only handle scalar loads/stores for now.
2745 switch (LdSt
.getOpcode()) {
2771 const MachineOperand
*BaseOp
;
2772 OffsetIsScalable
= false;
2773 if (!getMemOperandWithOffsetWidth(LdSt
, BaseOp
, Offset
, Width
, TRI
))
2775 BaseOps
.push_back(BaseOp
);
2779 // TODO: This was copied from SIInstrInfo. Could it be lifted to a common
2781 static bool memOpsHaveSameBasePtr(const MachineInstr
&MI1
,
2782 ArrayRef
<const MachineOperand
*> BaseOps1
,
2783 const MachineInstr
&MI2
,
2784 ArrayRef
<const MachineOperand
*> BaseOps2
) {
2785 // Only examine the first "base" operand of each instruction, on the
2786 // assumption that it represents the real base address of the memory access.
2787 // Other operands are typically offsets or indices from this base address.
2788 if (BaseOps1
.front()->isIdenticalTo(*BaseOps2
.front()))
2791 if (!MI1
.hasOneMemOperand() || !MI2
.hasOneMemOperand())
2794 auto MO1
= *MI1
.memoperands_begin();
2795 auto MO2
= *MI2
.memoperands_begin();
2796 if (MO1
->getAddrSpace() != MO2
->getAddrSpace())
2799 auto Base1
= MO1
->getValue();
2800 auto Base2
= MO2
->getValue();
2801 if (!Base1
|| !Base2
)
2803 Base1
= getUnderlyingObject(Base1
);
2804 Base2
= getUnderlyingObject(Base2
);
2806 if (isa
<UndefValue
>(Base1
) || isa
<UndefValue
>(Base2
))
2809 return Base1
== Base2
;
2812 bool RISCVInstrInfo::shouldClusterMemOps(
2813 ArrayRef
<const MachineOperand
*> BaseOps1
, int64_t Offset1
,
2814 bool OffsetIsScalable1
, ArrayRef
<const MachineOperand
*> BaseOps2
,
2815 int64_t Offset2
, bool OffsetIsScalable2
, unsigned ClusterSize
,
2816 unsigned NumBytes
) const {
2817 // If the mem ops (to be clustered) do not have the same base ptr, then they
2818 // should not be clustered
2819 if (!BaseOps1
.empty() && !BaseOps2
.empty()) {
2820 const MachineInstr
&FirstLdSt
= *BaseOps1
.front()->getParent();
2821 const MachineInstr
&SecondLdSt
= *BaseOps2
.front()->getParent();
2822 if (!memOpsHaveSameBasePtr(FirstLdSt
, BaseOps1
, SecondLdSt
, BaseOps2
))
2824 } else if (!BaseOps1
.empty() || !BaseOps2
.empty()) {
2825 // If only one base op is empty, they do not have the same base ptr
2829 unsigned CacheLineSize
=
2830 BaseOps1
.front()->getParent()->getMF()->getSubtarget().getCacheLineSize();
2831 // Assume a cache line size of 64 bytes if no size is set in RISCVSubtarget.
2832 CacheLineSize
= CacheLineSize
? CacheLineSize
: 64;
2833 // Cluster if the memory operations are on the same or a neighbouring cache
2834 // line, but limit the maximum ClusterSize to avoid creating too much
2835 // additional register pressure.
2836 return ClusterSize
<= 4 && std::abs(Offset1
- Offset2
) < CacheLineSize
;
2839 // Set BaseReg (the base register operand), Offset (the byte offset being
2840 // accessed) and the access Width of the passed instruction that reads/writes
2841 // memory. Returns false if the instruction does not read/write memory or the
2842 // BaseReg/Offset/Width can't be determined. Is not guaranteed to always
2843 // recognise base operands and offsets in all cases.
2844 // TODO: Add an IsScalable bool ref argument (like the equivalent AArch64
2845 // function) and set it as appropriate.
2846 bool RISCVInstrInfo::getMemOperandWithOffsetWidth(
2847 const MachineInstr
&LdSt
, const MachineOperand
*&BaseReg
, int64_t &Offset
,
2848 LocationSize
&Width
, const TargetRegisterInfo
*TRI
) const {
2849 if (!LdSt
.mayLoadOrStore())
2852 // Here we assume the standard RISC-V ISA, which uses a base+offset
2853 // addressing mode. You'll need to relax these conditions to support custom
2854 // load/store instructions.
2855 if (LdSt
.getNumExplicitOperands() != 3)
2857 if ((!LdSt
.getOperand(1).isReg() && !LdSt
.getOperand(1).isFI()) ||
2858 !LdSt
.getOperand(2).isImm())
2861 if (!LdSt
.hasOneMemOperand())
2864 Width
= (*LdSt
.memoperands_begin())->getSize();
2865 BaseReg
= &LdSt
.getOperand(1);
2866 Offset
= LdSt
.getOperand(2).getImm();
2870 bool RISCVInstrInfo::areMemAccessesTriviallyDisjoint(
2871 const MachineInstr
&MIa
, const MachineInstr
&MIb
) const {
2872 assert(MIa
.mayLoadOrStore() && "MIa must be a load or store.");
2873 assert(MIb
.mayLoadOrStore() && "MIb must be a load or store.");
2875 if (MIa
.hasUnmodeledSideEffects() || MIb
.hasUnmodeledSideEffects() ||
2876 MIa
.hasOrderedMemoryRef() || MIb
.hasOrderedMemoryRef())
2879 // Retrieve the base register, offset from the base register and width. Width
2880 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If
2881 // base registers are identical, and the offset of a lower memory access +
2882 // the width doesn't overlap the offset of a higher memory access,
2883 // then the memory accesses are different.
2884 const TargetRegisterInfo
*TRI
= STI
.getRegisterInfo();
2885 const MachineOperand
*BaseOpA
= nullptr, *BaseOpB
= nullptr;
2886 int64_t OffsetA
= 0, OffsetB
= 0;
2887 LocationSize WidthA
= 0, WidthB
= 0;
2888 if (getMemOperandWithOffsetWidth(MIa
, BaseOpA
, OffsetA
, WidthA
, TRI
) &&
2889 getMemOperandWithOffsetWidth(MIb
, BaseOpB
, OffsetB
, WidthB
, TRI
)) {
2890 if (BaseOpA
->isIdenticalTo(*BaseOpB
)) {
2891 int LowOffset
= std::min(OffsetA
, OffsetB
);
2892 int HighOffset
= std::max(OffsetA
, OffsetB
);
2893 LocationSize LowWidth
= (LowOffset
== OffsetA
) ? WidthA
: WidthB
;
2894 if (LowWidth
.hasValue() &&
2895 LowOffset
+ (int)LowWidth
.getValue() <= HighOffset
)
2902 std::pair
<unsigned, unsigned>
2903 RISCVInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF
) const {
2904 const unsigned Mask
= RISCVII::MO_DIRECT_FLAG_MASK
;
2905 return std::make_pair(TF
& Mask
, TF
& ~Mask
);
2908 ArrayRef
<std::pair
<unsigned, const char *>>
2909 RISCVInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
2910 using namespace RISCVII
;
2911 static const std::pair
<unsigned, const char *> TargetFlags
[] = {
2912 {MO_CALL
, "riscv-call"},
2913 {MO_LO
, "riscv-lo"},
2914 {MO_HI
, "riscv-hi"},
2915 {MO_PCREL_LO
, "riscv-pcrel-lo"},
2916 {MO_PCREL_HI
, "riscv-pcrel-hi"},
2917 {MO_GOT_HI
, "riscv-got-hi"},
2918 {MO_TPREL_LO
, "riscv-tprel-lo"},
2919 {MO_TPREL_HI
, "riscv-tprel-hi"},
2920 {MO_TPREL_ADD
, "riscv-tprel-add"},
2921 {MO_TLS_GOT_HI
, "riscv-tls-got-hi"},
2922 {MO_TLS_GD_HI
, "riscv-tls-gd-hi"},
2923 {MO_TLSDESC_HI
, "riscv-tlsdesc-hi"},
2924 {MO_TLSDESC_LOAD_LO
, "riscv-tlsdesc-load-lo"},
2925 {MO_TLSDESC_ADD_LO
, "riscv-tlsdesc-add-lo"},
2926 {MO_TLSDESC_CALL
, "riscv-tlsdesc-call"}};
2927 return ArrayRef(TargetFlags
);
2929 bool RISCVInstrInfo::isFunctionSafeToOutlineFrom(
2930 MachineFunction
&MF
, bool OutlineFromLinkOnceODRs
) const {
2931 const Function
&F
= MF
.getFunction();
2933 // Can F be deduplicated by the linker? If it can, don't outline from it.
2934 if (!OutlineFromLinkOnceODRs
&& F
.hasLinkOnceODRLinkage())
2937 // Don't outline from functions with section markings; the program could
2938 // expect that all the code is in the named section.
2942 // It's safe to outline from MF.
2946 bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock
&MBB
,
2947 unsigned &Flags
) const {
2948 // More accurate safety checking is done in getOutliningCandidateInfo.
2949 return TargetInstrInfo::isMBBSafeToOutlineFrom(MBB
, Flags
);
2952 // Enum values indicating how an outlined call should be constructed.
2953 enum MachineOutlinerConstructionID
{
2954 MachineOutlinerTailCall
,
2955 MachineOutlinerDefault
2958 bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault(
2959 MachineFunction
&MF
) const {
2960 return MF
.getFunction().hasMinSize();
2963 static bool isCandidatePatchable(const MachineBasicBlock
&MBB
) {
2964 const MachineFunction
*MF
= MBB
.getParent();
2965 const Function
&F
= MF
->getFunction();
2966 return F
.getFnAttribute("fentry-call").getValueAsBool() ||
2967 F
.hasFnAttribute("patchable-function-entry");
2970 static bool isMIReadsReg(const MachineInstr
&MI
, const TargetRegisterInfo
*TRI
,
2972 return MI
.readsRegister(RegNo
, TRI
) ||
2973 MI
.getDesc().hasImplicitUseOfPhysReg(RegNo
);
2976 static bool isMIModifiesReg(const MachineInstr
&MI
,
2977 const TargetRegisterInfo
*TRI
, unsigned RegNo
) {
2978 return MI
.modifiesRegister(RegNo
, TRI
) ||
2979 MI
.getDesc().hasImplicitDefOfPhysReg(RegNo
);
2982 static bool cannotInsertTailCall(const MachineBasicBlock
&MBB
) {
2983 if (!MBB
.back().isReturn())
2985 if (isCandidatePatchable(MBB
))
2988 // If the candidate reads the pre-set register
2989 // that can be used for expanding PseudoTAIL instruction,
2990 // then we cannot insert tail call.
2991 const TargetSubtargetInfo
&STI
= MBB
.getParent()->getSubtarget();
2992 unsigned TailExpandUseRegNo
=
2993 RISCVII::getTailExpandUseRegNo(STI
.getFeatureBits());
2994 for (const MachineInstr
&MI
: MBB
) {
2995 if (isMIReadsReg(MI
, STI
.getRegisterInfo(), TailExpandUseRegNo
))
2997 if (isMIModifiesReg(MI
, STI
.getRegisterInfo(), TailExpandUseRegNo
))
3003 static std::optional
<MachineOutlinerConstructionID
>
3004 analyzeCandidate(outliner::Candidate
&C
) {
3005 // If last instruction is return then we can rely on
3006 // the verification already performed in the getOutliningTypeImpl.
3007 if (C
.back().isReturn()) {
3008 assert(!cannotInsertTailCall(*C
.getMBB()) &&
3009 "The candidate who uses return instruction must be outlined "
3011 return MachineOutlinerTailCall
;
3014 auto CandidateUsesX5
= [](outliner::Candidate
&C
) {
3015 const TargetRegisterInfo
*TRI
= C
.getMF()->getSubtarget().getRegisterInfo();
3016 if (std::any_of(C
.begin(), C
.end(), [TRI
](const MachineInstr
&MI
) {
3017 return isMIModifiesReg(MI
, TRI
, RISCV::X5
);
3020 return !C
.isAvailableAcrossAndOutOfSeq(RISCV::X5
, *TRI
);
3023 if (!CandidateUsesX5(C
))
3024 return MachineOutlinerDefault
;
3026 return std::nullopt
;
3029 std::optional
<std::unique_ptr
<outliner::OutlinedFunction
>>
3030 RISCVInstrInfo::getOutliningCandidateInfo(
3031 const MachineModuleInfo
&MMI
,
3032 std::vector
<outliner::Candidate
> &RepeatedSequenceLocs
,
3033 unsigned MinRepeats
) const {
3035 // Each RepeatedSequenceLoc is identical.
3036 outliner::Candidate
&Candidate
= RepeatedSequenceLocs
[0];
3037 auto CandidateInfo
= analyzeCandidate(Candidate
);
3039 RepeatedSequenceLocs
.clear();
3041 // If the sequence doesn't have enough candidates left, then we're done.
3042 if (RepeatedSequenceLocs
.size() < MinRepeats
)
3043 return std::nullopt
;
3045 unsigned InstrSizeCExt
=
3046 Candidate
.getMF()->getSubtarget
<RISCVSubtarget
>().hasStdExtCOrZca() ? 2
3048 unsigned CallOverhead
= 0, FrameOverhead
= 0;
3050 MachineOutlinerConstructionID MOCI
= CandidateInfo
.value();
3052 case MachineOutlinerDefault
:
3053 // call t0, function = 8 bytes.
3055 // jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
3056 FrameOverhead
= InstrSizeCExt
;
3058 case MachineOutlinerTailCall
:
3059 // tail call = auipc + jalr in the worst case without linker relaxation.
3060 CallOverhead
= 4 + InstrSizeCExt
;
3061 // Using tail call we move ret instruction from caller to callee.
3066 for (auto &C
: RepeatedSequenceLocs
)
3067 C
.setCallInfo(MOCI
, CallOverhead
);
3069 unsigned SequenceSize
= 0;
3070 for (auto &MI
: Candidate
)
3071 SequenceSize
+= getInstSizeInBytes(MI
);
3073 return std::make_unique
<outliner::OutlinedFunction
>(
3074 RepeatedSequenceLocs
, SequenceSize
, FrameOverhead
, MOCI
);
3078 RISCVInstrInfo::getOutliningTypeImpl(const MachineModuleInfo
&MMI
,
3079 MachineBasicBlock::iterator
&MBBI
,
3080 unsigned Flags
) const {
3081 MachineInstr
&MI
= *MBBI
;
3082 MachineBasicBlock
*MBB
= MI
.getParent();
3083 const TargetRegisterInfo
*TRI
=
3084 MBB
->getParent()->getSubtarget().getRegisterInfo();
3085 const auto &F
= MI
.getMF()->getFunction();
3087 // We can manually strip out CFI instructions later.
3088 if (MI
.isCFIInstruction())
3089 // If current function has exception handling code, we can't outline &
3090 // strip these CFI instructions since it may break .eh_frame section
3091 // needed in unwinding.
3092 return F
.needsUnwindTableEntry() ? outliner::InstrType::Illegal
3093 : outliner::InstrType::Invisible
;
3095 if (cannotInsertTailCall(*MBB
) &&
3096 (MI
.isReturn() || isMIModifiesReg(MI
, TRI
, RISCV::X5
)))
3097 return outliner::InstrType::Illegal
;
3099 // Make sure the operands don't reference something unsafe.
3100 for (const auto &MO
: MI
.operands()) {
3102 // pcrel-hi and pcrel-lo can't put in separate sections, filter that out
3104 if (MO
.getTargetFlags() == RISCVII::MO_PCREL_LO
&&
3105 (MI
.getMF()->getTarget().getFunctionSections() || F
.hasComdat() ||
3106 F
.hasSection() || F
.getSectionPrefix()))
3107 return outliner::InstrType::Illegal
;
3110 return outliner::InstrType::Legal
;
3113 void RISCVInstrInfo::buildOutlinedFrame(
3114 MachineBasicBlock
&MBB
, MachineFunction
&MF
,
3115 const outliner::OutlinedFunction
&OF
) const {
3117 // Strip out any CFI instructions
3118 bool Changed
= true;
3121 auto I
= MBB
.begin();
3123 for (; I
!= E
; ++I
) {
3124 if (I
->isCFIInstruction()) {
3125 I
->removeFromParent();
3132 if (OF
.FrameConstructionID
== MachineOutlinerTailCall
)
3135 MBB
.addLiveIn(RISCV::X5
);
3137 // Add in a return instruction to the end of the outlined frame.
3138 MBB
.insert(MBB
.end(), BuildMI(MF
, DebugLoc(), get(RISCV::JALR
))
3139 .addReg(RISCV::X0
, RegState::Define
)
3144 MachineBasicBlock::iterator
RISCVInstrInfo::insertOutlinedCall(
3145 Module
&M
, MachineBasicBlock
&MBB
, MachineBasicBlock::iterator
&It
,
3146 MachineFunction
&MF
, outliner::Candidate
&C
) const {
3148 if (C
.CallConstructionID
== MachineOutlinerTailCall
) {
3149 It
= MBB
.insert(It
, BuildMI(MF
, DebugLoc(), get(RISCV::PseudoTAIL
))
3150 .addGlobalAddress(M
.getNamedValue(MF
.getName()),
3151 /*Offset=*/0, RISCVII::MO_CALL
));
3155 // Add in a call instruction to the outlined function at the given location.
3157 BuildMI(MF
, DebugLoc(), get(RISCV::PseudoCALLReg
), RISCV::X5
)
3158 .addGlobalAddress(M
.getNamedValue(MF
.getName()), 0,
3163 std::optional
<RegImmPair
> RISCVInstrInfo::isAddImmediate(const MachineInstr
&MI
,
3164 Register Reg
) const {
3165 // TODO: Handle cases where Reg is a super- or sub-register of the
3166 // destination register.
3167 const MachineOperand
&Op0
= MI
.getOperand(0);
3168 if (!Op0
.isReg() || Reg
!= Op0
.getReg())
3169 return std::nullopt
;
3171 // Don't consider ADDIW as a candidate because the caller may not be aware
3172 // of its sign extension behaviour.
3173 if (MI
.getOpcode() == RISCV::ADDI
&& MI
.getOperand(1).isReg() &&
3174 MI
.getOperand(2).isImm())
3175 return RegImmPair
{MI
.getOperand(1).getReg(), MI
.getOperand(2).getImm()};
3177 return std::nullopt
;
3180 // MIR printer helper function to annotate Operands with a comment.
3181 std::string
RISCVInstrInfo::createMIROperandComment(
3182 const MachineInstr
&MI
, const MachineOperand
&Op
, unsigned OpIdx
,
3183 const TargetRegisterInfo
*TRI
) const {
3184 // Print a generic comment for this operand if there is one.
3185 std::string GenericComment
=
3186 TargetInstrInfo::createMIROperandComment(MI
, Op
, OpIdx
, TRI
);
3187 if (!GenericComment
.empty())
3188 return GenericComment
;
3190 // If not, we must have an immediate operand.
3192 return std::string();
3194 const MCInstrDesc
&Desc
= MI
.getDesc();
3195 if (OpIdx
>= Desc
.getNumOperands())
3196 return std::string();
3198 std::string Comment
;
3199 raw_string_ostream
OS(Comment
);
3201 const MCOperandInfo
&OpInfo
= Desc
.operands()[OpIdx
];
3203 // Print the full VType operand of vsetvli/vsetivli instructions, and the SEW
3204 // operand of vector codegen pseudos.
3205 switch (OpInfo
.OperandType
) {
3206 case RISCVOp::OPERAND_VTYPEI10
:
3207 case RISCVOp::OPERAND_VTYPEI11
: {
3208 unsigned Imm
= Op
.getImm();
3209 RISCVVType::printVType(Imm
, OS
);
3212 case RISCVOp::OPERAND_SEW
:
3213 case RISCVOp::OPERAND_SEW_MASK
: {
3214 unsigned Log2SEW
= Op
.getImm();
3215 unsigned SEW
= Log2SEW
? 1 << Log2SEW
: 8;
3216 assert(RISCVVType::isValidSEW(SEW
) && "Unexpected SEW");
3220 case RISCVOp::OPERAND_VEC_POLICY
:
3221 unsigned Policy
= Op
.getImm();
3222 assert(Policy
<= (RISCVII::TAIL_AGNOSTIC
| RISCVII::MASK_AGNOSTIC
) &&
3223 "Invalid Policy Value");
3224 OS
<< (Policy
& RISCVII::TAIL_AGNOSTIC
? "ta" : "tu") << ", "
3225 << (Policy
& RISCVII::MASK_AGNOSTIC
? "ma" : "mu");
3233 #define CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL) \
3234 RISCV::Pseudo##OP##_##LMUL
3236 #define CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL) \
3237 RISCV::Pseudo##OP##_##LMUL##_MASK
3239 #define CASE_RVV_OPCODE_LMUL(OP, LMUL) \
3240 CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL): \
3241 case CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL)
3243 #define CASE_RVV_OPCODE_UNMASK_WIDEN(OP) \
3244 CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF8): \
3245 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF4): \
3246 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF2): \
3247 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M1): \
3248 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M2): \
3249 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M4)
3251 #define CASE_RVV_OPCODE_UNMASK(OP) \
3252 CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \
3253 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M8)
3255 #define CASE_RVV_OPCODE_MASK_WIDEN(OP) \
3256 CASE_RVV_OPCODE_MASK_LMUL(OP, MF8): \
3257 case CASE_RVV_OPCODE_MASK_LMUL(OP, MF4): \
3258 case CASE_RVV_OPCODE_MASK_LMUL(OP, MF2): \
3259 case CASE_RVV_OPCODE_MASK_LMUL(OP, M1): \
3260 case CASE_RVV_OPCODE_MASK_LMUL(OP, M2): \
3261 case CASE_RVV_OPCODE_MASK_LMUL(OP, M4)
3263 #define CASE_RVV_OPCODE_MASK(OP) \
3264 CASE_RVV_OPCODE_MASK_WIDEN(OP): \
3265 case CASE_RVV_OPCODE_MASK_LMUL(OP, M8)
3267 #define CASE_RVV_OPCODE_WIDEN(OP) \
3268 CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \
3269 case CASE_RVV_OPCODE_MASK_WIDEN(OP)
3271 #define CASE_RVV_OPCODE(OP) \
3272 CASE_RVV_OPCODE_UNMASK(OP): \
3273 case CASE_RVV_OPCODE_MASK(OP)
3277 #define CASE_VMA_OPCODE_COMMON(OP, TYPE, LMUL) \
3278 RISCV::PseudoV##OP##_##TYPE##_##LMUL
3280 #define CASE_VMA_OPCODE_LMULS_M1(OP, TYPE) \
3281 CASE_VMA_OPCODE_COMMON(OP, TYPE, M1): \
3282 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M2): \
3283 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M4): \
3284 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M8)
3286 #define CASE_VMA_OPCODE_LMULS_MF2(OP, TYPE) \
3287 CASE_VMA_OPCODE_COMMON(OP, TYPE, MF2): \
3288 case CASE_VMA_OPCODE_LMULS_M1(OP, TYPE)
3290 #define CASE_VMA_OPCODE_LMULS_MF4(OP, TYPE) \
3291 CASE_VMA_OPCODE_COMMON(OP, TYPE, MF4): \
3292 case CASE_VMA_OPCODE_LMULS_MF2(OP, TYPE)
3294 #define CASE_VMA_OPCODE_LMULS(OP, TYPE) \
3295 CASE_VMA_OPCODE_COMMON(OP, TYPE, MF8): \
3296 case CASE_VMA_OPCODE_LMULS_MF4(OP, TYPE)
3298 // VFMA instructions are SEW specific.
3299 #define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL, SEW) \
3300 RISCV::PseudoV##OP##_##TYPE##_##LMUL##_##SEW
3302 #define CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE, SEW) \
3303 CASE_VFMA_OPCODE_COMMON(OP, TYPE, M1, SEW): \
3304 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M2, SEW): \
3305 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M4, SEW): \
3306 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M8, SEW)
3308 #define CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE, SEW) \
3309 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF2, SEW): \
3310 case CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE, SEW)
3312 #define CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE, SEW) \
3313 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF4, SEW): \
3314 case CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE, SEW)
3316 #define CASE_VFMA_OPCODE_VV(OP) \
3317 CASE_VFMA_OPCODE_LMULS_MF4(OP, VV, E16): \
3318 case CASE_VFMA_OPCODE_LMULS_MF2(OP, VV, E32): \
3319 case CASE_VFMA_OPCODE_LMULS_M1(OP, VV, E64)
3321 #define CASE_VFMA_SPLATS(OP) \
3322 CASE_VFMA_OPCODE_LMULS_MF4(OP, VFPR16, E16): \
3323 case CASE_VFMA_OPCODE_LMULS_MF2(OP, VFPR32, E32): \
3324 case CASE_VFMA_OPCODE_LMULS_M1(OP, VFPR64, E64)
3327 bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr
&MI
,
3328 unsigned &SrcOpIdx1
,
3329 unsigned &SrcOpIdx2
) const {
3330 const MCInstrDesc
&Desc
= MI
.getDesc();
3331 if (!Desc
.isCommutable())
3334 switch (MI
.getOpcode()) {
3335 case RISCV::TH_MVEQZ
:
3336 case RISCV::TH_MVNEZ
:
3337 // We can't commute operands if operand 2 (i.e., rs1 in
3338 // mveqz/mvnez rd,rs1,rs2) is the zero-register (as it is
3339 // not valid as the in/out-operand 1).
3340 if (MI
.getOperand(2).getReg() == RISCV::X0
)
3342 // Operands 1 and 2 are commutable, if we switch the opcode.
3343 return fixCommutedOpIndices(SrcOpIdx1
, SrcOpIdx2
, 1, 2);
3344 case RISCV::TH_MULA
:
3345 case RISCV::TH_MULAW
:
3346 case RISCV::TH_MULAH
:
3347 case RISCV::TH_MULS
:
3348 case RISCV::TH_MULSW
:
3349 case RISCV::TH_MULSH
:
3350 // Operands 2 and 3 are commutable.
3351 return fixCommutedOpIndices(SrcOpIdx1
, SrcOpIdx2
, 2, 3);
3352 case RISCV::PseudoCCMOVGPRNoX0
:
3353 case RISCV::PseudoCCMOVGPR
:
3354 // Operands 4 and 5 are commutable.
3355 return fixCommutedOpIndices(SrcOpIdx1
, SrcOpIdx2
, 4, 5);
3356 case CASE_RVV_OPCODE(VADD_VV
):
3357 case CASE_RVV_OPCODE(VAND_VV
):
3358 case CASE_RVV_OPCODE(VOR_VV
):
3359 case CASE_RVV_OPCODE(VXOR_VV
):
3360 case CASE_RVV_OPCODE_MASK(VMSEQ_VV
):
3361 case CASE_RVV_OPCODE_MASK(VMSNE_VV
):
3362 case CASE_RVV_OPCODE(VMIN_VV
):
3363 case CASE_RVV_OPCODE(VMINU_VV
):
3364 case CASE_RVV_OPCODE(VMAX_VV
):
3365 case CASE_RVV_OPCODE(VMAXU_VV
):
3366 case CASE_RVV_OPCODE(VMUL_VV
):
3367 case CASE_RVV_OPCODE(VMULH_VV
):
3368 case CASE_RVV_OPCODE(VMULHU_VV
):
3369 case CASE_RVV_OPCODE_WIDEN(VWADD_VV
):
3370 case CASE_RVV_OPCODE_WIDEN(VWADDU_VV
):
3371 case CASE_RVV_OPCODE_WIDEN(VWMUL_VV
):
3372 case CASE_RVV_OPCODE_WIDEN(VWMULU_VV
):
3373 case CASE_RVV_OPCODE_WIDEN(VWMACC_VV
):
3374 case CASE_RVV_OPCODE_WIDEN(VWMACCU_VV
):
3375 case CASE_RVV_OPCODE_UNMASK(VADC_VVM
):
3376 case CASE_RVV_OPCODE(VSADD_VV
):
3377 case CASE_RVV_OPCODE(VSADDU_VV
):
3378 case CASE_RVV_OPCODE(VAADD_VV
):
3379 case CASE_RVV_OPCODE(VAADDU_VV
):
3380 case CASE_RVV_OPCODE(VSMUL_VV
):
3381 // Operands 2 and 3 are commutable.
3382 return fixCommutedOpIndices(SrcOpIdx1
, SrcOpIdx2
, 2, 3);
3383 case CASE_VFMA_SPLATS(FMADD
):
3384 case CASE_VFMA_SPLATS(FMSUB
):
3385 case CASE_VFMA_SPLATS(FMACC
):
3386 case CASE_VFMA_SPLATS(FMSAC
):
3387 case CASE_VFMA_SPLATS(FNMADD
):
3388 case CASE_VFMA_SPLATS(FNMSUB
):
3389 case CASE_VFMA_SPLATS(FNMACC
):
3390 case CASE_VFMA_SPLATS(FNMSAC
):
3391 case CASE_VFMA_OPCODE_VV(FMACC
):
3392 case CASE_VFMA_OPCODE_VV(FMSAC
):
3393 case CASE_VFMA_OPCODE_VV(FNMACC
):
3394 case CASE_VFMA_OPCODE_VV(FNMSAC
):
3395 case CASE_VMA_OPCODE_LMULS(MADD
, VX
):
3396 case CASE_VMA_OPCODE_LMULS(NMSUB
, VX
):
3397 case CASE_VMA_OPCODE_LMULS(MACC
, VX
):
3398 case CASE_VMA_OPCODE_LMULS(NMSAC
, VX
):
3399 case CASE_VMA_OPCODE_LMULS(MACC
, VV
):
3400 case CASE_VMA_OPCODE_LMULS(NMSAC
, VV
): {
3401 // If the tail policy is undisturbed we can't commute.
3402 assert(RISCVII::hasVecPolicyOp(MI
.getDesc().TSFlags
));
3403 if ((MI
.getOperand(MI
.getNumExplicitOperands() - 1).getImm() & 1) == 0)
3406 // For these instructions we can only swap operand 1 and operand 3 by
3407 // changing the opcode.
3408 unsigned CommutableOpIdx1
= 1;
3409 unsigned CommutableOpIdx2
= 3;
3410 if (!fixCommutedOpIndices(SrcOpIdx1
, SrcOpIdx2
, CommutableOpIdx1
,
3415 case CASE_VFMA_OPCODE_VV(FMADD
):
3416 case CASE_VFMA_OPCODE_VV(FMSUB
):
3417 case CASE_VFMA_OPCODE_VV(FNMADD
):
3418 case CASE_VFMA_OPCODE_VV(FNMSUB
):
3419 case CASE_VMA_OPCODE_LMULS(MADD
, VV
):
3420 case CASE_VMA_OPCODE_LMULS(NMSUB
, VV
): {
3421 // If the tail policy is undisturbed we can't commute.
3422 assert(RISCVII::hasVecPolicyOp(MI
.getDesc().TSFlags
));
3423 if ((MI
.getOperand(MI
.getNumExplicitOperands() - 1).getImm() & 1) == 0)
3426 // For these instructions we have more freedom. We can commute with the
3427 // other multiplicand or with the addend/subtrahend/minuend.
3429 // Any fixed operand must be from source 1, 2 or 3.
3430 if (SrcOpIdx1
!= CommuteAnyOperandIndex
&& SrcOpIdx1
> 3)
3432 if (SrcOpIdx2
!= CommuteAnyOperandIndex
&& SrcOpIdx2
> 3)
3435 // It both ops are fixed one must be the tied source.
3436 if (SrcOpIdx1
!= CommuteAnyOperandIndex
&&
3437 SrcOpIdx2
!= CommuteAnyOperandIndex
&& SrcOpIdx1
!= 1 && SrcOpIdx2
!= 1)
3440 // Look for two different register operands assumed to be commutable
3441 // regardless of the FMA opcode. The FMA opcode is adjusted later if
3443 if (SrcOpIdx1
== CommuteAnyOperandIndex
||
3444 SrcOpIdx2
== CommuteAnyOperandIndex
) {
3445 // At least one of operands to be commuted is not specified and
3446 // this method is free to choose appropriate commutable operands.
3447 unsigned CommutableOpIdx1
= SrcOpIdx1
;
3448 if (SrcOpIdx1
== SrcOpIdx2
) {
3449 // Both of operands are not fixed. Set one of commutable
3450 // operands to the tied source.
3451 CommutableOpIdx1
= 1;
3452 } else if (SrcOpIdx1
== CommuteAnyOperandIndex
) {
3453 // Only one of the operands is not fixed.
3454 CommutableOpIdx1
= SrcOpIdx2
;
3457 // CommutableOpIdx1 is well defined now. Let's choose another commutable
3458 // operand and assign its index to CommutableOpIdx2.
3459 unsigned CommutableOpIdx2
;
3460 if (CommutableOpIdx1
!= 1) {
3461 // If we haven't already used the tied source, we must use it now.
3462 CommutableOpIdx2
= 1;
3464 Register Op1Reg
= MI
.getOperand(CommutableOpIdx1
).getReg();
3466 // The commuted operands should have different registers.
3467 // Otherwise, the commute transformation does not change anything and
3468 // is useless. We use this as a hint to make our decision.
3469 if (Op1Reg
!= MI
.getOperand(2).getReg())
3470 CommutableOpIdx2
= 2;
3472 CommutableOpIdx2
= 3;
3475 // Assign the found pair of commutable indices to SrcOpIdx1 and
3476 // SrcOpIdx2 to return those values.
3477 if (!fixCommutedOpIndices(SrcOpIdx1
, SrcOpIdx2
, CommutableOpIdx1
,
3486 return TargetInstrInfo::findCommutedOpIndices(MI
, SrcOpIdx1
, SrcOpIdx2
);
3490 #define CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL) \
3491 case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL: \
3492 Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL; \
3495 #define CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE) \
3496 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1) \
3497 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2) \
3498 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4) \
3499 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8)
3501 #define CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE) \
3502 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2) \
3503 CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE)
3505 #define CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE) \
3506 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4) \
3507 CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE)
3509 #define CASE_VMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE) \
3510 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8) \
3511 CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE)
3513 #define CASE_VMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \
3514 CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16) \
3515 CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32) \
3516 CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64)
3518 // VFMA depends on SEW.
3519 #define CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL, SEW) \
3520 case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL##_##SEW: \
3521 Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL##_##SEW; \
3524 #define CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW) \
3525 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1, SEW) \
3526 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2, SEW) \
3527 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4, SEW) \
3528 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8, SEW)
3530 #define CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE, SEW) \
3531 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2, SEW) \
3532 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW)
3534 #define CASE_VFMA_CHANGE_OPCODE_VV(OLDOP, NEWOP) \
3535 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VV, E16) \
3536 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E32) \
3537 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VV, E64)
3539 #define CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE, SEW) \
3540 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4, SEW) \
3541 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE, SEW)
3543 #define CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE, SEW) \
3544 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8, SEW) \
3545 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE, SEW)
3547 #define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \
3548 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16, E16) \
3549 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32, E32) \
3550 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64, E64)
3552 MachineInstr
*RISCVInstrInfo::commuteInstructionImpl(MachineInstr
&MI
,
3555 unsigned OpIdx2
) const {
3556 auto cloneIfNew
= [NewMI
](MachineInstr
&MI
) -> MachineInstr
& {
3558 return *MI
.getParent()->getParent()->CloneMachineInstr(&MI
);
3562 switch (MI
.getOpcode()) {
3563 case RISCV::TH_MVEQZ
:
3564 case RISCV::TH_MVNEZ
: {
3565 auto &WorkingMI
= cloneIfNew(MI
);
3566 WorkingMI
.setDesc(get(MI
.getOpcode() == RISCV::TH_MVEQZ
? RISCV::TH_MVNEZ
3567 : RISCV::TH_MVEQZ
));
3568 return TargetInstrInfo::commuteInstructionImpl(WorkingMI
, false, OpIdx1
,
3571 case RISCV::PseudoCCMOVGPRNoX0
:
3572 case RISCV::PseudoCCMOVGPR
: {
3573 // CCMOV can be commuted by inverting the condition.
3574 auto CC
= static_cast<RISCVCC::CondCode
>(MI
.getOperand(3).getImm());
3575 CC
= RISCVCC::getOppositeBranchCondition(CC
);
3576 auto &WorkingMI
= cloneIfNew(MI
);
3577 WorkingMI
.getOperand(3).setImm(CC
);
3578 return TargetInstrInfo::commuteInstructionImpl(WorkingMI
, /*NewMI*/ false,
3581 case CASE_VFMA_SPLATS(FMACC
):
3582 case CASE_VFMA_SPLATS(FMADD
):
3583 case CASE_VFMA_SPLATS(FMSAC
):
3584 case CASE_VFMA_SPLATS(FMSUB
):
3585 case CASE_VFMA_SPLATS(FNMACC
):
3586 case CASE_VFMA_SPLATS(FNMADD
):
3587 case CASE_VFMA_SPLATS(FNMSAC
):
3588 case CASE_VFMA_SPLATS(FNMSUB
):
3589 case CASE_VFMA_OPCODE_VV(FMACC
):
3590 case CASE_VFMA_OPCODE_VV(FMSAC
):
3591 case CASE_VFMA_OPCODE_VV(FNMACC
):
3592 case CASE_VFMA_OPCODE_VV(FNMSAC
):
3593 case CASE_VMA_OPCODE_LMULS(MADD
, VX
):
3594 case CASE_VMA_OPCODE_LMULS(NMSUB
, VX
):
3595 case CASE_VMA_OPCODE_LMULS(MACC
, VX
):
3596 case CASE_VMA_OPCODE_LMULS(NMSAC
, VX
):
3597 case CASE_VMA_OPCODE_LMULS(MACC
, VV
):
3598 case CASE_VMA_OPCODE_LMULS(NMSAC
, VV
): {
3599 // It only make sense to toggle these between clobbering the
3600 // addend/subtrahend/minuend one of the multiplicands.
3601 assert((OpIdx1
== 1 || OpIdx2
== 1) && "Unexpected opcode index");
3602 assert((OpIdx1
== 3 || OpIdx2
== 3) && "Unexpected opcode index");
3604 switch (MI
.getOpcode()) {
3606 llvm_unreachable("Unexpected opcode");
3607 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMACC
, FMADD
)
3608 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMADD
, FMACC
)
3609 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSAC
, FMSUB
)
3610 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSUB
, FMSAC
)
3611 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMACC
, FNMADD
)
3612 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMADD
, FNMACC
)
3613 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSAC
, FNMSUB
)
3614 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSUB
, FNMSAC
)
3615 CASE_VFMA_CHANGE_OPCODE_VV(FMACC
, FMADD
)
3616 CASE_VFMA_CHANGE_OPCODE_VV(FMSAC
, FMSUB
)
3617 CASE_VFMA_CHANGE_OPCODE_VV(FNMACC
, FNMADD
)
3618 CASE_VFMA_CHANGE_OPCODE_VV(FNMSAC
, FNMSUB
)
3619 CASE_VMA_CHANGE_OPCODE_LMULS(MACC
, MADD
, VX
)
3620 CASE_VMA_CHANGE_OPCODE_LMULS(MADD
, MACC
, VX
)
3621 CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC
, NMSUB
, VX
)
3622 CASE_VMA_CHANGE_OPCODE_LMULS(NMSUB
, NMSAC
, VX
)
3623 CASE_VMA_CHANGE_OPCODE_LMULS(MACC
, MADD
, VV
)
3624 CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC
, NMSUB
, VV
)
3627 auto &WorkingMI
= cloneIfNew(MI
);
3628 WorkingMI
.setDesc(get(Opc
));
3629 return TargetInstrInfo::commuteInstructionImpl(WorkingMI
, /*NewMI=*/false,
  case CASE_VFMA_OPCODE_VV(FMADD):
  case CASE_VFMA_OPCODE_VV(FMSUB):
  case CASE_VFMA_OPCODE_VV(FNMADD):
  case CASE_VFMA_OPCODE_VV(FNMSUB):
  case CASE_VMA_OPCODE_LMULS(MADD, VV):
  case CASE_VMA_OPCODE_LMULS(NMSUB, VV): {
    assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
    // If one of the operands is the addend, we need to change the opcode.
    // Otherwise we're just swapping 2 of the multiplicands.
    if (OpIdx1 == 3 || OpIdx2 == 3) {
      unsigned Opc;
      switch (MI.getOpcode()) {
      default:
        llvm_unreachable("Unexpected opcode");
        CASE_VFMA_CHANGE_OPCODE_VV(FMADD, FMACC)
        CASE_VFMA_CHANGE_OPCODE_VV(FMSUB, FMSAC)
        CASE_VFMA_CHANGE_OPCODE_VV(FNMADD, FNMACC)
        CASE_VFMA_CHANGE_OPCODE_VV(FNMSUB, FNMSAC)
        CASE_VMA_CHANGE_OPCODE_LMULS(MADD, MACC, VV)
        CASE_VMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VV)
      }

      auto &WorkingMI = cloneIfNew(MI);
      WorkingMI.setDesc(get(Opc));
      return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                     OpIdx1, OpIdx2);
    }
    // Let the default code handle it.
    break;
  }
  }

  return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}
#undef CASE_RVV_OPCODE_UNMASK_LMUL
#undef CASE_RVV_OPCODE_MASK_LMUL
#undef CASE_RVV_OPCODE_LMUL
#undef CASE_RVV_OPCODE_UNMASK_WIDEN
#undef CASE_RVV_OPCODE_UNMASK
#undef CASE_RVV_OPCODE_MASK_WIDEN
#undef CASE_RVV_OPCODE_MASK
#undef CASE_RVV_OPCODE_WIDEN
#undef CASE_RVV_OPCODE

#undef CASE_VMA_OPCODE_COMMON
#undef CASE_VMA_OPCODE_LMULS_M1
#undef CASE_VMA_OPCODE_LMULS_MF2
#undef CASE_VMA_OPCODE_LMULS_MF4
#undef CASE_VMA_OPCODE_LMULS
#undef CASE_VFMA_OPCODE_COMMON
#undef CASE_VFMA_OPCODE_LMULS_M1
#undef CASE_VFMA_OPCODE_LMULS_MF2
#undef CASE_VFMA_OPCODE_LMULS_MF4
#undef CASE_VFMA_OPCODE_VV
#undef CASE_VFMA_SPLATS
#define CASE_WIDEOP_OPCODE_COMMON(OP, LMUL)                                    \
  RISCV::PseudoV##OP##_##LMUL##_TIED

#define CASE_WIDEOP_OPCODE_LMULS_MF4(OP)                                       \
  CASE_WIDEOP_OPCODE_COMMON(OP, MF4):                                          \
  case CASE_WIDEOP_OPCODE_COMMON(OP, MF2):                                     \
  case CASE_WIDEOP_OPCODE_COMMON(OP, M1):                                      \
  case CASE_WIDEOP_OPCODE_COMMON(OP, M2):                                      \
  case CASE_WIDEOP_OPCODE_COMMON(OP, M4)

#define CASE_WIDEOP_OPCODE_LMULS(OP)                                           \
  CASE_WIDEOP_OPCODE_COMMON(OP, MF8):                                          \
  case CASE_WIDEOP_OPCODE_LMULS_MF4(OP)

#define CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL)                             \
  case RISCV::PseudoV##OP##_##LMUL##_TIED:                                     \
    NewOpc = RISCV::PseudoV##OP##_##LMUL;                                      \
    break;

#define CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP)                                \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4)                                    \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2)                                    \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1)                                     \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2)                                     \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4)

#define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP)                                    \
  CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8)                                    \
  CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP)
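// Expansion example (added note): CASE_WIDEOP_OPCODE_COMMON(WADD_WV, M1)
// produces RISCV::PseudoVWADD_WV_M1_TIED, so CASE_WIDEOP_OPCODE_LMULS(WADD_WV)
// yields one case label per LMUL from MF8 through M4, and the matching
// CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADD_WV) rewrites each _TIED pseudo to its
// untied counterpart.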
// FP widening ops may be SEW aware. Create SEW-aware cases for them.
#define CASE_FP_WIDEOP_OPCODE_COMMON(OP, LMUL, SEW)                            \
  RISCV::PseudoV##OP##_##LMUL##_##SEW##_TIED

#define CASE_FP_WIDEOP_OPCODE_LMULS_MF4(OP)                                    \
  CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF4, E16):                                  \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E16):                             \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E32):                             \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E16):                              \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E32):                              \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E16):                              \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E32):                              \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E16):                              \
  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E32)

#define CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL, SEW)                     \
  case RISCV::PseudoV##OP##_##LMUL##_##SEW##_TIED:                             \
    NewOpc = RISCV::PseudoV##OP##_##LMUL##_##SEW;                              \
    break;

#define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP)                             \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4, E16)                            \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E16)                            \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E32)                            \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E16)                             \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E32)                             \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E16)                             \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E32)                             \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E16)                             \
  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E32)

#define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS(OP)                                 \
  CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP)
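// Added note: the _TIED widening pseudos handled below keep their destination
// tied to the wide source operand. Converting to three-address form picks the
// untied opcode and, as the BuildMI sequences show, supplies an explicit undef
// passthru operand in place of the tie.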
MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
                                                    LiveVariables *LV,
                                                    LiveIntervals *LIS) const {
  MachineInstrBuilder MIB;
  switch (MI.getOpcode()) {
  default:
    return nullptr;
  case CASE_FP_WIDEOP_OPCODE_LMULS_MF4(FWADD_WV):
  case CASE_FP_WIDEOP_OPCODE_LMULS_MF4(FWSUB_WV): {
    assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
           MI.getNumExplicitOperands() == 7 &&
           "Expect 7 explicit operands rd, rs2, rs1, rm, vl, sew, policy");
    // If the tail policy is undisturbed we can't convert.
    if ((MI.getOperand(RISCVII::getVecPolicyOpNum(MI.getDesc())).getImm() &
         1) == 0)
      return nullptr;
    unsigned NewOpc;
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode");
      CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWADD_WV)
      CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWSUB_WV)
    }

    MachineBasicBlock &MBB = *MI.getParent();
    MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
              .add(MI.getOperand(0))
              .addReg(MI.getOperand(0).getReg(), RegState::Undef)
              .add(MI.getOperand(1))
              .add(MI.getOperand(2))
              .add(MI.getOperand(3))
              .add(MI.getOperand(4))
              .add(MI.getOperand(5))
              .add(MI.getOperand(6));
    break;
  }
  case CASE_WIDEOP_OPCODE_LMULS(WADD_WV):
  case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV):
  case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV):
  case CASE_WIDEOP_OPCODE_LMULS(WSUBU_WV): {
    // If the tail policy is undisturbed we can't convert.
    assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
           MI.getNumExplicitOperands() == 6);
    if ((MI.getOperand(5).getImm() & 1) == 0)
      return nullptr;

    unsigned NewOpc;
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode");
      CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADD_WV)
      CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADDU_WV)
      CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUB_WV)
      CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUBU_WV)
    }

    MachineBasicBlock &MBB = *MI.getParent();
    MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
              .add(MI.getOperand(0))
              .addReg(MI.getOperand(0).getReg(), RegState::Undef)
              .add(MI.getOperand(1))
              .add(MI.getOperand(2))
              .add(MI.getOperand(3))
              .add(MI.getOperand(4))
              .add(MI.getOperand(5));
    break;
  }
  }
  MIB.copyImplicitOps(MI);

  if (LV) {
    unsigned NumOps = MI.getNumOperands();
    for (unsigned I = 1; I < NumOps; ++I) {
      MachineOperand &Op = MI.getOperand(I);
      if (Op.isReg() && Op.isKill())
        LV->replaceKillInstruction(Op.getReg(), MI, *MIB);
    }
  }

  if (LIS) {
    SlotIndex Idx = LIS->ReplaceMachineInstrInMaps(MI, *MIB);

    if (MI.getOperand(0).isEarlyClobber()) {
      // The use of operand 1 was tied to the early-clobber def of operand 0,
      // so its live interval could have ended at an early-clobber slot. Now
      // that they are no longer tied, update it to the normal register slot.
      LiveInterval &LI = LIS->getInterval(MI.getOperand(1).getReg());
      LiveRange::Segment *S = LI.getSegmentContaining(Idx);
      if (S->end == Idx.getRegSlot(true))
        S->end = Idx.getRegSlot();
    }
  }

  return MIB;
}
#undef CASE_WIDEOP_OPCODE_COMMON
#undef CASE_WIDEOP_OPCODE_LMULS_MF4
#undef CASE_WIDEOP_OPCODE_LMULS
#undef CASE_WIDEOP_CHANGE_OPCODE_COMMON
#undef CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4
#undef CASE_WIDEOP_CHANGE_OPCODE_LMULS
#undef CASE_FP_WIDEOP_OPCODE_COMMON
#undef CASE_FP_WIDEOP_OPCODE_LMULS_MF4
#undef CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON
#undef CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4
#undef CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS
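// Worked examples for the strategies below (added as an illustrative sketch;
// exact register choices are up to the caller and the allocator):
//   Amount = 8            -> slli  dest, dest, 3
//   Amount = 40  (5 << 3) -> slli  dest, dest, 3 ; sh2add dest, dest, dest   (Zba)
//   Amount = 9   (8 + 1)  -> slli  tmp, dest, 3  ; add   dest, tmp, dest     (no Zba)
//   Amount = 7   (8 - 1)  -> slli  tmp, dest, 3  ; sub   dest, tmp, dest
//   Amount = 11  (0b1011) -> copy  acc, dest     ; slli  dest, dest, 1 ;
//                            add   acc, acc, dest; slli  dest, dest, 2 ;
//                            add   dest, dest, acc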
void RISCVInstrInfo::mulImm(MachineFunction &MF, MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator II, const DebugLoc &DL,
                            Register DestReg, uint32_t Amount,
                            MachineInstr::MIFlag Flag) const {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  if (llvm::has_single_bit<uint32_t>(Amount)) {
    uint32_t ShiftAmount = Log2_32(Amount);
    if (ShiftAmount == 0)
      return;
    BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
        .addReg(DestReg, RegState::Kill)
        .addImm(ShiftAmount)
        .setMIFlag(Flag);
  } else if (STI.hasStdExtZba() &&
             ((Amount % 3 == 0 && isPowerOf2_64(Amount / 3)) ||
              (Amount % 5 == 0 && isPowerOf2_64(Amount / 5)) ||
              (Amount % 9 == 0 && isPowerOf2_64(Amount / 9)))) {
    // We can use Zba SHXADD+SLLI instructions for multiply in some cases.
    unsigned Opc;
    uint32_t ShiftAmount;
    if (Amount % 9 == 0) {
      Opc = RISCV::SH3ADD;
      ShiftAmount = Log2_64(Amount / 9);
    } else if (Amount % 5 == 0) {
      Opc = RISCV::SH2ADD;
      ShiftAmount = Log2_64(Amount / 5);
    } else if (Amount % 3 == 0) {
      Opc = RISCV::SH1ADD;
      ShiftAmount = Log2_64(Amount / 3);
    } else {
      llvm_unreachable("implied by if-clause");
    }
    if (ShiftAmount)
      BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
          .addReg(DestReg, RegState::Kill)
          .addImm(ShiftAmount)
          .setMIFlag(Flag);
    BuildMI(MBB, II, DL, get(Opc), DestReg)
        .addReg(DestReg, RegState::Kill)
        .addReg(DestReg)
        .setMIFlag(Flag);
  } else if (llvm::has_single_bit<uint32_t>(Amount - 1)) {
    Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
    uint32_t ShiftAmount = Log2_32(Amount - 1);
    BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister)
        .addReg(DestReg)
        .addImm(ShiftAmount)
        .setMIFlag(Flag);
    BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg)
        .addReg(ScaledRegister, RegState::Kill)
        .addReg(DestReg, RegState::Kill)
        .setMIFlag(Flag);
  } else if (llvm::has_single_bit<uint32_t>(Amount + 1)) {
    Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
    uint32_t ShiftAmount = Log2_32(Amount + 1);
    BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister)
        .addReg(DestReg)
        .addImm(ShiftAmount)
        .setMIFlag(Flag);
    BuildMI(MBB, II, DL, get(RISCV::SUB), DestReg)
        .addReg(ScaledRegister, RegState::Kill)
        .addReg(DestReg, RegState::Kill)
        .setMIFlag(Flag);
  } else if (STI.hasStdExtZmmul()) {
    Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass);
    movImm(MBB, II, DL, N, Amount, Flag);
    BuildMI(MBB, II, DL, get(RISCV::MUL), DestReg)
        .addReg(DestReg, RegState::Kill)
        .addReg(N, RegState::Kill)
        .setMIFlag(Flag);
  } else {
    Register Acc;
    uint32_t PrevShiftAmount = 0;
    for (uint32_t ShiftAmount = 0; Amount >> ShiftAmount; ShiftAmount++) {
      if (Amount & (1U << ShiftAmount)) {
        if (ShiftAmount)
          BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
              .addReg(DestReg, RegState::Kill)
              .addImm(ShiftAmount - PrevShiftAmount)
              .setMIFlag(Flag);
        if (Amount >> (ShiftAmount + 1)) {
          // If we don't have an accumulator yet, create it and copy DestReg.
          if (!Acc) {
            Acc = MRI.createVirtualRegister(&RISCV::GPRRegClass);
            BuildMI(MBB, II, DL, get(TargetOpcode::COPY), Acc)
                .addReg(DestReg)
                .setMIFlag(Flag);
          } else {
            BuildMI(MBB, II, DL, get(RISCV::ADD), Acc)
                .addReg(Acc, RegState::Kill)
                .addReg(DestReg)
                .setMIFlag(Flag);
          }
        }
        PrevShiftAmount = ShiftAmount;
      }
    }
    assert(Acc && "Expected valid accumulator");
    BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg)
        .addReg(DestReg, RegState::Kill)
        .addReg(Acc, RegState::Kill)
        .setMIFlag(Flag);
  }
}
ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
  static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
      {{MONontemporalBit0, "riscv-nontemporal-domain-bit-0"},
       {MONontemporalBit1, "riscv-nontemporal-domain-bit-1"}};
  return ArrayRef(TargetFlags);
}
unsigned RISCVInstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
  return OptLevel >= CodeGenOptLevel::Aggressive
             ? STI.getTailDupAggressiveThreshold()
             : 2;
}
// Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
bool RISCV::isSEXT_W(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() &&
         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0;
}

// Returns true if this is the zext.w pattern, add.uw rd, rs1, x0.
bool RISCV::isZEXT_W(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::ADD_UW && MI.getOperand(1).isReg() &&
         MI.getOperand(2).isReg() && MI.getOperand(2).getReg() == RISCV::X0;
}

// Returns true if this is the zext.b pattern, andi rd, rs1, 255.
bool RISCV::isZEXT_B(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::ANDI && MI.getOperand(1).isReg() &&
         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 255;
}
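// Added note: the whole-register memory ops below follow the vl<nf>re<eew>.v
// and vs<nf>r.v naming from the RVV spec, e.g. VL2RE32_V loads two whole
// vector registers with an encoded EEW of 32.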
static bool isRVVWholeLoadStore(unsigned Opcode) {
  switch (Opcode) {
  default:
    return false;
  case RISCV::VS1R_V:
  case RISCV::VS2R_V:
  case RISCV::VS4R_V:
  case RISCV::VS8R_V:
  case RISCV::VL1RE8_V:
  case RISCV::VL2RE8_V:
  case RISCV::VL4RE8_V:
  case RISCV::VL8RE8_V:
  case RISCV::VL1RE16_V:
  case RISCV::VL2RE16_V:
  case RISCV::VL4RE16_V:
  case RISCV::VL8RE16_V:
  case RISCV::VL1RE32_V:
  case RISCV::VL2RE32_V:
  case RISCV::VL4RE32_V:
  case RISCV::VL8RE32_V:
  case RISCV::VL1RE64_V:
  case RISCV::VL2RE64_V:
  case RISCV::VL4RE64_V:
  case RISCV::VL8RE64_V:
    return true;
  }
}
bool RISCV::isRVVSpill(const MachineInstr &MI) {
  // RVV lacks any support for immediate addressing for stack addresses, so be
  // conservative.
  unsigned Opcode = MI.getOpcode();
  if (!RISCVVPseudosTable::getPseudoInfo(Opcode) &&
      !isRVVWholeLoadStore(Opcode) && !isRVVSpillForZvlsseg(Opcode))
    return false;
  return true;
}
std::optional<std::pair<unsigned, unsigned>>
RISCV::isRVVSpillForZvlsseg(unsigned Opcode) {
  switch (Opcode) {
  default:
    return std::nullopt;
  case RISCV::PseudoVSPILL2_M1:
  case RISCV::PseudoVRELOAD2_M1:
    return std::make_pair(2u, 1u);
  case RISCV::PseudoVSPILL2_M2:
  case RISCV::PseudoVRELOAD2_M2:
    return std::make_pair(2u, 2u);
  case RISCV::PseudoVSPILL2_M4:
  case RISCV::PseudoVRELOAD2_M4:
    return std::make_pair(2u, 4u);
  case RISCV::PseudoVSPILL3_M1:
  case RISCV::PseudoVRELOAD3_M1:
    return std::make_pair(3u, 1u);
  case RISCV::PseudoVSPILL3_M2:
  case RISCV::PseudoVRELOAD3_M2:
    return std::make_pair(3u, 2u);
  case RISCV::PseudoVSPILL4_M1:
  case RISCV::PseudoVRELOAD4_M1:
    return std::make_pair(4u, 1u);
  case RISCV::PseudoVSPILL4_M2:
  case RISCV::PseudoVRELOAD4_M2:
    return std::make_pair(4u, 2u);
  case RISCV::PseudoVSPILL5_M1:
  case RISCV::PseudoVRELOAD5_M1:
    return std::make_pair(5u, 1u);
  case RISCV::PseudoVSPILL6_M1:
  case RISCV::PseudoVRELOAD6_M1:
    return std::make_pair(6u, 1u);
  case RISCV::PseudoVSPILL7_M1:
  case RISCV::PseudoVRELOAD7_M1:
    return std::make_pair(7u, 1u);
  case RISCV::PseudoVSPILL8_M1:
  case RISCV::PseudoVRELOAD8_M1:
    return std::make_pair(8u, 1u);
  }
}
bool RISCV::isFaultFirstLoad(const MachineInstr &MI) {
  return MI.getNumExplicitDefs() == 2 &&
         MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) && !MI.isInlineAsm();
}
bool RISCV::hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2) {
  int16_t MI1FrmOpIdx =
      RISCV::getNamedOperandIdx(MI1.getOpcode(), RISCV::OpName::frm);
  int16_t MI2FrmOpIdx =
      RISCV::getNamedOperandIdx(MI2.getOpcode(), RISCV::OpName::frm);
  if (MI1FrmOpIdx < 0 || MI2FrmOpIdx < 0)
    return false;
  MachineOperand FrmOp1 = MI1.getOperand(MI1FrmOpIdx);
  MachineOperand FrmOp2 = MI2.getOperand(MI2FrmOpIdx);
  return FrmOp1.getImm() == FrmOp2.getImm();
}
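// Added example: with SEW = 32 (Log2SEW = 5), vsll.vx only demands the low
// 5 bits of the scalar shift amount, vnsrl.wx demands the low 6 bits
// (lg2(2*SEW)), and vadd.vx demands all 32 bits, which matches the three
// return values below.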
std::optional<unsigned>
RISCV::getVectorLowDemandedScalarBits(uint16_t Opcode, unsigned Log2SEW) {
  // TODO: Handle Zvbb instructions
  switch (Opcode) {
  default:
    return std::nullopt;

  // 11.6. Vector Single-Width Shift Instructions
  case RISCV::VSLL_VX:
  case RISCV::VSRL_VX:
  case RISCV::VSRA_VX:
  // 12.4. Vector Single-Width Scaling Shift Instructions
  case RISCV::VSSRL_VX:
  case RISCV::VSSRA_VX:
    // Only the low lg2(SEW) bits of the shift-amount value are used.
    return Log2SEW;

  // 11.7 Vector Narrowing Integer Right Shift Instructions
  case RISCV::VNSRL_WX:
  case RISCV::VNSRA_WX:
  // 12.5. Vector Narrowing Fixed-Point Clip Instructions
  case RISCV::VNCLIPU_WX:
  case RISCV::VNCLIP_WX:
    // Only the low lg2(2*SEW) bits of the shift-amount value are used.
    return Log2SEW + 1;

  // 11.1. Vector Single-Width Integer Add and Subtract
  case RISCV::VADD_VX:
  case RISCV::VSUB_VX:
  case RISCV::VRSUB_VX:
  // 11.2. Vector Widening Integer Add/Subtract
  case RISCV::VWADDU_VX:
  case RISCV::VWSUBU_VX:
  case RISCV::VWADD_VX:
  case RISCV::VWSUB_VX:
  case RISCV::VWADDU_WX:
  case RISCV::VWSUBU_WX:
  case RISCV::VWADD_WX:
  case RISCV::VWSUB_WX:
  // 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
  case RISCV::VADC_VXM:
  case RISCV::VADC_VIM:
  case RISCV::VMADC_VXM:
  case RISCV::VMADC_VIM:
  case RISCV::VMADC_VX:
  case RISCV::VSBC_VXM:
  case RISCV::VMSBC_VXM:
  case RISCV::VMSBC_VX:
  // 11.5 Vector Bitwise Logical Instructions
  case RISCV::VAND_VX:
  case RISCV::VOR_VX:
  case RISCV::VXOR_VX:
  // 11.8. Vector Integer Compare Instructions
  case RISCV::VMSEQ_VX:
  case RISCV::VMSNE_VX:
  case RISCV::VMSLTU_VX:
  case RISCV::VMSLT_VX:
  case RISCV::VMSLEU_VX:
  case RISCV::VMSLE_VX:
  case RISCV::VMSGTU_VX:
  case RISCV::VMSGT_VX:
  // 11.9. Vector Integer Min/Max Instructions
  case RISCV::VMINU_VX:
  case RISCV::VMIN_VX:
  case RISCV::VMAXU_VX:
  case RISCV::VMAX_VX:
  // 11.10. Vector Single-Width Integer Multiply Instructions
  case RISCV::VMUL_VX:
  case RISCV::VMULH_VX:
  case RISCV::VMULHU_VX:
  case RISCV::VMULHSU_VX:
  // 11.11. Vector Integer Divide Instructions
  case RISCV::VDIVU_VX:
  case RISCV::VDIV_VX:
  case RISCV::VREMU_VX:
  case RISCV::VREM_VX:
  // 11.12. Vector Widening Integer Multiply Instructions
  case RISCV::VWMUL_VX:
  case RISCV::VWMULU_VX:
  case RISCV::VWMULSU_VX:
  // 11.13. Vector Single-Width Integer Multiply-Add Instructions
  case RISCV::VMACC_VX:
  case RISCV::VNMSAC_VX:
  case RISCV::VMADD_VX:
  case RISCV::VNMSUB_VX:
  // 11.14. Vector Widening Integer Multiply-Add Instructions
  case RISCV::VWMACCU_VX:
  case RISCV::VWMACC_VX:
  case RISCV::VWMACCSU_VX:
  case RISCV::VWMACCUS_VX:
  // 11.15. Vector Integer Merge Instructions
  case RISCV::VMERGE_VXM:
  // 11.16. Vector Integer Move Instructions
  case RISCV::VMV_V_X:
  // 12.1. Vector Single-Width Saturating Add and Subtract
  case RISCV::VSADDU_VX:
  case RISCV::VSADD_VX:
  case RISCV::VSSUBU_VX:
  case RISCV::VSSUB_VX:
  // 12.2. Vector Single-Width Averaging Add and Subtract
  case RISCV::VAADDU_VX:
  case RISCV::VAADD_VX:
  case RISCV::VASUBU_VX:
  case RISCV::VASUB_VX:
  // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
  case RISCV::VSMUL_VX:
  // 16.1. Integer Scalar Move Instructions
  case RISCV::VMV_S_X:
    return 1U << Log2SEW;
  }
}
unsigned RISCV::getRVVMCOpcode(unsigned RVVPseudoOpcode) {
  const RISCVVPseudosTable::PseudoInfo *RVV =
      RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode);
  if (!RVV)
    return 0;
  return RVV->BaseInstr;
}
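// Added note: DestEEW below encodes the destination EEW relative to SEW, so
// Scaled = Log2SEW + (DestEEW - 1); e.g. a widening op with SEW = 16
// (Log2SEW = 4) and DestEEW = 2 has a destination EEW of 32.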
unsigned RISCV::getDestLog2EEW(const MCInstrDesc &Desc, unsigned Log2SEW) {
  unsigned DestEEW =
      (Desc.TSFlags & RISCVII::DestEEWMask) >> RISCVII::DestEEWShift;
  // EEW = 1
  if (DestEEW == 0)
    return 0;
  // EEW = SEW * 2^(DestEEW - 1)
  unsigned Scaled = Log2SEW + (DestEEW - 1);
  assert(Scaled >= 3 && Scaled <= 6);
  return Scaled;
}
/// Given two VL operands, do we know that LHS <= RHS?
bool RISCV::isVLKnownLE(const MachineOperand &LHS, const MachineOperand &RHS) {
  if (LHS.isReg() && RHS.isReg() && LHS.getReg().isVirtual() &&
      LHS.getReg() == RHS.getReg())
    return true;
  if (RHS.isImm() && RHS.getImm() == RISCV::VLMaxSentinel)
    return true;
  if (LHS.isImm() && LHS.getImm() == RISCV::VLMaxSentinel)
    return false;
  if (!LHS.isImm() || !RHS.isImm())
    return false;
  return LHS.getImm() <= RHS.getImm();
}
namespace {
class RISCVPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
  const MachineInstr *LHS;
  const MachineInstr *RHS;
  SmallVector<MachineOperand, 3> Cond;

public:
  RISCVPipelinerLoopInfo(const MachineInstr *LHS, const MachineInstr *RHS,
                         const SmallVectorImpl<MachineOperand> &Cond)
      : LHS(LHS), RHS(RHS), Cond(Cond.begin(), Cond.end()) {}

  bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
    // Make the instructions for loop control be placed in stage 0.
    // The predecessors of LHS/RHS are considered by the caller.
    if (LHS && MI == LHS)
      return true;
    if (RHS && MI == RHS)
      return true;
    return false;
  }

  std::optional<bool> createTripCountGreaterCondition(
      int TC, MachineBasicBlock &MBB,
      SmallVectorImpl<MachineOperand> &CondParam) override {
    // A branch instruction will be inserted as "if (Cond) goto epilogue".
    // Cond is normalized for such use.
    // The predecessors of the branch are assumed to have already been inserted.
    CondParam = Cond;
    return {};
  }

  void setPreheader(MachineBasicBlock *NewPreheader) override {}

  void adjustTripCount(int TripCountAdjust) override {}

  void disposed() override {}
};
} // namespace
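// Added note: Cond here uses the layout produced by analyzeBranch for RISC-V
// conditional branches: Cond[0] is the branch opcode and Cond[1]/Cond[2] are
// the two compared registers, which is why FindRegDef below inspects Cond[1]
// and Cond[2].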
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
RISCVInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  SmallVector<MachineOperand, 4> Cond;
  if (analyzeBranch(*LoopBB, TBB, FBB, Cond, /*AllowModify=*/false))
    return nullptr;

  // Infinite loops are not supported
  if (TBB == LoopBB && FBB == LoopBB)
    return nullptr;

  // Must be conditional branch
  if (FBB == nullptr)
    return nullptr;

  assert((TBB == LoopBB || FBB == LoopBB) &&
         "The Loop must be a single-basic-block loop");

  // Normalization for createTripCountGreaterCondition()
  if (TBB == LoopBB)
    reverseBranchCondition(Cond);

  const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
  auto FindRegDef = [&MRI](MachineOperand &Op) -> const MachineInstr * {
    if (!Op.isReg())
      return nullptr;
    Register Reg = Op.getReg();
    if (!Reg.isVirtual())
      return nullptr;
    return MRI.getVRegDef(Reg);
  };

  const MachineInstr *LHS = FindRegDef(Cond[1]);
  const MachineInstr *RHS = FindRegDef(Cond[2]);
  if (LHS && LHS->isPHI())
    return nullptr;
  if (RHS && RHS->isPHI())
    return nullptr;

  return std::make_unique<RISCVPipelinerLoopInfo>(LHS, RHS, Cond);
}