[OptTable] Fix typo VALUE => VALUES (NFCI) (#121523)
[llvm-project.git] / llvm / lib / Target / RISCV / RISCVInstrInfo.cpp
blobf24940795e433f96cdbdc5a6611730dab700643a
1 //===-- RISCVInstrInfo.cpp - RISC-V Instruction Information -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the RISC-V implementation of the TargetInstrInfo class.
11 //===----------------------------------------------------------------------===//
13 #include "RISCVInstrInfo.h"
14 #include "MCTargetDesc/RISCVBaseInfo.h"
15 #include "MCTargetDesc/RISCVMatInt.h"
16 #include "RISCV.h"
17 #include "RISCVMachineFunctionInfo.h"
18 #include "RISCVSubtarget.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/Analysis/MemoryLocation.h"
22 #include "llvm/Analysis/ValueTracking.h"
23 #include "llvm/CodeGen/LiveIntervals.h"
24 #include "llvm/CodeGen/LiveVariables.h"
25 #include "llvm/CodeGen/MachineCombinerPattern.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/MachineTraceMetrics.h"
29 #include "llvm/CodeGen/RegisterScavenging.h"
30 #include "llvm/CodeGen/StackMaps.h"
31 #include "llvm/IR/DebugInfoMetadata.h"
32 #include "llvm/IR/Module.h"
33 #include "llvm/MC/MCInstBuilder.h"
34 #include "llvm/MC/TargetRegistry.h"
35 #include "llvm/Support/ErrorHandling.h"
37 using namespace llvm;
39 #define GEN_CHECK_COMPRESS_INSTR
40 #include "RISCVGenCompressInstEmitter.inc"
42 #define GET_INSTRINFO_CTOR_DTOR
43 #define GET_INSTRINFO_NAMED_OPS
44 #include "RISCVGenInstrInfo.inc"
46 static cl::opt<bool> PreferWholeRegisterMove(
47 "riscv-prefer-whole-register-move", cl::init(false), cl::Hidden,
48 cl::desc("Prefer whole register move for vector registers."));
50 static cl::opt<MachineTraceStrategy> ForceMachineCombinerStrategy(
51 "riscv-force-machine-combiner-strategy", cl::Hidden,
52 cl::desc("Force machine combiner to use a specific strategy for machine "
53 "trace metrics evaluation."),
54 cl::init(MachineTraceStrategy::TS_NumStrategies),
55 cl::values(clEnumValN(MachineTraceStrategy::TS_Local, "local",
56 "Local strategy."),
57 clEnumValN(MachineTraceStrategy::TS_MinInstrCount, "min-instr",
58 "MinInstrCount strategy.")));
60 namespace llvm::RISCVVPseudosTable {
62 using namespace RISCV;
64 #define GET_RISCVVPseudosTable_IMPL
65 #include "RISCVGenSearchableTables.inc"
67 } // namespace llvm::RISCVVPseudosTable
69 namespace llvm::RISCV {
71 #define GET_RISCVMaskedPseudosTable_IMPL
72 #include "RISCVGenSearchableTables.inc"
74 } // end namespace llvm::RISCV
76 RISCVInstrInfo::RISCVInstrInfo(RISCVSubtarget &STI)
77 : RISCVGenInstrInfo(RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP),
78 STI(STI) {}
80 MCInst RISCVInstrInfo::getNop() const {
81 if (STI.hasStdExtCOrZca())
82 return MCInstBuilder(RISCV::C_NOP);
83 return MCInstBuilder(RISCV::ADDI)
84 .addReg(RISCV::X0)
85 .addReg(RISCV::X0)
86 .addImm(0);
89 Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
90 int &FrameIndex) const {
91 unsigned Dummy;
92 return isLoadFromStackSlot(MI, FrameIndex, Dummy);
95 Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
96 int &FrameIndex,
97 unsigned &MemBytes) const {
98 switch (MI.getOpcode()) {
99 default:
100 return 0;
101 case RISCV::LB:
102 case RISCV::LBU:
103 MemBytes = 1;
104 break;
105 case RISCV::LH:
106 case RISCV::LH_INX:
107 case RISCV::LHU:
108 case RISCV::FLH:
109 MemBytes = 2;
110 break;
111 case RISCV::LW:
112 case RISCV::LW_INX:
113 case RISCV::FLW:
114 case RISCV::LWU:
115 MemBytes = 4;
116 break;
117 case RISCV::LD:
118 case RISCV::FLD:
119 MemBytes = 8;
120 break;
123 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
124 MI.getOperand(2).getImm() == 0) {
125 FrameIndex = MI.getOperand(1).getIndex();
126 return MI.getOperand(0).getReg();
129 return 0;
132 Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
133 int &FrameIndex) const {
134 unsigned Dummy;
135 return isStoreToStackSlot(MI, FrameIndex, Dummy);
138 Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
139 int &FrameIndex,
140 unsigned &MemBytes) const {
141 switch (MI.getOpcode()) {
142 default:
143 return 0;
144 case RISCV::SB:
145 MemBytes = 1;
146 break;
147 case RISCV::SH:
148 case RISCV::SH_INX:
149 case RISCV::FSH:
150 MemBytes = 2;
151 break;
152 case RISCV::SW:
153 case RISCV::SW_INX:
154 case RISCV::FSW:
155 MemBytes = 4;
156 break;
157 case RISCV::SD:
158 case RISCV::FSD:
159 MemBytes = 8;
160 break;
163 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
164 MI.getOperand(2).getImm() == 0) {
165 FrameIndex = MI.getOperand(1).getIndex();
166 return MI.getOperand(0).getReg();
169 return 0;
172 bool RISCVInstrInfo::isReallyTriviallyReMaterializable(
173 const MachineInstr &MI) const {
174 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
175 case RISCV::VMV_V_X:
176 case RISCV::VFMV_V_F:
177 case RISCV::VMV_V_I:
178 case RISCV::VMV_S_X:
179 case RISCV::VFMV_S_F:
180 case RISCV::VID_V:
181 return MI.getOperand(1).isUndef();
182 default:
183 return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
187 static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg,
188 unsigned NumRegs) {
189 return DstReg > SrcReg && (DstReg - SrcReg) < NumRegs;
192 static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,
193 const MachineBasicBlock &MBB,
194 MachineBasicBlock::const_iterator MBBI,
195 MachineBasicBlock::const_iterator &DefMBBI,
196 RISCVII::VLMUL LMul) {
197 if (PreferWholeRegisterMove)
198 return false;
200 assert(MBBI->getOpcode() == TargetOpcode::COPY &&
201 "Unexpected COPY instruction.");
202 Register SrcReg = MBBI->getOperand(1).getReg();
203 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
205 bool FoundDef = false;
206 bool FirstVSetVLI = false;
207 unsigned FirstSEW = 0;
208 while (MBBI != MBB.begin()) {
209 --MBBI;
210 if (MBBI->isMetaInstruction())
211 continue;
213 if (MBBI->getOpcode() == RISCV::PseudoVSETVLI ||
214 MBBI->getOpcode() == RISCV::PseudoVSETVLIX0 ||
215 MBBI->getOpcode() == RISCV::PseudoVSETIVLI) {
216 // There is a vsetvli between COPY and source define instruction.
217 // vy = def_vop ... (producing instruction)
218 // ...
219 // vsetvli
220 // ...
221 // vx = COPY vy
222 if (!FoundDef) {
223 if (!FirstVSetVLI) {
224 FirstVSetVLI = true;
225 unsigned FirstVType = MBBI->getOperand(2).getImm();
226 RISCVII::VLMUL FirstLMul = RISCVVType::getVLMUL(FirstVType);
227 FirstSEW = RISCVVType::getSEW(FirstVType);
228 // The first encountered vsetvli must have the same lmul as the
229 // register class of COPY.
230 if (FirstLMul != LMul)
231 return false;
233 // Only permit `vsetvli x0, x0, vtype` between COPY and the source
234 // define instruction.
235 if (MBBI->getOperand(0).getReg() != RISCV::X0)
236 return false;
237 if (MBBI->getOperand(1).isImm())
238 return false;
239 if (MBBI->getOperand(1).getReg() != RISCV::X0)
240 return false;
241 continue;
244 // MBBI is the first vsetvli before the producing instruction.
245 unsigned VType = MBBI->getOperand(2).getImm();
246 // If there is a vsetvli between COPY and the producing instruction.
247 if (FirstVSetVLI) {
248 // If SEW is different, return false.
249 if (RISCVVType::getSEW(VType) != FirstSEW)
250 return false;
253 // If the vsetvli is tail undisturbed, keep the whole register move.
254 if (!RISCVVType::isTailAgnostic(VType))
255 return false;
257 // The checking is conservative. We only have register classes for
258 // LMUL = 1/2/4/8. We should be able to convert vmv1r.v to vmv.v.v
259 // for fractional LMUL operations. However, we could not use the vsetvli
260 // lmul for widening operations. The result of widening operation is
261 // 2 x LMUL.
262 return LMul == RISCVVType::getVLMUL(VType);
263 } else if (MBBI->isInlineAsm() || MBBI->isCall()) {
264 return false;
265 } else if (MBBI->getNumDefs()) {
266 // Check all the instructions which will change VL.
267 // For example, vleff has implicit def VL.
268 if (MBBI->modifiesRegister(RISCV::VL, /*TRI=*/nullptr))
269 return false;
271 // Only converting whole register copies to vmv.v.v when the defining
272 // value appears in the explicit operands.
273 for (const MachineOperand &MO : MBBI->explicit_operands()) {
274 if (!MO.isReg() || !MO.isDef())
275 continue;
276 if (!FoundDef && TRI->regsOverlap(MO.getReg(), SrcReg)) {
277 // We only permit the source of COPY has the same LMUL as the defined
278 // operand.
279 // There are cases we need to keep the whole register copy if the LMUL
280 // is different.
281 // For example,
282 // $x0 = PseudoVSETIVLI 4, 73 // vsetivli zero, 4, e16,m2,ta,m
283 // $v28m4 = PseudoVWADD_VV_M2 $v26m2, $v8m2
284 // # The COPY may be created by vlmul_trunc intrinsic.
285 // $v26m2 = COPY renamable $v28m2, implicit killed $v28m4
287 // After widening, the valid value will be 4 x e32 elements. If we
288 // convert the COPY to vmv.v.v, it will only copy 4 x e16 elements.
289 // FIXME: The COPY of subregister of Zvlsseg register will not be able
290 // to convert to vmv.v.[v|i] under the constraint.
291 if (MO.getReg() != SrcReg)
292 return false;
294 // In widening reduction instructions with LMUL_1 input vector case,
295 // only checking the LMUL is insufficient due to reduction result is
296 // always LMUL_1.
297 // For example,
298 // $x11 = PseudoVSETIVLI 1, 64 // vsetivli a1, 1, e8, m1, ta, mu
299 // $v8m1 = PseudoVWREDSUM_VS_M1 $v26, $v27
300 // $v26 = COPY killed renamable $v8
301 // After widening, The valid value will be 1 x e16 elements. If we
302 // convert the COPY to vmv.v.v, it will only copy 1 x e8 elements.
303 uint64_t TSFlags = MBBI->getDesc().TSFlags;
304 if (RISCVII::isRVVWideningReduction(TSFlags))
305 return false;
307 // If the producing instruction does not depend on vsetvli, do not
308 // convert COPY to vmv.v.v. For example, VL1R_V or PseudoVRELOAD.
309 if (!RISCVII::hasSEWOp(TSFlags) || !RISCVII::hasVLOp(TSFlags))
310 return false;
312 // Found the definition.
313 FoundDef = true;
314 DefMBBI = MBBI;
315 break;
321 return false;
324 void RISCVInstrInfo::copyPhysRegVector(
325 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
326 const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, bool KillSrc,
327 const TargetRegisterClass *RegClass) const {
328 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
329 RISCVII::VLMUL LMul = RISCVRI::getLMul(RegClass->TSFlags);
330 unsigned NF = RISCVRI::getNF(RegClass->TSFlags);
332 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
333 uint16_t DstEncoding = TRI->getEncodingValue(DstReg);
334 auto [LMulVal, Fractional] = RISCVVType::decodeVLMUL(LMul);
335 assert(!Fractional && "It is impossible be fractional lmul here.");
336 unsigned NumRegs = NF * LMulVal;
337 bool ReversedCopy =
338 forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NumRegs);
339 if (ReversedCopy) {
340 // If the src and dest overlap when copying a tuple, we need to copy the
341 // registers in reverse.
342 SrcEncoding += NumRegs - 1;
343 DstEncoding += NumRegs - 1;
346 unsigned I = 0;
347 auto GetCopyInfo = [&](uint16_t SrcEncoding, uint16_t DstEncoding)
348 -> std::tuple<RISCVII::VLMUL, const TargetRegisterClass &, unsigned,
349 unsigned, unsigned> {
350 if (ReversedCopy) {
351 // For reversed copying, if there are enough aligned registers(8/4/2), we
352 // can do a larger copy(LMUL8/4/2).
353 // Besides, we have already known that DstEncoding is larger than
354 // SrcEncoding in forwardCopyWillClobberTuple, so the difference between
355 // DstEncoding and SrcEncoding should be >= LMUL value we try to use to
356 // avoid clobbering.
357 uint16_t Diff = DstEncoding - SrcEncoding;
358 if (I + 8 <= NumRegs && Diff >= 8 && SrcEncoding % 8 == 7 &&
359 DstEncoding % 8 == 7)
360 return {RISCVII::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V,
361 RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8};
362 if (I + 4 <= NumRegs && Diff >= 4 && SrcEncoding % 4 == 3 &&
363 DstEncoding % 4 == 3)
364 return {RISCVII::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V,
365 RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4};
366 if (I + 2 <= NumRegs && Diff >= 2 && SrcEncoding % 2 == 1 &&
367 DstEncoding % 2 == 1)
368 return {RISCVII::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V,
369 RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2};
370 // Or we should do LMUL1 copying.
371 return {RISCVII::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
372 RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
375 // For forward copying, if source register encoding and destination register
376 // encoding are aligned to 8/4/2, we can do a LMUL8/4/2 copying.
377 if (I + 8 <= NumRegs && SrcEncoding % 8 == 0 && DstEncoding % 8 == 0)
378 return {RISCVII::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V,
379 RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8};
380 if (I + 4 <= NumRegs && SrcEncoding % 4 == 0 && DstEncoding % 4 == 0)
381 return {RISCVII::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V,
382 RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4};
383 if (I + 2 <= NumRegs && SrcEncoding % 2 == 0 && DstEncoding % 2 == 0)
384 return {RISCVII::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V,
385 RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2};
386 // Or we should do LMUL1 copying.
387 return {RISCVII::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
388 RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
390 auto FindRegWithEncoding = [TRI](const TargetRegisterClass &RegClass,
391 uint16_t Encoding) {
392 MCRegister Reg = RISCV::V0 + Encoding;
393 if (RISCVRI::getLMul(RegClass.TSFlags) == RISCVII::LMUL_1)
394 return Reg;
395 return TRI->getMatchingSuperReg(Reg, RISCV::sub_vrm1_0, &RegClass);
397 while (I != NumRegs) {
398 // For non-segment copying, we only do this once as the registers are always
399 // aligned.
400 // For segment copying, we may do this several times. If the registers are
401 // aligned to larger LMUL, we can eliminate some copyings.
402 auto [LMulCopied, RegClass, Opc, VVOpc, VIOpc] =
403 GetCopyInfo(SrcEncoding, DstEncoding);
404 auto [NumCopied, _] = RISCVVType::decodeVLMUL(LMulCopied);
406 MachineBasicBlock::const_iterator DefMBBI;
407 if (LMul == LMulCopied &&
408 isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) {
409 Opc = VVOpc;
410 if (DefMBBI->getOpcode() == VIOpc)
411 Opc = VIOpc;
414 // Emit actual copying.
415 // For reversed copying, the encoding should be decreased.
416 MCRegister ActualSrcReg = FindRegWithEncoding(
417 RegClass, ReversedCopy ? (SrcEncoding - NumCopied + 1) : SrcEncoding);
418 MCRegister ActualDstReg = FindRegWithEncoding(
419 RegClass, ReversedCopy ? (DstEncoding - NumCopied + 1) : DstEncoding);
421 auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), ActualDstReg);
422 bool UseVMV_V_I = RISCV::getRVVMCOpcode(Opc) == RISCV::VMV_V_I;
423 bool UseVMV = UseVMV_V_I || RISCV::getRVVMCOpcode(Opc) == RISCV::VMV_V_V;
424 if (UseVMV)
425 MIB.addReg(ActualDstReg, RegState::Undef);
426 if (UseVMV_V_I)
427 MIB = MIB.add(DefMBBI->getOperand(2));
428 else
429 MIB = MIB.addReg(ActualSrcReg, getKillRegState(KillSrc));
430 if (UseVMV) {
431 const MCInstrDesc &Desc = DefMBBI->getDesc();
432 MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL
433 unsigned Log2SEW =
434 DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm();
435 MIB.addImm(Log2SEW ? Log2SEW : 3); // SEW
436 MIB.addImm(0); // tu, mu
437 MIB.addReg(RISCV::VL, RegState::Implicit);
438 MIB.addReg(RISCV::VTYPE, RegState::Implicit);
441 // If we are copying reversely, we should decrease the encoding.
442 SrcEncoding += (ReversedCopy ? -NumCopied : NumCopied);
443 DstEncoding += (ReversedCopy ? -NumCopied : NumCopied);
444 I += NumCopied;
448 void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
449 MachineBasicBlock::iterator MBBI,
450 const DebugLoc &DL, MCRegister DstReg,
451 MCRegister SrcReg, bool KillSrc,
452 bool RenamableDest, bool RenamableSrc) const {
453 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
455 if (RISCV::GPRRegClass.contains(DstReg, SrcReg)) {
456 BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg)
457 .addReg(SrcReg,
458 getKillRegState(KillSrc) | getRenamableRegState(RenamableSrc))
459 .addImm(0);
460 return;
463 if (RISCV::GPRF16RegClass.contains(DstReg, SrcReg)) {
464 BuildMI(MBB, MBBI, DL, get(RISCV::PseudoMV_FPR16INX), DstReg)
465 .addReg(SrcReg,
466 getKillRegState(KillSrc) | getRenamableRegState(RenamableSrc));
467 return;
470 if (RISCV::GPRF32RegClass.contains(DstReg, SrcReg)) {
471 BuildMI(MBB, MBBI, DL, get(RISCV::PseudoMV_FPR32INX), DstReg)
472 .addReg(SrcReg,
473 getKillRegState(KillSrc) | getRenamableRegState(RenamableSrc));
474 return;
477 if (RISCV::GPRPairRegClass.contains(DstReg, SrcReg)) {
478 // Emit an ADDI for both parts of GPRPair.
479 BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
480 TRI->getSubReg(DstReg, RISCV::sub_gpr_even))
481 .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_even),
482 getKillRegState(KillSrc))
483 .addImm(0);
484 BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
485 TRI->getSubReg(DstReg, RISCV::sub_gpr_odd))
486 .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_odd),
487 getKillRegState(KillSrc))
488 .addImm(0);
489 return;
492 // Handle copy from csr
493 if (RISCV::VCSRRegClass.contains(SrcReg) &&
494 RISCV::GPRRegClass.contains(DstReg)) {
495 BuildMI(MBB, MBBI, DL, get(RISCV::CSRRS), DstReg)
496 .addImm(RISCVSysReg::lookupSysRegByName(TRI->getName(SrcReg))->Encoding)
497 .addReg(RISCV::X0);
498 return;
501 if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) {
502 unsigned Opc;
503 if (STI.hasStdExtZfh()) {
504 Opc = RISCV::FSGNJ_H;
505 } else {
506 assert(STI.hasStdExtF() &&
507 (STI.hasStdExtZfhmin() || STI.hasStdExtZfbfmin()) &&
508 "Unexpected extensions");
509 // Zfhmin/Zfbfmin doesn't have FSGNJ_H, replace FSGNJ_H with FSGNJ_S.
510 DstReg = TRI->getMatchingSuperReg(DstReg, RISCV::sub_16,
511 &RISCV::FPR32RegClass);
512 SrcReg = TRI->getMatchingSuperReg(SrcReg, RISCV::sub_16,
513 &RISCV::FPR32RegClass);
514 Opc = RISCV::FSGNJ_S;
516 BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
517 .addReg(SrcReg, getKillRegState(KillSrc))
518 .addReg(SrcReg, getKillRegState(KillSrc));
519 return;
522 if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) {
523 BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_S), DstReg)
524 .addReg(SrcReg, getKillRegState(KillSrc))
525 .addReg(SrcReg, getKillRegState(KillSrc));
526 return;
529 if (RISCV::FPR64RegClass.contains(DstReg, SrcReg)) {
530 BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_D), DstReg)
531 .addReg(SrcReg, getKillRegState(KillSrc))
532 .addReg(SrcReg, getKillRegState(KillSrc));
533 return;
536 if (RISCV::FPR32RegClass.contains(DstReg) &&
537 RISCV::GPRRegClass.contains(SrcReg)) {
538 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_W_X), DstReg)
539 .addReg(SrcReg, getKillRegState(KillSrc));
540 return;
543 if (RISCV::GPRRegClass.contains(DstReg) &&
544 RISCV::FPR32RegClass.contains(SrcReg)) {
545 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_W), DstReg)
546 .addReg(SrcReg, getKillRegState(KillSrc));
547 return;
550 if (RISCV::FPR64RegClass.contains(DstReg) &&
551 RISCV::GPRRegClass.contains(SrcReg)) {
552 assert(STI.getXLen() == 64 && "Unexpected GPR size");
553 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_D_X), DstReg)
554 .addReg(SrcReg, getKillRegState(KillSrc));
555 return;
558 if (RISCV::GPRRegClass.contains(DstReg) &&
559 RISCV::FPR64RegClass.contains(SrcReg)) {
560 assert(STI.getXLen() == 64 && "Unexpected GPR size");
561 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_D), DstReg)
562 .addReg(SrcReg, getKillRegState(KillSrc));
563 return;
566 // VR->VR copies.
567 const TargetRegisterClass *RegClass =
568 TRI->getCommonMinimalPhysRegClass(SrcReg, DstReg);
569 if (RISCVRegisterInfo::isRVVRegClass(RegClass)) {
570 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RegClass);
571 return;
574 llvm_unreachable("Impossible reg-to-reg copy");
577 void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
578 MachineBasicBlock::iterator I,
579 Register SrcReg, bool IsKill, int FI,
580 const TargetRegisterClass *RC,
581 const TargetRegisterInfo *TRI,
582 Register VReg) const {
583 MachineFunction *MF = MBB.getParent();
584 MachineFrameInfo &MFI = MF->getFrameInfo();
586 unsigned Opcode;
587 bool IsScalableVector = true;
588 if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
589 Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
590 RISCV::SW : RISCV::SD;
591 IsScalableVector = false;
592 } else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) {
593 Opcode = RISCV::SH_INX;
594 IsScalableVector = false;
595 } else if (RISCV::GPRF32RegClass.hasSubClassEq(RC)) {
596 Opcode = RISCV::SW_INX;
597 IsScalableVector = false;
598 } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
599 Opcode = RISCV::PseudoRV32ZdinxSD;
600 IsScalableVector = false;
601 } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
602 Opcode = RISCV::FSH;
603 IsScalableVector = false;
604 } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
605 Opcode = RISCV::FSW;
606 IsScalableVector = false;
607 } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
608 Opcode = RISCV::FSD;
609 IsScalableVector = false;
610 } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
611 Opcode = RISCV::VS1R_V;
612 } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
613 Opcode = RISCV::VS2R_V;
614 } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
615 Opcode = RISCV::VS4R_V;
616 } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
617 Opcode = RISCV::VS8R_V;
618 } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
619 Opcode = RISCV::PseudoVSPILL2_M1;
620 else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
621 Opcode = RISCV::PseudoVSPILL2_M2;
622 else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
623 Opcode = RISCV::PseudoVSPILL2_M4;
624 else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
625 Opcode = RISCV::PseudoVSPILL3_M1;
626 else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
627 Opcode = RISCV::PseudoVSPILL3_M2;
628 else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
629 Opcode = RISCV::PseudoVSPILL4_M1;
630 else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
631 Opcode = RISCV::PseudoVSPILL4_M2;
632 else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
633 Opcode = RISCV::PseudoVSPILL5_M1;
634 else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
635 Opcode = RISCV::PseudoVSPILL6_M1;
636 else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
637 Opcode = RISCV::PseudoVSPILL7_M1;
638 else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
639 Opcode = RISCV::PseudoVSPILL8_M1;
640 else
641 llvm_unreachable("Can't store this register to stack slot");
643 if (IsScalableVector) {
644 MachineMemOperand *MMO = MF->getMachineMemOperand(
645 MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
646 LocationSize::beforeOrAfterPointer(), MFI.getObjectAlign(FI));
648 MFI.setStackID(FI, TargetStackID::ScalableVector);
649 BuildMI(MBB, I, DebugLoc(), get(Opcode))
650 .addReg(SrcReg, getKillRegState(IsKill))
651 .addFrameIndex(FI)
652 .addMemOperand(MMO);
653 } else {
654 MachineMemOperand *MMO = MF->getMachineMemOperand(
655 MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
656 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
658 BuildMI(MBB, I, DebugLoc(), get(Opcode))
659 .addReg(SrcReg, getKillRegState(IsKill))
660 .addFrameIndex(FI)
661 .addImm(0)
662 .addMemOperand(MMO);
666 void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
667 MachineBasicBlock::iterator I,
668 Register DstReg, int FI,
669 const TargetRegisterClass *RC,
670 const TargetRegisterInfo *TRI,
671 Register VReg) const {
672 MachineFunction *MF = MBB.getParent();
673 MachineFrameInfo &MFI = MF->getFrameInfo();
675 unsigned Opcode;
676 bool IsScalableVector = true;
677 if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
678 Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
679 RISCV::LW : RISCV::LD;
680 IsScalableVector = false;
681 } else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) {
682 Opcode = RISCV::LH_INX;
683 IsScalableVector = false;
684 } else if (RISCV::GPRF32RegClass.hasSubClassEq(RC)) {
685 Opcode = RISCV::LW_INX;
686 IsScalableVector = false;
687 } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
688 Opcode = RISCV::PseudoRV32ZdinxLD;
689 IsScalableVector = false;
690 } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
691 Opcode = RISCV::FLH;
692 IsScalableVector = false;
693 } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
694 Opcode = RISCV::FLW;
695 IsScalableVector = false;
696 } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
697 Opcode = RISCV::FLD;
698 IsScalableVector = false;
699 } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
700 Opcode = RISCV::VL1RE8_V;
701 } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
702 Opcode = RISCV::VL2RE8_V;
703 } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
704 Opcode = RISCV::VL4RE8_V;
705 } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
706 Opcode = RISCV::VL8RE8_V;
707 } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
708 Opcode = RISCV::PseudoVRELOAD2_M1;
709 else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
710 Opcode = RISCV::PseudoVRELOAD2_M2;
711 else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
712 Opcode = RISCV::PseudoVRELOAD2_M4;
713 else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
714 Opcode = RISCV::PseudoVRELOAD3_M1;
715 else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
716 Opcode = RISCV::PseudoVRELOAD3_M2;
717 else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
718 Opcode = RISCV::PseudoVRELOAD4_M1;
719 else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
720 Opcode = RISCV::PseudoVRELOAD4_M2;
721 else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
722 Opcode = RISCV::PseudoVRELOAD5_M1;
723 else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
724 Opcode = RISCV::PseudoVRELOAD6_M1;
725 else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
726 Opcode = RISCV::PseudoVRELOAD7_M1;
727 else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
728 Opcode = RISCV::PseudoVRELOAD8_M1;
729 else
730 llvm_unreachable("Can't load this register from stack slot");
732 if (IsScalableVector) {
733 MachineMemOperand *MMO = MF->getMachineMemOperand(
734 MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
735 LocationSize::beforeOrAfterPointer(), MFI.getObjectAlign(FI));
737 MFI.setStackID(FI, TargetStackID::ScalableVector);
738 BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)
739 .addFrameIndex(FI)
740 .addMemOperand(MMO);
741 } else {
742 MachineMemOperand *MMO = MF->getMachineMemOperand(
743 MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
744 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
746 BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)
747 .addFrameIndex(FI)
748 .addImm(0)
749 .addMemOperand(MMO);
753 MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
754 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
755 MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS,
756 VirtRegMap *VRM) const {
757 // The below optimizations narrow the load so they are only valid for little
758 // endian.
759 // TODO: Support big endian by adding an offset into the frame object?
760 if (MF.getDataLayout().isBigEndian())
761 return nullptr;
763 // Fold load from stack followed by sext.b/sext.h/sext.w/zext.b/zext.h/zext.w.
764 if (Ops.size() != 1 || Ops[0] != 1)
765 return nullptr;
767 unsigned LoadOpc;
768 switch (MI.getOpcode()) {
769 default:
770 if (RISCV::isSEXT_W(MI)) {
771 LoadOpc = RISCV::LW;
772 break;
774 if (RISCV::isZEXT_W(MI)) {
775 LoadOpc = RISCV::LWU;
776 break;
778 if (RISCV::isZEXT_B(MI)) {
779 LoadOpc = RISCV::LBU;
780 break;
782 if (RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::VMV_X_S) {
783 unsigned Log2SEW =
784 MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
785 if (STI.getXLen() < (1U << Log2SEW))
786 return nullptr;
787 switch (Log2SEW) {
788 case 3:
789 LoadOpc = RISCV::LB;
790 break;
791 case 4:
792 LoadOpc = RISCV::LH;
793 break;
794 case 5:
795 LoadOpc = RISCV::LW;
796 break;
797 case 6:
798 LoadOpc = RISCV::LD;
799 break;
800 default:
801 llvm_unreachable("Unexpected SEW");
803 break;
805 if (RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::VFMV_F_S) {
806 unsigned Log2SEW =
807 MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
808 switch (Log2SEW) {
809 case 4:
810 LoadOpc = RISCV::FLH;
811 break;
812 case 5:
813 LoadOpc = RISCV::FLW;
814 break;
815 case 6:
816 LoadOpc = RISCV::FLD;
817 break;
818 default:
819 llvm_unreachable("Unexpected SEW");
821 break;
823 return nullptr;
824 case RISCV::SEXT_H:
825 LoadOpc = RISCV::LH;
826 break;
827 case RISCV::SEXT_B:
828 LoadOpc = RISCV::LB;
829 break;
830 case RISCV::ZEXT_H_RV32:
831 case RISCV::ZEXT_H_RV64:
832 LoadOpc = RISCV::LHU;
833 break;
836 Register DstReg = MI.getOperand(0).getReg();
837 return BuildMI(*MI.getParent(), InsertPt, MI.getDebugLoc(), get(LoadOpc),
838 DstReg)
839 .addFrameIndex(FrameIndex)
840 .addImm(0);
843 void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
844 MachineBasicBlock::iterator MBBI,
845 const DebugLoc &DL, Register DstReg, uint64_t Val,
846 MachineInstr::MIFlag Flag, bool DstRenamable,
847 bool DstIsDead) const {
848 Register SrcReg = RISCV::X0;
850 // For RV32, allow a sign or unsigned 32 bit value.
851 if (!STI.is64Bit() && !isInt<32>(Val)) {
852 // If have a uimm32 it will still fit in a register so we can allow it.
853 if (!isUInt<32>(Val))
854 report_fatal_error("Should only materialize 32-bit constants for RV32");
856 // Sign extend for generateInstSeq.
857 Val = SignExtend64<32>(Val);
860 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, STI);
861 assert(!Seq.empty());
863 bool SrcRenamable = false;
864 unsigned Num = 0;
866 for (const RISCVMatInt::Inst &Inst : Seq) {
867 bool LastItem = ++Num == Seq.size();
868 unsigned DstRegState = getDeadRegState(DstIsDead && LastItem) |
869 getRenamableRegState(DstRenamable);
870 unsigned SrcRegState = getKillRegState(SrcReg != RISCV::X0) |
871 getRenamableRegState(SrcRenamable);
872 switch (Inst.getOpndKind()) {
873 case RISCVMatInt::Imm:
874 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
875 .addReg(DstReg, RegState::Define | DstRegState)
876 .addImm(Inst.getImm())
877 .setMIFlag(Flag);
878 break;
879 case RISCVMatInt::RegX0:
880 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
881 .addReg(DstReg, RegState::Define | DstRegState)
882 .addReg(SrcReg, SrcRegState)
883 .addReg(RISCV::X0)
884 .setMIFlag(Flag);
885 break;
886 case RISCVMatInt::RegReg:
887 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
888 .addReg(DstReg, RegState::Define | DstRegState)
889 .addReg(SrcReg, SrcRegState)
890 .addReg(SrcReg, SrcRegState)
891 .setMIFlag(Flag);
892 break;
893 case RISCVMatInt::RegImm:
894 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
895 .addReg(DstReg, RegState::Define | DstRegState)
896 .addReg(SrcReg, SrcRegState)
897 .addImm(Inst.getImm())
898 .setMIFlag(Flag);
899 break;
902 // Only the first instruction has X0 as its source.
903 SrcReg = DstReg;
904 SrcRenamable = DstRenamable;
908 static RISCVCC::CondCode getCondFromBranchOpc(unsigned Opc) {
909 switch (Opc) {
910 default:
911 return RISCVCC::COND_INVALID;
912 case RISCV::CV_BEQIMM:
913 return RISCVCC::COND_EQ;
914 case RISCV::CV_BNEIMM:
915 return RISCVCC::COND_NE;
916 case RISCV::BEQ:
917 return RISCVCC::COND_EQ;
918 case RISCV::BNE:
919 return RISCVCC::COND_NE;
920 case RISCV::BLT:
921 return RISCVCC::COND_LT;
922 case RISCV::BGE:
923 return RISCVCC::COND_GE;
924 case RISCV::BLTU:
925 return RISCVCC::COND_LTU;
926 case RISCV::BGEU:
927 return RISCVCC::COND_GEU;
931 // The contents of values added to Cond are not examined outside of
932 // RISCVInstrInfo, giving us flexibility in what to push to it. For RISCV, we
933 // push BranchOpcode, Reg1, Reg2.
934 static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target,
935 SmallVectorImpl<MachineOperand> &Cond) {
936 // Block ends with fall-through condbranch.
937 assert(LastInst.getDesc().isConditionalBranch() &&
938 "Unknown conditional branch");
939 Target = LastInst.getOperand(2).getMBB();
940 unsigned CC = getCondFromBranchOpc(LastInst.getOpcode());
941 Cond.push_back(MachineOperand::CreateImm(CC));
942 Cond.push_back(LastInst.getOperand(0));
943 Cond.push_back(LastInst.getOperand(1));
946 unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC, bool Imm) {
947 switch (CC) {
948 default:
949 llvm_unreachable("Unknown condition code!");
950 case RISCVCC::COND_EQ:
951 return Imm ? RISCV::CV_BEQIMM : RISCV::BEQ;
952 case RISCVCC::COND_NE:
953 return Imm ? RISCV::CV_BNEIMM : RISCV::BNE;
954 case RISCVCC::COND_LT:
955 return RISCV::BLT;
956 case RISCVCC::COND_GE:
957 return RISCV::BGE;
958 case RISCVCC::COND_LTU:
959 return RISCV::BLTU;
960 case RISCVCC::COND_GEU:
961 return RISCV::BGEU;
965 const MCInstrDesc &RISCVInstrInfo::getBrCond(RISCVCC::CondCode CC,
966 bool Imm) const {
967 return get(RISCVCC::getBrCond(CC, Imm));
970 RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
971 switch (CC) {
972 default:
973 llvm_unreachable("Unrecognized conditional branch");
974 case RISCVCC::COND_EQ:
975 return RISCVCC::COND_NE;
976 case RISCVCC::COND_NE:
977 return RISCVCC::COND_EQ;
978 case RISCVCC::COND_LT:
979 return RISCVCC::COND_GE;
980 case RISCVCC::COND_GE:
981 return RISCVCC::COND_LT;
982 case RISCVCC::COND_LTU:
983 return RISCVCC::COND_GEU;
984 case RISCVCC::COND_GEU:
985 return RISCVCC::COND_LTU;
989 bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
990 MachineBasicBlock *&TBB,
991 MachineBasicBlock *&FBB,
992 SmallVectorImpl<MachineOperand> &Cond,
993 bool AllowModify) const {
994 TBB = FBB = nullptr;
995 Cond.clear();
997 // If the block has no terminators, it just falls into the block after it.
998 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
999 if (I == MBB.end() || !isUnpredicatedTerminator(*I))
1000 return false;
1002 // Count the number of terminators and find the first unconditional or
1003 // indirect branch.
1004 MachineBasicBlock::iterator FirstUncondOrIndirectBr = MBB.end();
1005 int NumTerminators = 0;
1006 for (auto J = I.getReverse(); J != MBB.rend() && isUnpredicatedTerminator(*J);
1007 J++) {
1008 NumTerminators++;
1009 if (J->getDesc().isUnconditionalBranch() ||
1010 J->getDesc().isIndirectBranch()) {
1011 FirstUncondOrIndirectBr = J.getReverse();
1015 // If AllowModify is true, we can erase any terminators after
1016 // FirstUncondOrIndirectBR.
1017 if (AllowModify && FirstUncondOrIndirectBr != MBB.end()) {
1018 while (std::next(FirstUncondOrIndirectBr) != MBB.end()) {
1019 std::next(FirstUncondOrIndirectBr)->eraseFromParent();
1020 NumTerminators--;
1022 I = FirstUncondOrIndirectBr;
1025 // We can't handle blocks that end in an indirect branch.
1026 if (I->getDesc().isIndirectBranch())
1027 return true;
1029 // We can't handle Generic branch opcodes from Global ISel.
1030 if (I->isPreISelOpcode())
1031 return true;
1033 // We can't handle blocks with more than 2 terminators.
1034 if (NumTerminators > 2)
1035 return true;
1037 // Handle a single unconditional branch.
1038 if (NumTerminators == 1 && I->getDesc().isUnconditionalBranch()) {
1039 TBB = getBranchDestBlock(*I);
1040 return false;
1043 // Handle a single conditional branch.
1044 if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) {
1045 parseCondBranch(*I, TBB, Cond);
1046 return false;
1049 // Handle a conditional branch followed by an unconditional branch.
1050 if (NumTerminators == 2 && std::prev(I)->getDesc().isConditionalBranch() &&
1051 I->getDesc().isUnconditionalBranch()) {
1052 parseCondBranch(*std::prev(I), TBB, Cond);
1053 FBB = getBranchDestBlock(*I);
1054 return false;
1057 // Otherwise, we can't handle this.
1058 return true;
1061 unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB,
1062 int *BytesRemoved) const {
1063 if (BytesRemoved)
1064 *BytesRemoved = 0;
1065 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
1066 if (I == MBB.end())
1067 return 0;
1069 if (!I->getDesc().isUnconditionalBranch() &&
1070 !I->getDesc().isConditionalBranch())
1071 return 0;
1073 // Remove the branch.
1074 if (BytesRemoved)
1075 *BytesRemoved += getInstSizeInBytes(*I);
1076 I->eraseFromParent();
1078 I = MBB.end();
1080 if (I == MBB.begin())
1081 return 1;
1082 --I;
1083 if (!I->getDesc().isConditionalBranch())
1084 return 1;
1086 // Remove the branch.
1087 if (BytesRemoved)
1088 *BytesRemoved += getInstSizeInBytes(*I);
1089 I->eraseFromParent();
1090 return 2;
1093 // Inserts a branch into the end of the specific MachineBasicBlock, returning
1094 // the number of instructions inserted.
1095 unsigned RISCVInstrInfo::insertBranch(
1096 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
1097 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
1098 if (BytesAdded)
1099 *BytesAdded = 0;
1101 // Shouldn't be a fall through.
1102 assert(TBB && "insertBranch must not be told to insert a fallthrough");
1103 assert((Cond.size() == 3 || Cond.size() == 0) &&
1104 "RISC-V branch conditions have two components!");
1106 // Unconditional branch.
1107 if (Cond.empty()) {
1108 MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(TBB);
1109 if (BytesAdded)
1110 *BytesAdded += getInstSizeInBytes(MI);
1111 return 1;
1114 // Either a one or two-way conditional branch.
1115 auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
1116 MachineInstr &CondMI = *BuildMI(&MBB, DL, getBrCond(CC, Cond[2].isImm()))
1117 .add(Cond[1])
1118 .add(Cond[2])
1119 .addMBB(TBB);
1120 if (BytesAdded)
1121 *BytesAdded += getInstSizeInBytes(CondMI);
1123 // One-way conditional branch.
1124 if (!FBB)
1125 return 1;
1127 // Two-way conditional branch.
1128 MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(FBB);
1129 if (BytesAdded)
1130 *BytesAdded += getInstSizeInBytes(MI);
1131 return 2;
1134 void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
1135 MachineBasicBlock &DestBB,
1136 MachineBasicBlock &RestoreBB,
1137 const DebugLoc &DL, int64_t BrOffset,
1138 RegScavenger *RS) const {
1139 assert(RS && "RegScavenger required for long branching");
1140 assert(MBB.empty() &&
1141 "new block should be inserted for expanding unconditional branch");
1142 assert(MBB.pred_size() == 1);
1143 assert(RestoreBB.empty() &&
1144 "restore block should be inserted for restoring clobbered registers");
1146 MachineFunction *MF = MBB.getParent();
1147 MachineRegisterInfo &MRI = MF->getRegInfo();
1148 RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
1149 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1151 if (!isInt<32>(BrOffset))
1152 report_fatal_error(
1153 "Branch offsets outside of the signed 32-bit range not supported");
1155 // FIXME: A virtual register must be used initially, as the register
1156 // scavenger won't work with empty blocks (SIInstrInfo::insertIndirectBranch
1157 // uses the same workaround).
1158 Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRJALRRegClass);
1159 auto II = MBB.end();
1160 // We may also update the jump target to RestoreBB later.
1161 MachineInstr &MI = *BuildMI(MBB, II, DL, get(RISCV::PseudoJump))
1162 .addReg(ScratchReg, RegState::Define | RegState::Dead)
1163 .addMBB(&DestBB, RISCVII::MO_CALL);
1165 RS->enterBasicBlockEnd(MBB);
1166 Register TmpGPR =
1167 RS->scavengeRegisterBackwards(RISCV::GPRRegClass, MI.getIterator(),
1168 /*RestoreAfter=*/false, /*SpAdj=*/0,
1169 /*AllowSpill=*/false);
1170 if (TmpGPR != RISCV::NoRegister)
1171 RS->setRegUsed(TmpGPR);
1172 else {
1173 // The case when there is no scavenged register needs special handling.
1175 // Pick s11 because it doesn't make a difference.
1176 TmpGPR = RISCV::X27;
1178 int FrameIndex = RVFI->getBranchRelaxationScratchFrameIndex();
1179 if (FrameIndex == -1)
1180 report_fatal_error("underestimated function size");
1182 storeRegToStackSlot(MBB, MI, TmpGPR, /*IsKill=*/true, FrameIndex,
1183 &RISCV::GPRRegClass, TRI, Register());
1184 TRI->eliminateFrameIndex(std::prev(MI.getIterator()),
1185 /*SpAdj=*/0, /*FIOperandNum=*/1);
1187 MI.getOperand(1).setMBB(&RestoreBB);
1189 loadRegFromStackSlot(RestoreBB, RestoreBB.end(), TmpGPR, FrameIndex,
1190 &RISCV::GPRRegClass, TRI, Register());
1191 TRI->eliminateFrameIndex(RestoreBB.back(),
1192 /*SpAdj=*/0, /*FIOperandNum=*/1);
1195 MRI.replaceRegWith(ScratchReg, TmpGPR);
1196 MRI.clearVirtRegs();
1199 bool RISCVInstrInfo::reverseBranchCondition(
1200 SmallVectorImpl<MachineOperand> &Cond) const {
1201 assert((Cond.size() == 3) && "Invalid branch condition!");
1202 auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
1203 Cond[0].setImm(getOppositeBranchCondition(CC));
1204 return false;
1207 bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
1208 MachineBasicBlock *MBB = MI.getParent();
1209 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
1211 MachineBasicBlock *TBB, *FBB;
1212 SmallVector<MachineOperand, 3> Cond;
1213 if (analyzeBranch(*MBB, TBB, FBB, Cond, /*AllowModify=*/false))
1214 return false;
1216 RISCVCC::CondCode CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
1217 assert(CC != RISCVCC::COND_INVALID);
1219 if (CC == RISCVCC::COND_EQ || CC == RISCVCC::COND_NE)
1220 return false;
1222 // For two constants C0 and C1 from
1223 // ```
1224 // li Y, C0
1225 // li Z, C1
1226 // ```
1227 // 1. if C1 = C0 + 1
1228 // we can turn:
1229 // (a) blt Y, X -> bge X, Z
1230 // (b) bge Y, X -> blt X, Z
1232 // 2. if C1 = C0 - 1
1233 // we can turn:
1234 // (a) blt X, Y -> bge Z, X
1235 // (b) bge X, Y -> blt Z, X
1237 // To make sure this optimization is really beneficial, we only
1238 // optimize for cases where Y had only one use (i.e. only used by the branch).
1240 // Right now we only care about LI (i.e. ADDI x0, imm)
1241 auto isLoadImm = [](const MachineInstr *MI, int64_t &Imm) -> bool {
1242 if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
1243 MI->getOperand(1).getReg() == RISCV::X0) {
1244 Imm = MI->getOperand(2).getImm();
1245 return true;
1247 return false;
1249 // Either a load from immediate instruction or X0.
1250 auto isFromLoadImm = [&](const MachineOperand &Op, int64_t &Imm) -> bool {
1251 if (!Op.isReg())
1252 return false;
1253 Register Reg = Op.getReg();
1254 return Reg.isVirtual() && isLoadImm(MRI.getVRegDef(Reg), Imm);
1257 MachineOperand &LHS = MI.getOperand(0);
1258 MachineOperand &RHS = MI.getOperand(1);
1259 // Try to find the register for constant Z; return
1260 // invalid register otherwise.
1261 auto searchConst = [&](int64_t C1) -> Register {
1262 MachineBasicBlock::reverse_iterator II(&MI), E = MBB->rend();
1263 auto DefC1 = std::find_if(++II, E, [&](const MachineInstr &I) -> bool {
1264 int64_t Imm;
1265 return isLoadImm(&I, Imm) && Imm == C1 &&
1266 I.getOperand(0).getReg().isVirtual();
1268 if (DefC1 != E)
1269 return DefC1->getOperand(0).getReg();
1271 return Register();
1274 bool Modify = false;
1275 int64_t C0;
1276 if (isFromLoadImm(LHS, C0) && MRI.hasOneUse(LHS.getReg())) {
1277 // Might be case 1.
1278 // Signed integer overflow is UB. (UINT64_MAX is bigger so we don't need
1279 // to worry about unsigned overflow here)
1280 if (C0 < INT64_MAX)
1281 if (Register RegZ = searchConst(C0 + 1)) {
1282 reverseBranchCondition(Cond);
1283 Cond[1] = MachineOperand::CreateReg(RHS.getReg(), /*isDef=*/false);
1284 Cond[2] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
1285 // We might extend the live range of Z, clear its kill flag to
1286 // account for this.
1287 MRI.clearKillFlags(RegZ);
1288 Modify = true;
1290 } else if (isFromLoadImm(RHS, C0) && MRI.hasOneUse(RHS.getReg())) {
1291 // Might be case 2.
1292 // For unsigned cases, we don't want C1 to wrap back to UINT64_MAX
1293 // when C0 is zero.
1294 if ((CC == RISCVCC::COND_GE || CC == RISCVCC::COND_LT) || C0)
1295 if (Register RegZ = searchConst(C0 - 1)) {
1296 reverseBranchCondition(Cond);
1297 Cond[1] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
1298 Cond[2] = MachineOperand::CreateReg(LHS.getReg(), /*isDef=*/false);
1299 // We might extend the live range of Z, clear its kill flag to
1300 // account for this.
1301 MRI.clearKillFlags(RegZ);
1302 Modify = true;
1306 if (!Modify)
1307 return false;
1309 // Build the new branch and remove the old one.
1310 BuildMI(*MBB, MI, MI.getDebugLoc(),
1311 getBrCond(static_cast<RISCVCC::CondCode>(Cond[0].getImm())))
1312 .add(Cond[1])
1313 .add(Cond[2])
1314 .addMBB(TBB);
1315 MI.eraseFromParent();
1317 return true;
1320 MachineBasicBlock *
1321 RISCVInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
1322 assert(MI.getDesc().isBranch() && "Unexpected opcode!");
1323 // The branch target is always the last operand.
1324 int NumOp = MI.getNumExplicitOperands();
1325 return MI.getOperand(NumOp - 1).getMBB();
1328 bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
1329 int64_t BrOffset) const {
1330 unsigned XLen = STI.getXLen();
1331 // Ideally we could determine the supported branch offset from the
1332 // RISCVII::FormMask, but this can't be used for Pseudo instructions like
1333 // PseudoBR.
1334 switch (BranchOp) {
1335 default:
1336 llvm_unreachable("Unexpected opcode!");
1337 case RISCV::BEQ:
1338 case RISCV::BNE:
1339 case RISCV::BLT:
1340 case RISCV::BGE:
1341 case RISCV::BLTU:
1342 case RISCV::BGEU:
1343 case RISCV::CV_BEQIMM:
1344 case RISCV::CV_BNEIMM:
1345 return isIntN(13, BrOffset);
1346 case RISCV::JAL:
1347 case RISCV::PseudoBR:
1348 return isIntN(21, BrOffset);
1349 case RISCV::PseudoJump:
1350 return isIntN(32, SignExtend64(BrOffset + 0x800, XLen));
1354 // If the operation has a predicated pseudo instruction, return the pseudo
1355 // instruction opcode. Otherwise, return RISCV::INSTRUCTION_LIST_END.
1356 // TODO: Support more operations.
1357 unsigned getPredicatedOpcode(unsigned Opcode) {
1358 switch (Opcode) {
1359 case RISCV::ADD: return RISCV::PseudoCCADD; break;
1360 case RISCV::SUB: return RISCV::PseudoCCSUB; break;
1361 case RISCV::SLL: return RISCV::PseudoCCSLL; break;
1362 case RISCV::SRL: return RISCV::PseudoCCSRL; break;
1363 case RISCV::SRA: return RISCV::PseudoCCSRA; break;
1364 case RISCV::AND: return RISCV::PseudoCCAND; break;
1365 case RISCV::OR: return RISCV::PseudoCCOR; break;
1366 case RISCV::XOR: return RISCV::PseudoCCXOR; break;
1368 case RISCV::ADDI: return RISCV::PseudoCCADDI; break;
1369 case RISCV::SLLI: return RISCV::PseudoCCSLLI; break;
1370 case RISCV::SRLI: return RISCV::PseudoCCSRLI; break;
1371 case RISCV::SRAI: return RISCV::PseudoCCSRAI; break;
1372 case RISCV::ANDI: return RISCV::PseudoCCANDI; break;
1373 case RISCV::ORI: return RISCV::PseudoCCORI; break;
1374 case RISCV::XORI: return RISCV::PseudoCCXORI; break;
1376 case RISCV::ADDW: return RISCV::PseudoCCADDW; break;
1377 case RISCV::SUBW: return RISCV::PseudoCCSUBW; break;
1378 case RISCV::SLLW: return RISCV::PseudoCCSLLW; break;
1379 case RISCV::SRLW: return RISCV::PseudoCCSRLW; break;
1380 case RISCV::SRAW: return RISCV::PseudoCCSRAW; break;
1382 case RISCV::ADDIW: return RISCV::PseudoCCADDIW; break;
1383 case RISCV::SLLIW: return RISCV::PseudoCCSLLIW; break;
1384 case RISCV::SRLIW: return RISCV::PseudoCCSRLIW; break;
1385 case RISCV::SRAIW: return RISCV::PseudoCCSRAIW; break;
1387 case RISCV::ANDN: return RISCV::PseudoCCANDN; break;
1388 case RISCV::ORN: return RISCV::PseudoCCORN; break;
1389 case RISCV::XNOR: return RISCV::PseudoCCXNOR; break;
1392 return RISCV::INSTRUCTION_LIST_END;
1395 /// Identify instructions that can be folded into a CCMOV instruction, and
1396 /// return the defining instruction.
1397 static MachineInstr *canFoldAsPredicatedOp(Register Reg,
1398 const MachineRegisterInfo &MRI,
1399 const TargetInstrInfo *TII) {
1400 if (!Reg.isVirtual())
1401 return nullptr;
1402 if (!MRI.hasOneNonDBGUse(Reg))
1403 return nullptr;
1404 MachineInstr *MI = MRI.getVRegDef(Reg);
1405 if (!MI)
1406 return nullptr;
1407 // Check if MI can be predicated and folded into the CCMOV.
1408 if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END)
1409 return nullptr;
1410 // Don't predicate li idiom.
1411 if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
1412 MI->getOperand(1).getReg() == RISCV::X0)
1413 return nullptr;
1414 // Check if MI has any other defs or physreg uses.
1415 for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
1416 // Reject frame index operands, PEI can't handle the predicated pseudos.
1417 if (MO.isFI() || MO.isCPI() || MO.isJTI())
1418 return nullptr;
1419 if (!MO.isReg())
1420 continue;
1421 // MI can't have any tied operands, that would conflict with predication.
1422 if (MO.isTied())
1423 return nullptr;
1424 if (MO.isDef())
1425 return nullptr;
1426 // Allow constant physregs.
1427 if (MO.getReg().isPhysical() && !MRI.isConstantPhysReg(MO.getReg()))
1428 return nullptr;
1430 bool DontMoveAcrossStores = true;
1431 if (!MI->isSafeToMove(DontMoveAcrossStores))
1432 return nullptr;
1433 return MI;
1436 bool RISCVInstrInfo::analyzeSelect(const MachineInstr &MI,
1437 SmallVectorImpl<MachineOperand> &Cond,
1438 unsigned &TrueOp, unsigned &FalseOp,
1439 bool &Optimizable) const {
1440 assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
1441 "Unknown select instruction");
1442 // CCMOV operands:
1443 // 0: Def.
1444 // 1: LHS of compare.
1445 // 2: RHS of compare.
1446 // 3: Condition code.
1447 // 4: False use.
1448 // 5: True use.
1449 TrueOp = 5;
1450 FalseOp = 4;
1451 Cond.push_back(MI.getOperand(1));
1452 Cond.push_back(MI.getOperand(2));
1453 Cond.push_back(MI.getOperand(3));
1454 // We can only fold when we support short forward branch opt.
1455 Optimizable = STI.hasShortForwardBranchOpt();
1456 return false;
1459 MachineInstr *
1460 RISCVInstrInfo::optimizeSelect(MachineInstr &MI,
1461 SmallPtrSetImpl<MachineInstr *> &SeenMIs,
1462 bool PreferFalse) const {
1463 assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
1464 "Unknown select instruction");
1465 if (!STI.hasShortForwardBranchOpt())
1466 return nullptr;
1468 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
1469 MachineInstr *DefMI =
1470 canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this);
1471 bool Invert = !DefMI;
1472 if (!DefMI)
1473 DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this);
1474 if (!DefMI)
1475 return nullptr;
1477 // Find new register class to use.
1478 MachineOperand FalseReg = MI.getOperand(Invert ? 5 : 4);
1479 Register DestReg = MI.getOperand(0).getReg();
1480 const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
1481 if (!MRI.constrainRegClass(DestReg, PreviousClass))
1482 return nullptr;
1484 unsigned PredOpc = getPredicatedOpcode(DefMI->getOpcode());
1485 assert(PredOpc != RISCV::INSTRUCTION_LIST_END && "Unexpected opcode!");
1487 // Create a new predicated version of DefMI.
1488 MachineInstrBuilder NewMI =
1489 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(PredOpc), DestReg);
1491 // Copy the condition portion.
1492 NewMI.add(MI.getOperand(1));
1493 NewMI.add(MI.getOperand(2));
1495 // Add condition code, inverting if necessary.
1496 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
1497 if (Invert)
1498 CC = RISCVCC::getOppositeBranchCondition(CC);
1499 NewMI.addImm(CC);
1501 // Copy the false register.
1502 NewMI.add(FalseReg);
1504 // Copy all the DefMI operands.
1505 const MCInstrDesc &DefDesc = DefMI->getDesc();
1506 for (unsigned i = 1, e = DefDesc.getNumOperands(); i != e; ++i)
1507 NewMI.add(DefMI->getOperand(i));
1509 // Update SeenMIs set: register newly created MI and erase removed DefMI.
1510 SeenMIs.insert(NewMI);
1511 SeenMIs.erase(DefMI);
1513 // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
1514 // DefMI would be invalid when tranferred inside the loop. Checking for a
1515 // loop is expensive, but at least remove kill flags if they are in different
1516 // BBs.
1517 if (DefMI->getParent() != MI.getParent())
1518 NewMI->clearKillInfo();
1520 // The caller will erase MI, but not DefMI.
1521 DefMI->eraseFromParent();
1522 return NewMI;
1525 unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
1526 if (MI.isMetaInstruction())
1527 return 0;
1529 unsigned Opcode = MI.getOpcode();
1531 if (Opcode == TargetOpcode::INLINEASM ||
1532 Opcode == TargetOpcode::INLINEASM_BR) {
1533 const MachineFunction &MF = *MI.getParent()->getParent();
1534 return getInlineAsmLength(MI.getOperand(0).getSymbolName(),
1535 *MF.getTarget().getMCAsmInfo());
1538 if (!MI.memoperands_empty()) {
1539 MachineMemOperand *MMO = *(MI.memoperands_begin());
1540 if (STI.hasStdExtZihintntl() && MMO->isNonTemporal()) {
1541 if (STI.hasStdExtCOrZca() && STI.enableRVCHintInstrs()) {
1542 if (isCompressibleInst(MI, STI))
1543 return 4; // c.ntl.all + c.load/c.store
1544 return 6; // c.ntl.all + load/store
1546 return 8; // ntl.all + load/store
1550 if (Opcode == TargetOpcode::BUNDLE)
1551 return getInstBundleLength(MI);
1553 if (MI.getParent() && MI.getParent()->getParent()) {
1554 if (isCompressibleInst(MI, STI))
1555 return 2;
1558 switch (Opcode) {
1559 case RISCV::PseudoMV_FPR16INX:
1560 case RISCV::PseudoMV_FPR32INX:
1561 // MV is always compressible to either c.mv or c.li rd, 0.
1562 return STI.hasStdExtCOrZca() ? 2 : 4;
1563 case TargetOpcode::STACKMAP:
1564 // The upper bound for a stackmap intrinsic is the full length of its shadow
1565 return StackMapOpers(&MI).getNumPatchBytes();
1566 case TargetOpcode::PATCHPOINT:
1567 // The size of the patchpoint intrinsic is the number of bytes requested
1568 return PatchPointOpers(&MI).getNumPatchBytes();
1569 case TargetOpcode::STATEPOINT: {
1570 // The size of the statepoint intrinsic is the number of bytes requested
1571 unsigned NumBytes = StatepointOpers(&MI).getNumPatchBytes();
1572 // No patch bytes means at most a PseudoCall is emitted
1573 return std::max(NumBytes, 8U);
1575 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
1576 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
1577 case TargetOpcode::PATCHABLE_TAIL_CALL: {
1578 const MachineFunction &MF = *MI.getParent()->getParent();
1579 const Function &F = MF.getFunction();
1580 if (Opcode == TargetOpcode::PATCHABLE_FUNCTION_ENTER &&
1581 F.hasFnAttribute("patchable-function-entry")) {
1582 unsigned Num;
1583 if (F.getFnAttribute("patchable-function-entry")
1584 .getValueAsString()
1585 .getAsInteger(10, Num))
1586 return get(Opcode).getSize();
1588 // Number of C.NOP or NOP
1589 return (STI.hasStdExtCOrZca() ? 2 : 4) * Num;
1591 // XRay uses C.JAL + 21 or 33 C.NOP for each sled in RV32 and RV64,
1592 // respectively.
1593 return STI.is64Bit() ? 68 : 44;
1595 default:
1596 return get(Opcode).getSize();
1600 unsigned RISCVInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
1601 unsigned Size = 0;
1602 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
1603 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
1604 while (++I != E && I->isInsideBundle()) {
1605 assert(!I->isBundle() && "No nested bundle!");
1606 Size += getInstSizeInBytes(*I);
1608 return Size;
1611 bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
1612 const unsigned Opcode = MI.getOpcode();
1613 switch (Opcode) {
1614 default:
1615 break;
1616 case RISCV::FSGNJ_D:
1617 case RISCV::FSGNJ_S:
1618 case RISCV::FSGNJ_H:
1619 case RISCV::FSGNJ_D_INX:
1620 case RISCV::FSGNJ_D_IN32X:
1621 case RISCV::FSGNJ_S_INX:
1622 case RISCV::FSGNJ_H_INX:
1623 // The canonical floating-point move is fsgnj rd, rs, rs.
1624 return MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
1625 MI.getOperand(1).getReg() == MI.getOperand(2).getReg();
1626 case RISCV::ADDI:
1627 case RISCV::ORI:
1628 case RISCV::XORI:
1629 return (MI.getOperand(1).isReg() &&
1630 MI.getOperand(1).getReg() == RISCV::X0) ||
1631 (MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0);
1633 return MI.isAsCheapAsAMove();
1636 std::optional<DestSourcePair>
1637 RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
1638 if (MI.isMoveReg())
1639 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
1640 switch (MI.getOpcode()) {
1641 default:
1642 break;
1643 case RISCV::ADDI:
1644 // Operand 1 can be a frameindex but callers expect registers
1645 if (MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
1646 MI.getOperand(2).getImm() == 0)
1647 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
1648 break;
1649 case RISCV::FSGNJ_D:
1650 case RISCV::FSGNJ_S:
1651 case RISCV::FSGNJ_H:
1652 case RISCV::FSGNJ_D_INX:
1653 case RISCV::FSGNJ_D_IN32X:
1654 case RISCV::FSGNJ_S_INX:
1655 case RISCV::FSGNJ_H_INX:
1656 // The canonical floating-point move is fsgnj rd, rs, rs.
1657 if (MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
1658 MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
1659 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
1660 break;
1662 return std::nullopt;
1665 MachineTraceStrategy RISCVInstrInfo::getMachineCombinerTraceStrategy() const {
1666 if (ForceMachineCombinerStrategy.getNumOccurrences() == 0) {
1667 // The option is unused. Choose the Local strategy only for in-order cores.
1668 // When the scheduling model is unspecified, use the MinInstrCount strategy
1669 // as the more generic one.
1670 const auto &SchedModel = STI.getSchedModel();
1671 return (!SchedModel.hasInstrSchedModel() || SchedModel.isOutOfOrder())
1672 ? MachineTraceStrategy::TS_MinInstrCount
1673 : MachineTraceStrategy::TS_Local;
1675 // The strategy was forced by the option.
1676 return ForceMachineCombinerStrategy;
1679 void RISCVInstrInfo::finalizeInsInstrs(
1680 MachineInstr &Root, unsigned &Pattern,
1681 SmallVectorImpl<MachineInstr *> &InsInstrs) const {
1682 int16_t FrmOpIdx =
1683 RISCV::getNamedOperandIdx(Root.getOpcode(), RISCV::OpName::frm);
1684 if (FrmOpIdx < 0) {
1685 assert(all_of(InsInstrs,
1686 [](MachineInstr *MI) {
1687 return RISCV::getNamedOperandIdx(MI->getOpcode(),
1688 RISCV::OpName::frm) < 0;
1689 }) &&
1690 "New instructions require FRM whereas the old one does not have it");
1691 return;
1694 const MachineOperand &FRM = Root.getOperand(FrmOpIdx);
1695 MachineFunction &MF = *Root.getMF();
1697 for (auto *NewMI : InsInstrs) {
1698 // The FRM operand has already been added; skip it.
1699 if (static_cast<unsigned>(RISCV::getNamedOperandIdx(
1700 NewMI->getOpcode(), RISCV::OpName::frm)) != NewMI->getNumOperands())
1701 continue;
1702 MachineInstrBuilder MIB(MF, NewMI);
1703 MIB.add(FRM);
1704 if (FRM.getImm() == RISCVFPRndMode::DYN)
1705 MIB.addUse(RISCV::FRM, RegState::Implicit);
1709 static bool isFADD(unsigned Opc) {
1710 switch (Opc) {
1711 default:
1712 return false;
1713 case RISCV::FADD_H:
1714 case RISCV::FADD_S:
1715 case RISCV::FADD_D:
1716 return true;
1720 static bool isFSUB(unsigned Opc) {
1721 switch (Opc) {
1722 default:
1723 return false;
1724 case RISCV::FSUB_H:
1725 case RISCV::FSUB_S:
1726 case RISCV::FSUB_D:
1727 return true;
1731 static bool isFMUL(unsigned Opc) {
1732 switch (Opc) {
1733 default:
1734 return false;
1735 case RISCV::FMUL_H:
1736 case RISCV::FMUL_S:
1737 case RISCV::FMUL_D:
1738 return true;
1742 bool RISCVInstrInfo::isVectorAssociativeAndCommutative(const MachineInstr &Inst,
1743 bool Invert) const {
1744 #define OPCODE_LMUL_CASE(OPC) \
1745 case RISCV::OPC##_M1: \
1746 case RISCV::OPC##_M2: \
1747 case RISCV::OPC##_M4: \
1748 case RISCV::OPC##_M8: \
1749 case RISCV::OPC##_MF2: \
1750 case RISCV::OPC##_MF4: \
1751 case RISCV::OPC##_MF8
1753 #define OPCODE_LMUL_MASK_CASE(OPC) \
1754 case RISCV::OPC##_M1_MASK: \
1755 case RISCV::OPC##_M2_MASK: \
1756 case RISCV::OPC##_M4_MASK: \
1757 case RISCV::OPC##_M8_MASK: \
1758 case RISCV::OPC##_MF2_MASK: \
1759 case RISCV::OPC##_MF4_MASK: \
1760 case RISCV::OPC##_MF8_MASK
1762 unsigned Opcode = Inst.getOpcode();
1763 if (Invert) {
1764 if (auto InvOpcode = getInverseOpcode(Opcode))
1765 Opcode = *InvOpcode;
1766 else
1767 return false;
1770 // clang-format off
1771 switch (Opcode) {
1772 default:
1773 return false;
1774 OPCODE_LMUL_CASE(PseudoVADD_VV):
1775 OPCODE_LMUL_MASK_CASE(PseudoVADD_VV):
1776 OPCODE_LMUL_CASE(PseudoVMUL_VV):
1777 OPCODE_LMUL_MASK_CASE(PseudoVMUL_VV):
1778 return true;
1780 // clang-format on
1782 #undef OPCODE_LMUL_MASK_CASE
1783 #undef OPCODE_LMUL_CASE
1786 bool RISCVInstrInfo::areRVVInstsReassociable(const MachineInstr &Root,
1787 const MachineInstr &Prev) const {
1788 if (!areOpcodesEqualOrInverse(Root.getOpcode(), Prev.getOpcode()))
1789 return false;
1791 assert(Root.getMF() == Prev.getMF());
1792 const MachineRegisterInfo *MRI = &Root.getMF()->getRegInfo();
1793 const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
1795 // Make sure vtype operands are also the same.
1796 const MCInstrDesc &Desc = get(Root.getOpcode());
1797 const uint64_t TSFlags = Desc.TSFlags;
1799 auto checkImmOperand = [&](unsigned OpIdx) {
1800 return Root.getOperand(OpIdx).getImm() == Prev.getOperand(OpIdx).getImm();
1803 auto checkRegOperand = [&](unsigned OpIdx) {
1804 return Root.getOperand(OpIdx).getReg() == Prev.getOperand(OpIdx).getReg();
1807 // PassThru
1808 // TODO: Potentially we can loosen the condition to consider Root to be
1809 // associable with Prev if Root has NoReg as passthru, in which case we
1810 // would also need to loosen the condition on the vector policies between them.
1811 if (!checkRegOperand(1))
1812 return false;
1814 // SEW
1815 if (RISCVII::hasSEWOp(TSFlags) &&
1816 !checkImmOperand(RISCVII::getSEWOpNum(Desc)))
1817 return false;
1819 // Mask
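// Both instructions must be governed by the same mask value: walk backwards
// from Root to find the V0 def reaching Root and the one reaching Prev; they
// must either be the same virtual register, or there must be no V0 def
// between the two instructions at all.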
1820 if (RISCVII::usesMaskPolicy(TSFlags)) {
1821 const MachineBasicBlock *MBB = Root.getParent();
1822 const MachineBasicBlock::const_reverse_iterator It1(&Root);
1823 const MachineBasicBlock::const_reverse_iterator It2(&Prev);
1824 Register MI1VReg;
1826 bool SeenMI2 = false;
1827 for (auto End = MBB->rend(), It = It1; It != End; ++It) {
1828 if (It == It2) {
1829 SeenMI2 = true;
1830 if (!MI1VReg.isValid())
1831 // There is no V0 def between Root and Prev; they're sharing the
1832 // same V0.
1833 break;
1836 if (It->modifiesRegister(RISCV::V0, TRI)) {
1837 Register SrcReg = It->getOperand(1).getReg();
1838 // If it's not a virtual register it'll be more difficult to track its
1839 // defs, so bail out here just to be safe.
1840 if (!SrcReg.isVirtual())
1841 return false;
1843 if (!MI1VReg.isValid()) {
1844 // This is the V0 def for Root.
1845 MI1VReg = SrcReg;
1846 continue;
1849 // Some random mask updates.
1850 if (!SeenMI2)
1851 continue;
1853 // This is the V0 def for Prev; check if it's the same as that of
1854 // Root.
1855 if (MI1VReg != SrcReg)
1856 return false;
1857 else
1858 break;
1862 // If we haven't encountered Prev, it's likely that this function was
1863 // called in the wrong way (e.g. Root is before Prev).
1864 assert(SeenMI2 && "Prev is expected to appear before Root");
1867 // Tail / Mask policies
1868 if (RISCVII::hasVecPolicyOp(TSFlags) &&
1869 !checkImmOperand(RISCVII::getVecPolicyOpNum(Desc)))
1870 return false;
1872 // VL
1873 if (RISCVII::hasVLOp(TSFlags)) {
1874 unsigned OpIdx = RISCVII::getVLOpNum(Desc);
1875 const MachineOperand &Op1 = Root.getOperand(OpIdx);
1876 const MachineOperand &Op2 = Prev.getOperand(OpIdx);
1877 if (Op1.getType() != Op2.getType())
1878 return false;
1879 switch (Op1.getType()) {
1880 case MachineOperand::MO_Register:
1881 if (Op1.getReg() != Op2.getReg())
1882 return false;
1883 break;
1884 case MachineOperand::MO_Immediate:
1885 if (Op1.getImm() != Op2.getImm())
1886 return false;
1887 break;
1888 default:
1889 llvm_unreachable("Unrecognized VL operand type");
1893 // Rounding modes
1894 if (RISCVII::hasRoundModeOp(TSFlags) &&
1895 !checkImmOperand(RISCVII::getVLOpNum(Desc) - 1))
1896 return false;
1898 return true;
1901 // Most of our RVV pseudos have a passthru operand, so the real operands
1902 // start from index = 2.
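// For a typical pseudo such as PseudoVADD_VV, the operand layout is roughly
// (dst, passthru, src1, src2, vl, sew, policy), so the two reassociable
// sources are operands 2 and 3.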
1903 bool RISCVInstrInfo::hasReassociableVectorSibling(const MachineInstr &Inst,
1904 bool &Commuted) const {
1905 const MachineBasicBlock *MBB = Inst.getParent();
1906 const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
1907 assert(RISCVII::isFirstDefTiedToFirstUse(get(Inst.getOpcode())) &&
1908 "Expect the present of passthrough operand.");
1909 MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg());
1910 MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(3).getReg());
1912 // If only one operand has the same or inverse opcode and it's the second
1913 // source operand, the operands must be commuted.
1914 Commuted = !areRVVInstsReassociable(Inst, *MI1) &&
1915 areRVVInstsReassociable(Inst, *MI2);
1916 if (Commuted)
1917 std::swap(MI1, MI2);
1919 return areRVVInstsReassociable(Inst, *MI1) &&
1920 (isVectorAssociativeAndCommutative(*MI1) ||
1921 isVectorAssociativeAndCommutative(*MI1, /* Invert */ true)) &&
1922 hasReassociableOperands(*MI1, MBB) &&
1923 MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg());
1926 bool RISCVInstrInfo::hasReassociableOperands(
1927 const MachineInstr &Inst, const MachineBasicBlock *MBB) const {
1928 if (!isVectorAssociativeAndCommutative(Inst) &&
1929 !isVectorAssociativeAndCommutative(Inst, /*Invert=*/true))
1930 return TargetInstrInfo::hasReassociableOperands(Inst, MBB);
1932 const MachineOperand &Op1 = Inst.getOperand(2);
1933 const MachineOperand &Op2 = Inst.getOperand(3);
1934 const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
1936 // We need virtual register definitions for the operands that we will
1937 // reassociate.
1938 MachineInstr *MI1 = nullptr;
1939 MachineInstr *MI2 = nullptr;
1940 if (Op1.isReg() && Op1.getReg().isVirtual())
1941 MI1 = MRI.getUniqueVRegDef(Op1.getReg());
1942 if (Op2.isReg() && Op2.getReg().isVirtual())
1943 MI2 = MRI.getUniqueVRegDef(Op2.getReg());
1945 // And at least one operand must be defined in MBB.
1946 return MI1 && MI2 && (MI1->getParent() == MBB || MI2->getParent() == MBB);
1949 void RISCVInstrInfo::getReassociateOperandIndices(
1950 const MachineInstr &Root, unsigned Pattern,
1951 std::array<unsigned, 5> &OperandIndices) const {
1952 TargetInstrInfo::getReassociateOperandIndices(Root, Pattern, OperandIndices);
1953 if (RISCV::getRVVMCOpcode(Root.getOpcode())) {
1954 // Skip the passthrough operand, so increment all indices by one.
1955 for (unsigned I = 0; I < 5; ++I)
1956 ++OperandIndices[I];
1960 bool RISCVInstrInfo::hasReassociableSibling(const MachineInstr &Inst,
1961 bool &Commuted) const {
1962 if (isVectorAssociativeAndCommutative(Inst) ||
1963 isVectorAssociativeAndCommutative(Inst, /*Invert=*/true))
1964 return hasReassociableVectorSibling(Inst, Commuted);
1966 if (!TargetInstrInfo::hasReassociableSibling(Inst, Commuted))
1967 return false;
1969 const MachineRegisterInfo &MRI = Inst.getMF()->getRegInfo();
1970 unsigned OperandIdx = Commuted ? 2 : 1;
1971 const MachineInstr &Sibling =
1972 *MRI.getVRegDef(Inst.getOperand(OperandIdx).getReg());
1974 int16_t InstFrmOpIdx =
1975 RISCV::getNamedOperandIdx(Inst.getOpcode(), RISCV::OpName::frm);
1976 int16_t SiblingFrmOpIdx =
1977 RISCV::getNamedOperandIdx(Sibling.getOpcode(), RISCV::OpName::frm);
1979 return (InstFrmOpIdx < 0 && SiblingFrmOpIdx < 0) ||
1980 RISCV::hasEqualFRM(Inst, Sibling);
1983 bool RISCVInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
1984 bool Invert) const {
1985 if (isVectorAssociativeAndCommutative(Inst, Invert))
1986 return true;
1988 unsigned Opc = Inst.getOpcode();
1989 if (Invert) {
1990 auto InverseOpcode = getInverseOpcode(Opc);
1991 if (!InverseOpcode)
1992 return false;
1993 Opc = *InverseOpcode;
1996 if (isFADD(Opc) || isFMUL(Opc))
1997 return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
1998 Inst.getFlag(MachineInstr::MIFlag::FmNsz);
2000 switch (Opc) {
2001 default:
2002 return false;
2003 case RISCV::ADD:
2004 case RISCV::ADDW:
2005 case RISCV::AND:
2006 case RISCV::OR:
2007 case RISCV::XOR:
2008 // From RISC-V ISA spec, if both the high and low bits of the same product
2009 // are required, then the recommended code sequence is:
2011 // MULH[[S]U] rdh, rs1, rs2
2012 // MUL rdl, rs1, rs2
2013 // (source register specifiers must be in same order and rdh cannot be the
2014 // same as rs1 or rs2)
2016 // Microarchitectures can then fuse these into a single multiply operation
2017 // instead of performing two separate multiplies.
2018 // MachineCombiner may reassociate MUL operands and lose the fusion
2019 // opportunity.
2020 case RISCV::MUL:
2021 case RISCV::MULW:
2022 case RISCV::MIN:
2023 case RISCV::MINU:
2024 case RISCV::MAX:
2025 case RISCV::MAXU:
2026 case RISCV::FMIN_H:
2027 case RISCV::FMIN_S:
2028 case RISCV::FMIN_D:
2029 case RISCV::FMAX_H:
2030 case RISCV::FMAX_S:
2031 case RISCV::FMAX_D:
2032 return true;
2035 return false;
2038 std::optional<unsigned>
2039 RISCVInstrInfo::getInverseOpcode(unsigned Opcode) const {
2040 #define RVV_OPC_LMUL_CASE(OPC, INV) \
2041 case RISCV::OPC##_M1: \
2042 return RISCV::INV##_M1; \
2043 case RISCV::OPC##_M2: \
2044 return RISCV::INV##_M2; \
2045 case RISCV::OPC##_M4: \
2046 return RISCV::INV##_M4; \
2047 case RISCV::OPC##_M8: \
2048 return RISCV::INV##_M8; \
2049 case RISCV::OPC##_MF2: \
2050 return RISCV::INV##_MF2; \
2051 case RISCV::OPC##_MF4: \
2052 return RISCV::INV##_MF4; \
2053 case RISCV::OPC##_MF8: \
2054 return RISCV::INV##_MF8
2056 #define RVV_OPC_LMUL_MASK_CASE(OPC, INV) \
2057 case RISCV::OPC##_M1_MASK: \
2058 return RISCV::INV##_M1_MASK; \
2059 case RISCV::OPC##_M2_MASK: \
2060 return RISCV::INV##_M2_MASK; \
2061 case RISCV::OPC##_M4_MASK: \
2062 return RISCV::INV##_M4_MASK; \
2063 case RISCV::OPC##_M8_MASK: \
2064 return RISCV::INV##_M8_MASK; \
2065 case RISCV::OPC##_MF2_MASK: \
2066 return RISCV::INV##_MF2_MASK; \
2067 case RISCV::OPC##_MF4_MASK: \
2068 return RISCV::INV##_MF4_MASK; \
2069 case RISCV::OPC##_MF8_MASK: \
2070 return RISCV::INV##_MF8_MASK
2072 switch (Opcode) {
2073 default:
2074 return std::nullopt;
2075 case RISCV::FADD_H:
2076 return RISCV::FSUB_H;
2077 case RISCV::FADD_S:
2078 return RISCV::FSUB_S;
2079 case RISCV::FADD_D:
2080 return RISCV::FSUB_D;
2081 case RISCV::FSUB_H:
2082 return RISCV::FADD_H;
2083 case RISCV::FSUB_S:
2084 return RISCV::FADD_S;
2085 case RISCV::FSUB_D:
2086 return RISCV::FADD_D;
2087 case RISCV::ADD:
2088 return RISCV::SUB;
2089 case RISCV::SUB:
2090 return RISCV::ADD;
2091 case RISCV::ADDW:
2092 return RISCV::SUBW;
2093 case RISCV::SUBW:
2094 return RISCV::ADDW;
2095 // clang-format off
2096 RVV_OPC_LMUL_CASE(PseudoVADD_VV, PseudoVSUB_VV);
2097 RVV_OPC_LMUL_MASK_CASE(PseudoVADD_VV, PseudoVSUB_VV);
2098 RVV_OPC_LMUL_CASE(PseudoVSUB_VV, PseudoVADD_VV);
2099 RVV_OPC_LMUL_MASK_CASE(PseudoVSUB_VV, PseudoVADD_VV);
2100 // clang-format on
2103 #undef RVV_OPC_LMUL_MASK_CASE
2104 #undef RVV_OPC_LMUL_CASE
2107 static bool canCombineFPFusedMultiply(const MachineInstr &Root,
2108 const MachineOperand &MO,
2109 bool DoRegPressureReduce) {
2110 if (!MO.isReg() || !MO.getReg().isVirtual())
2111 return false;
2112 const MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();
2113 MachineInstr *MI = MRI.getVRegDef(MO.getReg());
2114 if (!MI || !isFMUL(MI->getOpcode()))
2115 return false;
2117 if (!Root.getFlag(MachineInstr::MIFlag::FmContract) ||
2118 !MI->getFlag(MachineInstr::MIFlag::FmContract))
2119 return false;
2121 // Try combining even if the fmul has more than one use, as it eliminates the
2122 // dependency between fadd (fsub) and fmul. However, it can extend the live
2123 // ranges of the fmul operands, so reject the transformation in register pressure
2124 // reduction mode.
2125 if (DoRegPressureReduce && !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
2126 return false;
2128 // Do not combine instructions from different basic blocks.
2129 if (Root.getParent() != MI->getParent())
2130 return false;
2131 return RISCV::hasEqualFRM(Root, *MI);
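// Naming of the patterns below, as implied by the operand indices used:
// FMADD_AX/FMSUB cover the case where the fmul result feeds source operand 1
// of the fadd/fsub (the addend is operand 2), while FMADD_XA/FNMSUB cover the
// case where it feeds source operand 2 (the addend is operand 1).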
2134 static bool getFPFusedMultiplyPatterns(MachineInstr &Root,
2135 SmallVectorImpl<unsigned> &Patterns,
2136 bool DoRegPressureReduce) {
2137 unsigned Opc = Root.getOpcode();
2138 bool IsFAdd = isFADD(Opc);
2139 if (!IsFAdd && !isFSUB(Opc))
2140 return false;
2141 bool Added = false;
2142 if (canCombineFPFusedMultiply(Root, Root.getOperand(1),
2143 DoRegPressureReduce)) {
2144 Patterns.push_back(IsFAdd ? RISCVMachineCombinerPattern::FMADD_AX
2145 : RISCVMachineCombinerPattern::FMSUB);
2146 Added = true;
2148 if (canCombineFPFusedMultiply(Root, Root.getOperand(2),
2149 DoRegPressureReduce)) {
2150 Patterns.push_back(IsFAdd ? RISCVMachineCombinerPattern::FMADD_XA
2151 : RISCVMachineCombinerPattern::FNMSUB);
2152 Added = true;
2154 return Added;
2157 static bool getFPPatterns(MachineInstr &Root,
2158 SmallVectorImpl<unsigned> &Patterns,
2159 bool DoRegPressureReduce) {
2160 return getFPFusedMultiplyPatterns(Root, Patterns, DoRegPressureReduce);
2163 /// Utility routine that checks if \param MO is defined by an
2164 /// \param CombineOpc instruction in the basic block \param MBB
2165 static const MachineInstr *canCombine(const MachineBasicBlock &MBB,
2166 const MachineOperand &MO,
2167 unsigned CombineOpc) {
2168 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2169 const MachineInstr *MI = nullptr;
2171 if (MO.isReg() && MO.getReg().isVirtual())
2172 MI = MRI.getUniqueVRegDef(MO.getReg());
2173 // And it needs to be in the trace (otherwise, it won't have a depth).
2174 if (!MI || MI->getParent() != &MBB || MI->getOpcode() != CombineOpc)
2175 return nullptr;
2176 // It must only be used by the user we combine with.
2177 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
2178 return nullptr;
2180 return MI;
2183 /// Utility routine that checks if \param MO is defined by a SLLI in \param
2184 /// MBB that can be combined by splitting across 2 SHXADD instructions. The
2185 /// first SHXADD shift amount is given by \param OuterShiftAmt.
2186 static bool canCombineShiftIntoShXAdd(const MachineBasicBlock &MBB,
2187 const MachineOperand &MO,
2188 unsigned OuterShiftAmt) {
2189 const MachineInstr *ShiftMI = canCombine(MBB, MO, RISCV::SLLI);
2190 if (!ShiftMI)
2191 return false;
2193 unsigned InnerShiftAmt = ShiftMI->getOperand(2).getImm();
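// The inner shift must be at least as large as the outer shift, and the
// difference must be representable by an add or sh1add/sh2add/sh3add,
// i.e. in the range [0, 3].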
2194 if (InnerShiftAmt < OuterShiftAmt || (InnerShiftAmt - OuterShiftAmt) > 3)
2195 return false;
2197 return true;
2200 // Returns the shift amount from a SHXADD instruction. Returns 0 if the
2201 // instruction is not a SHXADD.
2202 static unsigned getSHXADDShiftAmount(unsigned Opc) {
2203 switch (Opc) {
2204 default:
2205 return 0;
2206 case RISCV::SH1ADD:
2207 return 1;
2208 case RISCV::SH2ADD:
2209 return 2;
2210 case RISCV::SH3ADD:
2211 return 3;
2215 // Look for opportunities to combine (sh3add Z, (add X, (slli Y, 5))) into
2216 // (sh3add (sh2add Y, Z), X).
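// Both forms compute (Y << 5) + (Z << 3) + X: the original is
// ((Z << 3) + (X + (Y << 5))) and the combined form is
// ((((Y << 2) + Z) << 3) + X).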
2217 static bool getSHXADDPatterns(const MachineInstr &Root,
2218 SmallVectorImpl<unsigned> &Patterns) {
2219 unsigned ShiftAmt = getSHXADDShiftAmount(Root.getOpcode());
2220 if (!ShiftAmt)
2221 return false;
2223 const MachineBasicBlock &MBB = *Root.getParent();
2225 const MachineInstr *AddMI = canCombine(MBB, Root.getOperand(2), RISCV::ADD);
2226 if (!AddMI)
2227 return false;
2229 bool Found = false;
2230 if (canCombineShiftIntoShXAdd(MBB, AddMI->getOperand(1), ShiftAmt)) {
2231 Patterns.push_back(RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP1);
2232 Found = true;
2234 if (canCombineShiftIntoShXAdd(MBB, AddMI->getOperand(2), ShiftAmt)) {
2235 Patterns.push_back(RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP2);
2236 Found = true;
2239 return Found;
2242 CombinerObjective RISCVInstrInfo::getCombinerObjective(unsigned Pattern) const {
2243 switch (Pattern) {
2244 case RISCVMachineCombinerPattern::FMADD_AX:
2245 case RISCVMachineCombinerPattern::FMADD_XA:
2246 case RISCVMachineCombinerPattern::FMSUB:
2247 case RISCVMachineCombinerPattern::FNMSUB:
2248 return CombinerObjective::MustReduceDepth;
2249 default:
2250 return TargetInstrInfo::getCombinerObjective(Pattern);
2254 bool RISCVInstrInfo::getMachineCombinerPatterns(
2255 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
2256 bool DoRegPressureReduce) const {
2258 if (getFPPatterns(Root, Patterns, DoRegPressureReduce))
2259 return true;
2261 if (getSHXADDPatterns(Root, Patterns))
2262 return true;
2264 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
2265 DoRegPressureReduce);
2268 static unsigned getFPFusedMultiplyOpcode(unsigned RootOpc, unsigned Pattern) {
2269 switch (RootOpc) {
2270 default:
2271 llvm_unreachable("Unexpected opcode");
2272 case RISCV::FADD_H:
2273 return RISCV::FMADD_H;
2274 case RISCV::FADD_S:
2275 return RISCV::FMADD_S;
2276 case RISCV::FADD_D:
2277 return RISCV::FMADD_D;
2278 case RISCV::FSUB_H:
2279 return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_H
2280 : RISCV::FNMSUB_H;
2281 case RISCV::FSUB_S:
2282 return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_S
2283 : RISCV::FNMSUB_S;
2284 case RISCV::FSUB_D:
2285 return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_D
2286 : RISCV::FNMSUB_D;
2290 static unsigned getAddendOperandIdx(unsigned Pattern) {
2291 switch (Pattern) {
2292 default:
2293 llvm_unreachable("Unexpected pattern");
2294 case RISCVMachineCombinerPattern::FMADD_AX:
2295 case RISCVMachineCombinerPattern::FMSUB:
2296 return 2;
2297 case RISCVMachineCombinerPattern::FMADD_XA:
2298 case RISCVMachineCombinerPattern::FNMSUB:
2299 return 1;
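// Fuse Prev (an fmul) into Root (an fadd or fsub), e.g. (fadd (fmul a, b), c)
// becomes (fmadd a, b, c). The fmul is only deleted if the fused instruction
// is its sole remaining user.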
2303 static void combineFPFusedMultiply(MachineInstr &Root, MachineInstr &Prev,
2304 unsigned Pattern,
2305 SmallVectorImpl<MachineInstr *> &InsInstrs,
2306 SmallVectorImpl<MachineInstr *> &DelInstrs) {
2307 MachineFunction *MF = Root.getMF();
2308 MachineRegisterInfo &MRI = MF->getRegInfo();
2309 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
2311 MachineOperand &Mul1 = Prev.getOperand(1);
2312 MachineOperand &Mul2 = Prev.getOperand(2);
2313 MachineOperand &Dst = Root.getOperand(0);
2314 MachineOperand &Addend = Root.getOperand(getAddendOperandIdx(Pattern));
2316 Register DstReg = Dst.getReg();
2317 unsigned FusedOpc = getFPFusedMultiplyOpcode(Root.getOpcode(), Pattern);
2318 uint32_t IntersectedFlags = Root.getFlags() & Prev.getFlags();
2319 DebugLoc MergedLoc =
2320 DILocation::getMergedLocation(Root.getDebugLoc(), Prev.getDebugLoc());
2322 bool Mul1IsKill = Mul1.isKill();
2323 bool Mul2IsKill = Mul2.isKill();
2324 bool AddendIsKill = Addend.isKill();
2326 // We need to clear kill flags since we may be extending the live range past
2327 // a kill. If the mul had kill flags, we can preserve those since we know
2328 // where the previous range stopped.
2329 MRI.clearKillFlags(Mul1.getReg());
2330 MRI.clearKillFlags(Mul2.getReg());
2332 MachineInstrBuilder MIB =
2333 BuildMI(*MF, MergedLoc, TII->get(FusedOpc), DstReg)
2334 .addReg(Mul1.getReg(), getKillRegState(Mul1IsKill))
2335 .addReg(Mul2.getReg(), getKillRegState(Mul2IsKill))
2336 .addReg(Addend.getReg(), getKillRegState(AddendIsKill))
2337 .setMIFlags(IntersectedFlags);
2339 InsInstrs.push_back(MIB);
2340 if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg()))
2341 DelInstrs.push_back(&Prev);
2342 DelInstrs.push_back(&Root);
2345 // Combine patterns like (sh3add Z, (add X, (slli Y, 5))) to
2346 // (sh3add (sh2add Y, Z), X) if the shift amount can be split across two
2347 // shXadd instructions. The outer shXadd keeps its original opcode.
2348 static void
2349 genShXAddAddShift(MachineInstr &Root, unsigned AddOpIdx,
2350 SmallVectorImpl<MachineInstr *> &InsInstrs,
2351 SmallVectorImpl<MachineInstr *> &DelInstrs,
2352 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
2353 MachineFunction *MF = Root.getMF();
2354 MachineRegisterInfo &MRI = MF->getRegInfo();
2355 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
2357 unsigned OuterShiftAmt = getSHXADDShiftAmount(Root.getOpcode());
2358 assert(OuterShiftAmt != 0 && "Unexpected opcode");
2360 MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
2361 MachineInstr *ShiftMI =
2362 MRI.getUniqueVRegDef(AddMI->getOperand(AddOpIdx).getReg());
2364 unsigned InnerShiftAmt = ShiftMI->getOperand(2).getImm();
2365 assert(InnerShiftAmt >= OuterShiftAmt && "Unexpected shift amount");
2367 unsigned InnerOpc;
2368 switch (InnerShiftAmt - OuterShiftAmt) {
2369 default:
2370 llvm_unreachable("Unexpected shift amount");
2371 case 0:
2372 InnerOpc = RISCV::ADD;
2373 break;
2374 case 1:
2375 InnerOpc = RISCV::SH1ADD;
2376 break;
2377 case 2:
2378 InnerOpc = RISCV::SH2ADD;
2379 break;
2380 case 3:
2381 InnerOpc = RISCV::SH3ADD;
2382 break;
2385 const MachineOperand &X = AddMI->getOperand(3 - AddOpIdx);
2386 const MachineOperand &Y = ShiftMI->getOperand(1);
2387 const MachineOperand &Z = Root.getOperand(1);
2389 Register NewVR = MRI.createVirtualRegister(&RISCV::GPRRegClass);
2391 auto MIB1 = BuildMI(*MF, MIMetadata(Root), TII->get(InnerOpc), NewVR)
2392 .addReg(Y.getReg(), getKillRegState(Y.isKill()))
2393 .addReg(Z.getReg(), getKillRegState(Z.isKill()));
2394 auto MIB2 = BuildMI(*MF, MIMetadata(Root), TII->get(Root.getOpcode()),
2395 Root.getOperand(0).getReg())
2396 .addReg(NewVR, RegState::Kill)
2397 .addReg(X.getReg(), getKillRegState(X.isKill()));
2399 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
2400 InsInstrs.push_back(MIB1);
2401 InsInstrs.push_back(MIB2);
2402 DelInstrs.push_back(ShiftMI);
2403 DelInstrs.push_back(AddMI);
2404 DelInstrs.push_back(&Root);
2407 void RISCVInstrInfo::genAlternativeCodeSequence(
2408 MachineInstr &Root, unsigned Pattern,
2409 SmallVectorImpl<MachineInstr *> &InsInstrs,
2410 SmallVectorImpl<MachineInstr *> &DelInstrs,
2411 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
2412 MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();
2413 switch (Pattern) {
2414 default:
2415 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
2416 DelInstrs, InstrIdxForVirtReg);
2417 return;
2418 case RISCVMachineCombinerPattern::FMADD_AX:
2419 case RISCVMachineCombinerPattern::FMSUB: {
2420 MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(1).getReg());
2421 combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs);
2422 return;
2424 case RISCVMachineCombinerPattern::FMADD_XA:
2425 case RISCVMachineCombinerPattern::FNMSUB: {
2426 MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(2).getReg());
2427 combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs);
2428 return;
2430 case RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP1:
2431 genShXAddAddShift(Root, 1, InsInstrs, DelInstrs, InstrIdxForVirtReg);
2432 return;
2433 case RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP2:
2434 genShXAddAddShift(Root, 2, InsInstrs, DelInstrs, InstrIdxForVirtReg);
2435 return;
2439 bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
2440 StringRef &ErrInfo) const {
2441 MCInstrDesc const &Desc = MI.getDesc();
2443 for (const auto &[Index, Operand] : enumerate(Desc.operands())) {
2444 unsigned OpType = Operand.OperandType;
2445 if (OpType >= RISCVOp::OPERAND_FIRST_RISCV_IMM &&
2446 OpType <= RISCVOp::OPERAND_LAST_RISCV_IMM) {
2447 const MachineOperand &MO = MI.getOperand(Index);
2448 if (MO.isReg()) {
2449 ErrInfo = "Expected a non-register operand.";
2450 return false;
2452 if (MO.isImm()) {
2453 int64_t Imm = MO.getImm();
2454 bool Ok;
2455 switch (OpType) {
2456 default:
2457 llvm_unreachable("Unexpected operand type");
2459 // clang-format off
2460 #define CASE_OPERAND_UIMM(NUM) \
2461 case RISCVOp::OPERAND_UIMM##NUM: \
2462 Ok = isUInt<NUM>(Imm); \
2463 break;
2464 #define CASE_OPERAND_SIMM(NUM) \
2465 case RISCVOp::OPERAND_SIMM##NUM: \
2466 Ok = isInt<NUM>(Imm); \
2467 break;
2468 CASE_OPERAND_UIMM(1)
2469 CASE_OPERAND_UIMM(2)
2470 CASE_OPERAND_UIMM(3)
2471 CASE_OPERAND_UIMM(4)
2472 CASE_OPERAND_UIMM(5)
2473 CASE_OPERAND_UIMM(6)
2474 CASE_OPERAND_UIMM(7)
2475 CASE_OPERAND_UIMM(8)
2476 CASE_OPERAND_UIMM(12)
2477 CASE_OPERAND_UIMM(20)
2478 // clang-format on
2479 case RISCVOp::OPERAND_UIMM2_LSB0:
2480 Ok = isShiftedUInt<1, 1>(Imm);
2481 break;
2482 case RISCVOp::OPERAND_UIMM5_LSB0:
2483 Ok = isShiftedUInt<4, 1>(Imm);
2484 break;
2485 case RISCVOp::OPERAND_UIMM6_LSB0:
2486 Ok = isShiftedUInt<5, 1>(Imm);
2487 break;
2488 case RISCVOp::OPERAND_UIMM7_LSB00:
2489 Ok = isShiftedUInt<5, 2>(Imm);
2490 break;
2491 case RISCVOp::OPERAND_UIMM8_LSB00:
2492 Ok = isShiftedUInt<6, 2>(Imm);
2493 break;
2494 case RISCVOp::OPERAND_UIMM8_LSB000:
2495 Ok = isShiftedUInt<5, 3>(Imm);
2496 break;
2497 case RISCVOp::OPERAND_UIMM8_GE32:
2498 Ok = isUInt<8>(Imm) && Imm >= 32;
2499 break;
2500 case RISCVOp::OPERAND_UIMM9_LSB000:
2501 Ok = isShiftedUInt<6, 3>(Imm);
2502 break;
2503 case RISCVOp::OPERAND_SIMM10_LSB0000_NONZERO:
2504 Ok = isShiftedInt<6, 4>(Imm) && (Imm != 0);
2505 break;
2506 case RISCVOp::OPERAND_UIMM10_LSB00_NONZERO:
2507 Ok = isShiftedUInt<8, 2>(Imm) && (Imm != 0);
2508 break;
2509 case RISCVOp::OPERAND_ZERO:
2510 Ok = Imm == 0;
2511 break;
2512 // clang-format off
2513 CASE_OPERAND_SIMM(5)
2514 CASE_OPERAND_SIMM(6)
2515 CASE_OPERAND_SIMM(12)
2516 // clang-format on
2517 case RISCVOp::OPERAND_SIMM5_PLUS1:
2518 Ok = (isInt<5>(Imm) && Imm != -16) || Imm == 16;
2519 break;
2520 case RISCVOp::OPERAND_SIMM6_NONZERO:
2521 Ok = Imm != 0 && isInt<6>(Imm);
2522 break;
2523 case RISCVOp::OPERAND_VTYPEI10:
2524 Ok = isUInt<10>(Imm);
2525 break;
2526 case RISCVOp::OPERAND_VTYPEI11:
2527 Ok = isUInt<11>(Imm);
2528 break;
2529 case RISCVOp::OPERAND_SIMM12_LSB00000:
2530 Ok = isShiftedInt<7, 5>(Imm);
2531 break;
2532 case RISCVOp::OPERAND_UIMMLOG2XLEN:
2533 Ok = STI.is64Bit() ? isUInt<6>(Imm) : isUInt<5>(Imm);
2534 break;
2535 case RISCVOp::OPERAND_UIMMLOG2XLEN_NONZERO:
2536 Ok = STI.is64Bit() ? isUInt<6>(Imm) : isUInt<5>(Imm);
2537 Ok = Ok && Imm != 0;
2538 break;
2539 case RISCVOp::OPERAND_CLUI_IMM:
2540 Ok = (isUInt<5>(Imm) && Imm != 0) ||
2541 (Imm >= 0xfffe0 && Imm <= 0xfffff);
2542 break;
2543 case RISCVOp::OPERAND_RVKRNUM:
2544 Ok = Imm >= 0 && Imm <= 10;
2545 break;
2546 case RISCVOp::OPERAND_RVKRNUM_0_7:
2547 Ok = Imm >= 0 && Imm <= 7;
2548 break;
2549 case RISCVOp::OPERAND_RVKRNUM_1_10:
2550 Ok = Imm >= 1 && Imm <= 10;
2551 break;
2552 case RISCVOp::OPERAND_RVKRNUM_2_14:
2553 Ok = Imm >= 2 && Imm <= 14;
2554 break;
2555 case RISCVOp::OPERAND_SPIMM:
2556 Ok = (Imm & 0xf) == 0;
2557 break;
2558 case RISCVOp::OPERAND_FRMARG:
2559 Ok = RISCVFPRndMode::isValidRoundingMode(Imm);
2560 break;
2561 case RISCVOp::OPERAND_RTZARG:
2562 Ok = Imm == RISCVFPRndMode::RTZ;
2563 break;
2564 case RISCVOp::OPERAND_COND_CODE:
2565 Ok = Imm >= 0 && Imm < RISCVCC::COND_INVALID;
2566 break;
2567 case RISCVOp::OPERAND_VEC_POLICY:
2568 Ok = (Imm & (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC)) == Imm;
2569 break;
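// The SEW operand holds log2(SEW); valid SEW values are 8, 16, 32 and 64, so
// the encoded value must be in the range [3, 6].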
2570 case RISCVOp::OPERAND_SEW:
2571 Ok = (isUInt<5>(Imm) && RISCVVType::isValidSEW(1 << Imm));
2572 break;
2573 case RISCVOp::OPERAND_SEW_MASK:
2574 Ok = Imm == 0;
2575 break;
2576 case RISCVOp::OPERAND_VEC_RM:
2577 assert(RISCVII::hasRoundModeOp(Desc.TSFlags));
2578 if (RISCVII::usesVXRM(Desc.TSFlags))
2579 Ok = isUInt<2>(Imm);
2580 else
2581 Ok = RISCVFPRndMode::isValidRoundingMode(Imm);
2582 break;
2584 if (!Ok) {
2585 ErrInfo = "Invalid immediate";
2586 return false;
2592 const uint64_t TSFlags = Desc.TSFlags;
2593 if (RISCVII::hasVLOp(TSFlags)) {
2594 const MachineOperand &Op = MI.getOperand(RISCVII::getVLOpNum(Desc));
2595 if (!Op.isImm() && !Op.isReg()) {
2596 ErrInfo = "Invalid operand type for VL operand";
2597 return false;
2599 if (Op.isReg() && Op.getReg() != RISCV::NoRegister) {
2600 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2601 auto *RC = MRI.getRegClass(Op.getReg());
2602 if (!RISCV::GPRRegClass.hasSubClassEq(RC)) {
2603 ErrInfo = "Invalid register class for VL operand";
2604 return false;
2607 if (!RISCVII::hasSEWOp(TSFlags)) {
2608 ErrInfo = "VL operand w/o SEW operand?";
2609 return false;
2612 if (RISCVII::hasSEWOp(TSFlags)) {
2613 unsigned OpIdx = RISCVII::getSEWOpNum(Desc);
2614 if (!MI.getOperand(OpIdx).isImm()) {
2615 ErrInfo = "SEW value expected to be an immediate";
2616 return false;
2618 uint64_t Log2SEW = MI.getOperand(OpIdx).getImm();
2619 if (Log2SEW > 31) {
2620 ErrInfo = "Unexpected SEW value";
2621 return false;
2623 unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
2624 if (!RISCVVType::isValidSEW(SEW)) {
2625 ErrInfo = "Unexpected SEW value";
2626 return false;
2629 if (RISCVII::hasVecPolicyOp(TSFlags)) {
2630 unsigned OpIdx = RISCVII::getVecPolicyOpNum(Desc);
2631 if (!MI.getOperand(OpIdx).isImm()) {
2632 ErrInfo = "Policy operand expected to be an immediate";
2633 return false;
2635 uint64_t Policy = MI.getOperand(OpIdx).getImm();
2636 if (Policy > (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC)) {
2637 ErrInfo = "Invalid Policy Value";
2638 return false;
2640 if (!RISCVII::hasVLOp(TSFlags)) {
2641 ErrInfo = "policy operand w/o VL operand?";
2642 return false;
2645 // VecPolicy operands can only exist on instructions with passthru/merge
2646 // arguments. Note that not all instructions with a passthru have vec policy
2647 // operands; some instructions have implicit policies.
2648 unsigned UseOpIdx;
2649 if (!MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
2650 ErrInfo = "policy operand w/o tied operand?";
2651 return false;
2655 if (int Idx = RISCVII::getFRMOpNum(Desc);
2656 Idx >= 0 && MI.getOperand(Idx).getImm() == RISCVFPRndMode::DYN &&
2657 !MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr)) {
2658 ErrInfo = "dynamic rounding mode should read FRM";
2659 return false;
2662 return true;
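// For example, `addi a1, a0, 8` followed by `lw a2, 4(a1)` can be folded into
// `lw a2, 12(a0)`, provided the combined offset still fits in a signed
// 12-bit immediate.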
2665 bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
2666 const MachineInstr &AddrI,
2667 ExtAddrMode &AM) const {
2668 switch (MemI.getOpcode()) {
2669 default:
2670 return false;
2671 case RISCV::LB:
2672 case RISCV::LBU:
2673 case RISCV::LH:
2674 case RISCV::LH_INX:
2675 case RISCV::LHU:
2676 case RISCV::LW:
2677 case RISCV::LW_INX:
2678 case RISCV::LWU:
2679 case RISCV::LD:
2680 case RISCV::FLH:
2681 case RISCV::FLW:
2682 case RISCV::FLD:
2683 case RISCV::SB:
2684 case RISCV::SH:
2685 case RISCV::SH_INX:
2686 case RISCV::SW:
2687 case RISCV::SW_INX:
2688 case RISCV::SD:
2689 case RISCV::FSH:
2690 case RISCV::FSW:
2691 case RISCV::FSD:
2692 break;
2695 if (MemI.getOperand(0).getReg() == Reg)
2696 return false;
2698 if (AddrI.getOpcode() != RISCV::ADDI || !AddrI.getOperand(1).isReg() ||
2699 !AddrI.getOperand(2).isImm())
2700 return false;
2702 int64_t OldOffset = MemI.getOperand(2).getImm();
2703 int64_t Disp = AddrI.getOperand(2).getImm();
2704 int64_t NewOffset = OldOffset + Disp;
2705 if (!STI.is64Bit())
2706 NewOffset = SignExtend64<32>(NewOffset);
2708 if (!isInt<12>(NewOffset))
2709 return false;
2711 AM.BaseReg = AddrI.getOperand(1).getReg();
2712 AM.ScaledReg = 0;
2713 AM.Scale = 0;
2714 AM.Displacement = NewOffset;
2715 AM.Form = ExtAddrMode::Formula::Basic;
2716 return true;
2719 MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI,
2720 const ExtAddrMode &AM) const {
2722 const DebugLoc &DL = MemI.getDebugLoc();
2723 MachineBasicBlock &MBB = *MemI.getParent();
2725 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
2726 "Addressing mode not supported for folding");
2728 return BuildMI(MBB, MemI, DL, get(MemI.getOpcode()))
2729 .addReg(MemI.getOperand(0).getReg(),
2730 MemI.mayLoad() ? RegState::Define : 0)
2731 .addReg(AM.BaseReg)
2732 .addImm(AM.Displacement)
2733 .setMemRefs(MemI.memoperands())
2734 .setMIFlags(MemI.getFlags());
2737 bool RISCVInstrInfo::getMemOperandsWithOffsetWidth(
2738 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2739 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2740 const TargetRegisterInfo *TRI) const {
2741 if (!LdSt.mayLoadOrStore())
2742 return false;
2744 // Conservatively, only handle scalar loads/stores for now.
2745 switch (LdSt.getOpcode()) {
2746 case RISCV::LB:
2747 case RISCV::LBU:
2748 case RISCV::SB:
2749 case RISCV::LH:
2750 case RISCV::LH_INX:
2751 case RISCV::LHU:
2752 case RISCV::FLH:
2753 case RISCV::SH:
2754 case RISCV::SH_INX:
2755 case RISCV::FSH:
2756 case RISCV::LW:
2757 case RISCV::LW_INX:
2758 case RISCV::LWU:
2759 case RISCV::FLW:
2760 case RISCV::SW:
2761 case RISCV::SW_INX:
2762 case RISCV::FSW:
2763 case RISCV::LD:
2764 case RISCV::FLD:
2765 case RISCV::SD:
2766 case RISCV::FSD:
2767 break;
2768 default:
2769 return false;
2771 const MachineOperand *BaseOp;
2772 OffsetIsScalable = false;
2773 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
2774 return false;
2775 BaseOps.push_back(BaseOp);
2776 return true;
2779 // TODO: This was copied from SIInstrInfo. Could it be lifted to a common
2780 // helper?
2781 static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
2782 ArrayRef<const MachineOperand *> BaseOps1,
2783 const MachineInstr &MI2,
2784 ArrayRef<const MachineOperand *> BaseOps2) {
2785 // Only examine the first "base" operand of each instruction, on the
2786 // assumption that it represents the real base address of the memory access.
2787 // Other operands are typically offsets or indices from this base address.
2788 if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))
2789 return true;
2791 if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand())
2792 return false;
2794 auto MO1 = *MI1.memoperands_begin();
2795 auto MO2 = *MI2.memoperands_begin();
2796 if (MO1->getAddrSpace() != MO2->getAddrSpace())
2797 return false;
2799 auto Base1 = MO1->getValue();
2800 auto Base2 = MO2->getValue();
2801 if (!Base1 || !Base2)
2802 return false;
2803 Base1 = getUnderlyingObject(Base1);
2804 Base2 = getUnderlyingObject(Base2);
2806 if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))
2807 return false;
2809 return Base1 == Base2;
2812 bool RISCVInstrInfo::shouldClusterMemOps(
2813 ArrayRef<const MachineOperand *> BaseOps1, int64_t Offset1,
2814 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
2815 int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize,
2816 unsigned NumBytes) const {
2817 // If the mem ops (to be clustered) do not have the same base ptr, then they
2818 // should not be clustered
2819 if (!BaseOps1.empty() && !BaseOps2.empty()) {
2820 const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent();
2821 const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent();
2822 if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOps1, SecondLdSt, BaseOps2))
2823 return false;
2824 } else if (!BaseOps1.empty() || !BaseOps2.empty()) {
2825 // If only one base op is empty, they do not have the same base ptr
2826 return false;
2829 unsigned CacheLineSize =
2830 BaseOps1.front()->getParent()->getMF()->getSubtarget().getCacheLineSize();
2831 // Assume a cache line size of 64 bytes if no size is set in RISCVSubtarget.
2832 CacheLineSize = CacheLineSize ? CacheLineSize : 64;
2833 // Cluster if the memory operations are on the same or a neighbouring cache
2834 // line, but limit the maximum ClusterSize to avoid creating too much
2835 // additional register pressure.
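// For example, with a 64-byte cache line, two loads at offsets 0 and 48 from
// the same base may be clustered, while loads at offsets 0 and 128 will not
// be.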
2836 return ClusterSize <= 4 && std::abs(Offset1 - Offset2) < CacheLineSize;
2839 // Set BaseReg (the base register operand), Offset (the byte offset being
2840 // accessed) and the access Width of the passed instruction that reads/writes
2841 // memory. Returns false if the instruction does not read/write memory or the
2842 // BaseReg/Offset/Width can't be determined. It is not guaranteed to
2843 // recognise base operands and offsets in all cases.
2844 // TODO: Add an IsScalable bool ref argument (like the equivalent AArch64
2845 // function) and set it as appropriate.
2846 bool RISCVInstrInfo::getMemOperandWithOffsetWidth(
2847 const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
2848 LocationSize &Width, const TargetRegisterInfo *TRI) const {
2849 if (!LdSt.mayLoadOrStore())
2850 return false;
2852 // Here we assume the standard RISC-V ISA, which uses a base+offset
2853 // addressing mode. You'll need to relax these conditions to support custom
2854 // load/store instructions.
2855 if (LdSt.getNumExplicitOperands() != 3)
2856 return false;
2857 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
2858 !LdSt.getOperand(2).isImm())
2859 return false;
2861 if (!LdSt.hasOneMemOperand())
2862 return false;
2864 Width = (*LdSt.memoperands_begin())->getSize();
2865 BaseReg = &LdSt.getOperand(1);
2866 Offset = LdSt.getOperand(2).getImm();
2867 return true;
2870 bool RISCVInstrInfo::areMemAccessesTriviallyDisjoint(
2871 const MachineInstr &MIa, const MachineInstr &MIb) const {
2872 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
2873 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
2875 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
2876 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
2877 return false;
2879 // Retrieve the base register, offset from the base register and width. Width
2880 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If
2881 // base registers are identical, and the offset of a lower memory access +
2882 // the width doesn't overlap the offset of a higher memory access,
2883 // then the memory accesses are different.
2884 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
2885 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
2886 int64_t OffsetA = 0, OffsetB = 0;
2887 LocationSize WidthA = 0, WidthB = 0;
2888 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
2889 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
2890 if (BaseOpA->isIdenticalTo(*BaseOpB)) {
2891 int LowOffset = std::min(OffsetA, OffsetB);
2892 int HighOffset = std::max(OffsetA, OffsetB);
2893 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
2894 if (LowWidth.hasValue() &&
2895 LowOffset + (int)LowWidth.getValue() <= HighOffset)
2896 return true;
2899 return false;
2902 std::pair<unsigned, unsigned>
2903 RISCVInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
2904 const unsigned Mask = RISCVII::MO_DIRECT_FLAG_MASK;
2905 return std::make_pair(TF & Mask, TF & ~Mask);
2908 ArrayRef<std::pair<unsigned, const char *>>
2909 RISCVInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
2910 using namespace RISCVII;
2911 static const std::pair<unsigned, const char *> TargetFlags[] = {
2912 {MO_CALL, "riscv-call"},
2913 {MO_LO, "riscv-lo"},
2914 {MO_HI, "riscv-hi"},
2915 {MO_PCREL_LO, "riscv-pcrel-lo"},
2916 {MO_PCREL_HI, "riscv-pcrel-hi"},
2917 {MO_GOT_HI, "riscv-got-hi"},
2918 {MO_TPREL_LO, "riscv-tprel-lo"},
2919 {MO_TPREL_HI, "riscv-tprel-hi"},
2920 {MO_TPREL_ADD, "riscv-tprel-add"},
2921 {MO_TLS_GOT_HI, "riscv-tls-got-hi"},
2922 {MO_TLS_GD_HI, "riscv-tls-gd-hi"},
2923 {MO_TLSDESC_HI, "riscv-tlsdesc-hi"},
2924 {MO_TLSDESC_LOAD_LO, "riscv-tlsdesc-load-lo"},
2925 {MO_TLSDESC_ADD_LO, "riscv-tlsdesc-add-lo"},
2926 {MO_TLSDESC_CALL, "riscv-tlsdesc-call"}};
2927 return ArrayRef(TargetFlags);
2929 bool RISCVInstrInfo::isFunctionSafeToOutlineFrom(
2930 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
2931 const Function &F = MF.getFunction();
2933 // Can F be deduplicated by the linker? If it can, don't outline from it.
2934 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
2935 return false;
2937 // Don't outline from functions with section markings; the program could
2938 // expect that all the code is in the named section.
2939 if (F.hasSection())
2940 return false;
2942 // It's safe to outline from MF.
2943 return true;
2946 bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
2947 unsigned &Flags) const {
2948 // More accurate safety checking is done in getOutliningCandidateInfo.
2949 return TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags);
2952 // Enum values indicating how an outlined call should be constructed.
2953 enum MachineOutlinerConstructionID {
2954 MachineOutlinerTailCall,
2955 MachineOutlinerDefault
2958 bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault(
2959 MachineFunction &MF) const {
2960 return MF.getFunction().hasMinSize();
2963 static bool isCandidatePatchable(const MachineBasicBlock &MBB) {
2964 const MachineFunction *MF = MBB.getParent();
2965 const Function &F = MF->getFunction();
2966 return F.getFnAttribute("fentry-call").getValueAsBool() ||
2967 F.hasFnAttribute("patchable-function-entry");
2970 static bool isMIReadsReg(const MachineInstr &MI, const TargetRegisterInfo *TRI,
2971 unsigned RegNo) {
2972 return MI.readsRegister(RegNo, TRI) ||
2973 MI.getDesc().hasImplicitUseOfPhysReg(RegNo);
2976 static bool isMIModifiesReg(const MachineInstr &MI,
2977 const TargetRegisterInfo *TRI, unsigned RegNo) {
2978 return MI.modifiesRegister(RegNo, TRI) ||
2979 MI.getDesc().hasImplicitDefOfPhysReg(RegNo);
2982 static bool cannotInsertTailCall(const MachineBasicBlock &MBB) {
2983 if (!MBB.back().isReturn())
2984 return true;
2985 if (isCandidatePatchable(MBB))
2986 return true;
2988 // If the candidate reads the pre-set register that can be used for
2989 // expanding the PseudoTAIL instruction, then we cannot insert a tail
2990 // call.
2991 const TargetSubtargetInfo &STI = MBB.getParent()->getSubtarget();
2992 unsigned TailExpandUseRegNo =
2993 RISCVII::getTailExpandUseRegNo(STI.getFeatureBits());
2994 for (const MachineInstr &MI : MBB) {
2995 if (isMIReadsReg(MI, STI.getRegisterInfo(), TailExpandUseRegNo))
2996 return true;
2997 if (isMIModifiesReg(MI, STI.getRegisterInfo(), TailExpandUseRegNo))
2998 break;
3000 return false;
3003 static std::optional<MachineOutlinerConstructionID>
3004 analyzeCandidate(outliner::Candidate &C) {
3005 // If the last instruction is a return then we can rely on the
3006 // verification already performed in getOutliningTypeImpl.
3007 if (C.back().isReturn()) {
3008 assert(!cannotInsertTailCall(*C.getMBB()) &&
3009 "The candidate who uses return instruction must be outlined "
3010 "using tail call");
3011 return MachineOutlinerTailCall;
3014 auto CandidateUsesX5 = [](outliner::Candidate &C) {
3015 const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
3016 if (std::any_of(C.begin(), C.end(), [TRI](const MachineInstr &MI) {
3017 return isMIModifiesReg(MI, TRI, RISCV::X5);
3019 return true;
3020 return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI);
3023 if (!CandidateUsesX5(C))
3024 return MachineOutlinerDefault;
3026 return std::nullopt;
3029 std::optional<std::unique_ptr<outliner::OutlinedFunction>>
3030 RISCVInstrInfo::getOutliningCandidateInfo(
3031 const MachineModuleInfo &MMI,
3032 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
3033 unsigned MinRepeats) const {
3035 // Each RepeatedSequenceLoc is identical.
3036 outliner::Candidate &Candidate = RepeatedSequenceLocs[0];
3037 auto CandidateInfo = analyzeCandidate(Candidate);
3038 if (!CandidateInfo)
3039 RepeatedSequenceLocs.clear();
3041 // If the sequence doesn't have enough candidates left, then we're done.
3042 if (RepeatedSequenceLocs.size() < MinRepeats)
3043 return std::nullopt;
3045 unsigned InstrSizeCExt =
3046 Candidate.getMF()->getSubtarget<RISCVSubtarget>().hasStdExtCOrZca() ? 2
3047 : 4;
3048 unsigned CallOverhead = 0, FrameOverhead = 0;
3050 MachineOutlinerConstructionID MOCI = CandidateInfo.value();
3051 switch (MOCI) {
3052 case MachineOutlinerDefault:
3053 // call t0, function = 8 bytes.
3054 CallOverhead = 8;
3055 // jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
3056 FrameOverhead = InstrSizeCExt;
3057 break;
3058 case MachineOutlinerTailCall:
3059 // tail call = auipc + jalr in the worst case without linker relaxation.
3060 CallOverhead = 4 + InstrSizeCExt;
3061 // Using a tail call, we move the ret instruction from the caller to the callee.
3062 FrameOverhead = 0;
3063 break;
3066 for (auto &C : RepeatedSequenceLocs)
3067 C.setCallInfo(MOCI, CallOverhead);
3069 unsigned SequenceSize = 0;
3070 for (auto &MI : Candidate)
3071 SequenceSize += getInstSizeInBytes(MI);
3073 return std::make_unique<outliner::OutlinedFunction>(
3074 RepeatedSequenceLocs, SequenceSize, FrameOverhead, MOCI);
3077 outliner::InstrType
3078 RISCVInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
3079 MachineBasicBlock::iterator &MBBI,
3080 unsigned Flags) const {
3081 MachineInstr &MI = *MBBI;
3082 MachineBasicBlock *MBB = MI.getParent();
3083 const TargetRegisterInfo *TRI =
3084 MBB->getParent()->getSubtarget().getRegisterInfo();
3085 const auto &F = MI.getMF()->getFunction();
3087 // We can manually strip out CFI instructions later.
3088 if (MI.isCFIInstruction())
3089 // If the current function has exception handling code, we can't outline and
3090 // strip these CFI instructions since doing so may break the .eh_frame section
3091 // needed for unwinding.
3092 return F.needsUnwindTableEntry() ? outliner::InstrType::Illegal
3093 : outliner::InstrType::Invisible;
3095 if (cannotInsertTailCall(*MBB) &&
3096 (MI.isReturn() || isMIModifiesReg(MI, TRI, RISCV::X5)))
3097 return outliner::InstrType::Illegal;
3099 // Make sure the operands don't reference something unsafe.
3100 for (const auto &MO : MI.operands()) {
3102 // pcrel-hi and pcrel-lo can't be put in separate sections; filter that out
3103 // if at all possible.
3104 if (MO.getTargetFlags() == RISCVII::MO_PCREL_LO &&
3105 (MI.getMF()->getTarget().getFunctionSections() || F.hasComdat() ||
3106 F.hasSection() || F.getSectionPrefix()))
3107 return outliner::InstrType::Illegal;
3110 return outliner::InstrType::Legal;
3113 void RISCVInstrInfo::buildOutlinedFrame(
3114 MachineBasicBlock &MBB, MachineFunction &MF,
3115 const outliner::OutlinedFunction &OF) const {
3117 // Strip out any CFI instructions
3118 bool Changed = true;
3119 while (Changed) {
3120 Changed = false;
3121 auto I = MBB.begin();
3122 auto E = MBB.end();
3123 for (; I != E; ++I) {
3124 if (I->isCFIInstruction()) {
3125 I->removeFromParent();
3126 Changed = true;
3127 break;
3132 if (OF.FrameConstructionID == MachineOutlinerTailCall)
3133 return;
3135 MBB.addLiveIn(RISCV::X5);
3137 // Add in a return instruction to the end of the outlined frame.
3138 MBB.insert(MBB.end(), BuildMI(MF, DebugLoc(), get(RISCV::JALR))
3139 .addReg(RISCV::X0, RegState::Define)
3140 .addReg(RISCV::X5)
3141 .addImm(0));
3144 MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall(
3145 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
3146 MachineFunction &MF, outliner::Candidate &C) const {
3148 if (C.CallConstructionID == MachineOutlinerTailCall) {
3149 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(RISCV::PseudoTAIL))
3150 .addGlobalAddress(M.getNamedValue(MF.getName()),
3151 /*Offset=*/0, RISCVII::MO_CALL));
3152 return It;
3155 // Add in a call instruction to the outlined function at the given location.
3156 It = MBB.insert(It,
3157 BuildMI(MF, DebugLoc(), get(RISCV::PseudoCALLReg), RISCV::X5)
3158 .addGlobalAddress(M.getNamedValue(MF.getName()), 0,
3159 RISCVII::MO_CALL));
3160 return It;
3163 std::optional<RegImmPair> RISCVInstrInfo::isAddImmediate(const MachineInstr &MI,
3164 Register Reg) const {
3165 // TODO: Handle cases where Reg is a super- or sub-register of the
3166 // destination register.
3167 const MachineOperand &Op0 = MI.getOperand(0);
3168 if (!Op0.isReg() || Reg != Op0.getReg())
3169 return std::nullopt;
3171 // Don't consider ADDIW as a candidate because the caller may not be aware
3172 // of its sign extension behaviour.
3173 if (MI.getOpcode() == RISCV::ADDI && MI.getOperand(1).isReg() &&
3174 MI.getOperand(2).isImm())
3175 return RegImmPair{MI.getOperand(1).getReg(), MI.getOperand(2).getImm()};
3177 return std::nullopt;
3180 // MIR printer helper function to annotate Operands with a comment.
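// For example, an OPERAND_SEW immediate of 3 is annotated as "e8", and a
// vector policy immediate of 3 is annotated as "ta, ma".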
3181 std::string RISCVInstrInfo::createMIROperandComment(
3182 const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
3183 const TargetRegisterInfo *TRI) const {
3184 // Print a generic comment for this operand if there is one.
3185 std::string GenericComment =
3186 TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI);
3187 if (!GenericComment.empty())
3188 return GenericComment;
3190 // If not, we must have an immediate operand.
3191 if (!Op.isImm())
3192 return std::string();
3194 const MCInstrDesc &Desc = MI.getDesc();
3195 if (OpIdx >= Desc.getNumOperands())
3196 return std::string();
3198 std::string Comment;
3199 raw_string_ostream OS(Comment);
3201 const MCOperandInfo &OpInfo = Desc.operands()[OpIdx];
3203 // Print the full VType operand of vsetvli/vsetivli instructions, and the SEW
3204 // operand of vector codegen pseudos.
3205 switch (OpInfo.OperandType) {
3206 case RISCVOp::OPERAND_VTYPEI10:
3207 case RISCVOp::OPERAND_VTYPEI11: {
3208 unsigned Imm = Op.getImm();
3209 RISCVVType::printVType(Imm, OS);
3210 break;
3212 case RISCVOp::OPERAND_SEW:
3213 case RISCVOp::OPERAND_SEW_MASK: {
3214 unsigned Log2SEW = Op.getImm();
3215 unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
3216 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
3217 OS << "e" << SEW;
3218 break;
3220 case RISCVOp::OPERAND_VEC_POLICY:
3221 unsigned Policy = Op.getImm();
3222 assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
3223 "Invalid Policy Value");
3224 OS << (Policy & RISCVII::TAIL_AGNOSTIC ? "ta" : "tu") << ", "
3225 << (Policy & RISCVII::MASK_AGNOSTIC ? "ma" : "mu");
3226 break;
3229 return Comment;
3232 // clang-format off
3233 #define CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL) \
3234 RISCV::Pseudo##OP##_##LMUL
3236 #define CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL) \
3237 RISCV::Pseudo##OP##_##LMUL##_MASK
3239 #define CASE_RVV_OPCODE_LMUL(OP, LMUL) \
3240 CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL): \
3241 case CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL)
3243 #define CASE_RVV_OPCODE_UNMASK_WIDEN(OP) \
3244 CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF8): \
3245 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF4): \
3246 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF2): \
3247 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M1): \
3248 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M2): \
3249 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M4)
3251 #define CASE_RVV_OPCODE_UNMASK(OP) \
3252 CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \
3253 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M8)
3255 #define CASE_RVV_OPCODE_MASK_WIDEN(OP) \
3256 CASE_RVV_OPCODE_MASK_LMUL(OP, MF8): \
3257 case CASE_RVV_OPCODE_MASK_LMUL(OP, MF4): \
3258 case CASE_RVV_OPCODE_MASK_LMUL(OP, MF2): \
3259 case CASE_RVV_OPCODE_MASK_LMUL(OP, M1): \
3260 case CASE_RVV_OPCODE_MASK_LMUL(OP, M2): \
3261 case CASE_RVV_OPCODE_MASK_LMUL(OP, M4)
3263 #define CASE_RVV_OPCODE_MASK(OP) \
3264 CASE_RVV_OPCODE_MASK_WIDEN(OP): \
3265 case CASE_RVV_OPCODE_MASK_LMUL(OP, M8)
3267 #define CASE_RVV_OPCODE_WIDEN(OP) \
3268 CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \
3269 case CASE_RVV_OPCODE_MASK_WIDEN(OP)
3271 #define CASE_RVV_OPCODE(OP) \
3272 CASE_RVV_OPCODE_UNMASK(OP): \
3273 case CASE_RVV_OPCODE_MASK(OP)
3274 // clang-format on
3276 // clang-format off
3277 #define CASE_VMA_OPCODE_COMMON(OP, TYPE, LMUL) \
3278 RISCV::PseudoV##OP##_##TYPE##_##LMUL
3280 #define CASE_VMA_OPCODE_LMULS_M1(OP, TYPE) \
3281 CASE_VMA_OPCODE_COMMON(OP, TYPE, M1): \
3282 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M2): \
3283 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M4): \
3284 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M8)
3286 #define CASE_VMA_OPCODE_LMULS_MF2(OP, TYPE) \
3287 CASE_VMA_OPCODE_COMMON(OP, TYPE, MF2): \
3288 case CASE_VMA_OPCODE_LMULS_M1(OP, TYPE)
3290 #define CASE_VMA_OPCODE_LMULS_MF4(OP, TYPE) \
3291 CASE_VMA_OPCODE_COMMON(OP, TYPE, MF4): \
3292 case CASE_VMA_OPCODE_LMULS_MF2(OP, TYPE)
3294 #define CASE_VMA_OPCODE_LMULS(OP, TYPE) \
3295 CASE_VMA_OPCODE_COMMON(OP, TYPE, MF8): \
3296 case CASE_VMA_OPCODE_LMULS_MF4(OP, TYPE)
3298 // VFMA instructions are SEW specific.
3299 #define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL, SEW) \
3300 RISCV::PseudoV##OP##_##TYPE##_##LMUL##_##SEW
3302 #define CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE, SEW) \
3303 CASE_VFMA_OPCODE_COMMON(OP, TYPE, M1, SEW): \
3304 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M2, SEW): \
3305 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M4, SEW): \
3306 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M8, SEW)
3308 #define CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE, SEW) \
3309 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF2, SEW): \
3310 case CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE, SEW)
3312 #define CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE, SEW) \
3313 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF4, SEW): \
3314 case CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE, SEW)
3316 #define CASE_VFMA_OPCODE_VV(OP) \
3317 CASE_VFMA_OPCODE_LMULS_MF4(OP, VV, E16): \
3318 case CASE_VFMA_OPCODE_LMULS_MF2(OP, VV, E32): \
3319 case CASE_VFMA_OPCODE_LMULS_M1(OP, VV, E64)
3321 #define CASE_VFMA_SPLATS(OP) \
3322 CASE_VFMA_OPCODE_LMULS_MF4(OP, VFPR16, E16): \
3323 case CASE_VFMA_OPCODE_LMULS_MF2(OP, VFPR32, E32): \
3324 case CASE_VFMA_OPCODE_LMULS_M1(OP, VFPR64, E64)
3325 // clang-format on
3327 bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
3328 unsigned &SrcOpIdx1,
3329 unsigned &SrcOpIdx2) const {
3330 const MCInstrDesc &Desc = MI.getDesc();
3331 if (!Desc.isCommutable())
3332 return false;
3334 switch (MI.getOpcode()) {
3335 case RISCV::TH_MVEQZ:
3336 case RISCV::TH_MVNEZ:
3337 // We can't commute operands if operand 2 (i.e., rs1 in
3338 // mveqz/mvnez rd,rs1,rs2) is the zero-register (as it is
3339 // not valid as the in/out-operand 1).
3340 if (MI.getOperand(2).getReg() == RISCV::X0)
3341 return false;
3342 // Operands 1 and 2 are commutable, if we switch the opcode.
3343 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2);
3344 case RISCV::TH_MULA:
3345 case RISCV::TH_MULAW:
3346 case RISCV::TH_MULAH:
3347 case RISCV::TH_MULS:
3348 case RISCV::TH_MULSW:
3349 case RISCV::TH_MULSH:
3350 // Operands 2 and 3 are commutable.
3351 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
3352 case RISCV::PseudoCCMOVGPRNoX0:
3353 case RISCV::PseudoCCMOVGPR:
3354 // Operands 4 and 5 are commutable.
3355 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5);
3356 case CASE_RVV_OPCODE(VADD_VV):
3357 case CASE_RVV_OPCODE(VAND_VV):
3358 case CASE_RVV_OPCODE(VOR_VV):
3359 case CASE_RVV_OPCODE(VXOR_VV):
3360 case CASE_RVV_OPCODE_MASK(VMSEQ_VV):
3361 case CASE_RVV_OPCODE_MASK(VMSNE_VV):
3362 case CASE_RVV_OPCODE(VMIN_VV):
3363 case CASE_RVV_OPCODE(VMINU_VV):
3364 case CASE_RVV_OPCODE(VMAX_VV):
3365 case CASE_RVV_OPCODE(VMAXU_VV):
3366 case CASE_RVV_OPCODE(VMUL_VV):
3367 case CASE_RVV_OPCODE(VMULH_VV):
3368 case CASE_RVV_OPCODE(VMULHU_VV):
3369 case CASE_RVV_OPCODE_WIDEN(VWADD_VV):
3370 case CASE_RVV_OPCODE_WIDEN(VWADDU_VV):
3371 case CASE_RVV_OPCODE_WIDEN(VWMUL_VV):
3372 case CASE_RVV_OPCODE_WIDEN(VWMULU_VV):
3373 case CASE_RVV_OPCODE_WIDEN(VWMACC_VV):
3374 case CASE_RVV_OPCODE_WIDEN(VWMACCU_VV):
3375 case CASE_RVV_OPCODE_UNMASK(VADC_VVM):
3376 case CASE_RVV_OPCODE(VSADD_VV):
3377 case CASE_RVV_OPCODE(VSADDU_VV):
3378 case CASE_RVV_OPCODE(VAADD_VV):
3379 case CASE_RVV_OPCODE(VAADDU_VV):
3380 case CASE_RVV_OPCODE(VSMUL_VV):
3381 // Operands 2 and 3 are commutable.
3382 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
3383 case CASE_VFMA_SPLATS(FMADD):
3384 case CASE_VFMA_SPLATS(FMSUB):
3385 case CASE_VFMA_SPLATS(FMACC):
3386 case CASE_VFMA_SPLATS(FMSAC):
3387 case CASE_VFMA_SPLATS(FNMADD):
3388 case CASE_VFMA_SPLATS(FNMSUB):
3389 case CASE_VFMA_SPLATS(FNMACC):
3390 case CASE_VFMA_SPLATS(FNMSAC):
3391 case CASE_VFMA_OPCODE_VV(FMACC):
3392 case CASE_VFMA_OPCODE_VV(FMSAC):
3393 case CASE_VFMA_OPCODE_VV(FNMACC):
3394 case CASE_VFMA_OPCODE_VV(FNMSAC):
3395 case CASE_VMA_OPCODE_LMULS(MADD, VX):
3396 case CASE_VMA_OPCODE_LMULS(NMSUB, VX):
3397 case CASE_VMA_OPCODE_LMULS(MACC, VX):
3398 case CASE_VMA_OPCODE_LMULS(NMSAC, VX):
3399 case CASE_VMA_OPCODE_LMULS(MACC, VV):
3400 case CASE_VMA_OPCODE_LMULS(NMSAC, VV): {
3401 // If the tail policy is undisturbed we can't commute.
3402 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
3403 if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
3404 return false;
3406 // For these instructions we can only swap operand 1 and operand 3 by
3407 // changing the opcode.
3408 unsigned CommutableOpIdx1 = 1;
3409 unsigned CommutableOpIdx2 = 3;
3410 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
3411 CommutableOpIdx2))
3412 return false;
3413 return true;
3415 case CASE_VFMA_OPCODE_VV(FMADD):
3416 case CASE_VFMA_OPCODE_VV(FMSUB):
3417 case CASE_VFMA_OPCODE_VV(FNMADD):
3418 case CASE_VFMA_OPCODE_VV(FNMSUB):
3419 case CASE_VMA_OPCODE_LMULS(MADD, VV):
3420 case CASE_VMA_OPCODE_LMULS(NMSUB, VV): {
3421 // If the tail policy is undisturbed we can't commute.
3422 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
3423 if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
3424 return false;
3426 // For these instructions we have more freedom. We can commute with the
3427 // other multiplicand or with the addend/subtrahend/minuend.
3429 // Any fixed operand must be from source 1, 2 or 3.
3430 if (SrcOpIdx1 != CommuteAnyOperandIndex && SrcOpIdx1 > 3)
3431 return false;
3432 if (SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx2 > 3)
3433 return false;
3435 // If both ops are fixed, one of them must be the tied source.
3436 if (SrcOpIdx1 != CommuteAnyOperandIndex &&
3437 SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx1 != 1 && SrcOpIdx2 != 1)
3438 return false;
3440 // Look for two different register operands assumed to be commutable
3441 // regardless of the FMA opcode. The FMA opcode is adjusted later if
3442 // needed.
3443 if (SrcOpIdx1 == CommuteAnyOperandIndex ||
3444 SrcOpIdx2 == CommuteAnyOperandIndex) {
3445 // At least one of the operands to be commuted is not specified and
3446 // this method is free to choose appropriate commutable operands.
3447 unsigned CommutableOpIdx1 = SrcOpIdx1;
3448 if (SrcOpIdx1 == SrcOpIdx2) {
3449 // Neither operand is fixed. Set one of the commutable
3450 // operands to the tied source.
3451 CommutableOpIdx1 = 1;
3452 } else if (SrcOpIdx1 == CommuteAnyOperandIndex) {
3453 // Only one of the operands is not fixed.
3454 CommutableOpIdx1 = SrcOpIdx2;
3457 // CommutableOpIdx1 is well defined now. Let's choose another commutable
3458 // operand and assign its index to CommutableOpIdx2.
3459 unsigned CommutableOpIdx2;
3460 if (CommutableOpIdx1 != 1) {
3461 // If we haven't already used the tied source, we must use it now.
3462 CommutableOpIdx2 = 1;
3463 } else {
3464 Register Op1Reg = MI.getOperand(CommutableOpIdx1).getReg();
3466 // The commuted operands should have different registers.
3467 // Otherwise, the commute transformation does not change anything and
3468 // is useless. We use this as a hint to make our decision.
3469 if (Op1Reg != MI.getOperand(2).getReg())
3470 CommutableOpIdx2 = 2;
3471 else
3472 CommutableOpIdx2 = 3;
3475 // Assign the found pair of commutable indices to SrcOpIdx1 and
3476 // SrcOpIdx2 to return those values.
3477 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
3478 CommutableOpIdx2))
3479 return false;
3482 return true;
3486 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
3489 // clang-format off
3490 #define CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL) \
3491 case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL: \
3492 Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL; \
3493 break;
3495 #define CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE) \
3496 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1) \
3497 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2) \
3498 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4) \
3499 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8)
3501 #define CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE) \
3502 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2) \
3503 CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE)
3505 #define CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE) \
3506 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4) \
3507 CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE)
3509 #define CASE_VMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE) \
3510 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8) \
3511 CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE)
3513 #define CASE_VMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \
3514 CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16) \
3515 CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32) \
3516 CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64)
3518 // VFMA depends on SEW.
3519 #define CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL, SEW) \
3520 case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL##_##SEW: \
3521 Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL##_##SEW; \
3522 break;
3524 #define CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW) \
3525 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1, SEW) \
3526 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2, SEW) \
3527 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4, SEW) \
3528 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8, SEW)
3530 #define CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE, SEW) \
3531 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2, SEW) \
3532 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW)
3534 #define CASE_VFMA_CHANGE_OPCODE_VV(OLDOP, NEWOP) \
3535 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VV, E16) \
3536 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E32) \
3537 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VV, E64)
3539 #define CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE, SEW) \
3540 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4, SEW) \
3541 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE, SEW)
3543 #define CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE, SEW) \
3544 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8, SEW) \
3545 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE, SEW)
3547 #define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \
3548 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16, E16) \
3549 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32, E32) \
3550 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64, E64)
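// The *_CHANGE_OPCODE_* macros above expand to case labels that rewrite Opc
// to the partner pseudo with the same TYPE/LMUL (and SEW), e.g. FMACC to
// FMADD, when commuting operands requires switching the opcode.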
3552 MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
3553 bool NewMI,
3554 unsigned OpIdx1,
3555 unsigned OpIdx2) const {
3556 auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & {
3557 if (NewMI)
3558 return *MI.getParent()->getParent()->CloneMachineInstr(&MI);
3559 return MI;
3562 switch (MI.getOpcode()) {
3563 case RISCV::TH_MVEQZ:
3564 case RISCV::TH_MVNEZ: {
3565 auto &WorkingMI = cloneIfNew(MI);
3566 WorkingMI.setDesc(get(MI.getOpcode() == RISCV::TH_MVEQZ ? RISCV::TH_MVNEZ
3567 : RISCV::TH_MVEQZ));
3568 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, false, OpIdx1,
3569 OpIdx2);
3571 case RISCV::PseudoCCMOVGPRNoX0:
3572 case RISCV::PseudoCCMOVGPR: {
3573 // CCMOV can be commuted by inverting the condition.
3574 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
3575 CC = RISCVCC::getOppositeBranchCondition(CC);
3576 auto &WorkingMI = cloneIfNew(MI);
3577 WorkingMI.getOperand(3).setImm(CC);
3578 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI*/ false,
3579 OpIdx1, OpIdx2);
3581 case CASE_VFMA_SPLATS(FMACC):
3582 case CASE_VFMA_SPLATS(FMADD):
3583 case CASE_VFMA_SPLATS(FMSAC):
3584 case CASE_VFMA_SPLATS(FMSUB):
3585 case CASE_VFMA_SPLATS(FNMACC):
3586 case CASE_VFMA_SPLATS(FNMADD):
3587 case CASE_VFMA_SPLATS(FNMSAC):
3588 case CASE_VFMA_SPLATS(FNMSUB):
3589 case CASE_VFMA_OPCODE_VV(FMACC):
3590 case CASE_VFMA_OPCODE_VV(FMSAC):
3591 case CASE_VFMA_OPCODE_VV(FNMACC):
3592 case CASE_VFMA_OPCODE_VV(FNMSAC):
3593 case CASE_VMA_OPCODE_LMULS(MADD, VX):
3594 case CASE_VMA_OPCODE_LMULS(NMSUB, VX):
3595 case CASE_VMA_OPCODE_LMULS(MACC, VX):
3596 case CASE_VMA_OPCODE_LMULS(NMSAC, VX):
3597 case CASE_VMA_OPCODE_LMULS(MACC, VV):
3598 case CASE_VMA_OPCODE_LMULS(NMSAC, VV): {
3599 // It only makes sense to toggle these between clobbering the
3600 // addend/subtrahend/minuend and clobbering one of the multiplicands.
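// E.g. vfmacc.vv vd, vs1, vs2 computes vd = vs1 * vs2 + vd, whereas
// vfmadd.vv vd, vs1, vs2 computes vd = vs1 * vd + vs2, so swapping the
// tied operand (1) with the addend/subtrahend (3) is only legal if we
// also switch to the partner opcode below.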
3601 assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
3602 assert((OpIdx1 == 3 || OpIdx2 == 3) && "Unexpected opcode index");
3603 unsigned Opc;
3604 switch (MI.getOpcode()) {
3605 default:
3606 llvm_unreachable("Unexpected opcode");
3607 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMACC, FMADD)
3608 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMADD, FMACC)
3609 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSAC, FMSUB)
3610 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSUB, FMSAC)
3611 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMACC, FNMADD)
3612 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMADD, FNMACC)
3613 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSAC, FNMSUB)
3614 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSUB, FNMSAC)
3615 CASE_VFMA_CHANGE_OPCODE_VV(FMACC, FMADD)
3616 CASE_VFMA_CHANGE_OPCODE_VV(FMSAC, FMSUB)
3617 CASE_VFMA_CHANGE_OPCODE_VV(FNMACC, FNMADD)
3618 CASE_VFMA_CHANGE_OPCODE_VV(FNMSAC, FNMSUB)
3619 CASE_VMA_CHANGE_OPCODE_LMULS(MACC, MADD, VX)
3620 CASE_VMA_CHANGE_OPCODE_LMULS(MADD, MACC, VX)
3621 CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VX)
3622 CASE_VMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VX)
3623 CASE_VMA_CHANGE_OPCODE_LMULS(MACC, MADD, VV)
3624 CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VV)
3627 auto &WorkingMI = cloneIfNew(MI);
3628 WorkingMI.setDesc(get(Opc));
3629 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
3630 OpIdx1, OpIdx2);
3632 case CASE_VFMA_OPCODE_VV(FMADD):
3633 case CASE_VFMA_OPCODE_VV(FMSUB):
3634 case CASE_VFMA_OPCODE_VV(FNMADD):
3635 case CASE_VFMA_OPCODE_VV(FNMSUB):
3636 case CASE_VMA_OPCODE_LMULS(MADD, VV):
3637 case CASE_VMA_OPCODE_LMULS(NMSUB, VV): {
3638 assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
3639 // If one of the operands is the addend we need to change the opcode.
3640 // Otherwise we're just swapping two of the multiplicands.
3641 if (OpIdx1 == 3 || OpIdx2 == 3) {
3642 unsigned Opc;
3643 switch (MI.getOpcode()) {
3644 default:
3645 llvm_unreachable("Unexpected opcode");
3646 CASE_VFMA_CHANGE_OPCODE_VV(FMADD, FMACC)
3647 CASE_VFMA_CHANGE_OPCODE_VV(FMSUB, FMSAC)
3648 CASE_VFMA_CHANGE_OPCODE_VV(FNMADD, FNMACC)
3649 CASE_VFMA_CHANGE_OPCODE_VV(FNMSUB, FNMSAC)
3650 CASE_VMA_CHANGE_OPCODE_LMULS(MADD, MACC, VV)
3651 CASE_VMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VV)
3654 auto &WorkingMI = cloneIfNew(MI);
3655 WorkingMI.setDesc(get(Opc));
3656 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
3657 OpIdx1, OpIdx2);
3659 // Let the default code handle it.
3660 break;
3664 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
3667 #undef CASE_RVV_OPCODE_UNMASK_LMUL
3668 #undef CASE_RVV_OPCODE_MASK_LMUL
3669 #undef CASE_RVV_OPCODE_LMUL
3670 #undef CASE_RVV_OPCODE_UNMASK_WIDEN
3671 #undef CASE_RVV_OPCODE_UNMASK
3672 #undef CASE_RVV_OPCODE_MASK_WIDEN
3673 #undef CASE_RVV_OPCODE_MASK
3674 #undef CASE_RVV_OPCODE_WIDEN
3675 #undef CASE_RVV_OPCODE
3677 #undef CASE_VMA_OPCODE_COMMON
3678 #undef CASE_VMA_OPCODE_LMULS_M1
3679 #undef CASE_VMA_OPCODE_LMULS_MF2
3680 #undef CASE_VMA_OPCODE_LMULS_MF4
3681 #undef CASE_VMA_OPCODE_LMULS
3682 #undef CASE_VFMA_OPCODE_COMMON
3683 #undef CASE_VFMA_OPCODE_LMULS_M1
3684 #undef CASE_VFMA_OPCODE_LMULS_MF2
3685 #undef CASE_VFMA_OPCODE_LMULS_MF4
3686 #undef CASE_VFMA_OPCODE_VV
3687 #undef CASE_VFMA_SPLATS
3689 // clang-format off
3690 #define CASE_WIDEOP_OPCODE_COMMON(OP, LMUL) \
3691 RISCV::PseudoV##OP##_##LMUL##_TIED
3693 #define CASE_WIDEOP_OPCODE_LMULS_MF4(OP) \
3694 CASE_WIDEOP_OPCODE_COMMON(OP, MF4): \
3695 case CASE_WIDEOP_OPCODE_COMMON(OP, MF2): \
3696 case CASE_WIDEOP_OPCODE_COMMON(OP, M1): \
3697 case CASE_WIDEOP_OPCODE_COMMON(OP, M2): \
3698 case CASE_WIDEOP_OPCODE_COMMON(OP, M4)
3700 #define CASE_WIDEOP_OPCODE_LMULS(OP) \
3701 CASE_WIDEOP_OPCODE_COMMON(OP, MF8): \
3702 case CASE_WIDEOP_OPCODE_LMULS_MF4(OP)
3704 #define CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL) \
3705 case RISCV::PseudoV##OP##_##LMUL##_TIED: \
3706 NewOpc = RISCV::PseudoV##OP##_##LMUL; \
3707 break;
3709 #define CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) \
3710 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4) \
3711 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2) \
3712 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1) \
3713 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2) \
3714 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4)
3716 #define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP) \
3717 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8) \
3718 CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP)
3720 // FP widening ops may be SEW-aware. Create SEW-aware cases for them.
3721 #define CASE_FP_WIDEOP_OPCODE_COMMON(OP, LMUL, SEW) \
3722 RISCV::PseudoV##OP##_##LMUL##_##SEW##_TIED
3724 #define CASE_FP_WIDEOP_OPCODE_LMULS_MF4(OP) \
3725 CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF4, E16): \
3726 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E16): \
3727 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E32): \
3728 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E16): \
3729 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E32): \
3730 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E16): \
3731 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E32): \
3732 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E16): \
3733 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E32) \
3735 #define CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL, SEW) \
3736 case RISCV::PseudoV##OP##_##LMUL##_##SEW##_TIED: \
3737 NewOpc = RISCV::PseudoV##OP##_##LMUL##_##SEW; \
3738 break;
3740 #define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) \
3741 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4, E16) \
3742 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E16) \
3743 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E32) \
3744 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E16) \
3745 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E32) \
3746 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E16) \
3747 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E32) \
3748 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E16) \
3749 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E32) \
3751 #define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS(OP) \
3752 CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP)
3753 // clang-format on
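// convertToThreeAddress() below rewrites the tied widening pseudos
// (PseudoV<OP>_<LMUL>[_<SEW>]_TIED), where the wide source shares a register
// with the destination, into their untied forms, adding an undef passthru
// operand in place of the tie so the destination no longer has to share a
// register with the wide source.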
3755 MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
3756 LiveVariables *LV,
3757 LiveIntervals *LIS) const {
3758 MachineInstrBuilder MIB;
3759 switch (MI.getOpcode()) {
3760 default:
3761 return nullptr;
3762 case CASE_FP_WIDEOP_OPCODE_LMULS_MF4(FWADD_WV):
3763 case CASE_FP_WIDEOP_OPCODE_LMULS_MF4(FWSUB_WV): {
3764 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
3765 MI.getNumExplicitOperands() == 7 &&
3766 "Expect 7 explicit operands rd, rs2, rs1, rm, vl, sew, policy");
3767 // If the tail policy is undisturbed we can't convert.
3768 if ((MI.getOperand(RISCVII::getVecPolicyOpNum(MI.getDesc())).getImm() &
3769 1) == 0)
3770 return nullptr;
3771 // clang-format off
3772 unsigned NewOpc;
3773 switch (MI.getOpcode()) {
3774 default:
3775 llvm_unreachable("Unexpected opcode");
3776 CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWADD_WV)
3777 CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWSUB_WV)
3779 // clang-format on
3781 MachineBasicBlock &MBB = *MI.getParent();
3782 MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
3783 .add(MI.getOperand(0))
3784 .addReg(MI.getOperand(0).getReg(), RegState::Undef)
3785 .add(MI.getOperand(1))
3786 .add(MI.getOperand(2))
3787 .add(MI.getOperand(3))
3788 .add(MI.getOperand(4))
3789 .add(MI.getOperand(5))
3790 .add(MI.getOperand(6));
3791 break;
3793 case CASE_WIDEOP_OPCODE_LMULS(WADD_WV):
3794 case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV):
3795 case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV):
3796 case CASE_WIDEOP_OPCODE_LMULS(WSUBU_WV): {
3797 // If the tail policy is undisturbed we can't convert.
3798 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
3799 MI.getNumExplicitOperands() == 6);
3800 if ((MI.getOperand(5).getImm() & 1) == 0)
3801 return nullptr;
3803 // clang-format off
3804 unsigned NewOpc;
3805 switch (MI.getOpcode()) {
3806 default:
3807 llvm_unreachable("Unexpected opcode");
3808 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADD_WV)
3809 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADDU_WV)
3810 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUB_WV)
3811 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUBU_WV)
3813 // clang-format on
3815 MachineBasicBlock &MBB = *MI.getParent();
3816 MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
3817 .add(MI.getOperand(0))
3818 .addReg(MI.getOperand(0).getReg(), RegState::Undef)
3819 .add(MI.getOperand(1))
3820 .add(MI.getOperand(2))
3821 .add(MI.getOperand(3))
3822 .add(MI.getOperand(4))
3823 .add(MI.getOperand(5));
3824 break;
3827 MIB.copyImplicitOps(MI);
3829 if (LV) {
3830 unsigned NumOps = MI.getNumOperands();
3831 for (unsigned I = 1; I < NumOps; ++I) {
3832 MachineOperand &Op = MI.getOperand(I);
3833 if (Op.isReg() && Op.isKill())
3834 LV->replaceKillInstruction(Op.getReg(), MI, *MIB);
3838 if (LIS) {
3839 SlotIndex Idx = LIS->ReplaceMachineInstrInMaps(MI, *MIB);
3841 if (MI.getOperand(0).isEarlyClobber()) {
3842 // Use operand 1 was tied to the early-clobber def operand 0, so its live
3843 // interval could have ended at an early-clobber slot. Now that they are
3844 // no longer tied we need to update it to the normal register slot.
3845 LiveInterval &LI = LIS->getInterval(MI.getOperand(1).getReg());
3846 LiveRange::Segment *S = LI.getSegmentContaining(Idx);
3847 if (S->end == Idx.getRegSlot(true))
3848 S->end = Idx.getRegSlot();
3852 return MIB;
3855 #undef CASE_WIDEOP_OPCODE_COMMON
3856 #undef CASE_WIDEOP_OPCODE_LMULS_MF4
3857 #undef CASE_WIDEOP_OPCODE_LMULS
3858 #undef CASE_WIDEOP_CHANGE_OPCODE_COMMON
3859 #undef CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4
3860 #undef CASE_WIDEOP_CHANGE_OPCODE_LMULS
3861 #undef CASE_FP_WIDEOP_OPCODE_COMMON
3862 #undef CASE_FP_WIDEOP_OPCODE_LMULS_MF4
3863 #undef CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON
3864 #undef CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4
3865 #undef CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS
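// Multiply the value in DestReg by the constant Amount in place, preferring
// a single shift, a Zba shXadd (+shift) pair, or a shift/add (or sub) pair
// over a real multiply; otherwise fall back to MUL (Zmmul) or a generic
// shift-and-add expansion.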
3867 void RISCVInstrInfo::mulImm(MachineFunction &MF, MachineBasicBlock &MBB,
3868 MachineBasicBlock::iterator II, const DebugLoc &DL,
3869 Register DestReg, uint32_t Amount,
3870 MachineInstr::MIFlag Flag) const {
3871 MachineRegisterInfo &MRI = MF.getRegInfo();
3872 if (llvm::has_single_bit<uint32_t>(Amount)) {
3873 uint32_t ShiftAmount = Log2_32(Amount);
3874 if (ShiftAmount == 0)
3875 return;
3876 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
3877 .addReg(DestReg, RegState::Kill)
3878 .addImm(ShiftAmount)
3879 .setMIFlag(Flag);
3880 } else if (STI.hasStdExtZba() &&
3881 ((Amount % 3 == 0 && isPowerOf2_64(Amount / 3)) ||
3882 (Amount % 5 == 0 && isPowerOf2_64(Amount / 5)) ||
3883 (Amount % 9 == 0 && isPowerOf2_64(Amount / 9)))) {
3884 // We can use Zba SHXADD+SLLI instructions for multiply in some cases.
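// For example, Amount == 40 == 8 * 5 becomes SLLI DestReg, 3 (x8)
// followed by SH2ADD DestReg, DestReg, DestReg (x5).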
3885 unsigned Opc;
3886 uint32_t ShiftAmount;
3887 if (Amount % 9 == 0) {
3888 Opc = RISCV::SH3ADD;
3889 ShiftAmount = Log2_64(Amount / 9);
3890 } else if (Amount % 5 == 0) {
3891 Opc = RISCV::SH2ADD;
3892 ShiftAmount = Log2_64(Amount / 5);
3893 } else if (Amount % 3 == 0) {
3894 Opc = RISCV::SH1ADD;
3895 ShiftAmount = Log2_64(Amount / 3);
3896 } else {
3897 llvm_unreachable("implied by if-clause");
3899 if (ShiftAmount)
3900 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
3901 .addReg(DestReg, RegState::Kill)
3902 .addImm(ShiftAmount)
3903 .setMIFlag(Flag);
3904 BuildMI(MBB, II, DL, get(Opc), DestReg)
3905 .addReg(DestReg, RegState::Kill)
3906 .addReg(DestReg)
3907 .setMIFlag(Flag);
3908 } else if (llvm::has_single_bit<uint32_t>(Amount - 1)) {
3909 Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
3910 uint32_t ShiftAmount = Log2_32(Amount - 1);
3911 BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister)
3912 .addReg(DestReg)
3913 .addImm(ShiftAmount)
3914 .setMIFlag(Flag);
3915 BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg)
3916 .addReg(ScaledRegister, RegState::Kill)
3917 .addReg(DestReg, RegState::Kill)
3918 .setMIFlag(Flag);
3919 } else if (llvm::has_single_bit<uint32_t>(Amount + 1)) {
3920 Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
3921 uint32_t ShiftAmount = Log2_32(Amount + 1);
3922 BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister)
3923 .addReg(DestReg)
3924 .addImm(ShiftAmount)
3925 .setMIFlag(Flag);
3926 BuildMI(MBB, II, DL, get(RISCV::SUB), DestReg)
3927 .addReg(ScaledRegister, RegState::Kill)
3928 .addReg(DestReg, RegState::Kill)
3929 .setMIFlag(Flag);
3930 } else if (STI.hasStdExtZmmul()) {
3931 Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass);
3932 movImm(MBB, II, DL, N, Amount, Flag);
3933 BuildMI(MBB, II, DL, get(RISCV::MUL), DestReg)
3934 .addReg(DestReg, RegState::Kill)
3935 .addReg(N, RegState::Kill)
3936 .setMIFlag(Flag);
3937 } else {
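// Generic shift-and-add expansion: walk the set bits of Amount from low
// to high, shifting DestReg up to each set bit and accumulating the
// partial products in Acc. E.g. Amount == 11 (0b1011) becomes:
//   acc = x; x <<= 1; acc += x; x <<= 2; x += acc   // x * 11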
3938 Register Acc;
3939 uint32_t PrevShiftAmount = 0;
3940 for (uint32_t ShiftAmount = 0; Amount >> ShiftAmount; ShiftAmount++) {
3941 if (Amount & (1U << ShiftAmount)) {
3942 if (ShiftAmount)
3943 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
3944 .addReg(DestReg, RegState::Kill)
3945 .addImm(ShiftAmount - PrevShiftAmount)
3946 .setMIFlag(Flag);
3947 if (Amount >> (ShiftAmount + 1)) {
3948 // If we don't have an accumulator yet, create it and copy DestReg.
3949 if (!Acc) {
3950 Acc = MRI.createVirtualRegister(&RISCV::GPRRegClass);
3951 BuildMI(MBB, II, DL, get(TargetOpcode::COPY), Acc)
3952 .addReg(DestReg)
3953 .setMIFlag(Flag);
3954 } else {
3955 BuildMI(MBB, II, DL, get(RISCV::ADD), Acc)
3956 .addReg(Acc, RegState::Kill)
3957 .addReg(DestReg)
3958 .setMIFlag(Flag);
3961 PrevShiftAmount = ShiftAmount;
3964 assert(Acc && "Expected valid accumulator");
3965 BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg)
3966 .addReg(DestReg, RegState::Kill)
3967 .addReg(Acc, RegState::Kill)
3968 .setMIFlag(Flag);
3972 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
3973 RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
3974 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
3975 {{MONontemporalBit0, "riscv-nontemporal-domain-bit-0"},
3976 {MONontemporalBit1, "riscv-nontemporal-domain-bit-1"}};
3977 return ArrayRef(TargetFlags);
3980 unsigned RISCVInstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
3981 return OptLevel >= CodeGenOptLevel::Aggressive
3982 ? STI.getTailDupAggressiveThreshold()
3983 : 2;
3986 // Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
3987 bool RISCV::isSEXT_W(const MachineInstr &MI) {
3988 return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() &&
3989 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0;
3992 // Returns true if this is the zext.w pattern, add.uw rd, rs1, x0.
3993 bool RISCV::isZEXT_W(const MachineInstr &MI) {
3994 return MI.getOpcode() == RISCV::ADD_UW && MI.getOperand(1).isReg() &&
3995 MI.getOperand(2).isReg() && MI.getOperand(2).getReg() == RISCV::X0;
3998 // Returns true if this is the zext.b pattern, andi rd, rs1, 255.
3999 bool RISCV::isZEXT_B(const MachineInstr &MI) {
4000 return MI.getOpcode() == RISCV::ANDI && MI.getOperand(1).isReg() &&
4001 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 255;
4004 static bool isRVVWholeLoadStore(unsigned Opcode) {
4005 switch (Opcode) {
4006 default:
4007 return false;
4008 case RISCV::VS1R_V:
4009 case RISCV::VS2R_V:
4010 case RISCV::VS4R_V:
4011 case RISCV::VS8R_V:
4012 case RISCV::VL1RE8_V:
4013 case RISCV::VL2RE8_V:
4014 case RISCV::VL4RE8_V:
4015 case RISCV::VL8RE8_V:
4016 case RISCV::VL1RE16_V:
4017 case RISCV::VL2RE16_V:
4018 case RISCV::VL4RE16_V:
4019 case RISCV::VL8RE16_V:
4020 case RISCV::VL1RE32_V:
4021 case RISCV::VL2RE32_V:
4022 case RISCV::VL4RE32_V:
4023 case RISCV::VL8RE32_V:
4024 case RISCV::VL1RE64_V:
4025 case RISCV::VL2RE64_V:
4026 case RISCV::VL4RE64_V:
4027 case RISCV::VL8RE64_V:
4028 return true;
4032 bool RISCV::isRVVSpill(const MachineInstr &MI) {
4033 // RVV lacks any support for immediate addressing for stack addresses, so be
4034 // conservative.
4035 unsigned Opcode = MI.getOpcode();
4036 if (!RISCVVPseudosTable::getPseudoInfo(Opcode) &&
4037 !isRVVWholeLoadStore(Opcode) && !isRVVSpillForZvlsseg(Opcode))
4038 return false;
4039 return true;
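// For Zvlsseg segment spill/reload pseudos, return the pair
// (number of fields NF, LMUL per field); std::nullopt for anything else.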
4042 std::optional<std::pair<unsigned, unsigned>>
4043 RISCV::isRVVSpillForZvlsseg(unsigned Opcode) {
4044 switch (Opcode) {
4045 default:
4046 return std::nullopt;
4047 case RISCV::PseudoVSPILL2_M1:
4048 case RISCV::PseudoVRELOAD2_M1:
4049 return std::make_pair(2u, 1u);
4050 case RISCV::PseudoVSPILL2_M2:
4051 case RISCV::PseudoVRELOAD2_M2:
4052 return std::make_pair(2u, 2u);
4053 case RISCV::PseudoVSPILL2_M4:
4054 case RISCV::PseudoVRELOAD2_M4:
4055 return std::make_pair(2u, 4u);
4056 case RISCV::PseudoVSPILL3_M1:
4057 case RISCV::PseudoVRELOAD3_M1:
4058 return std::make_pair(3u, 1u);
4059 case RISCV::PseudoVSPILL3_M2:
4060 case RISCV::PseudoVRELOAD3_M2:
4061 return std::make_pair(3u, 2u);
4062 case RISCV::PseudoVSPILL4_M1:
4063 case RISCV::PseudoVRELOAD4_M1:
4064 return std::make_pair(4u, 1u);
4065 case RISCV::PseudoVSPILL4_M2:
4066 case RISCV::PseudoVRELOAD4_M2:
4067 return std::make_pair(4u, 2u);
4068 case RISCV::PseudoVSPILL5_M1:
4069 case RISCV::PseudoVRELOAD5_M1:
4070 return std::make_pair(5u, 1u);
4071 case RISCV::PseudoVSPILL6_M1:
4072 case RISCV::PseudoVRELOAD6_M1:
4073 return std::make_pair(6u, 1u);
4074 case RISCV::PseudoVSPILL7_M1:
4075 case RISCV::PseudoVRELOAD7_M1:
4076 return std::make_pair(7u, 1u);
4077 case RISCV::PseudoVSPILL8_M1:
4078 case RISCV::PseudoVRELOAD8_M1:
4079 return std::make_pair(8u, 1u);
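// Fault-only-first loads are identified as the only non-inline-asm
// instructions with two explicit defs (the data register and VL).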
4083 bool RISCV::isFaultFirstLoad(const MachineInstr &MI) {
4084 return MI.getNumExplicitDefs() == 2 &&
4085 MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) && !MI.isInlineAsm();
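// Return true if both instructions have a static rounding mode (frm) operand
// and the two rounding-mode immediates are equal.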
4088 bool RISCV::hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2) {
4089 int16_t MI1FrmOpIdx =
4090 RISCV::getNamedOperandIdx(MI1.getOpcode(), RISCV::OpName::frm);
4091 int16_t MI2FrmOpIdx =
4092 RISCV::getNamedOperandIdx(MI2.getOpcode(), RISCV::OpName::frm);
4093 if (MI1FrmOpIdx < 0 || MI2FrmOpIdx < 0)
4094 return false;
4095 MachineOperand FrmOp1 = MI1.getOperand(MI1FrmOpIdx);
4096 MachineOperand FrmOp2 = MI2.getOperand(MI2FrmOpIdx);
4097 return FrmOp1.getImm() == FrmOp2.getImm();
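// For the listed .vx/.wx/.vxm forms, return how many low bits of the scalar
// (rs1) operand are actually used, e.g. only log2(SEW) bits for the shifts;
// std::nullopt means all bits may be demanded.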
4100 std::optional<unsigned>
4101 RISCV::getVectorLowDemandedScalarBits(uint16_t Opcode, unsigned Log2SEW) {
4102 // TODO: Handle Zvbb instructions
4103 switch (Opcode) {
4104 default:
4105 return std::nullopt;
4107 // 11.6. Vector Single-Width Shift Instructions
4108 case RISCV::VSLL_VX:
4109 case RISCV::VSRL_VX:
4110 case RISCV::VSRA_VX:
4111 // 12.4. Vector Single-Width Scaling Shift Instructions
4112 case RISCV::VSSRL_VX:
4113 case RISCV::VSSRA_VX:
4114 // Only the low lg2(SEW) bits of the shift-amount value are used.
4115 return Log2SEW;
4117 // 11.7 Vector Narrowing Integer Right Shift Instructions
4118 case RISCV::VNSRL_WX:
4119 case RISCV::VNSRA_WX:
4120 // 12.5. Vector Narrowing Fixed-Point Clip Instructions
4121 case RISCV::VNCLIPU_WX:
4122 case RISCV::VNCLIP_WX:
4123 // Only the low lg2(2*SEW) bits of the shift-amount value are used.
4124 return Log2SEW + 1;
4126 // 11.1. Vector Single-Width Integer Add and Subtract
4127 case RISCV::VADD_VX:
4128 case RISCV::VSUB_VX:
4129 case RISCV::VRSUB_VX:
4130 // 11.2. Vector Widening Integer Add/Subtract
4131 case RISCV::VWADDU_VX:
4132 case RISCV::VWSUBU_VX:
4133 case RISCV::VWADD_VX:
4134 case RISCV::VWSUB_VX:
4135 case RISCV::VWADDU_WX:
4136 case RISCV::VWSUBU_WX:
4137 case RISCV::VWADD_WX:
4138 case RISCV::VWSUB_WX:
4139 // 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
4140 case RISCV::VADC_VXM:
4141 case RISCV::VADC_VIM:
4142 case RISCV::VMADC_VXM:
4143 case RISCV::VMADC_VIM:
4144 case RISCV::VMADC_VX:
4145 case RISCV::VSBC_VXM:
4146 case RISCV::VMSBC_VXM:
4147 case RISCV::VMSBC_VX:
4148 // 11.5 Vector Bitwise Logical Instructions
4149 case RISCV::VAND_VX:
4150 case RISCV::VOR_VX:
4151 case RISCV::VXOR_VX:
4152 // 11.8. Vector Integer Compare Instructions
4153 case RISCV::VMSEQ_VX:
4154 case RISCV::VMSNE_VX:
4155 case RISCV::VMSLTU_VX:
4156 case RISCV::VMSLT_VX:
4157 case RISCV::VMSLEU_VX:
4158 case RISCV::VMSLE_VX:
4159 case RISCV::VMSGTU_VX:
4160 case RISCV::VMSGT_VX:
4161 // 11.9. Vector Integer Min/Max Instructions
4162 case RISCV::VMINU_VX:
4163 case RISCV::VMIN_VX:
4164 case RISCV::VMAXU_VX:
4165 case RISCV::VMAX_VX:
4166 // 11.10. Vector Single-Width Integer Multiply Instructions
4167 case RISCV::VMUL_VX:
4168 case RISCV::VMULH_VX:
4169 case RISCV::VMULHU_VX:
4170 case RISCV::VMULHSU_VX:
4171 // 11.11. Vector Integer Divide Instructions
4172 case RISCV::VDIVU_VX:
4173 case RISCV::VDIV_VX:
4174 case RISCV::VREMU_VX:
4175 case RISCV::VREM_VX:
4176 // 11.12. Vector Widening Integer Multiply Instructions
4177 case RISCV::VWMUL_VX:
4178 case RISCV::VWMULU_VX:
4179 case RISCV::VWMULSU_VX:
4180 // 11.13. Vector Single-Width Integer Multiply-Add Instructions
4181 case RISCV::VMACC_VX:
4182 case RISCV::VNMSAC_VX:
4183 case RISCV::VMADD_VX:
4184 case RISCV::VNMSUB_VX:
4185 // 11.14. Vector Widening Integer Multiply-Add Instructions
4186 case RISCV::VWMACCU_VX:
4187 case RISCV::VWMACC_VX:
4188 case RISCV::VWMACCSU_VX:
4189 case RISCV::VWMACCUS_VX:
4190 // 11.15. Vector Integer Merge Instructions
4191 case RISCV::VMERGE_VXM:
4192 // 11.16. Vector Integer Move Instructions
4193 case RISCV::VMV_V_X:
4194 // 12.1. Vector Single-Width Saturating Add and Subtract
4195 case RISCV::VSADDU_VX:
4196 case RISCV::VSADD_VX:
4197 case RISCV::VSSUBU_VX:
4198 case RISCV::VSSUB_VX:
4199 // 12.2. Vector Single-Width Averaging Add and Subtract
4200 case RISCV::VAADDU_VX:
4201 case RISCV::VAADD_VX:
4202 case RISCV::VASUBU_VX:
4203 case RISCV::VASUB_VX:
4204 // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
4205 case RISCV::VSMUL_VX:
4206 // 16.1. Integer Scalar Move Instructions
4207 case RISCV::VMV_S_X:
4208 return 1U << Log2SEW;
4212 unsigned RISCV::getRVVMCOpcode(unsigned RVVPseudoOpcode) {
4213 const RISCVVPseudosTable::PseudoInfo *RVV =
4214 RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode);
4215 if (!RVV)
4216 return 0;
4217 return RVV->BaseInstr;
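// Decode the destination EEW from TSFlags: a DestEEW field of 0 means the
// destination EEW is 1 (log2 = 0); otherwise it is SEW << (DestEEW - 1),
// returned as a log2 value.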
4220 unsigned RISCV::getDestLog2EEW(const MCInstrDesc &Desc, unsigned Log2SEW) {
4221 unsigned DestEEW =
4222 (Desc.TSFlags & RISCVII::DestEEWMask) >> RISCVII::DestEEWShift;
4223 // EEW = 1
4224 if (DestEEW == 0)
4225 return 0;
4226 // EEW = SEW * n
4227 unsigned Scaled = Log2SEW + (DestEEW - 1);
4228 assert(Scaled >= 3 && Scaled <= 6);
4229 return Scaled;
4232 /// Given two VL operands, do we know that LHS <= RHS?
4233 bool RISCV::isVLKnownLE(const MachineOperand &LHS, const MachineOperand &RHS) {
4234 if (LHS.isReg() && RHS.isReg() && LHS.getReg().isVirtual() &&
4235 LHS.getReg() == RHS.getReg())
4236 return true;
4237 if (RHS.isImm() && RHS.getImm() == RISCV::VLMaxSentinel)
4238 return true;
4239 if (LHS.isImm() && LHS.getImm() == RISCV::VLMaxSentinel)
4240 return false;
4241 if (!LHS.isImm() || !RHS.isImm())
4242 return false;
4243 return LHS.getImm() <= RHS.getImm();
4246 namespace {
4247 class RISCVPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
4248 const MachineInstr *LHS;
4249 const MachineInstr *RHS;
4250 SmallVector<MachineOperand, 3> Cond;
4252 public:
4253 RISCVPipelinerLoopInfo(const MachineInstr *LHS, const MachineInstr *RHS,
4254 const SmallVectorImpl<MachineOperand> &Cond)
4255 : LHS(LHS), RHS(RHS), Cond(Cond.begin(), Cond.end()) {}
4257 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
4258 // Ensure the loop-control instructions are placed in stage 0.
4259 // The predecessors of LHS/RHS are considered by the caller.
4260 if (LHS && MI == LHS)
4261 return true;
4262 if (RHS && MI == RHS)
4263 return true;
4264 return false;
4267 std::optional<bool> createTripCountGreaterCondition(
4268 int TC, MachineBasicBlock &MBB,
4269 SmallVectorImpl<MachineOperand> &CondParam) override {
4270 // A branch instruction will be inserted as "if (Cond) goto epilogue".
4271 // Cond is normalized for such use.
4272 // The predecessors of the branch are assumed to have already been inserted.
4273 CondParam = Cond;
4274 return {};
4277 void setPreheader(MachineBasicBlock *NewPreheader) override {}
4279 void adjustTripCount(int TripCountAdjust) override {}
4281 void disposed() override {}
4283 } // namespace
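// Software pipelining is only attempted for single-basic-block loops that end
// in an analyzable conditional branch whose compared values are not defined
// by PHIs inside the loop.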
4285 std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
4286 RISCVInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
4287 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
4288 SmallVector<MachineOperand, 4> Cond;
4289 if (analyzeBranch(*LoopBB, TBB, FBB, Cond, /*AllowModify=*/false))
4290 return nullptr;
4292 // Infinite loops are not supported
4293 if (TBB == LoopBB && FBB == LoopBB)
4294 return nullptr;
4296 // Must be conditional branch
4297 if (FBB == nullptr)
4298 return nullptr;
4300 assert((TBB == LoopBB || FBB == LoopBB) &&
4301 "The Loop must be a single-basic-block loop");
4303 // Normalization for createTripCountGreaterCondition()
4304 if (TBB == LoopBB)
4305 reverseBranchCondition(Cond);
4307 const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
4308 auto FindRegDef = [&MRI](MachineOperand &Op) -> const MachineInstr * {
4309 if (!Op.isReg())
4310 return nullptr;
4311 Register Reg = Op.getReg();
4312 if (!Reg.isVirtual())
4313 return nullptr;
4314 return MRI.getVRegDef(Reg);
4317 const MachineInstr *LHS = FindRegDef(Cond[1]);
4318 const MachineInstr *RHS = FindRegDef(Cond[2]);
4319 if (LHS && LHS->isPHI())
4320 return nullptr;
4321 if (RHS && RHS->isPHI())
4322 return nullptr;
4324 return std::make_unique<RISCVPipelinerLoopInfo>(LHS, RHS, Cond);