1 //===- LoongArchOptWInstrs.cpp - MI W instruction optimizations ----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===---------------------------------------------------------------------===//
9 // This pass does some optimizations for *W instructions at the MI level.
11 // First it removes unneeded sext(addi.w rd, rs, 0) instructions. Either
12 // because the sign extended bits aren't consumed or because the input was
13 // already sign extended by an earlier instruction.
16 // 1. Unless explicitly disabled or the target prefers instructions with W suffix,
17 // it removes the -w suffix from opw instructions whenever all users are
18 // dependent only on the lower word of the result of the instruction.
19 // The cases handled are:
20 // * addi.w because it helps reduce test differences between LA32 and LA64
21 // w/o being a pessimization.
23 // 2. Or if explicitly enabled or the target prefers instructions with W suffix,
24 // it adds the W suffix to the instruction whenever all users are dependent
25 // only on the lower word of the result of the instruction.
26 // The cases handled are:
27 // * add.d/addi.d/sub.d/mul.d.
28 // * slli.d with imm < 32.
// * ld.d/ld.wu.
30 //===---------------------------------------------------------------------===//
32 #include "LoongArch.h"
33 #include "LoongArchMachineFunctionInfo.h"
34 #include "LoongArchSubtarget.h"
35 #include "llvm/ADT/SmallSet.h"
36 #include "llvm/ADT/Statistic.h"
37 #include "llvm/CodeGen/MachineFunctionPass.h"
38 #include "llvm/CodeGen/TargetInstrInfo.h"
42 #define DEBUG_TYPE "loongarch-opt-w-instrs"
43 #define LOONGARCH_OPT_W_INSTRS_NAME "LoongArch Optimize W Instructions"
// Pass statistics: the number of sign-extensions removed and the number of
// instructions rewritten to their W-suffixed opcode.
45 STATISTIC(NumRemovedSExtW
, "Number of removed sign-extensions");
46 STATISTIC(NumTransformedToWInstrs
,
47 "Number of instructions transformed to W-ops");
// Hidden option, default false: disables removal of redundant sign-extend
// instructions. It is tested at the top of removeSExtWInstrs().
50 DisableSExtWRemoval("loongarch-disable-sextw-removal",
51 cl::desc("Disable removal of sign-extend insn"),
52 cl::init(false), cl::Hidden
);
// Hidden option, default false: disables the conversion to D-suffixed
// instructions. It is tested in runOnMachineFunction().
54 DisableCvtToDSuffix("loongarch-disable-cvt-to-d-suffix",
55 cl::desc("Disable convert to D suffix"),
56 cl::init(false), cl::Hidden
);
// Machine-function pass that removes redundant sext.w instructions and
// switches instructions between their W- and D-suffixed opcodes.
60 class LoongArchOptWInstrs
: public MachineFunctionPass
{
64 LoongArchOptWInstrs() : MachineFunctionPass(ID
) {}
// Pass entry point: accumulates the "changed" results of the helpers below.
66 bool runOnMachineFunction(MachineFunction
&MF
) override
;
// Erases sext.w (ADDI.W rd, rs, 0) instructions that are found redundant.
67 bool removeSExtWInstrs(MachineFunction
&MF
, const LoongArchInstrInfo
&TII
,
68 const LoongArchSubtarget
&ST
,
69 MachineRegisterInfo
&MRI
);
// Rewrites ADDI_W to ADDI_D when hasAllWUsers() holds for the instruction.
70 bool convertToDSuffixes(MachineFunction
&MF
, const LoongArchInstrInfo
&TII
,
71 const LoongArchSubtarget
&ST
,
72 MachineRegisterInfo
&MRI
);
// Rewrites selected D-suffixed opcodes (and LD_WU) to W-suffixed opcodes when
// hasAllWUsers() holds for the instruction.
73 bool convertToWSuffixes(MachineFunction
&MF
, const LoongArchInstrInfo
&TII
,
74 const LoongArchSubtarget
&ST
,
75 MachineRegisterInfo
&MRI
);
// Chains to the base-class analysis-usage declaration.
77 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
79 MachineFunctionPass::getAnalysisUsage(AU
);
// Human-readable pass name, taken from LOONGARCH_OPT_W_INSTRS_NAME.
82 StringRef
getPassName() const override
{ return LOONGARCH_OPT_W_INSTRS_NAME
; }
85 } // end anonymous namespace
// Pass identification object and pass registration, using DEBUG_TYPE as the
// argument string and LOONGARCH_OPT_W_INSTRS_NAME as the descriptive name.
87 char LoongArchOptWInstrs::ID
= 0;
88 INITIALIZE_PASS(LoongArchOptWInstrs
, DEBUG_TYPE
, LOONGARCH_OPT_W_INSTRS_NAME
,
// Factory function: returns a newly heap-allocated LoongArchOptWInstrs pass.
91 FunctionPass
*llvm::createLoongArchOptWInstrsPass() {
92 return new LoongArchOptWInstrs();
95 // Checks if all users only demand the lower \p OrigBits of the original
96 // instruction's result.
97 // TODO: handle multiple interdependent transformations
//
// The search is a worklist over (instruction, bit count) pairs that starts at
// (OrigMI, OrigBits). Visited records pairs already taken from the worklist so
// that each pair is examined once. For every non-debug use of the defined
// register, the user's opcode decides whether that use is acceptable as is or
// whether the user itself is queued so that its own users are examined too.
98 static bool hasAllNBitUsers(const MachineInstr
&OrigMI
,
99 const LoongArchSubtarget
&ST
,
100 const MachineRegisterInfo
&MRI
, unsigned OrigBits
) {
102 SmallSet
<std::pair
<const MachineInstr
*, unsigned>, 4> Visited
;
103 SmallVector
<std::pair
<const MachineInstr
*, unsigned>, 4> Worklist
;
105 Worklist
.push_back(std::make_pair(&OrigMI
, OrigBits
));
107 while (!Worklist
.empty()) {
108 auto P
= Worklist
.pop_back_val();
109 const MachineInstr
*MI
= P
.first
;
110 unsigned Bits
= P
.second
;
// insert().second is false when this (MI, Bits) pair was seen before.
112 if (!Visited
.insert(P
).second
)
115 // Only handle instructions with one def.
116 if (MI
->getNumExplicitDefs() != 1)
// Operand 0 is the single explicit def checked above.
119 Register DestReg
= MI
->getOperand(0).getReg();
120 if (!DestReg
.isVirtual())
// Debug uses are ignored; OpIdx tells which operand of the user reads DestReg.
123 for (auto &UserOp
: MRI
.use_nodbg_operands(DestReg
)) {
124 const MachineInstr
*UserMI
= UserOp
.getParent();
125 unsigned OpIdx
= UserOp
.getOperandNo();
127 switch (UserMI
->getOpcode()) {
132 case LoongArch::ADD_W
:
133 case LoongArch::ADDI_W
:
134 case LoongArch::SUB_W
:
135 case LoongArch::ALSL_W
:
136 case LoongArch::ALSL_WU
:
137 case LoongArch::MUL_W
:
138 case LoongArch::MULH_W
:
139 case LoongArch::MULH_WU
:
140 case LoongArch::MULW_D_W
:
141 case LoongArch::MULW_D_WU
:
142 // TODO: {DIV,MOD}.{W,WU} consumes the upper 32 bits before LA664+.
143 // case LoongArch::DIV_W:
144 // case LoongArch::DIV_WU:
145 // case LoongArch::MOD_W:
146 // case LoongArch::MOD_WU:
147 case LoongArch::SLL_W
:
148 case LoongArch::SLLI_W
:
149 case LoongArch::SRL_W
:
150 case LoongArch::SRLI_W
:
151 case LoongArch::SRA_W
:
152 case LoongArch::SRAI_W
:
153 case LoongArch::ROTR_W
:
154 case LoongArch::ROTRI_W
:
155 case LoongArch::CLO_W
:
156 case LoongArch::CLZ_W
:
157 case LoongArch::CTO_W
:
158 case LoongArch::CTZ_W
:
159 case LoongArch::BYTEPICK_W
:
160 case LoongArch::REVB_2H
:
161 case LoongArch::BITREV_4B
:
162 case LoongArch::BITREV_W
:
163 case LoongArch::BSTRINS_W
:
164 case LoongArch::BSTRPICK_W
:
165 case LoongArch::CRC_W_W_W
:
166 case LoongArch::CRCC_W_W_W
:
167 case LoongArch::MOVGR2FCSR
:
168 case LoongArch::MOVGR2FRH_W
:
169 case LoongArch::MOVGR2FR_W_64
:
173 case LoongArch::MOVGR2CF
:
177 case LoongArch::EXT_W_B
:
181 case LoongArch::EXT_W_H
:
186 case LoongArch::SRLI_D
: {
187 // If we are shifting right by less than Bits, and users don't demand
188 // any bits that were shifted into [Bits-1:0], then we can consider this
190 unsigned ShAmt
= UserMI
->getOperand(2).getImm();
// The shift's own users are then checked against the remaining
// Bits - ShAmt low bits.
192 Worklist
.push_back(std::make_pair(UserMI
, Bits
- ShAmt
));
198 // these overwrite higher input bits, otherwise the lower word of output
199 // depends only on the lower word of input. So check their uses read W.
200 case LoongArch::SLLI_D
:
// Compares Bits with GRLen minus the shift amount, i.e. with the number of
// input bits that survive the left shift.
201 if (Bits
>= (ST
.getGRLen() - UserMI
->getOperand(2).getImm()))
203 Worklist
.push_back(std::make_pair(UserMI
, Bits
));
204 case LoongArch::ANDI
: {
206 uint64_t Imm
= UserMI
->getOperand(2).getImm();
// Compares Bits with the number of significant bits in the AND mask.
207 if (Bits
>= (unsigned)llvm::bit_width(Imm
))
209 Worklist
.push_back(std::make_pair(UserMI
, Bits
));
212 case LoongArch::ORI
: {
213 uint64_t Imm
= UserMI
->getOperand(2).getImm();
// Compares Bits with the number of significant bits in the inverted
// immediate, i.e. the highest bit position that the OR does not force to 1.
214 if (Bits
>= (unsigned)llvm::bit_width
<uint64_t>(~Imm
))
216 Worklist
.push_back(std::make_pair(UserMI
, Bits
));
220 case LoongArch::SLL_D
:
221 // Operand 2 is the shift amount which uses log2(grlen) bits.
223 if (Bits
>= Log2_32(ST
.getGRLen()))
227 Worklist
.push_back(std::make_pair(UserMI
, Bits
));
230 case LoongArch::SRA_D
:
231 case LoongArch::SRL_D
:
232 case LoongArch::ROTR_D
:
233 // Operand 2 is the shift amount, which uses log2(grlen) bits (6 when
// GRLen is 64).
234 if (OpIdx
== 2 && Bits
>= Log2_32(ST
.getGRLen()))
238 case LoongArch::ST_B
:
239 case LoongArch::STX_B
:
240 case LoongArch::STGT_B
:
241 case LoongArch::STLE_B
:
242 case LoongArch::IOCSRWR_B
:
243 // The first argument is the value to store.
244 if (OpIdx
== 0 && Bits
>= 8)
247 case LoongArch::ST_H
:
248 case LoongArch::STX_H
:
249 case LoongArch::STGT_H
:
250 case LoongArch::STLE_H
:
251 case LoongArch::IOCSRWR_H
:
252 // The first argument is the value to store.
253 if (OpIdx
== 0 && Bits
>= 16)
256 case LoongArch::ST_W
:
257 case LoongArch::STX_W
:
258 case LoongArch::SCREL_W
:
259 case LoongArch::STPTR_W
:
260 case LoongArch::STGT_W
:
261 case LoongArch::STLE_W
:
262 case LoongArch::IOCSRWR_W
:
263 // The first argument is the value to store.
264 if (OpIdx
== 0 && Bits
>= 32)
// CRC users: operand 1 is checked against the element width named in the
// opcode (8 or 16 bits) and operand 2 against 32 bits. For the _D_ forms only
// operand 2 has an accepting condition.
268 case LoongArch::CRC_W_B_W
:
269 case LoongArch::CRCC_W_B_W
:
270 if ((OpIdx
== 1 && Bits
>= 8) || (OpIdx
== 2 && Bits
>= 32))
273 case LoongArch::CRC_W_H_W
:
274 case LoongArch::CRCC_W_H_W
:
275 if ((OpIdx
== 1 && Bits
>= 16) || (OpIdx
== 2 && Bits
>= 32))
278 case LoongArch::CRC_W_D_W
:
279 case LoongArch::CRCC_W_D_W
:
280 if (OpIdx
== 2 && Bits
>= 32)
284 // For these operations, the lower word of the output depends only on the
285 // lower word of the input, so check that all their uses only read the lower word.
286 case LoongArch::COPY
:
288 case LoongArch::ADD_D
:
289 case LoongArch::ADDI_D
:
290 case LoongArch::SUB_D
:
291 case LoongArch::MUL_D
:
296 case LoongArch::XORI
:
297 case LoongArch::ANDN
:
299 Worklist
.push_back(std::make_pair(UserMI
, Bits
));
302 case LoongArch::MASKNEZ
:
303 case LoongArch::MASKEQZ
:
306 Worklist
.push_back(std::make_pair(UserMI
, Bits
));
// Convenience wrapper around hasAllNBitUsers() with the bit count fixed at 32,
// i.e. it asks whether all users only demand the lower word of OrigMI's result.
315 static bool hasAllWUsers(const MachineInstr
&OrigMI
,
316 const LoongArchSubtarget
&ST
,
317 const MachineRegisterInfo
&MRI
) {
318 return hasAllNBitUsers(OrigMI
, ST
, MRI
, 32);
321 // This function returns true if the machine instruction always outputs a value
322 // where bits 63:32 match bit 31.
//
// The decision is made from the opcode and, for the special cases near the
// end, from the instruction's immediate or source operand.
// NOTE(review): OpNo is the def-operand index supplied by the caller; no use of
// it is visible in this excerpt. Presumably it distinguishes the result from
// the scratch destination of the atomic pseudos listed last - confirm.
323 static bool isSignExtendingOpW(const MachineInstr
&MI
,
324 const MachineRegisterInfo
&MRI
, unsigned OpNo
) {
325 switch (MI
.getOpcode()) {
327 case LoongArch::ADD_W
:
328 case LoongArch::SUB_W
:
329 case LoongArch::ADDI_W
:
330 case LoongArch::ALSL_W
:
331 case LoongArch::LU12I_W
:
333 case LoongArch::SLTU
:
334 case LoongArch::SLTI
:
335 case LoongArch::SLTUI
:
336 case LoongArch::ANDI
:
337 case LoongArch::MUL_W
:
338 case LoongArch::MULH_W
:
339 case LoongArch::MULH_WU
:
340 case LoongArch::DIV_W
:
341 case LoongArch::MOD_W
:
342 case LoongArch::DIV_WU
:
343 case LoongArch::MOD_WU
:
344 case LoongArch::SLL_W
:
345 case LoongArch::SRL_W
:
346 case LoongArch::SRA_W
:
347 case LoongArch::ROTR_W
:
348 case LoongArch::SLLI_W
:
349 case LoongArch::SRLI_W
:
350 case LoongArch::SRAI_W
:
351 case LoongArch::ROTRI_W
:
352 case LoongArch::EXT_W_B
:
353 case LoongArch::EXT_W_H
:
354 case LoongArch::CLO_W
:
355 case LoongArch::CLZ_W
:
356 case LoongArch::CTO_W
:
357 case LoongArch::CTZ_W
:
358 case LoongArch::BYTEPICK_W
:
359 case LoongArch::REVB_2H
:
360 case LoongArch::BITREV_4B
:
361 case LoongArch::BITREV_W
:
362 case LoongArch::BSTRINS_W
:
363 case LoongArch::BSTRPICK_W
:
364 case LoongArch::LD_B
:
365 case LoongArch::LD_H
:
366 case LoongArch::LD_W
:
367 case LoongArch::LD_BU
:
368 case LoongArch::LD_HU
:
369 case LoongArch::LL_W
:
370 case LoongArch::LLACQ_W
:
371 case LoongArch::RDTIMEL_W
:
372 case LoongArch::RDTIMEH_W
:
373 case LoongArch::CPUCFG
:
374 case LoongArch::LDX_B
:
375 case LoongArch::LDX_H
:
376 case LoongArch::LDX_W
:
377 case LoongArch::LDX_BU
:
378 case LoongArch::LDX_HU
:
379 case LoongArch::LDPTR_W
:
380 case LoongArch::LDGT_B
:
381 case LoongArch::LDGT_H
:
382 case LoongArch::LDGT_W
:
383 case LoongArch::LDLE_B
:
384 case LoongArch::LDLE_H
:
385 case LoongArch::LDLE_W
:
386 case LoongArch::AMSWAP_B
:
387 case LoongArch::AMSWAP_H
:
388 case LoongArch::AMSWAP_W
:
389 case LoongArch::AMADD_B
:
390 case LoongArch::AMADD_H
:
391 case LoongArch::AMADD_W
:
392 case LoongArch::AMAND_W
:
393 case LoongArch::AMOR_W
:
394 case LoongArch::AMXOR_W
:
395 case LoongArch::AMMAX_W
:
396 case LoongArch::AMMIN_W
:
397 case LoongArch::AMMAX_WU
:
398 case LoongArch::AMMIN_WU
:
399 case LoongArch::AMSWAP__DB_B
:
400 case LoongArch::AMSWAP__DB_H
:
401 case LoongArch::AMSWAP__DB_W
:
402 case LoongArch::AMADD__DB_B
:
403 case LoongArch::AMADD__DB_H
:
404 case LoongArch::AMADD__DB_W
:
405 case LoongArch::AMAND__DB_W
:
406 case LoongArch::AMOR__DB_W
:
407 case LoongArch::AMXOR__DB_W
:
408 case LoongArch::AMMAX__DB_W
:
409 case LoongArch::AMMIN__DB_W
:
410 case LoongArch::AMMAX__DB_WU
:
411 case LoongArch::AMMIN__DB_WU
:
412 case LoongArch::AMCAS_B
:
413 case LoongArch::AMCAS_H
:
414 case LoongArch::AMCAS_W
:
415 case LoongArch::AMCAS__DB_B
:
416 case LoongArch::AMCAS__DB_H
:
417 case LoongArch::AMCAS__DB_W
:
418 case LoongArch::CRC_W_B_W
:
419 case LoongArch::CRC_W_H_W
:
420 case LoongArch::CRC_W_W_W
:
421 case LoongArch::CRC_W_D_W
:
422 case LoongArch::CRCC_W_B_W
:
423 case LoongArch::CRCC_W_H_W
:
424 case LoongArch::CRCC_W_W_W
:
425 case LoongArch::CRCC_W_D_W
:
426 case LoongArch::IOCSRRD_B
:
427 case LoongArch::IOCSRRD_H
:
428 case LoongArch::IOCSRRD_W
:
429 case LoongArch::MOVFR2GR_S
:
430 case LoongArch::MOVFCSR2GR
:
431 case LoongArch::MOVCF2GR
:
432 case LoongArch::MOVFRH2GR_S
:
433 case LoongArch::MOVFR2GR_S_64
:
436 // Special cases that require checking operands.
437 // shifting right sufficiently makes the value 32-bit sign-extended
// An arithmetic right shift by at least 32 leaves bits 63:31 all equal to the
// original sign bit.
438 case LoongArch::SRAI_D
:
439 return MI
.getOperand(2).getImm() >= 32;
// A logical right shift must be strictly greater than 32 so that bit 31 is
// zero as well as bits 63:32.
440 case LoongArch::SRLI_D
:
441 return MI
.getOperand(2).getImm() > 32;
442 // The LI pattern ADDI rd, R0, imm and ORI rd, R0, imm are sign extended.
443 case LoongArch::ADDI_D
:
445 return MI
.getOperand(1).isReg() &&
446 MI
.getOperand(1).getReg() == LoongArch::R0
;
447 // A bits extract is sign extended if the msb is less than 31.
448 case LoongArch::BSTRPICK_D
:
449 return MI
.getOperand(2).getImm() < 31;
450 // Copying from R0 produces zero.
451 case LoongArch::COPY
:
452 return MI
.getOperand(1).getReg() == LoongArch::R0
;
453 // Ignore the scratch register destination.
454 case LoongArch::PseudoMaskedAtomicSwap32
:
455 case LoongArch::PseudoAtomicSwap32
:
456 case LoongArch::PseudoMaskedAtomicLoadAdd32
:
457 case LoongArch::PseudoMaskedAtomicLoadSub32
:
458 case LoongArch::PseudoAtomicLoadNand32
:
459 case LoongArch::PseudoMaskedAtomicLoadNand32
:
460 case LoongArch::PseudoAtomicLoadAdd32
:
461 case LoongArch::PseudoAtomicLoadSub32
:
462 case LoongArch::PseudoAtomicLoadAnd32
:
463 case LoongArch::PseudoAtomicLoadOr32
:
464 case LoongArch::PseudoAtomicLoadXor32
:
465 case LoongArch::PseudoMaskedAtomicLoadUMax32
:
466 case LoongArch::PseudoMaskedAtomicLoadUMin32
:
467 case LoongArch::PseudoCmpXchg32
:
468 case LoongArch::PseudoMaskedCmpXchg32
:
469 case LoongArch::PseudoMaskedAtomicLoadMax32
:
470 case LoongArch::PseudoMaskedAtomicLoadMin32
:
// Determines whether the value in SrcReg is known to be sign-extended from 32
// bits by walking backwards through the defining instructions with a register
// worklist. Each definition must either be a sign-extending operation
// (isSignExtendingOpW), propagate sign extension from inputs that are queued
// in turn, or be a "fixable" instruction whose users all read only the low
// word. Fixable instructions are recorded in FixableDef so that the caller can
// convert them to their W versions.
477 static bool isSignExtendedW(Register SrcReg
, const LoongArchSubtarget
&ST
,
478 const MachineRegisterInfo
&MRI
,
479 SmallPtrSetImpl
<MachineInstr
*> &FixableDef
) {
480 SmallSet
<Register
, 4> Visited
;
481 SmallVector
<Register
, 4> Worklist
;
// Helper that queues a register for inspection; its visible guard tests
// whether the register is virtual.
483 auto AddRegToWorkList
= [&](Register SrcReg
) {
484 if (!SrcReg
.isVirtual())
486 Worklist
.push_back(SrcReg
);
490 if (!AddRegToWorkList(SrcReg
))
493 while (!Worklist
.empty()) {
494 Register Reg
= Worklist
.pop_back_val();
496 // If we already visited this register, we don't need to check it again.
497 if (!Visited
.insert(Reg
).second
)
500 MachineInstr
*MI
= MRI
.getVRegDef(Reg
);
// Index of the operand of MI that defines Reg; passed on to
// isSignExtendingOpW().
504 int OpNo
= MI
->findRegisterDefOperandIdx(Reg
, /*TRI=*/nullptr);
505 assert(OpNo
!= -1 && "Couldn't find register");
507 // If this is a sign extending operation we don't need to look any further.
508 if (isSignExtendingOpW(*MI
, MRI
, OpNo
))
511 // Is this an instruction that propagates sign extend?
512 switch (MI
->getOpcode()) {
514 // Unknown opcode, give up.
516 case LoongArch::COPY
: {
517 const MachineFunction
*MF
= MI
->getMF();
518 const LoongArchMachineFunctionInfo
*LAFI
=
519 MF
->getInfo
<LoongArchMachineFunctionInfo
>();
521 // If this is the entry block and the register is livein, see if we know
522 // it is sign extended.
523 if (MI
->getParent() == &MF
->front()) {
524 Register VReg
= MI
->getOperand(0).getReg();
525 if (MF
->getRegInfo().isLiveIn(VReg
) && LAFI
->isSExt32Register(VReg
))
529 Register CopySrcReg
= MI
->getOperand(1).getReg();
530 if (CopySrcReg
== LoongArch::R4
) {
531 // For a method return value, we check the ZExt/SExt flags in attribute.
532 // We assume the following code sequence for method call.
533 // PseudoCALL @bar, ...
534 // ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3
537 // We use the PseudoCall to look up the IR function being called to find
538 // its return attributes.
539 const MachineBasicBlock
*MBB
= MI
->getParent();
540 auto II
= MI
->getIterator();
// The instruction immediately before the COPY must be ADJCALLSTACKUP.
541 if (II
== MBB
->instr_begin() ||
542 (--II
)->getOpcode() != LoongArch::ADJCALLSTACKUP
)
// The instruction before that must be a call whose operand 0 is a global.
545 const MachineInstr
&CallMI
= *(--II
);
546 if (!CallMI
.isCall() || !CallMI
.getOperand(0).isGlobal())
550 dyn_cast_if_present
<Function
>(CallMI
.getOperand(0).getGlobal());
554 auto *IntTy
= dyn_cast
<IntegerType
>(CalleeFn
->getReturnType());
558 const AttributeSet
&Attrs
= CalleeFn
->getAttributes().getRetAttrs();
559 unsigned BitWidth
= IntTy
->getBitWidth();
// Accepts a signext return of at most 32 bits, or a zeroext return narrower
// than 32 bits (a zero-extended value of fewer than 32 bits has bit 31 clear).
560 if ((BitWidth
<= 32 && Attrs
.hasAttribute(Attribute::SExt
)) ||
561 (BitWidth
< 32 && Attrs
.hasAttribute(Attribute::ZExt
)))
565 if (!AddRegToWorkList(CopySrcReg
))
571 // For these, we just need to check if the 1st operand is sign extended.
572 case LoongArch::MOD_D
:
573 case LoongArch::ANDI
:
575 case LoongArch::XORI
:
576 // |Remainder| is always <= |Dividend|. If D is 32-bit, then so is R.
577 // DIV doesn't work because of the edge case 0xf..f 8000 0000 / (long)-1
578 // Logical operations use a sign extended 12-bit immediate.
579 if (!AddRegToWorkList(MI
->getOperand(1).getReg()))
583 case LoongArch::MOD_DU
:
587 case LoongArch::ANDN
:
589 case LoongArch::PHI
: {
590 // If all incoming values are sign-extended, the output of AND, OR, XOR,
591 // or PHI is also sign-extended.
593 // The input registers for PHI are operand 1, 3, ...
594 // The input registers for others are operand 1 and 2.
// B, E and D are the first operand index, the end index and the step used
// by the operand loop below.
595 unsigned B
= 1, E
= 3, D
= 1;
596 switch (MI
->getOpcode()) {
598 E
= MI
->getNumOperands();
603 for (unsigned I
= B
; I
!= E
; I
+= D
) {
604 if (!MI
->getOperand(I
).isReg())
607 if (!AddRegToWorkList(MI
->getOperand(I
).getReg()))
614 case LoongArch::MASKEQZ
:
615 case LoongArch::MASKNEZ
:
616 // Instructions return zero or operand 1. Result is sign extended if
617 // operand 1 is sign extended.
618 if (!AddRegToWorkList(MI
->getOperand(1).getReg()))
622 // With these opcodes, we can "fix" them with the W-version
623 // if we know all users of the result only rely on bits 31:0
624 case LoongArch::SLLI_D
:
625 // SLLI_W reads the lowest 5 bits, while SLLI_D reads lowest 6 bits
626 if (MI
->getOperand(2).getImm() >= 32)
629 case LoongArch::ADDI_D
:
630 case LoongArch::ADD_D
:
631 case LoongArch::LD_D
:
632 case LoongArch::LD_WU
:
633 case LoongArch::MUL_D
:
634 case LoongArch::SUB_D
:
// Record the instruction so that the caller can switch it to its W opcode.
635 if (hasAllWUsers(*MI
, ST
, MRI
)) {
636 FixableDef
.insert(MI
);
640 // If all incoming values are sign-extended and all users only use
641 // the lower 32 bits, then convert them to W versions.
642 case LoongArch::DIV_D
: {
643 if (!AddRegToWorkList(MI
->getOperand(1).getReg()))
645 if (!AddRegToWorkList(MI
->getOperand(2).getReg()))
647 if (hasAllWUsers(*MI
, ST
, MRI
)) {
648 FixableDef
.insert(MI
);
656 // If we get here, then every node we visited produces a sign extended value
657 // or propagated sign extended values. So the result must be sign extended.
// Maps an opcode to the W-suffixed opcode that replaces it. LD_D and LD_WU
// both map to LD_W. Falling past the listed cases reaches llvm_unreachable.
661 static unsigned getWOp(unsigned Opcode
) {
663 case LoongArch::ADDI_D
:
664 return LoongArch::ADDI_W
;
665 case LoongArch::ADD_D
:
666 return LoongArch::ADD_W
;
667 case LoongArch::DIV_D
:
668 return LoongArch::DIV_W
;
669 case LoongArch::LD_D
:
670 case LoongArch::LD_WU
:
671 return LoongArch::LD_W
;
672 case LoongArch::MUL_D
:
673 return LoongArch::MUL_W
;
674 case LoongArch::SLLI_D
:
675 return LoongArch::SLLI_W
;
676 case LoongArch::SUB_D
:
677 return LoongArch::SUB_W
;
679 llvm_unreachable("Unexpected opcode for replacement with W variant");
// Scans every instruction of MF for the sext.w pattern and erases those that
// are redundant. A sext.w is redundant when all of its users only read the
// low word (hasAllWUsers) or when its source is already sign-extended
// (isSignExtendedW). Definitions that isSignExtendedW recorded as fixable are
// first switched to their W opcode, then all uses of the sext.w result are
// redirected to its source register. The function starts with a guard on the
// DisableSExtWRemoval option.
683 bool LoongArchOptWInstrs::removeSExtWInstrs(MachineFunction
&MF
,
684 const LoongArchInstrInfo
&TII
,
685 const LoongArchSubtarget
&ST
,
686 MachineRegisterInfo
&MRI
) {
687 if (DisableSExtWRemoval
)
690 bool MadeChange
= false;
691 for (MachineBasicBlock
&MBB
: MF
) {
// Early-increment iteration, because MI may be erased inside the loop body.
692 for (MachineInstr
&MI
: llvm::make_early_inc_range(MBB
)) {
693 // We're looking for the sext.w pattern ADDI.W rd, rs, 0.
694 if (!LoongArch::isSEXT_W(MI
))
697 Register SrcReg
= MI
.getOperand(1).getReg();
699 SmallPtrSet
<MachineInstr
*, 4> FixableDefs
;
701 // If all users only use the lower bits, this sext.w is redundant.
702 // Or if all definitions reaching MI sign-extend their output,
703 // then sext.w is redundant.
704 if (!hasAllWUsers(MI
, ST
, MRI
) &&
705 !isSignExtendedW(SrcReg
, ST
, MRI
, FixableDefs
))
// SrcReg is going to stand in for DstReg, so it must be constrainable to
// DstReg's register class.
708 Register DstReg
= MI
.getOperand(0).getReg();
709 if (!MRI
.constrainRegClass(SrcReg
, MRI
.getRegClass(DstReg
)))
712 // Convert Fixable instructions to their W versions.
713 for (MachineInstr
*Fixable
: FixableDefs
) {
714 LLVM_DEBUG(dbgs() << "Replacing " << *Fixable
);
715 Fixable
->setDesc(TII
.get(getWOp(Fixable
->getOpcode())));
// NOTE(review): the wrap/exact flags were attached to the original opcode;
// presumably they may not hold for the W form, hence they are cleared.
716 Fixable
->clearFlag(MachineInstr::MIFlag::NoSWrap
);
717 Fixable
->clearFlag(MachineInstr::MIFlag::NoUWrap
);
718 Fixable
->clearFlag(MachineInstr::MIFlag::IsExact
);
719 LLVM_DEBUG(dbgs() << " with " << *Fixable
);
720 ++NumTransformedToWInstrs
;
723 LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
// Redirect all uses of DstReg to SrcReg. SrcReg gains uses, so its existing
// kill flags are cleared before the sext.w itself is erased.
724 MRI
.replaceRegWith(DstReg
, SrcReg
);
725 MRI
.clearKillFlags(SrcReg
);
726 MI
.eraseFromParent();
// Visits every instruction of MF and rewrites ADDI_W to ADDI_D in place (via
// setDesc) when hasAllWUsers() reports that all users depend only on the lower
// word of the result.
735 bool LoongArchOptWInstrs::convertToDSuffixes(MachineFunction
&MF
,
736 const LoongArchInstrInfo
&TII
,
737 const LoongArchSubtarget
&ST
,
738 MachineRegisterInfo
&MRI
) {
739 bool MadeChange
= false;
740 for (MachineBasicBlock
&MBB
: MF
) {
741 for (MachineInstr
&MI
: MBB
) {
// Select the replacement opcode; ADDI_W is the only case visible here.
743 switch (MI
.getOpcode()) {
746 case LoongArch::ADDI_W
:
747 Opc
= LoongArch::ADDI_D
;
751 if (hasAllWUsers(MI
, ST
, MRI
)) {
752 MI
.setDesc(TII
.get(Opc
));
// Visits every instruction of MF and, for the opcodes listed in the switch,
// rewrites the instruction in place to its W-suffixed opcode when
// hasAllWUsers() reports that all users depend only on the lower word of the
// result. Each rewrite clears the NoSWrap/NoUWrap/IsExact flags and increments
// NumTransformedToWInstrs.
761 bool LoongArchOptWInstrs::convertToWSuffixes(MachineFunction
&MF
,
762 const LoongArchInstrInfo
&TII
,
763 const LoongArchSubtarget
&ST
,
764 MachineRegisterInfo
&MRI
) {
765 bool MadeChange
= false;
766 for (MachineBasicBlock
&MBB
: MF
) {
767 for (MachineInstr
&MI
: MBB
) {
// Select the W opcode that corresponds to MI's current opcode.
770 switch (MI
.getOpcode()) {
773 case LoongArch::ADD_D
:
774 WOpc
= LoongArch::ADD_W
;
776 case LoongArch::ADDI_D
:
777 WOpc
= LoongArch::ADDI_W
;
779 case LoongArch::SUB_D
:
780 WOpc
= LoongArch::SUB_W
;
782 case LoongArch::MUL_D
:
783 WOpc
= LoongArch::MUL_W
;
785 case LoongArch::SLLI_D
:
786 // SLLI.W reads the lowest 5 bits, while SLLI.D reads lowest 6 bits
787 if (MI
.getOperand(2).getImm() >= 32)
789 WOpc
= LoongArch::SLLI_W
;
// Both the 64-bit load and the zero-extending 32-bit load map to LD_W.
791 case LoongArch::LD_D
:
792 case LoongArch::LD_WU
:
793 WOpc
= LoongArch::LD_W
;
797 if (hasAllWUsers(MI
, ST
, MRI
)) {
798 LLVM_DEBUG(dbgs() << "Replacing " << MI
);
799 MI
.setDesc(TII
.get(WOpc
));
// NOTE(review): flags belonged to the original opcode; presumably they may
// not hold for the W form, hence they are cleared.
800 MI
.clearFlag(MachineInstr::MIFlag::NoSWrap
);
801 MI
.clearFlag(MachineInstr::MIFlag::NoUWrap
);
802 MI
.clearFlag(MachineInstr::MIFlag::IsExact
);
803 LLVM_DEBUG(dbgs() << " with " << MI
);
804 ++NumTransformedToWInstrs
;
// Pass entry point. After the skipFunction() guard it fetches the register
// info, subtarget and instruction info, then accumulates into MadeChange the
// results of:
//   * removeSExtWInstrs(), always;
//   * convertToDSuffixes(), unless DisableCvtToDSuffix is set or the subtarget
//     prefers W instructions;
//   * convertToWSuffixes(), only when the subtarget prefers W instructions.
813 bool LoongArchOptWInstrs::runOnMachineFunction(MachineFunction
&MF
) {
814 if (skipFunction(MF
.getFunction()))
817 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
818 const LoongArchSubtarget
&ST
= MF
.getSubtarget
<LoongArchSubtarget
>();
819 const LoongArchInstrInfo
&TII
= *ST
.getInstrInfo();
824 bool MadeChange
= false;
825 MadeChange
|= removeSExtWInstrs(MF
, TII
, ST
, MRI
);
827 if (!(DisableCvtToDSuffix
|| ST
.preferWInst()))
828 MadeChange
|= convertToDSuffixes(MF
, TII
, ST
, MRI
);
830 if (ST
.preferWInst())
831 MadeChange
|= convertToWSuffixes(MF
, TII
, ST
, MRI
);