1 //===- LoongArchOptWInstrs.cpp - MI W instruction optimizations ----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===---------------------------------------------------------------------===//
9 // This pass does some optimizations for *W instructions at the MI level.
11 // First it removes unneeded sext(addi.w rd, rs, 0) instructions. Either
12 // because the sign extended bits aren't consumed or because the input was
13 // already sign extended by an earlier instruction.
16 // 1. Unless explicitly disabled or the target prefers instructions with W suffix,
17 // it removes the -w suffix from opw instructions whenever all users are
18 // dependent only on the lower word of the result of the instruction.
19 // The cases handled are:
20 // * addi.w because it helps reduce test differences between LA32 and LA64
21 // w/o being a pessimization.
23 // 2. Or if explicitly enabled or the target prefers instructions with W suffix,
24 // it adds the W suffix to the instruction whenever all users are dependent
25 // only on the lower word of the result of the instruction.
26 // The cases handled are:
27 // * add.d/addi.d/sub.d/mul.d.
28 // * slli.d with imm < 32.
30 //===---------------------------------------------------------------------===//
32 #include "LoongArch.h"
33 #include "LoongArchMachineFunctionInfo.h"
34 #include "LoongArchSubtarget.h"
35 #include "llvm/ADT/SmallSet.h"
36 #include "llvm/ADT/Statistic.h"
37 #include "llvm/CodeGen/MachineFunctionPass.h"
38 #include "llvm/CodeGen/TargetInstrInfo.h"
// Debug tag and human-readable pass name; both are reused by the pass
// registration and by getPassName().
42 #define DEBUG_TYPE "loongarch-opt-w-instrs"
43 #define LOONGARCH_OPT_W_INSTRS_NAME "LoongArch Optimize W Instructions"
// Statistic described as "Number of removed sign-extensions".
45 STATISTIC(NumRemovedSExtW
, "Number of removed sign-extensions");
// Statistic bumped each time an instruction is rewritten to its W opcode.
46 STATISTIC(NumTransformedToWInstrs
,
47 "Number of instructions transformed to W-ops");
// Hidden option "loongarch-disable-sextw-removal" (default: false). It is
// tested at the top of removeSExtWInstrs.
50 DisableSExtWRemoval("loongarch-disable-sextw-removal",
51 cl::desc("Disable removal of sign-extend insn"),
52 cl::init(false), cl::Hidden
);
// Hidden option "loongarch-disable-cvt-to-d-suffix" (default: false). It is
// tested in runOnMachineFunction before calling convertToDSuffixes.
54 DisableCvtToDSuffix("loongarch-disable-cvt-to-d-suffix",
55 cl::desc("Disable convert to D suffix"),
56 cl::init(false), cl::Hidden
);
// Machine-function pass that optimizes *W instructions. runOnMachineFunction
// is the entry point; removeSExtWInstrs, convertToDSuffixes and
// convertToWSuffixes are the individual transformations it dispatches to.
60 class LoongArchOptWInstrs
: public MachineFunctionPass
{
64 LoongArchOptWInstrs() : MachineFunctionPass(ID
) {}
// Pass entry point; returns whether the function was changed.
66 bool runOnMachineFunction(MachineFunction
&MF
) override
;
// Removes redundant sext.w (ADDI.W rd, rs, 0) instructions.
67 bool removeSExtWInstrs(MachineFunction
&MF
, const LoongArchInstrInfo
&TII
,
68 const LoongArchSubtarget
&ST
,
69 MachineRegisterInfo
&MRI
);
// Rewrites W-suffixed opcodes to their D form when only the low word is used.
70 bool convertToDSuffixes(MachineFunction
&MF
, const LoongArchInstrInfo
&TII
,
71 const LoongArchSubtarget
&ST
,
72 MachineRegisterInfo
&MRI
);
// Rewrites D-suffixed opcodes to their W form when only the low word is used.
73 bool convertToWSuffixes(MachineFunction
&MF
, const LoongArchInstrInfo
&TII
,
74 const LoongArchSubtarget
&ST
,
75 MachineRegisterInfo
&MRI
);
// Delegates to the MachineFunctionPass implementation.
77 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
79 MachineFunctionPass::getAnalysisUsage(AU
);
// Reports the pass name defined by LOONGARCH_OPT_W_INSTRS_NAME.
82 StringRef
getPassName() const override
{ return LOONGARCH_OPT_W_INSTRS_NAME
; }
85 } // end anonymous namespace
// Storage for the pass identifier, followed by the pass registration, which
// uses DEBUG_TYPE as its first string argument and
// LOONGARCH_OPT_W_INSTRS_NAME as its second.
87 char LoongArchOptWInstrs::ID
= 0;
88 INITIALIZE_PASS(LoongArchOptWInstrs
, DEBUG_TYPE
, LOONGARCH_OPT_W_INSTRS_NAME
,
// Factory function: returns a newly allocated LoongArchOptWInstrs pass.
91 FunctionPass
*llvm::createLoongArchOptWInstrsPass() {
92 return new LoongArchOptWInstrs();
95 // Checks if all users only demand the lower \p OrigBits of the original
96 // instruction's result.
//
// The search is a worklist walk over (instruction, demanded-bit-count) pairs
// seeded with (OrigMI, OrigBits); Visited de-duplicates pairs. For each
// non-debug use of the current instruction's single virtual def, the switch
// on the user's opcode either compares Bits against what that user reads, or
// pushes the user itself onto the worklist so that its own users are
// examined as well.
//
// \param OrigMI   instruction whose result is being queried.
// \param ST       subtarget, consulted for hasDiv32() and getGRLen().
// \param MRI      register info used to enumerate the uses of each def.
// \param OrigBits number of low bits that users are allowed to demand.
97 // TODO: handle multiple interdependent transformations
98 static bool hasAllNBitUsers(const MachineInstr
&OrigMI
,
99 const LoongArchSubtarget
&ST
,
100 const MachineRegisterInfo
&MRI
, unsigned OrigBits
) {
102 SmallSet
<std::pair
<const MachineInstr
*, unsigned>, 4> Visited
;
103 SmallVector
<std::pair
<const MachineInstr
*, unsigned>, 4> Worklist
;
105 Worklist
.push_back(std::make_pair(&OrigMI
, OrigBits
));
107 while (!Worklist
.empty()) {
108 auto P
= Worklist
.pop_back_val();
109 const MachineInstr
*MI
= P
.first
;
110 unsigned Bits
= P
.second
;
// Skip (instruction, bits) pairs that were already processed.
112 if (!Visited
.insert(P
).second
)
115 // Only handle instructions with one def.
116 if (MI
->getNumExplicitDefs() != 1)
// The def must be a virtual register so its uses can be enumerated via MRI.
119 Register DestReg
= MI
->getOperand(0).getReg();
120 if (!DestReg
.isVirtual())
// Examine every non-debug use of the def.
123 for (auto &UserOp
: MRI
.use_nodbg_operands(DestReg
)) {
124 const MachineInstr
*UserMI
= UserOp
.getParent();
125 unsigned OpIdx
= UserOp
.getOperandNo();
127 switch (UserMI
->getOpcode()) {
131 case LoongArch::ADD_W
:
132 case LoongArch::ADDI_W
:
133 case LoongArch::SUB_W
:
134 case LoongArch::ALSL_W
:
135 case LoongArch::ALSL_WU
:
136 case LoongArch::MUL_W
:
137 case LoongArch::MULH_W
:
138 case LoongArch::MULH_WU
:
139 case LoongArch::MULW_D_W
:
140 case LoongArch::MULW_D_WU
:
141 case LoongArch::SLL_W
:
142 case LoongArch::SLLI_W
:
143 case LoongArch::SRL_W
:
144 case LoongArch::SRLI_W
:
145 case LoongArch::SRA_W
:
146 case LoongArch::SRAI_W
:
147 case LoongArch::ROTR_W
:
148 case LoongArch::ROTRI_W
:
149 case LoongArch::CLO_W
:
150 case LoongArch::CLZ_W
:
151 case LoongArch::CTO_W
:
152 case LoongArch::CTZ_W
:
153 case LoongArch::BYTEPICK_W
:
154 case LoongArch::REVB_2H
:
155 case LoongArch::BITREV_4B
:
156 case LoongArch::BITREV_W
:
157 case LoongArch::BSTRINS_W
:
158 case LoongArch::BSTRPICK_W
:
159 case LoongArch::CRC_W_W_W
:
160 case LoongArch::CRCC_W_W_W
:
161 case LoongArch::MOVGR2FCSR
:
162 case LoongArch::MOVGR2FRH_W
:
163 case LoongArch::MOVGR2FR_W_64
:
164 case LoongArch::VINSGR2VR_W
:
165 case LoongArch::XVINSGR2VR_W
:
166 case LoongArch::VREPLGR2VR_W
:
167 case LoongArch::XVREPLGR2VR_W
:
171 // {DIV,MOD}.W{U} consumes the upper 32 bits if the div32
172 // feature is not enabled.
173 case LoongArch::DIV_W
:
174 case LoongArch::DIV_WU
:
175 case LoongArch::MOD_W
:
176 case LoongArch::MOD_WU
:
177 if (Bits
>= 32 && ST
.hasDiv32())
180 case LoongArch::MOVGR2CF
:
181 case LoongArch::VREPLVE_D
:
182 case LoongArch::XVREPLVE_D
:
186 case LoongArch::VREPLVE_W
:
187 case LoongArch::XVREPLVE_W
:
191 case LoongArch::VREPLVE_H
:
192 case LoongArch::XVREPLVE_H
:
196 case LoongArch::VREPLVE_B
:
197 case LoongArch::XVREPLVE_B
:
201 case LoongArch::EXT_W_B
:
202 case LoongArch::VINSGR2VR_B
:
203 case LoongArch::VREPLGR2VR_B
:
204 case LoongArch::XVREPLGR2VR_B
:
208 case LoongArch::EXT_W_H
:
209 case LoongArch::VINSGR2VR_H
:
210 case LoongArch::VREPLGR2VR_H
:
211 case LoongArch::XVREPLGR2VR_H
:
216 case LoongArch::SRLI_D
: {
217 // If we are shifting right by less than Bits, and users don't demand
218 // any bits that were shifted into [Bits-1:0], then we can consider this
// The user is re-queued with the demanded width reduced by the shift amount.
220 unsigned ShAmt
= UserMI
->getOperand(2).getImm();
222 Worklist
.push_back(std::make_pair(UserMI
, Bits
- ShAmt
));
228 // these overwrite higher input bits, otherwise the lower word of output
229 // depends only on the lower word of input. So check their uses read W.
230 case LoongArch::SLLI_D
:
231 if (Bits
>= (ST
.getGRLen() - UserMI
->getOperand(2).getImm()))
233 Worklist
.push_back(std::make_pair(UserMI
, Bits
));
// AND with an immediate: Bits is compared with the bit width of the mask.
235 case LoongArch::ANDI
: {
236 uint64_t Imm
= UserMI
->getOperand(2).getImm();
237 if (Bits
>= (unsigned)llvm::bit_width(Imm
))
239 Worklist
.push_back(std::make_pair(UserMI
, Bits
));
// OR with an immediate: Bits is compared with the bit width of ~Imm.
242 case LoongArch::ORI
: {
243 uint64_t Imm
= UserMI
->getOperand(2).getImm();
244 if (Bits
>= (unsigned)llvm::bit_width
<uint64_t>(~Imm
))
246 Worklist
.push_back(std::make_pair(UserMI
, Bits
));
250 case LoongArch::SLL_D
:
251 // Operand 2 is the shift amount which uses log2(grlen) bits.
253 if (Bits
>= Log2_32(ST
.getGRLen()))
257 Worklist
.push_back(std::make_pair(UserMI
, Bits
));
260 case LoongArch::SRA_D
:
261 case LoongArch::SRL_D
:
262 case LoongArch::ROTR_D
:
263 // Operand 2 is the shift amount which uses log2(grlen) bits (6 bits when
// GRLen is 64).
264 if (OpIdx
== 2 && Bits
>= Log2_32(ST
.getGRLen()))
268 case LoongArch::ST_B
:
269 case LoongArch::STX_B
:
270 case LoongArch::STGT_B
:
271 case LoongArch::STLE_B
:
272 case LoongArch::IOCSRWR_B
:
273 // The first argument is the value to store.
274 if (OpIdx
== 0 && Bits
>= 8)
277 case LoongArch::ST_H
:
278 case LoongArch::STX_H
:
279 case LoongArch::STGT_H
:
280 case LoongArch::STLE_H
:
281 case LoongArch::IOCSRWR_H
:
282 // The first argument is the value to store.
283 if (OpIdx
== 0 && Bits
>= 16)
286 case LoongArch::ST_W
:
287 case LoongArch::STX_W
:
288 case LoongArch::SCREL_W
:
289 case LoongArch::STPTR_W
:
290 case LoongArch::STGT_W
:
291 case LoongArch::STLE_W
:
292 case LoongArch::IOCSRWR_W
:
293 // The first argument is the value to store.
294 if (OpIdx
== 0 && Bits
>= 32)
// CRC users: the width compared against Bits depends on which operand is used.
298 case LoongArch::CRC_W_B_W
:
299 case LoongArch::CRCC_W_B_W
:
300 if ((OpIdx
== 1 && Bits
>= 8) || (OpIdx
== 2 && Bits
>= 32))
303 case LoongArch::CRC_W_H_W
:
304 case LoongArch::CRCC_W_H_W
:
305 if ((OpIdx
== 1 && Bits
>= 16) || (OpIdx
== 2 && Bits
>= 32))
308 case LoongArch::CRC_W_D_W
:
309 case LoongArch::CRCC_W_D_W
:
310 if (OpIdx
== 2 && Bits
>= 32)
314 // For these operations the lower word of the output depends only on the
315 // lower word of the input, so we check that all uses only read the lower word.
316 case LoongArch::COPY
:
318 case LoongArch::ADD_D
:
319 case LoongArch::ADDI_D
:
320 case LoongArch::SUB_D
:
321 case LoongArch::MUL_D
:
326 case LoongArch::XORI
:
327 case LoongArch::ANDN
:
329 Worklist
.push_back(std::make_pair(UserMI
, Bits
));
332 case LoongArch::MASKNEZ
:
333 case LoongArch::MASKEQZ
:
336 Worklist
.push_back(std::make_pair(UserMI
, Bits
));
// Convenience wrapper: queries hasAllNBitUsers with a demanded width of 32,
// i.e. asks whether every user of \p OrigMI only reads the lower word.
345 static bool hasAllWUsers(const MachineInstr
&OrigMI
,
346 const LoongArchSubtarget
&ST
,
347 const MachineRegisterInfo
&MRI
) {
348 return hasAllNBitUsers(OrigMI
, ST
, MRI
, 32);
351 // This function returns true if the machine instruction always outputs a value
352 // where bits 63:32 match bit 31.
//
// The decision is a switch on the opcode of \p MI. The first, long group of
// opcodes needs no operand inspection (presumably all are treated as sign
// extending; the statement that follows the list is not visible here). The
// later cases answer by inspecting an immediate or register operand.
353 static bool isSignExtendingOpW(const MachineInstr
&MI
,
354 const MachineRegisterInfo
&MRI
, unsigned OpNo
) {
355 switch (MI
.getOpcode()) {
357 case LoongArch::ADD_W
:
358 case LoongArch::SUB_W
:
359 case LoongArch::ADDI_W
:
360 case LoongArch::ALSL_W
:
361 case LoongArch::LU12I_W
:
363 case LoongArch::SLTU
:
364 case LoongArch::SLTI
:
365 case LoongArch::SLTUI
:
366 case LoongArch::ANDI
:
367 case LoongArch::MUL_W
:
368 case LoongArch::MULH_W
:
369 case LoongArch::MULH_WU
:
370 case LoongArch::DIV_W
:
371 case LoongArch::MOD_W
:
372 case LoongArch::DIV_WU
:
373 case LoongArch::MOD_WU
:
374 case LoongArch::SLL_W
:
375 case LoongArch::SRL_W
:
376 case LoongArch::SRA_W
:
377 case LoongArch::ROTR_W
:
378 case LoongArch::SLLI_W
:
379 case LoongArch::SRLI_W
:
380 case LoongArch::SRAI_W
:
381 case LoongArch::ROTRI_W
:
382 case LoongArch::EXT_W_B
:
383 case LoongArch::EXT_W_H
:
384 case LoongArch::CLO_W
:
385 case LoongArch::CLZ_W
:
386 case LoongArch::CTO_W
:
387 case LoongArch::CTZ_W
:
388 case LoongArch::BYTEPICK_W
:
389 case LoongArch::REVB_2H
:
390 case LoongArch::BITREV_4B
:
391 case LoongArch::BITREV_W
:
392 case LoongArch::BSTRINS_W
:
393 case LoongArch::BSTRPICK_W
:
394 case LoongArch::LD_B
:
395 case LoongArch::LD_H
:
396 case LoongArch::LD_W
:
397 case LoongArch::LD_BU
:
398 case LoongArch::LD_HU
:
399 case LoongArch::LL_W
:
400 case LoongArch::LLACQ_W
:
401 case LoongArch::RDTIMEL_W
:
402 case LoongArch::RDTIMEH_W
:
403 case LoongArch::CPUCFG
:
404 case LoongArch::LDX_B
:
405 case LoongArch::LDX_H
:
406 case LoongArch::LDX_W
:
407 case LoongArch::LDX_BU
:
408 case LoongArch::LDX_HU
:
409 case LoongArch::LDPTR_W
:
410 case LoongArch::LDGT_B
:
411 case LoongArch::LDGT_H
:
412 case LoongArch::LDGT_W
:
413 case LoongArch::LDLE_B
:
414 case LoongArch::LDLE_H
:
415 case LoongArch::LDLE_W
:
416 case LoongArch::AMSWAP_B
:
417 case LoongArch::AMSWAP_H
:
418 case LoongArch::AMSWAP_W
:
419 case LoongArch::AMADD_B
:
420 case LoongArch::AMADD_H
:
421 case LoongArch::AMADD_W
:
422 case LoongArch::AMAND_W
:
423 case LoongArch::AMOR_W
:
424 case LoongArch::AMXOR_W
:
425 case LoongArch::AMMAX_W
:
426 case LoongArch::AMMIN_W
:
427 case LoongArch::AMMAX_WU
:
428 case LoongArch::AMMIN_WU
:
429 case LoongArch::AMSWAP__DB_B
:
430 case LoongArch::AMSWAP__DB_H
:
431 case LoongArch::AMSWAP__DB_W
:
432 case LoongArch::AMADD__DB_B
:
433 case LoongArch::AMADD__DB_H
:
434 case LoongArch::AMADD__DB_W
:
435 case LoongArch::AMAND__DB_W
:
436 case LoongArch::AMOR__DB_W
:
437 case LoongArch::AMXOR__DB_W
:
438 case LoongArch::AMMAX__DB_W
:
439 case LoongArch::AMMIN__DB_W
:
440 case LoongArch::AMMAX__DB_WU
:
441 case LoongArch::AMMIN__DB_WU
:
442 case LoongArch::AMCAS_B
:
443 case LoongArch::AMCAS_H
:
444 case LoongArch::AMCAS_W
:
445 case LoongArch::AMCAS__DB_B
:
446 case LoongArch::AMCAS__DB_H
:
447 case LoongArch::AMCAS__DB_W
:
448 case LoongArch::CRC_W_B_W
:
449 case LoongArch::CRC_W_H_W
:
450 case LoongArch::CRC_W_W_W
:
451 case LoongArch::CRC_W_D_W
:
452 case LoongArch::CRCC_W_B_W
:
453 case LoongArch::CRCC_W_H_W
:
454 case LoongArch::CRCC_W_W_W
:
455 case LoongArch::CRCC_W_D_W
:
456 case LoongArch::IOCSRRD_B
:
457 case LoongArch::IOCSRRD_H
:
458 case LoongArch::IOCSRRD_W
:
459 case LoongArch::MOVFR2GR_S
:
460 case LoongArch::MOVFCSR2GR
:
461 case LoongArch::MOVCF2GR
:
462 case LoongArch::MOVFRH2GR_S
:
463 case LoongArch::MOVFR2GR_S_64
:
464 case LoongArch::VPICKVE2GR_W
:
465 case LoongArch::XVPICKVE2GR_W
:
467 // Special cases that require checking operands.
468 // Shifting right sufficiently makes the value 32-bit sign-extended:
// an arithmetic shift needs an amount of at least 32, a logical shift an
// amount strictly greater than 32.
469 case LoongArch::SRAI_D
:
470 return MI
.getOperand(2).getImm() >= 32;
471 case LoongArch::SRLI_D
:
472 return MI
.getOperand(2).getImm() > 32;
473 // The LI pattern ADDI rd, R0, imm and ORI rd, R0, imm are sign extended.
474 case LoongArch::ADDI_D
:
476 return MI
.getOperand(1).isReg() &&
477 MI
.getOperand(1).getReg() == LoongArch::R0
;
478 // A bit-field extract is sign extended if the msb is less than 31.
479 case LoongArch::BSTRPICK_D
:
480 return MI
.getOperand(2).getImm() < 31;
481 // Copying from R0 produces zero.
482 case LoongArch::COPY
:
483 return MI
.getOperand(1).getReg() == LoongArch::R0
;
484 // Ignore the scratch register destination.
485 case LoongArch::PseudoMaskedAtomicSwap32
:
486 case LoongArch::PseudoAtomicSwap32
:
487 case LoongArch::PseudoMaskedAtomicLoadAdd32
:
488 case LoongArch::PseudoMaskedAtomicLoadSub32
:
489 case LoongArch::PseudoAtomicLoadNand32
:
490 case LoongArch::PseudoMaskedAtomicLoadNand32
:
491 case LoongArch::PseudoAtomicLoadAdd32
:
492 case LoongArch::PseudoAtomicLoadSub32
:
493 case LoongArch::PseudoAtomicLoadAnd32
:
494 case LoongArch::PseudoAtomicLoadOr32
:
495 case LoongArch::PseudoAtomicLoadXor32
:
496 case LoongArch::PseudoMaskedAtomicLoadUMax32
:
497 case LoongArch::PseudoMaskedAtomicLoadUMin32
:
498 case LoongArch::PseudoCmpXchg32
:
499 case LoongArch::PseudoMaskedCmpXchg32
:
500 case LoongArch::PseudoMaskedAtomicLoadMax32
:
501 case LoongArch::PseudoMaskedAtomicLoadMin32
:
// Determines whether the value in \p SrcReg is known to be sign extended from
// 32 bits. The check walks defining instructions with a worklist of
// registers: each definition must either be a sign-extending operation
// (isSignExtendingOpW) or an operation that propagates sign extension from
// its inputs, which are then queued in turn.
//
// \param SrcReg     register whose definition chain is examined.
// \param ST         subtarget, forwarded to hasAllWUsers.
// \param MRI        register info used to look up definitions.
// \param FixableDef receives D-form instructions whose users only read the
//                   lower word, so the caller can rewrite them to W form.
508 static bool isSignExtendedW(Register SrcReg
, const LoongArchSubtarget
&ST
,
509 const MachineRegisterInfo
&MRI
,
510 SmallPtrSetImpl
<MachineInstr
*> &FixableDef
) {
511 SmallSet
<Register
, 4> Visited
;
512 SmallVector
<Register
, 4> Worklist
;
// Helper that queues a register for inspection after testing that it is
// virtual.
514 auto AddRegToWorkList
= [&](Register SrcReg
) {
515 if (!SrcReg
.isVirtual())
517 Worklist
.push_back(SrcReg
);
521 if (!AddRegToWorkList(SrcReg
))
524 while (!Worklist
.empty()) {
525 Register Reg
= Worklist
.pop_back_val();
527 // If we already visited this register, we don't need to check it again.
528 if (!Visited
.insert(Reg
).second
)
531 MachineInstr
*MI
= MRI
.getVRegDef(Reg
);
// Locate which operand of the defining instruction defines Reg.
535 int OpNo
= MI
->findRegisterDefOperandIdx(Reg
, /*TRI=*/nullptr);
536 assert(OpNo
!= -1 && "Couldn't find register");
538 // If this is a sign extending operation we don't need to look any further.
539 if (isSignExtendingOpW(*MI
, MRI
, OpNo
))
542 // Is this an instruction that propagates sign extend?
543 switch (MI
->getOpcode()) {
545 // Unknown opcode, give up.
547 case LoongArch::COPY
: {
548 const MachineFunction
*MF
= MI
->getMF();
549 const LoongArchMachineFunctionInfo
*LAFI
=
550 MF
->getInfo
<LoongArchMachineFunctionInfo
>();
552 // If this is the entry block and the register is livein, see if we know
553 // it is sign extended.
554 if (MI
->getParent() == &MF
->front()) {
555 Register VReg
= MI
->getOperand(0).getReg();
556 if (MF
->getRegInfo().isLiveIn(VReg
) && LAFI
->isSExt32Register(VReg
))
560 Register CopySrcReg
= MI
->getOperand(1).getReg();
561 if (CopySrcReg
== LoongArch::R4
) {
562 // For a method return value, we check the ZExt/SExt flags in attribute.
563 // We assume the following code sequence for method call.
564 // PseudoCALL @bar, ...
565 // ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3
568 // We use the PseudoCall to look up the IR function being called to find
569 // its return attributes.
570 const MachineBasicBlock
*MBB
= MI
->getParent();
571 auto II
= MI
->getIterator();
// The instruction immediately before the COPY must be ADJCALLSTACKUP.
572 if (II
== MBB
->instr_begin() ||
573 (--II
)->getOpcode() != LoongArch::ADJCALLSTACKUP
)
// The instruction before that must be a call to a global.
576 const MachineInstr
&CallMI
= *(--II
);
577 if (!CallMI
.isCall() || !CallMI
.getOperand(0).isGlobal())
581 dyn_cast_if_present
<Function
>(CallMI
.getOperand(0).getGlobal());
585 auto *IntTy
= dyn_cast
<IntegerType
>(CalleeFn
->getReturnType());
// A signext return of at most 32 bits, or a zeroext return narrower than
// 32 bits, satisfies the condition below.
589 const AttributeSet
&Attrs
= CalleeFn
->getAttributes().getRetAttrs();
590 unsigned BitWidth
= IntTy
->getBitWidth();
591 if ((BitWidth
<= 32 && Attrs
.hasAttribute(Attribute::SExt
)) ||
592 (BitWidth
< 32 && Attrs
.hasAttribute(Attribute::ZExt
)))
596 if (!AddRegToWorkList(CopySrcReg
))
602 // For these, we just need to check if the 1st operand is sign extended.
603 case LoongArch::MOD_D
:
604 case LoongArch::ANDI
:
606 case LoongArch::XORI
:
607 // |Remainder| is always <= |Dividend|. If D is 32-bit, then so is R.
608 // DIV doesn't work because of the edge case 0xf..f 8000 0000 / (long)-1
609 // Logical operations use a sign extended 12-bit immediate.
610 if (!AddRegToWorkList(MI
->getOperand(1).getReg()))
614 case LoongArch::MOD_DU
:
618 case LoongArch::ANDN
:
620 case LoongArch::PHI
: {
621 // If all incoming values are sign-extended, the output of AND, OR, XOR,
622 // or PHI is also sign-extended.
624 // The input registers for PHI are operand 1, 3, ...
625 // The input registers for others are operand 1 and 2.
// B, E and D are the begin index, end index and stride of that operand walk.
626 unsigned B
= 1, E
= 3, D
= 1;
627 switch (MI
->getOpcode()) {
629 E
= MI
->getNumOperands();
634 for (unsigned I
= B
; I
!= E
; I
+= D
) {
635 if (!MI
->getOperand(I
).isReg())
638 if (!AddRegToWorkList(MI
->getOperand(I
).getReg()))
645 case LoongArch::MASKEQZ
:
646 case LoongArch::MASKNEZ
:
647 // Instructions return zero or operand 1. Result is sign extended if
648 // operand 1 is sign extended.
649 if (!AddRegToWorkList(MI
->getOperand(1).getReg()))
653 // With these opcodes, we can "fix" them with the W-version
654 // if we know all users of the result only rely on bits 31:0
655 case LoongArch::SLLI_D
:
656 // SLLI_W reads the lowest 5 bits, while SLLI_D reads lowest 6 bits
657 if (MI
->getOperand(2).getImm() >= 32)
660 case LoongArch::ADDI_D
:
661 case LoongArch::ADD_D
:
662 case LoongArch::LD_D
:
663 case LoongArch::LD_WU
:
664 case LoongArch::MUL_D
:
665 case LoongArch::SUB_D
:
666 if (hasAllWUsers(*MI
, ST
, MRI
)) {
667 FixableDef
.insert(MI
);
671 // If all incoming values are sign-extended and all users only use
672 // the lower 32 bits, then convert them to W versions.
673 case LoongArch::DIV_D
: {
674 if (!AddRegToWorkList(MI
->getOperand(1).getReg()))
676 if (!AddRegToWorkList(MI
->getOperand(2).getReg()))
678 if (hasAllWUsers(*MI
, ST
, MRI
)) {
679 FixableDef
.insert(MI
);
687 // If we get here, then every node we visited produces a sign extended value
688 // or propagated sign extended values. So the result must be sign extended.
// Maps an opcode to the W-suffixed opcode that replaces it:
// ADDI_D->ADDI_W, ADD_D->ADD_W, DIV_D->DIV_W, LD_D/LD_WU->LD_W,
// MUL_D->MUL_W, SLLI_D->SLLI_W, SUB_D->SUB_W. Any other opcode presumably
// reaches the llvm_unreachable below (the label preceding it is not visible
// here).
692 static unsigned getWOp(unsigned Opcode
) {
694 case LoongArch::ADDI_D
:
695 return LoongArch::ADDI_W
;
696 case LoongArch::ADD_D
:
697 return LoongArch::ADD_W
;
698 case LoongArch::DIV_D
:
699 return LoongArch::DIV_W
;
700 case LoongArch::LD_D
:
701 case LoongArch::LD_WU
:
702 return LoongArch::LD_W
;
703 case LoongArch::MUL_D
:
704 return LoongArch::MUL_W
;
705 case LoongArch::SLLI_D
:
706 return LoongArch::SLLI_W
;
707 case LoongArch::SUB_D
:
708 return LoongArch::SUB_W
;
710 llvm_unreachable("Unexpected opcode for replacement with W variant");
// Removes sext.w instructions (ADDI.W rd, rs, 0) that are redundant, either
// because all users only read the lower word or because the source register
// is already known to be sign extended. Definitions collected as "fixable"
// by isSignExtendedW are rewritten to their W opcode, then the destination
// register is replaced by the source and the sext.w is erased.
//
// The DisableSExtWRemoval option is tested first.
714 bool LoongArchOptWInstrs::removeSExtWInstrs(MachineFunction
&MF
,
715 const LoongArchInstrInfo
&TII
,
716 const LoongArchSubtarget
&ST
,
717 MachineRegisterInfo
&MRI
) {
718 if (DisableSExtWRemoval
)
721 bool MadeChange
= false;
722 for (MachineBasicBlock
&MBB
: MF
) {
// Early-increment iteration: MI may be erased inside the loop body.
723 for (MachineInstr
&MI
: llvm::make_early_inc_range(MBB
)) {
724 // We're looking for the sext.w pattern ADDI.W rd, rs, 0.
725 if (!LoongArch::isSEXT_W(MI
))
728 Register SrcReg
= MI
.getOperand(1).getReg();
730 SmallPtrSet
<MachineInstr
*, 4> FixableDefs
;
732 // If all users only use the lower bits, this sext.w is redundant.
733 // Or if all definitions reaching MI sign-extend their output,
734 // then sext.w is redundant.
735 if (!hasAllWUsers(MI
, ST
, MRI
) &&
736 !isSignExtendedW(SrcReg
, ST
, MRI
, FixableDefs
))
// SrcReg will stand in for DstReg, so it must be constrainable to DstReg's
// register class.
739 Register DstReg
= MI
.getOperand(0).getReg();
740 if (!MRI
.constrainRegClass(SrcReg
, MRI
.getRegClass(DstReg
)))
743 // Convert Fixable instructions to their W versions.
744 for (MachineInstr
*Fixable
: FixableDefs
) {
745 LLVM_DEBUG(dbgs() << "Replacing " << *Fixable
);
746 Fixable
->setDesc(TII
.get(getWOp(Fixable
->getOpcode())));
// The nsw/nuw/exact flags are cleared on the rewritten instruction.
747 Fixable
->clearFlag(MachineInstr::MIFlag::NoSWrap
);
748 Fixable
->clearFlag(MachineInstr::MIFlag::NoUWrap
);
749 Fixable
->clearFlag(MachineInstr::MIFlag::IsExact
);
750 LLVM_DEBUG(dbgs() << " with " << *Fixable
);
751 ++NumTransformedToWInstrs
;
754 LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
755 MRI
.replaceRegWith(DstReg
, SrcReg
);
756 MRI
.clearKillFlags(SrcReg
);
757 MI
.eraseFromParent();
// Rewrites W-suffixed instructions to their D-suffixed opcode when every user
// only reads the lower word of the result. The only mapping visible here is
// ADDI_W -> ADDI_D.
766 bool LoongArchOptWInstrs::convertToDSuffixes(MachineFunction
&MF
,
767 const LoongArchInstrInfo
&TII
,
768 const LoongArchSubtarget
&ST
,
769 MachineRegisterInfo
&MRI
) {
770 bool MadeChange
= false;
771 for (MachineBasicBlock
&MBB
: MF
) {
772 for (MachineInstr
&MI
: MBB
) {
// Select the replacement opcode for MI.
774 switch (MI
.getOpcode()) {
777 case LoongArch::ADDI_W
:
778 Opc
= LoongArch::ADDI_D
;
// Only swap the opcode when no user demands more than the lower 32 bits.
782 if (hasAllWUsers(MI
, ST
, MRI
)) {
783 MI
.setDesc(TII
.get(Opc
));
// Rewrites D-suffixed instructions to their W-suffixed opcode when every user
// only reads the lower word of the result. Visible mappings: ADD_D->ADD_W,
// ADDI_D->ADDI_W, SUB_D->SUB_W, MUL_D->MUL_W, SLLI_D->SLLI_W (guarded by a
// test on the shift immediate) and LD_D/LD_WU->LD_W.
792 bool LoongArchOptWInstrs::convertToWSuffixes(MachineFunction
&MF
,
793 const LoongArchInstrInfo
&TII
,
794 const LoongArchSubtarget
&ST
,
795 MachineRegisterInfo
&MRI
) {
796 bool MadeChange
= false;
797 for (MachineBasicBlock
&MBB
: MF
) {
798 for (MachineInstr
&MI
: MBB
) {
// Select the W replacement opcode for MI.
801 switch (MI
.getOpcode()) {
804 case LoongArch::ADD_D
:
805 WOpc
= LoongArch::ADD_W
;
807 case LoongArch::ADDI_D
:
808 WOpc
= LoongArch::ADDI_W
;
810 case LoongArch::SUB_D
:
811 WOpc
= LoongArch::SUB_W
;
813 case LoongArch::MUL_D
:
814 WOpc
= LoongArch::MUL_W
;
816 case LoongArch::SLLI_D
:
817 // SLLI.W reads the lowest 5 bits, while SLLI.D reads lowest 6 bits
818 if (MI
.getOperand(2).getImm() >= 32)
820 WOpc
= LoongArch::SLLI_W
;
822 case LoongArch::LD_D
:
823 case LoongArch::LD_WU
:
824 WOpc
= LoongArch::LD_W
;
// Only swap the opcode when no user demands more than the lower 32 bits.
828 if (hasAllWUsers(MI
, ST
, MRI
)) {
829 LLVM_DEBUG(dbgs() << "Replacing " << MI
);
830 MI
.setDesc(TII
.get(WOpc
));
// The nsw/nuw/exact flags are cleared on the rewritten instruction.
831 MI
.clearFlag(MachineInstr::MIFlag::NoSWrap
);
832 MI
.clearFlag(MachineInstr::MIFlag::NoUWrap
);
833 MI
.clearFlag(MachineInstr::MIFlag::IsExact
);
834 LLVM_DEBUG(dbgs() << " with " << MI
);
835 ++NumTransformedToWInstrs
;
// Pass entry point. After the skipFunction() test it gathers the register
// info, subtarget and instruction info, then accumulates into MadeChange:
//   * removeSExtWInstrs, always;
//   * convertToDSuffixes, unless DisableCvtToDSuffix is set or the subtarget
//     prefers W instructions;
//   * convertToWSuffixes, when the subtarget prefers W instructions.
844 bool LoongArchOptWInstrs::runOnMachineFunction(MachineFunction
&MF
) {
845 if (skipFunction(MF
.getFunction()))
848 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
849 const LoongArchSubtarget
&ST
= MF
.getSubtarget
<LoongArchSubtarget
>();
850 const LoongArchInstrInfo
&TII
= *ST
.getInstrInfo();
855 bool MadeChange
= false;
856 MadeChange
|= removeSExtWInstrs(MF
, TII
, ST
, MRI
);
// The two suffix conversions are mutually exclusive on ST.preferWInst().
858 if (!(DisableCvtToDSuffix
|| ST
.preferWInst()))
859 MadeChange
|= convertToDSuffixes(MF
, TII
, ST
, MRI
);
861 if (ST
.preferWInst())
862 MadeChange
|= convertToWSuffixes(MF
, TII
, ST
, MRI
);