//===- X86CompressEVEX.cpp ------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass compresses instructions from EVEX space to legacy/VEX/EVEX space
// when possible in order to reduce code size or facilitate HW decoding.
//
// Possible compression:
//   a. AVX512 instruction (EVEX) -> AVX instruction (VEX)
//   b. Promoted instruction (EVEX) -> pre-promotion instruction (legacy/VEX)
//   c. NDD (EVEX) -> non-NDD (legacy)
//   d. NF_ND (EVEX) -> NF (EVEX)
//   e. NonNF (EVEX) -> NF (EVEX)
//
// Compressions a, b and c can always reduce code size, with some exceptions
// such as promoted 16-bit CRC32, which is as long as the legacy version.
//
// legacy:
//   crc32w %si, %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
// promoted:
//   crc32w %si, %eax ## encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
//
// From a performance perspective, these should be the same (same uops and same
// EXE ports). From an FMV perspective, the older legacy encoding is preferred
// b/c it can execute in more places (broader HW install base), so we still do
// the compression.
//
// Compression d can help hardware decode (HW may skip reading the NDD
// register) although the instruction length remains unchanged.
//
// Compression e can help hardware skip updating EFLAGS although the
// instruction length remains unchanged.
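//
// As an illustration of compression c, an ND add whose destination already
// equals one of its sources can drop the new data destination entirely (see
// IsRedundantNewDataDest below):
//   $rbx = ADD64rr_ND $rbx, $rax   ->   $rbx = ADD64rr $rbx, $rax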
//
//===----------------------------------------------------------------------===//
39 #include "MCTargetDesc/X86BaseInfo.h"
41 #include "X86InstrInfo.h"
42 #include "X86Subtarget.h"
43 #include "llvm/ADT/StringRef.h"
44 #include "llvm/CodeGen/MachineFunction.h"
45 #include "llvm/CodeGen/MachineFunctionPass.h"
46 #include "llvm/CodeGen/MachineInstr.h"
47 #include "llvm/CodeGen/MachineOperand.h"
48 #include "llvm/MC/MCInstrDesc.h"
49 #include "llvm/Pass.h"

#define COMP_EVEX_DESC "Compressing EVEX instrs when possible"
#define COMP_EVEX_NAME "x86-compress-evex"

#define DEBUG_TYPE COMP_EVEX_NAME

namespace {
// Including the generated EVEX compression tables.
#define GET_X86_COMPRESS_EVEX_TABLE
#include "X86GenInstrMapping.inc"

class CompressEVEXPass : public MachineFunctionPass {
public:
  static char ID;
  CompressEVEXPass() : MachineFunctionPass(ID) {}
  StringRef getPassName() const override { return COMP_EVEX_DESC; }

  bool runOnMachineFunction(MachineFunction &MF) override;

  // This pass runs after regalloc and doesn't support VReg operands.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }
};

} // end anonymous namespace

char CompressEVEXPass::ID = 0;
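
// VEX and legacy encodings can only address registers 0-15, so an instruction
// that uses XMM16-XMM31, YMM16-YMM31, or an APX extended GPR (R16-R31) must
// keep its EVEX encoding.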
static bool usesExtendedRegister(const MachineInstr &MI) {
  auto isHiRegIdx = [](unsigned Reg) {
    // Check for XMM register with indexes between 16 - 31.
    if (Reg >= X86::XMM16 && Reg <= X86::XMM31)
      return true;
    // Check for YMM register with indexes between 16 - 31.
    if (Reg >= X86::YMM16 && Reg <= X86::YMM31)
      return true;
    // Check for GPR with indexes between 16 - 31.
    if (X86II::isApxExtendedReg(Reg))
      return true;
    return false;
  };

  // Check that operands are not ZMM regs or
  // XMM/YMM regs with hi indexes between 16 - 31.
  for (const MachineOperand &MO : MI.explicit_operands()) {
    if (!MO.isReg())
      continue;

    Register Reg = MO.getReg();
    assert(!X86II::isZMMReg(Reg) &&
           "ZMM instructions should not be in the EVEX->VEX tables");

    if (isHiRegIdx(Reg))
      return true;
  }

  return false;
}

// Do any custom cleanup needed to finalize the conversion.
static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case X86::VALIGNDZ128rri:
  case X86::VALIGNDZ128rmi:
  case X86::VALIGNQZ128rri:
  case X86::VALIGNQZ128rmi: {
    assert((NewOpc == X86::VPALIGNRrri || NewOpc == X86::VPALIGNRrmi) &&
           "Unexpected new opcode!");
    unsigned Scale =
        (Opc == X86::VALIGNQZ128rri || Opc == X86::VALIGNQZ128rmi) ? 8 : 4;
    MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
    Imm.setImm(Imm.getImm() * Scale);
    break;
  }
  case X86::VSHUFF32X4Z256rmi:
  case X86::VSHUFF32X4Z256rri:
  case X86::VSHUFF64X2Z256rmi:
  case X86::VSHUFF64X2Z256rri:
  case X86::VSHUFI32X4Z256rmi:
  case X86::VSHUFI32X4Z256rri:
  case X86::VSHUFI64X2Z256rmi:
  case X86::VSHUFI64X2Z256rri: {
    assert((NewOpc == X86::VPERM2F128rri || NewOpc == X86::VPERM2I128rri ||
            NewOpc == X86::VPERM2F128rmi || NewOpc == X86::VPERM2I128rmi) &&
           "Unexpected new opcode!");
    MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
    int64_t ImmVal = Imm.getImm();
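    // VSHUF*X*Z256 picks the low 128-bit half from the first source with imm
    // bit 0 and the high half from the second source with imm bit 1, whereas
    // VPERM2*128 picks each half from either source with imm bits [1:0] and
    // [5:4], so the immediate has to be rewritten.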
    // Set bit 5, move bit 1 to bit 4, copy bit 0.
    Imm.setImm(0x20 | ((ImmVal & 2) << 3) | (ImmVal & 1));
    break;
  }
  case X86::VRNDSCALEPDZ128rri:
  case X86::VRNDSCALEPDZ128rmi:
  case X86::VRNDSCALEPSZ128rri:
  case X86::VRNDSCALEPSZ128rmi:
  case X86::VRNDSCALEPDZ256rri:
  case X86::VRNDSCALEPDZ256rmi:
  case X86::VRNDSCALEPSZ256rri:
  case X86::VRNDSCALEPSZ256rmi:
  case X86::VRNDSCALESDZrri:
  case X86::VRNDSCALESDZrmi:
  case X86::VRNDSCALESSZrri:
  case X86::VRNDSCALESSZrmi:
  case X86::VRNDSCALESDZrri_Int:
  case X86::VRNDSCALESDZrmi_Int:
  case X86::VRNDSCALESSZrri_Int:
  case X86::VRNDSCALESSZrmi_Int: {
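    // VRNDSCALE* encodes the number of fraction bits to preserve in imm bits
    // 7:4; the VEX rounding instructions it can compress to (VROUND*) have no
    // such field, so only immediates with those bits clear are compressible.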
    const MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
    int64_t ImmVal = Imm.getImm();
    // Ensure that only bits 3:0 of the immediate are used.
    if ((ImmVal & 0xf) != ImmVal)
      return false;
    break;
  }
  }

  return true;
}

static bool CompressEVEXImpl(MachineInstr &MI, const X86Subtarget &ST) {
  uint64_t TSFlags = MI.getDesc().TSFlags;

  // Check for EVEX instructions only.
  if ((TSFlags & X86II::EncodingMask) != X86II::EVEX)
    return false;

  // Instructions with mask or 512-bit vector can't be converted to VEX.
  if (TSFlags & (X86II::EVEX_K | X86II::EVEX_L2))
    return false;

  auto IsRedundantNewDataDest = [&](unsigned &Opc) {
    // $rbx = ADD64rr_ND $rbx, $rax / $rbx = ADD64rr_ND $rax, $rbx
    //   ->
    // $rbx = ADD64rr $rbx, $rax
    const MCInstrDesc &Desc = MI.getDesc();
    Register Reg0 = MI.getOperand(0).getReg();
    const MachineOperand &Op1 = MI.getOperand(1);
    if (!Op1.isReg() || X86::getFirstAddrOperandIdx(MI) == 1 ||
        X86::isCFCMOVCC(MI.getOpcode()))
      return false;
    Register Reg1 = Op1.getReg();
    // The new data destination is redundant if it already equals the first
    // source.
    if (Reg1 == Reg0)
      return true;

    // Op1 and Op2 may be commutable for ND instructions.
    if (!Desc.isCommutable() || Desc.getNumOperands() < 3 ||
        !MI.getOperand(2).isReg() || MI.getOperand(2).getReg() != Reg0)
      return false;
    // Opcode may change after commute, e.g. SHRD -> SHLD
    ST.getInstrInfo()->commuteInstruction(MI, false, 1, 2);
    Opc = MI.getOpcode();
    return true;
  };

  // EVEX_B has several meanings.
  // AVX512:
  //  register form: rounding control or SAE
  //  memory form: broadcast
  // APX:
  //  MAP4: NDD
  //
  // For AVX512 cases, the EVEX prefix is needed in order to carry this
  // information, thus preventing the transformation to VEX encoding.
  bool IsND = X86II::hasNewDataDest(TSFlags);
  if (TSFlags & X86II::EVEX_B && !IsND)
    return false;
  unsigned Opc = MI.getOpcode();
  // MOVBE*rr is special because it has the semantics of NDD but does not set
  // EVEX_B.
  bool IsNDLike = IsND || Opc == X86::MOVBE32rr || Opc == X86::MOVBE64rr;
  bool IsRedundantNDD = IsNDLike ? IsRedundantNewDataDest(Opc) : false;

  auto GetCompressedOpc = [&](unsigned Opc) -> unsigned {
    ArrayRef<X86TableEntry> Table = ArrayRef(X86CompressEVEXTable);
    const auto I = llvm::lower_bound(Table, Opc);
    if (I == Table.end() || I->OldOpc != Opc)
      return 0;

    if (usesExtendedRegister(MI) || !checkPredicate(I->NewOpc, &ST) ||
        !performCustomAdjustments(MI, I->NewOpc))
      return 0;
    return I->NewOpc;
  };
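
  // Compression priority: drop a redundant NDD first (compression c, which
  // shrinks the encoding); otherwise, for ND-like instructions with NF support
  // and dead EFLAGS, switch to the NF variant (compression e, same length but
  // the flag update can be skipped); otherwise fall back to the table-driven
  // EVEX->legacy/VEX mapping.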
  // NonNF -> NF only if it's not a compressible NDD instruction and EFLAGS is
  // dead.
  unsigned NewOpc = IsRedundantNDD
                        ? X86::getNonNDVariant(Opc)
                        : ((IsNDLike && ST.hasNF() &&
                            MI.registerDefIsDead(X86::EFLAGS, /*TRI=*/nullptr))
                               ? X86::getNFVariant(Opc)
                               : GetCompressedOpc(Opc));
  if (!NewOpc)
    return false;

  const MCInstrDesc &NewDesc = ST.getInstrInfo()->get(NewOpc);
  MI.setDesc(NewDesc);

  unsigned AsmComment = 0;
  switch (NewDesc.TSFlags & X86II::EncodingMask) {
  case X86II::LEGACY:
    AsmComment = X86::AC_EVEX_2_LEGACY;
    break;
  case X86II::VEX:
    AsmComment = X86::AC_EVEX_2_VEX;
    break;
  case X86II::EVEX:
    AsmComment = X86::AC_EVEX_2_EVEX;
    assert(IsND && (NewDesc.TSFlags & X86II::EVEX_NF) &&
           "Unknown EVEX2EVEX compression");
    break;
  default:
    llvm_unreachable("Unknown EVEX compression");
  }
  MI.setAsmPrinterFlag(AsmComment);
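
  // When a redundant NDD was dropped, the resulting legacy form overwrites its
  // first source in place, so the definition must be tied to operand 1.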
  if (IsRedundantNDD)
    MI.tieOperands(0, 1);

  return true;
}

bool CompressEVEXPass::runOnMachineFunction(MachineFunction &MF) {
  // Make sure the tables are sorted.
  static std::atomic<bool> TableChecked(false);
  if (!TableChecked.load(std::memory_order_relaxed)) {
    assert(llvm::is_sorted(X86CompressEVEXTable) &&
           "X86CompressEVEXTable is not sorted!");
    TableChecked.store(true, std::memory_order_relaxed);
  }
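
  // Nothing to do unless the subtarget has a feature that produces EVEX
  // encodings in the first place (AVX512, or APX extended GPRs / NDD).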
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  if (!ST.hasAVX512() && !ST.hasEGPR() && !ST.hasNDD())
    return false;

  bool Changed = false;

  for (MachineBasicBlock &MBB : MF) {
    // Traverse the basic block.
    for (MachineInstr &MI : MBB)
      Changed |= CompressEVEXImpl(MI, ST);
  }

  return Changed;
}

INITIALIZE_PASS(CompressEVEXPass, COMP_EVEX_NAME, COMP_EVEX_DESC, false, false)

FunctionPass *llvm::createX86CompressEVEXPass() {
  return new CompressEVEXPass();
}