//===- X86CompressEVEX.cpp ------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass compresses instructions from EVEX space to legacy/VEX/EVEX space
// when possible in order to reduce code size or facilitate HW decoding.
//
// Possible compressions:
//   a. AVX512 instruction (EVEX) -> AVX instruction (VEX)
//   b. Promoted instruction (EVEX) -> pre-promotion instruction (legacy/VEX)
//   c. NDD (EVEX) -> non-NDD (legacy)
//   d. NF_ND (EVEX) -> NF (EVEX)
//   e. NonNF (EVEX) -> NF (EVEX)
//
// Compressions a, b and c can always reduce code size, with some exceptions
// such as promoted 16-bit CRC32, which is as long as the legacy version:
//
// legacy:
//   crc32w %si, %eax   ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
// promoted:
//   crc32w %si, %eax   ## encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
//
// From a performance perspective, the two forms should be the same (same uops
// and same EXE ports). From an FMV perspective, the older legacy encoding is
// preferred because it can execute in more places (broader HW install base),
// so we still do the compression.
//
// Compression d can help hardware decode (HW may skip reading the NDD
// register) although the instruction length remains unchanged.
//
// Compression e can help hardware skip updating EFLAGS although the
// instruction length remains unchanged.
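//
// For example, compression c rewrites (in MIR form)
//   $rbx = ADD64rr_ND $rbx, $rax
// into the shorter legacy two-address form
//   $rbx = ADD64rr $rbx, $rax
// when the new data destination is redundant.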
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include <atomic>
#include <cassert>
#include <cstdint>

using namespace llvm;

#define COMP_EVEX_DESC "Compressing EVEX instrs when possible"
#define COMP_EVEX_NAME "x86-compress-evex"

#define DEBUG_TYPE COMP_EVEX_NAME

namespace {
// Including the generated EVEX compression tables.
#define GET_X86_COMPRESS_EVEX_TABLE
#include "X86GenInstrMapping.inc"

class CompressEVEXPass : public MachineFunctionPass {
public:
  static char ID;
  CompressEVEXPass() : MachineFunctionPass(ID) {}
  StringRef getPassName() const override { return COMP_EVEX_DESC; }

  bool runOnMachineFunction(MachineFunction &MF) override;

  // This pass runs after regalloc and doesn't support VReg operands.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }
};

} // end anonymous namespace

char CompressEVEXPass::ID = 0;

static bool usesExtendedRegister(const MachineInstr &MI) {
  auto isHiRegIdx = [](unsigned Reg) {
    // Check for XMM register with indexes between 16 - 31.
    if (Reg >= X86::XMM16 && Reg <= X86::XMM31)
      return true;
    // Check for YMM register with indexes between 16 - 31.
    if (Reg >= X86::YMM16 && Reg <= X86::YMM31)
      return true;
    // Check for GPR with indexes between 16 - 31.
    if (X86II::isApxExtendedReg(Reg))
      return true;
    return false;
  };

  // Check that operands are not ZMM regs or
  // XMM/YMM regs with hi indexes between 16 - 31.
  for (const MachineOperand &MO : MI.explicit_operands()) {
    if (!MO.isReg())
      continue;

    Register Reg = MO.getReg();
    assert(!X86II::isZMMReg(Reg) &&
           "ZMM instructions should not be in the EVEX->VEX tables");
    if (isHiRegIdx(Reg))
      return true;
  }

  return false;
}

// Do any custom cleanup needed to finalize the conversion.
static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) {
  (void)NewOpc;
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case X86::VALIGNDZ128rri:
  case X86::VALIGNDZ128rmi:
  case X86::VALIGNQZ128rri:
  case X86::VALIGNQZ128rmi: {
    assert((NewOpc == X86::VPALIGNRrri || NewOpc == X86::VPALIGNRrmi) &&
           "Unexpected new opcode!");
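    // VALIGND/Q encode the shift amount in elements (dwords/qwords), while
    // VPALIGNR counts bytes, so scale the immediate by the element size.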
    unsigned Scale =
        (Opc == X86::VALIGNQZ128rri || Opc == X86::VALIGNQZ128rmi) ? 8 : 4;
    MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
    Imm.setImm(Imm.getImm() * Scale);
    break;
  }
  case X86::VSHUFF32X4Z256rmi:
  case X86::VSHUFF32X4Z256rri:
  case X86::VSHUFF64X2Z256rmi:
  case X86::VSHUFF64X2Z256rri:
  case X86::VSHUFI32X4Z256rmi:
  case X86::VSHUFI32X4Z256rri:
  case X86::VSHUFI64X2Z256rmi:
  case X86::VSHUFI64X2Z256rri: {
    assert((NewOpc == X86::VPERM2F128rri || NewOpc == X86::VPERM2I128rri ||
            NewOpc == X86::VPERM2F128rmi || NewOpc == X86::VPERM2I128rmi) &&
           "Unexpected new opcode!");
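    // The 256-bit VSHUF*X* immediate picks one 128-bit half from each source
    // (one bit per half). Rewrite it into VPERM2*128's form, where each
    // destination lane gets a 2-bit source selector and the high lane always
    // comes from the second source.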
    MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
    int64_t ImmVal = Imm.getImm();
    // Set bit 5, move bit 1 to bit 4, copy bit 0.
    Imm.setImm(0x20 | ((ImmVal & 2) << 3) | (ImmVal & 1));
    break;
  }
  case X86::VRNDSCALEPDZ128rri:
  case X86::VRNDSCALEPDZ128rmi:
  case X86::VRNDSCALEPSZ128rri:
  case X86::VRNDSCALEPSZ128rmi:
  case X86::VRNDSCALEPDZ256rri:
  case X86::VRNDSCALEPDZ256rmi:
  case X86::VRNDSCALEPSZ256rri:
  case X86::VRNDSCALEPSZ256rmi:
  case X86::VRNDSCALESDZrri:
  case X86::VRNDSCALESDZrmi:
  case X86::VRNDSCALESSZrri:
  case X86::VRNDSCALESSZrmi:
  case X86::VRNDSCALESDZrri_Int:
  case X86::VRNDSCALESDZrmi_Int:
  case X86::VRNDSCALESSZrri_Int:
  case X86::VRNDSCALESSZrmi_Int:
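    // VRNDSCALE uses imm[7:4] as a scaling factor that VROUND cannot encode,
    // so the compression is only valid when those bits are zero.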
    const MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
    int64_t ImmVal = Imm.getImm();
    // Ensure that only bits 3:0 of the immediate are used.
    if ((ImmVal & 0xf) != ImmVal)
      return false;
    break;
  }

  return true;
}

static bool CompressEVEXImpl(MachineInstr &MI, const X86Subtarget &ST) {
  uint64_t TSFlags = MI.getDesc().TSFlags;

  // Check for EVEX instructions only.
  if ((TSFlags & X86II::EncodingMask) != X86II::EVEX)
    return false;

  // Instructions with mask or 512-bit vector can't be converted to VEX.
  if (TSFlags & (X86II::EVEX_K | X86II::EVEX_L2))
    return false;

  auto IsRedundantNewDataDest = [&](unsigned &Opc) {
    // $rbx = ADD64rr_ND $rbx, $rax / $rbx = ADD64rr_ND $rax, $rbx
    //   ->
    // $rbx = ADD64rr $rbx, $rax
    const MCInstrDesc &Desc = MI.getDesc();
    Register Reg0 = MI.getOperand(0).getReg();
    const MachineOperand &Op1 = MI.getOperand(1);
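    // Only a plain register source can make the new data destination
    // redundant; bail out if operand 1 starts a memory reference or the
    // instruction is a conditionally-faulting CMOV.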
    if (!Op1.isReg() || X86::getFirstAddrOperandIdx(MI) == 1 ||
        X86::isCFCMOVCC(MI.getOpcode()))
      return false;
    Register Reg1 = Op1.getReg();
    if (Reg1 == Reg0)
      return true;

    // Op1 and Op2 may be commutable for ND instructions.
    if (!Desc.isCommutable() || Desc.getNumOperands() < 3 ||
        !MI.getOperand(2).isReg() || MI.getOperand(2).getReg() != Reg0)
      return false;
    // Opcode may change after commute, e.g. SHRD -> SHLD.
    ST.getInstrInfo()->commuteInstruction(MI, false, 1, 2);
    Opc = MI.getOpcode();
    return true;
  };

  // EVEX_B has several meanings.
  // AVX512:
  //   register form: rounding control or SAE
  //   memory form: broadcast
  //
  // APX:
  //   MAP4: NDD
  //
  // For the AVX512 cases, the EVEX prefix is needed to carry this information,
  // which prevents the transformation to VEX encoding.
  bool IsND = X86II::hasNewDataDest(TSFlags);
  if (TSFlags & X86II::EVEX_B && !IsND)
    return false;
  unsigned Opc = MI.getOpcode();
  // MOVBE*rr is special because it has NDD semantics but does not set EVEX_B.
  bool IsNDLike = IsND || Opc == X86::MOVBE32rr || Opc == X86::MOVBE64rr;
  bool IsRedundantNDD = IsNDLike ? IsRedundantNewDataDest(Opc) : false;

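  // Look up the opcode in the TableGen'd compression table (sorted by the
  // EVEX opcode) and verify the register, predicate and immediate constraints
  // before returning the compressed opcode.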
  auto GetCompressedOpc = [&](unsigned Opc) -> unsigned {
    ArrayRef<X86TableEntry> Table = ArrayRef(X86CompressEVEXTable);
    const auto I = llvm::lower_bound(Table, Opc);
    if (I == Table.end() || I->OldOpc != Opc)
      return 0;

    if (usesExtendedRegister(MI) || !checkPredicate(I->NewOpc, &ST) ||
        !performCustomAdjustments(MI, I->NewOpc))
      return 0;
    return I->NewOpc;
  };
  // NonNF -> NF only if it's not a compressible NDD instruction and eflags is
  // dead.
  unsigned NewOpc = IsRedundantNDD
                        ? X86::getNonNDVariant(Opc)
                        : ((IsNDLike && ST.hasNF() &&
                            MI.registerDefIsDead(X86::EFLAGS, /*TRI=*/nullptr))
                               ? X86::getNFVariant(Opc)
                               : GetCompressedOpc(Opc));

  if (!NewOpc)
    return false;

  const MCInstrDesc &NewDesc = ST.getInstrInfo()->get(NewOpc);
  MI.setDesc(NewDesc);
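  // Record which kind of compression was performed as an asm-printer flag so
  // the printed assembly can carry a comment about it.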
  unsigned AsmComment;
  switch (NewDesc.TSFlags & X86II::EncodingMask) {
  case X86II::LEGACY:
    AsmComment = X86::AC_EVEX_2_LEGACY;
    break;
  case X86II::VEX:
    AsmComment = X86::AC_EVEX_2_VEX;
    break;
  case X86II::EVEX:
    AsmComment = X86::AC_EVEX_2_EVEX;
    assert(IsND && (NewDesc.TSFlags & X86II::EVEX_NF) &&
           "Unknown EVEX2EVEX compression");
    break;
  default:
    llvm_unreachable("Unknown EVEX compression");
  }
  MI.setAsmPrinterFlag(AsmComment);
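  // With the new data destination dropped, the definition and the first
  // source must be the same register, so tie them as in the legacy
  // two-address form.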
  if (IsRedundantNDD)
    MI.tieOperands(0, 1);

  return true;
}

bool CompressEVEXPass::runOnMachineFunction(MachineFunction &MF) {
#ifndef NDEBUG
  // Make sure the tables are sorted.
  static std::atomic<bool> TableChecked(false);
  if (!TableChecked.load(std::memory_order_relaxed)) {
    assert(llvm::is_sorted(X86CompressEVEXTable) &&
           "X86CompressEVEXTable is not sorted!");
    TableChecked.store(true, std::memory_order_relaxed);
  }
#endif
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
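  // Only subtargets with AVX-512 or the APX extended-GPR/NDD features can
  // have EVEX-encoded instructions, so skip the scan otherwise.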
  if (!ST.hasAVX512() && !ST.hasEGPR() && !ST.hasNDD())
    return false;

  bool Changed = false;

  for (MachineBasicBlock &MBB : MF) {
    // Traverse the basic block.
    for (MachineInstr &MI : MBB)
      Changed |= CompressEVEXImpl(MI, ST);
  }

  return Changed;
}

INITIALIZE_PASS(CompressEVEXPass, COMP_EVEX_NAME, COMP_EVEX_DESC, false, false)

FunctionPass *llvm::createX86CompressEVEXPass() {
  return new CompressEVEXPass();
}