AMDGPU: Mark test as XFAIL in expensive_checks builds
[llvm-project.git] / llvm / lib / Target / X86 / X86FixupInstTuning.cpp
blob8c10a078046868c9e1b571f8a7eb4ebf3be5e056
1 //===-- X86FixupInstTuning.cpp - replace instructions ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file does a tuning pass replacing slower machine instructions
10 // with faster ones. We do this here, as opposed to during normal ISel, as
11 // attempting to get the "right" instruction can break patterns. This pass
12 // is not meant to search for special cases where an instruction can be transformed
13 // to another, it is only meant to do transformations where the old instruction
14 // is always replaceable with the new instructions. For example:
16 // `vpermq ymm` -> `vshufd ymm`
17 // -- BAD, not always valid (lane cross/non-repeated mask)
19 // `vpermilps ymm` -> `vshufd ymm`
20 // -- GOOD, always replaceable
22 //===----------------------------------------------------------------------===//
24 #include "X86.h"
25 #include "X86InstrInfo.h"
26 #include "X86Subtarget.h"
27 #include "llvm/ADT/Statistic.h"
28 #include "llvm/CodeGen/MachineFunctionPass.h"
29 #include "llvm/CodeGen/MachineInstrBuilder.h"
31 using namespace llvm;
33 #define DEBUG_TYPE "x86-fixup-inst-tuning"
35 STATISTIC(NumInstChanges, "Number of instructions changes");
37 namespace {
38 class X86FixupInstTuningPass : public MachineFunctionPass {
39 public:
40 static char ID;
42 X86FixupInstTuningPass() : MachineFunctionPass(ID) {}
44 StringRef getPassName() const override { return "X86 Fixup Inst Tuning"; }
46 bool runOnMachineFunction(MachineFunction &MF) override;
47 bool processInstruction(MachineFunction &MF, MachineBasicBlock &MBB,
48 MachineBasicBlock::iterator &I);
50 // This pass runs after regalloc and doesn't support VReg operands.
51 MachineFunctionProperties getRequiredProperties() const override {
52 return MachineFunctionProperties().set(
53 MachineFunctionProperties::Property::NoVRegs);
56 private:
57 const X86InstrInfo *TII = nullptr;
58 const X86Subtarget *ST = nullptr;
59 const MCSchedModel *SM = nullptr;
61 } // end anonymous namespace
63 char X86FixupInstTuningPass::ID = 0;
65 INITIALIZE_PASS(X86FixupInstTuningPass, DEBUG_TYPE, DEBUG_TYPE, false, false)
67 FunctionPass *llvm::createX86FixupInstTuning() {
68 return new X86FixupInstTuningPass();
/// Three-way "is the new value better" comparison on optional-like values.
///
/// \returns `*NewVal < *CurVal` when both values are present and differ;
///          std::nullopt when either value is missing or the two are equal,
///          i.e. when the comparison cannot decide.
template <typename T>
static std::optional<bool> CmpOptionals(T NewVal, T CurVal) {
  if (NewVal.has_value() && CurVal.has_value() && *NewVal != *CurVal)
    return *NewVal < *CurVal;

  return std::nullopt;
}
79 bool X86FixupInstTuningPass::processInstruction(
80 MachineFunction &MF, MachineBasicBlock &MBB,
81 MachineBasicBlock::iterator &I) {
82 MachineInstr &MI = *I;
83 unsigned Opc = MI.getOpcode();
84 unsigned NumOperands = MI.getDesc().getNumOperands();
86 auto GetInstTput = [&](unsigned Opcode) -> std::optional<double> {
87 // We already checked that SchedModel exists in `NewOpcPreferable`.
88 return MCSchedModel::getReciprocalThroughput(
89 *ST, *(SM->getSchedClassDesc(TII->get(Opcode).getSchedClass())));
92 auto GetInstLat = [&](unsigned Opcode) -> std::optional<double> {
93 // We already checked that SchedModel exists in `NewOpcPreferable`.
94 return MCSchedModel::computeInstrLatency(
95 *ST, *(SM->getSchedClassDesc(TII->get(Opcode).getSchedClass())));
98 auto GetInstSize = [&](unsigned Opcode) -> std::optional<unsigned> {
99 if (unsigned Size = TII->get(Opcode).getSize())
100 return Size;
101 // Zero size means we where unable to compute it.
102 return std::nullopt;
105 auto NewOpcPreferable = [&](unsigned NewOpc,
106 bool ReplaceInTie = true) -> bool {
107 std::optional<bool> Res;
108 if (SM->hasInstrSchedModel()) {
109 // Compare tput -> lat -> code size.
110 Res = CmpOptionals(GetInstTput(NewOpc), GetInstTput(Opc));
111 if (Res.has_value())
112 return *Res;
114 Res = CmpOptionals(GetInstLat(NewOpc), GetInstLat(Opc));
115 if (Res.has_value())
116 return *Res;
119 Res = CmpOptionals(GetInstSize(Opc), GetInstSize(NewOpc));
120 if (Res.has_value())
121 return *Res;
123 // We either have either were unable to get tput/lat/codesize or all values
124 // were equal. Return specified option for a tie.
125 return ReplaceInTie;
128 // `vpermilpd r, i` -> `vshufpd r, r, i`
129 // `vpermilpd r, i, k` -> `vshufpd r, r, i, k`
130 // `vshufpd` is always as fast or faster than `vpermilpd` and takes
131 // 1 less byte of code size for VEX and EVEX encoding.
132 auto ProcessVPERMILPDri = [&](unsigned NewOpc) -> bool {
133 if (!NewOpcPreferable(NewOpc))
134 return false;
135 unsigned MaskImm = MI.getOperand(NumOperands - 1).getImm();
136 MI.removeOperand(NumOperands - 1);
137 MI.addOperand(MI.getOperand(NumOperands - 2));
138 MI.setDesc(TII->get(NewOpc));
139 MI.addOperand(MachineOperand::CreateImm(MaskImm));
140 return true;
143 // `vpermilps r, i` -> `vshufps r, r, i`
144 // `vpermilps r, i, k` -> `vshufps r, r, i, k`
145 // `vshufps` is always as fast or faster than `vpermilps` and takes
146 // 1 less byte of code size for VEX and EVEX encoding.
147 auto ProcessVPERMILPSri = [&](unsigned NewOpc) -> bool {
148 if (!NewOpcPreferable(NewOpc))
149 return false;
150 unsigned MaskImm = MI.getOperand(NumOperands - 1).getImm();
151 MI.removeOperand(NumOperands - 1);
152 MI.addOperand(MI.getOperand(NumOperands - 2));
153 MI.setDesc(TII->get(NewOpc));
154 MI.addOperand(MachineOperand::CreateImm(MaskImm));
155 return true;
158 // `vpermilps m, i` -> `vpshufd m, i` iff no domain delay penalty on shuffles.
159 // `vpshufd` is always as fast or faster than `vpermilps` and takes 1 less
160 // byte of code size.
161 auto ProcessVPERMILPSmi = [&](unsigned NewOpc) -> bool {
162 // TODO: Might be work adding bypass delay if -Os/-Oz is enabled as
163 // `vpshufd` saves a byte of code size.
164 if (!ST->hasNoDomainDelayShuffle() ||
165 !NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
166 return false;
167 MI.setDesc(TII->get(NewOpc));
168 return true;
171 // `vunpcklpd/vmovlhps r, r` -> `vunpcklqdq r, r`/`vshufpd r, r, 0x00`
172 // `vunpckhpd/vmovlhps r, r` -> `vunpckhqdq r, r`/`vshufpd r, r, 0xff`
173 // `vunpcklpd r, r, k` -> `vunpcklqdq r, r, k`/`vshufpd r, r, k, 0x00`
174 // `vunpckhpd r, r, k` -> `vunpckhqdq r, r, k`/`vshufpd r, r, k, 0xff`
175 // `vunpcklpd r, m` -> `vunpcklqdq r, m, k`
176 // `vunpckhpd r, m` -> `vunpckhqdq r, m, k`
177 // `vunpcklpd r, m, k` -> `vunpcklqdq r, m, k`
178 // `vunpckhpd r, m, k` -> `vunpckhqdq r, m, k`
179 // 1) If no bypass delay and `vunpck{l|h}qdq` faster than `vunpck{l|h}pd`
180 // -> `vunpck{l|h}qdq`
181 // 2) If `vshufpd` faster than `vunpck{l|h}pd`
182 // -> `vshufpd`
184 // `vunpcklps` -> `vunpckldq` (for all operand types if no bypass delay)
185 auto ProcessUNPCK = [&](unsigned NewOpc, unsigned MaskImm) -> bool {
186 if (!NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
187 return false;
189 MI.setDesc(TII->get(NewOpc));
190 MI.addOperand(MachineOperand::CreateImm(MaskImm));
191 return true;
194 auto ProcessUNPCKToIntDomain = [&](unsigned NewOpc) -> bool {
195 // TODO it may be worth it to set ReplaceInTie to `true` as there is no real
196 // downside to the integer unpck, but if someone doesn't specify exact
197 // target we won't find it faster.
198 if (!ST->hasNoDomainDelayShuffle() ||
199 !NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
200 return false;
201 MI.setDesc(TII->get(NewOpc));
202 return true;
205 auto ProcessUNPCKLPDrr = [&](unsigned NewOpcIntDomain,
206 unsigned NewOpc) -> bool {
207 if (ProcessUNPCKToIntDomain(NewOpcIntDomain))
208 return true;
209 return ProcessUNPCK(NewOpc, 0x00);
211 auto ProcessUNPCKHPDrr = [&](unsigned NewOpcIntDomain,
212 unsigned NewOpc) -> bool {
213 if (ProcessUNPCKToIntDomain(NewOpcIntDomain))
214 return true;
215 return ProcessUNPCK(NewOpc, 0xff);
218 auto ProcessUNPCKPDrm = [&](unsigned NewOpcIntDomain) -> bool {
219 return ProcessUNPCKToIntDomain(NewOpcIntDomain);
222 auto ProcessUNPCKPS = [&](unsigned NewOpc) -> bool {
223 return ProcessUNPCKToIntDomain(NewOpc);
226 switch (Opc) {
227 case X86::VPERMILPDri:
228 return ProcessVPERMILPDri(X86::VSHUFPDrri);
229 case X86::VPERMILPDYri:
230 return ProcessVPERMILPDri(X86::VSHUFPDYrri);
231 case X86::VPERMILPDZ128ri:
232 return ProcessVPERMILPDri(X86::VSHUFPDZ128rri);
233 case X86::VPERMILPDZ256ri:
234 return ProcessVPERMILPDri(X86::VSHUFPDZ256rri);
235 case X86::VPERMILPDZri:
236 return ProcessVPERMILPDri(X86::VSHUFPDZrri);
237 case X86::VPERMILPDZ128rikz:
238 return ProcessVPERMILPDri(X86::VSHUFPDZ128rrikz);
239 case X86::VPERMILPDZ256rikz:
240 return ProcessVPERMILPDri(X86::VSHUFPDZ256rrikz);
241 case X86::VPERMILPDZrikz:
242 return ProcessVPERMILPDri(X86::VSHUFPDZrrikz);
243 case X86::VPERMILPDZ128rik:
244 return ProcessVPERMILPDri(X86::VSHUFPDZ128rrik);
245 case X86::VPERMILPDZ256rik:
246 return ProcessVPERMILPDri(X86::VSHUFPDZ256rrik);
247 case X86::VPERMILPDZrik:
248 return ProcessVPERMILPDri(X86::VSHUFPDZrrik);
250 case X86::VPERMILPSri:
251 return ProcessVPERMILPSri(X86::VSHUFPSrri);
252 case X86::VPERMILPSYri:
253 return ProcessVPERMILPSri(X86::VSHUFPSYrri);
254 case X86::VPERMILPSZ128ri:
255 return ProcessVPERMILPSri(X86::VSHUFPSZ128rri);
256 case X86::VPERMILPSZ256ri:
257 return ProcessVPERMILPSri(X86::VSHUFPSZ256rri);
258 case X86::VPERMILPSZri:
259 return ProcessVPERMILPSri(X86::VSHUFPSZrri);
260 case X86::VPERMILPSZ128rikz:
261 return ProcessVPERMILPSri(X86::VSHUFPSZ128rrikz);
262 case X86::VPERMILPSZ256rikz:
263 return ProcessVPERMILPSri(X86::VSHUFPSZ256rrikz);
264 case X86::VPERMILPSZrikz:
265 return ProcessVPERMILPSri(X86::VSHUFPSZrrikz);
266 case X86::VPERMILPSZ128rik:
267 return ProcessVPERMILPSri(X86::VSHUFPSZ128rrik);
268 case X86::VPERMILPSZ256rik:
269 return ProcessVPERMILPSri(X86::VSHUFPSZ256rrik);
270 case X86::VPERMILPSZrik:
271 return ProcessVPERMILPSri(X86::VSHUFPSZrrik);
272 case X86::VPERMILPSmi:
273 return ProcessVPERMILPSmi(X86::VPSHUFDmi);
274 case X86::VPERMILPSYmi:
275 // TODO: See if there is a more generic way we can test if the replacement
276 // instruction is supported.
277 return ST->hasAVX2() ? ProcessVPERMILPSmi(X86::VPSHUFDYmi) : false;
278 case X86::VPERMILPSZ128mi:
279 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mi);
280 case X86::VPERMILPSZ256mi:
281 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mi);
282 case X86::VPERMILPSZmi:
283 return ProcessVPERMILPSmi(X86::VPSHUFDZmi);
284 case X86::VPERMILPSZ128mikz:
285 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mikz);
286 case X86::VPERMILPSZ256mikz:
287 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mikz);
288 case X86::VPERMILPSZmikz:
289 return ProcessVPERMILPSmi(X86::VPSHUFDZmikz);
290 case X86::VPERMILPSZ128mik:
291 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mik);
292 case X86::VPERMILPSZ256mik:
293 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mik);
294 case X86::VPERMILPSZmik:
295 return ProcessVPERMILPSmi(X86::VPSHUFDZmik);
297 case X86::MOVLHPSrr:
298 case X86::UNPCKLPDrr:
299 return ProcessUNPCKLPDrr(X86::PUNPCKLQDQrr, X86::SHUFPDrri);
300 case X86::VMOVLHPSrr:
301 case X86::VUNPCKLPDrr:
302 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQrr, X86::VSHUFPDrri);
303 case X86::VUNPCKLPDYrr:
304 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQYrr, X86::VSHUFPDYrri);
305 // VMOVLHPS is always 128 bits.
306 case X86::VMOVLHPSZrr:
307 case X86::VUNPCKLPDZ128rr:
308 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rr, X86::VSHUFPDZ128rri);
309 case X86::VUNPCKLPDZ256rr:
310 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rr, X86::VSHUFPDZ256rri);
311 case X86::VUNPCKLPDZrr:
312 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrr, X86::VSHUFPDZrri);
313 case X86::VUNPCKLPDZ128rrk:
314 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrk, X86::VSHUFPDZ128rrik);
315 case X86::VUNPCKLPDZ256rrk:
316 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrk, X86::VSHUFPDZ256rrik);
317 case X86::VUNPCKLPDZrrk:
318 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrk, X86::VSHUFPDZrrik);
319 case X86::VUNPCKLPDZ128rrkz:
320 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrkz, X86::VSHUFPDZ128rrikz);
321 case X86::VUNPCKLPDZ256rrkz:
322 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrkz, X86::VSHUFPDZ256rrikz);
323 case X86::VUNPCKLPDZrrkz:
324 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrkz, X86::VSHUFPDZrrikz);
325 case X86::UNPCKHPDrr:
326 return ProcessUNPCKHPDrr(X86::PUNPCKHQDQrr, X86::SHUFPDrri);
327 case X86::VUNPCKHPDrr:
328 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQrr, X86::VSHUFPDrri);
329 case X86::VUNPCKHPDYrr:
330 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQYrr, X86::VSHUFPDYrri);
331 case X86::VUNPCKHPDZ128rr:
332 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rr, X86::VSHUFPDZ128rri);
333 case X86::VUNPCKHPDZ256rr:
334 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rr, X86::VSHUFPDZ256rri);
335 case X86::VUNPCKHPDZrr:
336 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrr, X86::VSHUFPDZrri);
337 case X86::VUNPCKHPDZ128rrk:
338 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrk, X86::VSHUFPDZ128rrik);
339 case X86::VUNPCKHPDZ256rrk:
340 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrk, X86::VSHUFPDZ256rrik);
341 case X86::VUNPCKHPDZrrk:
342 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrk, X86::VSHUFPDZrrik);
343 case X86::VUNPCKHPDZ128rrkz:
344 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrkz, X86::VSHUFPDZ128rrikz);
345 case X86::VUNPCKHPDZ256rrkz:
346 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrkz, X86::VSHUFPDZ256rrikz);
347 case X86::VUNPCKHPDZrrkz:
348 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrkz, X86::VSHUFPDZrrikz);
349 case X86::UNPCKLPDrm:
350 return ProcessUNPCKPDrm(X86::PUNPCKLQDQrm);
351 case X86::VUNPCKLPDrm:
352 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQrm);
353 case X86::VUNPCKLPDYrm:
354 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQYrm);
355 case X86::VUNPCKLPDZ128rm:
356 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rm);
357 case X86::VUNPCKLPDZ256rm:
358 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rm);
359 case X86::VUNPCKLPDZrm:
360 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrm);
361 case X86::VUNPCKLPDZ128rmk:
362 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmk);
363 case X86::VUNPCKLPDZ256rmk:
364 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmk);
365 case X86::VUNPCKLPDZrmk:
366 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmk);
367 case X86::VUNPCKLPDZ128rmkz:
368 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmkz);
369 case X86::VUNPCKLPDZ256rmkz:
370 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmkz);
371 case X86::VUNPCKLPDZrmkz:
372 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmkz);
373 case X86::UNPCKHPDrm:
374 return ProcessUNPCKPDrm(X86::PUNPCKHQDQrm);
375 case X86::VUNPCKHPDrm:
376 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQrm);
377 case X86::VUNPCKHPDYrm:
378 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQYrm);
379 case X86::VUNPCKHPDZ128rm:
380 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rm);
381 case X86::VUNPCKHPDZ256rm:
382 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rm);
383 case X86::VUNPCKHPDZrm:
384 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrm);
385 case X86::VUNPCKHPDZ128rmk:
386 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmk);
387 case X86::VUNPCKHPDZ256rmk:
388 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmk);
389 case X86::VUNPCKHPDZrmk:
390 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmk);
391 case X86::VUNPCKHPDZ128rmkz:
392 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmkz);
393 case X86::VUNPCKHPDZ256rmkz:
394 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmkz);
395 case X86::VUNPCKHPDZrmkz:
396 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmkz);
398 case X86::UNPCKLPSrr:
399 return ProcessUNPCKPS(X86::PUNPCKLDQrr);
400 case X86::VUNPCKLPSrr:
401 return ProcessUNPCKPS(X86::VPUNPCKLDQrr);
402 case X86::VUNPCKLPSYrr:
403 return ProcessUNPCKPS(X86::VPUNPCKLDQYrr);
404 case X86::VUNPCKLPSZ128rr:
405 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rr);
406 case X86::VUNPCKLPSZ256rr:
407 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rr);
408 case X86::VUNPCKLPSZrr:
409 return ProcessUNPCKPS(X86::VPUNPCKLDQZrr);
410 case X86::VUNPCKLPSZ128rrk:
411 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrk);
412 case X86::VUNPCKLPSZ256rrk:
413 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrk);
414 case X86::VUNPCKLPSZrrk:
415 return ProcessUNPCKPS(X86::VPUNPCKLDQZrrk);
416 case X86::VUNPCKLPSZ128rrkz:
417 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrkz);
418 case X86::VUNPCKLPSZ256rrkz:
419 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrkz);
420 case X86::VUNPCKLPSZrrkz:
421 return ProcessUNPCKPS(X86::VPUNPCKLDQZrrkz);
422 case X86::UNPCKHPSrr:
423 return ProcessUNPCKPS(X86::PUNPCKHDQrr);
424 case X86::VUNPCKHPSrr:
425 return ProcessUNPCKPS(X86::VPUNPCKHDQrr);
426 case X86::VUNPCKHPSYrr:
427 return ProcessUNPCKPS(X86::VPUNPCKHDQYrr);
428 case X86::VUNPCKHPSZ128rr:
429 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rr);
430 case X86::VUNPCKHPSZ256rr:
431 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rr);
432 case X86::VUNPCKHPSZrr:
433 return ProcessUNPCKPS(X86::VPUNPCKHDQZrr);
434 case X86::VUNPCKHPSZ128rrk:
435 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrk);
436 case X86::VUNPCKHPSZ256rrk:
437 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrk);
438 case X86::VUNPCKHPSZrrk:
439 return ProcessUNPCKPS(X86::VPUNPCKHDQZrrk);
440 case X86::VUNPCKHPSZ128rrkz:
441 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrkz);
442 case X86::VUNPCKHPSZ256rrkz:
443 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrkz);
444 case X86::VUNPCKHPSZrrkz:
445 return ProcessUNPCKPS(X86::VPUNPCKHDQZrrkz);
446 case X86::UNPCKLPSrm:
447 return ProcessUNPCKPS(X86::PUNPCKLDQrm);
448 case X86::VUNPCKLPSrm:
449 return ProcessUNPCKPS(X86::VPUNPCKLDQrm);
450 case X86::VUNPCKLPSYrm:
451 return ProcessUNPCKPS(X86::VPUNPCKLDQYrm);
452 case X86::VUNPCKLPSZ128rm:
453 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rm);
454 case X86::VUNPCKLPSZ256rm:
455 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rm);
456 case X86::VUNPCKLPSZrm:
457 return ProcessUNPCKPS(X86::VPUNPCKLDQZrm);
458 case X86::VUNPCKLPSZ128rmk:
459 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmk);
460 case X86::VUNPCKLPSZ256rmk:
461 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmk);
462 case X86::VUNPCKLPSZrmk:
463 return ProcessUNPCKPS(X86::VPUNPCKLDQZrmk);
464 case X86::VUNPCKLPSZ128rmkz:
465 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmkz);
466 case X86::VUNPCKLPSZ256rmkz:
467 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmkz);
468 case X86::VUNPCKLPSZrmkz:
469 return ProcessUNPCKPS(X86::VPUNPCKLDQZrmkz);
470 case X86::UNPCKHPSrm:
471 return ProcessUNPCKPS(X86::PUNPCKHDQrm);
472 case X86::VUNPCKHPSrm:
473 return ProcessUNPCKPS(X86::VPUNPCKHDQrm);
474 case X86::VUNPCKHPSYrm:
475 return ProcessUNPCKPS(X86::VPUNPCKHDQYrm);
476 case X86::VUNPCKHPSZ128rm:
477 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rm);
478 case X86::VUNPCKHPSZ256rm:
479 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rm);
480 case X86::VUNPCKHPSZrm:
481 return ProcessUNPCKPS(X86::VPUNPCKHDQZrm);
482 case X86::VUNPCKHPSZ128rmk:
483 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmk);
484 case X86::VUNPCKHPSZ256rmk:
485 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmk);
486 case X86::VUNPCKHPSZrmk:
487 return ProcessUNPCKPS(X86::VPUNPCKHDQZrmk);
488 case X86::VUNPCKHPSZ128rmkz:
489 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmkz);
490 case X86::VUNPCKHPSZ256rmkz:
491 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmkz);
492 case X86::VUNPCKHPSZrmkz:
493 return ProcessUNPCKPS(X86::VPUNPCKHDQZrmkz);
494 default:
495 return false;
499 bool X86FixupInstTuningPass::runOnMachineFunction(MachineFunction &MF) {
500 LLVM_DEBUG(dbgs() << "Start X86FixupInstTuning\n";);
501 bool Changed = false;
502 ST = &MF.getSubtarget<X86Subtarget>();
503 TII = ST->getInstrInfo();
504 SM = &ST->getSchedModel();
506 for (MachineBasicBlock &MBB : MF) {
507 for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
508 if (processInstruction(MF, MBB, I)) {
509 ++NumInstChanges;
510 Changed = true;
514 LLVM_DEBUG(dbgs() << "End X86FixupInstTuning\n";);
515 return Changed;