1 //===-- X86FixupInstTunings.cpp - replace instructions -----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file does a tuning pass replacing slower machine instructions
10 // with faster ones. We do this here, as opposed to during normal ISel, as
11 // attempting to get the "right" instruction can break patterns. This pass
12 // is not meant to search for special cases where an instruction can be
13 // transformed to another; it is only meant to do transformations where the old
14 // instruction is always replaceable with the new instruction. For example:
16 // `vpermq ymm` -> `vpshufd ymm`
17 // -- BAD, not always valid (lane cross/non-repeated mask)
19 // `vpermilps ymm` -> `vpshufd ymm`
20 // -- GOOD, always replaceable
22 //===----------------------------------------------------------------------===//
25 #include "X86InstrInfo.h"
26 #include "X86Subtarget.h"
27 #include "llvm/ADT/Statistic.h"
28 #include "llvm/CodeGen/MachineFunctionPass.h"
29 #include "llvm/CodeGen/MachineInstrBuilder.h"
// Name tag for this pass; it is also passed to INITIALIZE_PASS below for both
// of its string arguments.
33 #define DEBUG_TYPE "x86-fixup-inst-tuning"
// Statistic counter declared with the description string that follows.
35 STATISTIC(NumInstChanges
, "Number of instructions changes");
// MachineFunctionPass that inspects instructions one at a time (see
// processInstruction) and swaps opcodes for preferable equivalents.
// The pointers TII, ST and SM start out null and are assigned in
// runOnMachineFunction before any instruction is processed.
38 class X86FixupInstTuningPass
: public MachineFunctionPass
{
// Forwards the pass ID to the MachineFunctionPass base class.
42 X86FixupInstTuningPass() : MachineFunctionPass(ID
) {}
// Returns the fixed display name of this pass.
44 StringRef
getPassName() const override
{ return "X86 Fixup Inst Tuning"; }
// Pass entry point, invoked with the machine function to process.
46 bool runOnMachineFunction(MachineFunction
&MF
) override
;
// Handles the single instruction referenced by I; returns a bool produced by
// the rewrite helper selected for its opcode.
47 bool processInstruction(MachineFunction
&MF
, MachineBasicBlock
&MBB
,
48 MachineBasicBlock::iterator
&I
);
50 // This pass runs after regalloc and doesn't support VReg operands.
51 MachineFunctionProperties
getRequiredProperties() const override
{
52 return MachineFunctionProperties().set(
53 MachineFunctionProperties::Property::NoVRegs
);
// Instruction info, used to look up opcode descriptors.
57 const X86InstrInfo
*TII
= nullptr;
// Subtarget of the function currently being processed.
58 const X86Subtarget
*ST
= nullptr;
// Scheduling model obtained from the subtarget.
59 const MCSchedModel
*SM
= nullptr;
61 } // end anonymous namespace
// Definition of the pass ID that the constructor hands to MachineFunctionPass.
63 char X86FixupInstTuningPass::ID
= 0;
// Pass registration; DEBUG_TYPE is used for both string arguments.
65 INITIALIZE_PASS(X86FixupInstTuningPass
, DEBUG_TYPE
, DEBUG_TYPE
, false, false)
// Factory function: returns a newly allocated X86FixupInstTuningPass as a
// FunctionPass pointer.
67 FunctionPass
*llvm::createX86FixupInstTuning() {
68 return new X86FixupInstTuningPass();
// Compares two optional values of the same type T.
// When both hold a value and the values differ, the result is
// `*NewVal < *CurVal`, i.e. true when the new value is the smaller one.
// NOTE(review): the result for the remaining cases (a missing value or equal
// values) is not visible in these lines -- presumably an empty optional;
// confirm.
72 static std::optional
<bool> CmpOptionals(T NewVal
, T CurVal
) {
73 if (NewVal
.has_value() && CurVal
.has_value() && *NewVal
!= *CurVal
)
74 return *NewVal
< *CurVal
;
// Examines the instruction referenced by I and dispatches on its opcode to one
// of the rewrite helpers defined below. The boolean result is whatever the
// selected helper returns.
79 bool X86FixupInstTuningPass::processInstruction(
80 MachineFunction
&MF
, MachineBasicBlock
&MBB
,
81 MachineBasicBlock::iterator
&I
) {
82 MachineInstr
&MI
= *I
;
83 unsigned Opc
= MI
.getOpcode();
// Operand count as declared by the opcode's descriptor; the helpers below use
// it to index the trailing operands of MI.
84 unsigned NumOperands
= MI
.getDesc().getNumOperands();
// Reciprocal throughput of Opcode on the current subtarget: looks up the
// opcode's scheduling class via TII and SM and passes its descriptor to
// MCSchedModel::getReciprocalThroughput.
86 auto GetInstTput
= [&](unsigned Opcode
) -> std::optional
<double> {
87 // We already checked that SchedModel exists in `NewOpcPreferable`.
88 return MCSchedModel::getReciprocalThroughput(
89 *ST
, *(SM
->getSchedClassDesc(TII
->get(Opcode
).getSchedClass())));
// Latency of Opcode on the current subtarget: looks up the opcode's scheduling
// class via TII and SM and passes its descriptor to
// MCSchedModel::computeInstrLatency.
92 auto GetInstLat
= [&](unsigned Opcode
) -> std::optional
<double> {
93 // We already checked that SchedModel exists in `NewOpcPreferable`.
94 return MCSchedModel::computeInstrLatency(
95 *ST
, *(SM
->getSchedClassDesc(TII
->get(Opcode
).getSchedClass())));
// Size of Opcode as recorded in its instruction descriptor. The `if` below
// binds Size only when the descriptor reports a non-zero value.
// NOTE(review): the statements that produce the result are not visible in
// these lines -- presumably Size when non-zero and an empty optional
// otherwise; confirm.
98 auto GetInstSize
= [&](unsigned Opcode
) -> std::optional
<unsigned> {
99 if (unsigned Size
= TII
->get(Opcode
).getSize())
101 // Zero size means we were unable to compute it.
// Decides whether NewOpc should replace the current opcode Opc.
// When the scheduling model has per-instruction data, the candidates are
// compared with CmpOptionals on throughput, then latency, then code size.
// ReplaceInTie (default true) is the answer to use for a tie, per the comment
// at the end of this lambda.
105 auto NewOpcPreferable
= [&](unsigned NewOpc
,
106 bool ReplaceInTie
= true) -> bool {
107 std::optional
<bool> Res
;
108 if (SM
->hasInstrSchedModel()) {
109 // Compare tput -> lat -> code size.
110 Res
= CmpOptionals(GetInstTput(NewOpc
), GetInstTput(Opc
));
114 Res
= CmpOptionals(GetInstLat(NewOpc
), GetInstLat(Opc
));
// NOTE(review): the arguments here are (Opc, NewOpc), the reverse of the
// throughput and latency comparisons above -- confirm this order is intended.
119 Res
= CmpOptionals(GetInstSize(Opc
), GetInstSize(NewOpc
));
123 // We were either unable to get tput/lat/codesize or all values
124 // were equal. Return the specified option for a tie.
128 // `vpermilpd r, i` -> `vshufpd r, r, i`
129 // `vpermilpd r, i, k` -> `vshufpd r, r, i, k`
130 // `vshufpd` is always as fast or faster than `vpermilpd` and takes
131 // 1 less byte of code size for VEX and EVEX encoding.
// Rewrites MI in place: the trailing immediate is taken off, the operand in
// front of it is appended a second time, the descriptor is switched to NewOpc,
// and the immediate is appended again as the last operand.
132 auto ProcessVPERMILPDri
= [&](unsigned NewOpc
) -> bool {
// Guard on NewOpcPreferable; ReplaceInTie is left at its default (true).
133 if (!NewOpcPreferable(NewOpc
))
// Save the shuffle mask before its operand is removed.
135 unsigned MaskImm
= MI
.getOperand(NumOperands
- 1).getImm();
136 MI
.removeOperand(NumOperands
- 1);
// Duplicate the operand at index NumOperands - 2 (`r, i` -> `r, r, i`).
137 MI
.addOperand(MI
.getOperand(NumOperands
- 2));
138 MI
.setDesc(TII
->get(NewOpc
));
139 MI
.addOperand(MachineOperand::CreateImm(MaskImm
));
143 // `vpermilps r, i` -> `vshufps r, r, i`
144 // `vpermilps r, i, k` -> `vshufps r, r, i, k`
145 // `vshufps` is always as fast or faster than `vpermilps` and takes
146 // 1 less byte of code size for VEX and EVEX encoding.
// Performs the same operand rewrite as ProcessVPERMILPDri: remove the trailing
// immediate, append a second copy of the operand before it, switch the
// descriptor to NewOpc, then re-append the immediate.
147 auto ProcessVPERMILPSri
= [&](unsigned NewOpc
) -> bool {
// Guard on NewOpcPreferable; ReplaceInTie is left at its default (true).
148 if (!NewOpcPreferable(NewOpc
))
// Save the shuffle mask before its operand is removed.
150 unsigned MaskImm
= MI
.getOperand(NumOperands
- 1).getImm();
151 MI
.removeOperand(NumOperands
- 1);
// Duplicate the operand at index NumOperands - 2 (`r, i` -> `r, r, i`).
152 MI
.addOperand(MI
.getOperand(NumOperands
- 2));
153 MI
.setDesc(TII
->get(NewOpc
));
154 MI
.addOperand(MachineOperand::CreateImm(MaskImm
));
158 // `vpermilps m, i` -> `vpshufd m, i` iff no domain delay penalty on shuffles.
159 // `vpshufd` is always as fast or faster than `vpermilps` and takes 1 less
160 // byte of code size.
// Only the descriptor changes here; the operand list of MI is left untouched.
161 auto ProcessVPERMILPSmi
= [&](unsigned NewOpc
) -> bool {
162 // TODO: Might be worth adding bypass delay if -Os/-Oz is enabled as
163 // `vpshufd` saves a byte of code size.
// Guard: needs hasNoDomainDelayShuffle() on the subtarget and NewOpc judged
// preferable, with ties counted against replacement.
164 if (!ST
->hasNoDomainDelayShuffle() ||
165 !NewOpcPreferable(NewOpc
, /*ReplaceInTie*/ false))
167 MI
.setDesc(TII
->get(NewOpc
));
171 // `vunpcklpd/vmovlhps r, r` -> `vpunpcklqdq r, r`/`vshufpd r, r, 0x00`
172 // `vunpckhpd r, r` -> `vpunpckhqdq r, r`/`vshufpd r, r, 0xff`
173 // `vunpcklpd r, r, k` -> `vpunpcklqdq r, r, k`/`vshufpd r, r, k, 0x00`
174 // `vunpckhpd r, r, k` -> `vpunpckhqdq r, r, k`/`vshufpd r, r, k, 0xff`
175 // `vunpcklpd r, m` -> `vpunpcklqdq r, m`
176 // `vunpckhpd r, m` -> `vpunpckhqdq r, m`
177 // `vunpcklpd r, m, k` -> `vpunpcklqdq r, m, k`
178 // `vunpckhpd r, m, k` -> `vpunpckhqdq r, m, k`
179 // 1) If no bypass delay and `vpunpck{l|h}qdq` faster than `vunpck{l|h}pd`
180 // -> `vpunpck{l|h}qdq`
181 // 2) If `vshufpd` faster than `vunpck{l|h}pd`
184 // `vunpcklps` -> `vpunpckldq` (for all operand types if no bypass delay)
// Shuffle-based replacement: switches MI to NewOpc and appends MaskImm as a
// new trailing immediate operand.
185 auto ProcessUNPCK
= [&](unsigned NewOpc
, unsigned MaskImm
) -> bool {
// Guard on NewOpcPreferable, with ties counted against replacement.
186 if (!NewOpcPreferable(NewOpc
, /*ReplaceInTie*/ false))
189 MI
.setDesc(TII
->get(NewOpc
));
190 MI
.addOperand(MachineOperand::CreateImm(MaskImm
));
// Integer-domain replacement: only the descriptor of MI is switched to NewOpc;
// no operands are added or removed.
194 auto ProcessUNPCKToIntDomain
= [&](unsigned NewOpc
) -> bool {
195 // TODO it may be worth it to set ReplaceInTie to `true` as there is no real
196 // downside to the integer unpck, but if someone doesn't specify exact
197 // target we won't find it faster.
// Guard: needs hasNoDomainDelayShuffle() on the subtarget and NewOpc judged
// preferable, with ties counted against replacement.
198 if (!ST
->hasNoDomainDelayShuffle() ||
199 !NewOpcPreferable(NewOpc
, /*ReplaceInTie*/ false))
201 MI
.setDesc(TII
->get(NewOpc
));
// Register-register low unpack: the integer-domain opcode is attempted first;
// the shuffle replacement is requested with immediate 0x00.
205 auto ProcessUNPCKLPDrr
= [&](unsigned NewOpcIntDomain
,
206 unsigned NewOpc
) -> bool {
207 if (ProcessUNPCKToIntDomain(NewOpcIntDomain
))
209 return ProcessUNPCK(NewOpc
, 0x00);
// Register-register high unpack: the integer-domain opcode is attempted first;
// the shuffle replacement is requested with immediate 0xff.
211 auto ProcessUNPCKHPDrr
= [&](unsigned NewOpcIntDomain
,
212 unsigned NewOpc
) -> bool {
213 if (ProcessUNPCKToIntDomain(NewOpcIntDomain
))
215 return ProcessUNPCK(NewOpc
, 0xff);
// Memory-operand PD unpack: forwards directly to the integer-domain
// replacement; no shuffle alternative is attempted here.
218 auto ProcessUNPCKPDrm
= [&](unsigned NewOpcIntDomain
) -> bool {
219 return ProcessUNPCKToIntDomain(NewOpcIntDomain
);
// PS unpack (used by the switch for both register and memory forms): forwards
// directly to the integer-domain replacement.
222 auto ProcessUNPCKPS
= [&](unsigned NewOpc
) -> bool {
223 return ProcessUNPCKToIntDomain(NewOpc
);
// VPERMILPD register/immediate forms -> VSHUFPD with the same vector-width and
// mask suffix (plain, Y, Z128/Z256/Z, then the kz and k masked variants).
227 case X86::VPERMILPDri
:
228 return ProcessVPERMILPDri(X86::VSHUFPDrri
);
229 case X86::VPERMILPDYri
:
230 return ProcessVPERMILPDri(X86::VSHUFPDYrri
);
231 case X86::VPERMILPDZ128ri
:
232 return ProcessVPERMILPDri(X86::VSHUFPDZ128rri
);
233 case X86::VPERMILPDZ256ri
:
234 return ProcessVPERMILPDri(X86::VSHUFPDZ256rri
);
235 case X86::VPERMILPDZri
:
236 return ProcessVPERMILPDri(X86::VSHUFPDZrri
);
237 case X86::VPERMILPDZ128rikz
:
238 return ProcessVPERMILPDri(X86::VSHUFPDZ128rrikz
);
239 case X86::VPERMILPDZ256rikz
:
240 return ProcessVPERMILPDri(X86::VSHUFPDZ256rrikz
);
241 case X86::VPERMILPDZrikz
:
242 return ProcessVPERMILPDri(X86::VSHUFPDZrrikz
);
243 case X86::VPERMILPDZ128rik
:
244 return ProcessVPERMILPDri(X86::VSHUFPDZ128rrik
);
245 case X86::VPERMILPDZ256rik
:
246 return ProcessVPERMILPDri(X86::VSHUFPDZ256rrik
);
247 case X86::VPERMILPDZrik
:
248 return ProcessVPERMILPDri(X86::VSHUFPDZrrik
);
// VPERMILPS register/immediate forms -> VSHUFPS with the same vector-width and
// mask suffix (plain, Y, Z128/Z256/Z, then the kz and k masked variants).
250 case X86::VPERMILPSri
:
251 return ProcessVPERMILPSri(X86::VSHUFPSrri
);
252 case X86::VPERMILPSYri
:
253 return ProcessVPERMILPSri(X86::VSHUFPSYrri
);
254 case X86::VPERMILPSZ128ri
:
255 return ProcessVPERMILPSri(X86::VSHUFPSZ128rri
);
256 case X86::VPERMILPSZ256ri
:
257 return ProcessVPERMILPSri(X86::VSHUFPSZ256rri
);
258 case X86::VPERMILPSZri
:
259 return ProcessVPERMILPSri(X86::VSHUFPSZrri
);
260 case X86::VPERMILPSZ128rikz
:
261 return ProcessVPERMILPSri(X86::VSHUFPSZ128rrikz
);
262 case X86::VPERMILPSZ256rikz
:
263 return ProcessVPERMILPSri(X86::VSHUFPSZ256rrikz
);
264 case X86::VPERMILPSZrikz
:
265 return ProcessVPERMILPSri(X86::VSHUFPSZrrikz
);
266 case X86::VPERMILPSZ128rik
:
267 return ProcessVPERMILPSri(X86::VSHUFPSZ128rrik
);
268 case X86::VPERMILPSZ256rik
:
269 return ProcessVPERMILPSri(X86::VSHUFPSZ256rrik
);
270 case X86::VPERMILPSZrik
:
271 return ProcessVPERMILPSri(X86::VSHUFPSZrrik
);
// VPERMILPS memory/immediate forms -> VPSHUFD with the same vector-width and
// mask suffix. The Y (256-bit VEX) form is only rewritten when the subtarget
// reports AVX2; otherwise the case returns false.
272 case X86::VPERMILPSmi
:
273 return ProcessVPERMILPSmi(X86::VPSHUFDmi
);
274 case X86::VPERMILPSYmi
:
275 // TODO: See if there is a more generic way we can test if the replacement
276 // instruction is supported.
277 return ST
->hasAVX2() ? ProcessVPERMILPSmi(X86::VPSHUFDYmi
) : false;
278 case X86::VPERMILPSZ128mi
:
279 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mi
);
280 case X86::VPERMILPSZ256mi
:
281 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mi
);
282 case X86::VPERMILPSZmi
:
283 return ProcessVPERMILPSmi(X86::VPSHUFDZmi
);
284 case X86::VPERMILPSZ128mikz
:
285 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mikz
);
286 case X86::VPERMILPSZ256mikz
:
287 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mikz
);
288 case X86::VPERMILPSZmikz
:
289 return ProcessVPERMILPSmi(X86::VPSHUFDZmikz
);
290 case X86::VPERMILPSZ128mik
:
291 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mik
);
292 case X86::VPERMILPSZ256mik
:
293 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mik
);
294 case X86::VPERMILPSZmik
:
295 return ProcessVPERMILPSmi(X86::VPSHUFDZmik
);
// Register-register UNPCKLPD forms (and VMOVLHPS, which shares the 128-bit
// cases). Each case passes the integer-domain PUNPCKLQDQ opcode first and the
// SHUFPD opcode of matching width and mask suffix second.
298 case X86::UNPCKLPDrr
:
299 return ProcessUNPCKLPDrr(X86::PUNPCKLQDQrr
, X86::SHUFPDrri
);
300 case X86::VMOVLHPSrr
:
301 case X86::VUNPCKLPDrr
:
302 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQrr
, X86::VSHUFPDrri
);
303 case X86::VUNPCKLPDYrr
:
304 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQYrr
, X86::VSHUFPDYrri
);
305 // VMOVLHPS is always 128 bits.
306 case X86::VMOVLHPSZrr
:
307 case X86::VUNPCKLPDZ128rr
:
308 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rr
, X86::VSHUFPDZ128rri
);
309 case X86::VUNPCKLPDZ256rr
:
310 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rr
, X86::VSHUFPDZ256rri
);
311 case X86::VUNPCKLPDZrr
:
312 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrr
, X86::VSHUFPDZrri
);
313 case X86::VUNPCKLPDZ128rrk
:
314 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrk
, X86::VSHUFPDZ128rrik
);
315 case X86::VUNPCKLPDZ256rrk
:
316 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrk
, X86::VSHUFPDZ256rrik
);
317 case X86::VUNPCKLPDZrrk
:
318 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrk
, X86::VSHUFPDZrrik
);
319 case X86::VUNPCKLPDZ128rrkz
:
320 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrkz
, X86::VSHUFPDZ128rrikz
);
321 case X86::VUNPCKLPDZ256rrkz
:
322 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrkz
, X86::VSHUFPDZ256rrikz
);
323 case X86::VUNPCKLPDZrrkz
:
324 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrkz
, X86::VSHUFPDZrrikz
);
// Register-register UNPCKHPD forms. Each case passes the integer-domain
// PUNPCKHQDQ opcode first and the SHUFPD opcode of matching width and mask
// suffix second.
325 case X86::UNPCKHPDrr
:
326 return ProcessUNPCKHPDrr(X86::PUNPCKHQDQrr
, X86::SHUFPDrri
);
327 case X86::VUNPCKHPDrr
:
328 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQrr
, X86::VSHUFPDrri
);
329 case X86::VUNPCKHPDYrr
:
330 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQYrr
, X86::VSHUFPDYrri
);
331 case X86::VUNPCKHPDZ128rr
:
332 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rr
, X86::VSHUFPDZ128rri
);
333 case X86::VUNPCKHPDZ256rr
:
334 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rr
, X86::VSHUFPDZ256rri
);
335 case X86::VUNPCKHPDZrr
:
336 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrr
, X86::VSHUFPDZrri
);
337 case X86::VUNPCKHPDZ128rrk
:
338 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrk
, X86::VSHUFPDZ128rrik
);
339 case X86::VUNPCKHPDZ256rrk
:
340 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrk
, X86::VSHUFPDZ256rrik
);
341 case X86::VUNPCKHPDZrrk
:
342 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrk
, X86::VSHUFPDZrrik
);
343 case X86::VUNPCKHPDZ128rrkz
:
344 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrkz
, X86::VSHUFPDZ128rrikz
);
345 case X86::VUNPCKHPDZ256rrkz
:
346 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrkz
, X86::VSHUFPDZ256rrikz
);
347 case X86::VUNPCKHPDZrrkz
:
348 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrkz
, X86::VSHUFPDZrrikz
);
// Register-memory UNPCKLPD / UNPCKHPD forms -> PUNPCKLQDQ / PUNPCKHQDQ with
// the same vector-width and mask suffix. Only the integer-domain replacement
// is offered for these forms.
349 case X86::UNPCKLPDrm
:
350 return ProcessUNPCKPDrm(X86::PUNPCKLQDQrm
);
351 case X86::VUNPCKLPDrm
:
352 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQrm
);
353 case X86::VUNPCKLPDYrm
:
354 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQYrm
);
355 case X86::VUNPCKLPDZ128rm
:
356 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rm
);
357 case X86::VUNPCKLPDZ256rm
:
358 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rm
);
359 case X86::VUNPCKLPDZrm
:
360 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrm
);
361 case X86::VUNPCKLPDZ128rmk
:
362 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmk
);
363 case X86::VUNPCKLPDZ256rmk
:
364 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmk
);
365 case X86::VUNPCKLPDZrmk
:
366 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmk
);
367 case X86::VUNPCKLPDZ128rmkz
:
368 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmkz
);
369 case X86::VUNPCKLPDZ256rmkz
:
370 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmkz
);
371 case X86::VUNPCKLPDZrmkz
:
372 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmkz
);
373 case X86::UNPCKHPDrm
:
374 return ProcessUNPCKPDrm(X86::PUNPCKHQDQrm
);
375 case X86::VUNPCKHPDrm
:
376 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQrm
);
377 case X86::VUNPCKHPDYrm
:
378 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQYrm
);
379 case X86::VUNPCKHPDZ128rm
:
380 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rm
);
381 case X86::VUNPCKHPDZ256rm
:
382 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rm
);
383 case X86::VUNPCKHPDZrm
:
384 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrm
);
385 case X86::VUNPCKHPDZ128rmk
:
386 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmk
);
387 case X86::VUNPCKHPDZ256rmk
:
388 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmk
);
389 case X86::VUNPCKHPDZrmk
:
390 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmk
);
391 case X86::VUNPCKHPDZ128rmkz
:
392 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmkz
);
393 case X86::VUNPCKHPDZ256rmkz
:
394 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmkz
);
395 case X86::VUNPCKHPDZrmkz
:
396 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmkz
);
// Register-register UNPCKLPS / UNPCKHPS forms -> PUNPCKLDQ / PUNPCKHDQ with
// the same vector-width and mask suffix.
398 case X86::UNPCKLPSrr
:
399 return ProcessUNPCKPS(X86::PUNPCKLDQrr
);
400 case X86::VUNPCKLPSrr
:
401 return ProcessUNPCKPS(X86::VPUNPCKLDQrr
);
402 case X86::VUNPCKLPSYrr
:
403 return ProcessUNPCKPS(X86::VPUNPCKLDQYrr
);
404 case X86::VUNPCKLPSZ128rr
:
405 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rr
);
406 case X86::VUNPCKLPSZ256rr
:
407 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rr
);
408 case X86::VUNPCKLPSZrr
:
409 return ProcessUNPCKPS(X86::VPUNPCKLDQZrr
);
410 case X86::VUNPCKLPSZ128rrk
:
411 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrk
);
412 case X86::VUNPCKLPSZ256rrk
:
413 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrk
);
414 case X86::VUNPCKLPSZrrk
:
415 return ProcessUNPCKPS(X86::VPUNPCKLDQZrrk
);
416 case X86::VUNPCKLPSZ128rrkz
:
417 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrkz
);
418 case X86::VUNPCKLPSZ256rrkz
:
419 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrkz
);
420 case X86::VUNPCKLPSZrrkz
:
421 return ProcessUNPCKPS(X86::VPUNPCKLDQZrrkz
);
422 case X86::UNPCKHPSrr
:
423 return ProcessUNPCKPS(X86::PUNPCKHDQrr
);
424 case X86::VUNPCKHPSrr
:
425 return ProcessUNPCKPS(X86::VPUNPCKHDQrr
);
426 case X86::VUNPCKHPSYrr
:
427 return ProcessUNPCKPS(X86::VPUNPCKHDQYrr
);
428 case X86::VUNPCKHPSZ128rr
:
429 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rr
);
430 case X86::VUNPCKHPSZ256rr
:
431 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rr
);
432 case X86::VUNPCKHPSZrr
:
433 return ProcessUNPCKPS(X86::VPUNPCKHDQZrr
);
434 case X86::VUNPCKHPSZ128rrk
:
435 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrk
);
436 case X86::VUNPCKHPSZ256rrk
:
437 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrk
);
438 case X86::VUNPCKHPSZrrk
:
439 return ProcessUNPCKPS(X86::VPUNPCKHDQZrrk
);
440 case X86::VUNPCKHPSZ128rrkz
:
441 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrkz
);
442 case X86::VUNPCKHPSZ256rrkz
:
443 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrkz
);
444 case X86::VUNPCKHPSZrrkz
:
445 return ProcessUNPCKPS(X86::VPUNPCKHDQZrrkz
);
// Register-memory UNPCKLPS / UNPCKHPS forms -> PUNPCKLDQ / PUNPCKHDQ with the
// same vector-width and mask suffix.
446 case X86::UNPCKLPSrm
:
447 return ProcessUNPCKPS(X86::PUNPCKLDQrm
);
448 case X86::VUNPCKLPSrm
:
449 return ProcessUNPCKPS(X86::VPUNPCKLDQrm
);
450 case X86::VUNPCKLPSYrm
:
451 return ProcessUNPCKPS(X86::VPUNPCKLDQYrm
);
452 case X86::VUNPCKLPSZ128rm
:
453 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rm
);
454 case X86::VUNPCKLPSZ256rm
:
455 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rm
);
456 case X86::VUNPCKLPSZrm
:
457 return ProcessUNPCKPS(X86::VPUNPCKLDQZrm
);
458 case X86::VUNPCKLPSZ128rmk
:
459 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmk
);
460 case X86::VUNPCKLPSZ256rmk
:
461 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmk
);
462 case X86::VUNPCKLPSZrmk
:
463 return ProcessUNPCKPS(X86::VPUNPCKLDQZrmk
);
464 case X86::VUNPCKLPSZ128rmkz
:
465 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmkz
);
466 case X86::VUNPCKLPSZ256rmkz
:
467 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmkz
);
468 case X86::VUNPCKLPSZrmkz
:
469 return ProcessUNPCKPS(X86::VPUNPCKLDQZrmkz
);
470 case X86::UNPCKHPSrm
:
471 return ProcessUNPCKPS(X86::PUNPCKHDQrm
);
472 case X86::VUNPCKHPSrm
:
473 return ProcessUNPCKPS(X86::VPUNPCKHDQrm
);
474 case X86::VUNPCKHPSYrm
:
475 return ProcessUNPCKPS(X86::VPUNPCKHDQYrm
);
476 case X86::VUNPCKHPSZ128rm
:
477 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rm
);
478 case X86::VUNPCKHPSZ256rm
:
479 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rm
);
480 case X86::VUNPCKHPSZrm
:
481 return ProcessUNPCKPS(X86::VPUNPCKHDQZrm
);
482 case X86::VUNPCKHPSZ128rmk
:
483 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmk
);
484 case X86::VUNPCKHPSZ256rmk
:
485 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmk
);
486 case X86::VUNPCKHPSZrmk
:
487 return ProcessUNPCKPS(X86::VPUNPCKHDQZrmk
);
488 case X86::VUNPCKHPSZ128rmkz
:
489 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmkz
);
490 case X86::VUNPCKHPSZ256rmkz
:
491 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmkz
);
492 case X86::VUNPCKHPSZrmkz
:
493 return ProcessUNPCKPS(X86::VPUNPCKHDQZrmkz
);
// Pass entry point. Caches the subtarget, instruction info and scheduling
// model for MF, then visits every instruction of every basic block and hands
// it to processInstruction.
// NOTE(review): the body of the inner `if` and the final return are not
// visible in these lines -- presumably Changed is set and returned; confirm.
499 bool X86FixupInstTuningPass::runOnMachineFunction(MachineFunction
&MF
) {
500 LLVM_DEBUG(dbgs() << "Start X86FixupInstTuning\n";);
501 bool Changed
= false;
// Refresh the cached pointers; the lambdas in processInstruction read them.
502 ST
= &MF
.getSubtarget
<X86Subtarget
>();
503 TII
= ST
->getInstrInfo();
504 SM
= &ST
->getSchedModel();
506 for (MachineBasicBlock
&MBB
: MF
) {
// Explicit iterator loop: processInstruction takes the iterator by reference.
507 for (MachineBasicBlock::iterator I
= MBB
.begin(); I
!= MBB
.end(); ++I
) {
508 if (processInstruction(MF
, MBB
, I
)) {
514 LLVM_DEBUG(dbgs() << "End X86FixupInstTuning\n";);