1 //===-- X86FixupInstTunings.cpp - replace instructions -----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file does a tuning pass replacing slower machine instructions
10 // with faster ones. We do this here, as opposed to during normal ISel, as
11 // attempting to get the "right" instruction can break patterns. This pass
12 // is not meant to search for special cases where an instruction can be transformed
13 // to another; it is only meant to do transformations where the old instruction
14 // is always replaceable with the new instruction. For example:
16 // `vpermq ymm` -> `vshufd ymm`
17 // -- BAD, not always valid (lane cross/non-repeated mask)
19 // `vpermilps ymm` -> `vshufd ymm`
20 // -- GOOD, always replaceable
22 //===----------------------------------------------------------------------===//
25 #include "X86InstrInfo.h"
26 #include "X86Subtarget.h"
27 #include "llvm/ADT/Statistic.h"
28 #include "llvm/CodeGen/MachineFunctionPass.h"
29 #include "llvm/CodeGen/MachineInstrBuilder.h"
30 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #define DEBUG_TYPE "x86-fixup-inst-tuning"
36 STATISTIC(NumInstChanges
, "Number of instructions changes");
// Machine-function pass that swaps selected X86 instructions for equivalent
// ones judged preferable on the current subtarget (see processInstruction).
// It requires the NoVRegs property, i.e. it operates on physical registers
// only.
39 class X86FixupInstTuningPass
: public MachineFunctionPass
{
43 X86FixupInstTuningPass() : MachineFunctionPass(ID
) {}
// Human-readable pass name.
45 StringRef
getPassName() const override
{ return "X86 Fixup Inst Tuning"; }
// Pass entry point: caches the subtarget, instruction info and scheduling
// model for MF, then visits every instruction of every basic block.
47 bool runOnMachineFunction(MachineFunction
&MF
) override
;
// Attempts to replace the instruction at I with a preferable equivalent.
// The boolean result is consumed by runOnMachineFunction.
// NOTE(review): presumably true means the instruction was rewritten -- the
// return statements are not visible in this excerpt; confirm.
48 bool processInstruction(MachineFunction
&MF
, MachineBasicBlock
&MBB
,
49 MachineBasicBlock::iterator
&I
);
51 // This pass runs after regalloc and doesn't support VReg operands.
52 MachineFunctionProperties
getRequiredProperties() const override
{
53 return MachineFunctionProperties().set(
54 MachineFunctionProperties::Property::NoVRegs
);
// Per-function context, assigned at the start of runOnMachineFunction and
// read by the helpers inside processInstruction.
58 const X86InstrInfo
*TII
= nullptr;
59 const X86Subtarget
*ST
= nullptr;
60 const MCSchedModel
*SM
= nullptr;
62 } // end anonymous namespace
64 char X86FixupInstTuningPass::ID
= 0;
66 INITIALIZE_PASS(X86FixupInstTuningPass
, DEBUG_TYPE
, DEBUG_TYPE
, false, false)
// Factory entry point: allocates and returns a new X86FixupInstTuningPass.
68 FunctionPass
*llvm::createX86FixupInstTuning() {
69 return new X86FixupInstTuningPass();
// Compares two optional-like values of the same type T. When both hold a
// value and those values differ, the result is whether `NewVal` is strictly
// less than `CurVal`.
// NOTE(review): the result for the "missing or equal" case is not visible in
// this excerpt -- presumably an empty optional; confirm.
73 static std::optional
<bool> CmpOptionals(T NewVal
, T CurVal
) {
74 if (NewVal
.has_value() && CurVal
.has_value() && *NewVal
!= *CurVal
)
75 return *NewVal
< *CurVal
;
80 bool X86FixupInstTuningPass::processInstruction(
81 MachineFunction
&MF
, MachineBasicBlock
&MBB
,
82 MachineBasicBlock::iterator
&I
) {
83 MachineInstr
&MI
= *I
;
84 unsigned Opc
= MI
.getOpcode();
85 unsigned NumOperands
= MI
.getDesc().getNumOperands();
87 auto GetInstTput
= [&](unsigned Opcode
) -> std::optional
<double> {
88 // We already checked that SchedModel exists in `NewOpcPreferable`.
89 return MCSchedModel::getReciprocalThroughput(
90 *ST
, *(SM
->getSchedClassDesc(TII
->get(Opcode
).getSchedClass())));
93 auto GetInstLat
= [&](unsigned Opcode
) -> std::optional
<double> {
94 // We already checked that SchedModel exists in `NewOpcPreferable`.
95 return MCSchedModel::computeInstrLatency(
96 *ST
, *(SM
->getSchedClassDesc(TII
->get(Opcode
).getSchedClass())));
99 auto GetInstSize
= [&](unsigned Opcode
) -> std::optional
<unsigned> {
100 if (unsigned Size
= TII
->get(Opcode
).getSize())
102 // Zero size means we were unable to compute it.
106 auto NewOpcPreferable
= [&](unsigned NewOpc
,
107 bool ReplaceInTie
= true) -> bool {
108 std::optional
<bool> Res
;
109 if (SM
->hasInstrSchedModel()) {
110 // Compare tput -> lat -> code size.
111 Res
= CmpOptionals(GetInstTput(NewOpc
), GetInstTput(Opc
));
115 Res
= CmpOptionals(GetInstLat(NewOpc
), GetInstLat(Opc
));
120 Res
= CmpOptionals(GetInstSize(Opc
), GetInstSize(NewOpc
));
124 // We were either unable to get tput/lat/codesize or all values
125 // were equal. Return specified option for a tie.
129 // `vpermilpd r, i` -> `vshufpd r, r, i`
130 // `vpermilpd r, i, k` -> `vshufpd r, r, i, k`
131 // `vshufpd` is always as fast or faster than `vpermilpd` and takes
132 // 1 less byte of code size for VEX and EVEX encoding.
133 auto ProcessVPERMILPDri
= [&](unsigned NewOpc
) -> bool {
134 if (!NewOpcPreferable(NewOpc
))
136 unsigned MaskImm
= MI
.getOperand(NumOperands
- 1).getImm();
137 MI
.removeOperand(NumOperands
- 1);
138 MI
.addOperand(MI
.getOperand(NumOperands
- 2));
139 MI
.setDesc(TII
->get(NewOpc
));
140 MI
.addOperand(MachineOperand::CreateImm(MaskImm
));
144 // `vpermilps r, i` -> `vshufps r, r, i`
145 // `vpermilps r, i, k` -> `vshufps r, r, i, k`
146 // `vshufps` is always as fast or faster than `vpermilps` and takes
147 // 1 less byte of code size for VEX and EVEX encoding.
148 auto ProcessVPERMILPSri
= [&](unsigned NewOpc
) -> bool {
149 if (!NewOpcPreferable(NewOpc
))
151 unsigned MaskImm
= MI
.getOperand(NumOperands
- 1).getImm();
152 MI
.removeOperand(NumOperands
- 1);
153 MI
.addOperand(MI
.getOperand(NumOperands
- 2));
154 MI
.setDesc(TII
->get(NewOpc
));
155 MI
.addOperand(MachineOperand::CreateImm(MaskImm
));
159 // `vpermilps m, i` -> `vpshufd m, i` iff no domain delay penalty on shuffles.
160 // `vpshufd` is always as fast or faster than `vpermilps` and takes 1 less
161 // byte of code size.
162 auto ProcessVPERMILPSmi
= [&](unsigned NewOpc
) -> bool {
163 // TODO: Might be worth adding bypass delay if -Os/-Oz is enabled as
164 // `vpshufd` saves a byte of code size.
165 if (!ST
->hasNoDomainDelayShuffle() ||
166 !NewOpcPreferable(NewOpc
, /*ReplaceInTie*/ false))
168 MI
.setDesc(TII
->get(NewOpc
));
172 // `vunpcklpd/vmovlhps r, r` -> `vunpcklqdq r, r`/`vshufpd r, r, 0x00`
173 // `vunpckhpd r, r` -> `vunpckhqdq r, r`/`vshufpd r, r, 0xff`
174 // `vunpcklpd r, r, k` -> `vunpcklqdq r, r, k`/`vshufpd r, r, k, 0x00`
175 // `vunpckhpd r, r, k` -> `vunpckhqdq r, r, k`/`vshufpd r, r, k, 0xff`
176 // `vunpcklpd r, m` -> `vunpcklqdq r, m`
177 // `vunpckhpd r, m` -> `vunpckhqdq r, m`
178 // `vunpcklpd r, m, k` -> `vunpcklqdq r, m, k`
179 // `vunpckhpd r, m, k` -> `vunpckhqdq r, m, k`
180 // 1) If no bypass delay and `vunpck{l|h}qdq` faster than `vunpck{l|h}pd`
181 // -> `vunpck{l|h}qdq`
182 // 2) If `vshufpd` faster than `vunpck{l|h}pd`
185 // `vunpcklps` -> `vunpckldq` (for all operand types if no bypass delay)
186 auto ProcessUNPCK
= [&](unsigned NewOpc
, unsigned MaskImm
) -> bool {
187 if (!NewOpcPreferable(NewOpc
, /*ReplaceInTie*/ false))
190 MI
.setDesc(TII
->get(NewOpc
));
191 MI
.addOperand(MachineOperand::CreateImm(MaskImm
));
195 auto ProcessUNPCKToIntDomain
= [&](unsigned NewOpc
) -> bool {
196 // TODO it may be worth it to set ReplaceInTie to `true` as there is no real
197 // downside to the integer unpck, but if someone doesn't specify exact
198 // target we won't find it faster.
199 if (!ST
->hasNoDomainDelayShuffle() ||
200 !NewOpcPreferable(NewOpc
, /*ReplaceInTie*/ false))
202 MI
.setDesc(TII
->get(NewOpc
));
206 auto ProcessUNPCKLPDrr
= [&](unsigned NewOpcIntDomain
,
207 unsigned NewOpc
) -> bool {
208 if (ProcessUNPCKToIntDomain(NewOpcIntDomain
))
210 return ProcessUNPCK(NewOpc
, 0x00);
212 auto ProcessUNPCKHPDrr
= [&](unsigned NewOpcIntDomain
,
213 unsigned NewOpc
) -> bool {
214 if (ProcessUNPCKToIntDomain(NewOpcIntDomain
))
216 return ProcessUNPCK(NewOpc
, 0xff);
219 auto ProcessUNPCKPDrm
= [&](unsigned NewOpcIntDomain
) -> bool {
220 return ProcessUNPCKToIntDomain(NewOpcIntDomain
);
223 auto ProcessUNPCKPS
= [&](unsigned NewOpc
) -> bool {
224 return ProcessUNPCKToIntDomain(NewOpc
);
228 case X86::VPERMILPDri
:
229 return ProcessVPERMILPDri(X86::VSHUFPDrri
);
230 case X86::VPERMILPDYri
:
231 return ProcessVPERMILPDri(X86::VSHUFPDYrri
);
232 case X86::VPERMILPDZ128ri
:
233 return ProcessVPERMILPDri(X86::VSHUFPDZ128rri
);
234 case X86::VPERMILPDZ256ri
:
235 return ProcessVPERMILPDri(X86::VSHUFPDZ256rri
);
236 case X86::VPERMILPDZri
:
237 return ProcessVPERMILPDri(X86::VSHUFPDZrri
);
238 case X86::VPERMILPDZ128rikz
:
239 return ProcessVPERMILPDri(X86::VSHUFPDZ128rrikz
);
240 case X86::VPERMILPDZ256rikz
:
241 return ProcessVPERMILPDri(X86::VSHUFPDZ256rrikz
);
242 case X86::VPERMILPDZrikz
:
243 return ProcessVPERMILPDri(X86::VSHUFPDZrrikz
);
244 case X86::VPERMILPDZ128rik
:
245 return ProcessVPERMILPDri(X86::VSHUFPDZ128rrik
);
246 case X86::VPERMILPDZ256rik
:
247 return ProcessVPERMILPDri(X86::VSHUFPDZ256rrik
);
248 case X86::VPERMILPDZrik
:
249 return ProcessVPERMILPDri(X86::VSHUFPDZrrik
);
251 case X86::VPERMILPSri
:
252 return ProcessVPERMILPSri(X86::VSHUFPSrri
);
253 case X86::VPERMILPSYri
:
254 return ProcessVPERMILPSri(X86::VSHUFPSYrri
);
255 case X86::VPERMILPSZ128ri
:
256 return ProcessVPERMILPSri(X86::VSHUFPSZ128rri
);
257 case X86::VPERMILPSZ256ri
:
258 return ProcessVPERMILPSri(X86::VSHUFPSZ256rri
);
259 case X86::VPERMILPSZri
:
260 return ProcessVPERMILPSri(X86::VSHUFPSZrri
);
261 case X86::VPERMILPSZ128rikz
:
262 return ProcessVPERMILPSri(X86::VSHUFPSZ128rrikz
);
263 case X86::VPERMILPSZ256rikz
:
264 return ProcessVPERMILPSri(X86::VSHUFPSZ256rrikz
);
265 case X86::VPERMILPSZrikz
:
266 return ProcessVPERMILPSri(X86::VSHUFPSZrrikz
);
267 case X86::VPERMILPSZ128rik
:
268 return ProcessVPERMILPSri(X86::VSHUFPSZ128rrik
);
269 case X86::VPERMILPSZ256rik
:
270 return ProcessVPERMILPSri(X86::VSHUFPSZ256rrik
);
271 case X86::VPERMILPSZrik
:
272 return ProcessVPERMILPSri(X86::VSHUFPSZrrik
);
273 case X86::VPERMILPSmi
:
274 return ProcessVPERMILPSmi(X86::VPSHUFDmi
);
275 case X86::VPERMILPSYmi
:
276 // TODO: See if there is a more generic way we can test if the replacement
277 // instruction is supported.
278 return ST
->hasAVX2() ? ProcessVPERMILPSmi(X86::VPSHUFDYmi
) : false;
279 case X86::VPERMILPSZ128mi
:
280 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mi
);
281 case X86::VPERMILPSZ256mi
:
282 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mi
);
283 case X86::VPERMILPSZmi
:
284 return ProcessVPERMILPSmi(X86::VPSHUFDZmi
);
285 case X86::VPERMILPSZ128mikz
:
286 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mikz
);
287 case X86::VPERMILPSZ256mikz
:
288 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mikz
);
289 case X86::VPERMILPSZmikz
:
290 return ProcessVPERMILPSmi(X86::VPSHUFDZmikz
);
291 case X86::VPERMILPSZ128mik
:
292 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mik
);
293 case X86::VPERMILPSZ256mik
:
294 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mik
);
295 case X86::VPERMILPSZmik
:
296 return ProcessVPERMILPSmi(X86::VPSHUFDZmik
);
299 case X86::UNPCKLPDrr
:
300 return ProcessUNPCKLPDrr(X86::PUNPCKLQDQrr
, X86::SHUFPDrri
);
301 case X86::VMOVLHPSrr
:
302 case X86::VUNPCKLPDrr
:
303 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQrr
, X86::VSHUFPDrri
);
304 case X86::VUNPCKLPDYrr
:
305 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQYrr
, X86::VSHUFPDYrri
);
306 // VMOVLHPS is always 128 bits.
307 case X86::VMOVLHPSZrr
:
308 case X86::VUNPCKLPDZ128rr
:
309 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rr
, X86::VSHUFPDZ128rri
);
310 case X86::VUNPCKLPDZ256rr
:
311 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rr
, X86::VSHUFPDZ256rri
);
312 case X86::VUNPCKLPDZrr
:
313 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrr
, X86::VSHUFPDZrri
);
314 case X86::VUNPCKLPDZ128rrk
:
315 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrk
, X86::VSHUFPDZ128rrik
);
316 case X86::VUNPCKLPDZ256rrk
:
317 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrk
, X86::VSHUFPDZ256rrik
);
318 case X86::VUNPCKLPDZrrk
:
319 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrk
, X86::VSHUFPDZrrik
);
320 case X86::VUNPCKLPDZ128rrkz
:
321 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrkz
, X86::VSHUFPDZ128rrikz
);
322 case X86::VUNPCKLPDZ256rrkz
:
323 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrkz
, X86::VSHUFPDZ256rrikz
);
324 case X86::VUNPCKLPDZrrkz
:
325 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrkz
, X86::VSHUFPDZrrikz
);
326 case X86::UNPCKHPDrr
:
327 return ProcessUNPCKHPDrr(X86::PUNPCKHQDQrr
, X86::SHUFPDrri
);
328 case X86::VUNPCKHPDrr
:
329 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQrr
, X86::VSHUFPDrri
);
330 case X86::VUNPCKHPDYrr
:
331 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQYrr
, X86::VSHUFPDYrri
);
332 case X86::VUNPCKHPDZ128rr
:
333 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rr
, X86::VSHUFPDZ128rri
);
334 case X86::VUNPCKHPDZ256rr
:
335 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rr
, X86::VSHUFPDZ256rri
);
336 case X86::VUNPCKHPDZrr
:
337 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrr
, X86::VSHUFPDZrri
);
338 case X86::VUNPCKHPDZ128rrk
:
339 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrk
, X86::VSHUFPDZ128rrik
);
340 case X86::VUNPCKHPDZ256rrk
:
341 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrk
, X86::VSHUFPDZ256rrik
);
342 case X86::VUNPCKHPDZrrk
:
343 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrk
, X86::VSHUFPDZrrik
);
344 case X86::VUNPCKHPDZ128rrkz
:
345 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrkz
, X86::VSHUFPDZ128rrikz
);
346 case X86::VUNPCKHPDZ256rrkz
:
347 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrkz
, X86::VSHUFPDZ256rrikz
);
348 case X86::VUNPCKHPDZrrkz
:
349 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrkz
, X86::VSHUFPDZrrikz
);
350 case X86::UNPCKLPDrm
:
351 return ProcessUNPCKPDrm(X86::PUNPCKLQDQrm
);
352 case X86::VUNPCKLPDrm
:
353 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQrm
);
354 case X86::VUNPCKLPDYrm
:
355 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQYrm
);
356 case X86::VUNPCKLPDZ128rm
:
357 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rm
);
358 case X86::VUNPCKLPDZ256rm
:
359 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rm
);
360 case X86::VUNPCKLPDZrm
:
361 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrm
);
362 case X86::VUNPCKLPDZ128rmk
:
363 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmk
);
364 case X86::VUNPCKLPDZ256rmk
:
365 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmk
);
366 case X86::VUNPCKLPDZrmk
:
367 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmk
);
368 case X86::VUNPCKLPDZ128rmkz
:
369 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmkz
);
370 case X86::VUNPCKLPDZ256rmkz
:
371 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmkz
);
372 case X86::VUNPCKLPDZrmkz
:
373 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmkz
);
374 case X86::UNPCKHPDrm
:
375 return ProcessUNPCKPDrm(X86::PUNPCKHQDQrm
);
376 case X86::VUNPCKHPDrm
:
377 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQrm
);
378 case X86::VUNPCKHPDYrm
:
379 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQYrm
);
380 case X86::VUNPCKHPDZ128rm
:
381 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rm
);
382 case X86::VUNPCKHPDZ256rm
:
383 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rm
);
384 case X86::VUNPCKHPDZrm
:
385 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrm
);
386 case X86::VUNPCKHPDZ128rmk
:
387 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmk
);
388 case X86::VUNPCKHPDZ256rmk
:
389 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmk
);
390 case X86::VUNPCKHPDZrmk
:
391 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmk
);
392 case X86::VUNPCKHPDZ128rmkz
:
393 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmkz
);
394 case X86::VUNPCKHPDZ256rmkz
:
395 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmkz
);
396 case X86::VUNPCKHPDZrmkz
:
397 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmkz
);
399 case X86::UNPCKLPSrr
:
400 return ProcessUNPCKPS(X86::PUNPCKLDQrr
);
401 case X86::VUNPCKLPSrr
:
402 return ProcessUNPCKPS(X86::VPUNPCKLDQrr
);
403 case X86::VUNPCKLPSYrr
:
404 return ProcessUNPCKPS(X86::VPUNPCKLDQYrr
);
405 case X86::VUNPCKLPSZ128rr
:
406 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rr
);
407 case X86::VUNPCKLPSZ256rr
:
408 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rr
);
409 case X86::VUNPCKLPSZrr
:
410 return ProcessUNPCKPS(X86::VPUNPCKLDQZrr
);
411 case X86::VUNPCKLPSZ128rrk
:
412 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrk
);
413 case X86::VUNPCKLPSZ256rrk
:
414 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrk
);
415 case X86::VUNPCKLPSZrrk
:
416 return ProcessUNPCKPS(X86::VPUNPCKLDQZrrk
);
417 case X86::VUNPCKLPSZ128rrkz
:
418 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrkz
);
419 case X86::VUNPCKLPSZ256rrkz
:
420 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrkz
);
421 case X86::VUNPCKLPSZrrkz
:
422 return ProcessUNPCKPS(X86::VPUNPCKLDQZrrkz
);
423 case X86::UNPCKHPSrr
:
424 return ProcessUNPCKPS(X86::PUNPCKHDQrr
);
425 case X86::VUNPCKHPSrr
:
426 return ProcessUNPCKPS(X86::VPUNPCKHDQrr
);
427 case X86::VUNPCKHPSYrr
:
428 return ProcessUNPCKPS(X86::VPUNPCKHDQYrr
);
429 case X86::VUNPCKHPSZ128rr
:
430 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rr
);
431 case X86::VUNPCKHPSZ256rr
:
432 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rr
);
433 case X86::VUNPCKHPSZrr
:
434 return ProcessUNPCKPS(X86::VPUNPCKHDQZrr
);
435 case X86::VUNPCKHPSZ128rrk
:
436 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrk
);
437 case X86::VUNPCKHPSZ256rrk
:
438 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrk
);
439 case X86::VUNPCKHPSZrrk
:
440 return ProcessUNPCKPS(X86::VPUNPCKHDQZrrk
);
441 case X86::VUNPCKHPSZ128rrkz
:
442 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrkz
);
443 case X86::VUNPCKHPSZ256rrkz
:
444 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrkz
);
445 case X86::VUNPCKHPSZrrkz
:
446 return ProcessUNPCKPS(X86::VPUNPCKHDQZrrkz
);
447 case X86::UNPCKLPSrm
:
448 return ProcessUNPCKPS(X86::PUNPCKLDQrm
);
449 case X86::VUNPCKLPSrm
:
450 return ProcessUNPCKPS(X86::VPUNPCKLDQrm
);
451 case X86::VUNPCKLPSYrm
:
452 return ProcessUNPCKPS(X86::VPUNPCKLDQYrm
);
453 case X86::VUNPCKLPSZ128rm
:
454 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rm
);
455 case X86::VUNPCKLPSZ256rm
:
456 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rm
);
457 case X86::VUNPCKLPSZrm
:
458 return ProcessUNPCKPS(X86::VPUNPCKLDQZrm
);
459 case X86::VUNPCKLPSZ128rmk
:
460 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmk
);
461 case X86::VUNPCKLPSZ256rmk
:
462 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmk
);
463 case X86::VUNPCKLPSZrmk
:
464 return ProcessUNPCKPS(X86::VPUNPCKLDQZrmk
);
465 case X86::VUNPCKLPSZ128rmkz
:
466 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmkz
);
467 case X86::VUNPCKLPSZ256rmkz
:
468 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmkz
);
469 case X86::VUNPCKLPSZrmkz
:
470 return ProcessUNPCKPS(X86::VPUNPCKLDQZrmkz
);
471 case X86::UNPCKHPSrm
:
472 return ProcessUNPCKPS(X86::PUNPCKHDQrm
);
473 case X86::VUNPCKHPSrm
:
474 return ProcessUNPCKPS(X86::VPUNPCKHDQrm
);
475 case X86::VUNPCKHPSYrm
:
476 return ProcessUNPCKPS(X86::VPUNPCKHDQYrm
);
477 case X86::VUNPCKHPSZ128rm
:
478 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rm
);
479 case X86::VUNPCKHPSZ256rm
:
480 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rm
);
481 case X86::VUNPCKHPSZrm
:
482 return ProcessUNPCKPS(X86::VPUNPCKHDQZrm
);
483 case X86::VUNPCKHPSZ128rmk
:
484 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmk
);
485 case X86::VUNPCKHPSZ256rmk
:
486 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmk
);
487 case X86::VUNPCKHPSZrmk
:
488 return ProcessUNPCKPS(X86::VPUNPCKHDQZrmk
);
489 case X86::VUNPCKHPSZ128rmkz
:
490 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmkz
);
491 case X86::VUNPCKHPSZ256rmkz
:
492 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmkz
);
493 case X86::VUNPCKHPSZrmkz
:
494 return ProcessUNPCKPS(X86::VPUNPCKHDQZrmkz
);
500 bool X86FixupInstTuningPass::runOnMachineFunction(MachineFunction
&MF
) {
501 LLVM_DEBUG(dbgs() << "Start X86FixupInstTuning\n";);
502 bool Changed
= false;
503 ST
= &MF
.getSubtarget
<X86Subtarget
>();
504 TII
= ST
->getInstrInfo();
505 SM
= &ST
->getSchedModel();
507 for (MachineBasicBlock
&MBB
: MF
) {
508 for (MachineBasicBlock::iterator I
= MBB
.begin(); I
!= MBB
.end(); ++I
) {
509 if (processInstruction(MF
, MBB
, I
)) {
515 LLVM_DEBUG(dbgs() << "End X86FixupInstTuning\n";);