[x86] fix assert with horizontal math + broadcast of vector (PR43402)
[llvm-core.git] / lib / Target / X86 / X86ISelLowering.h
1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that X86 uses to lower LLVM code into a
10 // selection DAG.
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
17 #include "llvm/CodeGen/CallingConvLower.h"
18 #include "llvm/CodeGen/SelectionDAG.h"
19 #include "llvm/CodeGen/TargetLowering.h"
21 namespace llvm {
22 class X86Subtarget;
23 class X86TargetMachine;
25 namespace X86ISD {
26 // X86 Specific DAG Nodes
27 enum NodeType : unsigned {
28 // Start the numbering where the builtin ops leave off.
29 FIRST_NUMBER = ISD::BUILTIN_OP_END,
31 /// Bit scan forward.
32 BSF,
33 /// Bit scan reverse.
34 BSR,
36 /// Double shift instructions. These correspond to
37 /// X86::SHLDxx and X86::SHRDxx instructions.
38 SHLD,
39 SHRD,
41 /// Bitwise logical AND of floating point values. This corresponds
42 /// to X86::ANDPS or X86::ANDPD.
43 FAND,
45 /// Bitwise logical OR of floating point values. This corresponds
46 /// to X86::ORPS or X86::ORPD.
47 FOR,
49 /// Bitwise logical XOR of floating point values. This corresponds
50 /// to X86::XORPS or X86::XORPD.
51 FXOR,
53 /// Bitwise logical ANDNOT of floating point values. This
54 /// corresponds to X86::ANDNPS or X86::ANDNPD.
55 FANDN,
57 /// These operations represent an abstract X86 call
58 /// instruction, which includes a bunch of information. In particular the
59 /// operands of these nodes are:
60 ///
61 /// #0 - The incoming token chain
62 /// #1 - The callee
63 /// #2 - The number of arg bytes the caller pushes on the stack.
64 /// #3 - The number of arg bytes the callee pops off the stack.
65 /// #4 - The value to pass in AL/AX/EAX (optional)
66 /// #5 - The value to pass in DL/DX/EDX (optional)
67 ///
68 /// The result values of these nodes are:
69 ///
70 /// #0 - The outgoing token chain
71 /// #1 - The first register result value (optional)
72 /// #2 - The second register result value (optional)
73 ///
74 CALL,
76 /// Same as call except it adds the NoTrack prefix.
77 NT_CALL,
79 /// X86 compare and logical compare instructions.
80 CMP, COMI, UCOMI,
82 /// X86 bit-test instructions.
83 BT,
85 /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
86 /// operand, usually produced by a CMP instruction.
87 SETCC,
89 /// X86 Select
90 SELECTS,
92 // Same as SETCC except it's materialized with a sbb and the value is all
93 // ones or all zeros.
94 SETCC_CARRY, // R = carry_bit ? ~0 : 0
96 /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
97 /// Operands are two FP values to compare; result is a mask of
98 /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
99 FSETCC,
101 /// X86 FP SETCC, similar to above, but with output as an i1 mask,
102 /// and a version with SAE.
103 FSETCCM, FSETCCM_SAE,
105 /// X86 conditional moves. Operand 0 and operand 1 are the two values
106 /// to select from. Operand 2 is the condition code, and operand 3 is the
107 /// flag operand produced by a CMP or TEST instruction.
108 CMOV,
110 /// X86 conditional branches. Operand 0 is the chain operand, operand 1
111 /// is the block to branch if condition is true, operand 2 is the
112 /// condition code, and operand 3 is the flag operand produced by a CMP
113 /// or TEST instruction.
114 BRCOND,
116 /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
117 /// operand 1 is the target address.
118 NT_BRIND,
120 /// Return with a flag operand. Operand 0 is the chain operand, operand
121 /// 1 is the number of bytes of stack to pop.
122 RET_FLAG,
124 /// Return from interrupt. Operand 0 is the number of bytes to pop.
125 IRET,
127 /// Repeat fill, corresponds to X86::REP_STOSx.
128 REP_STOS,
130 /// Repeat move, corresponds to X86::REP_MOVSx.
131 REP_MOVS,
133 /// On Darwin, this node represents the result of the popl
134 /// at function entry, used for PIC code.
135 GlobalBaseReg,
137 /// A wrapper node for TargetConstantPool, TargetJumpTable,
138 /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
139 /// MCSymbol and TargetBlockAddress.
140 Wrapper,
142 /// Special wrapper used under X86-64 PIC mode for RIP
143 /// relative displacements.
144 WrapperRIP,
146 /// Copies a 64-bit value from an MMX vector to the low word
147 /// of an XMM vector, with the high word zero filled.
148 MOVQ2DQ,
150 /// Copies a 64-bit value from the low word of an XMM vector
151 /// to an MMX vector.
152 MOVDQ2Q,
154 /// Copies a 32-bit value from the low word of an MMX
155 /// vector to a GPR.
156 MMX_MOVD2W,
158 /// Copies a GPR into the low 32-bit word of an MMX vector
159 /// and zeroes out the high word.
160 MMX_MOVW2D,
162 /// Extract an 8-bit value from a vector and zero extend it to
163 /// i32, corresponds to X86::PEXTRB.
164 PEXTRB,
166 /// Extract a 16-bit value from a vector and zero extend it to
167 /// i32, corresponds to X86::PEXTRW.
168 PEXTRW,
170 /// Insert any element of a 4 x float vector into any element
171 /// of a destination 4 x float vector.
172 INSERTPS,
174 /// Insert the lower 8-bits of a 32-bit value to a vector,
175 /// corresponds to X86::PINSRB.
176 PINSRB,
178 /// Insert the lower 16-bits of a 32-bit value to a vector,
179 /// corresponds to X86::PINSRW.
180 PINSRW,
182 /// Shuffle 16 8-bit values within a vector.
183 PSHUFB,
185 /// Compute Sum of Absolute Differences.
186 PSADBW,
187 /// Compute Double Block Packed Sum-Absolute-Differences
188 DBPSADBW,
190 /// Bitwise Logical AND NOT of Packed FP values.
191 ANDNP,
193 /// Blend where the selector is an immediate.
194 BLENDI,
196 /// Dynamic (non-constant condition) vector blend where only the sign bits
197 /// of the condition elements are used. This is used to enforce that the
198 /// condition mask is not valid for generic VSELECT optimizations. This
199 /// is also used to implement the intrinsics.
200 /// Operands are in VSELECT order: MASK, TRUE, FALSE
201 BLENDV,
203 /// Combined add and sub on an FP vector.
204 ADDSUB,
206 // FP vector ops with rounding mode.
207 FADD_RND, FADDS, FADDS_RND,
208 FSUB_RND, FSUBS, FSUBS_RND,
209 FMUL_RND, FMULS, FMULS_RND,
210 FDIV_RND, FDIVS, FDIVS_RND,
211 FMAX_SAE, FMAXS_SAE,
212 FMIN_SAE, FMINS_SAE,
213 FSQRT_RND, FSQRTS, FSQRTS_RND,
215 // FP vector get exponent.
216 FGETEXP, FGETEXP_SAE, FGETEXPS, FGETEXPS_SAE,
217 // Extract Normalized Mantissas.
218 VGETMANT, VGETMANT_SAE, VGETMANTS, VGETMANTS_SAE,
219 // FP Scale.
220 SCALEF, SCALEF_RND,
221 SCALEFS, SCALEFS_RND,
223 // Unsigned Integer average.
224 AVG,
226 /// Integer horizontal add/sub.
227 HADD,
228 HSUB,
230 /// Floating point horizontal add/sub.
231 FHADD,
232 FHSUB,
234 // Detect Conflicts Within a Vector
235 CONFLICT,
237 /// Floating point max and min.
238 FMAX, FMIN,
240 /// Commutative FMIN and FMAX.
241 FMAXC, FMINC,
243 /// Scalar intrinsic floating point max and min.
244 FMAXS, FMINS,
246 /// Floating point reciprocal-sqrt and reciprocal approximation.
247 /// Note that these typically require refinement
248 /// in order to obtain suitable precision.
249 FRSQRT, FRCP,
251 // AVX-512 reciprocal approximations with a little more precision.
252 RSQRT14, RSQRT14S, RCP14, RCP14S,
254 // Thread Local Storage.
255 TLSADDR,
257 // Thread Local Storage. A call to get the start address
258 // of the TLS block for the current module.
259 TLSBASEADDR,
261 // Thread Local Storage. A call to an OS-provided thunk at the
262 // address obtained from an earlier relocation.
263 TLSCALL,
265 // Exception Handling helpers.
266 EH_RETURN,
268 // SjLj exception handling setjmp.
269 EH_SJLJ_SETJMP,
271 // SjLj exception handling longjmp.
272 EH_SJLJ_LONGJMP,
274 // SjLj exception handling dispatch.
275 EH_SJLJ_SETUP_DISPATCH,
277 /// Tail call return. See X86TargetLowering::LowerCall for
278 /// the list of operands.
279 TC_RETURN,
281 // Vector move to low scalar and zero higher vector elements.
282 VZEXT_MOVL,
284 // Vector integer truncate.
285 VTRUNC,
286 // Vector integer truncate with unsigned/signed saturation.
287 VTRUNCUS, VTRUNCS,
289 // Masked version of the above. Used when less than a 128-bit result is
290 // produced since the mask only applies to the lower elements and can't
291 // be represented by a select.
292 // SRC, PASSTHRU, MASK
293 VMTRUNC, VMTRUNCUS, VMTRUNCS,
295 // Vector FP extend.
296 VFPEXT, VFPEXT_SAE, VFPEXTS, VFPEXTS_SAE,
298 // Vector FP round.
299 VFPROUND, VFPROUND_RND, VFPROUNDS, VFPROUNDS_RND,
301 // Masked version of above. Used for v2f64->v4f32.
302 // SRC, PASSTHRU, MASK
303 VMFPROUND,
305 // 128-bit vector logical left / right shift
306 VSHLDQ, VSRLDQ,
308 // Vector shift elements
309 VSHL, VSRL, VSRA,
311 // Vector variable shift
312 VSHLV, VSRLV, VSRAV,
314 // Vector shift elements by immediate
315 VSHLI, VSRLI, VSRAI,
317 // Shifts of mask registers.
318 KSHIFTL, KSHIFTR,
320 // Bit rotate by immediate
321 VROTLI, VROTRI,
323 // Vector packed double/float comparison.
324 CMPP,
326 // Vector integer comparisons.
327 PCMPEQ, PCMPGT,
329 // v8i16 Horizontal minimum and position.
330 PHMINPOS,
332 MULTISHIFT,
334 /// Vector comparison generating mask bits for fp and
335 /// integer signed and unsigned data types.
336 CMPM,
337 // Vector comparison with SAE for FP values
338 CMPM_SAE,
340 // Arithmetic operations with FLAGS results.
341 ADD, SUB, ADC, SBB, SMUL, UMUL,
342 OR, XOR, AND,
344 // Bit field extract.
345 BEXTR,
347 // Zero High Bits Starting with Specified Bit Position.
348 BZHI,
350 // X86-specific multiply by immediate.
351 MUL_IMM,
353 // Vector sign bit extraction.
354 MOVMSK,
356 // Vector bitwise comparisons.
357 PTEST,
359 // Vector packed fp sign bitwise comparisons.
360 TESTP,
362 // OR/AND test for masks.
363 KORTEST,
364 KTEST,
366 // ADD for masks.
367 KADD,
369 // Several flavors of instructions with vector shuffle behaviors.
370 // Saturated signed/unsigned packing.
371 PACKSS,
372 PACKUS,
373 // Intra-lane alignr.
374 PALIGNR,
375 // AVX512 inter-lane alignr.
376 VALIGN,
377 PSHUFD,
378 PSHUFHW,
379 PSHUFLW,
380 SHUFP,
381 // VBMI2 Concat & Shift.
382 VSHLD,
383 VSHRD,
384 VSHLDV,
385 VSHRDV,
386 // Shuffle Packed Values at 128-bit granularity.
387 SHUF128,
388 MOVDDUP,
389 MOVSHDUP,
390 MOVSLDUP,
391 MOVLHPS,
392 MOVHLPS,
393 MOVSD,
394 MOVSS,
395 UNPCKL,
396 UNPCKH,
397 VPERMILPV,
398 VPERMILPI,
399 VPERMI,
400 VPERM2X128,
402 // Variable Permute (VPERM).
403 // Res = VPERMV MaskV, V0
404 VPERMV,
406 // 3-op Variable Permute (VPERMT2).
407 // Res = VPERMV3 V0, MaskV, V1
408 VPERMV3,
410 // Bitwise ternary logic.
411 VPTERNLOG,
412 // Fix Up Special Packed Float32/64 values.
413 VFIXUPIMM, VFIXUPIMM_SAE,
414 VFIXUPIMMS, VFIXUPIMMS_SAE,
415 // Range Restriction Calculation For Packed Pairs of Float32/64 values.
416 VRANGE, VRANGE_SAE, VRANGES, VRANGES_SAE,
417 // Reduce - Perform Reduction Transformation on scalar/packed FP.
418 VREDUCE, VREDUCE_SAE, VREDUCES, VREDUCES_SAE,
419 // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
420 // Also used by the legacy (V)ROUND intrinsics where we mask out the
421 // scaling part of the immediate.
422 VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE,
423 // Tests the type of packed FP values.
424 VFPCLASS,
425 // Tests the type of a scalar FP value.
426 VFPCLASSS,
428 // Broadcast (splat) scalar or element 0 of a vector. If the operand is
429 // a vector, this node may change the vector length as part of the splat.
430 VBROADCAST,
431 // Broadcast mask to vector.
432 VBROADCASTM,
433 // Broadcast subvector to vector.
434 SUBV_BROADCAST,
436 /// SSE4A Extraction and Insertion.
437 EXTRQI, INSERTQI,
439 // XOP arithmetic/logical shifts.
440 VPSHA, VPSHL,
441 // XOP signed/unsigned integer comparisons.
442 VPCOM, VPCOMU,
443 // XOP packed permute bytes.
444 VPPERM,
445 // XOP two source permutation.
446 VPERMIL2,
448 // Vector multiply packed unsigned doubleword integers.
449 PMULUDQ,
450 // Vector multiply packed signed doubleword integers.
451 PMULDQ,
452 // Vector Multiply Packed Signed Integers with Round and Scale (PMULHRSW).
453 MULHRS,
455 // Multiply and Add Packed Integers.
456 VPMADDUBSW, VPMADDWD,
458 // AVX512IFMA multiply and add.
459 // NOTE: These are different than the instruction and perform
460 // op0 x op1 + op2.
461 VPMADD52L, VPMADD52H,
463 // VNNI
464 VPDPBUSD,
465 VPDPBUSDS,
466 VPDPWSSD,
467 VPDPWSSDS,
469 // FMA nodes.
470 // We use the target independent ISD::FMA for the non-inverted case.
471 FNMADD,
472 FMSUB,
473 FNMSUB,
474 FMADDSUB,
475 FMSUBADD,
477 // FMA with rounding mode.
478 FMADD_RND,
479 FNMADD_RND,
480 FMSUB_RND,
481 FNMSUB_RND,
482 FMADDSUB_RND,
483 FMSUBADD_RND,
485 // Compress and expand.
486 COMPRESS,
487 EXPAND,
489 // Bits shuffle
490 VPSHUFBITQMB,
492 // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
493 SINT_TO_FP_RND, UINT_TO_FP_RND,
494 SCALAR_SINT_TO_FP, SCALAR_UINT_TO_FP,
495 SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,
497 // Vector float/double to signed/unsigned integer.
498 CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
499 // Scalar float/double to signed/unsigned integer.
500 CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND,
502 // Vector float/double to signed/unsigned integer with truncation.
503 CVTTP2SI, CVTTP2UI, CVTTP2SI_SAE, CVTTP2UI_SAE,
504 // Scalar float/double to signed/unsigned integer with truncation.
505 CVTTS2SI, CVTTS2UI, CVTTS2SI_SAE, CVTTS2UI_SAE,
507 // Vector signed/unsigned integer to float/double.
508 CVTSI2P, CVTUI2P,
510 // Masked versions of above. Used for v2f64->v4f32.
511 // SRC, PASSTHRU, MASK
512 MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI,
513 MCVTSI2P, MCVTUI2P,
515 // Vector float to bfloat16.
516 // Convert TWO packed single data to one packed BF16 data
517 CVTNE2PS2BF16,
518 // Convert packed single data to packed BF16 data
519 CVTNEPS2BF16,
520 // Masked version of above.
521 // SRC, PASSTHRU, MASK
522 MCVTNEPS2BF16,
524 // Dot product of BF16 pairs accumulated into
525 // packed single precision.
526 DPBF16PS,
528 // Save xmm argument registers to the stack, according to %al. An operator
529 // is needed so that this can be expanded with control flow.
530 VASTART_SAVE_XMM_REGS,
532 // Windows's _chkstk call to do stack probing.
533 WIN_ALLOCA,
535 // For allocating variable amounts of stack space when using
536 // segmented stacks. Check if the current stacklet has enough space, and
537 // falls back to heap allocation if not.
538 SEG_ALLOCA,
540 // Memory barriers.
541 MEMBARRIER,
542 MFENCE,
544 // Store FP status word into i16 register.
545 FNSTSW16r,
547 // Store contents of %ah into %eflags.
548 SAHF,
550 // Get a random integer and indicate whether it is valid in CF.
551 RDRAND,
553 // Get a NIST SP800-90B & C compliant random integer and
554 // indicate whether it is valid in CF.
555 RDSEED,
557 // Protection keys
558 // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
559 // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
560 // value for ECX.
561 RDPKRU, WRPKRU,
563 // SSE42 string comparisons.
564 // These nodes produce 3 results: index, mask, and flags. X86ISelDAGToDAG
565 // will emit one or two instructions based on which results are used. If
566 // both flags and index/mask are used, this allows us to use a single
567 // instruction since we won't have to pick an opcode for flags. Instead we
568 // can rely on the DAG to CSE everything and decide at isel.
569 PCMPISTR,
570 PCMPESTR,
572 // Test if in transactional execution.
573 XTEST,
575 // ERI instructions.
576 RSQRT28, RSQRT28_SAE, RSQRT28S, RSQRT28S_SAE,
577 RCP28, RCP28_SAE, RCP28S, RCP28S_SAE, EXP2, EXP2_SAE,
579 // Conversions between float and half-float.
580 CVTPS2PH, CVTPH2PS, CVTPH2PS_SAE,
582 // Masked version of above.
583 // SRC, RND, PASSTHRU, MASK
584 MCVTPS2PH,
586 // Galois Field Arithmetic Instructions
587 GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,
589 // LWP insert record.
590 LWPINS,
592 // User level wait
593 UMWAIT, TPAUSE,
595 // Enqueue Stores Instructions
596 ENQCMD, ENQCMDS,
598 // For avx512-vp2intersect
599 VP2INTERSECT,
601 // Compare and swap.
602 LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
603 LCMPXCHG8_DAG,
604 LCMPXCHG16_DAG,
605 LCMPXCHG8_SAVE_EBX_DAG,
606 LCMPXCHG16_SAVE_RBX_DAG,
608 /// LOCK-prefixed arithmetic read-modify-write instructions.
609 /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
610 LADD, LSUB, LOR, LXOR, LAND,
612 // Load, scalar_to_vector, and zero extend.
613 VZEXT_LOAD,
615 // extract_vector_elt, store.
616 VEXTRACT_STORE,
618 // Store FP control word into i16 memory.
619 FNSTCW16m,
621 /// This instruction implements FP_TO_SINT with the
622 /// integer destination in memory and a FP reg source. This corresponds
623 /// to the X86::FIST*m instructions and the rounding mode change stuff. It
624 /// has two inputs (token chain and address) and two outputs (int value
625 /// and token chain). Memory VT specifies the type to store to.
626 FP_TO_INT_IN_MEM,
628 /// This instruction implements SINT_TO_FP with the
629 /// integer source in memory and FP reg result. This corresponds to the
630 /// X86::FILD*m instructions. It has two inputs (token chain and address)
631 /// and two outputs (FP value and token chain). FILD_FLAG also produces a
632 /// flag. The integer source type is specified by the memory VT.
633 FILD,
634 FILD_FLAG,
636 /// This instruction implements a fp->int store from FP stack
637 /// slots. This corresponds to the fist instruction. It takes a
638 /// chain operand, value to store, address, and glue. The memory VT
639 /// specifies the type to store as.
640 FIST,
642 /// This instruction implements an extending load to FP stack slots.
643 /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
644 /// operand, and ptr to load from. The memory VT specifies the type to
645 /// load from.
646 FLD,
648 /// This instruction implements a truncating store from FP stack
649 /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
650 /// chain operand, value to store, address, and glue. The memory VT
651 /// specifies the type to store as.
652 FST,
654 /// This instruction grabs the address of the next argument
655 /// from a va_list. (reads and modifies the va_list in memory)
656 VAARG_64,
658 // Vector truncating store with unsigned/signed saturation
659 VTRUNCSTOREUS, VTRUNCSTORES,
660 // Vector truncating masked store with unsigned/signed saturation
661 VMTRUNCSTOREUS, VMTRUNCSTORES,
663 // X86 specific gather and scatter
664 MGATHER, MSCATTER,
666 // WARNING: Do not add anything at the end unless you want the node to
667 // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
668 // opcodes will be treated as target memory ops!
669 };
670 } // end namespace X86ISD
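// Illustrative sketch (not part of this header): target-specific opcodes such
// as the horizontal-math nodes above are created in X86ISelLowering.cpp with
// the ordinary SelectionDAG API. Assuming LHS/RHS are v4f32 SDValues, DL is an
// SDLoc, and DAG is the SelectionDAG, a floating point horizontal add (the
// kind of node involved in the PR43402 fix named in the commit title) could be
// built roughly as:
//
//   SDValue HAdd = DAG.getNode(X86ISD::FHADD, DL, MVT::v4f32, LHS, RHS);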
672 /// Define some predicates that are used for node matching.
673 namespace X86 {
674 /// Returns true if Elt is a constant zero or floating point constant +0.0.
675 bool isZeroNode(SDValue Elt);
677 /// Returns true if the given offset can
678 /// fit into the displacement field of the instruction.
679 bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
680 bool hasSymbolicDisplacement = true);
682 /// Determines whether the callee is required to pop its
683 /// own arguments. Callee pop is necessary to support tail calls.
684 bool isCalleePop(CallingConv::ID CallingConv,
685 bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
687 /// If Op is a constant whose elements are all the same constant or
688 /// undefined, return true and return the constant value in \p SplatVal.
689 bool isConstantSplat(SDValue Op, APInt &SplatVal);
690 } // end namespace X86
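// Illustrative sketch (not part of this header): a DAG combine might use the
// X86::isConstantSplat predicate above roughly as follows, where N is an
// assumed SDNode whose operand 1 may be a constant splat:
//
//   APInt SplatVal;
//   if (X86::isConstantSplat(N->getOperand(1), SplatVal) &&
//       SplatVal.isOneValue()) {
//     // Operand 1 is a splat of the constant 1 (undef lanes allowed).
//   }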
692 //===--------------------------------------------------------------------===//
693 // X86 Implementation of the TargetLowering interface
694 class X86TargetLowering final : public TargetLowering {
695 public:
696 explicit X86TargetLowering(const X86TargetMachine &TM,
697 const X86Subtarget &STI);
699 unsigned getJumpTableEncoding() const override;
700 bool useSoftFloat() const override;
702 void markLibCallAttributes(MachineFunction *MF, unsigned CC,
703 ArgListTy &Args) const override;
705 MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
706 return MVT::i8;
707 }
709 const MCExpr *
710 LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
711 const MachineBasicBlock *MBB, unsigned uid,
712 MCContext &Ctx) const override;
714 /// Returns relocation base for the given PIC jumptable.
715 SDValue getPICJumpTableRelocBase(SDValue Table,
716 SelectionDAG &DAG) const override;
717 const MCExpr *
718 getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
719 unsigned JTI, MCContext &Ctx) const override;
721 /// Return the desired alignment for ByVal aggregate
722 /// function arguments in the caller parameter area. For X86, aggregates
723 /// that contain SSE vectors are placed at 16-byte boundaries while the rest are at
724 /// 4-byte boundaries.
725 unsigned getByValTypeAlignment(Type *Ty,
726 const DataLayout &DL) const override;
728 /// Returns the target specific optimal type for load
729 /// and store operations as a result of memset, memcpy, and memmove
730 /// lowering. If DstAlign is zero that means it's safe to assume that the
731 /// destination alignment can satisfy any constraint. Similarly, if SrcAlign
732 /// is zero it means there is no need to check it against the alignment requirement,
733 /// probably because the source does not need to be loaded. If 'IsMemset' is
734 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
735 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
736 /// source is constant so it does not need to be loaded.
737 /// It returns EVT::Other if the type should be determined using generic
738 /// target-independent logic.
739 EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
740 bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
741 const AttributeList &FuncAttributes) const override;
743 /// Returns true if it's safe to use load / store of the
744 /// specified type to expand memcpy / memset inline. This is mostly true
745 /// for all types except for some special cases. For example, on X86
746 /// targets without SSE2 f64 load / store are done with fldl / fstpl which
747 /// also does type conversion. Note the specified type doesn't have to be
748 /// legal as the hook is used before type legalization.
749 bool isSafeMemOpType(MVT VT) const override;
751 /// Returns true if the target allows unaligned memory accesses of the
752 /// specified type. Returns whether it is "fast" in the last argument.
753 bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
754 MachineMemOperand::Flags Flags,
755 bool *Fast) const override;
757 /// Provide custom lowering hooks for some operations.
759 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
761 /// Places new result values for the node in Results (their number
762 /// and types must exactly match those of the original return values of
763 /// the node), or leaves Results empty, which indicates that the node is not
764 /// to be custom lowered after all.
765 void LowerOperationWrapper(SDNode *N,
766 SmallVectorImpl<SDValue> &Results,
767 SelectionDAG &DAG) const override;
769 /// Replace the results of node with an illegal result
770 /// type with new values built out of custom code.
772 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
773 SelectionDAG &DAG) const override;
775 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
777 // Return true if it is profitable to combine a BUILD_VECTOR with a
778 // stride-pattern to a shuffle and a truncate.
779 // Example of such a combine:
780 // v4i32 build_vector((extract_elt V, 1),
781 // (extract_elt V, 3),
782 // (extract_elt V, 5),
783 // (extract_elt V, 7))
784 // -->
785 // v4i32 truncate (bitcast (shuffle<1,u,3,u,5,u,7,u> V, u) to
786 // v4i64)
787 bool isDesirableToCombineBuildVectorToShuffleTruncate(
788 ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;
790 /// Return true if the target has native support for
791 /// the specified value type and it is 'desirable' to use the type for the
792 /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
793 /// instruction encodings are longer and some i16 instructions are slow.
794 bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
796 /// Return true if the target has native support for the
797 /// specified value type and it is 'desirable' to use the type. e.g. On x86
798 /// i16 is legal, but undesirable since i16 instruction encodings are longer
799 /// and some i16 instructions are slow.
800 bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
802 /// Return 1 if we can compute the negated form of the specified expression
803 /// for the same cost as the expression itself, or 2 if we can compute the
804 /// negated form more cheaply than the expression itself. Else return 0.
805 char isNegatibleForFree(SDValue Op, SelectionDAG &DAG, bool LegalOperations,
806 bool ForCodeSize, unsigned Depth) const override;
808 /// If isNegatibleForFree returns true, return the newly negated expression.
809 SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
810 bool LegalOperations, bool ForCodeSize,
811 unsigned Depth) const override;
813 MachineBasicBlock *
814 EmitInstrWithCustomInserter(MachineInstr &MI,
815 MachineBasicBlock *MBB) const override;
817 /// This method returns the name of a target specific DAG node.
818 const char *getTargetNodeName(unsigned Opcode) const override;
820 /// Do not merge vector stores after legalization because that may conflict
821 /// with x86-specific store splitting optimizations.
822 bool mergeStoresAfterLegalization(EVT MemVT) const override {
823 return !MemVT.isVector();
824 }
826 bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
827 const SelectionDAG &DAG) const override;
829 bool isCheapToSpeculateCttz() const override;
831 bool isCheapToSpeculateCtlz() const override;
833 bool isCtlzFast() const override;
835 bool hasBitPreservingFPLogic(EVT VT) const override {
836 return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
837 }
839 bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
840 // If the pair to store is a mixture of float and int values, we will
841 // save two bitwise instructions and one float-to-int instruction and
842 // add one store instruction. There is potentially a more
843 // significant benefit because it avoids the float->int domain switch
844 // for the input value. So it is more likely a win.
845 if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
846 (LTy.isInteger() && HTy.isFloatingPoint()))
847 return true;
848 // If the pair only contains int values, we will save two bitwise
849 // instructions and add one store instruction (costing one more
850 // store buffer entry). Since the benefit is less clear, we leave
851 // such pairs out until we have a testcase proving it is a win.
852 return false;
853 }
855 bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
857 bool hasAndNotCompare(SDValue Y) const override;
859 bool hasAndNot(SDValue Y) const override;
861 bool hasBitTest(SDValue X, SDValue Y) const override;
863 bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
864 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
865 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
866 SelectionDAG &DAG) const override;
868 bool shouldFoldConstantShiftPairToMask(const SDNode *N,
869 CombineLevel Level) const override;
871 bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
873 bool
874 shouldTransformSignedTruncationCheck(EVT XVT,
875 unsigned KeptBits) const override {
876 // For vectors, we don't have a preference.
877 if (XVT.isVector())
878 return false;
880 auto VTIsOk = [](EVT VT) -> bool {
881 return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
882 VT == MVT::i64;
883 };
885 // We are ok with KeptBitsVT being byte/word/dword, which is what MOVSX supports.
886 // XVT will be larger than KeptBitsVT.
887 MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
888 return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
889 }
891 bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
893 bool shouldSplatInsEltVarIndex(EVT VT) const override;
895 bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
896 return VT.isScalarInteger();
897 }
899 /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
900 MVT hasFastEqualityCompare(unsigned NumBits) const override;
902 /// Return the value type to use for ISD::SETCC.
903 EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
904 EVT VT) const override;
906 bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
907 TargetLoweringOpt &TLO) const override;
909 /// Determine which of the bits specified in Mask are known to be either
910 /// zero or one and return them in the KnownZero/KnownOne bitsets.
911 void computeKnownBitsForTargetNode(const SDValue Op,
912 KnownBits &Known,
913 const APInt &DemandedElts,
914 const SelectionDAG &DAG,
915 unsigned Depth = 0) const override;
917 /// Determine the number of bits in the operation that are sign bits.
918 unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
919 const APInt &DemandedElts,
920 const SelectionDAG &DAG,
921 unsigned Depth) const override;
923 bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
924 const APInt &DemandedElts,
925 APInt &KnownUndef,
926 APInt &KnownZero,
927 TargetLoweringOpt &TLO,
928 unsigned Depth) const override;
930 bool SimplifyDemandedBitsForTargetNode(SDValue Op,
931 const APInt &DemandedBits,
932 const APInt &DemandedElts,
933 KnownBits &Known,
934 TargetLoweringOpt &TLO,
935 unsigned Depth) const override;
937 SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
938 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
939 SelectionDAG &DAG, unsigned Depth) const override;
941 const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
943 SDValue unwrapAddress(SDValue N) const override;
945 SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
947 bool ExpandInlineAsm(CallInst *CI) const override;
949 ConstraintType getConstraintType(StringRef Constraint) const override;
951 /// Examine constraint string and operand type and determine a weight value.
952 /// The operand object must already have been set up with the operand type.
953 ConstraintWeight
954 getSingleConstraintMatchWeight(AsmOperandInfo &info,
955 const char *constraint) const override;
957 const char *LowerXConstraint(EVT ConstraintVT) const override;
959 /// Lower the specified operand into the Ops vector. If it is invalid, don't
960 /// add anything to Ops. If hasMemory is true it means one of the asm
961 /// constraints of the inline asm instruction being processed is 'm'.
962 void LowerAsmOperandForConstraint(SDValue Op,
963 std::string &Constraint,
964 std::vector<SDValue> &Ops,
965 SelectionDAG &DAG) const override;
967 unsigned
968 getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
969 if (ConstraintCode == "i")
970 return InlineAsm::Constraint_i;
971 else if (ConstraintCode == "o")
972 return InlineAsm::Constraint_o;
973 else if (ConstraintCode == "v")
974 return InlineAsm::Constraint_v;
975 else if (ConstraintCode == "X")
976 return InlineAsm::Constraint_X;
977 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
978 }
980 /// Handle Lowering flag assembly outputs.
981 SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, SDLoc DL,
982 const AsmOperandInfo &Constraint,
983 SelectionDAG &DAG) const override;
985 /// Given a physical register constraint
986 /// (e.g. {edx}), return the register number and the register class for the
987 /// register. This should only be used for C_Register constraints. On
988 /// error, this returns a register number of 0.
989 std::pair<unsigned, const TargetRegisterClass *>
990 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
991 StringRef Constraint, MVT VT) const override;
993 /// Return true if the addressing mode represented
994 /// by AM is legal for this target, for a load/store of the specified type.
995 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
996 Type *Ty, unsigned AS,
997 Instruction *I = nullptr) const override;
999 /// Return true if the specified immediate is a legal
1000 /// icmp immediate, that is, the target has icmp instructions which can
1001 /// compare a register against the immediate without having to materialize
1002 /// the immediate into a register.
1003 bool isLegalICmpImmediate(int64_t Imm) const override;
1005 /// Return true if the specified immediate is a legal
1006 /// add immediate, that is, the target has add instructions which can
1007 /// add a register and the immediate without having to materialize
1008 /// the immediate into a register.
1009 bool isLegalAddImmediate(int64_t Imm) const override;
1011 bool isLegalStoreImmediate(int64_t Imm) const override;
1013 /// Return the cost of the scaling factor used in the addressing
1014 /// mode represented by AM for this target, for a load/store
1015 /// of the specified type.
1016 /// If the AM is supported, the return value must be >= 0.
1017 /// If the AM is not supported, it returns a negative value.
1018 int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
1019 unsigned AS) const override;
1021 bool isVectorShiftByScalarCheap(Type *Ty) const override;
1023 /// Add x86-specific opcodes to the default list.
1024 bool isBinOp(unsigned Opcode) const override;
1026 /// Returns true if the opcode is a commutative binary operation.
1027 bool isCommutativeBinOp(unsigned Opcode) const override;
1029 /// Return true if it's free to truncate a value of
1030 /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
1031 /// register EAX to i16 by referencing its sub-register AX.
1032 bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1033 bool isTruncateFree(EVT VT1, EVT VT2) const override;
1035 bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1037 /// Return true if any actual instruction that defines a
1038 /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
1039 /// register. This does not necessarily include registers defined in
1040 /// unknown ways, such as incoming arguments, or copies from unknown
1041 /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
1042 /// does not necessarily apply to truncate instructions. e.g. on x86-64,
1043 /// all instructions that define 32-bit values implicitly zero-extend the
1044 /// result out to 64 bits.
1045 bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1046 bool isZExtFree(EVT VT1, EVT VT2) const override;
1047 bool isZExtFree(SDValue Val, EVT VT2) const override;
1049 /// Return true if folding a vector load into ExtVal (a sign, zero, or any
1050 /// extend node) is profitable.
1051 bool isVectorLoadExtDesirable(SDValue) const override;
1053 /// Return true if an FMA operation is faster than a pair of fmul and fadd
1054 /// instructions. fmuladd intrinsics will be expanded to FMAs when this
1055 /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1056 bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
1058 /// Return true if it's profitable to narrow
1059 /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
1060 /// from i32 to i8 but not from i32 to i16.
1061 bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
1063 /// Given an intrinsic, checks if on the target the intrinsic will need to map
1064 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1065 /// true and stores the intrinsic information into the IntrinsicInfo that was
1066 /// passed to the function.
1067 bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1068 MachineFunction &MF,
1069 unsigned Intrinsic) const override;
1071 /// Returns true if the target can instruction select the
1072 /// specified FP immediate natively. If false, the legalizer will
1073 /// materialize the FP immediate as a load from a constant pool.
1074 bool isFPImmLegal(const APFloat &Imm, EVT VT,
1075 bool ForCodeSize) const override;
1077 /// Targets can use this to indicate that they only support *some*
1078 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1079 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1080 /// be legal.
1081 bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1083 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1084 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1085 /// constant pool entry.
1086 bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1088 /// Returns true if lowering to a jump table is allowed.
1089 bool areJTsAllowed(const Function *Fn) const override;
1091 /// If true, then instruction selection should
1092 /// seek to shrink the FP constant of the specified type to a smaller type
1093 /// in order to save space and / or reduce runtime.
1094 bool ShouldShrinkFPConstant(EVT VT) const override {
1095 // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
1096 // expensive than a straight movsd. On the other hand, it's important to
1097 // shrink long double fp constant since fldt is very slow.
1098 return !X86ScalarSSEf64 || VT == MVT::f80;
1099 }
1101 /// Return true if we believe it is correct and profitable to reduce the
1102 /// load node to a smaller type.
1103 bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1104 EVT NewVT) const override;
1106 /// Return true if the specified scalar FP type is computed in an SSE
1107 /// register, not on the X87 floating point stack.
1108 bool isScalarFPTypeInSSEReg(EVT VT) const {
1109 return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
1110 (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
1111 }
1113 /// Returns true if it is beneficial to convert a load of a constant
1114 /// to just the constant itself.
1115 bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1116 Type *Ty) const override;
1118 bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
1120 bool convertSelectOfConstantsToMath(EVT VT) const override;
1122 bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
1123 SDValue C) const override;
1125 bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
1126 bool IsSigned) const override;
1128 /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1129 /// with this index.
1130 bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1131 unsigned Index) const override;
1133 /// Scalar ops always have equal or better analysis/performance/power than
1134 /// the vector equivalent, so this always makes sense if the scalar op is
1135 /// supported.
1136 bool shouldScalarizeBinop(SDValue) const override;
1138 /// Extract of a scalar FP value from index 0 of a vector is free.
1139 bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1140 EVT EltVT = VT.getScalarType();
1142 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1143 }
1144 /// Overflow nodes should get combined/lowered to optimal instructions
1145 /// (they should allow eliminating explicit compares by getting flags from
1146 /// math ops).
1147 bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const override;
1149 bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1150 unsigned AddrSpace) const override {
1151 // If we can replace more than 2 scalar stores, there will be a reduction
1152 // in instructions even after we add a vector constant load.
1153 return NumElem > 2;
1154 }
1156 bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1157 const SelectionDAG &DAG,
1158 const MachineMemOperand &MMO) const override;
1160 /// Intel processors have a unified instruction and data cache
1161 const char * getClearCacheBuiltinName() const override {
1162 return nullptr; // nothing to do, move along.
1163 }
1165 unsigned getRegisterByName(const char* RegName, EVT VT,
1166 SelectionDAG &DAG) const override;
1168 /// If a physical register, this returns the register that receives the
1169 /// exception address on entry to an EH pad.
1170 unsigned
1171 getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1173 /// If a physical register, this returns the register that receives the
1174 /// exception typeid on entry to a landing pad.
1175 unsigned
1176 getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1178 virtual bool needsFixedCatchObjects() const override;
1180 /// This method returns a target specific FastISel object,
1181 /// or null if the target does not support "fast" ISel.
1182 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1183 const TargetLibraryInfo *libInfo) const override;
1185 /// If the target has a standard location for the stack protector cookie,
1186 /// returns the address of that location. Otherwise, returns nullptr.
1187 Value *getIRStackGuard(IRBuilder<> &IRB) const override;
1189 bool useLoadStackGuardNode() const override;
1190 bool useStackGuardXorFP() const override;
1191 void insertSSPDeclarations(Module &M) const override;
1192 Value *getSDagStackGuard(const Module &M) const override;
1193 Function *getSSPStackGuardCheck(const Module &M) const override;
1194 SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1195 const SDLoc &DL) const override;
1198 /// Return true if the target stores SafeStack pointer at a fixed offset in
1199 /// some non-standard address space, and populates the address space and
1200 /// offset as appropriate.
1201 Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
1203 SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
1204 SelectionDAG &DAG) const;
1206 bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
1208 /// Customize the preferred legalization strategy for certain types.
1209 LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1211 MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1212 EVT VT) const override;
1214 unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1215 CallingConv::ID CC,
1216 EVT VT) const override;
1218 unsigned getVectorTypeBreakdownForCallingConv(
1219 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1220 unsigned &NumIntermediates, MVT &RegisterVT) const override;
1222 bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1224 bool supportSwiftError() const override;
1226 StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1228 unsigned getStackProbeSize(MachineFunction &MF) const;
1230 bool hasVectorBlend() const override { return true; }
1232 unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1234 /// Lower interleaved load(s) into target specific
1235 /// instructions/intrinsics.
1236 bool lowerInterleavedLoad(LoadInst *LI,
1237 ArrayRef<ShuffleVectorInst *> Shuffles,
1238 ArrayRef<unsigned> Indices,
1239 unsigned Factor) const override;
1241 /// Lower interleaved store(s) into target specific
1242 /// instructions/intrinsics.
1243 bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1244 unsigned Factor) const override;
1246 SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
1247 SDValue Addr, SelectionDAG &DAG)
1248 const override;
1250 protected:
1251 std::pair<const TargetRegisterClass *, uint8_t>
1252 findRepresentativeClass(const TargetRegisterInfo *TRI,
1253 MVT VT) const override;
1255 private:
1256 /// Keep a reference to the X86Subtarget around so that we can
1257 /// make the right decision when generating code for different targets.
1258 const X86Subtarget &Subtarget;
1260 /// Select between SSE or x87 floating point ops.
1261 /// When SSE is available, use it for f32 operations.
1262 /// When SSE2 is available, use it for f64 operations.
1263 bool X86ScalarSSEf32;
1264 bool X86ScalarSSEf64;
1266 /// A list of legal FP immediates.
1267 std::vector<APFloat> LegalFPImmediates;
1269 /// Indicate that this x86 target can instruction
1270 /// select the specified FP immediate natively.
1271 void addLegalFPImmediate(const APFloat& Imm) {
1272 LegalFPImmediates.push_back(Imm);
1273 }
1275 SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
1276 CallingConv::ID CallConv, bool isVarArg,
1277 const SmallVectorImpl<ISD::InputArg> &Ins,
1278 const SDLoc &dl, SelectionDAG &DAG,
1279 SmallVectorImpl<SDValue> &InVals,
1280 uint32_t *RegMask) const;
1281 SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1282 const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1283 const SDLoc &dl, SelectionDAG &DAG,
1284 const CCValAssign &VA, MachineFrameInfo &MFI,
1285 unsigned i) const;
1286 SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1287 const SDLoc &dl, SelectionDAG &DAG,
1288 const CCValAssign &VA,
1289 ISD::ArgFlagsTy Flags) const;
1291 // Call lowering helpers.
1293 /// Check whether the call is eligible for tail call optimization. Targets
1294 /// that want to do tail call optimization should implement this function.
1295 bool IsEligibleForTailCallOptimization(SDValue Callee,
1296 CallingConv::ID CalleeCC,
1297 bool isVarArg,
1298 bool isCalleeStructRet,
1299 bool isCallerStructRet,
1300 Type *RetTy,
1301 const SmallVectorImpl<ISD::OutputArg> &Outs,
1302 const SmallVectorImpl<SDValue> &OutVals,
1303 const SmallVectorImpl<ISD::InputArg> &Ins,
1304 SelectionDAG& DAG) const;
1305 SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1306 SDValue Chain, bool IsTailCall,
1307 bool Is64Bit, int FPDiff,
1308 const SDLoc &dl) const;
1310 unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1311 SelectionDAG &DAG) const;
1313 unsigned getAddressSpace(void) const;
1315 SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned) const;
1317 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1318 SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1319 SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1320 SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1322 unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
1323 const unsigned char OpFlags = 0) const;
1324 SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1325 SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1326 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1327 SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1328 SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1330 /// Creates target global address or external symbol nodes for calls or
1331 /// other uses.
1332 SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1333 bool ForCall) const;
1335 SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1336 SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1337 SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1338 SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1339 SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1340 SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1341 SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1342 SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1343 SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1344 SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1345 SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1346 SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1347 SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1348 SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1349 SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1350 SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1351 SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1352 SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1353 SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1354 SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1355 SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1356 SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
1357 SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1358 SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
1359 SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
1360 SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1361 SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
1362 SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
1363 SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
1365 SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
1366 RTLIB::Libcall Call) const;
1368 SDValue
1369 LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1370 const SmallVectorImpl<ISD::InputArg> &Ins,
1371 const SDLoc &dl, SelectionDAG &DAG,
1372 SmallVectorImpl<SDValue> &InVals) const override;
1373 SDValue LowerCall(CallLoweringInfo &CLI,
1374 SmallVectorImpl<SDValue> &InVals) const override;
1376 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1377 const SmallVectorImpl<ISD::OutputArg> &Outs,
1378 const SmallVectorImpl<SDValue> &OutVals,
1379 const SDLoc &dl, SelectionDAG &DAG) const override;
1381 bool supportSplitCSR(MachineFunction *MF) const override {
1382 return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1383 MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1384 }
1385 void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1386 void insertCopiesSplitCSR(
1387 MachineBasicBlock *Entry,
1388 const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1390 bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1392 bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1394 EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1395 ISD::NodeType ExtendKind) const override;
1397 bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1398 bool isVarArg,
1399 const SmallVectorImpl<ISD::OutputArg> &Outs,
1400 LLVMContext &Context) const override;
1402 const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1404 TargetLoweringBase::AtomicExpansionKind
1405 shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
1406 bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1407 TargetLoweringBase::AtomicExpansionKind
1408 shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1410 LoadInst *
1411 lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1413 bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override;
1414 bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override;
1416 bool needsCmpXchgNb(Type *MemType) const;
1418 void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1419 MachineBasicBlock *DispatchBB, int FI) const;
1421 // Utility function to emit the low-level va_arg code for X86-64.
1422 MachineBasicBlock *
1423 EmitVAARG64WithCustomInserter(MachineInstr &MI,
1424 MachineBasicBlock *MBB) const;
1426 /// Utility function to emit the xmm reg save portion of va_start.
1427 MachineBasicBlock *
1428 EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
1429 MachineBasicBlock *BB) const;
1431 MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1432 MachineInstr &MI2,
1433 MachineBasicBlock *BB) const;
1435 MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1436 MachineBasicBlock *BB) const;
1438 MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
1439 MachineBasicBlock *BB) const;
1441 MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1442 MachineBasicBlock *BB) const;
1444 MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
1445 MachineBasicBlock *BB) const;
1447 MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1448 MachineBasicBlock *BB) const;
1450 MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1451 MachineBasicBlock *BB) const;
1453 MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1454 MachineBasicBlock *BB) const;
1456 MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
1457 MachineBasicBlock *BB) const;
1459 MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1460 MachineBasicBlock *MBB) const;
1462 void emitSetJmpShadowStackFix(MachineInstr &MI,
1463 MachineBasicBlock *MBB) const;
1465 MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1466 MachineBasicBlock *MBB) const;
1468 MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1469 MachineBasicBlock *MBB) const;
1471 MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
1472 MachineBasicBlock *MBB) const;
1474 MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1475 MachineBasicBlock *MBB) const;
1477 /// Emit nodes that will be selected as "cmp Op0,Op1", or something
1478 /// equivalent, for use with the given x86 condition code.
1479 SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
1480 SelectionDAG &DAG) const;
1482 /// Convert a comparison if required by the subtarget.
1483 SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
1485 /// Emit flags for the given setcc condition and operands. Also returns the
1486 /// corresponding X86 condition code constant in X86CC.
1487 SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1,
1488 ISD::CondCode CC, const SDLoc &dl,
1489 SelectionDAG &DAG,
1490 SDValue &X86CC) const;
1492 /// Check if replacement of SQRT with RSQRT should be disabled.
1493 bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
1495 /// Use rsqrt* to speed up sqrt calculations.
1496 SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1497 int &RefinementSteps, bool &UseOneConstNR,
1498 bool Reciprocal) const override;
1500 /// Use rcp* to speed up fdiv calculations.
1501 SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1502 int &RefinementSteps) const override;
1504 /// Reassociate floating point divisions into multiply by reciprocal.
1505 unsigned combineRepeatedFPDivisors() const override;
1507 SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1508 SmallVectorImpl<SDNode *> &Created) const override;
1509 };
1511 namespace X86 {
1512 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1513 const TargetLibraryInfo *libInfo);
1514 } // end namespace X86
1516 // Base class for all X86 non-masked store operations.
1517 class X86StoreSDNode : public MemSDNode {
1518 public:
1519 X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1520 SDVTList VTs, EVT MemVT,
1521 MachineMemOperand *MMO)
1522 :MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1523 const SDValue &getValue() const { return getOperand(1); }
1524 const SDValue &getBasePtr() const { return getOperand(2); }
1526 static bool classof(const SDNode *N) {
1527 return N->getOpcode() == X86ISD::VTRUNCSTORES ||
1528 N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1529 }
1530 };
1532 // Base class for all X86 masked store operations.
1533 // The class has the same order of operands as MaskedStoreSDNode for
1534 // convenience.
1535 class X86MaskedStoreSDNode : public MemSDNode {
1536 public:
1537 X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
1538 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1539 MachineMemOperand *MMO)
1540 : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1542 const SDValue &getValue() const { return getOperand(1); }
1543 const SDValue &getBasePtr() const { return getOperand(2); }
1544 const SDValue &getMask() const { return getOperand(3); }
1546 static bool classof(const SDNode *N) {
1547 return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
1548 N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1549 }
1550 };
1552 // X86 Truncating Store with Signed saturation.
1553 class TruncSStoreSDNode : public X86StoreSDNode {
1554 public:
1555 TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
1556 SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1557 : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1559 static bool classof(const SDNode *N) {
1560 return N->getOpcode() == X86ISD::VTRUNCSTORES;
1561 }
1562 };
1564 // X86 Truncating Store with Unsigned saturation.
1565 class TruncUSStoreSDNode : public X86StoreSDNode {
1566 public:
1567 TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
1568 SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1569 : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1571 static bool classof(const SDNode *N) {
1572 return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1573 }
1574 };
1576 // X86 Truncating Masked Store with Signed saturation.
1577 class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
1578 public:
1579 MaskedTruncSStoreSDNode(unsigned Order,
1580 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1581 MachineMemOperand *MMO)
1582 : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1584 static bool classof(const SDNode *N) {
1585 return N->getOpcode() == X86ISD::VMTRUNCSTORES;
1586 }
1587 };
1589 // X86 Truncating Masked Store with Unsigned saturation.
1590 class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
1591 public:
1592 MaskedTruncUSStoreSDNode(unsigned Order,
1593 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1594 MachineMemOperand *MMO)
1595 : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1597 static bool classof(const SDNode *N) {
1598 return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1599 }
1600 };
1602 // X86 specific Gather/Scatter nodes.
1603 // The class has the same order of operands as MaskedGatherScatterSDNode for
1604 // convenience.
1605 class X86MaskedGatherScatterSDNode : public MemSDNode {
1606 public:
1607 X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order,
1608 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1609 MachineMemOperand *MMO)
1610 : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {}
1612 const SDValue &getBasePtr() const { return getOperand(3); }
1613 const SDValue &getIndex() const { return getOperand(4); }
1614 const SDValue &getMask() const { return getOperand(2); }
1615 const SDValue &getScale() const { return getOperand(5); }
1617 static bool classof(const SDNode *N) {
1618 return N->getOpcode() == X86ISD::MGATHER ||
1619 N->getOpcode() == X86ISD::MSCATTER;
1620 }
1621 };
1623 class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1624 public:
1625 X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1626 EVT MemVT, MachineMemOperand *MMO)
1627 : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
1628 MMO) {}
1630 const SDValue &getPassThru() const { return getOperand(1); }
1632 static bool classof(const SDNode *N) {
1633 return N->getOpcode() == X86ISD::MGATHER;
1634 }
1635 };
1637 class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1638 public:
1639 X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1640 EVT MemVT, MachineMemOperand *MMO)
1641 : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
1642 MMO) {}
1644 const SDValue &getValue() const { return getOperand(1); }
1646 static bool classof(const SDNode *N) {
1647 return N->getOpcode() == X86ISD::MSCATTER;
1648 }
1649 };
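// Illustrative sketch (not part of this header): thanks to the classof
// methods above, these nodes are typically inspected through LLVM's cast
// machinery. Assuming N is an arbitrary SDNode*:
//
//   if (auto *Gather = dyn_cast<X86MaskedGatherSDNode>(N)) {
//     SDValue Index = Gather->getIndex();   // operand 4
//     SDValue Mask = Gather->getMask();     // operand 2
//   }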
1651 /// Generate unpacklo/unpackhi shuffle mask.
1652 template <typename T = int>
1653 void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
1654 bool Unary) {
1655 assert(Mask.empty() && "Expected an empty shuffle mask vector");
1656 int NumElts = VT.getVectorNumElements();
1657 int NumEltsInLane = 128 / VT.getScalarSizeInBits();
1658 for (int i = 0; i < NumElts; ++i) {
1659 unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
1660 int Pos = (i % NumEltsInLane) / 2 + LaneStart;
1661 Pos += (Unary ? 0 : NumElts * (i % 2));
1662 Pos += (Lo ? 0 : NumEltsInLane / 2);
1663 Mask.push_back(Pos);
1664 }
1665 }
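// Illustrative sketch (not part of this header): for MVT::v4i32 with
// Lo = true and Unary = false, the loop above produces the mask
// <0, 4, 1, 5>, i.e. the PUNPCKLDQ interleave of the two sources:
//
//   SmallVector<int, 4> Mask;
//   createUnpackShuffleMask(MVT::v4i32, Mask, /*Lo=*/true, /*Unary=*/false);
//   // Mask == {0, 4, 1, 5}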
1667 /// Helper function to scale a shuffle or target shuffle mask, replacing each
1668 /// mask index with the scaled sequential indices for an equivalent narrowed
1669 /// mask. This is the reverse process to canWidenShuffleElements, but can
1670 /// always succeed.
1671 template <typename T>
1672 void scaleShuffleMask(int Scale, ArrayRef<T> Mask,
1673 SmallVectorImpl<T> &ScaledMask) {
1674 assert(0 < Scale && "Unexpected scaling factor");
1675 size_t NumElts = Mask.size();
1676 ScaledMask.assign(NumElts * Scale, -1);
1678 for (int i = 0; i != (int)NumElts; ++i) {
1679 int M = Mask[i];
1681 // Repeat sentinel values in every mask element.
1682 if (M < 0) {
1683 for (int s = 0; s != Scale; ++s)
1684 ScaledMask[(Scale * i) + s] = M;
1685 continue;
1686 }
1688 // Scale mask element and increment across each mask element.
1689 for (int s = 0; s != Scale; ++s)
1690 ScaledMask[(Scale * i) + s] = (Scale * M) + s;
1691 }
1692 }
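// Illustrative sketch (not part of this header): scaling the v2i64-style mask
// <1, 0> by a factor of 2 yields the equivalent v4i32-style mask <2, 3, 0, 1>:
//
//   SmallVector<int, 4> Scaled;
//   int Wide[] = {1, 0};
//   scaleShuffleMask<int>(2, makeArrayRef(Wide, 2), Scaled);
//   // Scaled == {2, 3, 0, 1}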
1693 } // end namespace llvm
1695 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H