1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the interfaces that X86 uses to lower LLVM code into a
11 // selection DAG.
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
16 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
18 #include "llvm/CodeGen/CallingConvLower.h"
19 #include "llvm/CodeGen/SelectionDAG.h"
20 #include "llvm/CodeGen/TargetLowering.h"
21 #include "llvm/Target/TargetOptions.h"
23 namespace llvm {
24 class X86Subtarget;
25 class X86TargetMachine;
27 namespace X86ISD {
28 // X86 Specific DAG Nodes
29 enum NodeType : unsigned {
30 // Start the numbering where the builtin ops leave off.
31 FIRST_NUMBER = ISD::BUILTIN_OP_END,
33 /// Bit scan forward.
34 BSF,
35 /// Bit scan reverse.
36 BSR,
38 /// Double shift instructions. These correspond to
39 /// X86::SHLDxx and X86::SHRDxx instructions.
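/// (Illustrative reading of the x86 semantics: SHLD(dst, src, cnt) shifts dst
/// left by cnt bits and fills the vacated low bits with the high bits of src;
/// SHRD is the mirror image, shifting right and filling from the low bits of
/// src.)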
40 SHLD,
41 SHRD,
43 /// Bitwise logical AND of floating point values. This corresponds
44 /// to X86::ANDPS or X86::ANDPD.
45 FAND,
47 /// Bitwise logical OR of floating point values. This corresponds
48 /// to X86::ORPS or X86::ORPD.
49 FOR,
51 /// Bitwise logical XOR of floating point values. This corresponds
52 /// to X86::XORPS or X86::XORPD.
53 FXOR,
55 /// Bitwise logical ANDNOT of floating point values. This
56 /// corresponds to X86::ANDNPS or X86::ANDNPD.
57 FANDN,
59 /// These operations represent an abstract X86 call
60 /// instruction, which includes a bunch of information. In particular the
/// operands of these nodes are:
62 ///
63 /// #0 - The incoming token chain
64 /// #1 - The callee
65 /// #2 - The number of arg bytes the caller pushes on the stack.
66 /// #3 - The number of arg bytes the callee pops off the stack.
67 /// #4 - The value to pass in AL/AX/EAX (optional)
68 /// #5 - The value to pass in DL/DX/EDX (optional)
69 ///
70 /// The result values of these nodes are:
71 ///
72 /// #0 - The outgoing token chain
73 /// #1 - The first register result value (optional)
74 /// #2 - The second register result value (optional)
75 ///
76 CALL,
78 /// Same as call except it adds the NoTrack prefix.
79 NT_CALL,
81 /// This operation implements the lowering for readcyclecounter.
82 RDTSC_DAG,
84 /// X86 Read Time-Stamp Counter and Processor ID.
85 RDTSCP_DAG,
87 /// X86 Read Performance Monitoring Counters.
88 RDPMC_DAG,
90 /// X86 compare and logical compare instructions.
91 CMP, COMI, UCOMI,
93 /// X86 bit-test instructions.
94 BT,
96 /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
97 /// operand, usually produced by a CMP instruction.
98 SETCC,
100 /// X86 Select
101 SELECT, SELECTS,
// Same as SETCC except it's materialized with an SBB and the value is all
// ones or all zeros.
105 SETCC_CARRY, // R = carry_bit ? ~0 : 0
107 /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
108 /// Operands are two FP values to compare; result is a mask of
109 /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
110 FSETCC,
112 /// X86 FP SETCC, similar to above, but with output as an i1 mask and
113 /// with optional rounding mode.
114 FSETCCM, FSETCCM_RND,
116 /// X86 conditional moves. Operand 0 and operand 1 are the two values
117 /// to select from. Operand 2 is the condition code, and operand 3 is the
118 /// flag operand produced by a CMP or TEST instruction. It also writes a
119 /// flag result.
120 CMOV,
122 /// X86 conditional branches. Operand 0 is the chain operand, operand 1
123 /// is the block to branch if condition is true, operand 2 is the
124 /// condition code, and operand 3 is the flag operand produced by a CMP
125 /// or TEST instruction.
126 BRCOND,
128 /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
129 /// operand 1 is the target address.
130 NT_BRIND,
132 /// Return with a flag operand. Operand 0 is the chain operand, operand
133 /// 1 is the number of bytes of stack to pop.
134 RET_FLAG,
136 /// Return from interrupt. Operand 0 is the number of bytes to pop.
137 IRET,
139 /// Repeat fill, corresponds to X86::REP_STOSx.
140 REP_STOS,
142 /// Repeat move, corresponds to X86::REP_MOVSx.
143 REP_MOVS,
145 /// On Darwin, this node represents the result of the popl
146 /// at function entry, used for PIC code.
147 GlobalBaseReg,
149 /// A wrapper node for TargetConstantPool, TargetJumpTable,
150 /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
151 /// MCSymbol and TargetBlockAddress.
152 Wrapper,
154 /// Special wrapper used under X86-64 PIC mode for RIP
155 /// relative displacements.
156 WrapperRIP,
158 /// Copies a 64-bit value from the low word of an XMM vector
159 /// to an MMX vector.
160 MOVDQ2Q,
/// Copies a 32-bit value from the low word of an MMX
/// vector to a GPR.
164 MMX_MOVD2W,
/// Copies a GPR into the low 32-bit word of an MMX vector
/// and zeroes out the high word.
168 MMX_MOVW2D,
170 /// Extract an 8-bit value from a vector and zero extend it to
171 /// i32, corresponds to X86::PEXTRB.
172 PEXTRB,
174 /// Extract a 16-bit value from a vector and zero extend it to
175 /// i32, corresponds to X86::PEXTRW.
176 PEXTRW,
/// Insert any element of a 4 x float vector into any element
/// of a destination 4 x float vector.
180 INSERTPS,
182 /// Insert the lower 8-bits of a 32-bit value to a vector,
183 /// corresponds to X86::PINSRB.
184 PINSRB,
186 /// Insert the lower 16-bits of a 32-bit value to a vector,
187 /// corresponds to X86::PINSRW.
188 PINSRW,
190 /// Shuffle 16 8-bit values within a vector.
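/// (For reference: each control byte either zeroes the corresponding result
/// byte, when its high bit is set, or selects a source byte by index.)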
191 PSHUFB,
193 /// Compute Sum of Absolute Differences.
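/// (Illustrative: for v16i8 inputs, each 8-byte group is reduced to a single
/// sum of absolute byte differences, widened into a 64-bit result element.)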
194 PSADBW,
195 /// Compute Double Block Packed Sum-Absolute-Differences
196 DBPSADBW,
198 /// Bitwise Logical AND NOT of Packed FP values.
199 ANDNP,
201 /// Blend where the selector is an immediate.
202 BLENDI,
204 /// Dynamic (non-constant condition) vector blend where only the sign bits
205 /// of the condition elements are used. This is used to enforce that the
206 /// condition mask is not valid for generic VSELECT optimizations.
207 SHRUNKBLEND,
209 /// Combined add and sub on an FP vector.
210 ADDSUB,
212 // FP vector ops with rounding mode.
213 FADD_RND, FADDS_RND,
214 FSUB_RND, FSUBS_RND,
215 FMUL_RND, FMULS_RND,
216 FDIV_RND, FDIVS_RND,
217 FMAX_RND, FMAXS_RND,
218 FMIN_RND, FMINS_RND,
219 FSQRT_RND, FSQRTS_RND,
221 // FP vector get exponent.
222 FGETEXP_RND, FGETEXPS_RND,
223 // Extract Normalized Mantissas.
224 VGETMANT, VGETMANT_RND, VGETMANTS, VGETMANTS_RND,
225 // FP Scale.
226 SCALEF,
227 SCALEFS,
229 // Integer add/sub with unsigned saturation.
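// (Illustrative values: with i8 elements, ADDUS(250, 10) == 255 and
// SUBUS(5, 10) == 0, i.e. results clamp instead of wrapping.)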
230 ADDUS,
231 SUBUS,
233 // Integer add/sub with signed saturation.
234 ADDS,
235 SUBS,
237 // Unsigned Integer average.
238 AVG,
240 /// Integer horizontal add/sub.
241 HADD,
242 HSUB,
244 /// Floating point horizontal add/sub.
245 FHADD,
246 FHSUB,
248 // Detect Conflicts Within a Vector
249 CONFLICT,
251 /// Floating point max and min.
252 FMAX, FMIN,
254 /// Commutative FMIN and FMAX.
255 FMAXC, FMINC,
257 /// Scalar intrinsic floating point max and min.
258 FMAXS, FMINS,
260 /// Floating point reciprocal-sqrt and reciprocal approximation.
261 /// Note that these typically require refinement
262 /// in order to obtain suitable precision.
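/// (For example, one Newton-Raphson step refines an rsqrt estimate x0 of a
/// as x1 = x0 * (1.5 - 0.5 * a * x0 * x0).)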
263 FRSQRT, FRCP,
265 // AVX-512 reciprocal approximations with a little more precision.
266 RSQRT14, RSQRT14S, RCP14, RCP14S,
268 // Thread Local Storage.
269 TLSADDR,
271 // Thread Local Storage. A call to get the start address
272 // of the TLS block for the current module.
273 TLSBASEADDR,
275 // Thread Local Storage. When calling to an OS provided
276 // thunk at the address from an earlier relocation.
277 TLSCALL,
279 // Exception Handling helpers.
280 EH_RETURN,
282 // SjLj exception handling setjmp.
283 EH_SJLJ_SETJMP,
285 // SjLj exception handling longjmp.
286 EH_SJLJ_LONGJMP,
288 // SjLj exception handling dispatch.
289 EH_SJLJ_SETUP_DISPATCH,
291 /// Tail call return. See X86TargetLowering::LowerCall for
292 /// the list of operands.
293 TC_RETURN,
295 // Vector move to low scalar and zero higher vector elements.
296 VZEXT_MOVL,
298 // Vector integer zero-extend.
299 VZEXT,
300 // Vector integer signed-extend.
301 VSEXT,
303 // Vector integer truncate.
304 VTRUNC,
305 // Vector integer truncate with unsigned/signed saturation.
306 VTRUNCUS, VTRUNCS,
308 // Vector FP extend.
309 VFPEXT, VFPEXT_RND, VFPEXTS_RND,
311 // Vector FP round.
312 VFPROUND, VFPROUND_RND, VFPROUNDS_RND,
314 // 128-bit vector logical left / right shift
315 VSHLDQ, VSRLDQ,
317 // Vector shift elements
318 VSHL, VSRL, VSRA,
// Vector variable shift right arithmetic.
// Unlike ISD::SRA, if the shift count is greater than the element size,
// the sign bit is used to fill the destination data element.
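// (For example, an i32 element shifted right by 35 becomes all copies of its
// sign bit, whereas ISD::SRA leaves an out-of-range shift amount undefined.)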
323 VSRAV,
325 // Vector shift elements by immediate
326 VSHLI, VSRLI, VSRAI,
328 // Shifts of mask registers.
329 KSHIFTL, KSHIFTR,
331 // Bit rotate by immediate
332 VROTLI, VROTRI,
334 // Vector packed double/float comparison.
335 CMPP,
337 // Vector integer comparisons.
338 PCMPEQ, PCMPGT,
340 // v8i16 Horizontal minimum and position.
341 PHMINPOS,
343 MULTISHIFT,
345 /// Vector comparison generating mask bits for fp and
346 /// integer signed and unsigned data types.
347 CMPM,
348 // Vector comparison with rounding mode for FP values
349 CMPM_RND,
351 // Arithmetic operations with FLAGS results.
352 ADD, SUB, ADC, SBB, SMUL,
353 INC, DEC, OR, XOR, AND,
355 // Bit field extract.
356 BEXTR,
358 // LOW, HI, FLAGS = umul LHS, RHS.
359 UMUL,
361 // 8-bit SMUL/UMUL - AX, FLAGS = smul8/umul8 AL, RHS.
362 SMUL8, UMUL8,
364 // 8-bit divrem that zero-extend the high result (AH).
365 UDIVREM8_ZEXT_HREG,
366 SDIVREM8_SEXT_HREG,
368 // X86-specific multiply by immediate.
369 MUL_IMM,
371 // Vector sign bit extraction.
372 MOVMSK,
374 // Vector bitwise comparisons.
375 PTEST,
377 // Vector packed fp sign bitwise comparisons.
378 TESTP,
380 // OR/AND test for masks.
381 KORTEST,
382 KTEST,
384 // ADD for masks.
385 KADD,
387 // Several flavors of instructions with vector shuffle behaviors.
// Saturated signed/unsigned packing.
389 PACKSS,
390 PACKUS,
391 // Intra-lane alignr.
392 PALIGNR,
393 // AVX512 inter-lane alignr.
394 VALIGN,
395 PSHUFD,
396 PSHUFHW,
397 PSHUFLW,
398 SHUFP,
399 // VBMI2 Concat & Shift.
400 VSHLD,
401 VSHRD,
402 VSHLDV,
403 VSHRDV,
// Shuffle Packed Values at 128-bit granularity.
405 SHUF128,
406 MOVDDUP,
407 MOVSHDUP,
408 MOVSLDUP,
409 MOVLHPS,
410 MOVHLPS,
411 MOVSD,
412 MOVSS,
413 UNPCKL,
414 UNPCKH,
415 VPERMILPV,
416 VPERMILPI,
417 VPERMI,
418 VPERM2X128,
420 // Variable Permute (VPERM).
421 // Res = VPERMV MaskV, V0
422 VPERMV,
424 // 3-op Variable Permute (VPERMT2).
425 // Res = VPERMV3 V0, MaskV, V1
426 VPERMV3,
428 // Bitwise ternary logic.
429 VPTERNLOG,
430 // Fix Up Special Packed Float32/64 values.
431 VFIXUPIMM,
432 VFIXUPIMMS,
433 // Range Restriction Calculation For Packed Pairs of Float32/64 values.
434 VRANGE, VRANGE_RND, VRANGES, VRANGES_RND,
// Reduce - Perform Reduction Transformation on scalar/packed FP.
436 VREDUCE, VREDUCE_RND, VREDUCES, VREDUCES_RND,
437 // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
438 // Also used by the legacy (V)ROUND intrinsics where we mask out the
439 // scaling part of the immediate.
440 VRNDSCALE, VRNDSCALE_RND, VRNDSCALES, VRNDSCALES_RND,
// Tests types of packed FP values.
442 VFPCLASS,
// Tests types of scalar FP values.
444 VFPCLASSS,
446 // Broadcast scalar to vector.
447 VBROADCAST,
448 // Broadcast mask to vector.
449 VBROADCASTM,
450 // Broadcast subvector to vector.
451 SUBV_BROADCAST,
453 /// SSE4A Extraction and Insertion.
454 EXTRQI, INSERTQI,
456 // XOP arithmetic/logical shifts.
457 VPSHA, VPSHL,
458 // XOP signed/unsigned integer comparisons.
459 VPCOM, VPCOMU,
460 // XOP packed permute bytes.
461 VPPERM,
462 // XOP two source permutation.
463 VPERMIL2,
465 // Vector multiply packed unsigned doubleword integers.
466 PMULUDQ,
467 // Vector multiply packed signed doubleword integers.
468 PMULDQ,
// Vector Multiply Packed Unsigned Integers with Round and Scale.
470 MULHRS,
472 // Multiply and Add Packed Integers.
473 VPMADDUBSW, VPMADDWD,
475 // AVX512IFMA multiply and add.
// NOTE: These are different from the instruction and perform
// op0 x op1 + op2.
478 VPMADD52L, VPMADD52H,
480 // VNNI
481 VPDPBUSD,
482 VPDPBUSDS,
483 VPDPWSSD,
484 VPDPWSSDS,
486 // FMA nodes.
487 // We use the target independent ISD::FMA for the non-inverted case.
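// (Assumed convention, for orientation only: FNMADD = -(a*b)+c,
// FMSUB = (a*b)-c, FNMSUB = -(a*b)-c; FMADDSUB/FMSUBADD alternate
// subtract and add across vector lanes.)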
488 FNMADD,
489 FMSUB,
490 FNMSUB,
491 FMADDSUB,
492 FMSUBADD,
494 // FMA with rounding mode.
495 FMADD_RND,
496 FNMADD_RND,
497 FMSUB_RND,
498 FNMSUB_RND,
499 FMADDSUB_RND,
500 FMSUBADD_RND,
502 // Compress and expand.
503 COMPRESS,
504 EXPAND,
506 // Bits shuffle
507 VPSHUFBITQMB,
// Convert Signed/Unsigned Integer to Floating-Point Value with rounding mode.
510 SINT_TO_FP_RND, UINT_TO_FP_RND,
511 SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,
513 // Vector float/double to signed/unsigned integer.
514 CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
515 // Scalar float/double to signed/unsigned integer.
516 CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND,
518 // Vector float/double to signed/unsigned integer with truncation.
519 CVTTP2SI, CVTTP2UI, CVTTP2SI_RND, CVTTP2UI_RND,
520 // Scalar float/double to signed/unsigned integer with truncation.
521 CVTTS2SI, CVTTS2UI, CVTTS2SI_RND, CVTTS2UI_RND,
523 // Vector signed/unsigned integer to float/double.
524 CVTSI2P, CVTUI2P,
526 // Save xmm argument registers to the stack, according to %al. An operator
527 // is needed so that this can be expanded with control flow.
528 VASTART_SAVE_XMM_REGS,
530 // Windows's _chkstk call to do stack probing.
531 WIN_ALLOCA,
533 // For allocating variable amounts of stack space when using
// segmented stacks. Checks if the current stacklet has enough space, and
// falls back to heap allocation if not.
536 SEG_ALLOCA,
538 // Memory barriers.
539 MEMBARRIER,
540 MFENCE,
542 // Store FP status word into i16 register.
543 FNSTSW16r,
545 // Store contents of %ah into %eflags.
546 SAHF,
548 // Get a random integer and indicate whether it is valid in CF.
549 RDRAND,
551 // Get a NIST SP800-90B & C compliant random integer and
552 // indicate whether it is valid in CF.
553 RDSEED,
555 // SSE42 string comparisons.
// These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
// will emit one or two instructions based on which results are used. If
// both flags and index/mask are used, this allows us to use a single
// instruction since we won't have to pick an opcode for flags. Instead we
// can rely on the DAG to CSE everything and decide at isel.
561 PCMPISTR,
562 PCMPESTR,
564 // Test if in transactional execution.
565 XTEST,
567 // ERI instructions.
568 RSQRT28, RSQRT28S, RCP28, RCP28S, EXP2,
570 // Conversions between float and half-float.
571 CVTPS2PH, CVTPH2PS, CVTPH2PS_RND,
573 // Galois Field Arithmetic Instructions
574 GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,
576 // LWP insert record.
577 LWPINS,
579 // User level wait
580 UMWAIT, TPAUSE,
582 // Compare and swap.
583 LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
584 LCMPXCHG8_DAG,
585 LCMPXCHG16_DAG,
586 LCMPXCHG8_SAVE_EBX_DAG,
587 LCMPXCHG16_SAVE_RBX_DAG,
589 /// LOCK-prefixed arithmetic read-modify-write instructions.
590 /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
591 LADD, LSUB, LOR, LXOR, LAND, LINC, LDEC,
593 // Load, scalar_to_vector, and zero extend.
594 VZEXT_LOAD,
// Store FP control word into i16 memory.
597 FNSTCW16m,
599 /// This instruction implements FP_TO_SINT with the
600 /// integer destination in memory and a FP reg source. This corresponds
601 /// to the X86::FIST*m instructions and the rounding mode change stuff. It
602 /// has two inputs (token chain and address) and two outputs (int value
603 /// and token chain).
604 FP_TO_INT16_IN_MEM,
605 FP_TO_INT32_IN_MEM,
606 FP_TO_INT64_IN_MEM,
608 /// This instruction implements SINT_TO_FP with the
609 /// integer source in memory and FP reg result. This corresponds to the
610 /// X86::FILD*m instructions. It has three inputs (token chain, address,
/// and source type) and two outputs (FP value and token chain). FILD_FLAG
/// also produces a flag.
613 FILD,
614 FILD_FLAG,
616 /// This instruction implements an extending load to FP stack slots.
617 /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
618 /// operand, ptr to load from, and a ValueType node indicating the type
619 /// to load to.
620 FLD,
622 /// This instruction implements a truncating store to FP stack
623 /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
624 /// chain operand, value to store, address, and a ValueType to store it
625 /// as.
626 FST,
628 /// This instruction grabs the address of the next argument
629 /// from a va_list. (reads and modifies the va_list in memory)
630 VAARG_64,
632 // Vector truncating store with unsigned/signed saturation
633 VTRUNCSTOREUS, VTRUNCSTORES,
634 // Vector truncating masked store with unsigned/signed saturation
635 VMTRUNCSTOREUS, VMTRUNCSTORES,
637 // X86 specific gather and scatter
638 MGATHER, MSCATTER,
// WARNING: Do not add anything at the end unless you want the node to
// have a memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE, all
// opcodes will be treated as target memory ops!
};
} // end namespace X86ISD
646 /// Define some predicates that are used for node matching.
647 namespace X86 {
648 /// Returns true if Elt is a constant zero or floating point constant +0.0.
649 bool isZeroNode(SDValue Elt);
/// Returns true if the given offset can
/// fit into the displacement field of the instruction.
653 bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
654 bool hasSymbolicDisplacement = true);
656 /// Determines whether the callee is required to pop its
657 /// own arguments. Callee pop is necessary to support tail calls.
658 bool isCalleePop(CallingConv::ID CallingConv,
659 bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
661 } // end namespace X86
663 //===--------------------------------------------------------------------===//
664 // X86 Implementation of the TargetLowering interface
665 class X86TargetLowering final : public TargetLowering {
666 public:
667 explicit X86TargetLowering(const X86TargetMachine &TM,
668 const X86Subtarget &STI);
670 unsigned getJumpTableEncoding() const override;
671 bool useSoftFloat() const override;
673 void markLibCallAttributes(MachineFunction *MF, unsigned CC,
674 ArgListTy &Args) const override;
676 MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
return MVT::i8;
}
680 const MCExpr *
681 LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
682 const MachineBasicBlock *MBB, unsigned uid,
683 MCContext &Ctx) const override;
685 /// Returns relocation base for the given PIC jumptable.
686 SDValue getPICJumpTableRelocBase(SDValue Table,
687 SelectionDAG &DAG) const override;
688 const MCExpr *
689 getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
690 unsigned JTI, MCContext &Ctx) const override;
692 /// Return the desired alignment for ByVal aggregate
693 /// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest are at
695 /// 4-byte boundaries.
696 unsigned getByValTypeAlignment(Type *Ty,
697 const DataLayout &DL) const override;
699 /// Returns the target specific optimal type for load
700 /// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero, that means the destination
/// alignment can satisfy any constraint. Similarly, if SrcAlign is zero it
703 /// means there isn't a need to check it against alignment requirement,
704 /// probably because the source does not need to be loaded. If 'IsMemset' is
705 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
706 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
707 /// source is constant so it does not need to be loaded.
708 /// It returns EVT::Other if the type should be determined using generic
709 /// target-independent logic.
710 EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
711 bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
712 MachineFunction &MF) const override;
714 /// Returns true if it's safe to use load / store of the
715 /// specified type to expand memcpy / memset inline. This is mostly true
716 /// for all types except for some special cases. For example, on X86
717 /// targets without SSE2 f64 load / store are done with fldl / fstpl which
718 /// also does type conversion. Note the specified type doesn't have to be
719 /// legal as the hook is used before type legalization.
720 bool isSafeMemOpType(MVT VT) const override;
722 /// Returns true if the target allows unaligned memory accesses of the
723 /// specified type. Returns whether it is "fast" in the last argument.
724 bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
725 bool *Fast) const override;
727 /// Provide custom lowering hooks for some operations.
729 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
731 /// Places new result values for the node in Results (their number
732 /// and types must exactly match those of the original return values of
733 /// the node), or leaves Results empty, which indicates that the node is not
734 /// to be custom lowered after all.
735 void LowerOperationWrapper(SDNode *N,
736 SmallVectorImpl<SDValue> &Results,
737 SelectionDAG &DAG) const override;
739 /// Replace the results of node with an illegal result
740 /// type with new values built out of custom code.
742 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
743 SelectionDAG &DAG) const override;
745 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
747 // Return true if it is profitable to combine a BUILD_VECTOR with a
748 // stride-pattern to a shuffle and a truncate.
749 // Example of such a combine:
750 // v4i32 build_vector((extract_elt V, 1),
751 // (extract_elt V, 3),
752 // (extract_elt V, 5),
753 // (extract_elt V, 7))
754 // -->
// v4i32 truncate (bitcast (shuffle<1,u,3,u,5,u,7,u> V, u) to
// v4i64)
757 bool isDesirableToCombineBuildVectorToShuffleTruncate(
758 ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;
760 /// Return true if the target has native support for
761 /// the specified value type and it is 'desirable' to use the type for the
762 /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
763 /// instruction encodings are longer and some i16 instructions are slow.
764 bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
766 /// Return true if the target has native support for the
767 /// specified value type and it is 'desirable' to use the type. e.g. On x86
768 /// i16 is legal, but undesirable since i16 instruction encodings are longer
769 /// and some i16 instructions are slow.
770 bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
772 MachineBasicBlock *
773 EmitInstrWithCustomInserter(MachineInstr &MI,
774 MachineBasicBlock *MBB) const override;
776 /// This method returns the name of a target specific DAG node.
777 const char *getTargetNodeName(unsigned Opcode) const override;
779 bool mergeStoresAfterLegalization() const override { return true; }
781 bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
782 const SelectionDAG &DAG) const override;
784 bool isCheapToSpeculateCttz() const override;
786 bool isCheapToSpeculateCtlz() const override;
788 bool isCtlzFast() const override;
790 bool hasBitPreservingFPLogic(EVT VT) const override {
return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
}
794 bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
// If the pair to store is a mixture of float and int values, we will
// save two bitwise instructions and one float-to-int instruction and
// add one store instruction. There is potentially a more
// significant benefit because it avoids the float->int domain switch
// for the input value. So it is more likely a win.
800 if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
801 (LTy.isInteger() && HTy.isFloatingPoint()))
802 return true;
// If the pair only contains int values, we will save two bitwise
// instructions and add one store instruction (costing one more
// store buffer). Since the benefit is less clear, we leave
// such pairs out until we have a test case to prove it is a win.
return false;
}
810 bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
812 bool hasAndNotCompare(SDValue Y) const override;
814 bool hasAndNot(SDValue Y) const override;
816 bool preferShiftsToClearExtremeBits(SDValue Y) const override;
818 bool
819 shouldTransformSignedTruncationCheck(EVT XVT,
820 unsigned KeptBits) const override {
// For vectors, we don't have a preference.
822 if (XVT.isVector())
823 return false;
825 auto VTIsOk = [](EVT VT) -> bool {
826 return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
VT == MVT::i64;
};
830 // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.
831 // XVT will be larger than KeptBitsVT.
832 MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
}
836 bool shouldSplatInsEltVarIndex(EVT VT) const override;
838 bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
return VT.isScalarInteger();
}
842 /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
843 MVT hasFastEqualityCompare(unsigned NumBits) const override;
845 /// Allow multiple load pairs per block for smaller and faster code.
846 unsigned getMemcmpEqZeroLoadsPerBlock() const override {
return 2;
}
850 /// Return the value type to use for ISD::SETCC.
851 EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
852 EVT VT) const override;
854 bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
855 TargetLoweringOpt &TLO) const override;
857 /// Determine which of the bits specified in Mask are known to be either
858 /// zero or one and return them in the KnownZero/KnownOne bitsets.
859 void computeKnownBitsForTargetNode(const SDValue Op,
860 KnownBits &Known,
861 const APInt &DemandedElts,
862 const SelectionDAG &DAG,
863 unsigned Depth = 0) const override;
865 /// Determine the number of bits in the operation that are sign bits.
866 unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
867 const APInt &DemandedElts,
868 const SelectionDAG &DAG,
869 unsigned Depth) const override;
871 bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
872 const APInt &DemandedElts,
873 APInt &KnownUndef,
874 APInt &KnownZero,
875 TargetLoweringOpt &TLO,
876 unsigned Depth) const override;
878 SDValue unwrapAddress(SDValue N) const override;
880 bool isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
881 int64_t &Offset) const override;
883 SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
885 bool ExpandInlineAsm(CallInst *CI) const override;
887 ConstraintType getConstraintType(StringRef Constraint) const override;
889 /// Examine constraint string and operand type and determine a weight value.
890 /// The operand object must already have been set up with the operand type.
891 ConstraintWeight
892 getSingleConstraintMatchWeight(AsmOperandInfo &info,
893 const char *constraint) const override;
895 const char *LowerXConstraint(EVT ConstraintVT) const override;
897 /// Lower the specified operand into the Ops vector. If it is invalid, don't
898 /// add anything to Ops. If hasMemory is true it means one of the asm
/// constraints of the inline asm instruction being processed is 'm'.
900 void LowerAsmOperandForConstraint(SDValue Op,
901 std::string &Constraint,
902 std::vector<SDValue> &Ops,
903 SelectionDAG &DAG) const override;
905 unsigned
906 getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
907 if (ConstraintCode == "i")
908 return InlineAsm::Constraint_i;
909 else if (ConstraintCode == "o")
910 return InlineAsm::Constraint_o;
911 else if (ConstraintCode == "v")
912 return InlineAsm::Constraint_v;
913 else if (ConstraintCode == "X")
914 return InlineAsm::Constraint_X;
return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
918 /// Given a physical register constraint
919 /// (e.g. {edx}), return the register number and the register class for the
920 /// register. This should only be used for C_Register constraints. On
921 /// error, this returns a register number of 0.
922 std::pair<unsigned, const TargetRegisterClass *>
923 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
924 StringRef Constraint, MVT VT) const override;
926 /// Return true if the addressing mode represented
927 /// by AM is legal for this target, for a load/store of the specified type.
928 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
929 Type *Ty, unsigned AS,
930 Instruction *I = nullptr) const override;
/// Return true if the specified immediate is a legal
/// icmp immediate, that is, the target has icmp instructions which can
934 /// compare a register against the immediate without having to materialize
935 /// the immediate into a register.
936 bool isLegalICmpImmediate(int64_t Imm) const override;
/// Return true if the specified immediate is a legal
/// add immediate, that is, the target has add instructions which can
940 /// add a register and the immediate without having to materialize
941 /// the immediate into a register.
942 bool isLegalAddImmediate(int64_t Imm) const override;
944 /// Return the cost of the scaling factor used in the addressing
945 /// mode represented by AM for this target, for a load/store
946 /// of the specified type.
947 /// If the AM is supported, the return value must be >= 0.
948 /// If the AM is not supported, it returns a negative value.
949 int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
950 unsigned AS) const override;
952 bool isVectorShiftByScalarCheap(Type *Ty) const override;
954 /// Return true if it's free to truncate a value of
/// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
956 /// register EAX to i16 by referencing its sub-register AX.
957 bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
958 bool isTruncateFree(EVT VT1, EVT VT2) const override;
960 bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
962 /// Return true if any actual instruction that defines a
963 /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
964 /// register. This does not necessarily include registers defined in
965 /// unknown ways, such as incoming arguments, or copies from unknown
966 /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
967 /// does not necessarily apply to truncate instructions. e.g. on x86-64,
/// all instructions that define 32-bit values implicitly zero-extend the
969 /// result out to 64 bits.
970 bool isZExtFree(Type *Ty1, Type *Ty2) const override;
971 bool isZExtFree(EVT VT1, EVT VT2) const override;
972 bool isZExtFree(SDValue Val, EVT VT2) const override;
974 /// Return true if folding a vector load into ExtVal (a sign, zero, or any
975 /// extend node) is profitable.
976 bool isVectorLoadExtDesirable(SDValue) const override;
978 /// Return true if an FMA operation is faster than a pair of fmul and fadd
979 /// instructions. fmuladd intrinsics will be expanded to FMAs when this
980 /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
981 bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
983 /// Return true if it's profitable to narrow
984 /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
985 /// from i32 to i8 but not from i32 to i16.
986 bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
988 /// Given an intrinsic, checks if on the target the intrinsic will need to map
989 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
990 /// true and stores the intrinsic information into the IntrinsicInfo that was
991 /// passed to the function.
992 bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
993 MachineFunction &MF,
994 unsigned Intrinsic) const override;
996 /// Returns true if the target can instruction select the
997 /// specified FP immediate natively. If false, the legalizer will
998 /// materialize the FP immediate as a load from a constant pool.
999 bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
1001 /// Targets can use this to indicate that they only support *some*
1002 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1003 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1004 /// be legal.
1005 bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1007 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1008 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1009 /// constant pool entry.
1010 bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1012 /// Returns true if lowering to a jump table is allowed.
1013 bool areJTsAllowed(const Function *Fn) const override;
1015 /// If true, then instruction selection should
1016 /// seek to shrink the FP constant of the specified type to a smaller type
1017 /// in order to save space and / or reduce runtime.
1018 bool ShouldShrinkFPConstant(EVT VT) const override {
1019 // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
1020 // expensive than a straight movsd. On the other hand, it's important to
1021 // shrink long double fp constant since fldt is very slow.
return !X86ScalarSSEf64 || VT == MVT::f80;
}
1025 /// Return true if we believe it is correct and profitable to reduce the
1026 /// load node to a smaller type.
1027 bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1028 EVT NewVT) const override;
1030 /// Return true if the specified scalar FP type is computed in an SSE
1031 /// register, not on the X87 floating point stack.
1032 bool isScalarFPTypeInSSEReg(EVT VT) const {
1033 return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
(VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
}
1037 /// Returns true if it is beneficial to convert a load of a constant
1038 /// to just the constant itself.
1039 bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1040 Type *Ty) const override;
1042 bool convertSelectOfConstantsToMath(EVT VT) const override;
1044 bool decomposeMulByConstant(EVT VT, SDValue C) const override;
1046 /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1047 /// with this index.
1048 bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1049 unsigned Index) const override;
1051 bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1052 unsigned AddrSpace) const override {
1053 // If we can replace more than 2 scalar stores, there will be a reduction
1054 // in instructions even after we add a vector constant load.
return NumElem > 2;
}
1058 bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT) const override;
1060 /// Intel processors have a unified instruction and data cache
1061 const char * getClearCacheBuiltinName() const override {
return nullptr; // nothing to do, move along.
}
1065 unsigned getRegisterByName(const char* RegName, EVT VT,
1066 SelectionDAG &DAG) const override;
1068 /// If a physical register, this returns the register that receives the
1069 /// exception address on entry to an EH pad.
1070 unsigned
1071 getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1073 /// If a physical register, this returns the register that receives the
1074 /// exception typeid on entry to a landing pad.
1075 unsigned
1076 getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1078 virtual bool needsFixedCatchObjects() const override;
1080 /// This method returns a target specific FastISel object,
1081 /// or null if the target does not support "fast" ISel.
1082 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1083 const TargetLibraryInfo *libInfo) const override;
1085 /// If the target has a standard location for the stack protector cookie,
1086 /// returns the address of that location. Otherwise, returns nullptr.
1087 Value *getIRStackGuard(IRBuilder<> &IRB) const override;
1089 bool useLoadStackGuardNode() const override;
1090 bool useStackGuardXorFP() const override;
1091 void insertSSPDeclarations(Module &M) const override;
1092 Value *getSDagStackGuard(const Module &M) const override;
1093 Value *getSSPStackGuardCheck(const Module &M) const override;
1094 SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1095 const SDLoc &DL) const override;
1098 /// Return true if the target stores SafeStack pointer at a fixed offset in
1099 /// some non-standard address space, and populates the address space and
1100 /// offset as appropriate.
1101 Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
1103 SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
1104 SelectionDAG &DAG) const;
1106 bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
1108 /// Customize the preferred legalization strategy for certain types.
1109 LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;
1111 MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1112 EVT VT) const override;
1114 unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1115 CallingConv::ID CC,
1116 EVT VT) const override;
1118 bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1120 bool supportSwiftError() const override;
1122 StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1124 bool hasVectorBlend() const override { return true; }
1126 unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1128 /// Lower interleaved load(s) into target specific
1129 /// instructions/intrinsics.
1130 bool lowerInterleavedLoad(LoadInst *LI,
1131 ArrayRef<ShuffleVectorInst *> Shuffles,
1132 ArrayRef<unsigned> Indices,
1133 unsigned Factor) const override;
1135 /// Lower interleaved store(s) into target specific
1136 /// instructions/intrinsics.
1137 bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1138 unsigned Factor) const override;
1140 SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
1141 SDValue Addr, SelectionDAG &DAG)
1142 const override;
1144 protected:
1145 std::pair<const TargetRegisterClass *, uint8_t>
1146 findRepresentativeClass(const TargetRegisterInfo *TRI,
1147 MVT VT) const override;
1149 private:
1150 /// Keep a reference to the X86Subtarget around so that we can
1151 /// make the right decision when generating code for different targets.
1152 const X86Subtarget &Subtarget;
1154 /// Select between SSE or x87 floating point ops.
1155 /// When SSE is available, use it for f32 operations.
1156 /// When SSE2 is available, use it for f64 operations.
1157 bool X86ScalarSSEf32;
1158 bool X86ScalarSSEf64;
1160 /// A list of legal FP immediates.
1161 std::vector<APFloat> LegalFPImmediates;
1163 /// Indicate that this x86 target can instruction
1164 /// select the specified FP immediate natively.
1165 void addLegalFPImmediate(const APFloat& Imm) {
LegalFPImmediates.push_back(Imm);
}
1169 SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
1170 CallingConv::ID CallConv, bool isVarArg,
1171 const SmallVectorImpl<ISD::InputArg> &Ins,
1172 const SDLoc &dl, SelectionDAG &DAG,
1173 SmallVectorImpl<SDValue> &InVals,
1174 uint32_t *RegMask) const;
1175 SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1176 const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1177 const SDLoc &dl, SelectionDAG &DAG,
1178 const CCValAssign &VA, MachineFrameInfo &MFI,
1179 unsigned i) const;
1180 SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1181 const SDLoc &dl, SelectionDAG &DAG,
1182 const CCValAssign &VA,
1183 ISD::ArgFlagsTy Flags) const;
1185 // Call lowering helpers.
1187 /// Check whether the call is eligible for tail call optimization. Targets
1188 /// that want to do tail call optimization should implement this function.
1189 bool IsEligibleForTailCallOptimization(SDValue Callee,
1190 CallingConv::ID CalleeCC,
1191 bool isVarArg,
1192 bool isCalleeStructRet,
1193 bool isCallerStructRet,
1194 Type *RetTy,
1195 const SmallVectorImpl<ISD::OutputArg> &Outs,
1196 const SmallVectorImpl<SDValue> &OutVals,
1197 const SmallVectorImpl<ISD::InputArg> &Ins,
1198 SelectionDAG& DAG) const;
1199 SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1200 SDValue Chain, bool IsTailCall,
1201 bool Is64Bit, int FPDiff,
1202 const SDLoc &dl) const;
1204 unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1205 SelectionDAG &DAG) const;
1207 unsigned getAddressSpace(void) const;
1209 std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
1210 bool isSigned,
1211 bool isReplace) const;
1213 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1214 SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1215 SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1216 SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1218 unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
1219 const unsigned char OpFlags = 0) const;
1220 SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1221 SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1222 SDValue LowerGlobalAddress(const GlobalValue *GV, const SDLoc &dl,
1223 int64_t Offset, SelectionDAG &DAG) const;
1224 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1225 SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1226 SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1228 SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1229 SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1230 SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1231 SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1232 SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1233 SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1234 SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1235 SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1236 SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1237 SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1238 SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1239 SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1240 SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1241 SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1242 SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1243 SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1244 SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1245 SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1246 SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1247 SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1248 SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1249 SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
1250 SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1251 SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
1252 SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
1253 SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1255 SDValue
1256 LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1257 const SmallVectorImpl<ISD::InputArg> &Ins,
1258 const SDLoc &dl, SelectionDAG &DAG,
1259 SmallVectorImpl<SDValue> &InVals) const override;
1260 SDValue LowerCall(CallLoweringInfo &CLI,
1261 SmallVectorImpl<SDValue> &InVals) const override;
1263 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1264 const SmallVectorImpl<ISD::OutputArg> &Outs,
1265 const SmallVectorImpl<SDValue> &OutVals,
1266 const SDLoc &dl, SelectionDAG &DAG) const override;
1268 bool supportSplitCSR(MachineFunction *MF) const override {
1269 return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
}
1272 void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1273 void insertCopiesSplitCSR(
1274 MachineBasicBlock *Entry,
1275 const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1277 bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1279 bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1281 EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1282 ISD::NodeType ExtendKind) const override;
1284 bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1285 bool isVarArg,
1286 const SmallVectorImpl<ISD::OutputArg> &Outs,
1287 LLVMContext &Context) const override;
1289 const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1291 TargetLoweringBase::AtomicExpansionKind
1292 shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
1293 bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1294 TargetLoweringBase::AtomicExpansionKind
1295 shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1297 LoadInst *
1298 lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1300 bool needsCmpXchgNb(Type *MemType) const;
1302 void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1303 MachineBasicBlock *DispatchBB, int FI) const;
1305 // Utility function to emit the low-level va_arg code for X86-64.
1306 MachineBasicBlock *
1307 EmitVAARG64WithCustomInserter(MachineInstr &MI,
1308 MachineBasicBlock *MBB) const;
1310 /// Utility function to emit the xmm reg save portion of va_start.
1311 MachineBasicBlock *
1312 EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
1313 MachineBasicBlock *BB) const;
1315 MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1316 MachineInstr &MI2,
1317 MachineBasicBlock *BB) const;
1319 MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1320 MachineBasicBlock *BB) const;
1322 MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
1323 MachineBasicBlock *BB) const;
1325 MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1326 MachineBasicBlock *BB) const;
1328 MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
1329 MachineBasicBlock *BB) const;
1331 MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1332 MachineBasicBlock *BB) const;
1334 MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1335 MachineBasicBlock *BB) const;
1337 MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1338 MachineBasicBlock *BB) const;
1340 MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
1341 MachineBasicBlock *BB) const;
1343 MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1344 MachineBasicBlock *MBB) const;
1346 void emitSetJmpShadowStackFix(MachineInstr &MI,
1347 MachineBasicBlock *MBB) const;
1349 MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1350 MachineBasicBlock *MBB) const;
1352 MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1353 MachineBasicBlock *MBB) const;
1355 MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
1356 MachineBasicBlock *MBB) const;
1358 MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1359 MachineBasicBlock *MBB) const;
1361 /// Emit nodes that will be selected as "test Op0,Op0", or something
1362 /// equivalent, for use with the given x86 condition code.
1363 SDValue EmitTest(SDValue Op0, unsigned X86CC, const SDLoc &dl,
1364 SelectionDAG &DAG) const;
1366 /// Emit nodes that will be selected as "cmp Op0,Op1", or something
1367 /// equivalent, for use with the given x86 condition code.
1368 SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
1369 SelectionDAG &DAG) const;
1371 /// Convert a comparison if required by the subtarget.
1372 SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
1374 /// Check if replacement of SQRT with RSQRT should be disabled.
1375 bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
1377 /// Use rsqrt* to speed up sqrt calculations.
1378 SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1379 int &RefinementSteps, bool &UseOneConstNR,
1380 bool Reciprocal) const override;
1382 /// Use rcp* to speed up fdiv calculations.
1383 SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1384 int &RefinementSteps) const override;
1386 /// Reassociate floating point divisions into multiply by reciprocal.
unsigned combineRepeatedFPDivisors() const override;
};
1390 namespace X86 {
1391 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1392 const TargetLibraryInfo *libInfo);
1393 } // end namespace X86
1395 // Base class for all X86 non-masked store operations.
1396 class X86StoreSDNode : public MemSDNode {
1397 public:
1398 X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1399 SDVTList VTs, EVT MemVT,
1400 MachineMemOperand *MMO)
1401 :MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1402 const SDValue &getValue() const { return getOperand(1); }
1403 const SDValue &getBasePtr() const { return getOperand(2); }
1405 static bool classof(const SDNode *N) {
1406 return N->getOpcode() == X86ISD::VTRUNCSTORES ||
N->getOpcode() == X86ISD::VTRUNCSTOREUS;
}
};
1411 // Base class for all X86 masked store operations.
1412 // The class has the same order of operands as MaskedStoreSDNode for
1413 // convenience.
1414 class X86MaskedStoreSDNode : public MemSDNode {
1415 public:
1416 X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
1417 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1418 MachineMemOperand *MMO)
1419 : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1421 const SDValue &getValue() const { return getOperand(1); }
1422 const SDValue &getBasePtr() const { return getOperand(2); }
1423 const SDValue &getMask() const { return getOperand(3); }
1425 static bool classof(const SDNode *N) {
1426 return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
}
};
1431 // X86 Truncating Store with Signed saturation.
1432 class TruncSStoreSDNode : public X86StoreSDNode {
1433 public:
1434 TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
1435 SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1436 : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1438 static bool classof(const SDNode *N) {
return N->getOpcode() == X86ISD::VTRUNCSTORES;
}
};
1443 // X86 Truncating Store with Unsigned saturation.
1444 class TruncUSStoreSDNode : public X86StoreSDNode {
1445 public:
1446 TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
1447 SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1448 : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1450 static bool classof(const SDNode *N) {
return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
}
};
1455 // X86 Truncating Masked Store with Signed saturation.
1456 class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
1457 public:
1458 MaskedTruncSStoreSDNode(unsigned Order,
1459 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1460 MachineMemOperand *MMO)
1461 : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1463 static bool classof(const SDNode *N) {
return N->getOpcode() == X86ISD::VMTRUNCSTORES;
}
};
1468 // X86 Truncating Masked Store with Unsigned saturation.
1469 class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
1470 public:
1471 MaskedTruncUSStoreSDNode(unsigned Order,
1472 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1473 MachineMemOperand *MMO)
1474 : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1476 static bool classof(const SDNode *N) {
return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
}
};
1481 // X86 specific Gather/Scatter nodes.
1482 // The class has the same order of operands as MaskedGatherScatterSDNode for
1483 // convenience.
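// Operand order, as implied by the accessors below (operand 0 is assumed to
// be the chain): 1 - passthru/value, 2 - mask, 3 - base pointer, 4 - index,
// 5 - scale.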
1484 class X86MaskedGatherScatterSDNode : public MemSDNode {
1485 public:
1486 X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order,
1487 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1488 MachineMemOperand *MMO)
1489 : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {}
1491 const SDValue &getBasePtr() const { return getOperand(3); }
1492 const SDValue &getIndex() const { return getOperand(4); }
1493 const SDValue &getMask() const { return getOperand(2); }
1494 const SDValue &getScale() const { return getOperand(5); }
1496 static bool classof(const SDNode *N) {
1497 return N->getOpcode() == X86ISD::MGATHER ||
N->getOpcode() == X86ISD::MSCATTER;
}
};
1502 class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1503 public:
1504 X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1505 EVT MemVT, MachineMemOperand *MMO)
1506 : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
1507 MMO) {}
1509 const SDValue &getPassThru() const { return getOperand(1); }
1511 static bool classof(const SDNode *N) {
return N->getOpcode() == X86ISD::MGATHER;
}
};
1516 class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1517 public:
1518 X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1519 EVT MemVT, MachineMemOperand *MMO)
1520 : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
1521 MMO) {}
1523 const SDValue &getValue() const { return getOperand(1); }
1525 static bool classof(const SDNode *N) {
return N->getOpcode() == X86ISD::MSCATTER;
}
};
1530 /// Generate unpacklo/unpackhi shuffle mask.
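/// For example, for MVT::v4i32 this produces <0,4,1,5> when Lo is true and
/// <2,6,3,7> when Lo is false (with Unary == false).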
1531 template <typename T = int>
1532 void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
1533 bool Unary) {
1534 assert(Mask.empty() && "Expected an empty shuffle mask vector");
1535 int NumElts = VT.getVectorNumElements();
1536 int NumEltsInLane = 128 / VT.getScalarSizeInBits();
1537 for (int i = 0; i < NumElts; ++i) {
1538 unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
1539 int Pos = (i % NumEltsInLane) / 2 + LaneStart;
1540 Pos += (Unary ? 0 : NumElts * (i % 2));
1541 Pos += (Lo ? 0 : NumEltsInLane / 2);
Mask.push_back(Pos);
}
}
1546 /// Helper function to scale a shuffle or target shuffle mask, replacing each
1547 /// mask index with the scaled sequential indices for an equivalent narrowed
1548 /// mask. This is the reverse process to canWidenShuffleElements, but can
1549 /// always succeed.
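/// For example, scaling the mask <1, -1, 0> by 2 produces
/// <2, 3, -1, -1, 0, 1>.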
1550 template <typename T>
1551 void scaleShuffleMask(int Scale, ArrayRef<T> Mask,
1552 SmallVectorImpl<T> &ScaledMask) {
1553 assert(0 < Scale && "Unexpected scaling factor");
1554 int NumElts = Mask.size();
1555 ScaledMask.assign(static_cast<size_t>(NumElts * Scale), -1);
1557 for (int i = 0; i != NumElts; ++i) {
1558 int M = Mask[i];
1560 // Repeat sentinel values in every mask element.
1561 if (M < 0) {
1562 for (int s = 0; s != Scale; ++s)
1563 ScaledMask[(Scale * i) + s] = M;
continue;
}
1567 // Scale mask element and increment across each mask element.
1568 for (int s = 0; s != Scale; ++s)
ScaledMask[(Scale * i) + s] = (Scale * M) + s;
}
}
1572 } // end namespace llvm
1574 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H