1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that X86 uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
17 #include "llvm/CodeGen/CallingConvLower.h"
18 #include "llvm/CodeGen/SelectionDAG.h"
19 #include "llvm/CodeGen/TargetLowering.h"
20 #include "llvm/Target/TargetOptions.h"
22 namespace llvm {
23 class X86Subtarget;
24 class X86TargetMachine;
26 namespace X86ISD {
27 // X86 Specific DAG Nodes
28 enum NodeType : unsigned {
29 // Start the numbering where the builtin ops leave off.
30 FIRST_NUMBER = ISD::BUILTIN_OP_END,
32 /// Bit scan forward.
33 BSF,
34 /// Bit scan reverse.
35 BSR,
37 /// Double shift instructions. These correspond to
38 /// X86::SHLDxx and X86::SHRDxx instructions.
39 SHLD,
40 SHRD,
42 /// Bitwise logical AND of floating point values. This corresponds
43 /// to X86::ANDPS or X86::ANDPD.
44 FAND,
46 /// Bitwise logical OR of floating point values. This corresponds
47 /// to X86::ORPS or X86::ORPD.
48 FOR,
50 /// Bitwise logical XOR of floating point values. This corresponds
51 /// to X86::XORPS or X86::XORPD.
52 FXOR,
54 /// Bitwise logical ANDNOT of floating point values. This
55 /// corresponds to X86::ANDNPS or X86::ANDNPD.
56 FANDN,
58 /// These operations represent an abstract X86 call
59 /// instruction, which includes a bunch of information. In particular the
60 /// operands of these nodes are:
61 ///
62 /// #0 - The incoming token chain
63 /// #1 - The callee
64 /// #2 - The number of arg bytes the caller pushes on the stack.
65 /// #3 - The number of arg bytes the callee pops off the stack.
66 /// #4 - The value to pass in AL/AX/EAX (optional)
67 /// #5 - The value to pass in DL/DX/EDX (optional)
68 ///
69 /// The result values of these nodes are:
70 ///
71 /// #0 - The outgoing token chain
72 /// #1 - The first register result value (optional)
73 /// #2 - The second register result value (optional)
74 ///
75 CALL,
77 /// Same as call except it adds the NoTrack prefix.
78 NT_CALL,
80 /// This operation implements the lowering for readcyclecounter.
81 RDTSC_DAG,
83 /// X86 Read Time-Stamp Counter and Processor ID.
84 RDTSCP_DAG,
86 /// X86 Read Performance Monitoring Counters.
87 RDPMC_DAG,
89 /// X86 compare and logical compare instructions.
90 CMP, COMI, UCOMI,
92 /// X86 bit-test instructions.
93 BT,
95 /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
96 /// operand, usually produced by a CMP instruction.
97 SETCC,
99 /// X86 Select
100 SELECTS,
102 // Same as SETCC except it's materialized with an SBB and the value is all
103 // ones or all zeros.
104 SETCC_CARRY, // R = carry_bit ? ~0 : 0
106 /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
107 /// Operands are two FP values to compare; result is a mask of
108 /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
109 FSETCC,
111 /// X86 FP SETCC, similar to above, but with output as an i1 mask and
112 /// with optional rounding mode.
113 FSETCCM, FSETCCM_RND,
115 /// X86 conditional moves. Operand 0 and operand 1 are the two values
116 /// to select from. Operand 2 is the condition code, and operand 3 is the
117 /// flag operand produced by a CMP or TEST instruction. It also writes a
118 /// flag result.
119 CMOV,
121 /// X86 conditional branches. Operand 0 is the chain operand, operand 1
122 /// is the block to branch if condition is true, operand 2 is the
123 /// condition code, and operand 3 is the flag operand produced by a CMP
124 /// or TEST instruction.
125 BRCOND,
127 /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
128 /// operand 1 is the target address.
129 NT_BRIND,
131 /// Return with a flag operand. Operand 0 is the chain operand, operand
132 /// 1 is the number of bytes of stack to pop.
133 RET_FLAG,
135 /// Return from interrupt. Operand 0 is the number of bytes to pop.
136 IRET,
138 /// Repeat fill, corresponds to X86::REP_STOSx.
139 REP_STOS,
141 /// Repeat move, corresponds to X86::REP_MOVSx.
142 REP_MOVS,
144 /// On Darwin, this node represents the result of the popl
145 /// at function entry, used for PIC code.
146 GlobalBaseReg,
148 /// A wrapper node for TargetConstantPool, TargetJumpTable,
149 /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
150 /// MCSymbol and TargetBlockAddress.
151 Wrapper,
153 /// Special wrapper used under X86-64 PIC mode for RIP
154 /// relative displacements.
155 WrapperRIP,
157 /// Copies a 64-bit value from the low word of an XMM vector
158 /// to an MMX vector.
159 MOVDQ2Q,
161 /// Copies a 32-bit value from the low word of a MMX
162 /// vector to a GPR.
163 MMX_MOVD2W,
165 /// Copies a GPR into the low 32-bit word of a MMX vector
166 /// and zero out the high word.
167 MMX_MOVW2D,
169 /// Extract an 8-bit value from a vector and zero extend it to
170 /// i32, corresponds to X86::PEXTRB.
171 PEXTRB,
173 /// Extract a 16-bit value from a vector and zero extend it to
174 /// i32, corresponds to X86::PEXTRW.
175 PEXTRW,
177 /// Insert any element of a 4 x float vector into any element
178 /// of a destination 4 x float vector.
179 INSERTPS,
181 /// Insert the lower 8-bits of a 32-bit value to a vector,
182 /// corresponds to X86::PINSRB.
183 PINSRB,
185 /// Insert the lower 16-bits of a 32-bit value to a vector,
186 /// corresponds to X86::PINSRW.
187 PINSRW,
189 /// Shuffle 16 8-bit values within a vector.
190 PSHUFB,
192 /// Compute Sum of Absolute Differences.
193 PSADBW,
194 /// Compute Double Block Packed Sum-Absolute-Differences
195 DBPSADBW,
197 /// Bitwise Logical AND NOT of Packed FP values.
198 ANDNP,
200 /// Blend where the selector is an immediate.
201 BLENDI,
203 /// Dynamic (non-constant condition) vector blend where only the sign bits
204 /// of the condition elements are used. This is used to enforce that the
205 /// condition mask is not valid for generic VSELECT optimizations. This
206 /// is also used to implement the intrinsics.
207 /// Operands are in VSELECT order: MASK, TRUE, FALSE
208 BLENDV,
210 /// Combined add and sub on an FP vector.
211 ADDSUB,
213 // FP vector ops with rounding mode.
214 FADD_RND, FADDS_RND,
215 FSUB_RND, FSUBS_RND,
216 FMUL_RND, FMULS_RND,
217 FDIV_RND, FDIVS_RND,
218 FMAX_RND, FMAXS_RND,
219 FMIN_RND, FMINS_RND,
220 FSQRT_RND, FSQRTS_RND,
222 // FP vector get exponent.
223 FGETEXP_RND, FGETEXPS_RND,
224 // Extract Normalized Mantissas.
225 VGETMANT, VGETMANT_RND, VGETMANTS, VGETMANTS_RND,
226 // FP Scale.
227 SCALEF,
228 SCALEFS,
230 // Unsigned Integer average.
231 AVG,
233 /// Integer horizontal add/sub.
234 HADD,
235 HSUB,
237 /// Floating point horizontal add/sub.
238 FHADD,
239 FHSUB,
241 // Detect Conflicts Within a Vector
242 CONFLICT,
244 /// Floating point max and min.
245 FMAX, FMIN,
247 /// Commutative FMIN and FMAX.
248 FMAXC, FMINC,
250 /// Scalar intrinsic floating point max and min.
251 FMAXS, FMINS,
253 /// Floating point reciprocal-sqrt and reciprocal approximation.
254 /// Note that these typically require refinement
255 /// in order to obtain suitable precision.
256 FRSQRT, FRCP,
258 // AVX-512 reciprocal approximations with a little more precision.
259 RSQRT14, RSQRT14S, RCP14, RCP14S,
261 // Thread Local Storage.
262 TLSADDR,
264 // Thread Local Storage. A call to get the start address
265 // of the TLS block for the current module.
266 TLSBASEADDR,
268 // Thread Local Storage. When calling to an OS provided
269 // thunk at the address from an earlier relocation.
270 TLSCALL,
272 // Exception Handling helpers.
273 EH_RETURN,
275 // SjLj exception handling setjmp.
276 EH_SJLJ_SETJMP,
278 // SjLj exception handling longjmp.
279 EH_SJLJ_LONGJMP,
281 // SjLj exception handling dispatch.
282 EH_SJLJ_SETUP_DISPATCH,
284 /// Tail call return. See X86TargetLowering::LowerCall for
285 /// the list of operands.
286 TC_RETURN,
288 // Vector move to low scalar and zero higher vector elements.
289 VZEXT_MOVL,
291 // Vector integer truncate.
292 VTRUNC,
293 // Vector integer truncate with unsigned/signed saturation.
294 VTRUNCUS, VTRUNCS,
296 // Masked version of the above. Used when less than a 128-bit result is
297 // produced since the mask only applies to the lower elements and can't
298 // be represented by a select.
299 // SRC, PASSTHRU, MASK
300 VMTRUNC, VMTRUNCUS, VMTRUNCS,
302 // Vector FP extend.
303 VFPEXT, VFPEXT_RND, VFPEXTS_RND,
305 // Vector FP round.
306 VFPROUND, VFPROUND_RND, VFPROUNDS_RND,
308 // Masked version of above. Used for v2f64->v4f32.
309 // SRC, PASSTHRU, MASK
310 VMFPROUND,
312 // 128-bit vector logical left / right shift
313 VSHLDQ, VSRLDQ,
315 // Vector shift elements
316 VSHL, VSRL, VSRA,
318 // Vector variable shift
319 VSHLV, VSRLV, VSRAV,
321 // Vector shift elements by immediate
322 VSHLI, VSRLI, VSRAI,
324 // Shifts of mask registers.
325 KSHIFTL, KSHIFTR,
327 // Bit rotate by immediate
328 VROTLI, VROTRI,
330 // Vector packed double/float comparison.
331 CMPP,
333 // Vector integer comparisons.
334 PCMPEQ, PCMPGT,
336 // v8i16 Horizontal minimum and position.
337 PHMINPOS,
339 MULTISHIFT,
341 /// Vector comparison generating mask bits for fp and
342 /// integer signed and unsigned data types.
343 CMPM,
344 // Vector comparison with rounding mode for FP values
345 CMPM_RND,
347 // Arithmetic operations with FLAGS results.
348 ADD, SUB, ADC, SBB, SMUL, UMUL,
349 OR, XOR, AND,
351 // Bit field extract.
352 BEXTR,
354 // Zero High Bits Starting with Specified Bit Position.
355 BZHI,
357 // X86-specific multiply by immediate.
358 MUL_IMM,
360 // Vector sign bit extraction.
361 MOVMSK,
363 // Vector bitwise comparisons.
364 PTEST,
366 // Vector packed fp sign bitwise comparisons.
367 TESTP,
369 // OR/AND test for masks.
370 KORTEST,
371 KTEST,
373 // ADD for masks.
374 KADD,
376 // Several flavors of instructions with vector shuffle behaviors.
377 // Saturated signed/unsigned packing.
378 PACKSS,
379 PACKUS,
380 // Intra-lane alignr.
381 PALIGNR,
382 // AVX512 inter-lane alignr.
383 VALIGN,
384 PSHUFD,
385 PSHUFHW,
386 PSHUFLW,
387 SHUFP,
388 // VBMI2 Concat & Shift.
389 VSHLD,
390 VSHRD,
391 VSHLDV,
392 VSHRDV,
393 // Shuffle Packed Values at 128-bit granularity.
394 SHUF128,
395 MOVDDUP,
396 MOVSHDUP,
397 MOVSLDUP,
398 MOVLHPS,
399 MOVHLPS,
400 MOVSD,
401 MOVSS,
402 UNPCKL,
403 UNPCKH,
404 VPERMILPV,
405 VPERMILPI,
406 VPERMI,
407 VPERM2X128,
409 // Variable Permute (VPERM).
410 // Res = VPERMV MaskV, V0
411 VPERMV,
413 // 3-op Variable Permute (VPERMT2).
414 // Res = VPERMV3 V0, MaskV, V1
415 VPERMV3,
417 // Bitwise ternary logic.
418 VPTERNLOG,
419 // Fix Up Special Packed Float32/64 values.
420 VFIXUPIMM,
421 VFIXUPIMMS,
422 // Range Restriction Calculation For Packed Pairs of Float32/64 values.
423 VRANGE, VRANGE_RND, VRANGES, VRANGES_RND,
424 // Reduce - Perform Reduction Transformation on scalar/packed FP.
425 VREDUCE, VREDUCE_RND, VREDUCES, VREDUCES_RND,
426 // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
427 // Also used by the legacy (V)ROUND intrinsics where we mask out the
428 // scaling part of the immediate.
429 VRNDSCALE, VRNDSCALE_RND, VRNDSCALES, VRNDSCALES_RND,
430 // Tests the type class of packed FP values.
431 VFPCLASS,
432 // Tests the type class of a scalar FP value.
433 VFPCLASSS,
435 // Broadcast scalar to vector.
436 VBROADCAST,
437 // Broadcast mask to vector.
438 VBROADCASTM,
439 // Broadcast subvector to vector.
440 SUBV_BROADCAST,
442 /// SSE4A Extraction and Insertion.
443 EXTRQI, INSERTQI,
445 // XOP arithmetic/logical shifts.
446 VPSHA, VPSHL,
447 // XOP signed/unsigned integer comparisons.
448 VPCOM, VPCOMU,
449 // XOP packed permute bytes.
450 VPPERM,
451 // XOP two source permutation.
452 VPERMIL2,
454 // Vector multiply packed unsigned doubleword integers.
455 PMULUDQ,
456 // Vector multiply packed signed doubleword integers.
457 PMULDQ,
458 // Vector Multiply Packed Unsigned Integers with Round and Scale.
459 MULHRS,
461 // Multiply and Add Packed Integers.
462 VPMADDUBSW, VPMADDWD,
464 // AVX512IFMA multiply and add.
465 // NOTE: These are different than the instruction and perform
466 // op0 x op1 + op2.
467 VPMADD52L, VPMADD52H,
469 // VNNI
470 VPDPBUSD,
471 VPDPBUSDS,
472 VPDPWSSD,
473 VPDPWSSDS,
475 // FMA nodes.
476 // We use the target independent ISD::FMA for the non-inverted case.
477 FNMADD,
478 FMSUB,
479 FNMSUB,
480 FMADDSUB,
481 FMSUBADD,
483 // FMA with rounding mode.
484 FMADD_RND,
485 FNMADD_RND,
486 FMSUB_RND,
487 FNMSUB_RND,
488 FMADDSUB_RND,
489 FMSUBADD_RND,
491 // Compress and expand.
492 COMPRESS,
493 EXPAND,
495 // Bits shuffle
496 VPSHUFBITQMB,
498 // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
499 SINT_TO_FP_RND, UINT_TO_FP_RND,
500 SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,
502 // Vector float/double to signed/unsigned integer.
503 CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
504 // Scalar float/double to signed/unsigned integer.
505 CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND,
507 // Vector float/double to signed/unsigned integer with truncation.
508 CVTTP2SI, CVTTP2UI, CVTTP2SI_RND, CVTTP2UI_RND,
509 // Scalar float/double to signed/unsigned integer with truncation.
510 CVTTS2SI, CVTTS2UI, CVTTS2SI_RND, CVTTS2UI_RND,
512 // Vector signed/unsigned integer to float/double.
513 CVTSI2P, CVTUI2P,
515 // Masked versions of above. Used for v2f64->v4f32.
516 // SRC, PASSTHRU, MASK
517 MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI,
518 MCVTSI2P, MCVTUI2P,
520 // Save xmm argument registers to the stack, according to %al. An operator
521 // is needed so that this can be expanded with control flow.
522 VASTART_SAVE_XMM_REGS,
524 // Windows's _chkstk call to do stack probing.
525 WIN_ALLOCA,
527 // For allocating variable amounts of stack space when using
528 // segmented stacks. Check if the current stacklet has enough space, and
529 // fall back to heap allocation if not.
530 SEG_ALLOCA,
532 // Memory barriers.
533 MEMBARRIER,
534 MFENCE,
536 // Store FP status word into i16 register.
537 FNSTSW16r,
539 // Store contents of %ah into %eflags.
540 SAHF,
542 // Get a random integer and indicate whether it is valid in CF.
543 RDRAND,
545 // Get a NIST SP800-90B & C compliant random integer and
546 // indicate whether it is valid in CF.
547 RDSEED,
549 // SSE42 string comparisons.
550 // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
551 // will emit one or two instructions based on which results are used. If
552 // both flags and index/mask are used, this allows us to use a single
553 // instruction since we won't have to pick an opcode for flags. Instead we
554 // can rely on the DAG to CSE everything and decide at isel.
555 PCMPISTR,
556 PCMPESTR,
558 // Test if in transactional execution.
559 XTEST,
561 // ERI instructions.
562 RSQRT28, RSQRT28S, RCP28, RCP28S, EXP2,
564 // Conversions between float and half-float.
565 CVTPS2PH, CVTPH2PS, CVTPH2PS_RND,
567 // Masked version of above.
568 // SRC, RND, PASSTHRU, MASK
569 MCVTPS2PH,
571 // Galois Field Arithmetic Instructions
572 GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,
574 // LWP insert record.
575 LWPINS,
577 // User level wait
578 UMWAIT, TPAUSE,
580 // Compare and swap.
581 LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
582 LCMPXCHG8_DAG,
583 LCMPXCHG16_DAG,
584 LCMPXCHG8_SAVE_EBX_DAG,
585 LCMPXCHG16_SAVE_RBX_DAG,
587 /// LOCK-prefixed arithmetic read-modify-write instructions.
588 /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
589 LADD, LSUB, LOR, LXOR, LAND,
591 // Load, scalar_to_vector, and zero extend.
592 VZEXT_LOAD,
594 // Store FP control word into i16 memory.
595 FNSTCW16m,
597 /// This instruction implements FP_TO_SINT with the
598 /// integer destination in memory and a FP reg source. This corresponds
599 /// to the X86::FIST*m instructions and the rounding mode change stuff. It
600 /// has two inputs (token chain and address) and two outputs (int value
601 /// and token chain). Memory VT specifies the type to store to.
602 FP_TO_INT_IN_MEM,
604 /// This instruction implements SINT_TO_FP with the
605 /// integer source in memory and FP reg result. This corresponds to the
606 /// X86::FILD*m instructions. It has two inputs (token chain and address)
607 /// and two outputs (FP value and token chain). FILD_FLAG also produces a
608 /// flag. The integer source type is specified by the memory VT.
609 FILD,
610 FILD_FLAG,
612 /// This instruction implements an extending load to FP stack slots.
613 /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
614 /// operand, and ptr to load from. The memory VT specifies the type to
615 /// load from.
616 FLD,
618 /// This instruction implements a truncating store to FP stack
619 /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
620 /// chain operand, value to store, and address. The memory VT specifies
621 /// the type to store as.
622 FST,
624 /// This instruction grabs the address of the next argument
625 /// from a va_list. (reads and modifies the va_list in memory)
626 VAARG_64,
628 // Vector truncating store with unsigned/signed saturation
629 VTRUNCSTOREUS, VTRUNCSTORES,
630 // Vector truncating masked store with unsigned/signed saturation
631 VMTRUNCSTOREUS, VMTRUNCSTORES,
633 // X86 specific gather and scatter
634 MGATHER, MSCATTER,
636 // WARNING: Do not add anything at the end unless you want the node to
637 // have a memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
638 // opcodes will be treated as target memory ops!
639 };
640 } // end namespace X86ISD
642 /// Define some predicates that are used for node matching.
643 namespace X86 {
644 /// Returns true if Elt is a constant zero or floating point constant +0.0.
645 bool isZeroNode(SDValue Elt);
647 /// Returns true if the given offset can be
648 /// fit into the displacement field of the instruction.
649 bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
650 bool hasSymbolicDisplacement = true);
652 /// Determines whether the callee is required to pop its
653 /// own arguments. Callee pop is necessary to support tail calls.
654 bool isCalleePop(CallingConv::ID CallingConv,
655 bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
657 } // end namespace X86
659 //===--------------------------------------------------------------------===//
660 // X86 Implementation of the TargetLowering interface
661 class X86TargetLowering final : public TargetLowering {
662 public:
663 explicit X86TargetLowering(const X86TargetMachine &TM,
664 const X86Subtarget &STI);
666 unsigned getJumpTableEncoding() const override;
667 bool useSoftFloat() const override;
669 void markLibCallAttributes(MachineFunction *MF, unsigned CC,
670 ArgListTy &Args) const override;
672 MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
673 return MVT::i8;
674 }
676 const MCExpr *
677 LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
678 const MachineBasicBlock *MBB, unsigned uid,
679 MCContext &Ctx) const override;
681 /// Returns relocation base for the given PIC jumptable.
682 SDValue getPICJumpTableRelocBase(SDValue Table,
683 SelectionDAG &DAG) const override;
684 const MCExpr *
685 getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
686 unsigned JTI, MCContext &Ctx) const override;
688 /// Return the desired alignment for ByVal aggregate
689 /// function arguments in the caller parameter area. For X86, aggregates
690 /// that contain SSE vectors are placed at 16-byte boundaries while the rest are at
691 /// 4-byte boundaries.
692 unsigned getByValTypeAlignment(Type *Ty,
693 const DataLayout &DL) const override;
695 /// Returns the target specific optimal type for load
696 /// and store operations as a result of memset, memcpy, and memmove
697 /// lowering. If DstAlign is zero that means any destination
698 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
699 /// means there isn't a need to check it against the alignment requirement,
700 /// probably because the source does not need to be loaded. If 'IsMemset' is
701 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
702 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
703 /// source is constant so it does not need to be loaded.
704 /// It returns EVT::Other if the type should be determined using generic
705 /// target-independent logic.
706 EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
707 bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
708 MachineFunction &MF) const override;
710 /// Returns true if it's safe to use load / store of the
711 /// specified type to expand memcpy / memset inline. This is mostly true
712 /// for all types except for some special cases. For example, on X86
713 /// targets without SSE2 f64 load / store are done with fldl / fstpl which
714 /// also does type conversion. Note the specified type doesn't have to be
715 /// legal as the hook is used before type legalization.
716 bool isSafeMemOpType(MVT VT) const override;
718 /// Returns true if the target allows unaligned memory accesses of the
719 /// specified type. Returns whether it is "fast" in the last argument.
720 bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
721 bool *Fast) const override;
723 /// Provide custom lowering hooks for some operations.
725 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
727 /// Places new result values for the node in Results (their number
728 /// and types must exactly match those of the original return values of
729 /// the node), or leaves Results empty, which indicates that the node is not
730 /// to be custom lowered after all.
731 void LowerOperationWrapper(SDNode *N,
732 SmallVectorImpl<SDValue> &Results,
733 SelectionDAG &DAG) const override;
735 /// Replace the results of node with an illegal result
736 /// type with new values built out of custom code.
738 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
739 SelectionDAG &DAG) const override;
741 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
743 // Return true if it is profitable to combine a BUILD_VECTOR with a
744 // stride-pattern to a shuffle and a truncate.
745 // Example of such a combine:
746 // v4i32 build_vector((extract_elt V, 1),
747 // (extract_elt V, 3),
748 // (extract_elt V, 5),
749 // (extract_elt V, 7))
750 // -->
751 // v4i32 truncate (bitcast (shuffle<1,u,3,u,4,u,5,u,6,u,7,u> V, u) to
752 // v4i64)
753 bool isDesirableToCombineBuildVectorToShuffleTruncate(
754 ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;
756 /// Return true if the target has native support for
757 /// the specified value type and it is 'desirable' to use the type for the
758 /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
759 /// instruction encodings are longer and some i16 instructions are slow.
760 bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
762 /// Return true if the target has native support for the
763 /// specified value type and it is 'desirable' to use the type. e.g. On x86
764 /// i16 is legal, but undesirable since i16 instruction encodings are longer
765 /// and some i16 instructions are slow.
766 bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
768 MachineBasicBlock *
769 EmitInstrWithCustomInserter(MachineInstr &MI,
770 MachineBasicBlock *MBB) const override;
772 /// This method returns the name of a target specific DAG node.
773 const char *getTargetNodeName(unsigned Opcode) const override;
775 bool mergeStoresAfterLegalization() const override { return true; }
777 bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
778 const SelectionDAG &DAG) const override;
780 bool isCheapToSpeculateCttz() const override;
782 bool isCheapToSpeculateCtlz() const override;
784 bool isCtlzFast() const override;
786 bool hasBitPreservingFPLogic(EVT VT) const override {
787 return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
788 }
790 bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
791 // If the pair to store is a mixture of float and int values, we will
792 // save two bitwise instructions and one float-to-int instruction and
793 // increase one store instruction. There is potentially a more
794 // significant benefit because it avoids the float->int domain switch
795 // for the input value. So it is more likely a win.
796 if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
797 (LTy.isInteger() && HTy.isFloatingPoint()))
798 return true;
799 // If the pair only contains int values, we will save two bitwise
800 // instructions and increase one store instruction (costing one more
801 // store buffer). Since the benefit is more blurred, we leave
802 // such pairs out until we get a testcase to prove it is a win.
803 return false;
804 }
806 bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
808 bool hasAndNotCompare(SDValue Y) const override;
810 bool hasAndNot(SDValue Y) const override;
812 bool preferShiftsToClearExtremeBits(SDValue Y) const override;
814 bool
815 shouldTransformSignedTruncationCheck(EVT XVT,
816 unsigned KeptBits) const override {
817 // For vectors, we don't have a preference.
818 if (XVT.isVector())
819 return false;
821 auto VTIsOk = [](EVT VT) -> bool {
822 return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
823 VT == MVT::i64;
824 };
826 // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.
827 // XVT will be larger than KeptBitsVT.
828 MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
829 return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
830 }
832 bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
833 if (DAG.getMachineFunction().getFunction().optForMinSize())
834 return false;
835 return true;
836 }
838 bool shouldSplatInsEltVarIndex(EVT VT) const override;
840 bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
841 return VT.isScalarInteger();
842 }
844 /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
845 MVT hasFastEqualityCompare(unsigned NumBits) const override;
847 /// Allow multiple load pairs per block for smaller and faster code.
848 unsigned getMemcmpEqZeroLoadsPerBlock() const override {
849 return 2;
850 }
852 /// Return the value type to use for ISD::SETCC.
853 EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
854 EVT VT) const override;
856 bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
857 TargetLoweringOpt &TLO) const override;
859 /// Determine which of the bits specified in Mask are known to be either
860 /// zero or one and return them in the KnownZero/KnownOne bitsets.
861 void computeKnownBitsForTargetNode(const SDValue Op,
862 KnownBits &Known,
863 const APInt &DemandedElts,
864 const SelectionDAG &DAG,
865 unsigned Depth = 0) const override;
867 /// Determine the number of bits in the operation that are sign bits.
868 unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
869 const APInt &DemandedElts,
870 const SelectionDAG &DAG,
871 unsigned Depth) const override;
873 bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
874 const APInt &DemandedElts,
875 APInt &KnownUndef,
876 APInt &KnownZero,
877 TargetLoweringOpt &TLO,
878 unsigned Depth) const override;
880 bool SimplifyDemandedBitsForTargetNode(SDValue Op,
881 const APInt &DemandedBits,
882 const APInt &DemandedElts,
883 KnownBits &Known,
884 TargetLoweringOpt &TLO,
885 unsigned Depth) const override;
887 SDValue unwrapAddress(SDValue N) const override;
889 SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
891 bool ExpandInlineAsm(CallInst *CI) const override;
893 ConstraintType getConstraintType(StringRef Constraint) const override;
895 /// Examine constraint string and operand type and determine a weight value.
896 /// The operand object must already have been set up with the operand type.
897 ConstraintWeight
898 getSingleConstraintMatchWeight(AsmOperandInfo &info,
899 const char *constraint) const override;
901 const char *LowerXConstraint(EVT ConstraintVT) const override;
903 /// Lower the specified operand into the Ops vector. If it is invalid, don't
904 /// add anything to Ops. If hasMemory is true it means one of the asm
905 /// constraints of the inline asm instruction being processed is 'm'.
906 void LowerAsmOperandForConstraint(SDValue Op,
907 std::string &Constraint,
908 std::vector<SDValue> &Ops,
909 SelectionDAG &DAG) const override;
911 unsigned
912 getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
913 if (ConstraintCode == "i")
914 return InlineAsm::Constraint_i;
915 else if (ConstraintCode == "o")
916 return InlineAsm::Constraint_o;
917 else if (ConstraintCode == "v")
918 return InlineAsm::Constraint_v;
919 else if (ConstraintCode == "X")
920 return InlineAsm::Constraint_X;
921 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
922 }
924 /// Handle Lowering flag assembly outputs.
925 SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, SDLoc DL,
926 const AsmOperandInfo &Constraint,
927 SelectionDAG &DAG) const override;
929 /// Given a physical register constraint
930 /// (e.g. {edx}), return the register number and the register class for the
931 /// register. This should only be used for C_Register constraints. On
932 /// error, this returns a register number of 0.
933 std::pair<unsigned, const TargetRegisterClass *>
934 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
935 StringRef Constraint, MVT VT) const override;
937 /// Return true if the addressing mode represented
938 /// by AM is legal for this target, for a load/store of the specified type.
939 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
940 Type *Ty, unsigned AS,
941 Instruction *I = nullptr) const override;
943 /// Return true if the specified immediate is legal
944 /// icmp immediate, that is the target has icmp instructions which can
945 /// compare a register against the immediate without having to materialize
946 /// the immediate into a register.
947 bool isLegalICmpImmediate(int64_t Imm) const override;
949 /// Return true if the specified immediate is legal
950 /// add immediate, that is the target has add instructions which can
951 /// add a register and the immediate without having to materialize
952 /// the immediate into a register.
953 bool isLegalAddImmediate(int64_t Imm) const override;
955 bool isLegalStoreImmediate(int64_t Imm) const override;
957 /// Return the cost of the scaling factor used in the addressing
958 /// mode represented by AM for this target, for a load/store
959 /// of the specified type.
960 /// If the AM is supported, the return value must be >= 0.
961 /// If the AM is not supported, it returns a negative value.
962 int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
963 unsigned AS) const override;
965 bool isVectorShiftByScalarCheap(Type *Ty) const override;
967 /// Return true if it's free to truncate a value of
968 /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
969 /// register EAX to i16 by referencing its sub-register AX.
970 bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
971 bool isTruncateFree(EVT VT1, EVT VT2) const override;
973 bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
975 /// Return true if any actual instruction that defines a
976 /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
977 /// register. This does not necessarily include registers defined in
978 /// unknown ways, such as incoming arguments, or copies from unknown
979 /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
980 /// does not necessarily apply to truncate instructions. e.g. on x86-64,
981 /// all instructions that define 32-bit values implicit zero-extend the
982 /// result out to 64 bits.
983 bool isZExtFree(Type *Ty1, Type *Ty2) const override;
984 bool isZExtFree(EVT VT1, EVT VT2) const override;
985 bool isZExtFree(SDValue Val, EVT VT2) const override;
987 /// Return true if folding a vector load into ExtVal (a sign, zero, or any
988 /// extend node) is profitable.
989 bool isVectorLoadExtDesirable(SDValue) const override;
991 /// Return true if an FMA operation is faster than a pair of fmul and fadd
992 /// instructions. fmuladd intrinsics will be expanded to FMAs when this
993 /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
994 bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
996 /// Return true if it's profitable to narrow
997 /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
998 /// from i32 to i8 but not from i32 to i16.
999 bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
1001 /// Given an intrinsic, checks if on the target the intrinsic will need to map
1002 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1003 /// true and stores the intrinsic information into the IntrinsicInfo that was
1004 /// passed to the function.
1005 bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1006 MachineFunction &MF,
1007 unsigned Intrinsic) const override;
1009 /// Returns true if the target can instruction select the
1010 /// specified FP immediate natively. If false, the legalizer will
1011 /// materialize the FP immediate as a load from a constant pool.
1012 bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
1014 /// Targets can use this to indicate that they only support *some*
1015 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1016 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1017 /// be legal.
1018 bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1020 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1021 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1022 /// constant pool entry.
1023 bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1025 /// Returns true if lowering to a jump table is allowed.
1026 bool areJTsAllowed(const Function *Fn) const override;
1028 /// If true, then instruction selection should
1029 /// seek to shrink the FP constant of the specified type to a smaller type
1030 /// in order to save space and / or reduce runtime.
1031 bool ShouldShrinkFPConstant(EVT VT) const override {
1032 // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
1033 // expensive than a straight movsd. On the other hand, it's important to
1034 // shrink long double fp constant since fldt is very slow.
1035 return !X86ScalarSSEf64 || VT == MVT::f80;
1036 }
1038 /// Return true if we believe it is correct and profitable to reduce the
1039 /// load node to a smaller type.
1040 bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1041 EVT NewVT) const override;
1043 /// Return true if the specified scalar FP type is computed in an SSE
1044 /// register, not on the X87 floating point stack.
1045 bool isScalarFPTypeInSSEReg(EVT VT) const {
1046 return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
1047 (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
1048 }
1050 /// Returns true if it is beneficial to convert a load of a constant
1051 /// to just the constant itself.
1052 bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1053 Type *Ty) const override;
1055 bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const override;
1057 bool convertSelectOfConstantsToMath(EVT VT) const override;
1059 bool decomposeMulByConstant(EVT VT, SDValue C) const override;
1061 bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
1062 bool IsSigned) const override;
1064 /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1065 /// with this index.
1066 bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1067 unsigned Index) const override;
1069 /// Scalar ops always have equal or better analysis/performance/power than
1070 /// the vector equivalent, so this always makes sense if the scalar op is
1071 /// supported.
1072 bool shouldScalarizeBinop(SDValue) const override;
1074 bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1075 unsigned AddrSpace) const override {
1076 // If we can replace more than 2 scalar stores, there will be a reduction
1077 // in instructions even after we add a vector constant load.
1078 return NumElem > 2;
1079 }
1081 bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT) const override;
1083 /// Intel processors have a unified instruction and data cache
1084 const char * getClearCacheBuiltinName() const override {
1085 return nullptr; // nothing to do, move along.
1086 }
1088 unsigned getRegisterByName(const char* RegName, EVT VT,
1089 SelectionDAG &DAG) const override;
1091 /// If a physical register, this returns the register that receives the
1092 /// exception address on entry to an EH pad.
1093 unsigned
1094 getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1096 /// If a physical register, this returns the register that receives the
1097 /// exception typeid on entry to a landing pad.
1098 unsigned
1099 getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1101 virtual bool needsFixedCatchObjects() const override;
1103 /// This method returns a target specific FastISel object,
1104 /// or null if the target does not support "fast" ISel.
1105 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1106 const TargetLibraryInfo *libInfo) const override;
1108 /// If the target has a standard location for the stack protector cookie,
1109 /// returns the address of that location. Otherwise, returns nullptr.
1110 Value *getIRStackGuard(IRBuilder<> &IRB) const override;
1112 bool useLoadStackGuardNode() const override;
1113 bool useStackGuardXorFP() const override;
1114 void insertSSPDeclarations(Module &M) const override;
1115 Value *getSDagStackGuard(const Module &M) const override;
1116 Function *getSSPStackGuardCheck(const Module &M) const override;
1117 SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1118 const SDLoc &DL) const override;
1121 /// Return true if the target stores SafeStack pointer at a fixed offset in
1122 /// some non-standard address space, and populates the address space and
1123 /// offset as appropriate.
1124 Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
1126 SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
1127 SelectionDAG &DAG) const;
1129 bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
1131 /// Customize the preferred legalization strategy for certain types.
1132 LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1134 MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1135 EVT VT) const override;
1137 unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1138 CallingConv::ID CC,
1139 EVT VT) const override;
1141 bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1143 bool supportSwiftError() const override;
1145 StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1147 bool hasVectorBlend() const override { return true; }
1149 unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1151 /// Lower interleaved load(s) into target specific
1152 /// instructions/intrinsics.
1153 bool lowerInterleavedLoad(LoadInst *LI,
1154 ArrayRef<ShuffleVectorInst *> Shuffles,
1155 ArrayRef<unsigned> Indices,
1156 unsigned Factor) const override;
1158 /// Lower interleaved store(s) into target specific
1159 /// instructions/intrinsics.
1160 bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1161 unsigned Factor) const override;
1163 SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
1164 SDValue Addr, SelectionDAG &DAG)
1165 const override;
1167 protected:
1168 std::pair<const TargetRegisterClass *, uint8_t>
1169 findRepresentativeClass(const TargetRegisterInfo *TRI,
1170 MVT VT) const override;
1172 private:
1173 /// Keep a reference to the X86Subtarget around so that we can
1174 /// make the right decision when generating code for different targets.
1175 const X86Subtarget &Subtarget;
1177 /// Select between SSE or x87 floating point ops.
1178 /// When SSE is available, use it for f32 operations.
1179 /// When SSE2 is available, use it for f64 operations.
1180 bool X86ScalarSSEf32;
1181 bool X86ScalarSSEf64;
1183 /// A list of legal FP immediates.
1184 std::vector<APFloat> LegalFPImmediates;
1186 /// Indicate that this x86 target can instruction
1187 /// select the specified FP immediate natively.
1188 void addLegalFPImmediate(const APFloat& Imm) {
1189 LegalFPImmediates.push_back(Imm);
1190 }
1192 SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
1193 CallingConv::ID CallConv, bool isVarArg,
1194 const SmallVectorImpl<ISD::InputArg> &Ins,
1195 const SDLoc &dl, SelectionDAG &DAG,
1196 SmallVectorImpl<SDValue> &InVals,
1197 uint32_t *RegMask) const;
1198 SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1199 const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1200 const SDLoc &dl, SelectionDAG &DAG,
1201 const CCValAssign &VA, MachineFrameInfo &MFI,
1202 unsigned i) const;
1203 SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1204 const SDLoc &dl, SelectionDAG &DAG,
1205 const CCValAssign &VA,
1206 ISD::ArgFlagsTy Flags) const;
1208 // Call lowering helpers.
1210 /// Check whether the call is eligible for tail call optimization. Targets
1211 /// that want to do tail call optimization should implement this function.
1212 bool IsEligibleForTailCallOptimization(SDValue Callee,
1213 CallingConv::ID CalleeCC,
1214 bool isVarArg,
1215 bool isCalleeStructRet,
1216 bool isCallerStructRet,
1217 Type *RetTy,
1218 const SmallVectorImpl<ISD::OutputArg> &Outs,
1219 const SmallVectorImpl<SDValue> &OutVals,
1220 const SmallVectorImpl<ISD::InputArg> &Ins,
1221 SelectionDAG& DAG) const;
1222 SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1223 SDValue Chain, bool IsTailCall,
1224 bool Is64Bit, int FPDiff,
1225 const SDLoc &dl) const;
1227 unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1228 SelectionDAG &DAG) const;
1230 unsigned getAddressSpace(void) const;
1232 SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned) const;
1234 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1235 SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1236 SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1237 SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1239 unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
1240 const unsigned char OpFlags = 0) const;
1241 SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1242 SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1243 SDValue LowerGlobalAddress(const GlobalValue *GV, const SDLoc &dl,
1244 int64_t Offset, SelectionDAG &DAG) const;
1245 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1246 SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1247 SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1249 SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1250 SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1251 SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1252 SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1253 SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1254 SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1255 SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1256 SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1257 SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1258 SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1259 SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1260 SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1261 SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1262 SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1263 SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1264 SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1265 SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1266 SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1267 SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1268 SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1269 SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1270 SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
1271 SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1272 SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
1273 SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
1274 SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1276 SDValue
1277 LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1278 const SmallVectorImpl<ISD::InputArg> &Ins,
1279 const SDLoc &dl, SelectionDAG &DAG,
1280 SmallVectorImpl<SDValue> &InVals) const override;
1281 SDValue LowerCall(CallLoweringInfo &CLI,
1282 SmallVectorImpl<SDValue> &InVals) const override;
1284 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1285 const SmallVectorImpl<ISD::OutputArg> &Outs,
1286 const SmallVectorImpl<SDValue> &OutVals,
1287 const SDLoc &dl, SelectionDAG &DAG) const override;
1289 bool supportSplitCSR(MachineFunction *MF) const override {
1290 return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1291 MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1292 }
1293 void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1294 void insertCopiesSplitCSR(
1295 MachineBasicBlock *Entry,
1296 const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1298 bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1300 bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1302 EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1303 ISD::NodeType ExtendKind) const override;
1305 bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1306 bool isVarArg,
1307 const SmallVectorImpl<ISD::OutputArg> &Outs,
1308 LLVMContext &Context) const override;
1310 const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1312 TargetLoweringBase::AtomicExpansionKind
1313 shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
1314 bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1315 TargetLoweringBase::AtomicExpansionKind
1316 shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1318 LoadInst *
1319 lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1321 bool needsCmpXchgNb(Type *MemType) const;
1323 void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1324 MachineBasicBlock *DispatchBB, int FI) const;
1326 // Utility function to emit the low-level va_arg code for X86-64.
1327 MachineBasicBlock *
1328 EmitVAARG64WithCustomInserter(MachineInstr &MI,
1329 MachineBasicBlock *MBB) const;
1331 /// Utility function to emit the xmm reg save portion of va_start.
1332 MachineBasicBlock *
1333 EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
1334 MachineBasicBlock *BB) const;
1336 MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1337 MachineInstr &MI2,
1338 MachineBasicBlock *BB) const;
1340 MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1341 MachineBasicBlock *BB) const;
1343 MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
1344 MachineBasicBlock *BB) const;
1346 MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1347 MachineBasicBlock *BB) const;
1349 MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
1350 MachineBasicBlock *BB) const;
1352 MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1353 MachineBasicBlock *BB) const;
1355 MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1356 MachineBasicBlock *BB) const;
1358 MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1359 MachineBasicBlock *BB) const;
1361 MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
1362 MachineBasicBlock *BB) const;
1364 MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1365 MachineBasicBlock *MBB) const;
1367 void emitSetJmpShadowStackFix(MachineInstr &MI,
1368 MachineBasicBlock *MBB) const;
1370 MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1371 MachineBasicBlock *MBB) const;
1373 MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1374 MachineBasicBlock *MBB) const;
1376 MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
1377 MachineBasicBlock *MBB) const;
1379 MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1380 MachineBasicBlock *MBB) const;
1382 /// Emit nodes that will be selected as "cmp Op0,Op1", or something
1383 /// equivalent, for use with the given x86 condition code.
1384 SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
1385 SelectionDAG &DAG) const;
1387 /// Convert a comparison if required by the subtarget.
1388 SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
1390 /// Emit flags for the given setcc condition and operands. Also returns the
1391 /// corresponding X86 condition code constant in X86CC.
1392 SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1,
1393 ISD::CondCode CC, const SDLoc &dl,
1394 SelectionDAG &DAG,
1395 SDValue &X86CC) const;
1397 /// Check if replacement of SQRT with RSQRT should be disabled.
1398 bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
1400 /// Use rsqrt* to speed up sqrt calculations.
1401 SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1402 int &RefinementSteps, bool &UseOneConstNR,
1403 bool Reciprocal) const override;
1405 /// Use rcp* to speed up fdiv calculations.
1406 SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1407 int &RefinementSteps) const override;
1409 /// Reassociate floating point divisions into multiply by reciprocal.
1410 unsigned combineRepeatedFPDivisors() const override;
1411 };
1413 namespace X86 {
1414 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1415 const TargetLibraryInfo *libInfo);
1416 } // end namespace X86
1418 // Base class for all X86 non-masked store operations.
1419 class X86StoreSDNode : public MemSDNode {
1420 public:
1421 X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1422 SDVTList VTs, EVT MemVT,
1423 MachineMemOperand *MMO)
1424 :MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1425 const SDValue &getValue() const { return getOperand(1); }
1426 const SDValue &getBasePtr() const { return getOperand(2); }
1428 static bool classof(const SDNode *N) {
1429 return N->getOpcode() == X86ISD::VTRUNCSTORES ||
1430 N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1431 }
1432 };
1434 // Base class for all X86 masked store operations.
1435 // The class has the same order of operands as MaskedStoreSDNode for
1436 // convenience.
1437 class X86MaskedStoreSDNode : public MemSDNode {
1438 public:
1439 X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
1440 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1441 MachineMemOperand *MMO)
1442 : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1444 const SDValue &getValue() const { return getOperand(1); }
1445 const SDValue &getBasePtr() const { return getOperand(2); }
1446 const SDValue &getMask() const { return getOperand(3); }
1448 static bool classof(const SDNode *N) {
1449 return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
1450 N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1451 }
1452 };
1454 // X86 Truncating Store with Signed saturation.
1455 class TruncSStoreSDNode : public X86StoreSDNode {
1456 public:
1457 TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
1458 SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1459 : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1461 static bool classof(const SDNode *N) {
1462 return N->getOpcode() == X86ISD::VTRUNCSTORES;
1463 }
1464 };
1466 // X86 Truncating Store with Unsigned saturation.
1467 class TruncUSStoreSDNode : public X86StoreSDNode {
1468 public:
1469 TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
1470 SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1471 : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1473 static bool classof(const SDNode *N) {
1474 return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1475 }
1476 };
1478 // X86 Truncating Masked Store with Signed saturation.
1479 class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
1480 public:
1481 MaskedTruncSStoreSDNode(unsigned Order,
1482 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1483 MachineMemOperand *MMO)
1484 : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1486 static bool classof(const SDNode *N) {
1487 return N->getOpcode() == X86ISD::VMTRUNCSTORES;
1488 }
1489 };
1491 // X86 Truncating Masked Store with Unsigned saturation.
1492 class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
1493 public:
1494 MaskedTruncUSStoreSDNode(unsigned Order,
1495 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1496 MachineMemOperand *MMO)
1497 : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1499 static bool classof(const SDNode *N) {
1500 return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1501 }
1502 };
1504 // X86 specific Gather/Scatter nodes.
1505 // The class has the same order of operands as MaskedGatherScatterSDNode for
1506 // convenience.
1507 class X86MaskedGatherScatterSDNode : public MemSDNode {
1508 public:
1509 X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order,
1510 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1511 MachineMemOperand *MMO)
1512 : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {}
1514 const SDValue &getBasePtr() const { return getOperand(3); }
1515 const SDValue &getIndex() const { return getOperand(4); }
1516 const SDValue &getMask() const { return getOperand(2); }
1517 const SDValue &getScale() const { return getOperand(5); }
1519 static bool classof(const SDNode *N) {
1520 return N->getOpcode() == X86ISD::MGATHER ||
1521 N->getOpcode() == X86ISD::MSCATTER;
1522 }
1523 };
1525 class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1526 public:
1527 X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1528 EVT MemVT, MachineMemOperand *MMO)
1529 : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
1530 MMO) {}
1532 const SDValue &getPassThru() const { return getOperand(1); }
1534 static bool classof(const SDNode *N) {
1535 return N->getOpcode() == X86ISD::MGATHER;
1536 }
1537 };
1539 class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1540 public:
1541 X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1542 EVT MemVT, MachineMemOperand *MMO)
1543 : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
1544 MMO) {}
1546 const SDValue &getValue() const { return getOperand(1); }
1548 static bool classof(const SDNode *N) {
1549 return N->getOpcode() == X86ISD::MSCATTER;
1550 }
1551 };
1553 /// Generate unpacklo/unpackhi shuffle mask.
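/// For illustration (derived from the loop below): for a v4i32 type with
/// Lo = true and Unary = false this produces the mask <0, 4, 1, 5>, matching
/// the UNPCKLPS/PUNPCKLDQ interleave order.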
1554 template <typename T = int>
1555 void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
1556 bool Unary) {
1557 assert(Mask.empty() && "Expected an empty shuffle mask vector");
1558 int NumElts = VT.getVectorNumElements();
1559 int NumEltsInLane = 128 / VT.getScalarSizeInBits();
1560 for (int i = 0; i < NumElts; ++i) {
1561 unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
1562 int Pos = (i % NumEltsInLane) / 2 + LaneStart;
1563 Pos += (Unary ? 0 : NumElts * (i % 2));
1564 Pos += (Lo ? 0 : NumEltsInLane / 2);
1565 Mask.push_back(Pos);
1566 }
1567 }
1569 /// Helper function to scale a shuffle or target shuffle mask, replacing each
1570 /// mask index with the scaled sequential indices for an equivalent narrowed
1571 /// mask. This is the reverse process to canWidenShuffleElements, but can
1572 /// always succeed.
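/// For illustration (derived from the loop below): with Scale = 2 and
/// Mask = <0, -1> the scaled mask is <0, 1, -1, -1>; sentinel (negative)
/// entries are simply repeated.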
1573 template <typename T>
1574 void scaleShuffleMask(int Scale, ArrayRef<T> Mask,
1575 SmallVectorImpl<T> &ScaledMask) {
1576 assert(0 < Scale && "Unexpected scaling factor");
1577 int NumElts = Mask.size();
1578 ScaledMask.assign(static_cast<size_t>(NumElts * Scale), -1);
1580 for (int i = 0; i != NumElts; ++i) {
1581 int M = Mask[i];
1583 // Repeat sentinel values in every mask element.
1584 if (M < 0) {
1585 for (int s = 0; s != Scale; ++s)
1586 ScaledMask[(Scale * i) + s] = M;
1587 continue;
1588 }
1590 // Scale mask element and increment across each mask element.
1591 for (int s = 0; s != Scale; ++s)
1592 ScaledMask[(Scale * i) + s] = (Scale * M) + s;
1593 }
1594 }
1595 } // end namespace llvm
1597 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H