1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that X86 uses to lower LLVM code into a
10 // selection DAG.
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
17 #include "llvm/CodeGen/CallingConvLower.h"
18 #include "llvm/CodeGen/SelectionDAG.h"
19 #include "llvm/CodeGen/TargetLowering.h"
20 #include "llvm/Target/TargetOptions.h"
22 namespace llvm {
23 class X86Subtarget;
24 class X86TargetMachine;
26 namespace X86ISD {
27 // X86 Specific DAG Nodes
28 enum NodeType : unsigned {
29 // Start the numbering where the builtin ops leave off.
30 FIRST_NUMBER = ISD::BUILTIN_OP_END,
32 /// Bit scan forward.
33 BSF,
34 /// Bit scan reverse.
35 BSR,
37 /// Double shift instructions. These correspond to
38 /// X86::SHLDxx and X86::SHRDxx instructions.
39 SHLD,
40 SHRD,
42 /// Bitwise logical AND of floating point values. This corresponds
43 /// to X86::ANDPS or X86::ANDPD.
44 FAND,
46 /// Bitwise logical OR of floating point values. This corresponds
47 /// to X86::ORPS or X86::ORPD.
48 FOR,
50 /// Bitwise logical XOR of floating point values. This corresponds
51 /// to X86::XORPS or X86::XORPD.
52 FXOR,
54 /// Bitwise logical ANDNOT of floating point values. This
55 /// corresponds to X86::ANDNPS or X86::ANDNPD.
56 FANDN,
58 /// These operations represent an abstract X86 call
59 /// instruction, which includes a bunch of information. In particular the
60 /// operands of these nodes are:
61 ///
62 /// #0 - The incoming token chain
63 /// #1 - The callee
64 /// #2 - The number of arg bytes the caller pushes on the stack.
65 /// #3 - The number of arg bytes the callee pops off the stack.
66 /// #4 - The value to pass in AL/AX/EAX (optional)
67 /// #5 - The value to pass in DL/DX/EDX (optional)
68 ///
69 /// The result values of these nodes are:
70 ///
71 /// #0 - The outgoing token chain
72 /// #1 - The first register result value (optional)
73 /// #2 - The second register result value (optional)
74 ///
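/// As a rough illustration of how these nodes are created (a sketch only;
/// the real operand list is assembled in X86TargetLowering::LowerCall and
/// carries far more state than shown here; Chain, Callee, dl and DAG are
/// assumed to come from the surrounding lowering code):
/// \code
///   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
///   SmallVector<SDValue, 8> Ops;
///   Ops.push_back(Chain);   // #0 - incoming token chain
///   Ops.push_back(Callee);  // #1 - the callee
///   Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
/// \endcode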
75 CALL,
77 /// Same as call except it adds the NoTrack prefix.
78 NT_CALL,
80 /// X86 compare and logical compare instructions.
81 CMP, COMI, UCOMI,
83 /// X86 bit-test instructions.
84 BT,
86 /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
87 /// operand, usually produced by a CMP instruction.
88 SETCC,
90 /// X86 Select
91 SELECTS,
93 // Same as SETCC except it's materialized with an sbb, and the value is all
94 // ones or all zeros.
95 SETCC_CARRY, // R = carry_bit ? ~0 : 0
97 /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
98 /// Operands are two FP values to compare; result is a mask of
99 /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
100 FSETCC,
102 /// X86 FP SETCC, similar to above, but with output as an i1 mask,
103 /// and a version with SAE.
104 FSETCCM, FSETCCM_SAE,
106 /// X86 conditional moves. Operand 0 and operand 1 are the two values
107 /// to select from. Operand 2 is the condition code, and operand 3 is the
108 /// flag operand produced by a CMP or TEST instruction.
109 CMOV,
111 /// X86 conditional branches. Operand 0 is the chain operand, operand 1
112 /// is the block to branch if condition is true, operand 2 is the
113 /// condition code, and operand 3 is the flag operand produced by a CMP
114 /// or TEST instruction.
115 BRCOND,
117 /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
118 /// operand 1 is the target address.
119 NT_BRIND,
121 /// Return with a flag operand. Operand 0 is the chain operand, operand
122 /// 1 is the number of bytes of stack to pop.
123 RET_FLAG,
125 /// Return from interrupt. Operand 0 is the number of bytes to pop.
126 IRET,
128 /// Repeat fill, corresponds to X86::REP_STOSx.
129 REP_STOS,
131 /// Repeat move, corresponds to X86::REP_MOVSx.
132 REP_MOVS,
134 /// On Darwin, this node represents the result of the popl
135 /// at function entry, used for PIC code.
136 GlobalBaseReg,
138 /// A wrapper node for TargetConstantPool, TargetJumpTable,
139 /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
140 /// MCSymbol and TargetBlockAddress.
141 Wrapper,
143 /// Special wrapper used under X86-64 PIC mode for RIP
144 /// relative displacements.
145 WrapperRIP,
147 /// Copies a 64-bit value from the low word of an XMM vector
148 /// to an MMX vector.
149 MOVDQ2Q,
151 /// Copies a 32-bit value from the low word of a MMX
152 /// vector to a GPR.
153 MMX_MOVD2W,
155 /// Copies a GPR into the low 32-bit word of an MMX vector
156 /// and zeroes out the high word.
157 MMX_MOVW2D,
159 /// Extract an 8-bit value from a vector and zero extend it to
160 /// i32, corresponds to X86::PEXTRB.
161 PEXTRB,
163 /// Extract a 16-bit value from a vector and zero extend it to
164 /// i32, corresponds to X86::PEXTRW.
165 PEXTRW,
167 /// Insert any element of a 4 x float vector into any element
168 /// of a destination 4 x float vector.
169 INSERTPS,
171 /// Insert the lower 8-bits of a 32-bit value to a vector,
172 /// corresponds to X86::PINSRB.
173 PINSRB,
175 /// Insert the lower 16-bits of a 32-bit value to a vector,
176 /// corresponds to X86::PINSRW.
177 PINSRW,
179 /// Shuffle 16 8-bit values within a vector.
180 PSHUFB,
182 /// Compute Sum of Absolute Differences.
183 PSADBW,
184 /// Compute Double Block Packed Sum-Absolute-Differences
185 DBPSADBW,
187 /// Bitwise Logical AND NOT of Packed FP values.
188 ANDNP,
190 /// Blend where the selector is an immediate.
191 BLENDI,
193 /// Dynamic (non-constant condition) vector blend where only the sign bits
194 /// of the condition elements are used. This is used to enforce that the
195 /// condition mask is not valid for generic VSELECT optimizations. This
196 /// is also used to implement the intrinsics.
197 /// Operands are in VSELECT order: MASK, TRUE, FALSE
198 BLENDV,
200 /// Combined add and sub on an FP vector.
201 ADDSUB,
203 // FP vector ops with rounding mode.
204 FADD_RND, FADDS, FADDS_RND,
205 FSUB_RND, FSUBS, FSUBS_RND,
206 FMUL_RND, FMULS, FMULS_RND,
207 FDIV_RND, FDIVS, FDIVS_RND,
208 FMAX_SAE, FMAXS_SAE,
209 FMIN_SAE, FMINS_SAE,
210 FSQRT_RND, FSQRTS, FSQRTS_RND,
212 // FP vector get exponent.
213 FGETEXP, FGETEXP_SAE, FGETEXPS, FGETEXPS_SAE,
214 // Extract Normalized Mantissas.
215 VGETMANT, VGETMANT_SAE, VGETMANTS, VGETMANTS_SAE,
216 // FP Scale.
217 SCALEF, SCALEF_RND,
218 SCALEFS, SCALEFS_RND,
220 // Unsigned Integer average.
221 AVG,
223 /// Integer horizontal add/sub.
224 HADD,
225 HSUB,
227 /// Floating point horizontal add/sub.
228 FHADD,
229 FHSUB,
231 // Detect Conflicts Within a Vector
232 CONFLICT,
234 /// Floating point max and min.
235 FMAX, FMIN,
237 /// Commutative FMIN and FMAX.
238 FMAXC, FMINC,
240 /// Scalar intrinsic floating point max and min.
241 FMAXS, FMINS,
243 /// Floating point reciprocal-sqrt and reciprocal approximation.
244 /// Note that these typically require refinement
245 /// in order to obtain suitable precision.
246 FRSQRT, FRCP,
248 // AVX-512 reciprocal approximations with a little more precision.
249 RSQRT14, RSQRT14S, RCP14, RCP14S,
251 // Thread Local Storage.
252 TLSADDR,
254 // Thread Local Storage. A call to get the start address
255 // of the TLS block for the current module.
256 TLSBASEADDR,
258 // Thread Local Storage. A call to an OS-provided
259 // thunk at the address from an earlier relocation.
260 TLSCALL,
262 // Exception Handling helpers.
263 EH_RETURN,
265 // SjLj exception handling setjmp.
266 EH_SJLJ_SETJMP,
268 // SjLj exception handling longjmp.
269 EH_SJLJ_LONGJMP,
271 // SjLj exception handling dispatch.
272 EH_SJLJ_SETUP_DISPATCH,
274 /// Tail call return. See X86TargetLowering::LowerCall for
275 /// the list of operands.
276 TC_RETURN,
278 // Vector move to low scalar and zero higher vector elements.
279 VZEXT_MOVL,
281 // Vector integer truncate.
282 VTRUNC,
283 // Vector integer truncate with unsigned/signed saturation.
284 VTRUNCUS, VTRUNCS,
286 // Masked version of the above. Used when less than a 128-bit result is
287 // produced since the mask only applies to the lower elements and can't
288 // be represented by a select.
289 // SRC, PASSTHRU, MASK
290 VMTRUNC, VMTRUNCUS, VMTRUNCS,
292 // Vector FP extend.
293 VFPEXT, VFPEXT_SAE, VFPEXTS, VFPEXTS_SAE,
295 // Vector FP round.
296 VFPROUND, VFPROUND_RND, VFPROUNDS, VFPROUNDS_RND,
298 // Masked version of above. Used for v2f64->v4f32.
299 // SRC, PASSTHRU, MASK
300 VMFPROUND,
302 // 128-bit vector logical left / right shift
303 VSHLDQ, VSRLDQ,
305 // Vector shift elements
306 VSHL, VSRL, VSRA,
308 // Vector variable shift
309 VSHLV, VSRLV, VSRAV,
311 // Vector shift elements by immediate
312 VSHLI, VSRLI, VSRAI,
314 // Shifts of mask registers.
315 KSHIFTL, KSHIFTR,
317 // Bit rotate by immediate
318 VROTLI, VROTRI,
320 // Vector packed double/float comparison.
321 CMPP,
323 // Vector integer comparisons.
324 PCMPEQ, PCMPGT,
326 // v8i16 Horizontal minimum and position.
327 PHMINPOS,
329 MULTISHIFT,
331 /// Vector comparison generating mask bits for fp and
332 /// integer signed and unsigned data types.
333 CMPM,
334 // Vector comparison with SAE for FP values
335 CMPM_SAE,
337 // Arithmetic operations with FLAGS results.
338 ADD, SUB, ADC, SBB, SMUL, UMUL,
339 OR, XOR, AND,
341 // Bit field extract.
342 BEXTR,
344 // Zero High Bits Starting with Specified Bit Position.
345 BZHI,
347 // X86-specific multiply by immediate.
348 MUL_IMM,
350 // Vector sign bit extraction.
351 MOVMSK,
353 // Vector bitwise comparisons.
354 PTEST,
356 // Vector packed fp sign bitwise comparisons.
357 TESTP,
359 // OR/AND test for masks.
360 KORTEST,
361 KTEST,
363 // ADD for masks.
364 KADD,
366 // Several flavors of instructions with vector shuffle behaviors.
367 // Saturated signed/unsigned packing.
368 PACKSS,
369 PACKUS,
370 // Intra-lane alignr.
371 PALIGNR,
372 // AVX512 inter-lane alignr.
373 VALIGN,
374 PSHUFD,
375 PSHUFHW,
376 PSHUFLW,
377 SHUFP,
378 // VBMI2 Concat & Shift.
379 VSHLD,
380 VSHRD,
381 VSHLDV,
382 VSHRDV,
383 // Shuffle Packed Values at 128-bit granularity.
384 SHUF128,
385 MOVDDUP,
386 MOVSHDUP,
387 MOVSLDUP,
388 MOVLHPS,
389 MOVHLPS,
390 MOVSD,
391 MOVSS,
392 UNPCKL,
393 UNPCKH,
394 VPERMILPV,
395 VPERMILPI,
396 VPERMI,
397 VPERM2X128,
399 // Variable Permute (VPERM).
400 // Res = VPERMV MaskV, V0
401 VPERMV,
403 // 3-op Variable Permute (VPERMT2).
404 // Res = VPERMV3 V0, MaskV, V1
405 VPERMV3,
407 // Bitwise ternary logic.
408 VPTERNLOG,
409 // Fix Up Special Packed Float32/64 values.
410 VFIXUPIMM, VFIXUPIMM_SAE,
411 VFIXUPIMMS, VFIXUPIMMS_SAE,
412 // Range Restriction Calculation For Packed Pairs of Float32/64 values.
413 VRANGE, VRANGE_SAE, VRANGES, VRANGES_SAE,
414 // Reduce - Perform Reduction Transformation on scalar/packed FP.
415 VREDUCE, VREDUCE_SAE, VREDUCES, VREDUCES_SAE,
416 // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
417 // Also used by the legacy (V)ROUND intrinsics where we mask out the
418 // scaling part of the immediate.
419 VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE,
420 // Tests the types of packed FP values.
421 VFPCLASS,
422 // Tests the types of scalar FP values.
423 VFPCLASSS,
425 // Broadcast scalar to vector.
426 VBROADCAST,
427 // Broadcast mask to vector.
428 VBROADCASTM,
429 // Broadcast subvector to vector.
430 SUBV_BROADCAST,
432 /// SSE4A Extraction and Insertion.
433 EXTRQI, INSERTQI,
435 // XOP arithmetic/logical shifts.
436 VPSHA, VPSHL,
437 // XOP signed/unsigned integer comparisons.
438 VPCOM, VPCOMU,
439 // XOP packed permute bytes.
440 VPPERM,
441 // XOP two source permutation.
442 VPERMIL2,
444 // Vector multiply packed unsigned doubleword integers.
445 PMULUDQ,
446 // Vector multiply packed signed doubleword integers.
447 PMULDQ,
448 // Vector Multiply Packed Unsigned Integers with Round and Scale.
449 MULHRS,
451 // Multiply and Add Packed Integers.
452 VPMADDUBSW, VPMADDWD,
454 // AVX512IFMA multiply and add.
455 // NOTE: These are different than the instruction and perform
456 // op0 x op1 + op2.
457 VPMADD52L, VPMADD52H,
459 // VNNI
460 VPDPBUSD,
461 VPDPBUSDS,
462 VPDPWSSD,
463 VPDPWSSDS,
465 // FMA nodes.
466 // We use the target independent ISD::FMA for the non-inverted case.
467 FNMADD,
468 FMSUB,
469 FNMSUB,
470 FMADDSUB,
471 FMSUBADD,
473 // FMA with rounding mode.
474 FMADD_RND,
475 FNMADD_RND,
476 FMSUB_RND,
477 FNMSUB_RND,
478 FMADDSUB_RND,
479 FMSUBADD_RND,
481 // Compress and expand.
482 COMPRESS,
483 EXPAND,
485 // Bits shuffle
486 VPSHUFBITQMB,
488 // Convert Signed/Unsigned Integer to Floating-Point Value with rounding mode.
489 SINT_TO_FP_RND, UINT_TO_FP_RND,
490 SCALAR_SINT_TO_FP, SCALAR_UINT_TO_FP,
491 SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,
493 // Vector float/double to signed/unsigned integer.
494 CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
495 // Scalar float/double to signed/unsigned integer.
496 CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND,
498 // Vector float/double to signed/unsigned integer with truncation.
499 CVTTP2SI, CVTTP2UI, CVTTP2SI_SAE, CVTTP2UI_SAE,
500 // Scalar float/double to signed/unsigned integer with truncation.
501 CVTTS2SI, CVTTS2UI, CVTTS2SI_SAE, CVTTS2UI_SAE,
503 // Vector signed/unsigned integer to float/double.
504 CVTSI2P, CVTUI2P,
506 // Masked versions of above. Used for v2f64->v4f32.
507 // SRC, PASSTHRU, MASK
508 MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI,
509 MCVTSI2P, MCVTUI2P,
511 // Vector float to bfloat16.
512 // Convert TWO packed single data to one packed BF16 data
513 CVTNE2PS2BF16,
514 // Convert packed single data to packed BF16 data
515 CVTNEPS2BF16,
516 // Masked version of above.
517 // SRC, PASSTHRU, MASK
518 MCVTNEPS2BF16,
520 // Dot product of BF16 pairs accumulated into
521 // packed single precision.
522 DPBF16PS,
524 // Save xmm argument registers to the stack, according to %al. An operator
525 // is needed so that this can be expanded with control flow.
526 VASTART_SAVE_XMM_REGS,
528 // Windows's _chkstk call to do stack probing.
529 WIN_ALLOCA,
531 // For allocating variable amounts of stack space when using
532 // segmented stacks. Checks if the current stacklet has enough space, and
533 // falls back to heap allocation if not.
534 SEG_ALLOCA,
536 // Memory barriers.
537 MEMBARRIER,
538 MFENCE,
540 // Store FP status word into i16 register.
541 FNSTSW16r,
543 // Store contents of %ah into %eflags.
544 SAHF,
546 // Get a random integer and indicate whether it is valid in CF.
547 RDRAND,
549 // Get a NIST SP800-90B & C compliant random integer and
550 // indicate whether it is valid in CF.
551 RDSEED,
553 // Protection keys
554 // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
555 // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
556 // value for ECX.
557 RDPKRU, WRPKRU,
559 // SSE42 string comparisons.
560 // These nodes produce 3 results: index, mask, and flags. X86ISelDAGToDAG
561 // will emit one or two instructions based on which results are used. If
562 // both flags and index/mask are needed, this allows us to use a single
563 // instruction since we won't have to pick an opcode for flags. Instead we
564 // can rely on the DAG to CSE everything and decide at isel.
565 PCMPISTR,
566 PCMPESTR,
568 // Test if in transactional execution.
569 XTEST,
571 // ERI instructions.
572 RSQRT28, RSQRT28_SAE, RSQRT28S, RSQRT28S_SAE,
573 RCP28, RCP28_SAE, RCP28S, RCP28S_SAE, EXP2, EXP2_SAE,
575 // Conversions between float and half-float.
576 CVTPS2PH, CVTPH2PS, CVTPH2PS_SAE,
578 // Masked version of above.
579 // SRC, RND, PASSTHRU, MASK
580 MCVTPS2PH,
582 // Galois Field Arithmetic Instructions
583 GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,
585 // LWP insert record.
586 LWPINS,
588 // User level wait
589 UMWAIT, TPAUSE,
591 // Enqueue Stores Instructions
592 ENQCMD, ENQCMDS,
594 // For avx512-vp2intersect
595 VP2INTERSECT,
597 // Compare and swap.
598 LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
599 LCMPXCHG8_DAG,
600 LCMPXCHG16_DAG,
601 LCMPXCHG8_SAVE_EBX_DAG,
602 LCMPXCHG16_SAVE_RBX_DAG,
604 /// LOCK-prefixed arithmetic read-modify-write instructions.
605 /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
606 LADD, LSUB, LOR, LXOR, LAND,
608 // Load, scalar_to_vector, and zero extend.
609 VZEXT_LOAD,
611 // extract_vector_elt, store.
612 VEXTRACT_STORE,
614 // Store FP control word into i16 memory.
615 FNSTCW16m,
617 /// This instruction implements FP_TO_SINT with the
618 /// integer destination in memory and a FP reg source. This corresponds
619 /// to the X86::FIST*m instructions and the rounding mode change stuff. It
620 /// has two inputs (token chain and address) and two outputs (int value
621 /// and token chain). Memory VT specifies the type to store to.
622 FP_TO_INT_IN_MEM,
624 /// This instruction implements SINT_TO_FP with the
625 /// integer source in memory and FP reg result. This corresponds to the
626 /// X86::FILD*m instructions. It has two inputs (token chain and address)
627 /// and two outputs (FP value and token chain). FILD_FLAG also produces a
628 /// flag. The integer source type is specified by the memory VT.
629 FILD,
630 FILD_FLAG,
632 /// This instruction implements a fp->int store from FP stack
633 /// slots. This corresponds to the fist instruction. It takes a
634 /// chain operand, value to store, address, and glue. The memory VT
635 /// specifies the type to store as.
636 FIST,
638 /// This instruction implements an extending load to FP stack slots.
639 /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
640 /// operand, and ptr to load from. The memory VT specifies the type to
641 /// load from.
642 FLD,
644 /// This instruction implements a truncating store from FP stack
645 /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
646 /// chain operand, value to store, address, and glue. The memory VT
647 /// specifies the type to store as.
648 FST,
650 /// This instruction grabs the address of the next argument
651 /// from a va_list. (reads and modifies the va_list in memory)
652 VAARG_64,
654 // Vector truncating store with unsigned/signed saturation
655 VTRUNCSTOREUS, VTRUNCSTORES,
656 // Vector truncating masked store with unsigned/signed saturation
657 VMTRUNCSTOREUS, VMTRUNCSTORES,
659 // X86 specific gather and scatter
660 MGATHER, MSCATTER,
662 // WARNING: Do not add anything at the end unless you want the node to
663 // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
664 // opcodes will be treated as target memory ops!
666 } // end namespace X86ISD
668 /// Define some predicates that are used for node matching.
669 namespace X86 {
670 /// Returns true if Elt is a constant zero or floating point constant +0.0.
671 bool isZeroNode(SDValue Elt);
673 /// Returns true if the given offset can
674 /// fit into the displacement field of the instruction.
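/// A minimal usage sketch (illustrative only; GV, Offset, M, PtrVT, dl and
/// DAG are assumed to come from the surrounding lowering code):
/// \code
///   if (X86::isOffsetSuitableForCodeModel(Offset, M))
///     // Safe to fold the offset into the address itself.
///     Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
/// \endcode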
675 bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
676 bool hasSymbolicDisplacement = true);
678 /// Determines whether the callee is required to pop its
679 /// own arguments. Callee pop is necessary to support tail calls.
680 bool isCalleePop(CallingConv::ID CallingConv,
681 bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
683 } // end namespace X86
685 //===--------------------------------------------------------------------===//
686 // X86 Implementation of the TargetLowering interface
687 class X86TargetLowering final : public TargetLowering {
688 public:
689 explicit X86TargetLowering(const X86TargetMachine &TM,
690 const X86Subtarget &STI);
692 unsigned getJumpTableEncoding() const override;
693 bool useSoftFloat() const override;
695 void markLibCallAttributes(MachineFunction *MF, unsigned CC,
696 ArgListTy &Args) const override;
698 MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
699 return MVT::i8;
702 const MCExpr *
703 LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
704 const MachineBasicBlock *MBB, unsigned uid,
705 MCContext &Ctx) const override;
707 /// Returns relocation base for the given PIC jumptable.
708 SDValue getPICJumpTableRelocBase(SDValue Table,
709 SelectionDAG &DAG) const override;
710 const MCExpr *
711 getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
712 unsigned JTI, MCContext &Ctx) const override;
714 /// Return the desired alignment for ByVal aggregate
715 /// function arguments in the caller parameter area. For X86, aggregates
716 /// that contain SSE vectors are placed at 16-byte boundaries while the rest are at
717 /// 4-byte boundaries.
718 unsigned getByValTypeAlignment(Type *Ty,
719 const DataLayout &DL) const override;
721 /// Returns the target specific optimal type for load
722 /// and store operations as a result of memset, memcpy, and memmove
723 /// lowering. If DstAlign is zero, that means the destination alignment can
724 /// satisfy any constraint. Similarly, if SrcAlign is zero it
725 /// means there isn't a need to check it against alignment requirement,
726 /// probably because the source does not need to be loaded. If 'IsMemset' is
727 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
728 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
729 /// source is constant so it does not need to be loaded.
730 /// It returns EVT::Other if the type should be determined using generic
731 /// target-independent logic.
732 EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
733 bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
734 const AttributeList &FuncAttributes) const override;
736 /// Returns true if it's safe to use load / store of the
737 /// specified type to expand memcpy / memset inline. This is mostly true
738 /// for all types except for some special cases. For example, on X86
739 /// targets without SSE2 f64 load / store are done with fldl / fstpl which
740 /// also does type conversion. Note the specified type doesn't have to be
741 /// legal as the hook is used before type legalization.
742 bool isSafeMemOpType(MVT VT) const override;
744 /// Returns true if the target allows unaligned memory accesses of the
745 /// specified type. Returns whether it is "fast" in the last argument.
746 bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
747 MachineMemOperand::Flags Flags,
748 bool *Fast) const override;
750 /// Provide custom lowering hooks for some operations.
752 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
754 /// Places new result values for the node in Results (their number
755 /// and types must exactly match those of the original return values of
756 /// the node), or leaves Results empty, which indicates that the node is not
757 /// to be custom lowered after all.
758 void LowerOperationWrapper(SDNode *N,
759 SmallVectorImpl<SDValue> &Results,
760 SelectionDAG &DAG) const override;
762 /// Replace the results of node with an illegal result
763 /// type with new values built out of custom code.
765 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
766 SelectionDAG &DAG) const override;
768 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
770 // Return true if it is profitable to combine a BUILD_VECTOR with a
771 // stride-pattern to a shuffle and a truncate.
772 // Example of such a combine:
773 // v4i32 build_vector((extract_elt V, 1),
774 // (extract_elt V, 3),
775 // (extract_elt V, 5),
776 // (extract_elt V, 7))
777 // -->
778 // v4i32 truncate (bitcast (shuffle<1,u,3,u,4,u,5,u,6,u,7,u> V, u) to
779 // v4i64)
780 bool isDesirableToCombineBuildVectorToShuffleTruncate(
781 ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;
783 /// Return true if the target has native support for
784 /// the specified value type and it is 'desirable' to use the type for the
785 /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
786 /// instruction encodings are longer and some i16 instructions are slow.
787 bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
789 /// Return true if the target has native support for the
790 /// specified value type and it is 'desirable' to use the type. e.g. On x86
791 /// i16 is legal, but undesirable since i16 instruction encodings are longer
792 /// and some i16 instructions are slow.
793 bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
795 MachineBasicBlock *
796 EmitInstrWithCustomInserter(MachineInstr &MI,
797 MachineBasicBlock *MBB) const override;
799 /// This method returns the name of a target specific DAG node.
800 const char *getTargetNodeName(unsigned Opcode) const override;
802 /// Do not merge vector stores after legalization because that may conflict
803 /// with x86-specific store splitting optimizations.
804 bool mergeStoresAfterLegalization(EVT MemVT) const override {
805 return !MemVT.isVector();
808 bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
809 const SelectionDAG &DAG) const override;
811 bool isCheapToSpeculateCttz() const override;
813 bool isCheapToSpeculateCtlz() const override;
815 bool isCtlzFast() const override;
817 bool hasBitPreservingFPLogic(EVT VT) const override {
818 return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
821 bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
822 // If the pair to store is a mixture of float and int values, we will
823 // save two bitwise instructions and one float-to-int instruction and
824 // add one store instruction. There is potentially a more
825 // significant benefit because it avoids the float->int domain switch
826 // for the input value, so it is more likely a win.
827 if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
828 (LTy.isInteger() && HTy.isFloatingPoint()))
829 return true;
830 // If the pair only contains int values, we will save two bitwise
831 // instructions and add one store instruction (costing one more
832 // store buffer entry). Since the benefit is less clear-cut, we leave
833 // such pairs out until we get a testcase proving it is a win.
834 return false;
837 bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
839 bool hasAndNotCompare(SDValue Y) const override;
841 bool hasAndNot(SDValue Y) const override;
843 bool hasBitTest(SDValue X, SDValue Y) const override;
845 bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
846 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
847 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
848 SelectionDAG &DAG) const override;
850 bool shouldFoldConstantShiftPairToMask(const SDNode *N,
851 CombineLevel Level) const override;
853 bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
855 bool
856 shouldTransformSignedTruncationCheck(EVT XVT,
857 unsigned KeptBits) const override {
858 // For vectors, we don't have a preference.
859 if (XVT.isVector())
860 return false;
862 auto VTIsOk = [](EVT VT) -> bool {
863 return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
864 VT == MVT::i64;
867 // We are ok with KeptBitsVT being byte/word/dword, which is what MOVS supports.
868 // XVT will be larger than KeptBitsVT.
869 MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
870 return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
873 bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
874 if (DAG.getMachineFunction().getFunction().hasMinSize())
875 return false;
876 return true;
879 bool shouldSplatInsEltVarIndex(EVT VT) const override;
881 bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
882 return VT.isScalarInteger();
885 /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
886 MVT hasFastEqualityCompare(unsigned NumBits) const override;
888 /// Return the value type to use for ISD::SETCC.
889 EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
890 EVT VT) const override;
892 bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
893 TargetLoweringOpt &TLO) const override;
895 /// Determine which of the bits specified in Mask are known to be either
896 /// zero or one and return them in the KnownZero/KnownOne bitsets.
897 void computeKnownBitsForTargetNode(const SDValue Op,
898 KnownBits &Known,
899 const APInt &DemandedElts,
900 const SelectionDAG &DAG,
901 unsigned Depth = 0) const override;
903 /// Determine the number of bits in the operation that are sign bits.
904 unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
905 const APInt &DemandedElts,
906 const SelectionDAG &DAG,
907 unsigned Depth) const override;
909 bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
910 const APInt &DemandedElts,
911 APInt &KnownUndef,
912 APInt &KnownZero,
913 TargetLoweringOpt &TLO,
914 unsigned Depth) const override;
916 bool SimplifyDemandedBitsForTargetNode(SDValue Op,
917 const APInt &DemandedBits,
918 const APInt &DemandedElts,
919 KnownBits &Known,
920 TargetLoweringOpt &TLO,
921 unsigned Depth) const override;
923 SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
924 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
925 SelectionDAG &DAG, unsigned Depth) const override;
927 const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
929 SDValue unwrapAddress(SDValue N) const override;
931 SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
933 bool ExpandInlineAsm(CallInst *CI) const override;
935 ConstraintType getConstraintType(StringRef Constraint) const override;
937 /// Examine constraint string and operand type and determine a weight value.
938 /// The operand object must already have been set up with the operand type.
939 ConstraintWeight
940 getSingleConstraintMatchWeight(AsmOperandInfo &info,
941 const char *constraint) const override;
943 const char *LowerXConstraint(EVT ConstraintVT) const override;
945 /// Lower the specified operand into the Ops vector. If it is invalid, don't
946 /// add anything to Ops. If hasMemory is true it means one of the asm
947 /// constraints of the inline asm instruction being processed is 'm'.
948 void LowerAsmOperandForConstraint(SDValue Op,
949 std::string &Constraint,
950 std::vector<SDValue> &Ops,
951 SelectionDAG &DAG) const override;
953 unsigned
954 getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
955 if (ConstraintCode == "i")
956 return InlineAsm::Constraint_i;
957 else if (ConstraintCode == "o")
958 return InlineAsm::Constraint_o;
959 else if (ConstraintCode == "v")
960 return InlineAsm::Constraint_v;
961 else if (ConstraintCode == "X")
962 return InlineAsm::Constraint_X;
963 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
966 /// Handle Lowering flag assembly outputs.
967 SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, SDLoc DL,
968 const AsmOperandInfo &Constraint,
969 SelectionDAG &DAG) const override;
971 /// Given a physical register constraint
972 /// (e.g. {edx}), return the register number and the register class for the
973 /// register. This should only be used for C_Register constraints. On
974 /// error, this returns a register number of 0.
975 std::pair<unsigned, const TargetRegisterClass *>
976 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
977 StringRef Constraint, MVT VT) const override;
979 /// Return true if the addressing mode represented
980 /// by AM is legal for this target, for a load/store of the specified type.
981 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
982 Type *Ty, unsigned AS,
983 Instruction *I = nullptr) const override;
985 /// Return true if the specified immediate is legal
986 /// icmp immediate, that is the target has icmp instructions which can
987 /// compare a register against the immediate without having to materialize
988 /// the immediate into a register.
989 bool isLegalICmpImmediate(int64_t Imm) const override;
991 /// Return true if the specified immediate is legal
992 /// add immediate, that is the target has add instructions which can
993 /// add a register and the immediate without having to materialize
994 /// the immediate into a register.
995 bool isLegalAddImmediate(int64_t Imm) const override;
997 bool isLegalStoreImmediate(int64_t Imm) const override;
999 /// Return the cost of the scaling factor used in the addressing
1000 /// mode represented by AM for this target, for a load/store
1001 /// of the specified type.
1002 /// If the AM is supported, the return value must be >= 0.
1003 /// If the AM is not supported, it returns a negative value.
1004 int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
1005 unsigned AS) const override;
1007 bool isVectorShiftByScalarCheap(Type *Ty) const override;
1009 /// Add x86-specific opcodes to the default list.
1010 bool isBinOp(unsigned Opcode) const override;
1012 /// Returns true if the opcode is a commutative binary operation.
1013 bool isCommutativeBinOp(unsigned Opcode) const override;
1015 /// Return true if it's free to truncate a value of
1016 /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
1017 /// register EAX to i16 by referencing its sub-register AX.
1018 bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1019 bool isTruncateFree(EVT VT1, EVT VT2) const override;
1021 bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1023 /// Return true if any actual instruction that defines a
1024 /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
1025 /// register. This does not necessarily include registers defined in
1026 /// unknown ways, such as incoming arguments, or copies from unknown
1027 /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
1028 /// does not necessarily apply to truncate instructions. e.g. on x86-64,
1029 /// all instructions that define 32-bit values implicit zero-extend the
1030 /// result out to 64 bits.
1031 bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1032 bool isZExtFree(EVT VT1, EVT VT2) const override;
1033 bool isZExtFree(SDValue Val, EVT VT2) const override;
1035 /// Return true if folding a vector load into ExtVal (a sign, zero, or any
1036 /// extend node) is profitable.
1037 bool isVectorLoadExtDesirable(SDValue) const override;
1039 /// Return true if an FMA operation is faster than a pair of fmul and fadd
1040 /// instructions. fmuladd intrinsics will be expanded to FMAs when this
1041 /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1042 bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
1044 /// Return true if it's profitable to narrow
1045 /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
1046 /// from i32 to i8 but not from i32 to i16.
1047 bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
1049 /// Given an intrinsic, checks if on the target the intrinsic will need to map
1050 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1051 /// true and stores the intrinsic information into the IntrinsicInfo that was
1052 /// passed to the function.
1053 bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1054 MachineFunction &MF,
1055 unsigned Intrinsic) const override;
1057 /// Returns true if the target can instruction select the
1058 /// specified FP immediate natively. If false, the legalizer will
1059 /// materialize the FP immediate as a load from a constant pool.
1060 bool isFPImmLegal(const APFloat &Imm, EVT VT,
1061 bool ForCodeSize) const override;
1063 /// Targets can use this to indicate that they only support *some*
1064 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1065 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1066 /// be legal.
1067 bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1069 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1070 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1071 /// constant pool entry.
1072 bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1074 /// Returns true if lowering to a jump table is allowed.
1075 bool areJTsAllowed(const Function *Fn) const override;
1077 /// If true, then instruction selection should
1078 /// seek to shrink the FP constant of the specified type to a smaller type
1079 /// in order to save space and/or reduce runtime.
1080 bool ShouldShrinkFPConstant(EVT VT) const override {
1081 // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
1082 // expensive than a straight movsd. On the other hand, it's important to
1083 // shrink long double fp constant since fldt is very slow.
1084 return !X86ScalarSSEf64 || VT == MVT::f80;
1087 /// Return true if we believe it is correct and profitable to reduce the
1088 /// load node to a smaller type.
1089 bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1090 EVT NewVT) const override;
1092 /// Return true if the specified scalar FP type is computed in an SSE
1093 /// register, not on the X87 floating point stack.
1094 bool isScalarFPTypeInSSEReg(EVT VT) const {
1095 return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
1096 (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
1099 /// Returns true if it is beneficial to convert a load of a constant
1100 /// to just the constant itself.
1101 bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1102 Type *Ty) const override;
1104 bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const override;
1106 bool convertSelectOfConstantsToMath(EVT VT) const override;
1108 bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
1109 SDValue C) const override;
1111 bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
1112 bool IsSigned) const override;
1114 /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1115 /// with this index.
1116 bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1117 unsigned Index) const override;
1119 /// Scalar ops always have equal or better analysis/performance/power than
1120 /// the vector equivalent, so this always makes sense if the scalar op is
1121 /// supported.
1122 bool shouldScalarizeBinop(SDValue) const override;
1124 /// Extract of a scalar FP value from index 0 of a vector is free.
1125 bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1126 EVT EltVT = VT.getScalarType();
1127 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1130 /// Overflow nodes should get combined/lowered to optimal instructions
1131 /// (they should allow eliminating explicit compares by getting flags from
1132 /// math ops).
1133 bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const override;
1135 bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1136 unsigned AddrSpace) const override {
1137 // If we can replace more than 2 scalar stores, there will be a reduction
1138 // in instructions even after we add a vector constant load.
1139 return NumElem > 2;
1142 bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1143 const SelectionDAG &DAG,
1144 const MachineMemOperand &MMO) const override;
1146 /// Intel processors have a unified instruction and data cache
1147 const char * getClearCacheBuiltinName() const override {
1148 return nullptr; // nothing to do, move along.
1151 unsigned getRegisterByName(const char* RegName, EVT VT,
1152 SelectionDAG &DAG) const override;
1154 /// If a physical register, this returns the register that receives the
1155 /// exception address on entry to an EH pad.
1156 unsigned
1157 getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1159 /// If a physical register, this returns the register that receives the
1160 /// exception typeid on entry to a landing pad.
1161 unsigned
1162 getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1164 virtual bool needsFixedCatchObjects() const override;
1166 /// This method returns a target specific FastISel object,
1167 /// or null if the target does not support "fast" ISel.
1168 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1169 const TargetLibraryInfo *libInfo) const override;
1171 /// If the target has a standard location for the stack protector cookie,
1172 /// returns the address of that location. Otherwise, returns nullptr.
1173 Value *getIRStackGuard(IRBuilder<> &IRB) const override;
1175 bool useLoadStackGuardNode() const override;
1176 bool useStackGuardXorFP() const override;
1177 void insertSSPDeclarations(Module &M) const override;
1178 Value *getSDagStackGuard(const Module &M) const override;
1179 Function *getSSPStackGuardCheck(const Module &M) const override;
1180 SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1181 const SDLoc &DL) const override;
1184 /// Return true if the target stores SafeStack pointer at a fixed offset in
1185 /// some non-standard address space, and populates the address space and
1186 /// offset as appropriate.
1187 Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
1189 SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
1190 SelectionDAG &DAG) const;
1192 bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
1194 /// Customize the preferred legalization strategy for certain types.
1195 LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1197 MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1198 EVT VT) const override;
1200 unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1201 CallingConv::ID CC,
1202 EVT VT) const override;
1204 bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1206 bool supportSwiftError() const override;
1208 StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1210 unsigned getStackProbeSize(MachineFunction &MF) const;
1212 bool hasVectorBlend() const override { return true; }
1214 unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1216 /// Lower interleaved load(s) into target specific
1217 /// instructions/intrinsics.
1218 bool lowerInterleavedLoad(LoadInst *LI,
1219 ArrayRef<ShuffleVectorInst *> Shuffles,
1220 ArrayRef<unsigned> Indices,
1221 unsigned Factor) const override;
1223 /// Lower interleaved store(s) into target specific
1224 /// instructions/intrinsics.
1225 bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1226 unsigned Factor) const override;
1228 SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
1229 SDValue Addr, SelectionDAG &DAG)
1230 const override;
1232 protected:
1233 std::pair<const TargetRegisterClass *, uint8_t>
1234 findRepresentativeClass(const TargetRegisterInfo *TRI,
1235 MVT VT) const override;
1237 private:
1238 /// Keep a reference to the X86Subtarget around so that we can
1239 /// make the right decision when generating code for different targets.
1240 const X86Subtarget &Subtarget;
1242 /// Select between SSE or x87 floating point ops.
1243 /// When SSE is available, use it for f32 operations.
1244 /// When SSE2 is available, use it for f64 operations.
1245 bool X86ScalarSSEf32;
1246 bool X86ScalarSSEf64;
1248 /// A list of legal FP immediates.
1249 std::vector<APFloat> LegalFPImmediates;
1251 /// Indicate that this x86 target can instruction
1252 /// select the specified FP immediate natively.
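/// For example (a sketch only; the X86TargetLowering constructor registers
/// immediates like this when the corresponding instructions are available):
/// \code
///   addLegalFPImmediate(APFloat(+0.0f)); // materializable with xorps/fldz
/// \endcode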
1253 void addLegalFPImmediate(const APFloat& Imm) {
1254 LegalFPImmediates.push_back(Imm);
1257 SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
1258 CallingConv::ID CallConv, bool isVarArg,
1259 const SmallVectorImpl<ISD::InputArg> &Ins,
1260 const SDLoc &dl, SelectionDAG &DAG,
1261 SmallVectorImpl<SDValue> &InVals,
1262 uint32_t *RegMask) const;
1263 SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1264 const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1265 const SDLoc &dl, SelectionDAG &DAG,
1266 const CCValAssign &VA, MachineFrameInfo &MFI,
1267 unsigned i) const;
1268 SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1269 const SDLoc &dl, SelectionDAG &DAG,
1270 const CCValAssign &VA,
1271 ISD::ArgFlagsTy Flags) const;
1273 // Call lowering helpers.
1275 /// Check whether the call is eligible for tail call optimization. Targets
1276 /// that want to do tail call optimization should implement this function.
1277 bool IsEligibleForTailCallOptimization(SDValue Callee,
1278 CallingConv::ID CalleeCC,
1279 bool isVarArg,
1280 bool isCalleeStructRet,
1281 bool isCallerStructRet,
1282 Type *RetTy,
1283 const SmallVectorImpl<ISD::OutputArg> &Outs,
1284 const SmallVectorImpl<SDValue> &OutVals,
1285 const SmallVectorImpl<ISD::InputArg> &Ins,
1286 SelectionDAG& DAG) const;
1287 SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1288 SDValue Chain, bool IsTailCall,
1289 bool Is64Bit, int FPDiff,
1290 const SDLoc &dl) const;
1292 unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1293 SelectionDAG &DAG) const;
1295 unsigned getAddressSpace(void) const;
1297 SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned) const;
1299 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1300 SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1301 SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1302 SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1304 unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
1305 const unsigned char OpFlags = 0) const;
1306 SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1307 SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1308 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1309 SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1310 SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1312 /// Creates target global address or external symbol nodes for calls or
1313 /// other uses.
1314 SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1315 bool ForCall) const;
1317 SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1318 SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1319 SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1320 SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1321 SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1322 SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1323 SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1324 SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1325 SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1326 SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1327 SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1328 SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1329 SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1330 SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1331 SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1332 SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1333 SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1334 SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1335 SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1336 SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1337 SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1338 SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
1339 SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1340 SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
1341 SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
1342 SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1344 SDValue
1345 LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1346 const SmallVectorImpl<ISD::InputArg> &Ins,
1347 const SDLoc &dl, SelectionDAG &DAG,
1348 SmallVectorImpl<SDValue> &InVals) const override;
1349 SDValue LowerCall(CallLoweringInfo &CLI,
1350 SmallVectorImpl<SDValue> &InVals) const override;
1352 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1353 const SmallVectorImpl<ISD::OutputArg> &Outs,
1354 const SmallVectorImpl<SDValue> &OutVals,
1355 const SDLoc &dl, SelectionDAG &DAG) const override;
1357 bool supportSplitCSR(MachineFunction *MF) const override {
1358 return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1359 MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1361 void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1362 void insertCopiesSplitCSR(
1363 MachineBasicBlock *Entry,
1364 const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1366 bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1368 bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1370 EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1371 ISD::NodeType ExtendKind) const override;
1373 bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1374 bool isVarArg,
1375 const SmallVectorImpl<ISD::OutputArg> &Outs,
1376 LLVMContext &Context) const override;
1378 const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1380 TargetLoweringBase::AtomicExpansionKind
1381 shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
1382 bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1383 TargetLoweringBase::AtomicExpansionKind
1384 shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1386 LoadInst *
1387 lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1389 bool needsCmpXchgNb(Type *MemType) const;
1391 void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1392 MachineBasicBlock *DispatchBB, int FI) const;
1394 // Utility function to emit the low-level va_arg code for X86-64.
1395 MachineBasicBlock *
1396 EmitVAARG64WithCustomInserter(MachineInstr &MI,
1397 MachineBasicBlock *MBB) const;
1399 /// Utility function to emit the xmm reg save portion of va_start.
1400 MachineBasicBlock *
1401 EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
1402 MachineBasicBlock *BB) const;
1404 MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1405 MachineInstr &MI2,
1406 MachineBasicBlock *BB) const;
1408 MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1409 MachineBasicBlock *BB) const;
1411 MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
1412 MachineBasicBlock *BB) const;
1414 MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1415 MachineBasicBlock *BB) const;
1417 MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
1418 MachineBasicBlock *BB) const;
1420 MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1421 MachineBasicBlock *BB) const;
1423 MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1424 MachineBasicBlock *BB) const;
1426 MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1427 MachineBasicBlock *BB) const;
1429 MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
1430 MachineBasicBlock *BB) const;
1432 MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1433 MachineBasicBlock *MBB) const;
1435 void emitSetJmpShadowStackFix(MachineInstr &MI,
1436 MachineBasicBlock *MBB) const;
1438 MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1439 MachineBasicBlock *MBB) const;
1441 MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1442 MachineBasicBlock *MBB) const;
1444 MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
1445 MachineBasicBlock *MBB) const;
1447 MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1448 MachineBasicBlock *MBB) const;
1450 /// Emit nodes that will be selected as "cmp Op0,Op1", or something
1451 /// equivalent, for use with the given x86 condition code.
1452 SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
1453 SelectionDAG &DAG) const;
1455 /// Convert a comparison if required by the subtarget.
1456 SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
1458 /// Emit flags for the given setcc condition and operands. Also returns the
1459 /// corresponding X86 condition code constant in X86CC.
1460 SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1,
1461 ISD::CondCode CC, const SDLoc &dl,
1462 SelectionDAG &DAG,
1463 SDValue &X86CC) const;
1465 /// Check if replacement of SQRT with RSQRT should be disabled.
1466 bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
1468 /// Use rsqrt* to speed up sqrt calculations.
1469 SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1470 int &RefinementSteps, bool &UseOneConstNR,
1471 bool Reciprocal) const override;
1473 /// Use rcp* to speed up fdiv calculations.
1474 SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1475 int &RefinementSteps) const override;
1477 /// Reassociate floating point divisions into multiply by reciprocal.
1478 unsigned combineRepeatedFPDivisors() const override;
1481 namespace X86 {
1482 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1483 const TargetLibraryInfo *libInfo);
1484 } // end namespace X86
1486 // Base class for all X86 non-masked store operations.
1487 class X86StoreSDNode : public MemSDNode {
1488 public:
1489 X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1490 SDVTList VTs, EVT MemVT,
1491 MachineMemOperand *MMO)
1492 :MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1493 const SDValue &getValue() const { return getOperand(1); }
1494 const SDValue &getBasePtr() const { return getOperand(2); }
1496 static bool classof(const SDNode *N) {
1497 return N->getOpcode() == X86ISD::VTRUNCSTORES ||
1498 N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1502 // Base class for all X86 masked store operations.
1503 // The class has the same order of operands as MaskedStoreSDNode for
1504 // convenience.
1505 class X86MaskedStoreSDNode : public MemSDNode {
1506 public:
1507 X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
1508 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1509 MachineMemOperand *MMO)
1510 : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1512 const SDValue &getValue() const { return getOperand(1); }
1513 const SDValue &getBasePtr() const { return getOperand(2); }
1514 const SDValue &getMask() const { return getOperand(3); }
1516 static bool classof(const SDNode *N) {
1517 return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
1518 N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1522 // X86 Truncating Store with Signed saturation.
1523 class TruncSStoreSDNode : public X86StoreSDNode {
1524 public:
1525 TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
1526 SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1527 : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1529 static bool classof(const SDNode *N) {
1530 return N->getOpcode() == X86ISD::VTRUNCSTORES;
1534 // X86 Truncating Store with Unsigned saturation.
1535 class TruncUSStoreSDNode : public X86StoreSDNode {
1536 public:
1537 TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
1538 SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1539 : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1541 static bool classof(const SDNode *N) {
1542 return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1546 // X86 Truncating Masked Store with Signed saturation.
1547 class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
1548 public:
1549 MaskedTruncSStoreSDNode(unsigned Order,
1550 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1551 MachineMemOperand *MMO)
1552 : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1554 static bool classof(const SDNode *N) {
1555 return N->getOpcode() == X86ISD::VMTRUNCSTORES;
1559 // X86 Truncating Masked Store with Unsigned saturation.
1560 class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
1561 public:
1562 MaskedTruncUSStoreSDNode(unsigned Order,
1563 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1564 MachineMemOperand *MMO)
1565 : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1567 static bool classof(const SDNode *N) {
1568 return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1572 // X86 specific Gather/Scatter nodes.
1573 // The class has the same order of operands as MaskedGatherScatterSDNode for
1574 // convenience.
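// A minimal sketch of how a DAG combine might inspect one of these nodes
// (illustrative only; N is assumed to be an SDNode* that may or may not
// match):
//
//   if (auto *GS = dyn_cast<X86MaskedGatherScatterSDNode>(N)) {
//     SDValue Index = GS->getIndex();
//     SDValue Mask = GS->getMask();
//     SDValue Scale = GS->getScale();
//     // ... decide whether the index/scale can be simplified ...
//   }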
1575 class X86MaskedGatherScatterSDNode : public MemSDNode {
1576 public:
1577 X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order,
1578 const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1579 MachineMemOperand *MMO)
1580 : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {}
1582 const SDValue &getBasePtr() const { return getOperand(3); }
1583 const SDValue &getIndex() const { return getOperand(4); }
1584 const SDValue &getMask() const { return getOperand(2); }
1585 const SDValue &getScale() const { return getOperand(5); }
1587 static bool classof(const SDNode *N) {
1588 return N->getOpcode() == X86ISD::MGATHER ||
1589 N->getOpcode() == X86ISD::MSCATTER;
1593 class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1594 public:
1595 X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1596 EVT MemVT, MachineMemOperand *MMO)
1597 : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
1598 MMO) {}
1600 const SDValue &getPassThru() const { return getOperand(1); }
1602 static bool classof(const SDNode *N) {
1603 return N->getOpcode() == X86ISD::MGATHER;
1607 class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1608 public:
1609 X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1610 EVT MemVT, MachineMemOperand *MMO)
1611 : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
1612 MMO) {}
1614 const SDValue &getValue() const { return getOperand(1); }
1616 static bool classof(const SDNode *N) {
1617 return N->getOpcode() == X86ISD::MSCATTER;
1621 /// Generate unpacklo/unpackhi shuffle mask.
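/// For example (the values follow directly from the loop below):
/// \code
///   SmallVector<int, 8> Mask;
///   createUnpackShuffleMask(MVT::v4i32, Mask, /*Lo=*/true, /*Unary=*/false);
///   // Mask is now <0, 4, 1, 5>, i.e. the punpckldq/unpcklps low-unpack pattern.
/// \endcode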
1622 template <typename T = int>
1623 void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
1624 bool Unary) {
1625 assert(Mask.empty() && "Expected an empty shuffle mask vector");
1626 int NumElts = VT.getVectorNumElements();
1627 int NumEltsInLane = 128 / VT.getScalarSizeInBits();
1628 for (int i = 0; i < NumElts; ++i) {
1629 unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
1630 int Pos = (i % NumEltsInLane) / 2 + LaneStart;
1631 Pos += (Unary ? 0 : NumElts * (i % 2));
1632 Pos += (Lo ? 0 : NumEltsInLane / 2);
1633 Mask.push_back(Pos);
1637 /// Helper function to scale a shuffle or target shuffle mask, replacing each
1638 /// mask index with the scaled sequential indices for an equivalent narrowed
1639 /// mask. This is the reverse process to canWidenShuffleElements, but can
1640 /// always succeed.
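/// For example (a small sketch; the values follow from the loop below):
/// \code
///   int WideMask[] = {0, -1, 3, 2};
///   SmallVector<int, 8> NarrowMask;
///   scaleShuffleMask(2, makeArrayRef(WideMask), NarrowMask);
///   // NarrowMask is now <0, 1, -1, -1, 6, 7, 4, 5>.
/// \endcode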
1641 template <typename T>
1642 void scaleShuffleMask(int Scale, ArrayRef<T> Mask,
1643 SmallVectorImpl<T> &ScaledMask) {
1644 assert(0 < Scale && "Unexpected scaling factor");
1645 size_t NumElts = Mask.size();
1646 ScaledMask.assign(NumElts * Scale, -1);
1648 for (int i = 0; i != (int)NumElts; ++i) {
1649 int M = Mask[i];
1651 // Repeat sentinel values in every mask element.
1652 if (M < 0) {
1653 for (int s = 0; s != Scale; ++s)
1654 ScaledMask[(Scale * i) + s] = M;
1655 continue;
1658 // Scale mask element and increment across each mask element.
1659 for (int s = 0; s != Scale; ++s)
1660 ScaledMask[(Scale * i) + s] = (Scale * M) + s;
1663 } // end namespace llvm
1665 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H