1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that X86 uses to lower LLVM code into a
10 // selection DAG.
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
17 #include "llvm/CodeGen/MachineFunction.h"
18 #include "llvm/CodeGen/TargetLowering.h"
20 namespace llvm {
21 class X86Subtarget;
22 class X86TargetMachine;
24 namespace X86ISD {
25 // X86 Specific DAG Nodes
26 enum NodeType : unsigned {
27 // Start the numbering where the builtin ops leave off.
28 FIRST_NUMBER = ISD::BUILTIN_OP_END,
30 /// Bit scan forward.
31 BSF,
32 /// Bit scan reverse.
33 BSR,
35 /// X86 funnel/double shift i16 instructions. These correspond to
36 /// X86::SHLDW and X86::SHRDW instructions which have different amt
37 /// modulo rules to generic funnel shifts.
38 /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD.
39 FSHL,
40 FSHR,
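// (Illustrative note added for clarity, not wording from the node definition:
//  as with ISD::FSHL, FSHL(X, Y, Z) conceptually shifts the double-width value
//  X:Y left by Z and returns the upper half, while FSHR shifts right and
//  returns the lower half; only the shift-amount modulo behavior differs for
//  these i16 forms.)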
42 /// Bitwise logical AND of floating point values. This corresponds
43 /// to X86::ANDPS or X86::ANDPD.
44 FAND,
46 /// Bitwise logical OR of floating point values. This corresponds
47 /// to X86::ORPS or X86::ORPD.
48 FOR,
50 /// Bitwise logical XOR of floating point values. This corresponds
51 /// to X86::XORPS or X86::XORPD.
52 FXOR,
54 /// Bitwise logical ANDNOT of floating point values. This
55 /// corresponds to X86::ANDNPS or X86::ANDNPD.
56 FANDN,
58 /// These operations represent an abstract X86 call
59 /// instruction, which includes a bunch of information. In particular the
60 /// operands of these node are:
61 ///
62 /// #0 - The incoming token chain
63 /// #1 - The callee
64 /// #2 - The number of arg bytes the caller pushes on the stack.
65 /// #3 - The number of arg bytes the callee pops off the stack.
66 /// #4 - The value to pass in AL/AX/EAX (optional)
67 /// #5 - The value to pass in DL/DX/EDX (optional)
68 ///
69 /// The result values of these nodes are:
70 ///
71 /// #0 - The outgoing token chain
72 /// #1 - The first register result value (optional)
73 /// #2 - The second register result value (optional)
74 ///
75 CALL,
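// Illustrative sketch (an assumption about typical use, not code from this
// header): call lowering generally builds such a node roughly as
//   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
//   Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
// where Ops begins with the incoming token chain and the callee as described
// above.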
77 /// Same as call except it adds the NoTrack prefix.
78 NT_CALL,
80 // Pseudo for an ObjC call that gets emitted together with a special
81 // marker instruction.
82 CALL_RVMARKER,
84 /// X86 compare and logical compare instructions.
85 CMP,
86 FCMP,
87 COMI,
88 UCOMI,
90 /// X86 bit-test instructions.
91 BT,
93 /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
94 /// operand, usually produced by a CMP instruction.
95 SETCC,
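// Illustrative sketch (assumed typical construction, not code from this
// header):
//   SDValue SetCC = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
//                               DAG.getTargetConstant(Cond, DL, MVT::i8),
//                               EFLAGS);
// i.e. an i8 result computed from a condition-code constant and EFLAGS.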
97 /// X86 Select
98 SELECTS,
100 // Same as SETCC except it's materialized with an sbb and the value is all
101 // ones or all zeros.
102 SETCC_CARRY, // R = carry_bit ? ~0 : 0
104 /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
105 /// Operands are two FP values to compare; result is a mask of
106 /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
107 FSETCC,
109 /// X86 FP SETCC, similar to above, but with output as an i1 mask
110 /// and a version with SAE.
111 FSETCCM,
112 FSETCCM_SAE,
114 /// X86 conditional moves. Operand 0 and operand 1 are the two values
115 /// to select from. Operand 2 is the condition code, and operand 3 is the
116 /// flag operand produced by a CMP or TEST instruction.
117 CMOV,
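// Illustrative sketch (restating the operand order above; an assumed typical
// construction, not code from this header):
//   SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, VT, Val0, Val1,
//                              DAG.getTargetConstant(CC, DL, MVT::i8), EFLAGS);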
119 /// X86 conditional branches. Operand 0 is the chain operand, operand 1
120 /// is the block to branch if condition is true, operand 2 is the
121 /// condition code, and operand 3 is the flag operand produced by a CMP
122 /// or TEST instruction.
123 BRCOND,
125 /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
126 /// operand 1 is the target address.
127 NT_BRIND,
129 /// Return with a flag operand. Operand 0 is the chain operand, operand
130 /// 1 is the number of bytes of stack to pop.
131 RET_FLAG,
133 /// Return from interrupt. Operand 0 is the number of bytes to pop.
134 IRET,
136 /// Repeat fill, corresponds to X86::REP_STOSx.
137 REP_STOS,
139 /// Repeat move, corresponds to X86::REP_MOVSx.
140 REP_MOVS,
142 /// On Darwin, this node represents the result of the popl
143 /// at function entry, used for PIC code.
144 GlobalBaseReg,
146 /// A wrapper node for TargetConstantPool, TargetJumpTable,
147 /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
148 /// MCSymbol and TargetBlockAddress.
149 Wrapper,
151 /// Special wrapper used under X86-64 PIC mode for RIP
152 /// relative displacements.
153 WrapperRIP,
155 /// Copies a 64-bit value from an MMX vector to the low word
156 /// of an XMM vector, with the high word zero filled.
157 MOVQ2DQ,
159 /// Copies a 64-bit value from the low word of an XMM vector
160 /// to an MMX vector.
161 MOVDQ2Q,
163 /// Copies a 32-bit value from the low word of an MMX
164 /// vector to a GPR.
165 MMX_MOVD2W,
167 /// Copies a GPR into the low 32-bit word of an MMX vector
168 /// and zeroes out the high word.
169 MMX_MOVW2D,
171 /// Extract an 8-bit value from a vector and zero extend it to
172 /// i32, corresponds to X86::PEXTRB.
173 PEXTRB,
175 /// Extract a 16-bit value from a vector and zero extend it to
176 /// i32, corresponds to X86::PEXTRW.
177 PEXTRW,
179 /// Insert any element of a 4 x float vector into any element
180 /// of a destination 4 x float vector.
181 INSERTPS,
183 /// Insert the lower 8 bits of a 32-bit value into a vector,
184 /// corresponds to X86::PINSRB.
185 PINSRB,
187 /// Insert the lower 16 bits of a 32-bit value into a vector,
188 /// corresponds to X86::PINSRW.
189 PINSRW,
191 /// Shuffle 16 8-bit values within a vector.
192 PSHUFB,
194 /// Compute Sum of Absolute Differences.
195 PSADBW,
196 /// Compute Double Block Packed Sum-Absolute-Differences
197 DBPSADBW,
199 /// Bitwise Logical AND NOT of Packed FP values.
200 ANDNP,
202 /// Blend where the selector is an immediate.
203 BLENDI,
205 /// Dynamic (non-constant condition) vector blend where only the sign bits
206 /// of the condition elements are used. This is used to enforce that the
207 /// condition mask is not valid for generic VSELECT optimizations. This
208 /// is also used to implement the intrinsics.
209 /// Operands are in VSELECT order: MASK, TRUE, FALSE
210 BLENDV,
212 /// Combined add and sub on an FP vector.
213 ADDSUB,
215 // FP vector ops with rounding mode.
216 FADD_RND,
217 FADDS,
218 FADDS_RND,
219 FSUB_RND,
220 FSUBS,
221 FSUBS_RND,
222 FMUL_RND,
223 FMULS,
224 FMULS_RND,
225 FDIV_RND,
226 FDIVS,
227 FDIVS_RND,
228 FMAX_SAE,
229 FMAXS_SAE,
230 FMIN_SAE,
231 FMINS_SAE,
232 FSQRT_RND,
233 FSQRTS,
234 FSQRTS_RND,
236 // FP vector get exponent.
237 FGETEXP,
238 FGETEXP_SAE,
239 FGETEXPS,
240 FGETEXPS_SAE,
241 // Extract Normalized Mantissas.
242 VGETMANT,
243 VGETMANT_SAE,
244 VGETMANTS,
245 VGETMANTS_SAE,
246 // FP Scale.
247 SCALEF,
248 SCALEF_RND,
249 SCALEFS,
250 SCALEFS_RND,
252 // Unsigned Integer average.
253 AVG,
255 /// Integer horizontal add/sub.
256 HADD,
257 HSUB,
259 /// Floating point horizontal add/sub.
260 FHADD,
261 FHSUB,
263 // Detect Conflicts Within a Vector
264 CONFLICT,
266 /// Floating point max and min.
267 FMAX,
268 FMIN,
270 /// Commutative FMIN and FMAX.
271 FMAXC,
272 FMINC,
274 /// Scalar intrinsic floating point max and min.
275 FMAXS,
276 FMINS,
278 /// Floating point reciprocal-sqrt and reciprocal approximation.
279 /// Note that these typically require refinement
280 /// in order to obtain suitable precision.
281 FRSQRT,
282 FRCP,
284 // AVX-512 reciprocal approximations with a little more precision.
285 RSQRT14,
286 RSQRT14S,
287 RCP14,
288 RCP14S,
290 // Thread Local Storage.
291 TLSADDR,
293 // Thread Local Storage. A call to get the start address
294 // of the TLS block for the current module.
295 TLSBASEADDR,
297 // Thread Local Storage. When calling to an OS provided
298 // thunk at the address from an earlier relocation.
299 TLSCALL,
301 // Exception Handling helpers.
302 EH_RETURN,
304 // SjLj exception handling setjmp.
305 EH_SJLJ_SETJMP,
307 // SjLj exception handling longjmp.
308 EH_SJLJ_LONGJMP,
310 // SjLj exception handling dispatch.
311 EH_SJLJ_SETUP_DISPATCH,
313 /// Tail call return. See X86TargetLowering::LowerCall for
314 /// the list of operands.
315 TC_RETURN,
317 // Vector move to low scalar and zero higher vector elements.
318 VZEXT_MOVL,
320 // Vector integer truncate.
321 VTRUNC,
322 // Vector integer truncate with unsigned/signed saturation.
323 VTRUNCUS,
324 VTRUNCS,
326 // Masked version of the above. Used when less than a 128-bit result is
327 // produced since the mask only applies to the lower elements and can't
328 // be represented by a select.
329 // SRC, PASSTHRU, MASK
330 VMTRUNC,
331 VMTRUNCUS,
332 VMTRUNCS,
334 // Vector FP extend.
335 VFPEXT,
336 VFPEXT_SAE,
337 VFPEXTS,
338 VFPEXTS_SAE,
340 // Vector FP round.
341 VFPROUND,
342 VFPROUND_RND,
343 VFPROUNDS,
344 VFPROUNDS_RND,
346 // Masked version of above. Used for v2f64->v4f32.
347 // SRC, PASSTHRU, MASK
348 VMFPROUND,
350 // 128-bit vector logical left / right shift
351 VSHLDQ,
352 VSRLDQ,
354 // Vector shift elements
355 VSHL,
356 VSRL,
357 VSRA,
359 // Vector variable shift
360 VSHLV,
361 VSRLV,
362 VSRAV,
364 // Vector shift elements by immediate
365 VSHLI,
366 VSRLI,
367 VSRAI,
369 // Shifts of mask registers.
370 KSHIFTL,
371 KSHIFTR,
373 // Bit rotate by immediate
374 VROTLI,
375 VROTRI,
377 // Vector packed double/float comparison.
378 CMPP,
380 // Vector integer comparisons.
381 PCMPEQ,
382 PCMPGT,
384 // v8i16 Horizontal minimum and position.
385 PHMINPOS,
387 MULTISHIFT,
389 /// Vector comparison generating mask bits for fp and
390 /// integer signed and unsigned data types.
391 CMPM,
392 // Vector mask comparison generating mask bits for FP values.
393 CMPMM,
394 // Vector mask comparison with SAE for FP values.
395 CMPMM_SAE,
397 // Arithmetic operations with FLAGS results.
398 ADD,
399 SUB,
400 ADC,
401 SBB,
402 SMUL,
403 UMUL,
405 XOR,
406 AND,
408 // Bit field extract.
409 BEXTR,
410 BEXTRI,
412 // Zero High Bits Starting with Specified Bit Position.
413 BZHI,
415 // Parallel extract and deposit.
416 PDEP,
417 PEXT,
419 // X86-specific multiply by immediate.
420 MUL_IMM,
422 // Vector sign bit extraction.
423 MOVMSK,
425 // Vector bitwise comparisons.
426 PTEST,
428 // Vector packed fp sign bitwise comparisons.
429 TESTP,
431 // OR/AND test for masks.
432 KORTEST,
433 KTEST,
435 // ADD for masks.
436 KADD,
438 // Several flavors of instructions with vector shuffle behaviors.
439 // Saturated signed/unsigned packing.
440 PACKSS,
441 PACKUS,
442 // Intra-lane alignr.
443 PALIGNR,
444 // AVX512 inter-lane alignr.
445 VALIGN,
446 PSHUFD,
447 PSHUFHW,
448 PSHUFLW,
449 SHUFP,
450 // VBMI2 Concat & Shift.
451 VSHLD,
452 VSHRD,
453 VSHLDV,
454 VSHRDV,
455 // Shuffle Packed Values at 128-bit granularity.
456 SHUF128,
457 MOVDDUP,
458 MOVSHDUP,
459 MOVSLDUP,
460 MOVLHPS,
461 MOVHLPS,
462 MOVSD,
463 MOVSS,
464 MOVSH,
465 UNPCKL,
466 UNPCKH,
467 VPERMILPV,
468 VPERMILPI,
469 VPERMI,
470 VPERM2X128,
472 // Variable Permute (VPERM).
473 // Res = VPERMV MaskV, V0
474 VPERMV,
476 // 3-op Variable Permute (VPERMT2).
477 // Res = VPERMV3 V0, MaskV, V1
478 VPERMV3,
480 // Bitwise ternary logic.
481 VPTERNLOG,
482 // Fix Up Special Packed Float32/64 values.
483 VFIXUPIMM,
484 VFIXUPIMM_SAE,
485 VFIXUPIMMS,
486 VFIXUPIMMS_SAE,
487 // Range Restriction Calculation For Packed Pairs of Float32/64 values.
488 VRANGE,
489 VRANGE_SAE,
490 VRANGES,
491 VRANGES_SAE,
492 // Reduce - Perform Reduction Transformation on scalar/packed FP.
493 VREDUCE,
494 VREDUCE_SAE,
495 VREDUCES,
496 VREDUCES_SAE,
497 // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
498 // Also used by the legacy (V)ROUND intrinsics where we mask out the
499 // scaling part of the immediate.
500 VRNDSCALE,
501 VRNDSCALE_SAE,
502 VRNDSCALES,
503 VRNDSCALES_SAE,
504 // Tests types of FP values for packed types.
505 VFPCLASS,
506 // Tests types of FP values for scalar types.
507 VFPCLASSS,
509 // Broadcast (splat) scalar or element 0 of a vector. If the operand is
510 // a vector, this node may change the vector length as part of the splat.
511 VBROADCAST,
512 // Broadcast mask to vector.
513 VBROADCASTM,
515 /// SSE4A Extraction and Insertion.
516 EXTRQI,
517 INSERTQI,
519 // XOP arithmetic/logical shifts.
520 VPSHA,
521 VPSHL,
522 // XOP signed/unsigned integer comparisons.
523 VPCOM,
524 VPCOMU,
525 // XOP packed permute bytes.
526 VPPERM,
527 // XOP two source permutation.
528 VPERMIL2,
530 // Vector multiply packed unsigned doubleword integers.
531 PMULUDQ,
532 // Vector multiply packed signed doubleword integers.
533 PMULDQ,
534 // Vector Multiply Packed Unsigned Integers with Round and Scale.
535 MULHRS,
537 // Multiply and Add Packed Integers.
538 VPMADDUBSW,
539 VPMADDWD,
541 // AVX512IFMA multiply and add.
542 // NOTE: These are different from the instruction and perform
543 // op0 x op1 + op2.
544 VPMADD52L,
545 VPMADD52H,
547 // VNNI
548 VPDPBUSD,
549 VPDPBUSDS,
550 VPDPWSSD,
551 VPDPWSSDS,
553 // FMA nodes.
554 // We use the target independent ISD::FMA for the non-inverted case.
555 FNMADD,
556 FMSUB,
557 FNMSUB,
558 FMADDSUB,
559 FMSUBADD,
561 // FMA with rounding mode.
562 FMADD_RND,
563 FNMADD_RND,
564 FMSUB_RND,
565 FNMSUB_RND,
566 FMADDSUB_RND,
567 FMSUBADD_RND,
569 // Compress and expand.
570 COMPRESS,
571 EXPAND,
573 // Bits shuffle
574 VPSHUFBITQMB,
576 // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
577 SINT_TO_FP_RND,
578 UINT_TO_FP_RND,
579 SCALAR_SINT_TO_FP,
580 SCALAR_UINT_TO_FP,
581 SCALAR_SINT_TO_FP_RND,
582 SCALAR_UINT_TO_FP_RND,
584 // Vector float/double to signed/unsigned integer.
585 CVTP2SI,
586 CVTP2UI,
587 CVTP2SI_RND,
588 CVTP2UI_RND,
589 // Scalar float/double to signed/unsigned integer.
590 CVTS2SI,
591 CVTS2UI,
592 CVTS2SI_RND,
593 CVTS2UI_RND,
595 // Vector float/double to signed/unsigned integer with truncation.
596 CVTTP2SI,
597 CVTTP2UI,
598 CVTTP2SI_SAE,
599 CVTTP2UI_SAE,
600 // Scalar float/double to signed/unsigned integer with truncation.
601 CVTTS2SI,
602 CVTTS2UI,
603 CVTTS2SI_SAE,
604 CVTTS2UI_SAE,
606 // Vector signed/unsigned integer to float/double.
607 CVTSI2P,
608 CVTUI2P,
610 // Masked versions of above. Used for v2f64->v4f32.
611 // SRC, PASSTHRU, MASK
612 MCVTP2SI,
613 MCVTP2UI,
614 MCVTTP2SI,
615 MCVTTP2UI,
616 MCVTSI2P,
617 MCVTUI2P,
619 // Vector float to bfloat16.
620 // Convert TWO packed single data to one packed BF16 data
621 CVTNE2PS2BF16,
622 // Convert packed single data to packed BF16 data
623 CVTNEPS2BF16,
624 // Masked version of above.
625 // SRC, PASSTHRU, MASK
626 MCVTNEPS2BF16,
628 // Dot product of BF16 pairs accumulated into
629 // packed single precision.
630 DPBF16PS,
632 // Save xmm argument registers to the stack, according to %al. An operator
633 // is needed so that this can be expanded with control flow.
634 VASTART_SAVE_XMM_REGS,
636 // Windows's _chkstk call to do stack probing.
637 WIN_ALLOCA,
639 // For allocating variable amounts of stack space when using
640 // segmented stacks. Checks if the current stacklet has enough space, and
641 // falls back to heap allocation if not.
642 SEG_ALLOCA,
644 // For allocating stack space when using stack clash protector.
645 // Allocation is performed by block, and each block is probed.
646 PROBED_ALLOCA,
648 // Memory barriers.
649 MEMBARRIER,
650 MFENCE,
652 // Get a random integer and indicate whether it is valid in CF.
653 RDRAND,
655 // Get a NIST SP800-90B & C compliant random integer and
656 // indicate whether it is valid in CF.
657 RDSEED,
659 // Protection keys
660 // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
661 // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
662 // value for ECX.
663 RDPKRU,
664 WRPKRU,
666 // SSE42 string comparisons.
667 // These nodes produce 3 results: index, mask, and flags. X86ISelDAGToDAG
668 // will emit one or two instructions based on which results are used. If
669 // flags and index/mask are both used, this allows us to use a single
670 // instruction since we won't have to pick an opcode for flags. Instead we
671 // can rely on the DAG to CSE everything and decide at isel.
672 PCMPISTR,
673 PCMPESTR,
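// Illustrative note (a sketch, not a guarantee from this header): a PCMPISTR
// node can therefore carry a result list along the lines of
//   (i32 Index, v16i8 Mask, i32 Flags)
// and instruction selection picks the PCMPISTRI or PCMPISTRM form depending on
// which of those results actually have uses.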
675 // Test if in transactional execution.
676 XTEST,
678 // ERI instructions.
679 RSQRT28,
680 RSQRT28_SAE,
681 RSQRT28S,
682 RSQRT28S_SAE,
683 RCP28,
684 RCP28_SAE,
685 RCP28S,
686 RCP28S_SAE,
687 EXP2,
688 EXP2_SAE,
690 // Conversions between float and half-float.
691 CVTPS2PH,
692 CVTPH2PS,
693 CVTPH2PS_SAE,
695 // Masked version of above.
696 // SRC, RND, PASSTHRU, MASK
697 MCVTPS2PH,
699 // Galois Field Arithmetic Instructions
700 GF2P8AFFINEINVQB,
701 GF2P8AFFINEQB,
702 GF2P8MULB,
704 // LWP insert record.
705 LWPINS,
707 // User level wait
708 UMWAIT,
709 TPAUSE,
711 // Enqueue Stores Instructions
712 ENQCMD,
713 ENQCMDS,
715 // For avx512-vp2intersect
716 VP2INTERSECT,
718 // User level interrupts - testui
719 TESTUI,
721 /// X86 strict FP compare instructions.
722 STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
723 STRICT_FCMPS,
725 // Vector packed double/float comparison.
726 STRICT_CMPP,
728 /// Vector comparison generating mask bits for fp and
729 /// integer signed and unsigned data types.
730 STRICT_CMPM,
732 // Vector float/double to signed/unsigned integer with truncation.
733 STRICT_CVTTP2SI,
734 STRICT_CVTTP2UI,
736 // Vector FP extend.
737 STRICT_VFPEXT,
739 // Vector FP round.
740 STRICT_VFPROUND,
742 // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
743 // Also used by the legacy (V)ROUND intrinsics where we mask out the
744 // scaling part of the immediate.
745 STRICT_VRNDSCALE,
747 // Vector signed/unsigned integer to float/double.
748 STRICT_CVTSI2P,
749 STRICT_CVTUI2P,
751 // Strict FMA nodes.
752 STRICT_FNMADD,
753 STRICT_FMSUB,
754 STRICT_FNMSUB,
756 // Conversions between float and half-float.
757 STRICT_CVTPS2PH,
758 STRICT_CVTPH2PS,
760 // WARNING: Only add nodes here if they are strict FP nodes. Non-memory and
761 // non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.
763 // Compare and swap.
764 LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
765 LCMPXCHG8_DAG,
766 LCMPXCHG16_DAG,
767 LCMPXCHG16_SAVE_RBX_DAG,
769 /// LOCK-prefixed arithmetic read-modify-write instructions.
770 /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
771 LADD,
772 LSUB,
773 LOR,
774 LXOR,
775 LAND,
777 // Load, scalar_to_vector, and zero extend.
778 VZEXT_LOAD,
780 // extract_vector_elt, store.
781 VEXTRACT_STORE,
783 // scalar broadcast from memory.
784 VBROADCAST_LOAD,
786 // subvector broadcast from memory.
787 SUBV_BROADCAST_LOAD,
789 // Store FP control word into i16 memory.
790 FNSTCW16m,
792 // Load FP control word from i16 memory.
793 FLDCW16m,
795 /// This instruction implements FP_TO_SINT with the
796 /// integer destination in memory and a FP reg source. This corresponds
797 /// to the X86::FIST*m instructions and the rounding mode change stuff. It
798 /// has two inputs (token chain and address) and two outputs (int value
799 /// and token chain). Memory VT specifies the type to store to.
800 FP_TO_INT_IN_MEM,
802 /// This instruction implements SINT_TO_FP with the
803 /// integer source in memory and FP reg result. This corresponds to the
804 /// X86::FILD*m instructions. It has two inputs (token chain and address)
805 /// and two outputs (FP value and token chain). The integer source type is
806 /// specified by the memory VT.
807 FILD,
809 /// This instruction implements a fp->int store from FP stack
810 /// slots. This corresponds to the fist instruction. It takes a
811 /// chain operand, value to store, address, and glue. The memory VT
812 /// specifies the type to store as.
813 FIST,
815 /// This instruction implements an extending load to FP stack slots.
816 /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
817 /// operand, and ptr to load from. The memory VT specifies the type to
818 /// load from.
819 FLD,
821 /// This instruction implements a truncating store from FP stack
822 /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
823 /// chain operand, value to store, address, and glue. The memory VT
824 /// specifies the type to store as.
825 FST,
827 /// These instructions grab the address of the next argument
828 /// from a va_list. (reads and modifies the va_list in memory)
829 VAARG_64,
830 VAARG_X32,
832 // Vector truncating store with unsigned/signed saturation
833 VTRUNCSTOREUS,
834 VTRUNCSTORES,
835 // Vector truncating masked store with unsigned/signed saturation
836 VMTRUNCSTOREUS,
837 VMTRUNCSTORES,
839 // X86 specific gather and scatter
840 MGATHER,
841 MSCATTER,
843 // Key locker nodes that produce flags.
844 AESENC128KL,
845 AESDEC128KL,
846 AESENC256KL,
847 AESDEC256KL,
848 AESENCWIDE128KL,
849 AESDECWIDE128KL,
850 AESENCWIDE256KL,
851 AESDECWIDE256KL,
853 // WARNING: Do not add anything at the end unless you want the node to
854 // have a memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE, all
855 // opcodes will be treated as target memory ops!
856 };
857 } // end namespace X86ISD
859 namespace X86 {
860 /// The current rounding mode is represented in bits 11:10 of the FP control
861 /// word. These values are the same as the corresponding constants for the
862 /// rounding mode used in glibc.
863 enum RoundingMode {
864 rmToNearest = 0, // FE_TONEAREST
865 rmDownward = 1 << 10, // FE_DOWNWARD
866 rmUpward = 2 << 10, // FE_UPWARD
867 rmTowardZero = 3 << 10, // FE_TOWARDZERO
868 rmMask = 3 << 10 // Bit mask selecting rounding mode
869 };
870 }
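// Usage sketch (illustrative assumption, not code from this file): given an
// integer holding the FP control word, the rounding-control field can be
// decoded with the constants above, e.g.
//   unsigned RC = ControlWord & X86::rmMask;
//   bool RoundsToNearest = (RC == X86::rmToNearest);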
872 /// Define some predicates that are used for node matching.
873 namespace X86 {
874 /// Returns true if Elt is a constant zero or floating point constant +0.0.
875 bool isZeroNode(SDValue Elt);
877 /// Returns true if the given offset can
878 /// fit into the displacement field of the instruction.
879 bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
880 bool hasSymbolicDisplacement);
882 /// Determines whether the callee is required to pop its
883 /// own arguments. Callee pop is necessary to support tail calls.
884 bool isCalleePop(CallingConv::ID CallingConv,
885 bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
887 /// If Op is a constant whose elements are all the same constant or
888 /// undefined, return true and return the constant value in \p SplatVal.
889 /// If we have undef bits that don't cover an entire element, we treat these
890 /// as zero if AllowPartialUndefs is set, else we fail and return false.
891 bool isConstantSplat(SDValue Op, APInt &SplatVal,
892 bool AllowPartialUndefs = true);
893 } // end namespace X86
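// Usage sketch (illustrative): these predicates are intended to be called on
// SDValues during lowering and combining, e.g.
//   if (X86::isZeroNode(N->getOperand(0)))
//     ...; // operand 0 is a constant integer zero or FP +0.0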
895 //===--------------------------------------------------------------------===//
896 // X86 Implementation of the TargetLowering interface
897 class X86TargetLowering final : public TargetLowering {
898 public:
899 explicit X86TargetLowering(const X86TargetMachine &TM,
900 const X86Subtarget &STI);
902 unsigned getJumpTableEncoding() const override;
903 bool useSoftFloat() const override;
905 void markLibCallAttributes(MachineFunction *MF, unsigned CC,
906 ArgListTy &Args) const override;
908 MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
909 return MVT::i8;
910 }
912 const MCExpr *
913 LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
914 const MachineBasicBlock *MBB, unsigned uid,
915 MCContext &Ctx) const override;
917 /// Returns relocation base for the given PIC jumptable.
918 SDValue getPICJumpTableRelocBase(SDValue Table,
919 SelectionDAG &DAG) const override;
920 const MCExpr *
921 getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
922 unsigned JTI, MCContext &Ctx) const override;
924 /// Return the desired alignment for ByVal aggregate
925 /// function arguments in the caller parameter area. For X86, aggregates
926 /// that contain SSE vectors are placed at 16-byte boundaries while the rest are at
927 /// 4-byte boundaries.
928 unsigned getByValTypeAlignment(Type *Ty,
929 const DataLayout &DL) const override;
931 EVT getOptimalMemOpType(const MemOp &Op,
932 const AttributeList &FuncAttributes) const override;
934 /// Returns true if it's safe to use load / store of the
935 /// specified type to expand memcpy / memset inline. This is mostly true
936 /// for all types except for some special cases. For example, on X86
937 /// targets without SSE2 f64 load / store are done with fldl / fstpl which
938 /// also does type conversion. Note the specified type doesn't have to be
939 /// legal as the hook is used before type legalization.
940 bool isSafeMemOpType(MVT VT) const override;
942 /// Returns true if the target allows unaligned memory accesses of the
943 /// specified type. Returns whether it is "fast" in the last argument.
944 bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
945 MachineMemOperand::Flags Flags,
946 bool *Fast) const override;
948 /// Provide custom lowering hooks for some operations.
950 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
952 /// Replace the results of node with an illegal result
953 /// type with new values built out of custom code.
955 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
956 SelectionDAG &DAG) const override;
958 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
960 /// Return true if the target has native support for
961 /// the specified value type and it is 'desirable' to use the type for the
962 /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
963 /// instruction encodings are longer and some i16 instructions are slow.
964 bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
966 /// Return true if the target has native support for the
967 /// specified value type and it is 'desirable' to use the type. e.g. On x86
968 /// i16 is legal, but undesirable since i16 instruction encodings are longer
969 /// and some i16 instructions are slow.
970 bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
972 /// Return the newly negated expression if the cost is not expensive and
973 /// set the cost in \p Cost to indicate that if it is cheaper or neutral to
974 /// do the negation.
975 SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
976 bool LegalOperations, bool ForCodeSize,
977 NegatibleCost &Cost,
978 unsigned Depth) const override;
980 MachineBasicBlock *
981 EmitInstrWithCustomInserter(MachineInstr &MI,
982 MachineBasicBlock *MBB) const override;
984 /// This method returns the name of a target specific DAG node.
985 const char *getTargetNodeName(unsigned Opcode) const override;
987 /// Do not merge vector stores after legalization because that may conflict
988 /// with x86-specific store splitting optimizations.
989 bool mergeStoresAfterLegalization(EVT MemVT) const override {
990 return !MemVT.isVector();
991 }
993 bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
994 const MachineFunction &MF) const override;
996 bool isCheapToSpeculateCttz() const override;
998 bool isCheapToSpeculateCtlz() const override;
1000 bool isCtlzFast() const override;
1002 bool hasBitPreservingFPLogic(EVT VT) const override {
1003 return VT == MVT::f32 || VT == MVT::f64 || VT.isVector() ||
1004 (VT == MVT::f16 && X86ScalarSSEf16);
1005 }
1007 bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
1008 // If the pair to store is a mixture of float and int values, we will
1009 // save two bitwise instructions and one float-to-int instruction and
1010 // increase one store instruction. There is potentially a more
1011 // significant benefit because it avoids the float->int domain switch
1012 // for the input value, so it is more likely a win.
1013 if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
1014 (LTy.isInteger() && HTy.isFloatingPoint()))
1015 return true;
1016 // If the pair only contains int values, we will save two bitwise
1017 // instructions and increase one store instruction (costing one more
1018 // store buffer). Since the benefit is less clear, we leave
1019 // such pairs out until we get a testcase to prove it is a win.
1020 return false;
1021 }
1023 bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
1025 bool hasAndNotCompare(SDValue Y) const override;
1027 bool hasAndNot(SDValue Y) const override;
1029 bool hasBitTest(SDValue X, SDValue Y) const override;
1031 bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
1032 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
1033 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1034 SelectionDAG &DAG) const override;
1036 bool shouldFoldConstantShiftPairToMask(const SDNode *N,
1037 CombineLevel Level) const override;
1039 bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
1041 bool
1042 shouldTransformSignedTruncationCheck(EVT XVT,
1043 unsigned KeptBits) const override {
1044 // For vectors, we don't have a preference.
1045 if (XVT.isVector())
1046 return false;
1048 auto VTIsOk = [](EVT VT) -> bool {
1049 return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
1050 VT == MVT::i64;
1051 };
1053 // We are ok with KeptBitsVT being byte/word/dword, which is what MOVS supports.
1054 // XVT will be larger than KeptBitsVT.
1055 MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
1056 return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
1057 }
1059 bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
1061 bool shouldSplatInsEltVarIndex(EVT VT) const override;
1063 bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
1064 return VT.isScalarInteger();
1065 }
1067 /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
1068 MVT hasFastEqualityCompare(unsigned NumBits) const override;
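// For illustration only (a sketch of the underlying idea using SSE intrinsics,
// not code belonging to this interface): a 128-bit equality test can be done as
//   __m128i E = _mm_cmpeq_epi8(A, B);
//   bool Equal = _mm_movemask_epi8(E) == 0xFFFF;
// which is why reporting a wide type here makes memcmp expansion profitable.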
1070 /// Return the value type to use for ISD::SETCC.
1071 EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
1072 EVT VT) const override;
1074 bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
1075 const APInt &DemandedElts,
1076 TargetLoweringOpt &TLO) const override;
1078 /// Determine which of the bits specified in Mask are known to be either
1079 /// zero or one and return them in the KnownZero/KnownOne bitsets.
1080 void computeKnownBitsForTargetNode(const SDValue Op,
1081 KnownBits &Known,
1082 const APInt &DemandedElts,
1083 const SelectionDAG &DAG,
1084 unsigned Depth = 0) const override;
1086 /// Determine the number of bits in the operation that are sign bits.
1087 unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
1088 const APInt &DemandedElts,
1089 const SelectionDAG &DAG,
1090 unsigned Depth) const override;
1092 bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
1093 const APInt &DemandedElts,
1094 APInt &KnownUndef,
1095 APInt &KnownZero,
1096 TargetLoweringOpt &TLO,
1097 unsigned Depth) const override;
1099 bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
1100 const APInt &DemandedElts,
1101 unsigned MaskIndex,
1102 TargetLoweringOpt &TLO,
1103 unsigned Depth) const;
1105 bool SimplifyDemandedBitsForTargetNode(SDValue Op,
1106 const APInt &DemandedBits,
1107 const APInt &DemandedElts,
1108 KnownBits &Known,
1109 TargetLoweringOpt &TLO,
1110 unsigned Depth) const override;
1112 SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
1113 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
1114 SelectionDAG &DAG, unsigned Depth) const override;
1116 const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
1118 SDValue unwrapAddress(SDValue N) const override;
1120 SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
1122 bool ExpandInlineAsm(CallInst *CI) const override;
1124 ConstraintType getConstraintType(StringRef Constraint) const override;
1126 /// Examine constraint string and operand type and determine a weight value.
1127 /// The operand object must already have been set up with the operand type.
1128 ConstraintWeight
1129 getSingleConstraintMatchWeight(AsmOperandInfo &info,
1130 const char *constraint) const override;
1132 const char *LowerXConstraint(EVT ConstraintVT) const override;
1134 /// Lower the specified operand into the Ops vector. If it is invalid, don't
1135 /// add anything to Ops. If hasMemory is true it means one of the asm
1136 /// constraint of the inline asm instruction being processed is 'm'.
1137 void LowerAsmOperandForConstraint(SDValue Op,
1138 std::string &Constraint,
1139 std::vector<SDValue> &Ops,
1140 SelectionDAG &DAG) const override;
1142 unsigned
1143 getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1144 if (ConstraintCode == "v")
1145 return InlineAsm::Constraint_v;
1146 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
1147 }
1149 /// Handle lowering of flag assembly outputs.
1150 SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
1151 const SDLoc &DL,
1152 const AsmOperandInfo &Constraint,
1153 SelectionDAG &DAG) const override;
1155 /// Given a physical register constraint
1156 /// (e.g. {edx}), return the register number and the register class for the
1157 /// register. This should only be used for C_Register constraints. On
1158 /// error, this returns a register number of 0.
1159 std::pair<unsigned, const TargetRegisterClass *>
1160 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1161 StringRef Constraint, MVT VT) const override;
1163 /// Return true if the addressing mode represented
1164 /// by AM is legal for this target, for a load/store of the specified type.
1165 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
1166 Type *Ty, unsigned AS,
1167 Instruction *I = nullptr) const override;
1169 /// Return true if the specified immediate is legal
1170 /// icmp immediate, that is the target has icmp instructions which can
1171 /// compare a register against the immediate without having to materialize
1172 /// the immediate into a register.
1173 bool isLegalICmpImmediate(int64_t Imm) const override;
1175 /// Return true if the specified immediate is legal
1176 /// add immediate, that is the target has add instructions which can
1177 /// add a register and the immediate without having to materialize
1178 /// the immediate into a register.
1179 bool isLegalAddImmediate(int64_t Imm) const override;
1181 bool isLegalStoreImmediate(int64_t Imm) const override;
1183 /// Return the cost of the scaling factor used in the addressing
1184 /// mode represented by AM for this target, for a load/store
1185 /// of the specified type.
1186 /// If the AM is supported, the return value must be >= 0.
1187 /// If the AM is not supported, it returns a negative value.
1188 InstructionCost getScalingFactorCost(const DataLayout &DL,
1189 const AddrMode &AM, Type *Ty,
1190 unsigned AS) const override;
1192 /// This is used to enable splatted operand transforms for vector shifts
1193 /// and vector funnel shifts.
1194 bool isVectorShiftByScalarCheap(Type *Ty) const override;
1196 /// Add x86-specific opcodes to the default list.
1197 bool isBinOp(unsigned Opcode) const override;
1199 /// Returns true if the opcode is a commutative binary operation.
1200 bool isCommutativeBinOp(unsigned Opcode) const override;
1202 /// Return true if it's free to truncate a value of
1203 /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
1204 /// register EAX to i16 by referencing its sub-register AX.
1205 bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1206 bool isTruncateFree(EVT VT1, EVT VT2) const override;
1208 bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1210 /// Return true if any actual instruction that defines a
1211 /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
1212 /// register. This does not necessarily include registers defined in
1213 /// unknown ways, such as incoming arguments, or copies from unknown
1214 /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
1215 /// does not necessarily apply to truncate instructions. e.g. on x86-64,
1216 /// all instructions that define 32-bit values implicit zero-extend the
1217 /// result out to 64 bits.
1218 bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1219 bool isZExtFree(EVT VT1, EVT VT2) const override;
1220 bool isZExtFree(SDValue Val, EVT VT2) const override;
1222 bool shouldSinkOperands(Instruction *I,
1223 SmallVectorImpl<Use *> &Ops) const override;
1224 bool shouldConvertPhiType(Type *From, Type *To) const override;
1226 /// Return true if folding a vector load into ExtVal (a sign, zero, or any
1227 /// extend node) is profitable.
1228 bool isVectorLoadExtDesirable(SDValue) const override;
1230 /// Return true if an FMA operation is faster than a pair of fmul and fadd
1231 /// instructions. fmuladd intrinsics will be expanded to FMAs when this
1232 /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1233 bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
1234 EVT VT) const override;
1236 /// Return true if it's profitable to narrow
1237 /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
1238 /// from i32 to i8 but not from i32 to i16.
1239 bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
1241 /// Given an intrinsic, checks if on the target the intrinsic will need to map
1242 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1243 /// true and stores the intrinsic information into the IntrinsicInfo that was
1244 /// passed to the function.
1245 bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1246 MachineFunction &MF,
1247 unsigned Intrinsic) const override;
1249 /// Returns true if the target can instruction select the
1250 /// specified FP immediate natively. If false, the legalizer will
1251 /// materialize the FP immediate as a load from a constant pool.
1252 bool isFPImmLegal(const APFloat &Imm, EVT VT,
1253 bool ForCodeSize) const override;
1255 /// Targets can use this to indicate that they only support *some*
1256 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1257 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1258 /// be legal.
1259 bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1261 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1262 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1263 /// constant pool entry.
1264 bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1266 /// Returns true if lowering to a jump table is allowed.
1267 bool areJTsAllowed(const Function *Fn) const override;
1269 /// If true, then instruction selection should
1270 /// seek to shrink the FP constant of the specified type to a smaller type
1271 /// in order to save space and / or reduce runtime.
1272 bool ShouldShrinkFPConstant(EVT VT) const override {
1273 // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
1274 // expensive than a straight movsd. On the other hand, it's important to
1275 // shrink long double fp constant since fldt is very slow.
1276 return !X86ScalarSSEf64 || VT == MVT::f80;
1277 }
1279 /// Return true if we believe it is correct and profitable to reduce the
1280 /// load node to a smaller type.
1281 bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1282 EVT NewVT) const override;
1284 /// Return true if the specified scalar FP type is computed in an SSE
1285 /// register, not on the X87 floating point stack.
1286 bool isScalarFPTypeInSSEReg(EVT VT) const {
1287 return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
1288 (VT == MVT::f32 && X86ScalarSSEf32) || // f32 is when SSE1
1289 (VT == MVT::f16 && X86ScalarSSEf16); // f16 is when AVX512FP16
1290 }
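// e.g. (illustrative) with only SSE1 available this is true for MVT::f32,
// while f64 additionally needs SSE2 and f16 needs AVX512-FP16, matching the
// flags tested above.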
1292 /// Returns true if it is beneficial to convert a load of a constant
1293 /// to just the constant itself.
1294 bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1295 Type *Ty) const override;
1297 bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
1299 bool convertSelectOfConstantsToMath(EVT VT) const override;
1301 bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
1302 SDValue C) const override;
1304 /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1305 /// with this index.
1306 bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1307 unsigned Index) const override;
1309 /// Scalar ops always have equal or better analysis/performance/power than
1310 /// the vector equivalent, so this always makes sense if the scalar op is
1311 /// supported.
1312 bool shouldScalarizeBinop(SDValue) const override;
1314 /// Extract of a scalar FP value from index 0 of a vector is free.
1315 bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1316 EVT EltVT = VT.getScalarType();
1317 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1318 }
1320 /// Overflow nodes should get combined/lowered to optimal instructions
1321 /// (they should allow eliminating explicit compares by getting flags from
1322 /// math ops).
1323 bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
1324 bool MathUsed) const override;
1326 bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1327 unsigned AddrSpace) const override {
1328 // If we can replace more than 2 scalar stores, there will be a reduction
1329 // in instructions even after we add a vector constant load.
1330 return NumElem > 2;
1331 }
1333 bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1334 const SelectionDAG &DAG,
1335 const MachineMemOperand &MMO) const override;
1337 /// Intel processors have a unified instruction and data cache.
1338 const char *getClearCacheBuiltinName() const override {
1339 return nullptr; // nothing to do, move along.
1340 }
1342 Register getRegisterByName(const char* RegName, LLT VT,
1343 const MachineFunction &MF) const override;
1345 /// If a physical register, this returns the register that receives the
1346 /// exception address on entry to an EH pad.
1347 Register
1348 getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1350 /// If a physical register, this returns the register that receives the
1351 /// exception typeid on entry to a landing pad.
1352 Register
1353 getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1355 virtual bool needsFixedCatchObjects() const override;
1357 /// This method returns a target specific FastISel object,
1358 /// or null if the target does not support "fast" ISel.
1359 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1360 const TargetLibraryInfo *libInfo) const override;
1362 /// If the target has a standard location for the stack protector cookie,
1363 /// returns the address of that location. Otherwise, returns nullptr.
1364 Value *getIRStackGuard(IRBuilderBase &IRB) const override;
1366 bool useLoadStackGuardNode() const override;
1367 bool useStackGuardXorFP() const override;
1368 void insertSSPDeclarations(Module &M) const override;
1369 Value *getSDagStackGuard(const Module &M) const override;
1370 Function *getSSPStackGuardCheck(const Module &M) const override;
1371 SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1372 const SDLoc &DL) const override;
1375 /// Return true if the target stores SafeStack pointer at a fixed offset in
1376 /// some non-standard address space, and populates the address space and
1377 /// offset as appropriate.
1378 Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
1380 std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
1381 SDValue Chain, SDValue Pointer,
1382 MachinePointerInfo PtrInfo,
1383 Align Alignment,
1384 SelectionDAG &DAG) const;
1386 /// Customize the preferred legalization strategy for certain types.
1387 LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1389 bool softPromoteHalfType() const override { return true; }
1391 MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1392 EVT VT) const override;
1394 unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1395 CallingConv::ID CC,
1396 EVT VT) const override;
1398 unsigned getVectorTypeBreakdownForCallingConv(
1399 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1400 unsigned &NumIntermediates, MVT &RegisterVT) const override;
1402 bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1404 bool supportSwiftError() const override;
1406 bool hasStackProbeSymbol(MachineFunction &MF) const override;
1407 bool hasInlineStackProbe(MachineFunction &MF) const override;
1408 StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1410 unsigned getStackProbeSize(MachineFunction &MF) const;
1412 bool hasVectorBlend() const override { return true; }
1414 unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1416 /// Lower interleaved load(s) into target specific
1417 /// instructions/intrinsics.
1418 bool lowerInterleavedLoad(LoadInst *LI,
1419 ArrayRef<ShuffleVectorInst *> Shuffles,
1420 ArrayRef<unsigned> Indices,
1421 unsigned Factor) const override;
1423 /// Lower interleaved store(s) into target specific
1424 /// instructions/intrinsics.
1425 bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1426 unsigned Factor) const override;
1428 SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
1429 SDValue Addr, SelectionDAG &DAG)
1430 const override;
1432 Align getPrefLoopAlignment(MachineLoop *ML) const override;
1434 protected:
1435 std::pair<const TargetRegisterClass *, uint8_t>
1436 findRepresentativeClass(const TargetRegisterInfo *TRI,
1437 MVT VT) const override;
1439 private:
1440 /// Keep a reference to the X86Subtarget around so that we can
1441 /// make the right decision when generating code for different targets.
1442 const X86Subtarget &Subtarget;
1444 /// Select between SSE and x87 floating point ops.
1445 /// When SSE is available, use it for f32 operations.
1446 /// When SSE2 is available, use it for f64 operations.
1447 bool X86ScalarSSEf32;
1448 bool X86ScalarSSEf64;
1449 bool X86ScalarSSEf16;
1451 /// A list of legal FP immediates.
1452 std::vector<APFloat> LegalFPImmediates;
1454 /// Indicate that this x86 target can instruction
1455 /// select the specified FP immediate natively.
1456 void addLegalFPImmediate(const APFloat& Imm) {
1457 LegalFPImmediates.push_back(Imm);
1458 }
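// Usage sketch (illustrative): the constructor registers the immediates the
// target can materialize directly, e.g.
//   addLegalFPImmediate(APFloat(+0.0));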
1460 SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
1461 CallingConv::ID CallConv, bool isVarArg,
1462 const SmallVectorImpl<ISD::InputArg> &Ins,
1463 const SDLoc &dl, SelectionDAG &DAG,
1464 SmallVectorImpl<SDValue> &InVals,
1465 uint32_t *RegMask) const;
1466 SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1467 const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1468 const SDLoc &dl, SelectionDAG &DAG,
1469 const CCValAssign &VA, MachineFrameInfo &MFI,
1470 unsigned i) const;
1471 SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1472 const SDLoc &dl, SelectionDAG &DAG,
1473 const CCValAssign &VA,
1474 ISD::ArgFlagsTy Flags, bool isByval) const;
1476 // Call lowering helpers.
1478 /// Check whether the call is eligible for tail call optimization. Targets
1479 /// that want to do tail call optimization should implement this function.
1480 bool IsEligibleForTailCallOptimization(SDValue Callee,
1481 CallingConv::ID CalleeCC,
1482 bool isVarArg,
1483 bool isCalleeStructRet,
1484 bool isCallerStructRet,
1485 Type *RetTy,
1486 const SmallVectorImpl<ISD::OutputArg> &Outs,
1487 const SmallVectorImpl<SDValue> &OutVals,
1488 const SmallVectorImpl<ISD::InputArg> &Ins,
1489 SelectionDAG& DAG) const;
1490 SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1491 SDValue Chain, bool IsTailCall,
1492 bool Is64Bit, int FPDiff,
1493 const SDLoc &dl) const;
1495 unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1496 SelectionDAG &DAG) const;
1498 unsigned getAddressSpace(void) const;
1500 SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
1501 SDValue &Chain) const;
1502 SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;
1504 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1505 SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1506 SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1507 SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1509 unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
1510 const unsigned char OpFlags = 0) const;
1511 SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1512 SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1513 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1514 SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1515 SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1517 /// Creates target global address or external symbol nodes for calls or
1518 /// other uses.
1519 SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1520 bool ForCall) const;
1522 SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1523 SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1524 SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1525 SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1526 SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
1527 SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
1528 SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1529 SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1530 SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1531 SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1532 SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1533 SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1534 SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1535 SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1536 SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1537 SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1538 SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1539 SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1540 SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1541 SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1542 SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1543 SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1544 SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1545 SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
1546 SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1547 SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1548 SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
1549 SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1550 SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
1551 SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
1552 SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
1554 SDValue
1555 LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1556 const SmallVectorImpl<ISD::InputArg> &Ins,
1557 const SDLoc &dl, SelectionDAG &DAG,
1558 SmallVectorImpl<SDValue> &InVals) const override;
1559 SDValue LowerCall(CallLoweringInfo &CLI,
1560 SmallVectorImpl<SDValue> &InVals) const override;
1562 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1563 const SmallVectorImpl<ISD::OutputArg> &Outs,
1564 const SmallVectorImpl<SDValue> &OutVals,
1565 const SDLoc &dl, SelectionDAG &DAG) const override;
1567 bool supportSplitCSR(MachineFunction *MF) const override {
1568 return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1569 MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1570 }
1571 void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1572 void insertCopiesSplitCSR(
1573 MachineBasicBlock *Entry,
1574 const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1576 bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1578 bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1580 EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1581 ISD::NodeType ExtendKind) const override;
1583 bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1584 bool isVarArg,
1585 const SmallVectorImpl<ISD::OutputArg> &Outs,
1586 LLVMContext &Context) const override;
1588 const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1590 TargetLoweringBase::AtomicExpansionKind
1591 shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
1592 bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1593 TargetLoweringBase::AtomicExpansionKind
1594 shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1596 LoadInst *
1597 lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1599 bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override;
1600 bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override;
1602 bool needsCmpXchgNb(Type *MemType) const;
1604 void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1605 MachineBasicBlock *DispatchBB, int FI) const;
1607 // Utility function to emit the low-level va_arg code for X86-64.
1608 MachineBasicBlock *
1609 EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;
1611 /// Utility function to emit the xmm reg save portion of va_start.
1612 MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1613 MachineInstr &MI2,
1614 MachineBasicBlock *BB) const;
1616 MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1617 MachineBasicBlock *BB) const;
1619 MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1620 MachineBasicBlock *BB) const;
1622 MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1623 MachineBasicBlock *BB) const;
1625 MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
1626 MachineBasicBlock *BB) const;
1628 MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1629 MachineBasicBlock *BB) const;
1631 MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1632 MachineBasicBlock *BB) const;
1634 MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
1635 MachineBasicBlock *BB) const;
1637 MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1638 MachineBasicBlock *MBB) const;
1640 void emitSetJmpShadowStackFix(MachineInstr &MI,
1641 MachineBasicBlock *MBB) const;
1643 MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1644 MachineBasicBlock *MBB) const;
1646 MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1647 MachineBasicBlock *MBB) const;
1649 MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1650 MachineBasicBlock *MBB) const;
1652 /// Emit flags for the given setcc condition and operands. Also returns the
1653 /// corresponding X86 condition code constant in X86CC.
1654 SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
1655 const SDLoc &dl, SelectionDAG &DAG,
1656 SDValue &X86CC) const;
1658 /// Check if replacement of SQRT with RSQRT should be disabled.
1659 bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;
1661 /// Use rsqrt* to speed up sqrt calculations.
1662 SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
1663 int &RefinementSteps, bool &UseOneConstNR,
1664 bool Reciprocal) const override;
1666 /// Use rcp* to speed up fdiv calculations.
1667 SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
1668 int &RefinementSteps) const override;
1670 /// Reassociate floating point divisions into multiply by reciprocal.
1671 unsigned combineRepeatedFPDivisors() const override;
1673 SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1674 SmallVectorImpl<SDNode *> &Created) const override;
1675 };
1677 namespace X86 {
1678 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1679 const TargetLibraryInfo *libInfo);
1680 } // end namespace X86
1682 // X86 specific Gather/Scatter nodes.
1683 // The class has the same order of operands as MaskedGatherScatterSDNode for
1684 // convenience.
1685 class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
1686 public:
1687 // This is intended as a utility and should never be directly created.
1688 X86MaskedGatherScatterSDNode() = delete;
1689 ~X86MaskedGatherScatterSDNode() = delete;
1691 const SDValue &getBasePtr() const { return getOperand(3); }
1692 const SDValue &getIndex() const { return getOperand(4); }
1693 const SDValue &getMask() const { return getOperand(2); }
1694 const SDValue &getScale() const { return getOperand(5); }
1696 static bool classof(const SDNode *N) {
1697 return N->getOpcode() == X86ISD::MGATHER ||
1698 N->getOpcode() == X86ISD::MSCATTER;
1699 }
1700 };
1702 class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1703 public:
1704 const SDValue &getPassThru() const { return getOperand(1); }
1706 static bool classof(const SDNode *N) {
1707 return N->getOpcode() == X86ISD::MGATHER;
1708 }
1709 };
1711 class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1712 public:
1713 const SDValue &getValue() const { return getOperand(1); }
1715 static bool classof(const SDNode *N) {
1716 return N->getOpcode() == X86ISD::MSCATTER;
1717 }
1718 };
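// Usage sketch (illustrative): the classof hooks above let DAG code use the
// usual LLVM casting utilities on these nodes, e.g.
//   if (auto *Gather = dyn_cast<X86MaskedGatherSDNode>(N))
//     SDValue Index = Gather->getIndex();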
1720 /// Generate unpacklo/unpackhi shuffle mask.
1721 void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
1722 bool Unary);
1724 /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
1725 /// imposed by AVX and specific to the unary pattern. Example:
1726 /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
1727 /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
1728 void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);
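// Usage sketch (illustrative), matching the example above:
//   SmallVector<int, 8> Mask;
//   createSplat2ShuffleMask(MVT::v8i16, Mask, /*Lo=*/true);
//   // Mask now holds {0, 0, 1, 1, 2, 2, 3, 3}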
1730 } // end namespace llvm
1732 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H