1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that X86 uses to lower LLVM code into a
10 // selection DAG.
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
17 #include "llvm/CodeGen/MachineFunction.h"
18 #include "llvm/CodeGen/TargetLowering.h"
20 namespace llvm {
21 class X86Subtarget;
22 class X86TargetMachine;
24 namespace X86ISD {
25 // X86 Specific DAG Nodes
26 enum NodeType : unsigned {
27 // Start the numbering where the builtin ops leave off.
28 FIRST_NUMBER = ISD::BUILTIN_OP_END,
30 /// Bit scan forward.
31 BSF,
32 /// Bit scan reverse.
33 BSR,
35 /// X86 funnel/double shift i16 instructions. These correspond to
36 /// X86::SHLDW and X86::SHRDW instructions, which have different
37 /// shift-amount modulo rules from generic funnel shifts.
38 /// NOTE: The operand order matches ISD::FSHL/FSHR, not SHLD/SHRD.
39 FSHL,
40 FSHR,
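// Illustration (an assumption based on the generic ISD::FSHL semantics, not
// generated output): for an in-range amount 0 < Amt < 16, FSHL(A, B, Amt)
// on i16 yields (A << Amt) | (B >> (16 - Amt)); the X86 nodes differ from
// the generic ones only in how out-of-range amounts are reduced.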
42 /// Bitwise logical AND of floating point values. This corresponds
43 /// to X86::ANDPS or X86::ANDPD.
44 FAND,
46 /// Bitwise logical OR of floating point values. This corresponds
47 /// to X86::ORPS or X86::ORPD.
48 FOR,
50 /// Bitwise logical XOR of floating point values. This corresponds
51 /// to X86::XORPS or X86::XORPD.
52 FXOR,
54 /// Bitwise logical ANDNOT of floating point values. This
55 /// corresponds to X86::ANDNPS or X86::ANDNPD.
56 FANDN,
58 /// These operations represent an abstract X86 call
59 /// instruction, which includes a bunch of information. In particular the
60 /// operands of this node are:
61 ///
62 /// #0 - The incoming token chain
63 /// #1 - The callee
64 /// #2 - The number of arg bytes the caller pushes on the stack.
65 /// #3 - The number of arg bytes the callee pops off the stack.
66 /// #4 - The value to pass in AL/AX/EAX (optional)
67 /// #5 - The value to pass in DL/DX/EDX (optional)
68 ///
69 /// The result values of these nodes are:
70 ///
71 /// #0 - The outgoing token chain
72 /// #1 - The first register result value (optional)
73 /// #2 - The second register result value (optional)
74 ///
75 CALL,
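// Illustrative sketch, restating only the operand list documented above for
// a hypothetical direct call to @f that passes 16 bytes on the stack and
// pops none (the node actually built by LowerCall may carry additional
// register and register-mask operands):
//   X86ISD::CALL Chain, TargetGlobalAddress<@f>, Constant<16>, Constant<0>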
77 /// Same as call except it adds the NoTrack prefix.
78 NT_CALL,
80 // Pseudo for an Objective-C call that gets emitted together with a
81 // special marker instruction.
82 CALL_RVMARKER,
84 /// X86 compare and logical compare instructions.
85 CMP,
86 FCMP,
87 COMI,
88 UCOMI,
90 // X86 compare with Intrinsics similar to COMI.
91 COMX,
92 UCOMX,
94 /// X86 bit-test instructions.
95 BT,
97 /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
98 /// operand, usually produced by a CMP instruction.
99 SETCC,
101 /// X86 Select
102 SELECTS,
104 // Same as SETCC except it's materialized with an SBB and the value is
105 // all ones or all zeros.
106 SETCC_CARRY, // R = carry_bit ? ~0 : 0
108 /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
109 /// Operands are two FP values to compare; result is a mask of
110 /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
111 FSETCC,
113 /// X86 FP SETCC, similar to above, but with output as an i1 mask and
114 /// with a version that takes SAE.
115 FSETCCM,
116 FSETCCM_SAE,
118 /// X86 conditional moves. Operand 0 and operand 1 are the two values
119 /// to select from. Operand 2 is the condition code, and operand 3 is the
120 /// flag operand produced by a CMP or TEST instruction.
121 CMOV,
123 /// X86 conditional branches. Operand 0 is the chain operand, operand 1
124 /// is the block to branch if condition is true, operand 2 is the
125 /// condition code, and operand 3 is the flag operand produced by a CMP
126 /// or TEST instruction.
127 BRCOND,
129 /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
130 /// operand 1 is the target address.
131 NT_BRIND,
133 /// Return with a glue operand. Operand 0 is the chain operand, operand
134 /// 1 is the number of bytes of stack to pop.
135 RET_GLUE,
137 /// Return from interrupt. Operand 0 is the number of bytes to pop.
138 IRET,
140 /// Repeat fill, corresponds to X86::REP_STOSx.
141 REP_STOS,
143 /// Repeat move, corresponds to X86::REP_MOVSx.
144 REP_MOVS,
146 /// On Darwin, this node represents the result of the popl
147 /// at function entry, used for PIC code.
148 GlobalBaseReg,
150 /// A wrapper node for TargetConstantPool, TargetJumpTable,
151 /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
152 /// MCSymbol and TargetBlockAddress.
153 Wrapper,
155 /// Special wrapper used under X86-64 PIC mode for RIP
156 /// relative displacements.
157 WrapperRIP,
159 /// Copies a 64-bit value from an MMX vector to the low word
160 /// of an XMM vector, with the high word zero filled.
161 MOVQ2DQ,
163 /// Copies a 64-bit value from the low word of an XMM vector
164 /// to an MMX vector.
165 MOVDQ2Q,
167 /// Copies a 32-bit value from the low word of a MMX
168 /// vector to a GPR.
169 MMX_MOVD2W,
171 /// Copies a GPR into the low 32-bit word of a MMX vector
172 /// and zero out the high word.
173 MMX_MOVW2D,
175 /// Extract an 8-bit value from a vector and zero extend it to
176 /// i32, corresponds to X86::PEXTRB.
177 PEXTRB,
179 /// Extract a 16-bit value from a vector and zero extend it to
180 /// i32, corresponds to X86::PEXTRW.
181 PEXTRW,
183 /// Insert any element of a 4 x float vector into any element
184 /// of a destination 4 x float vector.
185 INSERTPS,
187 /// Insert the lower 8-bits of a 32-bit value to a vector,
188 /// corresponds to X86::PINSRB.
189 PINSRB,
191 /// Insert the lower 16-bits of a 32-bit value to a vector,
192 /// corresponds to X86::PINSRW.
193 PINSRW,
195 /// Shuffle 16 8-bit values within a vector.
196 PSHUFB,
198 /// Compute Sum of Absolute Differences.
199 PSADBW,
200 /// Compute Double Block Packed Sum-Absolute-Differences
201 DBPSADBW,
203 /// Bitwise Logical AND NOT of Packed FP values.
204 ANDNP,
206 /// Blend where the selector is an immediate.
207 BLENDI,
209 /// Dynamic (non-constant condition) vector blend where only the sign bits
210 /// of the condition elements are used. This is used to enforce that the
211 /// condition mask is not valid for generic VSELECT optimizations. This
212 /// is also used to implement the intrinsics.
213 /// Operands are in VSELECT order: MASK, TRUE, FALSE
214 BLENDV,
216 /// Combined add and sub on an FP vector.
217 ADDSUB,
219 // FP vector ops with rounding mode.
220 FADD_RND,
221 FADDS,
222 FADDS_RND,
223 FSUB_RND,
224 FSUBS,
225 FSUBS_RND,
226 FMUL_RND,
227 FMULS,
228 FMULS_RND,
229 FDIV_RND,
230 FDIVS,
231 FDIVS_RND,
232 FMAX_SAE,
233 FMAXS_SAE,
234 FMIN_SAE,
235 FMINS_SAE,
236 FSQRT_RND,
237 FSQRTS,
238 FSQRTS_RND,
240 // FP vector get exponent.
241 FGETEXP,
242 FGETEXP_SAE,
243 FGETEXPS,
244 FGETEXPS_SAE,
245 // Extract Normalized Mantissas.
246 VGETMANT,
247 VGETMANT_SAE,
248 VGETMANTS,
249 VGETMANTS_SAE,
250 // FP Scale.
251 SCALEF,
252 SCALEF_RND,
253 SCALEFS,
254 SCALEFS_RND,
256 /// Integer horizontal add/sub.
257 HADD,
258 HSUB,
260 /// Floating point horizontal add/sub.
261 FHADD,
262 FHSUB,
264 // Detect Conflicts Within a Vector
265 CONFLICT,
267 /// Floating point max and min.
268 FMAX,
269 FMIN,
271 /// Commutative FMIN and FMAX.
272 FMAXC,
273 FMINC,
275 /// Scalar intrinsic floating point max and min.
276 FMAXS,
277 FMINS,
279 /// Floating point reciprocal-sqrt and reciprocal approximation.
280 /// Note that these typically require refinement
281 /// in order to obtain suitable precision.
282 FRSQRT,
283 FRCP,
285 // AVX-512 reciprocal approximations with a little more precision.
286 RSQRT14,
287 RSQRT14S,
288 RCP14,
289 RCP14S,
291 // Thread Local Storage.
292 TLSADDR,
294 // Thread Local Storage. A call to get the start address
295 // of the TLS block for the current module.
296 TLSBASEADDR,
298 // Thread Local Storage. When calling to an OS provided
299 // thunk at the address from an earlier relocation.
300 TLSCALL,
302 // Thread Local Storage. A descriptor containing pointer to
303 // code and to argument to get the TLS offset for the symbol.
304 TLSDESC,
306 // Exception Handling helpers.
307 EH_RETURN,
309 // SjLj exception handling setjmp.
310 EH_SJLJ_SETJMP,
312 // SjLj exception handling longjmp.
313 EH_SJLJ_LONGJMP,
315 // SjLj exception handling dispatch.
316 EH_SJLJ_SETUP_DISPATCH,
318 /// Tail call return. See X86TargetLowering::LowerCall for
319 /// the list of operands.
320 TC_RETURN,
322 // Vector move to low scalar and zero higher vector elements.
323 VZEXT_MOVL,
325 // Vector integer truncate.
326 VTRUNC,
327 // Vector integer truncate with unsigned/signed saturation.
328 VTRUNCUS,
329 VTRUNCS,
331 // Masked version of the above. Used when less than a 128-bit result is
332 // produced since the mask only applies to the lower elements and can't
333 // be represented by a select.
334 // SRC, PASSTHRU, MASK
335 VMTRUNC,
336 VMTRUNCUS,
337 VMTRUNCS,
339 // Vector FP extend.
340 VFPEXT,
341 VFPEXT_SAE,
342 VFPEXTS,
343 VFPEXTS_SAE,
345 // Vector FP round.
346 VFPROUND,
347 // Convert TWO packed single data to one packed data
348 VFPROUND2,
349 VFPROUND2_RND,
350 VFPROUND_RND,
351 VFPROUNDS,
352 VFPROUNDS_RND,
354 // Masked version of above. Used for v2f64->v4f32.
355 // SRC, PASSTHRU, MASK
356 VMFPROUND,
358 // 128-bit vector logical left / right shift
359 VSHLDQ,
360 VSRLDQ,
362 // Vector shift elements
363 VSHL,
364 VSRL,
365 VSRA,
367 // Vector variable shift
368 VSHLV,
369 VSRLV,
370 VSRAV,
372 // Vector shift elements by immediate
373 VSHLI,
374 VSRLI,
375 VSRAI,
377 // Shifts of mask registers.
378 KSHIFTL,
379 KSHIFTR,
381 // Bit rotate by immediate
382 VROTLI,
383 VROTRI,
385 // Vector packed double/float comparison.
386 CMPP,
388 // Vector integer comparisons.
389 PCMPEQ,
390 PCMPGT,
392 // v8i16 Horizontal minimum and position.
393 PHMINPOS,
395 MULTISHIFT,
397 /// Vector comparison generating mask bits for fp and
398 /// integer signed and unsigned data types.
399 CMPM,
400 // Vector mask comparison generating mask bits for FP values.
401 CMPMM,
402 // Vector mask comparison with SAE for FP values.
403 CMPMM_SAE,
405 // Arithmetic operations with FLAGS results.
406 ADD,
407 SUB,
408 ADC,
409 SBB,
410 SMUL,
411 UMUL,
413 XOR,
414 AND,
416 // Bit field extract.
417 BEXTR,
418 BEXTRI,
420 // Zero High Bits Starting with Specified Bit Position.
421 BZHI,
423 // Parallel extract and deposit.
424 PDEP,
425 PEXT,
427 // X86-specific multiply by immediate.
428 MUL_IMM,
430 // Vector sign bit extraction.
431 MOVMSK,
433 // Vector bitwise comparisons.
434 PTEST,
436 // Vector packed fp sign bitwise comparisons.
437 TESTP,
439 // OR/AND test for masks.
440 KORTEST,
441 KTEST,
443 // ADD for masks.
444 KADD,
446 // Several flavors of instructions with vector shuffle behaviors.
447 // Saturated signed/unsigned packing.
448 PACKSS,
449 PACKUS,
450 // Intra-lane alignr.
451 PALIGNR,
452 // AVX512 inter-lane alignr.
453 VALIGN,
454 PSHUFD,
455 PSHUFHW,
456 PSHUFLW,
457 SHUFP,
458 // VBMI2 Concat & Shift.
459 VSHLD,
460 VSHRD,
461 VSHLDV,
462 VSHRDV,
463 // Shuffle Packed Values at 128-bit granularity.
464 SHUF128,
465 MOVDDUP,
466 MOVSHDUP,
467 MOVSLDUP,
468 MOVLHPS,
469 MOVHLPS,
470 MOVSD,
471 MOVSS,
472 MOVSH,
473 UNPCKL,
474 UNPCKH,
475 VPERMILPV,
476 VPERMILPI,
477 VPERMI,
478 VPERM2X128,
480 // Variable Permute (VPERM).
481 // Res = VPERMV MaskV, V0
482 VPERMV,
484 // 3-op Variable Permute (VPERMT2).
485 // Res = VPERMV3 V0, MaskV, V1
486 VPERMV3,
488 // Bitwise ternary logic.
489 VPTERNLOG,
490 // Fix Up Special Packed Float32/64 values.
491 VFIXUPIMM,
492 VFIXUPIMM_SAE,
493 VFIXUPIMMS,
494 VFIXUPIMMS_SAE,
495 // Range Restriction Calculation For Packed Pairs of Float32/64 values.
496 VRANGE,
497 VRANGE_SAE,
498 VRANGES,
499 VRANGES_SAE,
500 // Reduce - Perform Reduction Transformation on scalar/packed FP.
501 VREDUCE,
502 VREDUCE_SAE,
503 VREDUCES,
504 VREDUCES_SAE,
505 // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
506 // Also used by the legacy (V)ROUND intrinsics where we mask out the
507 // scaling part of the immediate.
508 VRNDSCALE,
509 VRNDSCALE_SAE,
510 VRNDSCALES,
511 VRNDSCALES_SAE,
512 // Tests types of FP values, packed form.
513 VFPCLASS,
514 // Tests types of FP values, scalar form.
515 VFPCLASSS,
517 // Broadcast (splat) scalar or element 0 of a vector. If the operand is
518 // a vector, this node may change the vector length as part of the splat.
519 VBROADCAST,
520 // Broadcast mask to vector.
521 VBROADCASTM,
523 /// SSE4A Extraction and Insertion.
524 EXTRQI,
525 INSERTQI,
527 // XOP arithmetic/logical shifts.
528 VPSHA,
529 VPSHL,
530 // XOP signed/unsigned integer comparisons.
531 VPCOM,
532 VPCOMU,
533 // XOP packed permute bytes.
534 VPPERM,
535 // XOP two source permutation.
536 VPERMIL2,
538 // Vector multiply packed unsigned doubleword integers.
539 PMULUDQ,
540 // Vector multiply packed signed doubleword integers.
541 PMULDQ,
542 // Vector Multiply Packed Unsigned Integers with Round and Scale.
543 MULHRS,
545 // Multiply and Add Packed Integers.
546 VPMADDUBSW,
547 VPMADDWD,
549 // AVX512IFMA multiply and add.
550 // NOTE: These are different from the instruction and perform
551 // op0 x op1 + op2.
552 VPMADD52L,
553 VPMADD52H,
555 // VNNI
556 VPDPBUSD,
557 VPDPBUSDS,
558 VPDPWSSD,
559 VPDPWSSDS,
561 // FMA nodes.
562 // We use the target independent ISD::FMA for the non-inverted case.
563 FNMADD,
564 FMSUB,
565 FNMSUB,
566 FMADDSUB,
567 FMSUBADD,
569 // FMA with rounding mode.
570 FMADD_RND,
571 FNMADD_RND,
572 FMSUB_RND,
573 FNMSUB_RND,
574 FMADDSUB_RND,
575 FMSUBADD_RND,
577 // AVX512-FP16 complex addition and multiplication.
578 VFMADDC,
579 VFMADDC_RND,
580 VFCMADDC,
581 VFCMADDC_RND,
583 VFMULC,
584 VFMULC_RND,
585 VFCMULC,
586 VFCMULC_RND,
588 VFMADDCSH,
589 VFMADDCSH_RND,
590 VFCMADDCSH,
591 VFCMADDCSH_RND,
593 VFMULCSH,
594 VFMULCSH_RND,
595 VFCMULCSH,
596 VFCMULCSH_RND,
598 VPDPBSUD,
599 VPDPBSUDS,
600 VPDPBUUD,
601 VPDPBUUDS,
602 VPDPBSSD,
603 VPDPBSSDS,
605 VPDPWSUD,
606 VPDPWSUDS,
607 VPDPWUSD,
608 VPDPWUSDS,
609 VPDPWUUD,
610 VPDPWUUDS,
612 VMINMAX,
613 VMINMAX_SAE,
614 VMINMAXS,
615 VMINMAXS_SAE,
617 CVTP2IBS,
618 CVTP2IUBS,
619 CVTP2IBS_RND,
620 CVTP2IUBS_RND,
621 CVTTP2IBS,
622 CVTTP2IUBS,
623 CVTTP2IBS_SAE,
624 CVTTP2IUBS_SAE,
626 MPSADBW,
628 VCVTNE2PH2BF8,
629 VCVTNE2PH2BF8S,
630 VCVTNE2PH2HF8,
631 VCVTNE2PH2HF8S,
632 VCVTBIASPH2BF8,
633 VCVTBIASPH2BF8S,
634 VCVTBIASPH2HF8,
635 VCVTBIASPH2HF8S,
636 VCVTNEPH2BF8,
637 VCVTNEPH2BF8S,
638 VCVTNEPH2HF8,
639 VCVTNEPH2HF8S,
640 VMCVTBIASPH2BF8,
641 VMCVTBIASPH2BF8S,
642 VMCVTBIASPH2HF8,
643 VMCVTBIASPH2HF8S,
644 VMCVTNEPH2BF8,
645 VMCVTNEPH2BF8S,
646 VMCVTNEPH2HF8,
647 VMCVTNEPH2HF8S,
648 VCVTHF82PH,
650 // Compress and expand.
651 COMPRESS,
652 EXPAND,
654 // Bits shuffle
655 VPSHUFBITQMB,
657 // Convert Unsigned/Signed Integer to Floating-Point Value with rounding mode.
658 SINT_TO_FP_RND,
659 UINT_TO_FP_RND,
660 SCALAR_SINT_TO_FP,
661 SCALAR_UINT_TO_FP,
662 SCALAR_SINT_TO_FP_RND,
663 SCALAR_UINT_TO_FP_RND,
665 // Vector float/double to signed/unsigned integer.
666 CVTP2SI,
667 CVTP2UI,
668 CVTP2SI_RND,
669 CVTP2UI_RND,
670 // Scalar float/double to signed/unsigned integer.
671 CVTS2SI,
672 CVTS2UI,
673 CVTS2SI_RND,
674 CVTS2UI_RND,
676 // Vector float/double to signed/unsigned integer with truncation.
677 CVTTP2SI,
678 CVTTP2UI,
679 CVTTP2SI_SAE,
680 CVTTP2UI_SAE,
682 // Saturation enabled Vector float/double to signed/unsigned
683 // integer with truncation.
684 CVTTP2SIS,
685 CVTTP2UIS,
686 CVTTP2SIS_SAE,
687 CVTTP2UIS_SAE,
688 // Masked versions of above. Used for v2f64 to v4i32.
689 // SRC, PASSTHRU, MASK
690 MCVTTP2SIS,
691 MCVTTP2UIS,
693 // Scalar float/double to signed/unsigned integer with truncation.
694 CVTTS2SI,
695 CVTTS2UI,
696 CVTTS2SI_SAE,
697 CVTTS2UI_SAE,
699 // Vector signed/unsigned integer to float/double.
700 CVTSI2P,
701 CVTUI2P,
703 // Scalar float/double to signed/unsigned integer with saturation.
704 CVTTS2SIS,
705 CVTTS2UIS,
706 CVTTS2SIS_SAE,
707 CVTTS2UIS_SAE,
709 // Masked versions of above. Used for v2f64->v4f32.
710 // SRC, PASSTHRU, MASK
711 MCVTP2SI,
712 MCVTP2UI,
713 MCVTTP2SI,
714 MCVTTP2UI,
715 MCVTSI2P,
716 MCVTUI2P,
718 // Vector float to bfloat16.
719 // Convert packed single data to packed BF16 data
720 CVTNEPS2BF16,
721 // Masked version of above.
722 // SRC, PASSTHRU, MASK
723 MCVTNEPS2BF16,
725 // Dot product of BF16/FP16 pairs, accumulated into
726 // packed single precision.
727 DPBF16PS,
728 DPFP16PS,
730 // A stack checking function call. On Windows it's the _chkstk call.
731 DYN_ALLOCA,
733 // For allocating variable amounts of stack space when using
734 // segmented stacks. Checks if the current stacklet has enough space,
735 // and falls back to heap allocation if not.
736 SEG_ALLOCA,
738 // For allocating stack space when using stack clash protector.
739 // Allocation is performed by block, and each block is probed.
740 PROBED_ALLOCA,
742 // Memory barriers.
743 MFENCE,
745 // Get a random integer and indicate whether it is valid in CF.
746 RDRAND,
748 // Get a NIST SP800-90B & C compliant random integer and
749 // indicate whether it is valid in CF.
750 RDSEED,
752 // Protection keys
753 // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
754 // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
755 // value for ECX.
756 RDPKRU,
757 WRPKRU,
759 // SSE42 string comparisons.
760 // These nodes produce 3 results: index, mask, and flags. X86ISelDAGToDAG
761 // will emit one or two instructions based on which results are used. If
762 // flags and index/mask are both used, this allows us to use a single
763 // instruction since we won't have to pick an opcode for flags. Instead we
764 // can rely on the DAG to CSE everything and decide at isel.
765 PCMPISTR,
766 PCMPESTR,
768 // Test if in transactional execution.
769 XTEST,
771 // Conversions between float and half-float.
772 CVTPS2PH,
773 CVTPS2PH_SAE,
774 CVTPH2PS,
775 CVTPH2PS_SAE,
777 // Masked version of above.
778 // SRC, RND, PASSTHRU, MASK
779 MCVTPS2PH,
780 MCVTPS2PH_SAE,
782 // Galois Field Arithmetic Instructions
783 GF2P8AFFINEINVQB,
784 GF2P8AFFINEQB,
785 GF2P8MULB,
787 // LWP insert record.
788 LWPINS,
790 // User level wait
791 UMWAIT,
792 TPAUSE,
794 // Enqueue Stores Instructions
795 ENQCMD,
796 ENQCMDS,
798 // For avx512-vp2intersect
799 VP2INTERSECT,
801 // User level interrupts - testui
802 TESTUI,
804 // Perform an FP80 add after changing precision control in FPCW.
805 FP80_ADD,
807 // Conditional compare instructions
808 CCMP,
809 CTEST,
811 /// X86 strict FP compare instructions.
812 FIRST_STRICTFP_OPCODE,
813 STRICT_FCMP = FIRST_STRICTFP_OPCODE,
814 STRICT_FCMPS,
816 // Vector packed double/float comparison.
817 STRICT_CMPP,
819 /// Vector comparison generating mask bits for fp and
820 /// integer signed and unsigned data types.
821 STRICT_CMPM,
823 // Vector float/double to signed/unsigned integer with truncation.
824 STRICT_CVTTP2SI,
825 STRICT_CVTTP2UI,
827 // Vector FP extend.
828 STRICT_VFPEXT,
830 // Vector FP round.
831 STRICT_VFPROUND,
833 // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
834 // Also used by the legacy (V)ROUND intrinsics where we mask out the
835 // scaling part of the immediate.
836 STRICT_VRNDSCALE,
838 // Vector signed/unsigned integer to float/double.
839 STRICT_CVTSI2P,
840 STRICT_CVTUI2P,
842 // Strict FMA nodes.
843 STRICT_FNMADD,
844 STRICT_FMSUB,
845 STRICT_FNMSUB,
847 // Conversions between float and half-float.
848 STRICT_CVTPS2PH,
849 STRICT_CVTPH2PS,
851 // Perform an FP80 add after changing precision control in FPCW.
852 STRICT_FP80_ADD,
854 /// Floating point max and min.
855 STRICT_FMAX,
856 STRICT_FMIN,
857 LAST_STRICTFP_OPCODE = STRICT_FMIN,
859 // Compare and swap.
860 FIRST_MEMORY_OPCODE,
861 LCMPXCHG_DAG = FIRST_MEMORY_OPCODE,
862 LCMPXCHG8_DAG,
863 LCMPXCHG16_DAG,
864 LCMPXCHG16_SAVE_RBX_DAG,
866 /// LOCK-prefixed arithmetic read-modify-write instructions.
867 /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
868 LADD,
869 LSUB,
870 LOR,
871 LXOR,
872 LAND,
873 LBTS,
874 LBTC,
875 LBTR,
876 LBTS_RM,
877 LBTC_RM,
878 LBTR_RM,
880 /// RAO arithmetic instructions.
881 /// OUTCHAIN = AADD(INCHAIN, PTR, RHS)
882 AADD,
883 AOR,
884 AXOR,
885 AAND,
887 // Load, scalar_to_vector, and zero extend.
888 VZEXT_LOAD,
890 // extract_vector_elt, store.
891 VEXTRACT_STORE,
893 // scalar broadcast from memory.
894 VBROADCAST_LOAD,
896 // subvector broadcast from memory.
897 SUBV_BROADCAST_LOAD,
899 // Store FP control word into i16 memory.
900 FNSTCW16m,
902 // Load FP control word from i16 memory.
903 FLDCW16m,
905 // Store x87 FPU environment into memory.
906 FNSTENVm,
908 // Load x87 FPU environment from memory.
909 FLDENVm,
911 // Custom handling for FP_TO_xINT_SAT
912 FP_TO_SINT_SAT,
913 FP_TO_UINT_SAT,
915 /// This instruction implements FP_TO_SINT with the
916 /// integer destination in memory and a FP reg source. This corresponds
917 /// to the X86::FIST*m instructions and the rounding mode change stuff. It
918 /// has two inputs (token chain and address) and two outputs (int value
919 /// and token chain). Memory VT specifies the type to store to.
920 FP_TO_INT_IN_MEM,
922 /// This instruction implements SINT_TO_FP with the
923 /// integer source in memory and FP reg result. This corresponds to the
924 /// X86::FILD*m instructions. It has two inputs (token chain and address)
925 /// and two outputs (FP value and token chain). The integer source type is
926 /// specified by the memory VT.
927 FILD,
929 /// This instruction implements a fp->int store from FP stack
930 /// slots. This corresponds to the fist instruction. It takes a
931 /// chain operand, value to store, address, and glue. The memory VT
932 /// specifies the type to store as.
933 FIST,
935 /// This instruction implements an extending load to FP stack slots.
936 /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
937 /// operand, and ptr to load from. The memory VT specifies the type to
938 /// load from.
939 FLD,
941 /// This instruction implements a truncating store from FP stack
942 /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
943 /// chain operand, value to store, address, and glue. The memory VT
944 /// specifies the type to store as.
945 FST,
947 /// These instructions grab the address of the next argument
948 /// from a va_list. (reads and modifies the va_list in memory)
949 VAARG_64,
950 VAARG_X32,
952 // Vector truncating store with unsigned/signed saturation
953 VTRUNCSTOREUS,
954 VTRUNCSTORES,
955 // Vector truncating masked store with unsigned/signed saturation
956 VMTRUNCSTOREUS,
957 VMTRUNCSTORES,
959 // X86 specific gather and scatter
960 MGATHER,
961 MSCATTER,
963 // Key locker nodes that produce flags.
964 AESENC128KL,
965 AESDEC128KL,
966 AESENC256KL,
967 AESDEC256KL,
968 AESENCWIDE128KL,
969 AESDECWIDE128KL,
970 AESENCWIDE256KL,
971 AESDECWIDE256KL,
973 /// Compare and Add if Condition is Met. Compares the value in operand 2
974 /// with the value in memory at operand 1. If the condition in operand 4 is
975 /// met, adds value operand 3 to m32 and writes the new value to operand 1.
976 /// Operand 2 is always updated with the original value from operand 1.
977 CMPCCXADD,
979 // Save xmm argument registers to the stack, according to %al. An operator
980 // is needed so that this can be expanded with control flow.
981 VASTART_SAVE_XMM_REGS,
983 // Conditional load/store instructions
984 CLOAD,
985 CSTORE,
986 LAST_MEMORY_OPCODE = CSTORE,
987 };
988 } // end namespace X86ISD
990 namespace X86 {
991 /// The current rounding mode is represented in bits 11:10 of the FP
992 /// control word (FPCW). These values are the same as the corresponding
993 /// constants used for rounding modes in glibc.
994 enum RoundingMode {
995 rmToNearest = 0, // FE_TONEAREST
996 rmDownward = 1 << 10, // FE_DOWNWARD
997 rmUpward = 2 << 10, // FE_UPWARD
998 rmTowardZero = 3 << 10, // FE_TOWARDZERO
999 rmMask = 3 << 10 // Bit mask selecting rounding mode
1000 };
1001 } // end namespace X86
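// Illustrative use (a sketch based only on the enum values above): given a
// 16-bit x87 control word CW, the active mode can be tested with
// (CW & X86::rmMask) == X86::rmTowardZero, and similarly for the other modes.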
1003 /// Define some predicates that are used for node matching.
1004 namespace X86 {
1005 /// Returns true if Elt is a constant zero or floating point constant +0.0.
1006 bool isZeroNode(SDValue Elt);
1008 /// Returns true if the given offset can
1009 /// fit into the displacement field of the instruction.
1010 bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
1011 bool hasSymbolicDisplacement);
1013 /// Determines whether the callee is required to pop its
1014 /// own arguments. Callee pop is necessary to support tail calls.
1015 bool isCalleePop(CallingConv::ID CallingConv,
1016 bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
1018 /// If Op is a constant whose elements are all the same constant or
1019 /// undefined, return true and return the constant value in \p SplatVal.
1020 /// If we have undef bits that don't cover an entire element, we treat these
1021 /// as zero if AllowPartialUndefs is set, else we fail and return false.
1022 bool isConstantSplat(SDValue Op, APInt &SplatVal,
1023 bool AllowPartialUndefs = true);
1025 /// Check if Op is a load operation that could be folded into some other x86
1026 /// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0.
1027 bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
1028 bool AssumeSingleUse = false);
1030 /// Check if Op is a load operation that could be folded into a vector splat
1031 /// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2.
1032 bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
1033 const X86Subtarget &Subtarget,
1034 bool AssumeSingleUse = false);
1036 /// Check if Op is a value that could be used to fold a store into some
1037 /// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi).
1038 bool mayFoldIntoStore(SDValue Op);
1040 /// Check if Op is an operation that could be folded into a zero extend x86
1041 /// instruction.
1042 bool mayFoldIntoZeroExtend(SDValue Op);
1044 /// True if the target supports the extended frame for async Swift
1045 /// functions.
1046 bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget,
1047 const MachineFunction &MF);
1048 } // end namespace X86
1050 //===--------------------------------------------------------------------===//
1051 // X86 Implementation of the TargetLowering interface
1052 class X86TargetLowering final : public TargetLowering {
1053 public:
1054 explicit X86TargetLowering(const X86TargetMachine &TM,
1055 const X86Subtarget &STI);
1057 unsigned getJumpTableEncoding() const override;
1058 bool useSoftFloat() const override;
1060 void markLibCallAttributes(MachineFunction *MF, unsigned CC,
1061 ArgListTy &Args) const override;
1063 MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
1064 return MVT::i8;
1065 }
1067 const MCExpr *
1068 LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
1069 const MachineBasicBlock *MBB, unsigned uid,
1070 MCContext &Ctx) const override;
1072 /// Returns relocation base for the given PIC jumptable.
1073 SDValue getPICJumpTableRelocBase(SDValue Table,
1074 SelectionDAG &DAG) const override;
1075 const MCExpr *
1076 getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
1077 unsigned JTI, MCContext &Ctx) const override;
1079 /// Return the desired alignment for ByVal aggregate
1080 /// function arguments in the caller parameter area. For X86, aggregates
1081 /// that contain SSE vectors are placed at 16-byte boundaries while the
1082 /// rest are at 4-byte boundaries.
1083 Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override;
1085 EVT getOptimalMemOpType(const MemOp &Op,
1086 const AttributeList &FuncAttributes) const override;
1088 /// Returns true if it's safe to use load / store of the
1089 /// specified type to expand memcpy / memset inline. This is mostly true
1090 /// for all types except for some special cases. For example, on X86
1091 /// targets without SSE2 f64 load / store are done with fldl / fstpl which
1092 /// also does type conversion. Note the specified type doesn't have to be
1093 /// legal as the hook is used before type legalization.
1094 bool isSafeMemOpType(MVT VT) const override;
1096 bool isMemoryAccessFast(EVT VT, Align Alignment) const;
1098 /// Returns true if the target allows unaligned memory accesses of the
1099 /// specified type. Returns whether it is "fast" in the last argument.
1100 bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
1101 MachineMemOperand::Flags Flags,
1102 unsigned *Fast) const override;
1104 /// This function returns true if the memory access is aligned or if the
1105 /// target allows this specific unaligned memory access. If the access is
1106 /// allowed, the optional final parameter returns a relative speed of the
1107 /// access (as defined by the target).
1108 bool allowsMemoryAccess(
1109 LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
1110 Align Alignment,
1111 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1112 unsigned *Fast = nullptr) const override;
1114 bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
1115 const MachineMemOperand &MMO,
1116 unsigned *Fast) const {
1117 return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(),
1118 MMO.getAlign(), MMO.getFlags(), Fast);
1119 }
1121 /// Provide custom lowering hooks for some operations.
1123 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
1125 /// Replace the results of a node with an illegal result
1126 /// type with new values built out of custom code.
1128 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
1129 SelectionDAG &DAG) const override;
1131 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
1133 bool preferABDSToABSWithNSW(EVT VT) const override;
1135 bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT,
1136 EVT ExtVT) const override;
1138 bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond,
1139 EVT VT) const override;
1141 /// Return true if the target has native support for
1142 /// the specified value type and it is 'desirable' to use the type for the
1143 /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
1144 /// instruction encodings are longer and some i16 instructions are slow.
1145 bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
1147 /// Return true if the target has native support for the
1148 /// specified value type and it is 'desirable' to use the type. e.g. On x86
1149 /// i16 is legal, but undesirable since i16 instruction encodings are longer
1150 /// and some i16 instructions are slow.
1151 bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
1153 /// Return the preferred fold type: Abs if this is a vector, AddAnd if it's
1154 /// an integer, None otherwise.
1155 TargetLowering::AndOrSETCCFoldKind
1156 isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp,
1157 const SDNode *SETCC0,
1158 const SDNode *SETCC1) const override;
1160 /// Return the newly negated expression if the cost is not expensive, and
1161 /// set \p Cost to indicate whether it is cheaper or neutral to
1162 /// do the negation.
1163 SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
1164 bool LegalOperations, bool ForCodeSize,
1165 NegatibleCost &Cost,
1166 unsigned Depth) const override;
1168 MachineBasicBlock *
1169 EmitInstrWithCustomInserter(MachineInstr &MI,
1170 MachineBasicBlock *MBB) const override;
1172 /// This method returns the name of a target specific DAG node.
1173 const char *getTargetNodeName(unsigned Opcode) const override;
1175 /// Do not merge vector stores after legalization because that may conflict
1176 /// with x86-specific store splitting optimizations.
1177 bool mergeStoresAfterLegalization(EVT MemVT) const override {
1178 return !MemVT.isVector();
1179 }
1181 bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
1182 const MachineFunction &MF) const override;
1184 bool isCheapToSpeculateCttz(Type *Ty) const override;
1186 bool isCheapToSpeculateCtlz(Type *Ty) const override;
1188 bool isCtlzFast() const override;
1190 bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
1191 // If the pair to store is a mixture of float and int values, we will
1192 // save two bitwise instructions and one float-to-int instruction and
1193 // add one store instruction. There is potentially a more
1194 // significant benefit because it avoids the float->int domain switch
1195 // for the input value. So it is more likely a win.
1196 if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
1197 (LTy.isInteger() && HTy.isFloatingPoint()))
1198 return true;
1199 // If the pair only contains int values, we will save two bitwise
1200 // instructions and add one store instruction (costing one more
1201 // store buffer). Since the benefit is less clear, we leave
1202 // such pairs out until we get a test case proving it is a win.
1203 return false;
1204 }
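// Illustrative examples (restating the logic above): an {f32, i32} pair
// returns true, since merging avoids the float->int domain switch; an
// {i32, i32} pair returns false.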
1206 bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
1208 bool hasAndNotCompare(SDValue Y) const override;
1210 bool hasAndNot(SDValue Y) const override;
1212 bool hasBitTest(SDValue X, SDValue Y) const override;
1214 bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
1215 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
1216 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1217 SelectionDAG &DAG) const override;
1219 unsigned preferedOpcodeForCmpEqPiecesOfOperand(
1220 EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
1221 const APInt &ShiftOrRotateAmt,
1222 const std::optional<APInt> &AndMask) const override;
1224 bool preferScalarizeSplat(SDNode *N) const override;
1226 CondMergingParams
1227 getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs,
1228 const Value *Rhs) const override;
1230 bool shouldFoldConstantShiftPairToMask(const SDNode *N,
1231 CombineLevel Level) const override;
1233 bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
1235 bool
1236 shouldTransformSignedTruncationCheck(EVT XVT,
1237 unsigned KeptBits) const override {
1238 // For vectors, we don't have a preference.
1239 if (XVT.isVector())
1240 return false;
1242 auto VTIsOk = [](EVT VT) -> bool {
1243 return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
1244 VT == MVT::i64;
1245 };
1247 // We are ok with KeptBitsVT being byte/word/dword, which is what MOVS supports.
1248 // XVT will be larger than KeptBitsVT.
1249 MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
1250 return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
1251 }
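// Example (illustrative): for XVT = i32 and KeptBits = 8 this returns true
// (both i32 and i8 pass VTIsOk), while any vector XVT returns false.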
1253 ShiftLegalizationStrategy
1254 preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
1255 unsigned ExpansionFactor) const override;
1257 bool shouldSplatInsEltVarIndex(EVT VT) const override;
1259 bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override {
1260 // Converting to sat variants holds little benefit on X86 as we will just
1261 // need to saturate the value back using fp arithmetic.
1262 return Op != ISD::FP_TO_UINT_SAT && isOperationLegalOrCustom(Op, VT);
1263 }
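// Example (illustrative): ISD::FP_TO_SINT_SAT is allowed when the operation
// is legal or custom for VT, whereas ISD::FP_TO_UINT_SAT is always rejected
// by the check above.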
1265 bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
1266 return VT.isScalarInteger();
1267 }
1269 /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
1270 MVT hasFastEqualityCompare(unsigned NumBits) const override;
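// Illustrative lowering (a sketch, not generated output): a 16-byte memcmp
// equality test can become PCMPEQB %xmm1, %xmm0; PMOVMSKB %xmm0, %eax;
// CMP $0xffff, %eax, avoiding a chain of scalar compares.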
1272 /// Return the value type to use for ISD::SETCC.
1273 EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
1274 EVT VT) const override;
1276 bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
1277 const APInt &DemandedElts,
1278 TargetLoweringOpt &TLO) const override;
1280 /// Determine which of the bits specified in Mask are known to be either
1281 /// zero or one and return them in the KnownZero/KnownOne bitsets.
1282 void computeKnownBitsForTargetNode(const SDValue Op,
1283 KnownBits &Known,
1284 const APInt &DemandedElts,
1285 const SelectionDAG &DAG,
1286 unsigned Depth = 0) const override;
1288 /// Determine the number of bits in the operation that are sign bits.
1289 unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
1290 const APInt &DemandedElts,
1291 const SelectionDAG &DAG,
1292 unsigned Depth) const override;
1294 bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
1295 const APInt &DemandedElts,
1296 APInt &KnownUndef,
1297 APInt &KnownZero,
1298 TargetLoweringOpt &TLO,
1299 unsigned Depth) const override;
1301 bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
1302 const APInt &DemandedElts,
1303 unsigned MaskIndex,
1304 TargetLoweringOpt &TLO,
1305 unsigned Depth) const;
1307 bool SimplifyDemandedBitsForTargetNode(SDValue Op,
1308 const APInt &DemandedBits,
1309 const APInt &DemandedElts,
1310 KnownBits &Known,
1311 TargetLoweringOpt &TLO,
1312 unsigned Depth) const override;
1314 SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
1315 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
1316 SelectionDAG &DAG, unsigned Depth) const override;
1318 bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(
1319 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
1320 bool PoisonOnly, unsigned Depth) const override;
1322 bool canCreateUndefOrPoisonForTargetNode(
1323 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
1324 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override;
1326 bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,
1327 APInt &UndefElts, const SelectionDAG &DAG,
1328 unsigned Depth) const override;
1330 bool isTargetCanonicalConstantNode(SDValue Op) const override {
1331 // Peek through bitcasts/extracts/inserts to see if we have a broadcast
1332 // vector from memory.
1333 while (Op.getOpcode() == ISD::BITCAST ||
1334 Op.getOpcode() == ISD::EXTRACT_SUBVECTOR ||
1335 (Op.getOpcode() == ISD::INSERT_SUBVECTOR &&
1336 Op.getOperand(0).isUndef()))
1337 Op = Op.getOperand(Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 1 : 0);
1339 return Op.getOpcode() == X86ISD::VBROADCAST_LOAD ||
1340 TargetLowering::isTargetCanonicalConstantNode(Op);
1341 }
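// Example (illustrative): a pattern like
//   (bitcast (extract_subvector (X86ISD::VBROADCAST_LOAD ...), 0))
// is peeled back to the broadcast load and treated as canonical.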
1343 const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
1345 SDValue unwrapAddress(SDValue N) const override;
1347 SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
1349 bool ExpandInlineAsm(CallInst *CI) const override;
1351 ConstraintType getConstraintType(StringRef Constraint) const override;
1353 /// Examine constraint string and operand type and determine a weight value.
1354 /// The operand object must already have been set up with the operand type.
1355 ConstraintWeight
1356 getSingleConstraintMatchWeight(AsmOperandInfo &Info,
1357 const char *Constraint) const override;
1359 const char *LowerXConstraint(EVT ConstraintVT) const override;
1361 /// Lower the specified operand into the Ops vector. If it is invalid, don't
1362 /// add anything to Ops. If hasMemory is true it means one of the asm
1363 /// constraints of the inline asm instruction being processed is 'm'.
1364 void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
1365 std::vector<SDValue> &Ops,
1366 SelectionDAG &DAG) const override;
1368 InlineAsm::ConstraintCode
1369 getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1370 if (ConstraintCode == "v")
1371 return InlineAsm::ConstraintCode::v;
1372 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
1373 }
1375 /// Handle Lowering flag assembly outputs.
1376 SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
1377 const SDLoc &DL,
1378 const AsmOperandInfo &Constraint,
1379 SelectionDAG &DAG) const override;
1381 /// Given a physical register constraint
1382 /// (e.g. {edx}), return the register number and the register class for the
1383 /// register. This should only be used for C_Register constraints. On
1384 /// error, this returns a register number of 0.
1385 std::pair<unsigned, const TargetRegisterClass *>
1386 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1387 StringRef Constraint, MVT VT) const override;
1389 /// Return true if the addressing mode represented
1390 /// by AM is legal for this target, for a load/store of the specified type.
1391 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
1392 Type *Ty, unsigned AS,
1393 Instruction *I = nullptr) const override;
1395 bool addressingModeSupportsTLS(const GlobalValue &GV) const override;
1397 /// Return true if the specified immediate is a legal
1398 /// icmp immediate, that is, the target has icmp instructions which can
1399 /// compare a register against the immediate without having to materialize
1400 /// the immediate into a register.
1401 bool isLegalICmpImmediate(int64_t Imm) const override;
1403 /// Return true if the specified immediate is a legal
1404 /// add immediate, that is, the target has add instructions which can
1405 /// add a register and the immediate without having to materialize
1406 /// the immediate into a register.
1407 bool isLegalAddImmediate(int64_t Imm) const override;
1409 bool isLegalStoreImmediate(int64_t Imm) const override;
1411 /// Add x86-specific opcodes to the default list.
1412 bool isBinOp(unsigned Opcode) const override;
1414 /// Returns true if the opcode is a commutative binary operation.
1415 bool isCommutativeBinOp(unsigned Opcode) const override;
1417 /// Return true if it's free to truncate a value of
1418 /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
1419 /// register EAX to i16 by referencing its sub-register AX.
1420 bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1421 bool isTruncateFree(EVT VT1, EVT VT2) const override;
1423 bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1425 /// Return true if any actual instruction that defines a
1426 /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
1427 /// register. This does not necessarily include registers defined in
1428 /// unknown ways, such as incoming arguments, or copies from unknown
1429 /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
1430 /// does not necessarily apply to truncate instructions. e.g. on x86-64,
1431 /// all instructions that define 32-bit values implicit zero-extend the
1432 /// result out to 64 bits.
1433 bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1434 bool isZExtFree(EVT VT1, EVT VT2) const override;
1435 bool isZExtFree(SDValue Val, EVT VT2) const override;
1437 bool shouldConvertPhiType(Type *From, Type *To) const override;
1439 /// Return true if folding a vector load into ExtVal (a sign, zero, or any
1440 /// extend node) is profitable.
1441 bool isVectorLoadExtDesirable(SDValue) const override;
1443 /// Return true if an FMA operation is faster than a pair of fmul and fadd
1444 /// instructions. fmuladd intrinsics will be expanded to FMAs when this
1445 /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1446 bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
1447 EVT VT) const override;
1449 /// Return true if it's profitable to narrow operations of type SrcVT to
1450 /// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not
1451 /// from i32 to i16.
1452 bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const override;
1454 bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
1455 EVT VT) const override;
1457 /// Given an intrinsic, checks if on the target the intrinsic will need to map
1458 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1459 /// true and stores the intrinsic information into the IntrinsicInfo that was
1460 /// passed to the function.
1461 bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1462 MachineFunction &MF,
1463 unsigned Intrinsic) const override;
1465 /// Returns true if the target can instruction select the
1466 /// specified FP immediate natively. If false, the legalizer will
1467 /// materialize the FP immediate as a load from a constant pool.
1468 bool isFPImmLegal(const APFloat &Imm, EVT VT,
1469 bool ForCodeSize) const override;
1471 /// Targets can use this to indicate that they only support *some*
1472 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1473 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1474 /// be legal.
1475 bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1477 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1478 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1479 /// constant pool entry.
1480 bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1482 /// Returns true if lowering to a jump table is allowed.
1483 bool areJTsAllowed(const Function *Fn) const override;
1485 MVT getPreferredSwitchConditionType(LLVMContext &Context,
1486 EVT ConditionVT) const override;
1488 /// If true, then instruction selection should
1489 /// seek to shrink the FP constant of the specified type to a smaller type
1490 /// in order to save space and / or reduce runtime.
1491 bool ShouldShrinkFPConstant(EVT VT) const override;
1493 /// Return true if we believe it is correct and profitable to reduce the
1494 /// load node to a smaller type.
1495 bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1496 EVT NewVT) const override;
1498 /// Return true if the specified scalar FP type is computed in an SSE
1499 /// register, not on the X87 floating point stack.
1500 bool isScalarFPTypeInSSEReg(EVT VT) const;
1502 /// Returns true if it is beneficial to convert a load of a constant
1503 /// to just the constant itself.
1504 bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1505 Type *Ty) const override;
1507 bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
1509 bool convertSelectOfConstantsToMath(EVT VT) const override;
1511 bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
1512 SDValue C) const override;
1514 /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1515 /// with this index.
1516 bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1517 unsigned Index) const override;
1519 /// Scalar ops always have equal or better analysis/performance/power than
1520 /// the vector equivalent, so this always makes sense if the scalar op is
1521 /// supported.
1522 bool shouldScalarizeBinop(SDValue) const override;
1524 /// Extract of a scalar FP value from index 0 of a vector is free.
1525 bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1526 EVT EltVT = VT.getScalarType();
1527 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1528 }
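// Example (illustrative): extracting element 0 from a v4f32 is treated as
// free (the scalar already lives in the low lane), while extracting
// element 1, or element 0 of an integer vector, is not.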
1530 /// Overflow nodes should get combined/lowered to optimal instructions
1531 /// (they should allow eliminating explicit compares by getting flags from
1532 /// math ops).
1533 bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
1534 bool MathUsed) const override;
1536 bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
1537 unsigned AddrSpace) const override {
1538 // If we can replace more than 2 scalar stores, there will be a reduction
1539 // in instructions even after we add a vector constant load.
1540 return IsZero || NumElem > 2;
1541 }
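// Example (illustrative): storing a 4-element non-zero vector constant is
// considered cheap (4 > 2), a 2-element non-zero one is not, and an
// all-zero constant is always cheap.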
1543 bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1544 const SelectionDAG &DAG,
1545 const MachineMemOperand &MMO) const override;
1547 Register getRegisterByName(const char* RegName, LLT VT,
1548 const MachineFunction &MF) const override;
1550 /// If a physical register, this returns the register that receives the
1551 /// exception address on entry to an EH pad.
1552 Register
1553 getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1555 /// If a physical register, this returns the register that receives the
1556 /// exception typeid on entry to a landing pad.
1557 Register
1558 getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1560 bool needsFixedCatchObjects() const override;
1562 /// This method returns a target specific FastISel object,
1563 /// or null if the target does not support "fast" ISel.
1564 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1565 const TargetLibraryInfo *libInfo) const override;
1567 /// If the target has a standard location for the stack protector cookie,
1568 /// returns the address of that location. Otherwise, returns nullptr.
1569 Value *getIRStackGuard(IRBuilderBase &IRB) const override;
1571 bool useLoadStackGuardNode(const Module &M) const override;
1572 bool useStackGuardXorFP() const override;
1573 void insertSSPDeclarations(Module &M) const override;
1574 Value *getSDagStackGuard(const Module &M) const override;
1575 Function *getSSPStackGuardCheck(const Module &M) const override;
1576 SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1577 const SDLoc &DL) const override;
1580 /// Return true if the target stores SafeStack pointer at a fixed offset in
1581 /// some non-standard address space, and populates the address space and
1582 /// offset as appropriate.
1583 Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
1585 std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
1586 SDValue Chain, SDValue Pointer,
1587 MachinePointerInfo PtrInfo,
1588 Align Alignment,
1589 SelectionDAG &DAG) const;
1591 /// Customize the preferred legalization strategy for certain types.
1592 LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1594 bool softPromoteHalfType() const override { return true; }
1596 MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1597 EVT VT) const override;
1599 unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1600 CallingConv::ID CC,
1601 EVT VT) const override;
1603 unsigned getVectorTypeBreakdownForCallingConv(
1604 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1605 unsigned &NumIntermediates, MVT &RegisterVT) const override;
1607 bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1609 bool supportSwiftError() const override;
1611 bool supportKCFIBundles() const override { return true; }
1613 MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
1614 MachineBasicBlock::instr_iterator &MBBI,
1615 const TargetInstrInfo *TII) const override;
1617 bool hasStackProbeSymbol(const MachineFunction &MF) const override;
1618 bool hasInlineStackProbe(const MachineFunction &MF) const override;
1619 StringRef getStackProbeSymbolName(const MachineFunction &MF) const override;
1621 unsigned getStackProbeSize(const MachineFunction &MF) const;
1623 bool hasVectorBlend() const override { return true; }
1625 unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1627 bool isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs,
1628 unsigned OpNo) const override;
1630 SDValue visitMaskedLoad(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
1631 MachineMemOperand *MMO, SDValue &NewLoad,
1632 SDValue Ptr, SDValue PassThru,
1633 SDValue Mask) const override;
1634 SDValue visitMaskedStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
1635 MachineMemOperand *MMO, SDValue Ptr, SDValue Val,
1636 SDValue Mask) const override;
1638 /// Lower interleaved load(s) into target specific
1639 /// instructions/intrinsics.
1640 bool lowerInterleavedLoad(LoadInst *LI,
1641 ArrayRef<ShuffleVectorInst *> Shuffles,
1642 ArrayRef<unsigned> Indices,
1643 unsigned Factor) const override;
1645 /// Lower interleaved store(s) into target specific
1646 /// instructions/intrinsics.
1647 bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1648 unsigned Factor) const override;
1650 SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr,
1651 int JTI, SelectionDAG &DAG) const override;
1653 Align getPrefLoopAlignment(MachineLoop *ML) const override;
1655 EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override {
1656 if (VT == MVT::f80)
1657 return EVT::getIntegerVT(Context, 96);
1658 return TargetLoweringBase::getTypeToTransformTo(Context, VT);
1659 }
1661 protected:
1662 std::pair<const TargetRegisterClass *, uint8_t>
1663 findRepresentativeClass(const TargetRegisterInfo *TRI,
1664 MVT VT) const override;
1666 private:
1667 /// Keep a reference to the X86Subtarget around so that we can
1668 /// make the right decision when generating code for different targets.
1669 const X86Subtarget &Subtarget;
1671 /// A list of legal FP immediates.
1672 std::vector<APFloat> LegalFPImmediates;
1674 /// Indicate that this x86 target can instruction
1675 /// select the specified FP immediate natively.
1676 void addLegalFPImmediate(const APFloat& Imm) {
1677 LegalFPImmediates.push_back(Imm);
1678 }
1680 SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
1681 CallingConv::ID CallConv, bool isVarArg,
1682 const SmallVectorImpl<ISD::InputArg> &Ins,
1683 const SDLoc &dl, SelectionDAG &DAG,
1684 SmallVectorImpl<SDValue> &InVals,
1685 uint32_t *RegMask) const;
1686 SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1687 const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1688 const SDLoc &dl, SelectionDAG &DAG,
1689 const CCValAssign &VA, MachineFrameInfo &MFI,
1690 unsigned i) const;
1691 SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1692 const SDLoc &dl, SelectionDAG &DAG,
1693 const CCValAssign &VA,
1694 ISD::ArgFlagsTy Flags, bool isByval) const;
1696 // Call lowering helpers.
1698 /// Check whether the call is eligible for tail call optimization. Targets
1699 /// that want to do tail call optimization should implement this function.
1700 bool IsEligibleForTailCallOptimization(
1701 TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo,
1702 SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const;
1703 SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1704 SDValue Chain, bool IsTailCall,
1705 bool Is64Bit, int FPDiff,
1706 const SDLoc &dl) const;
1708 unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1709 SelectionDAG &DAG) const;
1711 unsigned getAddressSpace() const;
1713 SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
1714 SDValue &Chain) const;
1715 SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;
1717 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1718 SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1719 SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1720 SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1722 unsigned getGlobalWrapperKind(const GlobalValue *GV,
1723 const unsigned char OpFlags) const;
1724 SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1725 SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1726 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1727 SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1728 SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1730 /// Creates target global address or external symbol nodes for calls or
1731 /// other uses.
1732 SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1733 bool ForCall) const;
1735 SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1736 SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1737 SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1738 SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1739 SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
1740 SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
1741 SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1742 SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1743 SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1744 SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1745 SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1746 SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1747 SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1748 SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1749 SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1750 SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1751 SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1752 SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1753 SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1754 SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1755 SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1756 SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1757 SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1758 SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1759 SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1760 SDValue LowerGET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
1761 SDValue LowerSET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
1762 SDValue LowerRESET_FPENV(SDValue Op, SelectionDAG &DAG) const;
1763 SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1764 SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,
1765 SDValue &Chain) const;
1766 SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1767 SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
1768 SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1769 SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
1770 SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
1771 SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
1772 SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;
1774 SDValue
1775 LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1776 const SmallVectorImpl<ISD::InputArg> &Ins,
1777 const SDLoc &dl, SelectionDAG &DAG,
1778 SmallVectorImpl<SDValue> &InVals) const override;
1779 SDValue LowerCall(CallLoweringInfo &CLI,
1780 SmallVectorImpl<SDValue> &InVals) const override;
1782 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1783 const SmallVectorImpl<ISD::OutputArg> &Outs,
1784 const SmallVectorImpl<SDValue> &OutVals,
1785 const SDLoc &dl, SelectionDAG &DAG) const override;
1787 bool supportSplitCSR(MachineFunction *MF) const override {
1788 return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1789 MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1790 }
1791 void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1792 void insertCopiesSplitCSR(
1793 MachineBasicBlock *Entry,
1794 const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1796 bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1798 bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1800 EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1801 ISD::NodeType ExtendKind) const override;
1803 bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1804 bool isVarArg,
1805 const SmallVectorImpl<ISD::OutputArg> &Outs,
1806 LLVMContext &Context) const override;
1808 const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1809 ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
1811 TargetLoweringBase::AtomicExpansionKind
1812 shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
1813 TargetLoweringBase::AtomicExpansionKind
1814 shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1815 TargetLoweringBase::AtomicExpansionKind
1816 shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1817 TargetLoweringBase::AtomicExpansionKind
1818 shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;
1819 void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
1820 void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
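// A brief sketch of the two rewrites declared above (illustrating the general
// lock-prefixed patterns; assumed background, not restated from this header):
//   * bit test: `%old = atomicrmw or ptr %p, i32 8 seq_cst` whose old value is
//     only used as `and i32 %old, 8` can be emitted as a single `lock bts`,
//     reading the prior bit from CF instead of looping on cmpxchg.
//   * cmp+arith: `%old = atomicrmw sub ptr %p, i32 1 seq_cst` followed only by
//     `icmp eq i32 %old, 1` can become `lock sub` plus a read of ZF, since ZF
//     is set exactly when the decremented value reaches zero.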
1822 LoadInst *
1823 lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
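// Hedged example of an "idempotent" RMW (illustrative only): an atomicrmw that
// cannot change memory, such as
//   %v = atomicrmw or ptr %p, i32 0 seq_cst
// may be replaced by an appropriate fence followed by a plain atomic load; the
// hook returns that replacement load when the transform applies, else null.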
1825 bool needsCmpXchgNb(Type *MemType) const;
1827 void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1828 MachineBasicBlock *DispatchBB, int FI) const;
1830 // Utility function to emit the low-level va_arg code for X86-64.
1831 MachineBasicBlock *
1832 EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;
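// Background for the custom inserter above (System V x86-64 ABI va_list
// layout; general ABI knowledge, not restated from this file):
//   struct {
//     unsigned gp_offset;       // next general-purpose register slot
//     unsigned fp_offset;       // next XMM register slot
//     void *overflow_arg_area;  // next stack-passed argument
//     void *reg_save_area;      // spilled incoming register arguments
//   };
// The emitted control flow picks between the register save area and the
// overflow area and bumps the matching field.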
1834 /// Utility function to lower a cascaded pair of CMOV (select) pseudo-instructions.
1835 MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1836 MachineInstr &MI2,
1837 MachineBasicBlock *BB) const;
1839 MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1840 MachineBasicBlock *BB) const;
1842 MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1843 MachineBasicBlock *BB) const;
1845 MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1846 MachineBasicBlock *BB) const;
1848 MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
1849 MachineBasicBlock *BB) const;
1851 MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1852 MachineBasicBlock *BB) const;
1854 MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
1855 MachineBasicBlock *BB) const;
1857 MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1858 MachineBasicBlock *MBB) const;
1860 void emitSetJmpShadowStackFix(MachineInstr &MI,
1861 MachineBasicBlock *MBB) const;
1863 MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1864 MachineBasicBlock *MBB) const;
1866 MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1867 MachineBasicBlock *MBB) const;
1869 MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1870 MachineBasicBlock *MBB) const;
1872 MachineBasicBlock *emitPatchableEventCall(MachineInstr &MI,
1873 MachineBasicBlock *MBB) const;
1875 /// Emit flags for the given setcc condition and operands. Also returns the
1876 /// corresponding X86 condition code constant in X86CC.
1877 SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
1878 const SDLoc &dl, SelectionDAG &DAG,
1879 SDValue &X86CC) const;
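// Sketch of a typical call site (hypothetical values, for illustration only):
//   SDValue X86CC;
//   SDValue EFLAGS = emitFlagsForSetcc(LHS, RHS, ISD::SETLT, dl, DAG, X86CC);
//   SDValue Res = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS);
// i.e. the returned node feeds the EFLAGS operand and X86CC the condition-code
// operand of an X86ISD::SETCC (or CMOV/BRCOND) node.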
1881 bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst,
1882 SDValue IntPow2) const override;
1884 /// Check if replacement of SQRT with RSQRT should be disabled.
1885 bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;
1887 /// Use rsqrt* to speed up sqrt calculations.
1888 SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
1889 int &RefinementSteps, bool &UseOneConstNR,
1890 bool Reciprocal) const override;
1892 /// Use rcp* to speed up fdiv calculations.
1893 SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
1894 int &RefinementSteps) const override;
1896 /// Reassociate floating point divisions into multiply by reciprocal.
1897 unsigned combineRepeatedFPDivisors() const override;
1899 SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1900 SmallVectorImpl<SDNode *> &Created) const override;
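// Worked example of the generic sdiv-by-power-of-two expansion this hook may
// override (illustrative arithmetic only, not necessarily the exact X86
// sequence): for i32 `x / 4`
//   t   = x >>s 31        ; all-ones if x is negative, else 0
//   adj = t >>u 30        ; 3 if x is negative, else 0
//   res = (x + adj) >>s 2 ; rounds toward zero, as sdiv requires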
1902 SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
1903 SDValue V2) const;
1904 };
1906 namespace X86 {
1907 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1908 const TargetLibraryInfo *libInfo);
1909 } // end namespace X86
1911 // X86-specific Gather/Scatter nodes.
1912 // The class has the same order of operands as MaskedGatherScatterSDNode for
1913 // convenience.
1914 class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
1915 public:
1916 // This is intended as a utility and should never be directly created.
1917 X86MaskedGatherScatterSDNode() = delete;
1918 ~X86MaskedGatherScatterSDNode() = delete;
1920 const SDValue &getBasePtr() const { return getOperand(3); }
1921 const SDValue &getIndex() const { return getOperand(4); }
1922 const SDValue &getMask() const { return getOperand(2); }
1923 const SDValue &getScale() const { return getOperand(5); }
1925 static bool classof(const SDNode *N) {
1926 return N->getOpcode() == X86ISD::MGATHER ||
1927 N->getOpcode() == X86ISD::MSCATTER;
1928 }
1929 };
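// Operand layout implied by the accessors above and the subclasses below
// (a reading aid under that assumption, not a normative statement):
//   gather:  (Chain, PassThru, Mask, BasePtr, Index, Scale)
//   scatter: (Chain, Value,    Mask, BasePtr, Index, Scale)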
1931 class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1932 public:
1933 const SDValue &getPassThru() const { return getOperand(1); }
1935 static bool classof(const SDNode *N) {
1936 return N->getOpcode() == X86ISD::MGATHER;
1937 }
1938 };
1940 class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1941 public:
1942 const SDValue &getValue() const { return getOperand(1); }
1944 static bool classof(const SDNode *N) {
1945 return N->getOpcode() == X86ISD::MSCATTER;
1946 }
1947 };
1949 /// Generate unpacklo/unpackhi shuffle mask.
1950 void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
1951 bool Unary);
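// Example masks for a v4i32 type (illustrative, mirroring the unpcklps /
// unpckhps interleave; indices >= 4 select from the second operand):
//   Lo, binary: <0, 4, 1, 5>    Hi, binary: <2, 6, 3, 7>
//   Lo, unary:  <0, 0, 1, 1>    Hi, unary:  <2, 2, 3, 3>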
1953 /// Similar to a unary unpacklo/unpackhi, but without the 128-bit lane
1954 /// limitation imposed by AVX. Example:
1955 /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
1956 /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
1957 void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);
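// Possible caller sketch (hypothetical, based only on the signature and the
// documented output above):
//   SmallVector<int, 8> Mask;
//   createSplat2ShuffleMask(MVT::v8i16, Mask, /*Lo=*/true);
//   // Mask now holds {0, 0, 1, 1, 2, 2, 3, 3}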
1959 } // end namespace llvm
1961 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H