lib/Target/X86/X86.td

   1 //===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This is a target description file for the Intel i386 architecture, referred
  10 // to here as the "X86" architecture.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 // Get the target-independent interfaces which we are implementing...
  15 //
  16 include "llvm/Target/Target.td"
  17
  18 //===----------------------------------------------------------------------===//
  19 // X86 Subtarget state
  20 //
  21
  22 def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
  23                                   "64-bit mode (x86_64)">;
  24 def Mode32Bit : SubtargetFeature<"32bit-mode", "In32BitMode", "true",
  25                                   "32-bit mode (80386)">;
  26 def Mode16Bit : SubtargetFeature<"16bit-mode", "In16BitMode", "true",
  27                                   "16-bit mode (i8086)">;
  28
  29 //===----------------------------------------------------------------------===//
  30 // X86 Subtarget features
  31 //===----------------------------------------------------------------------===//
  32
  33 def FeatureX87     : SubtargetFeature<"x87","HasX87", "true",
  34                                       "Enable X87 float instructions">;
  35
  36 def FeatureNOPL    : SubtargetFeature<"nopl", "HasNOPL", "true",
  37                                       "Enable NOPL instruction">;
  38
  39 def FeatureCMOV    : SubtargetFeature<"cmov","HasCMov", "true",
  40                                       "Enable conditional move instructions">;
  41
  42 def FeatureCMPXCHG8B : SubtargetFeature<"cx8", "HasCmpxchg8b", "true",
  43                                         "Support CMPXCHG8B instructions">;
  44
  45 def FeaturePOPCNT   : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
  46                                        "Support POPCNT instruction">;
  47
  48 def FeatureFXSR    : SubtargetFeature<"fxsr", "HasFXSR", "true",
  49                                       "Support fxsave/fxrestore instructions">;
  50
  51 def FeatureXSAVE   : SubtargetFeature<"xsave", "HasXSAVE", "true",
  52                                        "Support xsave instructions">;
  53
  54 def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
  55                                        "Support xsaveopt instructions">;
  56
  57 def FeatureXSAVEC  : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
  58                                        "Support xsavec instructions">;
  59
  60 def FeatureXSAVES  : SubtargetFeature<"xsaves", "HasXSAVES", "true",
  61                                        "Support xsaves instructions">;
  62
  63 def FeatureSSE1    : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
  64                                       "Enable SSE instructions">;
  65 def FeatureSSE2    : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
  66                                       "Enable SSE2 instructions",
  67                                       [FeatureSSE1]>;
  68 def FeatureSSE3    : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
  69                                       "Enable SSE3 instructions",
  70                                       [FeatureSSE2]>;
  71 def FeatureSSSE3   : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
  72                                       "Enable SSSE3 instructions",
  73                                       [FeatureSSE3]>;
  74 def FeatureSSE41   : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
  75                                       "Enable SSE 4.1 instructions",
  76                                       [FeatureSSSE3]>;
  77 def FeatureSSE42   : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
  78                                       "Enable SSE 4.2 instructions",
  79                                       [FeatureSSE41]>;
  80 // The MMX subtarget feature is separate from the rest of the SSE features
  81 // because it's important (for odd compatibility reasons) to be able to
  82 // turn it off explicitly while allowing SSE+ to be on.
  83 def FeatureMMX     : SubtargetFeature<"mmx","X863DNowLevel", "MMX",
  84                                       "Enable MMX instructions">;
  85 def Feature3DNow   : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
  86                                       "Enable 3DNow! instructions",
  87                                       [FeatureMMX]>;
  88 def Feature3DNowA  : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
  89                                       "Enable 3DNow! Athlon instructions",
  90                                       [Feature3DNow]>;
  91 // All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
  92 // feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
  93 // without disabling 64-bit mode. Nothing should imply this feature bit. It
  94 // is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
  95 def Feature64Bit   : SubtargetFeature<"64bit", "HasX86_64", "true",
  96                                       "Support 64-bit instructions">;
  97 def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
  98                                       "64-bit with cmpxchg16b",
  99                                       [FeatureCMPXCHG8B]>;
 100 def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
 101                                        "SHLD instruction is slow">;
 102 def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
 103                                         "PMULLD instruction is slow">;
 104 def FeatureSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
 105                                           "true",
 106                                           "PMADDWD is slower than PMULLD">;
 107 // FIXME: This should not apply to CPUs that do not have SSE.
 108 def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
 109                                 "IsUAMem16Slow", "true",
 110                                 "Slow unaligned 16-byte memory access">;
 111 def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
 112                                 "IsUAMem32Slow", "true",
 113                                 "Slow unaligned 32-byte memory access">;
 114 def FeatureSSE4A   : SubtargetFeature<"sse4a", "HasSSE4A", "true",
 115                                       "Support SSE 4a instructions",
 116                                       [FeatureSSE3]>;
 117
 118 def FeatureAVX     : SubtargetFeature<"avx", "X86SSELevel", "AVX",
 119                                       "Enable AVX instructions",
 120                                       [FeatureSSE42]>;
 121 def FeatureAVX2    : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
 122                                       "Enable AVX2 instructions",
 123                                       [FeatureAVX]>;
 124 def FeatureFMA     : SubtargetFeature<"fma", "HasFMA", "true",
 125                                       "Enable three-operand fused multiple-add",
 126                                       [FeatureAVX]>;
 127 def FeatureF16C    : SubtargetFeature<"f16c", "HasF16C", "true",
 128                        "Support 16-bit floating point conversion instructions",
 129                        [FeatureAVX]>;
 130 def FeatureAVX512   : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F",
 131                                       "Enable AVX-512 instructions",
 132                                       [FeatureAVX2, FeatureFMA, FeatureF16C]>;
 133 def FeatureERI      : SubtargetFeature<"avx512er", "HasERI", "true",
 134                       "Enable AVX-512 Exponential and Reciprocal Instructions",
 135                                       [FeatureAVX512]>;
 136 def FeatureCDI      : SubtargetFeature<"avx512cd", "HasCDI", "true",
 137                       "Enable AVX-512 Conflict Detection Instructions",
 138                                       [FeatureAVX512]>;
 139 def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
 140                        "true", "Enable AVX-512 Population Count Instructions",
 141                                       [FeatureAVX512]>;
 142 def FeaturePFI      : SubtargetFeature<"avx512pf", "HasPFI", "true",
 143                       "Enable AVX-512 PreFetch Instructions",
 144                                       [FeatureAVX512]>;
 145 def FeaturePREFETCHWT1  : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1",
 146                                    "true",
 147                                    "Prefetch with Intent to Write and T1 Hint">;
 148 def FeatureDQI     : SubtargetFeature<"avx512dq", "HasDQI", "true",
 149                       "Enable AVX-512 Doubleword and Quadword Instructions",
 150                                       [FeatureAVX512]>;
 151 def FeatureBWI     : SubtargetFeature<"avx512bw", "HasBWI", "true",
 152                       "Enable AVX-512 Byte and Word Instructions",
 153                                       [FeatureAVX512]>;
 154 def FeatureVLX     : SubtargetFeature<"avx512vl", "HasVLX", "true",
 155                       "Enable AVX-512 Vector Length eXtensions",
 156                                       [FeatureAVX512]>;
 157 def FeatureVBMI     : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
 158                       "Enable AVX-512 Vector Byte Manipulation Instructions",
 159                                       [FeatureBWI]>;
 160 def FeatureVBMI2    : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
 161                       "Enable AVX-512 further Vector Byte Manipulation Instructions",
 162                                       [FeatureBWI]>;
 163 def FeatureIFMA     : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
 164                       "Enable AVX-512 Integer Fused Multiple-Add",
 165                                       [FeatureAVX512]>;
 166 def FeaturePKU   : SubtargetFeature<"pku", "HasPKU", "true",
 167                       "Enable protection keys">;
 168 def FeatureVNNI    : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
 169                           "Enable AVX-512 Vector Neural Network Instructions",
 170                                       [FeatureAVX512]>;
 171 def FeatureBF16    : SubtargetFeature<"avx512bf16", "HasBF16", "true",
 172                            "Support bfloat16 floating point",
 173                                       [FeatureBWI]>;
 174 def FeatureBITALG  : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
 175                        "Enable AVX-512 Bit Algorithms",
 176                         [FeatureBWI]>;
 177 def FeatureVP2INTERSECT  : SubtargetFeature<"avx512vp2intersect",
 178                                             "HasVP2INTERSECT", "true",
 179                                             "Enable AVX-512 vp2intersect",
 180                                             [FeatureAVX512]>;
 181 def FeaturePCLMUL  : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
 182                          "Enable packed carry-less multiplication instructions",
 183                                [FeatureSSE2]>;
 184 def FeatureGFNI    : SubtargetFeature<"gfni", "HasGFNI", "true",
 185                          "Enable Galois Field Arithmetic Instructions",
 186                                [FeatureSSE2]>;
 187 def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
 188                                          "Enable vpclmulqdq instructions",
 189                                          [FeatureAVX, FeaturePCLMUL]>;
 190 def FeatureFMA4    : SubtargetFeature<"fma4", "HasFMA4", "true",
 191                                       "Enable four-operand fused multiple-add",
 192                                       [FeatureAVX, FeatureSSE4A]>;
 193 def FeatureXOP     : SubtargetFeature<"xop", "HasXOP", "true",
 194                                       "Enable XOP instructions",
 195                                       [FeatureFMA4]>;
 196 def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
 197                                           "HasSSEUnalignedMem", "true",
 198                       "Allow unaligned memory operands with SSE instructions">;
 199 def FeatureAES     : SubtargetFeature<"aes", "HasAES", "true",
 200                                       "Enable AES instructions",
 201                                       [FeatureSSE2]>;
 202 def FeatureVAES    : SubtargetFeature<"vaes", "HasVAES", "true",
 203                        "Promote selected AES instructions to AVX512/AVX registers",
 204                         [FeatureAVX, FeatureAES]>;
 205 def FeatureTBM     : SubtargetFeature<"tbm", "HasTBM", "true",
 206                                       "Enable TBM instructions">;
 207 def FeatureLWP     : SubtargetFeature<"lwp", "HasLWP", "true",
 208                                       "Enable LWP instructions">;
 209 def FeatureMOVBE   : SubtargetFeature<"movbe", "HasMOVBE", "true",
 210                                       "Support MOVBE instruction">;
 211 def FeatureRDRAND  : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
 212                                       "Support RDRAND instruction">;
 213 def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
 214                                        "Support FS/GS Base instructions">;
 215 def FeatureLZCNT   : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
 216                                       "Support LZCNT instruction">;
 217 def FeatureBMI     : SubtargetFeature<"bmi", "HasBMI", "true",
 218                                       "Support BMI instructions">;
 219 def FeatureBMI2    : SubtargetFeature<"bmi2", "HasBMI2", "true",
 220                                       "Support BMI2 instructions">;
 221 def FeatureRTM     : SubtargetFeature<"rtm", "HasRTM", "true",
 222                                       "Support RTM instructions">;
 223 def FeatureADX     : SubtargetFeature<"adx", "HasADX", "true",
 224                                       "Support ADX instructions">;
 225 def FeatureSHA     : SubtargetFeature<"sha", "HasSHA", "true",
 226                                       "Enable SHA instructions",
 227                                       [FeatureSSE2]>;
 228 def FeatureSHSTK   : SubtargetFeature<"shstk", "HasSHSTK", "true",
 229                        "Support CET Shadow-Stack instructions">;
 230 def FeaturePRFCHW  : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
 231                                       "Support PRFCHW instructions">;
 232 def FeatureRDSEED  : SubtargetFeature<"rdseed", "HasRDSEED", "true",
 233                                       "Support RDSEED instruction">;
 234 def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true",
 235                                        "Support LAHF and SAHF instructions">;
 236 def FeatureMWAITX  : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
 237                                       "Enable MONITORX/MWAITX timer functionality">;
 238 def FeatureCLZERO  : SubtargetFeature<"clzero", "HasCLZERO", "true",
 239                                       "Enable Cache Line Zero">;
 240 def FeatureCLDEMOTE  : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
 241                                       "Enable Cache Demote">;
 242 def FeaturePTWRITE  : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
 243                                       "Support ptwrite instruction">;
 244 // FIXME: This feature is deprecated in 10.0 and should not be used for
 245 // anything, but removing it would break IR files that may contain it in a
 246 // target-feature attribute.
 247 def FeatureDeprecatedMPX : SubtargetFeature<"mpx", "DeprecatedHasMPX", "false",
 248                                       "Deprecated. Support MPX instructions">;
 249 def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
 250                                      "Use LEA for adjusting the stack pointer">;
 251 def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
 252                                      "HasSlowDivide32", "true",
 253                                      "Use 8-bit divide for positive values less than 256">;
 254 def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl",
 255                                      "HasSlowDivide64", "true",
 256                                      "Use 32-bit divide for positive values less than 2^32">;
 257 def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
 258                                      "PadShortFunctions", "true",
 259                                      "Pad short functions">;
 260 def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
 261                                       "Invalidate Process-Context Identifier">;
 262 def FeatureSGX     : SubtargetFeature<"sgx", "HasSGX", "true",
 263                                       "Enable Software Guard Extensions">;
 264 def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
 265                                       "Flush A Cache Line Optimized">;
 266 def FeatureCLWB    : SubtargetFeature<"clwb", "HasCLWB", "true",
 267                                       "Cache Line Write Back">;
 268 def FeatureWBNOINVD    : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
 269                                       "Write Back No Invalidate">;
 270 def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
 271                                     "Support RDPID instructions">;
 272 def FeatureWAITPKG  : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
 273                                       "Wait and pause enhancements">;
 274 def FeatureENQCMD : SubtargetFeature<"enqcmd", "HasENQCMD", "true",
 275                                      "Has ENQCMD instructions">;
 276 // On some processors, instructions that implicitly take two memory operands are
 277 // slow. In practice, this means that CALL, PUSH, and POP with memory operands
 278 // should be avoided in favor of a MOV + register CALL/PUSH/POP.
 279 def FeatureSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
 280                                      "SlowTwoMemOps", "true",
 281                                      "Two memory operand instructions are slow">;
 282 def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
 283                                    "LEA instruction needs inputs at AG stage">;
 284 def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
 285                                    "LEA instruction with certain arguments is slow">;
 286 def FeatureSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
 287                                    "LEA instruction with 3 ops or certain registers is slow">;
 288 def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
 289                                    "INC and DEC instructions are slower than ADD and SUB">;
 290 def FeatureSoftFloat
 291     : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
 292                        "Use software floating point features">;
 293 def FeaturePOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
 294                                      "HasPOPCNTFalseDeps", "true",
 295                                      "POPCNT has a false dependency on dest register">;
 296 def FeatureLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
 297                                      "HasLZCNTFalseDeps", "true",
 298                                      "LZCNT/TZCNT have a false dependency on dest register">;
 299 def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
 300                                       "platform configuration instruction">;
 301 // On recent X86 (port bound) processors, its preferable to combine to a single shuffle
 302 // using a variable mask over multiple fixed shuffles.
 303 def FeatureFastVariableShuffle
 304     : SubtargetFeature<"fast-variable-shuffle",
 305                        "HasFastVariableShuffle",
 306                        "true", "Shuffles with variable masks are fast">;
 307 // On some X86 processors, there is no performance hazard to writing only the
 308 // lower parts of a YMM or ZMM register without clearing the upper part.
 309 def FeatureFastPartialYMMorZMMWrite
 310     : SubtargetFeature<"fast-partial-ymm-or-zmm-write",
 311                        "HasFastPartialYMMorZMMWrite",
 312                        "true", "Partial writes to YMM/ZMM registers are fast">;
 313 // FeatureFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
 314 // than the corresponding NR code. FeatureFastVectorFSQRT should be enabled if
 315 // vector FSQRT has higher throughput than the corresponding NR code.
 316 // The idea is that throughput bound code is likely to be vectorized, so for
 317 // vectorized code we should care about the throughput of SQRT operations.
 318 // But if the code is scalar that probably means that the code has some kind of
 319 // dependency and we should care more about reducing the latency.
 320 def FeatureFastScalarFSQRT
 321     : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
 322                        "true", "Scalar SQRT is fast (disable Newton-Raphson)">;
 323 def FeatureFastVectorFSQRT
 324     : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
 325                        "true", "Vector SQRT is fast (disable Newton-Raphson)">;
 326 // If lzcnt has equivalent latency/throughput to most simple integer ops, it can
 327 // be used to replace test/set sequences.
 328 def FeatureFastLZCNT
 329     : SubtargetFeature<
 330           "fast-lzcnt", "HasFastLZCNT", "true",
 331           "LZCNT instructions are as fast as most simple integer ops">;
 332 // If the target can efficiently decode NOPs upto 11-bytes in length.
 333 def FeatureFast11ByteNOP
 334     : SubtargetFeature<
 335           "fast-11bytenop", "HasFast11ByteNOP", "true",
 336           "Target can quickly decode up to 11 byte NOPs">;
 337 // If the target can efficiently decode NOPs upto 15-bytes in length.
 338 def FeatureFast15ByteNOP
 339     : SubtargetFeature<
 340           "fast-15bytenop", "HasFast15ByteNOP", "true",
 341           "Target can quickly decode up to 15 byte NOPs">;
 342 // Sandy Bridge and newer processors can use SHLD with the same source on both
 343 // inputs to implement rotate to avoid the partial flag update of the normal
 344 // rotate instructions.
 345 def FeatureFastSHLDRotate
 346     : SubtargetFeature<
 347           "fast-shld-rotate", "HasFastSHLDRotate", "true",
 348           "SHLD can be used as a faster rotate">;
 349
 350 // Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
 351 // "string operations"). See "REP String Enhancement" in the Intel Software
 352 // Development Manual. This feature essentially means that REP MOVSB will copy
 353 // using the largest available size instead of copying bytes one by one, making
 354 // it at least as fast as REPMOVS{W,D,Q}.
 355 def FeatureERMSB
 356     : SubtargetFeature<
 357           "ermsb", "HasERMSB", "true",
 358           "REP MOVS/STOS are fast">;
 359
 360 // Bulldozer and newer processors can merge CMP/TEST (but not other
 361 // instructions) with conditional branches.
 362 def FeatureBranchFusion
 363     : SubtargetFeature<"branchfusion", "HasBranchFusion", "true",
 364                  "CMP/TEST can be fused with conditional branches">;
 365
 366 // Sandy Bridge and newer processors have many instructions that can be
 367 // fused with conditional branches and pass through the CPU as a single
 368 // operation.
 369 def FeatureMacroFusion
 370     : SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
 371                  "Various instructions can be fused with conditional branches">;
 372
 373 // Gather is available since Haswell (AVX2 set). So technically, we can
 374 // generate Gathers on all AVX2 processors. But the overhead on HSW is high.
 375 // Skylake Client processor has faster Gathers than HSW and performance is
 376 // similar to Skylake Server (AVX-512).
 377 def FeatureHasFastGather
 378     : SubtargetFeature<"fast-gather", "HasFastGather", "true",
 379                        "Indicates if gather is reasonably fast">;
 380
 381 def FeaturePrefer128Bit
 382     : SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true",
 383                        "Prefer 128-bit AVX instructions">;
 384
 385 def FeaturePrefer256Bit
 386     : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
 387                        "Prefer 256-bit AVX instructions">;
 388
 389 // Lower indirect calls using a special construct called a `retpoline` to
 390 // mitigate potential Spectre v2 attacks against them.
 391 def FeatureRetpolineIndirectCalls
 392     : SubtargetFeature<
 393           "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
 394           "Remove speculation of indirect calls from the generated code">;
 395
 396 // Lower indirect branches and switches either using conditional branch trees
 397 // or using a special construct called a `retpoline` to mitigate potential
 398 // Spectre v2 attacks against them.
 399 def FeatureRetpolineIndirectBranches
 400     : SubtargetFeature<
 401           "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
 402           "Remove speculation of indirect branches from the generated code">;
 403
 404 // Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
 405 // `retpoline-indirect-branches` above.
 406 def FeatureRetpoline
 407     : SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
 408                        "Remove speculation of indirect branches from the "
 409                        "generated code, either by avoiding them entirely or "
 410                        "lowering them with a speculation blocking construct",
 411                        [FeatureRetpolineIndirectCalls,
 412                         FeatureRetpolineIndirectBranches]>;
 413
 414 // Rely on external thunks for the emitted retpoline calls. This allows users
 415 // to provide their own custom thunk definitions in highly specialized
 416 // environments such as a kernel that does boot-time hot patching.
 417 def FeatureRetpolineExternalThunk
 418     : SubtargetFeature<
 419           "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
 420           "When lowering an indirect call or branch using a `retpoline`, rely "
 421           "on the specified user provided thunk rather than emitting one "
 422           "ourselves. Only has effect when combined with some other retpoline "
 423           "feature", [FeatureRetpolineIndirectCalls]>;
 424
 425 // Direct Move instructions.
 426 def FeatureMOVDIRI  : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
 427                                        "Support movdiri instruction">;
 428 def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
 429                                         "Support movdir64b instruction">;
 430
 431 def FeatureFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
 432           "Indicates that the BEXTR instruction is implemented as a single uop "
 433           "with good throughput">;
 434
 435 // Combine vector math operations with shuffles into horizontal math
 436 // instructions if a CPU implements horizontal operations (introduced with
 437 // SSE3) with better latency/throughput than the alternative sequence.
 438 def FeatureFastHorizontalOps
 439     : SubtargetFeature<
 440         "fast-hops", "HasFastHorizontalOps", "true",
 441         "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
 442         "normal vector instructions with shuffles", [FeatureSSE3]>;
 443
 444 def FeatureFastScalarShiftMasks
 445     : SubtargetFeature<
 446         "fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
 447         "Prefer a left/right scalar logical shift pair over a shift+and pair">;
 448
 449 def FeatureFastVectorShiftMasks
 450     : SubtargetFeature<
 451         "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
 452         "Prefer a left/right vector logical shift pair over a shift+and pair">;
 453
 454 // Merge branches using three-way conditional code.
 455 def FeatureMergeToThreeWayBranch : SubtargetFeature<"merge-to-threeway-branch",
 456                                         "ThreewayBranchProfitable", "true",
 457                                         "Merge branches to a three-way "
 458                                         "conditional branch">;
 459
 460 // Enable use of alias analysis during code generation.
 461 def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
 462                                     "Use alias analysis during codegen">;
 463
 464 // Bonnell
 465 def ProcIntelAtom : SubtargetFeature<"", "X86ProcFamily", "IntelAtom", "">;
 466 // Silvermont
 467 def ProcIntelSLM  : SubtargetFeature<"", "X86ProcFamily", "IntelSLM", "">;
 468 // Goldmont
 469 def ProcIntelGLM  : SubtargetFeature<"", "X86ProcFamily", "IntelGLM", "">;
 470 // Goldmont Plus
 471 def ProcIntelGLP  : SubtargetFeature<"", "X86ProcFamily", "IntelGLP", "">;
 472 // Tremont
 473 def ProcIntelTRM  : SubtargetFeature<"", "X86ProcFamily", "IntelTRM", "">;
 474
 475 //===----------------------------------------------------------------------===//
 476 // Register File Description
 477 //===----------------------------------------------------------------------===//
 478
 479 include "X86RegisterInfo.td"
 480 include "X86RegisterBanks.td"
 481
 482 //===----------------------------------------------------------------------===//
 483 // Instruction Descriptions
 484 //===----------------------------------------------------------------------===//
 485
 486 include "X86Schedule.td"
 487 include "X86InstrInfo.td"
 488 include "X86SchedPredicates.td"
 489
  490 def X86InstrInfo : InstrInfo; // X86's record of class InstrInfo (class defined outside this view).
 491
 492 //===----------------------------------------------------------------------===//
 493 // X86 Scheduler Models
 494 //===----------------------------------------------------------------------===//
 495
 496 include "X86ScheduleAtom.td"
 497 include "X86SchedSandyBridge.td"
 498 include "X86SchedHaswell.td"
 499 include "X86SchedBroadwell.td"
 500 include "X86ScheduleSLM.td"
 501 include "X86ScheduleZnver1.td"
 502 include "X86ScheduleBdVer2.td"
 503 include "X86ScheduleBtVer2.td"
 504 include "X86SchedSkylakeClient.td"
 505 include "X86SchedSkylakeServer.td"
 506
 507 //===----------------------------------------------------------------------===//
 508 // X86 Processor Feature Lists
 509 //===----------------------------------------------------------------------===//
 510
 511 def ProcessorFeatures {
       // Named feature lists, one family per CPU generation. Naming used below:
       //   <CPU>AdditionalFeatures  - features first introduced by this CPU.
       //   <CPU>InheritableFeatures - the predecessor's inheritable list plus the
       //                              additional ones; successors build on this.
       //   <CPU>SpecificFeatures    - features not handed on to any successor.
       //   <CPU>Features            - Inheritable + Specific, i.e. the full list.
       // CPUs without a predecessor in this record spell their list out in full.
 512   // Nehalem
 513   list<SubtargetFeature> NHMInheritableFeatures = [FeatureX87,
 514                                                    FeatureCMPXCHG8B,
 515                                                    FeatureCMOV,
 516                                                    FeatureMMX,
 517                                                    FeatureSSE42,
 518                                                    FeatureFXSR,
 519                                                    FeatureNOPL,
 520                                                    Feature64Bit,
 521                                                    FeatureCMPXCHG16B,
 522                                                    FeaturePOPCNT,
 523                                                    FeatureLAHFSAHF,
 524                                                    FeatureMacroFusion];
 525   list<SubtargetFeature> NHMSpecificFeatures = [];
 526   list<SubtargetFeature> NHMFeatures =
 527     !listconcat(NHMInheritableFeatures, NHMSpecificFeatures);
 528
 529   // Westmere
 530   list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
 531   list<SubtargetFeature> WSMSpecificFeatures = [];
 532   list<SubtargetFeature> WSMInheritableFeatures =
 533     !listconcat(NHMInheritableFeatures, WSMAdditionalFeatures);
 534   list<SubtargetFeature> WSMFeatures =
 535     !listconcat(WSMInheritableFeatures, WSMSpecificFeatures);
 536
 537   // Sandybridge
 538   list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX,
 539                                                   FeatureSlowDivide64,
 540                                                   FeatureXSAVE,
 541                                                   FeatureXSAVEOPT,
 542                                                   FeatureSlow3OpsLEA,
 543                                                   FeatureFastScalarFSQRT,
 544                                                   FeatureFastSHLDRotate,
 545                                                   FeatureMergeToThreeWayBranch];
 546   list<SubtargetFeature> SNBSpecificFeatures = [FeatureSlowUAMem32,
 547                                                 FeaturePOPCNTFalseDeps];
 548   list<SubtargetFeature> SNBInheritableFeatures =
 549     !listconcat(WSMInheritableFeatures, SNBAdditionalFeatures);
 550   list<SubtargetFeature> SNBFeatures =
 551     !listconcat(SNBInheritableFeatures, SNBSpecificFeatures);
 552
 553   // Ivybridge
 554   list<SubtargetFeature> IVBAdditionalFeatures = [FeatureRDRAND,
 555                                                   FeatureF16C,
 556                                                   FeatureFSGSBase];
 557   list<SubtargetFeature> IVBSpecificFeatures = [FeatureSlowUAMem32,
 558                                                 FeaturePOPCNTFalseDeps];
 559   list<SubtargetFeature> IVBInheritableFeatures =
 560     !listconcat(SNBInheritableFeatures, IVBAdditionalFeatures);
 561   list<SubtargetFeature> IVBFeatures =
 562     !listconcat(IVBInheritableFeatures, IVBSpecificFeatures);
 563
 564   // Haswell
 565   list<SubtargetFeature> HSWAdditionalFeatures = [FeatureAVX2,
 566                                                   FeatureBMI,
 567                                                   FeatureBMI2,
 568                                                   FeatureERMSB,
 569                                                   FeatureFMA,
 570                                                   FeatureINVPCID,
 571                                                   FeatureLZCNT,
 572                                                   FeatureMOVBE,
 573                                                   FeatureFastVariableShuffle];
 574   list<SubtargetFeature> HSWSpecificFeatures = [FeaturePOPCNTFalseDeps,
 575                                                 FeatureLZCNTFalseDeps];
 576   list<SubtargetFeature> HSWInheritableFeatures =
 577     !listconcat(IVBInheritableFeatures, HSWAdditionalFeatures);
 578   list<SubtargetFeature> HSWFeatures =
 579     !listconcat(HSWInheritableFeatures, HSWSpecificFeatures);
 580
 581   // Broadwell
 582   list<SubtargetFeature> BDWAdditionalFeatures = [FeatureADX,
 583                                                   FeatureRDSEED,
 584                                                   FeaturePRFCHW];
 585   list<SubtargetFeature> BDWSpecificFeatures = [FeaturePOPCNTFalseDeps,
 586                                                 FeatureLZCNTFalseDeps];
 587   list<SubtargetFeature> BDWInheritableFeatures =
 588     !listconcat(HSWInheritableFeatures, BDWAdditionalFeatures);
 589   list<SubtargetFeature> BDWFeatures =
 590     !listconcat(BDWInheritableFeatures, BDWSpecificFeatures);
 591
 592   // Skylake
 593   list<SubtargetFeature> SKLAdditionalFeatures = [FeatureAES,
 594                                                   FeatureXSAVEC,
 595                                                   FeatureXSAVES,
 596                                                   FeatureCLFLUSHOPT,
 597                                                   FeatureFastVectorFSQRT];
 598   list<SubtargetFeature> SKLSpecificFeatures = [FeatureHasFastGather,
 599                                                 FeaturePOPCNTFalseDeps,
 600                                                 FeatureSGX];
 601   list<SubtargetFeature> SKLInheritableFeatures =
 602     !listconcat(BDWInheritableFeatures, SKLAdditionalFeatures);
 603   list<SubtargetFeature> SKLFeatures =
 604     !listconcat(SKLInheritableFeatures, SKLSpecificFeatures);
 605
 606   // Skylake-AVX512
 607   list<SubtargetFeature> SKXAdditionalFeatures = [FeatureAVX512,
 608                                                   FeaturePrefer256Bit,
 609                                                   FeatureCDI,
 610                                                   FeatureDQI,
 611                                                   FeatureBWI,
 612                                                   FeatureVLX,
 613                                                   FeaturePKU,
 614                                                   FeatureCLWB];
 615   list<SubtargetFeature> SKXSpecificFeatures = [FeatureHasFastGather,
 616                                                 FeaturePOPCNTFalseDeps];
 617   list<SubtargetFeature> SKXInheritableFeatures =
 618     !listconcat(SKLInheritableFeatures, SKXAdditionalFeatures);
 619   list<SubtargetFeature> SKXFeatures =
 620     !listconcat(SKXInheritableFeatures, SKXSpecificFeatures);
 621
 622   // Cascadelake
 623   list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI];
 624   list<SubtargetFeature> CLXSpecificFeatures = [FeatureHasFastGather,
 625                                                 FeaturePOPCNTFalseDeps];
 626   list<SubtargetFeature> CLXInheritableFeatures =
 627     !listconcat(SKXInheritableFeatures, CLXAdditionalFeatures);
 628   list<SubtargetFeature> CLXFeatures =
 629     !listconcat(CLXInheritableFeatures, CLXSpecificFeatures);
 630
 631   // Cooperlake
 632   list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16];
 633   list<SubtargetFeature> CPXSpecificFeatures = [FeatureHasFastGather,
 634                                                 FeaturePOPCNTFalseDeps];
 635   list<SubtargetFeature> CPXInheritableFeatures =
 636     !listconcat(CLXInheritableFeatures, CPXAdditionalFeatures);
 637   list<SubtargetFeature> CPXFeatures =
 638     !listconcat(CPXInheritableFeatures, CPXSpecificFeatures);
 639
 640   // Cannonlake
 641   list<SubtargetFeature> CNLAdditionalFeatures = [FeatureAVX512,
 642                                                   FeaturePrefer256Bit,
 643                                                   FeatureCDI,
 644                                                   FeatureDQI,
 645                                                   FeatureBWI,
 646                                                   FeatureVLX,
 647                                                   FeaturePKU,
 648                                                   FeatureVBMI,
 649                                                   FeatureIFMA,
 650                                                   FeatureSHA,
 651                                                   FeatureSGX];
 652   list<SubtargetFeature> CNLSpecificFeatures = [FeatureHasFastGather];
 653   list<SubtargetFeature> CNLInheritableFeatures =
 654     !listconcat(SKLInheritableFeatures, CNLAdditionalFeatures);
 655   list<SubtargetFeature> CNLFeatures =
 656     !listconcat(CNLInheritableFeatures, CNLSpecificFeatures);
 657
 658   // Icelake
 659   list<SubtargetFeature> ICLAdditionalFeatures = [FeatureBITALG,
 660                                                   FeatureVAES,
 661                                                   FeatureVBMI2,
 662                                                   FeatureVNNI,
 663                                                   FeatureVPCLMULQDQ,
 664                                                   FeatureVPOPCNTDQ,
 665                                                   FeatureGFNI,
 666                                                   FeatureCLWB,
 667                                                   FeatureRDPID];
 668   list<SubtargetFeature> ICLSpecificFeatures = [FeatureHasFastGather];
 669   list<SubtargetFeature> ICLInheritableFeatures =
 670     !listconcat(CNLInheritableFeatures, ICLAdditionalFeatures);
 671   list<SubtargetFeature> ICLFeatures =
 672     !listconcat(ICLInheritableFeatures, ICLSpecificFeatures);
 673
 674   // Icelake Server
 675   list<SubtargetFeature> ICXSpecificFeatures = [FeaturePCONFIG,
 676                                                 FeatureWBNOINVD,
 677                                                 FeatureHasFastGather];
 678   list<SubtargetFeature> ICXFeatures =
 679     !listconcat(ICLInheritableFeatures, ICXSpecificFeatures);
 680
 681   // Tigerlake
 682   list<SubtargetFeature> TGLAdditionalFeatures = [FeatureVP2INTERSECT,
 683                                                   FeatureMOVDIRI,
 684                                                   FeatureMOVDIR64B,
 685                                                   FeatureSHSTK];
 686   list<SubtargetFeature> TGLSpecificFeatures = [FeatureHasFastGather];
       // Chain from ICLInheritableFeatures like every generation above, so that
       // ICL's specific features are not pulled in a second time and the
       // inheritable list really contains everything a successor should get.
 687   list<SubtargetFeature> TGLInheritableFeatures =
 688     !listconcat(ICLInheritableFeatures, TGLAdditionalFeatures);
 689   list<SubtargetFeature> TGLFeatures =
 690     !listconcat(TGLInheritableFeatures, TGLSpecificFeatures);
 691
 692   // Atom
 693   list<SubtargetFeature> AtomInheritableFeatures = [FeatureX87,
 694                                                     FeatureCMPXCHG8B,
 695                                                     FeatureCMOV,
 696                                                     FeatureMMX,
 697                                                     FeatureSSSE3,
 698                                                     FeatureFXSR,
 699                                                     FeatureNOPL,
 700                                                     Feature64Bit,
 701                                                     FeatureCMPXCHG16B,
 702                                                     FeatureMOVBE,
 703                                                     FeatureSlowTwoMemOps,
 704                                                     FeatureLAHFSAHF];
 705   list<SubtargetFeature> AtomSpecificFeatures = [ProcIntelAtom,
 706                                                  FeatureSlowUAMem16,
 707                                                  FeatureLEAForSP,
 708                                                  FeatureSlowDivide32,
 709                                                  FeatureSlowDivide64,
 710                                                  FeatureLEAUsesAG,
 711                                                  FeaturePadShortFunctions];
 712   list<SubtargetFeature> AtomFeatures =
 713     !listconcat(AtomInheritableFeatures, AtomSpecificFeatures);
 714
 715   // Silvermont
 716   list<SubtargetFeature> SLMAdditionalFeatures = [FeatureSSE42,
 717                                                   FeaturePOPCNT,
 718                                                   FeaturePCLMUL,
 719                                                   FeaturePRFCHW,
 720                                                   FeatureSlowLEA,
 721                                                   FeatureSlowIncDec,
 722                                                   FeatureRDRAND];
 723   list<SubtargetFeature> SLMSpecificFeatures = [ProcIntelSLM,
 724                                                 FeatureSlowDivide64,
 725                                                 FeatureSlowPMULLD,
 726                                                 FeaturePOPCNTFalseDeps];
 727   list<SubtargetFeature> SLMInheritableFeatures =
 728     !listconcat(AtomInheritableFeatures, SLMAdditionalFeatures);
 729   list<SubtargetFeature> SLMFeatures =
 730     !listconcat(SLMInheritableFeatures, SLMSpecificFeatures);
 731
 732   // Goldmont
 733   list<SubtargetFeature> GLMAdditionalFeatures = [FeatureAES,
 734                                                   FeatureSHA,
 735                                                   FeatureRDSEED,
 736                                                   FeatureXSAVE,
 737                                                   FeatureXSAVEOPT,
 738                                                   FeatureXSAVEC,
 739                                                   FeatureXSAVES,
 740                                                   FeatureCLFLUSHOPT,
 741                                                   FeatureFSGSBase];
 742   list<SubtargetFeature> GLMSpecificFeatures = [ProcIntelGLM,
 743                                                 FeaturePOPCNTFalseDeps];
 744   list<SubtargetFeature> GLMInheritableFeatures =
 745     !listconcat(SLMInheritableFeatures, GLMAdditionalFeatures);
 746   list<SubtargetFeature> GLMFeatures =
 747     !listconcat(GLMInheritableFeatures, GLMSpecificFeatures);
 748
 749   // Goldmont Plus
 750   list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE,
 751                                                   FeatureRDPID,
 752                                                   FeatureSGX];
 753   list<SubtargetFeature> GLPSpecificFeatures = [ProcIntelGLP];
 754   list<SubtargetFeature> GLPInheritableFeatures =
 755     !listconcat(GLMInheritableFeatures, GLPAdditionalFeatures);
 756   list<SubtargetFeature> GLPFeatures =
 757     !listconcat(GLPInheritableFeatures, GLPSpecificFeatures);
 758
 759   // Tremont
 760   list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLDEMOTE,
 761                                                   FeatureGFNI,
 762                                                   FeatureMOVDIRI,
 763                                                   FeatureMOVDIR64B,
 764                                                   FeatureWAITPKG];
 765   list<SubtargetFeature> TRMSpecificFeatures = [ProcIntelTRM];
 766   list<SubtargetFeature> TRMFeatures =
 767     !listconcat(GLPInheritableFeatures, TRMAdditionalFeatures,
 768                 TRMSpecificFeatures);
 769
 770   // Knights Landing
 771   list<SubtargetFeature> KNLFeatures = [FeatureX87,
 772                                         FeatureCMPXCHG8B,
 773                                         FeatureCMOV,
 774                                         FeatureMMX,
 775                                         FeatureFXSR,
 776                                         FeatureNOPL,
 777                                         Feature64Bit,
 778                                         FeatureCMPXCHG16B,
 779                                         FeaturePOPCNT,
 780                                         FeatureSlowDivide64,
 781                                         FeaturePCLMUL,
 782                                         FeatureXSAVE,
 783                                         FeatureXSAVEOPT,
 784                                         FeatureLAHFSAHF,
 785                                         FeatureSlow3OpsLEA,
 786                                         FeatureSlowIncDec,
 787                                         FeatureAES,
 788                                         FeatureRDRAND,
 789                                         FeatureF16C,
 790                                         FeatureFSGSBase,
 791                                         FeatureAVX512,
 792                                         FeatureERI,
 793                                         FeatureCDI,
 794                                         FeaturePFI,
 795                                         FeaturePREFETCHWT1,
 796                                         FeatureADX,
 797                                         FeatureRDSEED,
 798                                         FeatureMOVBE,
 799                                         FeatureLZCNT,
 800                                         FeatureBMI,
 801                                         FeatureBMI2,
 802                                         FeatureFMA,
 803                                         FeaturePRFCHW,
 804                                         FeatureSlowTwoMemOps,
 805                                         FeatureFastPartialYMMorZMMWrite,
 806                                         FeatureHasFastGather,
 807                                         FeatureSlowPMADDWD];
 808   // TODO Add AVX5124FMAPS/AVX5124VNNIW features
 809   list<SubtargetFeature> KNMFeatures =
 810     !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]);
 811
 812   // Barcelona
 813   list<SubtargetFeature> BarcelonaInheritableFeatures = [FeatureX87,
 814                                                          FeatureCMPXCHG8B,
 815                                                          FeatureSSE4A,
 816                                                          Feature3DNowA,
 817                                                          FeatureFXSR,
 818                                                          FeatureNOPL,
 819                                                          FeatureCMPXCHG16B,
 820                                                          FeatureLZCNT,
 821                                                          FeaturePOPCNT,
 822                                                          FeatureSlowSHLD,
 823                                                          FeatureLAHFSAHF,
 824                                                          FeatureCMOV,
 825                                                          Feature64Bit,
 826                                                          FeatureFastScalarShiftMasks];
 827   list<SubtargetFeature> BarcelonaFeatures = BarcelonaInheritableFeatures;
 828
 829   // Bobcat
 830   list<SubtargetFeature> BtVer1InheritableFeatures = [FeatureX87,
 831                                                       FeatureCMPXCHG8B,
 832                                                       FeatureCMOV,
 833                                                       FeatureMMX,
 834                                                       FeatureSSSE3,
 835                                                       FeatureSSE4A,
 836                                                       FeatureFXSR,
 837                                                       FeatureNOPL,
 838                                                       Feature64Bit,
 839                                                       FeatureCMPXCHG16B,
 840                                                       FeaturePRFCHW,
 841                                                       FeatureLZCNT,
 842                                                       FeaturePOPCNT,
 843                                                       FeatureSlowSHLD,
 844                                                       FeatureLAHFSAHF,
 845                                                       FeatureFast15ByteNOP,
 846                                                       FeatureFastScalarShiftMasks,
 847                                                       FeatureFastVectorShiftMasks];
 848   list<SubtargetFeature> BtVer1Features = BtVer1InheritableFeatures;
 849
 850   // Jaguar
 851   list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX,
 852                                                      FeatureAES,
 853                                                      FeaturePCLMUL,
 854                                                      FeatureBMI,
 855                                                      FeatureF16C,
 856                                                      FeatureMOVBE,
 857                                                      FeatureXSAVE,
 858                                                      FeatureXSAVEOPT];
 859   list<SubtargetFeature> BtVer2SpecificFeatures = [FeatureFastLZCNT,
 860                                                    FeatureFastBEXTR,
 861                                                    FeatureFastPartialYMMorZMMWrite,
 862                                                    FeatureFastHorizontalOps];
 863   list<SubtargetFeature> BtVer2InheritableFeatures =
 864     !listconcat(BtVer1InheritableFeatures, BtVer2AdditionalFeatures);
 865   list<SubtargetFeature> BtVer2Features =
 866     !listconcat(BtVer2InheritableFeatures, BtVer2SpecificFeatures);
 867
 868   // Bulldozer
 869   list<SubtargetFeature> BdVer1InheritableFeatures = [FeatureX87,
 870                                                       FeatureCMPXCHG8B,
 871                                                       FeatureCMOV,
 872                                                       FeatureXOP,
 873                                                       Feature64Bit,
 874                                                       FeatureCMPXCHG16B,
 875                                                       FeatureAES,
 876                                                       FeaturePRFCHW,
 877                                                       FeaturePCLMUL,
 878                                                       FeatureMMX,
 879                                                       FeatureFXSR,
 880                                                       FeatureNOPL,
 881                                                       FeatureLZCNT,
 882                                                       FeaturePOPCNT,
 883                                                       FeatureXSAVE,
 884                                                       FeatureLWP,
 885                                                       FeatureSlowSHLD,
 886                                                       FeatureLAHFSAHF,
 887                                                       FeatureFast11ByteNOP,
 888                                                       FeatureFastScalarShiftMasks,
 889                                                       FeatureBranchFusion];
 890   list<SubtargetFeature> BdVer1Features = BdVer1InheritableFeatures;
 891
 892   // PileDriver
 893   list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C,
 894                                                      FeatureBMI,
 895                                                      FeatureTBM,
 896                                                      FeatureFMA,
 897                                                      FeatureFastBEXTR];
 898   list<SubtargetFeature> BdVer2InheritableFeatures =
 899     !listconcat(BdVer1InheritableFeatures, BdVer2AdditionalFeatures);
 900   list<SubtargetFeature> BdVer2Features = BdVer2InheritableFeatures;
 901
 902   // Steamroller
 903   list<SubtargetFeature> BdVer3AdditionalFeatures = [FeatureXSAVEOPT,
 904                                                      FeatureFSGSBase];
 905   list<SubtargetFeature> BdVer3InheritableFeatures =
 906     !listconcat(BdVer2InheritableFeatures, BdVer3AdditionalFeatures);
 907   list<SubtargetFeature> BdVer3Features = BdVer3InheritableFeatures;
 908
 909   // Excavator
 910   list<SubtargetFeature> BdVer4AdditionalFeatures = [FeatureAVX2,
 911                                                      FeatureBMI2,
 912                                                      FeatureMWAITX];
 913   list<SubtargetFeature> BdVer4InheritableFeatures =
 914     !listconcat(BdVer3InheritableFeatures, BdVer4AdditionalFeatures);
 915   list<SubtargetFeature> BdVer4Features = BdVer4InheritableFeatures;
 916
 917
 918   // AMD Zen Processors common ISAs
 919   list<SubtargetFeature> ZNFeatures = [FeatureADX,
 920                                        FeatureAES,
 921                                        FeatureAVX2,
 922                                        FeatureBMI,
 923                                        FeatureBMI2,
 924                                        FeatureCLFLUSHOPT,
 925                                        FeatureCLZERO,
 926                                        FeatureCMOV,
 927                                        Feature64Bit,
 928                                        FeatureCMPXCHG16B,
 929                                        FeatureF16C,
 930                                        FeatureFMA,
 931                                        FeatureFSGSBase,
 932                                        FeatureFXSR,
 933                                        FeatureNOPL,
 934                                        FeatureFastLZCNT,
 935                                        FeatureLAHFSAHF,
 936                                        FeatureLZCNT,
 937                                        FeatureFastBEXTR,
 938                                        FeatureFast15ByteNOP,
 939                                        FeatureBranchFusion,
 940                                        FeatureFastScalarShiftMasks,
 941                                        FeatureMMX,
 942                                        FeatureMOVBE,
 943                                        FeatureMWAITX,
 944                                        FeaturePCLMUL,
 945                                        FeaturePOPCNT,
 946                                        FeaturePRFCHW,
 947                                        FeatureRDRAND,
 948                                        FeatureRDSEED,
 949                                        FeatureSHA,
 950                                        FeatureSSE4A,
 951                                        FeatureSlowSHLD,
 952                                        FeatureX87,
 953                                        FeatureXSAVE,
 954                                        FeatureXSAVEC,
 955                                        FeatureXSAVEOPT,
 956                                        FeatureXSAVES];
 957   list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
 958                                                   FeatureRDPID,
 959                                                   FeatureWBNOINVD];
 960   list<SubtargetFeature> ZN2Features =
 961     !listconcat(ZNFeatures, ZN2AdditionalFeatures);
 962 }
 963
 964 //===----------------------------------------------------------------------===//
 965 // X86 processors supported.
 966 //===----------------------------------------------------------------------===//
 967
 968 class Proc<string Name, list<SubtargetFeature> Features>
     // Shorthand: a ProcessorModel whose model argument is always GenericModel.
 969  : ProcessorModel<Name, GenericModel, Features>;
 970
 971 // NOTE: CMPXCHG8B is here for legacy compatibility so that it is only disabled
 972 // if i386/i486 is specifically requested.
 973 def : Proc<"generic",         [FeatureX87, FeatureSlowUAMem16,
 974                                FeatureCMPXCHG8B]>;
 975 def : Proc<"i386",            [FeatureX87, FeatureSlowUAMem16]>;
 976 def : Proc<"i486",            [FeatureX87, FeatureSlowUAMem16]>;
 977 def : Proc<"i586",            [FeatureX87, FeatureSlowUAMem16,
 978                                FeatureCMPXCHG8B]>;
 979 def : Proc<"pentium",         [FeatureX87, FeatureSlowUAMem16,
 980                                FeatureCMPXCHG8B]>;
 981 def : Proc<"pentium-mmx",     [FeatureX87, FeatureSlowUAMem16,
 982                                FeatureCMPXCHG8B, FeatureMMX]>;
 983
     // i686 is the i586 list plus CMOV; pentiumpro additionally enables NOPL.
 984 def : Proc<"i686", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
 985                     FeatureCMOV]>;
 986 def : Proc<"pentiumpro", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
 987                           FeatureCMOV, FeatureNOPL]>;
 988
     // pentium2 is the pentiumpro list plus MMX and FXSR.
 989 def : Proc<"pentium2",        [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
 990                                FeatureMMX, FeatureCMOV, FeatureFXSR,
 991                                FeatureNOPL]>;
 992
     // Both pentium3 names get the pentium2 list plus SSE1.
 993 foreach P = ["pentium3", "pentium3m"] in {
 994   def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,FeatureMMX,
 995                  FeatureSSE1, FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
 996 }
 997
 998 // Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
 999 // The intent is to enable it for pentium4 which is the current default
1000 // processor in a vanilla 32-bit clang compilation when no specific
1001 // architecture is specified.  This generally gives a nice performance
1002 // increase on silvermont, with largely neutral behavior on other
1003 // contemporary large core processors.
1004 // pentium-m, pentium4m, prescott and nocona are included as a preventative
1005 // measure to avoid performance surprises, in case clang's default cpu
1006 // changes slightly.
1007
     // pentium-m, pentium4 and pentium4m all use GenericPostRAModel with the
     // same SSE2-level feature list.
1008 def : ProcessorModel<"pentium-m", GenericPostRAModel,
1009                      [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
1010                       FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL,
1011                       FeatureCMOV]>;
1012
1013 foreach P = ["pentium4", "pentium4m"] in {
1014   def : ProcessorModel<P, GenericPostRAModel,
1015                        [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
1016                         FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL,
1017                         FeatureCMOV]>;
1018 }
1019
1020 // Intel Quark. The feature list is empty, so not even X87 is enabled.
1021 def : Proc<"lakemont",        []>;
1022
1023 // Intel Core Duo.
     // Same feature list as prescott below, but paired with SandyBridgeModel.
1024 def : ProcessorModel<"yonah", SandyBridgeModel,
1025                      [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
1026                       FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureNOPL,
1027                       FeatureCMOV]>;
1028
1029 // NetBurst.
1030 def : ProcessorModel<"prescott", GenericPostRAModel,
1031                      [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
1032                       FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureNOPL,
1033                       FeatureCMOV]>;
     // nocona is the prescott list plus Feature64Bit and FeatureCMPXCHG16B.
1034 def : ProcessorModel<"nocona", GenericPostRAModel, [
1035   FeatureX87,
1036   FeatureSlowUAMem16,
1037   FeatureCMPXCHG8B,
1038   FeatureCMOV,
1039   FeatureMMX,
1040   FeatureSSE3,
1041   FeatureFXSR,
1042   FeatureNOPL,
1043   Feature64Bit,
1044   FeatureCMPXCHG16B
1045 ]>;
1046
1047 // Intel Core 2 Solo/Duo.
1048 def : ProcessorModel<"core2", SandyBridgeModel, [
1049   FeatureX87,
1050   FeatureSlowUAMem16,
1051   FeatureCMPXCHG8B,
1052   FeatureCMOV,
1053   FeatureMMX,
1054   FeatureSSSE3,
1055   FeatureFXSR,
1056   FeatureNOPL,
1057   Feature64Bit,
1058   FeatureCMPXCHG16B,
1059   FeatureLAHFSAHF,
1060   FeatureMacroFusion
1061 ]>;
     // penryn matches core2 except that FeatureSSSE3 is replaced by FeatureSSE41.
1062 def : ProcessorModel<"penryn", SandyBridgeModel, [
1063   FeatureX87,
1064   FeatureSlowUAMem16,
1065   FeatureCMPXCHG8B,
1066   FeatureCMOV,
1067   FeatureMMX,
1068   FeatureSSE41,
1069   FeatureFXSR,
1070   FeatureNOPL,
1071   Feature64Bit,
1072   FeatureCMPXCHG16B,
1073   FeatureLAHFSAHF,
1074   FeatureMacroFusion
1075 ]>;
1076
1077 // Atom CPUs. Each foreach registers two names for the same definition.
1078 foreach P = ["bonnell", "atom"] in {
1079   def : ProcessorModel<P, AtomModel, ProcessorFeatures.AtomFeatures>;
1080 }
1081
1082 foreach P = ["silvermont", "slm"] in {
1083   def : ProcessorModel<P, SLMModel, ProcessorFeatures.SLMFeatures>;
1084 }
1085
     // goldmont, goldmont-plus and tremont are all paired with SLMModel.
1086 def : ProcessorModel<"goldmont", SLMModel, ProcessorFeatures.GLMFeatures>;
1087 def : ProcessorModel<"goldmont-plus", SLMModel, ProcessorFeatures.GLPFeatures>;
1088 def : ProcessorModel<"tremont", SLMModel, ProcessorFeatures.TRMFeatures>;
1089
1090 // "Arrandale" along with corei3 and corei5
1091 foreach P = ["nehalem", "corei7"] in { // nehalem through ivybridge below all use SandyBridgeModel for scheduling.
1092   def : ProcessorModel<P, SandyBridgeModel, ProcessorFeatures.NHMFeatures>;
1093 }
1094
1095 // Westmere is the corei3/i5/i7 path from nehalem to sandybridge
1096 def : ProcessorModel<"westmere", SandyBridgeModel,
1097                      ProcessorFeatures.WSMFeatures>;
1098
1099 foreach P = ["sandybridge", "corei7-avx"] in { // "corei7-avx" is a second name for the same definition.
1100   def : ProcessorModel<P, SandyBridgeModel, ProcessorFeatures.SNBFeatures>;
1101 }
1102
1103 foreach P = ["ivybridge", "core-avx-i"] in { // "core-avx-i" is a second name for the same definition.
1104   def : ProcessorModel<P, SandyBridgeModel, ProcessorFeatures.IVBFeatures>;
1105 }
1106
1107 foreach P = ["haswell", "core-avx2"] in { // From here on each entry names its own model: HaswellModel, BroadwellModel, SkylakeClientModel.
1108   def : ProcessorModel<P, HaswellModel, ProcessorFeatures.HSWFeatures>;
1109 }
1110
1111 def : ProcessorModel<"broadwell", BroadwellModel,
1112                      ProcessorFeatures.BDWFeatures>;
1113
1114 def : ProcessorModel<"skylake", SkylakeClientModel,
1115                      ProcessorFeatures.SKLFeatures>;
1116
1117 // FIXME: define KNL scheduler model. Until then both "knl" and "knm" below borrow HaswellModel.
1118 def : ProcessorModel<"knl", HaswellModel, ProcessorFeatures.KNLFeatures>;
1119 def : ProcessorModel<"knm", HaswellModel, ProcessorFeatures.KNMFeatures>;
1120
1121 foreach P = ["skylake-avx512", "skx"] in { // Two names for the SkylakeServerModel / SKXFeatures pairing.
1122   def : ProcessorModel<P, SkylakeServerModel, ProcessorFeatures.SKXFeatures>;
1123 }
1124
1125 def : ProcessorModel<"cascadelake", SkylakeServerModel, // cascadelake through tigerlake all reuse SkylakeServerModel; only the feature list differs.
1126                      ProcessorFeatures.CLXFeatures>;
1127 def : ProcessorModel<"cooperlake", SkylakeServerModel,
1128                      ProcessorFeatures.CPXFeatures>;
1129 def : ProcessorModel<"cannonlake", SkylakeServerModel,
1130                      ProcessorFeatures.CNLFeatures>;
1131 def : ProcessorModel<"icelake-client", SkylakeServerModel,
1132                      ProcessorFeatures.ICLFeatures>;
1133 def : ProcessorModel<"icelake-server", SkylakeServerModel,
1134                      ProcessorFeatures.ICXFeatures>;
1135 def : ProcessorModel<"tigerlake", SkylakeServerModel,
1136                      ProcessorFeatures.TGLFeatures>;
1137
1138 // AMD CPUs. Entries written with Proc<> pass only a name and a feature list (no scheduling-model argument at the use site).
1139
1140 def : Proc<"k6",   [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
1141                     FeatureMMX]>;
1142 def : Proc<"k6-2", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, // Differs from "k6" only in the last entry: Feature3DNow instead of FeatureMMX.
1143                     Feature3DNow]>;
1144 def : Proc<"k6-3", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, // Same feature list as "k6-2".
1145                                Feature3DNow]>;
1146
1147 foreach P = ["athlon", "athlon-tbird"] in { // Both names get the same feature list.
1148   def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV,
1149                  Feature3DNowA, FeatureNOPL, FeatureSlowSHLD]>;
1150 }
1151
1152 foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in { // The "athlon" list above plus FeatureSSE1 and FeatureFXSR.
1153   def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV,
1154                  FeatureSSE1, Feature3DNowA, FeatureFXSR, FeatureNOPL,
1155                  FeatureSlowSHLD]>;
1156 }
1157
1158 foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in { // Four names for one SSE2-level, Feature64Bit definition.
1159   def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
1160                  FeatureSSE2, Feature3DNowA, FeatureFXSR, FeatureNOPL,
1161                  Feature64Bit, FeatureSlowSHLD, FeatureCMOV,
1162                  FeatureFastScalarShiftMasks]>;
1163 }
1164
1165 foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in { // The "k8" list with FeatureSSE3 in place of FeatureSSE2, plus FeatureCMPXCHG16B.
1166   def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureSSE3,
1167                  Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B,
1168                  FeatureSlowSHLD, FeatureCMOV, Feature64Bit,
1169                  FeatureFastScalarShiftMasks]>;
1170 }
1171
1172 foreach P = ["amdfam10", "barcelona"] in { // Two names for the BarcelonaFeatures list.
1173   def : Proc<P, ProcessorFeatures.BarcelonaFeatures>;
1174 }
1175
1176 // Bobcat
1177 def : Proc<"btver1", ProcessorFeatures.BtVer1Features>;
1178 // Jaguar
1179 def : ProcessorModel<"btver2", BtVer2Model, ProcessorFeatures.BtVer2Features>;
1180
1181 // Bulldozer (scheduled with BdVer2Model, the same model as Piledriver below)
1182 def : ProcessorModel<"bdver1", BdVer2Model, ProcessorFeatures.BdVer1Features>;
1183 // Piledriver
1184 def : ProcessorModel<"bdver2", BdVer2Model, ProcessorFeatures.BdVer2Features>;
1185 // Steamroller (declared via Proc<>, so no explicit scheduling model is named here)
1186 def : Proc<"bdver3", ProcessorFeatures.BdVer3Features>;
1187 // Excavator (declared via Proc<>, so no explicit scheduling model is named here)
1188 def : Proc<"bdver4", ProcessorFeatures.BdVer4Features>;
1189
1190 def : ProcessorModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures>;
1191 def : ProcessorModel<"znver2", Znver1Model, ProcessorFeatures.ZN2Features>; // Reuses Znver1Model; only the feature list (ZN2Features) differs from "znver1".
1192
1193 def : Proc<"geode",           [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
1194                                Feature3DNowA]>;
1195
1196 def : Proc<"winchip-c6",      [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>; // winchip-c6, winchip2 and c3 do not list FeatureCMPXCHG8B.
1197 def : Proc<"winchip2",        [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
1198 def : Proc<"c3",              [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>; // Same feature list as "winchip2".
1199 def : Proc<"c3-2",            [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
1200                                FeatureMMX, FeatureSSE1, FeatureFXSR,
1201                                FeatureCMOV]>;
1202
1203 // We also provide a generic 64-bit specific x86 processor model which tries to
1204 // be good for modern chips without enabling instruction set encodings past the
1205 // basic SSE2 and 64-bit ones. It disables slow things from any mainstream and
1206 // modern 64-bit x86 chip, and enables features that are generally beneficial.
1207 //
1208 // We currently use the Sandy Bridge model as the default scheduling model as
1209 // we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which
1210 // covers a huge swath of x86 processors. If there are specific scheduling
1211 // knobs which need to be tuned differently for AMD chips, we might consider
1212 // forming a common base for them.
1213 def : ProcessorModel<"x86-64", SandyBridgeModel, [ // FeatureSlowUAMem16, listed on the older entries in this file, is not listed here.
1214   FeatureX87,
1215   FeatureCMPXCHG8B,
1216   FeatureCMOV,
1217   FeatureMMX,
1218   FeatureSSE2, // Highest SSE level enabled, per the "basic SSE2" note above.
1219   FeatureFXSR,
1220   FeatureNOPL,
1221   Feature64Bit,
1222   FeatureSlow3OpsLEA,
1223   FeatureSlowIncDec,
1224   FeatureMacroFusion
1225 ]>;
1226
1227 //===----------------------------------------------------------------------===//
1228 // Calling Conventions
1229 //===----------------------------------------------------------------------===//
1230
1231 include "X86CallingConv.td"
1232
1233
1234 //===----------------------------------------------------------------------===//
1235 // Assembly Parser
1236 //===----------------------------------------------------------------------===//
1237
1238 def ATTAsmParserVariant : AsmParserVariant { // Parser variant 0, named "att"; ATTAsmWriter in this file uses the same Variant number.
1239   int Variant = 0;
1240
1241   // Variant name.
1242   string Name = "att";
1243
1244   // Discard comments in assembly strings. In this variant a comment starts at "#".
1245   string CommentDelimiter = "#";
1246
1247   // Recognize hard coded registers. In this variant they are introduced by "%".
1248   string RegisterPrefix = "%";
1249 }
1250
1251 def IntelAsmParserVariant : AsmParserVariant { // Parser variant 1, named "intel"; IntelAsmWriter in this file uses the same Variant number.
1252   int Variant = 1;
1253
1254   // Variant name.
1255   string Name = "intel";
1256
1257   // Discard comments in assembly strings. In this variant a comment starts at ";".
1258   string CommentDelimiter = ";";
1259
1260   // Recognize hard coded registers. The prefix is the empty string in this variant.
1261   string RegisterPrefix = "";
1262 }
1263
1264 //===----------------------------------------------------------------------===//
1265 // Assembly Printers
1266 //===----------------------------------------------------------------------===//
1267
1268 // The X86 target supports two different syntaxes for emitting machine code.
1269 // This is controlled by the -x86-asm-syntax={att|intel} option.
1270 def ATTAsmWriter : AsmWriter { // Writer for variant 0, the number ATTAsmParserVariant also uses.
1271   string AsmWriterClassName  = "ATTInstPrinter";
1272   int Variant = 0;
1273 }
1274 def IntelAsmWriter : AsmWriter { // Writer for variant 1, the number IntelAsmParserVariant also uses.
1275   string AsmWriterClassName  = "IntelInstPrinter";
1276   int Variant = 1;
1277 }
1278
1279 def X86 : Target { // Top-level target record: ties together the instruction set, both parser variants and both asm writers.
1280   // Information about the instructions...
1281   let InstructionSet = X86InstrInfo;
1282   let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant]; // Listed in Variant order: 0 ("att"), then 1 ("intel").
1283   let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter]; // Listed in the same order as the parser variants.
1284   let AllowRegisterRenaming = 1;
1285 }
1286
1287 //===----------------------------------------------------------------------===//
1288 // Pfm Counters
1289 //===----------------------------------------------------------------------===//
1290
1291 include "X86PfmCounters.td"