lib/Target/X86/X86.td

   1 //===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This is a target description file for the Intel i386 architecture, referred
  10 // to here as the "X86" architecture.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 // Get the target-independent interfaces which we are implementing...
  15 //
  16 include "llvm/Target/Target.td"
  17
   18 //===----------------------------------------------------------------------===//
   19 // X86 Subtarget state
   20 //
   21
      // Each mode feature sets its own boolean subtarget field (In64BitMode,
      // In32BitMode or In16BitMode) to "true" and implies no other feature.
   22 def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
   23                                   "64-bit mode (x86_64)">;
   24 def Mode32Bit : SubtargetFeature<"32bit-mode", "In32BitMode", "true",
   25                                   "32-bit mode (80386)">;
   26 def Mode16Bit : SubtargetFeature<"16bit-mode", "In16BitMode", "true",
   27                                   "16-bit mode (i8086)">;
  28
   29 //===----------------------------------------------------------------------===//
   30 // X86 Subtarget features
   31 //===----------------------------------------------------------------------===//
   32
      // Standalone boolean features: each one below sets a single Has* subtarget
      // field to "true" and implies no other feature.
   33 def FeatureX87     : SubtargetFeature<"x87","HasX87", "true",
   34                                       "Enable X87 float instructions">;
   35
   36 def FeatureNOPL    : SubtargetFeature<"nopl", "HasNOPL", "true",
   37                                       "Enable NOPL instruction">;
   38
   39 def FeatureCMOV    : SubtargetFeature<"cmov","HasCMov", "true",
   40                                       "Enable conditional move instructions">;
   41
   42 def FeaturePOPCNT   : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
   43                                        "Support POPCNT instruction">;
   44
   45 def FeatureFXSR    : SubtargetFeature<"fxsr", "HasFXSR", "true",
   46                                       "Support fxsave/fxrestore instructions">;
   47
      // The four xsave* features are independent of one another here: xsaveopt,
      // xsavec and xsaves do not imply xsave.
   48 def FeatureXSAVE   : SubtargetFeature<"xsave", "HasXSAVE", "true",
   49                                        "Support xsave instructions">;
   50
   51 def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
   52                                        "Support xsaveopt instructions">;
   53
   54 def FeatureXSAVEC  : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
   55                                        "Support xsavec instructions">;
   56
   57 def FeatureXSAVES  : SubtargetFeature<"xsaves", "HasXSAVES", "true",
   58                                        "Support xsaves instructions">;
   59
      // SSE levels form a chain: every feature sets the shared X86SSELevel field
      // to its own level, and each level after SSE1 implies the one directly
      // below it.
   60 def FeatureSSE1    : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
   61                                       "Enable SSE instructions">;
   62 def FeatureSSE2    : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
   63                                       "Enable SSE2 instructions",
   64                                       [FeatureSSE1]>;
   65 def FeatureSSE3    : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
   66                                       "Enable SSE3 instructions",
   67                                       [FeatureSSE2]>;
   68 def FeatureSSSE3   : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
   69                                       "Enable SSSE3 instructions",
   70                                       [FeatureSSE3]>;
   71 def FeatureSSE41   : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
   72                                       "Enable SSE 4.1 instructions",
   73                                       [FeatureSSSE3]>;
   74 def FeatureSSE42   : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
   75                                       "Enable SSE 4.2 instructions",
   76                                       [FeatureSSE41]>;
   77 // The MMX subtarget feature is separate from the rest of the SSE features
   78 // because it's important (for odd compatibility reasons) to be able to
   79 // turn it off explicitly while allowing SSE+ to be on.
      // MMX, 3DNow! and 3DNow!A share the X863DNowLevel field and form their own
      // chain: 3dnowa implies 3dnow, which implies mmx.
   80 def FeatureMMX     : SubtargetFeature<"mmx","X863DNowLevel", "MMX",
   81                                       "Enable MMX instructions">;
   82 def Feature3DNow   : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
   83                                       "Enable 3DNow! instructions",
   84                                       [FeatureMMX]>;
   85 def Feature3DNowA  : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
   86                                       "Enable 3DNow! Athlon instructions",
   87                                       [Feature3DNow]>;
   88 // All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
   89 // feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
   90 // without disabling 64-bit mode. Nothing should imply this feature bit. It
   91 // is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
   92 def Feature64Bit   : SubtargetFeature<"64bit", "HasX86_64", "true",
   93                                       "Support 64-bit instructions">;
   94 def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
   95                                       "64-bit with cmpxchg16b">;
      // Tuning flags: the "slow-*" features below only record that an instruction
      // or access pattern is slow; none of them implies an ISA feature.
   96 def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
   97                                        "SHLD instruction is slow">;
   98 def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
   99                                         "PMULLD instruction is slow">;
  100 def FeatureSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
  101                                           "true",
  102                                           "PMADDWD is slower than PMULLD">;
  103 // FIXME: This should not apply to CPUs that do not have SSE.
  104 def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
  105                                 "IsUAMem16Slow", "true",
  106                                 "Slow unaligned 16-byte memory access">;
  107 def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
  108                                 "IsUAMem32Slow", "true",
  109                                 "Slow unaligned 32-byte memory access">;
      // SSE4a is tracked by its own boolean (HasSSE4A) rather than by
      // X86SSELevel; it implies SSE3.
  110 def FeatureSSE4A   : SubtargetFeature<"sse4a", "HasSSE4A", "true",
  111                                       "Support SSE 4a instructions",
  112                                       [FeatureSSE3]>;
  113
      // AVX and AVX2 continue the X86SSELevel chain on top of SSE 4.2: avx
      // implies sse4.2 and avx2 implies avx.
  114 def FeatureAVX     : SubtargetFeature<"avx", "X86SSELevel", "AVX",
  115                                       "Enable AVX instructions",
  116                                       [FeatureSSE42]>;
  117 def FeatureAVX2    : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
  118                                       "Enable AVX2 instructions",
  119                                       [FeatureAVX]>;
 120 def FeatureFMA     : SubtargetFeature<"fma", "HasFMA", "true",
 121                                       "Enable three-operand fused multiple-add",
 122                                       [FeatureAVX]>;
  123 def FeatureF16C    : SubtargetFeature<"f16c", "HasF16C", "true",
  124                        "Support 16-bit floating point conversion instructions",
  125                        [FeatureAVX]>;
      // AVX-512 foundation: the last level recorded in X86SSELevel in this
      // section. It implies AVX2 and, in addition, the FMA and F16C booleans.
  126 def FeatureAVX512   : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F",
  127                                       "Enable AVX-512 instructions",
  128                                       [FeatureAVX2, FeatureFMA, FeatureF16C]>;
      // The avx512* extensions below are plain booleans. Each implies avx512f,
      // either directly or (VBMI, VBMI2) through avx512bw. prefetchwt1 is the
      // exception in this run: it implies nothing.
  129 def FeatureERI      : SubtargetFeature<"avx512er", "HasERI", "true",
  130                       "Enable AVX-512 Exponential and Reciprocal Instructions",
  131                                       [FeatureAVX512]>;
  132 def FeatureCDI      : SubtargetFeature<"avx512cd", "HasCDI", "true",
  133                       "Enable AVX-512 Conflict Detection Instructions",
  134                                       [FeatureAVX512]>;
  135 def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
  136                        "true", "Enable AVX-512 Population Count Instructions",
  137                                       [FeatureAVX512]>;
  138 def FeaturePFI      : SubtargetFeature<"avx512pf", "HasPFI", "true",
  139                       "Enable AVX-512 PreFetch Instructions",
  140                                       [FeatureAVX512]>;
  141 def FeaturePREFETCHWT1  : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1",
  142                                    "true",
  143                                    "Prefetch with Intent to Write and T1 Hint">;
  144 def FeatureDQI     : SubtargetFeature<"avx512dq", "HasDQI", "true",
  145                       "Enable AVX-512 Doubleword and Quadword Instructions",
  146                                       [FeatureAVX512]>;
  147 def FeatureBWI     : SubtargetFeature<"avx512bw", "HasBWI", "true",
  148                       "Enable AVX-512 Byte and Word Instructions",
  149                                       [FeatureAVX512]>;
  150 def FeatureVLX     : SubtargetFeature<"avx512vl", "HasVLX", "true",
  151                       "Enable AVX-512 Vector Length eXtensions",
  152                                       [FeatureAVX512]>;
  153 def FeatureVBMI     : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
  154                       "Enable AVX-512 Vector Byte Manipulation Instructions",
  155                                       [FeatureBWI]>;
  156 def FeatureVBMI2    : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
  157                       "Enable AVX-512 further Vector Byte Manipulation Instructions",
  158                                       [FeatureBWI]>;
 159 def FeatureIFMA     : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
 160                       "Enable AVX-512 Integer Fused Multiple-Add",
 161                                       [FeatureAVX512]>;
  162 def FeaturePKU   : SubtargetFeature<"pku", "HasPKU", "true",
  163                       "Enable protection keys">;
  164 def FeatureVNNI    : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
  165                           "Enable AVX-512 Vector Neural Network Instructions",
  166                                       [FeatureAVX512]>;
  167 def FeatureBITALG  : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
  168                        "Enable AVX-512 Bit Algorithms",
  169                         [FeatureBWI]>;
      // pclmul and gfni imply only SSE2; vpclmulqdq implies both AVX and pclmul.
  170 def FeaturePCLMUL  : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
  171                          "Enable packed carry-less multiplication instructions",
  172                                [FeatureSSE2]>;
  173 def FeatureGFNI    : SubtargetFeature<"gfni", "HasGFNI", "true",
  174                          "Enable Galois Field Arithmetic Instructions",
  175                                [FeatureSSE2]>;
  176 def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
  177                                          "Enable vpclmulqdq instructions",
  178                                          [FeatureAVX, FeaturePCLMUL]>;
 179 def FeatureFMA4    : SubtargetFeature<"fma4", "HasFMA4", "true",
 180                                       "Enable four-operand fused multiple-add",
 181                                       [FeatureAVX, FeatureSSE4A]>;
  182 def FeatureXOP     : SubtargetFeature<"xop", "HasXOP", "true",
  183                                       "Enable XOP instructions",
  184                                       [FeatureFMA4]>; // FMA4 in turn implies AVX and SSE4a.
  185 def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
  186                                           "HasSSEUnalignedMem", "true",
  187                       "Allow unaligned memory operands with SSE instructions">;
      // aes implies only SSE2; vaes implies both AVX and aes.
  188 def FeatureAES     : SubtargetFeature<"aes", "HasAES", "true",
  189                                       "Enable AES instructions",
  190                                       [FeatureSSE2]>;
  191 def FeatureVAES    : SubtargetFeature<"vaes", "HasVAES", "true",
  192                        "Promote selected AES instructions to AVX512/AVX registers",
  193                         [FeatureAVX, FeatureAES]>;
  194 def FeatureTBM     : SubtargetFeature<"tbm", "HasTBM", "true",
  195                                       "Enable TBM instructions">;
  196 def FeatureLWP     : SubtargetFeature<"lwp", "HasLWP", "true",
  197                                       "Enable LWP instructions">;
  198 def FeatureMOVBE   : SubtargetFeature<"movbe", "HasMOVBE", "true",
  199                                       "Support MOVBE instruction">;
      // Note: the feature string is spelled "rdrnd", not "rdrand".
  200 def FeatureRDRAND  : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
  201                                       "Support RDRAND instruction">;
  202 def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
  203                                        "Support FS/GS Base instructions">;
  204 def FeatureLZCNT   : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
  205                                       "Support LZCNT instruction">;
  206 def FeatureBMI     : SubtargetFeature<"bmi", "HasBMI", "true",
  207                                       "Support BMI instructions">;
  208 def FeatureBMI2    : SubtargetFeature<"bmi2", "HasBMI2", "true",
  209                                       "Support BMI2 instructions">;
  210 def FeatureRTM     : SubtargetFeature<"rtm", "HasRTM", "true",
  211                                       "Support RTM instructions">;
  212 def FeatureADX     : SubtargetFeature<"adx", "HasADX", "true",
  213                                       "Support ADX instructions">;
      // SHA is the only feature in this run that implies another one (SSE2).
  214 def FeatureSHA     : SubtargetFeature<"sha", "HasSHA", "true",
  215                                       "Enable SHA instructions",
  216                                       [FeatureSSE2]>;
  217 def FeatureSHSTK   : SubtargetFeature<"shstk", "HasSHSTK", "true",
  218                        "Support CET Shadow-Stack instructions">;
  219 def FeaturePRFCHW  : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
  220                                       "Support PRFCHW instructions">;
  221 def FeatureRDSEED  : SubtargetFeature<"rdseed", "HasRDSEED", "true",
  222                                       "Support RDSEED instruction">;
      // Note: the feature string is just "sahf" although the flag covers both
      // LAHF and SAHF.
  223 def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true",
  224                                        "Support LAHF and SAHF instructions">;
  225 def FeatureMWAITX  : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
  226                                       "Enable MONITORX/MWAITX timer functionality">;
  227 def FeatureCLZERO  : SubtargetFeature<"clzero", "HasCLZERO", "true",
  228                                       "Enable Cache Line Zero">;
  229 def FeatureCLDEMOTE  : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
  230                                       "Enable Cache Demote">;
  231 def FeaturePTWRITE  : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
  232                                       "Support ptwrite instruction">;
  233 def FeatureMPX     : SubtargetFeature<"mpx", "HasMPX", "true",
  234                                       "Support MPX instructions">;
  235 def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
  236                                      "Use LEA for adjusting the stack pointer">;
      // For the two divide flags the feature string names the narrowing that is
      // applied (idivl-to-divb, idivq-to-divl), while the subtarget fields are
      // called HasSlowDivide32 / HasSlowDivide64.
  237 def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
  238                                      "HasSlowDivide32", "true",
  239                                      "Use 8-bit divide for positive values less than 256">;
  240 def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl",
  241                                      "HasSlowDivide64", "true",
  242                                      "Use 32-bit divide for positive values less than 2^32">;
  243 def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
  244                                      "PadShortFunctions", "true",
  245                                      "Pad short functions">;
      // The remaining features in this run are standalone ISA booleans with no
      // implied features.
  246 def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
  247                                       "Invalidate Process-Context Identifier">;
  248 def FeatureSGX     : SubtargetFeature<"sgx", "HasSGX", "true",
  249                                       "Enable Software Guard Extensions">;
  250 def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
  251                                       "Flush A Cache Line Optimized">;
  252 def FeatureCLWB    : SubtargetFeature<"clwb", "HasCLWB", "true",
  253                                       "Cache Line Write Back">;
  254 def FeatureWBNOINVD    : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
  255                                       "Write Back No Invalidate">;
  256 def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
  257                                     "Support RDPID instructions">;
  258 def FeatureWAITPKG  : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
  259                                       "Wait and pause enhancements">;
  260 // On some processors, instructions that implicitly take two memory operands are
  261 // slow. In practice, this means that CALL, PUSH, and POP with memory operands
  262 // should be avoided in favor of a MOV + register CALL/PUSH/POP.
  263 def FeatureSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
  264                                      "SlowTwoMemOps", "true",
  265                                      "Two memory operand instructions are slow">;
      // LEA-related tuning flags; they are independent of one another.
  266 def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
  267                                    "LEA instruction needs inputs at AG stage">;
  268 def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
  269                                    "LEA instruction with certain arguments is slow">;
  270 def FeatureSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
  271                                    "LEA instruction with 3 ops or certain registers is slow">;
  272 def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
  273                                    "INC and DEC instructions are slower than ADD and SUB">;
  274 def FeatureSoftFloat
  275     : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
  276                        "Use software floating point features.">;
      // The two false-deps flags only record a false dependency on the
      // destination register; they do not imply popcnt or lzcnt themselves.
  277 def FeaturePOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
  278                                      "HasPOPCNTFalseDeps", "true",
  279                                      "POPCNT has a false dependency on dest register">;
  280 def FeatureLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
  281                                      "HasLZCNTFalseDeps", "true",
  282                                      "LZCNT/TZCNT have a false dependency on dest register">;
  283 def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
  284                                       "platform configuration instruction">;
  285 // On recent X86 (port bound) processors, it's preferable to combine to a single shuffle
  286 // using a variable mask over multiple fixed shuffles.
  287 def FeatureFastVariableShuffle
  288     : SubtargetFeature<"fast-variable-shuffle",
  289                        "HasFastVariableShuffle",
  290                        "true", "Shuffles with variable masks are fast">;
  291 // On some X86 processors, there is no performance hazard to writing only the
  292 // lower parts of a YMM or ZMM register without clearing the upper part.
  293 def FeatureFastPartialYMMorZMMWrite
  294     : SubtargetFeature<"fast-partial-ymm-or-zmm-write",
  295                        "HasFastPartialYMMorZMMWrite",
  296                        "true", "Partial writes to YMM/ZMM registers are fast">;
  297 // FeatureFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
  298 // than the corresponding NR code. FeatureFastVectorFSQRT should be enabled if
  299 // vector FSQRT has higher throughput than the corresponding NR code.
  300 // The idea is that throughput bound code is likely to be vectorized, so for
  301 // vectorized code we should care about the throughput of SQRT operations.
  302 // But if the code is scalar that probably means that the code has some kind of
  303 // dependency and we should care more about reducing the latency.
      // ("NR" stands for Newton-Raphson, as spelled out in the description
      // strings below.)
  304 def FeatureFastScalarFSQRT
  305     : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
  306                        "true", "Scalar SQRT is fast (disable Newton-Raphson)">;
  307 def FeatureFastVectorFSQRT
  308     : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
  309                        "true", "Vector SQRT is fast (disable Newton-Raphson)">;
  310 // If lzcnt has equivalent latency/throughput to most simple integer ops, it can
  311 // be used to replace test/set sequences.
      // This is a tuning flag only: it does not imply the lzcnt ISA feature.
  312 def FeatureFastLZCNT
  313     : SubtargetFeature<
  314           "fast-lzcnt", "HasFastLZCNT", "true",
  315           "LZCNT instructions are as fast as most simple integer ops">;
  316 // Set if the target can efficiently decode NOPs up to 11 bytes in length.
  317 def FeatureFast11ByteNOP
  318     : SubtargetFeature<
  319           "fast-11bytenop", "HasFast11ByteNOP", "true",
  320           "Target can quickly decode up to 11 byte NOPs">;
  321 // Set if the target can efficiently decode NOPs up to 15 bytes in length.
  322 def FeatureFast15ByteNOP
  323     : SubtargetFeature<
  324           "fast-15bytenop", "HasFast15ByteNOP", "true",
  325           "Target can quickly decode up to 15 byte NOPs">;
  326 // Sandy Bridge and newer processors can use SHLD with the same source on both
  327 // inputs to implement rotate to avoid the partial flag update of the normal
  328 // rotate instructions.
  329 def FeatureFastSHLDRotate
  330     : SubtargetFeature<
  331           "fast-shld-rotate", "HasFastSHLDRotate", "true",
  332           "SHLD can be used as a faster rotate">;
 333
  334 // Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
  335 // "string operations"). See "REP String Enhancement" in the Intel Software
  336 // Development Manual. This feature essentially means that REP MOVSB will copy
  337 // using the largest available size instead of copying bytes one by one, making
  338 // it at least as fast as REP MOVS{W,D,Q}.
  339 def FeatureERMSB
  340     : SubtargetFeature<
  341           "ermsb", "HasERMSB", "true",
  342           "REP MOVS/STOS are fast">;
  343
  344 // Sandy Bridge and newer processors have many instructions that can be
  345 // fused with conditional branches and pass through the CPU as a single
  346 // operation.
  347 def FeatureMacroFusion
  348     : SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
  349                  "Various instructions can be fused with conditional branches">;
  350
  351 // Gather is available since Haswell (AVX2 set). So technically, we can
  352 // generate Gathers on all AVX2 processors. But the overhead on HSW is high.
  353 // Skylake Client processor has faster Gathers than HSW and performance is
  354 // similar to Skylake Server (AVX-512).
      // The flag is a tuning hint only: it does not imply AVX2.
  355 def FeatureHasFastGather
  356     : SubtargetFeature<"fast-gather", "HasFastGather", "true",
  357                        "Indicates if gather is reasonably fast.">;
  358
      // Tuning preference for 256-bit AVX instructions; it implies no ISA feature.
  359 def FeaturePrefer256Bit
  360     : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
  361                        "Prefer 256-bit AVX instructions">;
 362
  363 // Lower indirect calls using a special construct called a `retpoline` to
  364 // mitigate potential Spectre v2 attacks against them.
  365 def FeatureRetpolineIndirectCalls
  366     : SubtargetFeature<
  367           "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
  368           "Remove speculation of indirect calls from the generated code.">;
  369
  370 // Lower indirect branches and switches either using conditional branch trees
  371 // or using a special construct called a `retpoline` to mitigate potential
  372 // Spectre v2 attacks against them.
  373 def FeatureRetpolineIndirectBranches
  374     : SubtargetFeature<
  375           "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
  376           "Remove speculation of indirect branches from the generated code.">;
  377
  378 // Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
  379 // `retpoline-indirect-branches` above.
      // It sets its own field (DeprecatedUseRetpoline) and turns the two real
      // features on through its implied-feature list.
  380 def FeatureRetpoline
  381     : SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
  382                        "Remove speculation of indirect branches from the "
  383                        "generated code, either by avoiding them entirely or "
  384                        "lowering them with a speculation blocking construct.",
  385                        [FeatureRetpolineIndirectCalls,
  386                         FeatureRetpolineIndirectBranches]>;
  387
  388 // Rely on external thunks for the emitted retpoline calls. This allows users
  389 // to provide their own custom thunk definitions in highly specialized
  390 // environments such as a kernel that does boot-time hot patching.
      // Note that, as written, enabling this feature also enables
      // `retpoline-indirect-calls` through the implied-feature list.
  391 def FeatureRetpolineExternalThunk
  392     : SubtargetFeature<
  393           "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
  394           "When lowering an indirect call or branch using a `retpoline`, rely "
  395           "on the specified user provided thunk rather than emitting one "
  396           "ourselves. Only has effect when combined with some other retpoline "
  397           "feature.", [FeatureRetpolineIndirectCalls]>;
 398
  399 // Direct Move instructions.
      // movdiri and movdir64b are independent booleans; neither implies the other.
  400 def FeatureMOVDIRI  : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
  401                                        "Support movdiri instruction">;
  402 def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
  403                                         "Support movdir64b instruction">;
  404
      // Tuning flag for BEXTR; it implies no ISA feature.
  405 def FeatureFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
  406           "Indicates that the BEXTR instruction is implemented as a single uop "
  407           "with good throughput.">;
  408
  409 // Combine vector math operations with shuffles into horizontal math
  410 // instructions if a CPU implements horizontal operations (introduced with
  411 // SSE3) with better latency/throughput than the alternative sequence.
      // Unlike most tuning flags in this file, this one implies an ISA level (SSE3).
  412 def FeatureFastHorizontalOps
  413     : SubtargetFeature<
  414         "fast-hops", "HasFastHorizontalOps", "true",
  415         "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
  416         "normal vector instructions with shuffles", [FeatureSSE3]>;
  417
  418 // Merge branches using three-way conditional code.
  419 def FeatureMergeToThreeWayBranch : SubtargetFeature<"merge-to-threeway-branch",
  420                                         "ThreewayBranchProfitable", "true",
  421                                         "Merge branches to a three-way "
  422                                         "conditional branch">;
 423
 424 //===----------------------------------------------------------------------===//
 425 // Register File Description
 426 //===----------------------------------------------------------------------===//
 427
 428 include "X86RegisterInfo.td"
 429 include "X86RegisterBanks.td"
 430
 431 //===----------------------------------------------------------------------===//
 432 // Instruction Descriptions
 433 //===----------------------------------------------------------------------===//
 434
 435 include "X86Schedule.td"
 436 include "X86InstrInfo.td"
 437 include "X86SchedPredicates.td"
  438
      // The target's InstrInfo record; it is declared with no field overrides.
  439 def X86InstrInfo : InstrInfo;
 440
 441 //===----------------------------------------------------------------------===//
 442 // X86 processors supported.
 443 //===----------------------------------------------------------------------===//
 444
 445 include "X86ScheduleAtom.td"
 446 include "X86SchedSandyBridge.td"
 447 include "X86SchedHaswell.td"
 448 include "X86SchedBroadwell.td"
 449 include "X86ScheduleSLM.td"
 450 include "X86ScheduleZnver1.td"
 451 include "X86ScheduleBdVer2.td"
 452 include "X86ScheduleBtVer2.td"
 453 include "X86SchedSkylakeClient.td"
 454 include "X86SchedSkylakeServer.td"
  455
      // CPU-family markers: unlike the boolean features above, each of these sets
      // the X86ProcFamily field to a family value and implies nothing else.
  456 def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom",
  457                     "Intel Atom processors">;
  458 def ProcIntelSLM  : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM",
  459                     "Intel Silvermont processors">;
  460 def ProcIntelGLM  : SubtargetFeature<"glm", "X86ProcFamily", "IntelGLM",
  461                     "Intel Goldmont processors">;
  462 def ProcIntelGLP  : SubtargetFeature<"glp", "X86ProcFamily", "IntelGLP",
  463                     "Intel Goldmont Plus processors">;
  464 def ProcIntelTRM  : SubtargetFeature<"tremont", "X86ProcFamily", "IntelTRM",
  465                     "Intel Tremont processors">;
  466
      // Shorthand for a ProcessorModel that always passes GenericModel as the
      // model argument; callers supply only the CPU name and its feature list.
  467 class Proc<string Name, list<SubtargetFeature> Features>
  468  : ProcessorModel<Name, GenericModel, Features>;
  469
      // Processors from "generic"/i386 up to pentium3m. All of them use the Proc
      // shorthand, and every entry includes FeatureX87 and FeatureSlowUAMem16.
  470 def : Proc<"generic",         [FeatureX87, FeatureSlowUAMem16]>;
  471 def : Proc<"i386",            [FeatureX87, FeatureSlowUAMem16]>;
  472 def : Proc<"i486",            [FeatureX87, FeatureSlowUAMem16]>;
  473 def : Proc<"i586",            [FeatureX87, FeatureSlowUAMem16]>;
  474 def : Proc<"pentium",         [FeatureX87, FeatureSlowUAMem16]>;
  475 def : Proc<"pentium-mmx",     [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
  476
      // i686 is the first entry with CMOV; pentiumpro additionally gets NOPL.
  477 def : Proc<"i686", [FeatureX87, FeatureSlowUAMem16, FeatureCMOV]>;
  478 def : Proc<"pentiumpro", [FeatureX87, FeatureSlowUAMem16, FeatureCMOV,
  479                           FeatureNOPL]>;
  480
  481 def : Proc<"pentium2",        [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
  482                                FeatureCMOV, FeatureFXSR, FeatureNOPL]>;
  483
      // pentium3 and pentium3m share one feature list (pentium2 plus SSE1).
  484 foreach P = ["pentium3", "pentium3m"] in {
  485   def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE1,
  486                  FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
  487 }
 488
  489 // Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
  490 // The intent is to enable it for pentium4 which is the current default
  491 // processor in a vanilla 32-bit clang compilation when no specific
  492 // architecture is specified.  This generally gives a nice performance
  493 // increase on silvermont, with largely neutral behavior on other
  494 // contemporary large core processors.
  495 // pentium-m, pentium4m, prescott and nocona are included as a preventative
  496 // measure to avoid performance surprises, in case clang's default cpu
  497 // changes slightly.
      // These entries therefore use ProcessorModel with GenericPostRAModel
      // directly instead of the Proc shorthand (which uses GenericModel).
      // pentium-m, pentium4 and pentium4m all carry the same feature list.
  498
  499 def : ProcessorModel<"pentium-m", GenericPostRAModel,
  500                      [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
  501                       FeatureSSE2, FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
  502
  503 foreach P = ["pentium4", "pentium4m"] in {
  504   def : ProcessorModel<P, GenericPostRAModel,
  505                        [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
  506                         FeatureSSE2, FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
  507 }
 508
  509 // Intel Quark.
      // The feature list is empty: not even FeatureX87 is enabled for lakemont.
  510 def : Proc<"lakemont",        []>;
  511
  512 // Intel Core Duo.
      // Uses SandyBridgeModel; the feature list is the same as prescott's below.
  513 def : ProcessorModel<"yonah", SandyBridgeModel,
  514                      [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
  515                       FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
  516
  517 // NetBurst.
  518 def : ProcessorModel<"prescott", GenericPostRAModel,
  519                      [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
  520                       FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
      // nocona is prescott plus Feature64Bit and FeatureCMPXCHG16B; it is the
      // first processor in this list that enables 64-bit support.
  521 def : ProcessorModel<"nocona", GenericPostRAModel, [
  522   FeatureX87,
  523   FeatureSlowUAMem16,
  524   FeatureCMOV,
  525   FeatureMMX,
  526   FeatureSSE3,
  527   FeatureFXSR,
  528   FeatureNOPL,
  529   Feature64Bit,
  530   FeatureCMPXCHG16B
  531 ]>;
 532
 533 // Intel Core 2 Solo/Duo. Scheduled with SandyBridgeModel.
 534 def : ProcessorModel<"core2", SandyBridgeModel, [
 535   FeatureX87,
 536   FeatureSlowUAMem16,
 537   FeatureCMOV,
 538   FeatureMMX,
 539   FeatureSSSE3,  // "penryn" lists FeatureSSE41 in this position instead.
 540   FeatureFXSR,
 541   FeatureNOPL,
 542   Feature64Bit,
 543   FeatureCMPXCHG16B,
 544   FeatureLAHFSAHF,
 545   FeatureMacroFusion
 546 ]>;
 547 def : ProcessorModel<"penryn", SandyBridgeModel, [  // Same entries as "core2" except the SSE level.
 548   FeatureX87,
 549   FeatureSlowUAMem16,
 550   FeatureCMOV,
 551   FeatureMMX,
 552   FeatureSSE41,  // "core2" lists FeatureSSSE3 in this position instead.
 553   FeatureFXSR,
 554   FeatureNOPL,
 555   Feature64Bit,
 556   FeatureCMPXCHG16B,
 557   FeatureLAHFSAHF,
 558   FeatureMacroFusion
 559 ]>;
 560
 561 // Atom CPUs. BonnellProc fixes the model (AtomModel) and feature list; only the name varies.
 562 class BonnellProc<string Name> : ProcessorModel<Name, AtomModel, [
 563   ProcIntelAtom,
 564   FeatureX87,
 565   FeatureSlowUAMem16,
 566   FeatureCMOV,
 567   FeatureMMX,
 568   FeatureSSSE3,
 569   FeatureFXSR,
 570   FeatureNOPL,
 571   Feature64Bit,
 572   FeatureCMPXCHG16B,
 573   FeatureMOVBE,
 574   FeatureLEAForSP,
 575   FeatureSlowDivide32,
 576   FeatureSlowDivide64,
 577   FeatureSlowTwoMemOps,
 578   FeatureLEAUsesAG,
 579   FeaturePadShortFunctions,
 580   FeatureLAHFSAHF
 581 ]>;
 582 def : BonnellProc<"bonnell">;
 583 def : BonnellProc<"atom">; // Pin the generic name to the baseline (same record class as "bonnell").
 584
 585 class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [  // Model and features fixed; only Name varies.
 586   ProcIntelSLM,
 587   FeatureX87,
 588   FeatureCMOV,
 589   FeatureMMX,
 590   FeatureSSE42,
 591   FeatureFXSR,
 592   FeatureNOPL,
 593   Feature64Bit,
 594   FeatureCMPXCHG16B,
 595   FeatureMOVBE,
 596   FeaturePOPCNT,
 597   FeaturePCLMUL,
 598   FeatureSlowDivide64,
 599   FeatureSlowTwoMemOps,
 600   FeaturePRFCHW,
 601   FeatureSlowLEA,
 602   FeatureSlowIncDec,
 603   FeatureSlowPMULLD,
 604   FeatureRDRAND,
 605   FeatureLAHFSAHF,
 606   FeaturePOPCNTFalseDeps
 607 ]>;
 608 def : SilvermontProc<"silvermont">;
 609 def : SilvermontProc<"slm">; // Legacy alias (same record class as "silvermont").
 610
 611 class ProcessorFeatures<list<SubtargetFeature> Inherited,  // Holds a feature list built from two parts.
 612                         list<SubtargetFeature> NewFeatures> {
 613   list<SubtargetFeature> Value = !listconcat(Inherited, NewFeatures);  // Inherited entries first, then NewFeatures.
 614 }
 615
 616 class ProcModel<string Name, SchedMachineModel Model,  // ProcessorModel wrapper that takes two feature lists.
 617                 list<SubtargetFeature> ProcFeatures,
 618                 list<SubtargetFeature> OtherFeatures> :
 619   ProcessorModel<Name, Model, !listconcat(ProcFeatures, OtherFeatures)>;  // ProcFeatures first, then OtherFeatures.
 620
 621 def GLMFeatures : ProcessorFeatures<[], [  // Base set: the Inherited list is empty.
 622   FeatureX87,
 623   FeatureCMOV,
 624   FeatureMMX,
 625   FeatureSSE42,
 626   FeatureFXSR,
 627   FeatureNOPL,
 628   Feature64Bit,
 629   FeatureCMPXCHG16B,
 630   FeatureMOVBE,
 631   FeaturePOPCNT,
 632   FeaturePCLMUL,
 633   FeatureAES,
 634   FeaturePRFCHW,
 635   FeatureSlowTwoMemOps,
 636   FeatureSlowLEA,
 637   FeatureSlowIncDec,
 638   FeatureLAHFSAHF,
 639   FeatureMPX,
 640   FeatureSHA,
 641   FeatureRDRAND,
 642   FeatureRDSEED,
 643   FeatureXSAVE,
 644   FeatureXSAVEOPT,
 645   FeatureXSAVEC,
 646   FeatureXSAVES,
 647   FeatureCLFLUSHOPT,
 648   FeatureFSGSBase
 649 ]>;  // GLPFeatures extends this list via GLMFeatures.Value.
 650
 651 class GoldmontProc<string Name> : ProcModel<Name, SLMModel,  // Scheduled with SLMModel.
 652       GLMFeatures.Value, [
 653   ProcIntelGLM,
 654   FeaturePOPCNTFalseDeps  // Listed here, not in GLMFeatures, so GLPFeatures does not inherit it.
 655 ]>;
 656 def : GoldmontProc<"goldmont">;
 657
 658 def GLPFeatures : ProcessorFeatures<GLMFeatures.Value, [  // GLMFeatures plus the three entries below.
 659   FeaturePTWRITE,
 660   FeatureRDPID,
 661   FeatureSGX
 662 ]>;
 663
 664 class GoldmontPlusProc<string Name> : ProcModel<Name, SLMModel,  // Scheduled with SLMModel.
 665       GLPFeatures.Value, [
 666   ProcIntelGLP  // The only per-processor addition to GLPFeatures.
 667 ]>;
 668 def : GoldmontPlusProc<"goldmont-plus">;
 669
 670 class TremontProc<string Name> : ProcModel<Name, SLMModel,  // Scheduled with SLMModel.
 671       GLPFeatures.Value, [  // Reuses GLPFeatures directly; there is no separate Tremont feature record.
 672   ProcIntelTRM,
 673   FeatureCLDEMOTE,
 674   FeatureGFNI,
 675   FeatureMOVDIRI,
 676   FeatureMOVDIR64B,
 677   FeatureWAITPKG
 678 ]>;
 679 def : TremontProc<"tremont">;
 680
 681 // "Arrandale" along with corei3 and corei5. Scheduled with SandyBridgeModel.
 682 class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
 683   FeatureX87,
 684   FeatureCMOV,
 685   FeatureMMX,
 686   FeatureSSE42,
 687   FeatureFXSR,
 688   FeatureNOPL,
 689   Feature64Bit,
 690   FeatureCMPXCHG16B,
 691   FeaturePOPCNT,
 692   FeatureLAHFSAHF,
 693   FeatureMacroFusion
 694 ]>;
 695 def : NehalemProc<"nehalem">;
 696 def : NehalemProc<"corei7">;  // Second name instantiating the same record class.
 697
 698 // Westmere is a similar machine to nehalem with some additional features.
 699 // Westmere is the corei3/i5/i7 path from nehalem to sandybridge.
 700 class WestmereProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
 701   FeatureX87,
 702   FeatureCMOV,
 703   FeatureMMX,
 704   FeatureSSE42,
 705   FeatureFXSR,
 706   FeatureNOPL,
 707   Feature64Bit,
 708   FeatureCMPXCHG16B,
 709   FeaturePOPCNT,
 710   FeaturePCLMUL,  // The only entry not present in the NehalemProc list.
 711   FeatureLAHFSAHF,
 712   FeatureMacroFusion
 713 ]>;
 714 def : WestmereProc<"westmere">;
 715
 716 // SSE is not listed here since LLVM treats AVX as a reimplementation of SSE,
 717 // rather than a superset.
 718 def SNBFeatures : ProcessorFeatures<[], [  // Base set: the Inherited list is empty.
 719   FeatureX87,
 720   FeatureCMOV,
 721   FeatureMMX,
 722   FeatureAVX,
 723   FeatureFXSR,
 724   FeatureNOPL,
 725   Feature64Bit,
 726   FeatureCMPXCHG16B,
 727   FeaturePOPCNT,
 728   FeatureSlowDivide64,
 729   FeaturePCLMUL,
 730   FeatureXSAVE,
 731   FeatureXSAVEOPT,
 732   FeatureLAHFSAHF,
 733   FeatureSlow3OpsLEA,
 734   FeatureFastScalarFSQRT,
 735   FeatureFastSHLDRotate,
 736   FeatureSlowIncDec,
 737   FeatureMergeToThreeWayBranch,
 738   FeatureMacroFusion
 739 ]>;  // IVBFeatures extends this list via SNBFeatures.Value.
 740
 741 class SandyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,  // SNBFeatures plus two per-processor entries.
 742                                                SNBFeatures.Value, [
 743   FeatureSlowUAMem32,  // Kept out of SNBFeatures, so lists built on SNBFeatures.Value do not inherit it.
 744   FeaturePOPCNTFalseDeps  // Likewise kept out of SNBFeatures.
 745 ]>;
 746 def : SandyBridgeProc<"sandybridge">;
 747 def : SandyBridgeProc<"corei7-avx">; // Legacy alias.
 748
 749 def IVBFeatures : ProcessorFeatures<SNBFeatures.Value, [  // SNBFeatures plus the three entries below.
 750   FeatureRDRAND,
 751   FeatureF16C,
 752   FeatureFSGSBase
 753 ]>;
 754
 755 class IvyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,  // Still scheduled with SandyBridgeModel.
 756                                              IVBFeatures.Value, [
 757   FeatureSlowUAMem32,  // Same two per-processor entries as SandyBridgeProc.
 758   FeaturePOPCNTFalseDeps
 759 ]>;
 760 def : IvyBridgeProc<"ivybridge">;
 761 def : IvyBridgeProc<"core-avx-i">; // Legacy alias.
 762
 763 def HSWFeatures : ProcessorFeatures<IVBFeatures.Value, [  // IVBFeatures plus the entries below.
 764   FeatureAVX2,  // Added alongside the inherited FeatureAVX entry, which stays in the list.
 765   FeatureBMI,
 766   FeatureBMI2,
 767   FeatureERMSB,
 768   FeatureFMA,
 769   FeatureINVPCID,
 770   FeatureLZCNT,
 771   FeatureMOVBE,
 772   FeatureFastVariableShuffle
 773 ]>;
 774
 775 class HaswellProc<string Name> : ProcModel<Name, HaswellModel,  // Scheduled with HaswellModel.
 776                                            HSWFeatures.Value, [
 777   FeaturePOPCNTFalseDeps,
 778   FeatureLZCNTFalseDeps  // FeatureSlowUAMem32 is not listed here, unlike IvyBridgeProc.
 779 ]>;
 780 def : HaswellProc<"haswell">;
 781 def : HaswellProc<"core-avx2">; // Legacy alias.
 782
 783 def BDWFeatures : ProcessorFeatures<HSWFeatures.Value, [  // HSWFeatures plus the three entries below.
 784   FeatureADX,
 785   FeatureRDSEED,
 786   FeaturePRFCHW
 787 ]>;
 788 class BroadwellProc<string Name> : ProcModel<Name, BroadwellModel,  // Scheduled with BroadwellModel.
 789                                              BDWFeatures.Value, [
 790   FeaturePOPCNTFalseDeps,  // Same two per-processor entries as HaswellProc.
 791   FeatureLZCNTFalseDeps
 792 ]>;
 793 def : BroadwellProc<"broadwell">;
 794
 795 def SKLFeatures : ProcessorFeatures<BDWFeatures.Value, [  // BDWFeatures plus the entries below.
 796   FeatureAES,
 797   FeatureMPX,
 798   FeatureXSAVEC,
 799   FeatureXSAVES,
 800   FeatureCLFLUSHOPT,
 801   FeatureFastVectorFSQRT
 802 ]>;  // Both SKXFeatures and CNLFeatures extend this list via SKLFeatures.Value.
 803
 804 class SkylakeClientProc<string Name> : ProcModel<Name, SkylakeClientModel,  // Scheduled with SkylakeClientModel.
 805                                                  SKLFeatures.Value, [
 806   FeatureHasFastGather,
 807   FeaturePOPCNTFalseDeps,
 808   FeatureSGX  // Per-processor here; not part of SKLFeatures.
 809 ]>;
 810 def : SkylakeClientProc<"skylake">;
 811
 812 def KNLFeatures : ProcessorFeatures<[], [  // Base set: the Inherited list is empty (not derived from SNBFeatures).
 813   FeatureX87,
 814   FeatureCMOV,
 815   FeatureMMX,
 816   FeatureFXSR,
 817   FeatureNOPL,
 818   Feature64Bit,
 819   FeatureCMPXCHG16B,
 820   FeaturePOPCNT,
 821   FeatureSlowDivide64,
 822   FeaturePCLMUL,
 823   FeatureXSAVE,
 824   FeatureXSAVEOPT,
 825   FeatureLAHFSAHF,
 826   FeatureSlow3OpsLEA,
 827   FeatureSlowIncDec,
 828   FeatureAES,
 829   FeatureRDRAND,
 830   FeatureF16C,
 831   FeatureFSGSBase,
 832   FeatureAVX512,
 833   FeatureERI,
 834   FeatureCDI,
 835   FeaturePFI,
 836   FeaturePREFETCHWT1,
 837   FeatureADX,
 838   FeatureRDSEED,
 839   FeatureMOVBE,
 840   FeatureLZCNT,
 841   FeatureBMI,
 842   FeatureBMI2,
 843   FeatureFMA,
 844   FeaturePRFCHW
 845 ]>;  // Shared by KnightsLandingProc and KnightsMillProc.
 846
 847 // FIXME: define KNL model (HaswellModel is used as the scheduling model until then).
 848 class KnightsLandingProc<string Name> : ProcModel<Name, HaswellModel,
 849                                                   KNLFeatures.Value, [
 850   FeatureSlowTwoMemOps,
 851   FeatureFastPartialYMMorZMMWrite,
 852   FeatureHasFastGather,
 853   FeatureSlowPMADDWD
 854 ]>;
 855 def : KnightsLandingProc<"knl">;
 856
 857 class KnightsMillProc<string Name> : ProcModel<Name, HaswellModel,  // Scheduled with HaswellModel, like KnightsLandingProc.
 858                                                KNLFeatures.Value, [
 859   FeatureSlowTwoMemOps,
 860   FeatureFastPartialYMMorZMMWrite,
 861   FeatureHasFastGather,
 862   FeatureSlowPMADDWD,
 863   FeatureVPOPCNTDQ  // The only entry not present in the KnightsLandingProc list.
 864 ]>;
 865 def : KnightsMillProc<"knm">; // TODO Add AVX5124FMAPS/AVX5124VNNIW features
 866
 867 def SKXFeatures : ProcessorFeatures<SKLFeatures.Value, [  // SKLFeatures plus the entries below.
 868   FeatureAVX512,
 869   FeatureCDI,
 870   FeatureDQI,
 871   FeatureBWI,
 872   FeatureVLX,
 873   FeaturePKU,
 874   FeatureCLWB
 875 ]>;  // CLXFeatures extends this list via SKXFeatures.Value.
 876
 877 class SkylakeServerProc<string Name> : ProcModel<Name, SkylakeServerModel,  // Scheduled with SkylakeServerModel.
 878                                                  SKXFeatures.Value, [
 879   FeatureHasFastGather,
 880   FeaturePOPCNTFalseDeps
 881 ]>;
 882 def : SkylakeServerProc<"skylake-avx512">;
 883 def : SkylakeServerProc<"skx">; // Legacy alias.
 884
 885 def CLXFeatures : ProcessorFeatures<SKXFeatures.Value, [  // SKXFeatures plus one entry.
 886   FeatureVNNI
 887 ]>;
 888
 889 class CascadelakeProc<string Name> : ProcModel<Name, SkylakeServerModel,  // Reuses SkylakeServerModel.
 890                                               CLXFeatures.Value, [
 891   FeatureHasFastGather,  // Same two per-processor entries as SkylakeServerProc.
 892   FeaturePOPCNTFalseDeps
 893 ]>;
 894 def : CascadelakeProc<"cascadelake">;
 895
 896 def CNLFeatures : ProcessorFeatures<SKLFeatures.Value, [  // Built on SKLFeatures, not SKXFeatures.
 897   FeatureAVX512,  // The AVX512..PKU entries repeat what SKXFeatures lists, since that list is not inherited.
 898   FeatureCDI,
 899   FeatureDQI,
 900   FeatureBWI,
 901   FeatureVLX,
 902   FeaturePKU,
 903   FeatureVBMI,
 904   FeatureIFMA,
 905   FeatureSHA,
 906   FeatureSGX
 907 ]>;  // FeatureCLWB (present in SKXFeatures) is not listed here; ICLFeatures adds it.
 908
 909 class CannonlakeProc<string Name> : ProcModel<Name, SkylakeServerModel,  // Reuses SkylakeServerModel.
 910                                               CNLFeatures.Value, [
 911   FeatureHasFastGather  // FeaturePOPCNTFalseDeps is not listed here, unlike SkylakeServerProc.
 912 ]>;
 913 def : CannonlakeProc<"cannonlake">;
 914
 915 def ICLFeatures : ProcessorFeatures<CNLFeatures.Value, [  // CNLFeatures plus the entries below.
 916   FeatureBITALG,
 917   FeatureVAES,
 918   FeatureVBMI2,
 919   FeatureVNNI,
 920   FeatureVPCLMULQDQ,
 921   FeatureVPOPCNTDQ,
 922   FeatureGFNI,
 923   FeatureCLWB,
 924   FeatureRDPID
 925 ]>;  // Shared by IcelakeClientProc and IcelakeServerProc.
 926
 927 class IcelakeClientProc<string Name> : ProcModel<Name, SkylakeServerModel,  // Reuses SkylakeServerModel.
 928                                                  ICLFeatures.Value, [
 929   FeatureHasFastGather  // The only per-processor addition to ICLFeatures.
 930 ]>;
 931 def : IcelakeClientProc<"icelake-client">;
 932
 933 class IcelakeServerProc<string Name> : ProcModel<Name, SkylakeServerModel,  // Reuses SkylakeServerModel.
 934                                                  ICLFeatures.Value, [
 935   FeaturePCONFIG,  // Not in the IcelakeClientProc list.
 936   FeatureWBNOINVD,  // Not in the IcelakeClientProc list.
 937   FeatureHasFastGather
 938 ]>;
 939 def : IcelakeServerProc<"icelake-server">;
 940
 941 // AMD CPUs.
 942
 943 def : Proc<"k6",              [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
 944 def : Proc<"k6-2",            [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;  // Feature3DNow where "k6" has FeatureMMX.
 945 def : Proc<"k6-3",            [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;  // Same list as "k6-2".
 946
 947 foreach P = ["athlon", "athlon-tbird"] in {  // One Proc record per listed name.
 948   def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMOV, Feature3DNowA,
 949                  FeatureNOPL, FeatureSlowSHLD]>;  // No SSE or FXSR entry in this list.
 950 }
 951
 952 foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {  // One Proc record per listed name.
 953   def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMOV, FeatureSSE1,
 954                  Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureSlowSHLD]>;  // "athlon" list plus FeatureSSE1 and FeatureFXSR.
 955 }
 956
 957 foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {  // One Proc record per listed name.
 958   def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
 959                  FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureSlowSHLD,
 960                  FeatureCMOV]>;  // FeatureCMPXCHG16B is not in this list; the "-sse3" variants add it.
 961 }
 962
 963 foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {  // One Proc record per listed name.
 964   def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
 965                  FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureSlowSHLD,
 966                  FeatureCMOV, Feature64Bit]>;  // vs. "k8": FeatureSSE3 replaces FeatureSSE2, plus FeatureCMPXCHG16B.
 967 }
 968
 969 foreach P = ["amdfam10", "barcelona"] in {  // One Proc record per listed name.
 970   def : Proc<P, [FeatureX87, FeatureSSE4A, Feature3DNowA, FeatureFXSR,
 971                  FeatureNOPL, FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT,
 972                  FeatureSlowSHLD, FeatureLAHFSAHF, FeatureCMOV, Feature64Bit]>;  // No FeatureSlowUAMem16 here, unlike the k8 lists.
 973 }
 974
 975 // Bobcat. Declared with Proc, so no scheduling model is named here.
 976 def : Proc<"btver1", [
 977   FeatureX87,
 978   FeatureCMOV,
 979   FeatureMMX,
 980   FeatureSSSE3,
 981   FeatureSSE4A,
 982   FeatureFXSR,
 983   FeatureNOPL,
 984   Feature64Bit,
 985   FeatureCMPXCHG16B,
 986   FeaturePRFCHW,
 987   FeatureLZCNT,
 988   FeaturePOPCNT,
 989   FeatureSlowSHLD,
 990   FeatureLAHFSAHF,
 991   FeatureFast15ByteNOP
 992 ]>;
 993
 994 // Jaguar. Scheduled with BtVer2Model.
 995 def : ProcessorModel<"btver2", BtVer2Model, [
 996   FeatureX87,
 997   FeatureCMOV,
 998   FeatureMMX,
 999   FeatureAVX,
1000   FeatureFXSR,
1001   FeatureNOPL,
1002   FeatureSSE4A,
1003   Feature64Bit,
1004   FeatureCMPXCHG16B,
1005   FeaturePRFCHW,
1006   FeatureAES,
1007   FeaturePCLMUL,
1008   FeatureBMI,
1009   FeatureF16C,
1010   FeatureMOVBE,
1011   FeatureLZCNT,
1012   FeatureFastLZCNT,
1013   FeaturePOPCNT,
1014   FeatureXSAVE,
1015   FeatureXSAVEOPT,
1016   FeatureSlowSHLD,
1017   FeatureLAHFSAHF,
1018   FeatureFast15ByteNOP,
1019   FeatureFastBEXTR,
1020   FeatureFastPartialYMMorZMMWrite,
1021   FeatureFastHorizontalOps
1022 ]>;
1023
1024 // Bulldozer. Note that "bdver1" is scheduled with BdVer2Model, the same model "bdver2" uses.
1025 def : ProcessorModel<"bdver1", BdVer2Model, [
1026   FeatureX87,
1027   FeatureCMOV,
1028   FeatureXOP,
1029   FeatureFMA4,
1030   Feature64Bit,
1031   FeatureCMPXCHG16B,
1032   FeatureAES,
1033   FeaturePRFCHW,
1034   FeaturePCLMUL,
1035   FeatureMMX,
1036   FeatureAVX,
1037   FeatureFXSR,
1038   FeatureNOPL,
1039   FeatureSSE4A,
1040   FeatureLZCNT,
1041   FeaturePOPCNT,
1042   FeatureXSAVE,
1043   FeatureLWP,
1044   FeatureSlowSHLD,
1045   FeatureLAHFSAHF,
1046   FeatureFast11ByteNOP,
1047   FeatureMacroFusion
1048 ]>;
1049 // Piledriver. Scheduled with BdVer2Model.
1050 def : ProcessorModel<"bdver2", BdVer2Model, [
1051   FeatureX87,
1052   FeatureCMOV,
1053   FeatureXOP,
1054   FeatureFMA4,
1055   Feature64Bit,
1056   FeatureCMPXCHG16B,
1057   FeatureAES,
1058   FeaturePRFCHW,
1059   FeaturePCLMUL,
1060   FeatureMMX,
1061   FeatureAVX,
1062   FeatureFXSR,
1063   FeatureNOPL,
1064   FeatureSSE4A,
1065   FeatureF16C,  // Not in the "bdver1" list.
1066   FeatureLZCNT,
1067   FeaturePOPCNT,
1068   FeatureXSAVE,
1069   FeatureBMI,  // Not in the "bdver1" list.
1070   FeatureTBM,  // Not in the "bdver1" list.
1071   FeatureLWP,
1072   FeatureFMA,  // Not in the "bdver1" list.
1073   FeatureSlowSHLD,
1074   FeatureLAHFSAHF,
1075   FeatureFast11ByteNOP,
1076   FeatureFastBEXTR,  // Not in the "bdver1" list.
1077   FeatureMacroFusion
1078 ]>;
1079
1080 // Steamroller. Declared with Proc rather than ProcessorModel, so no scheduling model is named here.
1081 def : Proc<"bdver3", [
1082   FeatureX87,
1083   FeatureCMOV,
1084   FeatureXOP,
1085   FeatureFMA4,
1086   Feature64Bit,
1087   FeatureCMPXCHG16B,
1088   FeatureAES,
1089   FeaturePRFCHW,
1090   FeaturePCLMUL,
1091   FeatureMMX,
1092   FeatureAVX,
1093   FeatureFXSR,
1094   FeatureNOPL,
1095   FeatureSSE4A,
1096   FeatureF16C,
1097   FeatureLZCNT,
1098   FeaturePOPCNT,
1099   FeatureXSAVE,
1100   FeatureBMI,
1101   FeatureTBM,
1102   FeatureLWP,
1103   FeatureFMA,
1104   FeatureXSAVEOPT,  // Not in the "bdver2" list.
1105   FeatureSlowSHLD,
1106   FeatureFSGSBase,  // Not in the "bdver2" list.
1107   FeatureLAHFSAHF,
1108   FeatureFast11ByteNOP,
1109   FeatureFastBEXTR,
1110   FeatureMacroFusion
1111 ]>;
1112
1113 // Excavator. Declared with Proc rather than ProcessorModel, so no scheduling model is named here.
1114 def : Proc<"bdver4", [
1115   FeatureX87,
1116   FeatureCMOV,
1117   FeatureMMX,
1118   FeatureAVX2,  // "bdver3" lists FeatureAVX instead.
1119   FeatureFXSR,
1120   FeatureNOPL,
1121   FeatureXOP,
1122   FeatureFMA4,
1123   Feature64Bit,
1124   FeatureCMPXCHG16B,
1125   FeatureAES,
1126   FeaturePRFCHW,
1127   FeaturePCLMUL,
1128   FeatureF16C,
1129   FeatureLZCNT,
1130   FeaturePOPCNT,
1131   FeatureXSAVE,
1132   FeatureBMI,
1133   FeatureBMI2,  // Not in the "bdver3" list.
1134   FeatureTBM,
1135   FeatureLWP,
1136   FeatureFMA,
1137   FeatureXSAVEOPT,
1138   FeatureSlowSHLD,
1139   FeatureFSGSBase,
1140   FeatureLAHFSAHF,
1141   FeatureFastBEXTR,
1142   FeatureFast11ByteNOP,
1143   FeatureMWAITX,  // Not in the "bdver3" list.
1144   FeatureMacroFusion
1145 ]>;  // NOTE(review): FeatureSSE4A is not listed explicitly here, unlike bdver1-bdver3 - confirm it is implied.
1146
1147 // Znver1. Scheduled with Znver1Model.
1148 def: ProcessorModel<"znver1", Znver1Model, [
1149   FeatureADX,
1150   FeatureAES,
1151   FeatureAVX2,
1152   FeatureBMI,
1153   FeatureBMI2,
1154   FeatureCLFLUSHOPT,
1155   FeatureCLZERO,
1156   FeatureCMOV,
1157   Feature64Bit,
1158   FeatureCMPXCHG16B,
1159   FeatureF16C,
1160   FeatureFMA,
1161   FeatureFSGSBase,
1162   FeatureFXSR,
1163   FeatureNOPL,
1164   FeatureFastLZCNT,
1165   FeatureLAHFSAHF,
1166   FeatureLZCNT,
1167   FeatureFastBEXTR,
1168   FeatureFast15ByteNOP,
1169   FeatureMacroFusion,
1170   FeatureMMX,
1171   FeatureMOVBE,
1172   FeatureMWAITX,
1173   FeaturePCLMUL,
1174   FeaturePOPCNT,
1175   FeaturePRFCHW,
1176   FeatureRDRAND,
1177   FeatureRDSEED,
1178   FeatureSHA,
1179   FeatureSSE4A,
1180   FeatureSlowSHLD,
1181   FeatureX87,
1182   FeatureXSAVE,
1183   FeatureXSAVEC,
1184   FeatureXSAVEOPT,
1185   FeatureXSAVES]>;  // Unlike the bdver lists, FeatureXOP, FeatureFMA4, FeatureTBM and FeatureLWP do not appear.
1186
1187 def : Proc<"geode",           [FeatureX87, FeatureSlowUAMem16, Feature3DNowA]>;  // Three-entry list; no FeatureCMOV.
1188
1189 def : Proc<"winchip-c6",      [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
1190 def : Proc<"winchip2",        [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
1191 def : Proc<"c3",              [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;  // Same list as "winchip2".
1192 def : Proc<"c3-2",            [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
1193                                FeatureSSE1, FeatureFXSR, FeatureCMOV]>;  // FeatureNOPL is not in this list.
1194
1195 // We also provide a generic 64-bit specific x86 processor model which tries to
1196 // be good for modern chips without enabling instruction set encodings past the
1197 // basic SSE2 and 64-bit ones. It disables slow things from any mainstream and
1198 // modern 64-bit x86 chip, and enables features that are generally beneficial.
1199 //
1200 // We currently use the Sandy Bridge model as the default scheduling model as
1201 // we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge, which
1202 // covers a huge swath of x86 processors. If there are specific scheduling
1203 // knobs which need to be tuned differently for AMD chips, we might consider
1204 // forming a common base for them.
1205 def : ProcessorModel<"x86-64", SandyBridgeModel, [
1206   FeatureX87,
1207   FeatureCMOV,
1208   FeatureMMX,
1209   FeatureSSE2,  // Highest SSE level listed for this generic processor.
1210   FeatureFXSR,
1211   FeatureNOPL,
1212   Feature64Bit,
1213   FeatureSlow3OpsLEA,  // The last three entries also appear in SNBFeatures.
1214   FeatureSlowIncDec,
1215   FeatureMacroFusion
1216 ]>;
1217
1218 //===----------------------------------------------------------------------===//
1219 // Calling Conventions
1220 //===----------------------------------------------------------------------===//
1221
1222 include "X86CallingConv.td"
1223
1224
1225 //===----------------------------------------------------------------------===//
1226 // Assembly Parser
1227 //===----------------------------------------------------------------------===//
1228
1229 def ATTAsmParserVariant : AsmParserVariant {  // Parser variant named "att".
1230   int Variant = 0;  // Same number as ATTAsmWriter.Variant.
1231
1232   // Variant name.
1233   string Name = "att";
1234
1235   // Discard comments in assembly strings.
1236   string CommentDelimiter = "#";
1237
1238   // Recognize hard coded registers.
1239   string RegisterPrefix = "%";
1240 }
1241
1242 def IntelAsmParserVariant : AsmParserVariant {  // Parser variant named "intel".
1243   int Variant = 1;  // Same number as IntelAsmWriter.Variant.
1244
1245   // Variant name.
1246   string Name = "intel";
1247
1248   // Discard comments in assembly strings.
1249   string CommentDelimiter = ";";
1250
1251   // Recognize hard coded registers.
1252   string RegisterPrefix = "";  // Empty prefix, unlike the "%" used by the "att" variant.
1253 }
1254
1255 //===----------------------------------------------------------------------===//
1256 // Assembly Printers
1257 //===----------------------------------------------------------------------===//
1258
1259 // The X86 target supports two different syntaxes for emitting machine code.
1260 // This is controlled by the -x86-asm-syntax={att|intel} option.
1261 def ATTAsmWriter : AsmWriter {
1262   string AsmWriterClassName  = "ATTInstPrinter";
1263   int Variant = 0;  // Same number as ATTAsmParserVariant.Variant.
1264 }
1265 def IntelAsmWriter : AsmWriter {  // Writer record whose class name is "IntelInstPrinter".
1266   string AsmWriterClassName  = "IntelInstPrinter";
1267   int Variant = 1;  // Same number as IntelAsmParserVariant.Variant.
1268 }
1269
1270 def X86 : Target {  // Top-level Target record tying together the pieces defined above.
1271   // Information about the instructions...
1272   let InstructionSet = X86InstrInfo;
1273   let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];  // Variant 0 ("att"), then variant 1 ("intel").
1274   let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];  // Listed in the same variant order as the parsers.
1275   let AllowRegisterRenaming = 1;
1276 }
1277
1278 //===----------------------------------------------------------------------===//
1279 // Pfm Counters
1280 //===----------------------------------------------------------------------===//
1281
1282 include "X86PfmCounters.td"