llvm/lib/Target/X86/X86.td

   1 //===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This is a target description file for the Intel i386 architecture, referred
  10 // to here as the "X86" architecture.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 // Get the target-independent interfaces which we are implementing...
  15 //
  16 include "llvm/Target/Target.td"
  17
  18 //===----------------------------------------------------------------------===//
  19 // X86 Subtarget state
  20 //
  21
  22 def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
  23                                   "64-bit mode (x86_64)">;
  24 def Mode32Bit : SubtargetFeature<"32bit-mode", "In32BitMode", "true",
  25                                   "32-bit mode (80386)">;
  26 def Mode16Bit : SubtargetFeature<"16bit-mode", "In16BitMode", "true",
  27                                   "16-bit mode (i8086)">;
  28
  29 //===----------------------------------------------------------------------===//
  30 // X86 Subtarget ISA features
  31 //===----------------------------------------------------------------------===//
  32
  33 def FeatureX87     : SubtargetFeature<"x87","HasX87", "true",
  34                                       "Enable X87 float instructions">;
  35
  36 def FeatureNOPL    : SubtargetFeature<"nopl", "HasNOPL", "true",
  37                                       "Enable NOPL instruction">;
  38
  39 def FeatureCMOV    : SubtargetFeature<"cmov","HasCMov", "true",
  40                                       "Enable conditional move instructions">;
  41
  42 def FeatureCMPXCHG8B : SubtargetFeature<"cx8", "HasCmpxchg8b", "true",
  43                                         "Support CMPXCHG8B instructions">;
  44
  45 def FeaturePOPCNT   : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
  46                                        "Support POPCNT instruction">;
  47
  48 def FeatureFXSR    : SubtargetFeature<"fxsr", "HasFXSR", "true",
  49                                       "Support fxsave/fxrestore instructions">;
  50
  51 def FeatureXSAVE   : SubtargetFeature<"xsave", "HasXSAVE", "true",
  52                                        "Support xsave instructions">;
  53
  54 def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
  55                                        "Support xsaveopt instructions",
  56                                        [FeatureXSAVE]>;
  57
  58 def FeatureXSAVEC  : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
  59                                        "Support xsavec instructions",
  60                                        [FeatureXSAVE]>;
  61
  62 def FeatureXSAVES  : SubtargetFeature<"xsaves", "HasXSAVES", "true",
  63                                        "Support xsaves instructions",
  64                                        [FeatureXSAVE]>;
  65
  66 def FeatureSSE1    : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
  67                                       "Enable SSE instructions">;
  68 def FeatureSSE2    : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
  69                                       "Enable SSE2 instructions",
  70                                       [FeatureSSE1]>;
  71 def FeatureSSE3    : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
  72                                       "Enable SSE3 instructions",
  73                                       [FeatureSSE2]>;
  74 def FeatureSSSE3   : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
  75                                       "Enable SSSE3 instructions",
  76                                       [FeatureSSE3]>;
  77 def FeatureSSE41   : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
  78                                       "Enable SSE 4.1 instructions",
  79                                       [FeatureSSSE3]>;
  80 def FeatureSSE42   : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
  81                                       "Enable SSE 4.2 instructions",
  82                                       [FeatureSSE41]>;
  83 // The MMX subtarget feature is separate from the rest of the SSE features
  84 // because it's important (for odd compatibility reasons) to be able to
  85 // turn it off explicitly while allowing SSE+ to be on.
  86 def FeatureMMX     : SubtargetFeature<"mmx","X863DNowLevel", "MMX",
  87                                       "Enable MMX instructions">;
  88 def Feature3DNow   : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
  89                                       "Enable 3DNow! instructions",
  90                                       [FeatureMMX]>;
  91 def Feature3DNowA  : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
  92                                       "Enable 3DNow! Athlon instructions",
  93                                       [Feature3DNow]>;
  94 // All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
  95 // feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
  96 // without disabling 64-bit mode. Nothing should imply this feature bit. It
  97 // is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
  98 def Feature64Bit   : SubtargetFeature<"64bit", "HasX86_64", "true",
  99                                       "Support 64-bit instructions">;
 100 def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
 101                                       "64-bit with cmpxchg16b",
 102                                       [FeatureCMPXCHG8B]>;
 103 def FeatureSSE4A   : SubtargetFeature<"sse4a", "HasSSE4A", "true",
 104                                       "Support SSE 4a instructions",
 105                                       [FeatureSSE3]>;
 106
 107 def FeatureAVX     : SubtargetFeature<"avx", "X86SSELevel", "AVX",
 108                                       "Enable AVX instructions",
 109                                       [FeatureSSE42]>;
 110 def FeatureAVX2    : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
 111                                       "Enable AVX2 instructions",
 112                                       [FeatureAVX]>;
 113 def FeatureFMA     : SubtargetFeature<"fma", "HasFMA", "true",
 114                                       "Enable three-operand fused multiply-add",
 115                                       [FeatureAVX]>;
 116 def FeatureF16C    : SubtargetFeature<"f16c", "HasF16C", "true",
 117                        "Support 16-bit floating point conversion instructions",
 118                        [FeatureAVX]>;
 119 def FeatureAVX512   : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F",
 120                                       "Enable AVX-512 instructions",
 121                                       [FeatureAVX2, FeatureFMA, FeatureF16C]>;
 122 def FeatureERI      : SubtargetFeature<"avx512er", "HasERI", "true",
 123                       "Enable AVX-512 Exponential and Reciprocal Instructions",
 124                                       [FeatureAVX512]>;
 125 def FeatureCDI      : SubtargetFeature<"avx512cd", "HasCDI", "true",
 126                       "Enable AVX-512 Conflict Detection Instructions",
 127                                       [FeatureAVX512]>;
 128 def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
 129                        "true", "Enable AVX-512 Population Count Instructions",
 130                                       [FeatureAVX512]>;
 131 def FeaturePFI      : SubtargetFeature<"avx512pf", "HasPFI", "true",
 132                       "Enable AVX-512 PreFetch Instructions",
 133                                       [FeatureAVX512]>;
 134 def FeaturePREFETCHWT1  : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1",
 135                                    "true",
 136                                    "Prefetch with Intent to Write and T1 Hint">;
 137 def FeatureDQI     : SubtargetFeature<"avx512dq", "HasDQI", "true",
 138                       "Enable AVX-512 Doubleword and Quadword Instructions",
 139                                       [FeatureAVX512]>;
 140 def FeatureBWI     : SubtargetFeature<"avx512bw", "HasBWI", "true",
 141                       "Enable AVX-512 Byte and Word Instructions",
 142                                       [FeatureAVX512]>;
 143 def FeatureVLX     : SubtargetFeature<"avx512vl", "HasVLX", "true",
 144                       "Enable AVX-512 Vector Length eXtensions",
 145                                       [FeatureAVX512]>;
 146 def FeatureVBMI     : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
 147                       "Enable AVX-512 Vector Byte Manipulation Instructions",
 148                                       [FeatureBWI]>;
 149 def FeatureVBMI2    : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
 150                       "Enable AVX-512 further Vector Byte Manipulation Instructions",
 151                                       [FeatureBWI]>;
 152 def FeatureIFMA     : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
 153                       "Enable AVX-512 Integer Fused Multiply-Add",
 154                                       [FeatureAVX512]>;
 155 def FeaturePKU   : SubtargetFeature<"pku", "HasPKU", "true",
 156                       "Enable protection keys">;
 157 def FeatureVNNI    : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
 158                           "Enable AVX-512 Vector Neural Network Instructions",
 159                                       [FeatureAVX512]>;
 160 def FeatureAVXVNNI    : SubtargetFeature<"avxvnni", "HasAVXVNNI", "true",
 161                            "Support AVX_VNNI encoding",
 162                                       [FeatureAVX2]>;
 163 def FeatureBF16    : SubtargetFeature<"avx512bf16", "HasBF16", "true",
 164                            "Support bfloat16 floating point",
 165                                       [FeatureBWI]>;
 166 def FeatureBITALG  : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
 167                        "Enable AVX-512 Bit Algorithms",
 168                         [FeatureBWI]>;
 169 def FeatureVP2INTERSECT  : SubtargetFeature<"avx512vp2intersect",
 170                                             "HasVP2INTERSECT", "true",
 171                                             "Enable AVX-512 vp2intersect",
 172                                             [FeatureAVX512]>;
 173 // FIXME: FP16 scalar intrinsics use the type v8f16, which is supposed to be
 174 // guarded under condition hasVLX. So we imply it in FeatureFP16 currently.
 175 // FIXME: FP16 conversion between f16 and i64 customize type v8i64, which is
 176 // supposed to be guarded under condition hasDQI. So we imply it in FeatureFP16
 177 // currently.
 178 def FeatureFP16    : SubtargetFeature<"avx512fp16", "HasFP16", "true",
 179                            "Support 16-bit floating point",
 180                            [FeatureBWI, FeatureVLX, FeatureDQI]>;
 181 def FeaturePCLMUL  : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
 182                          "Enable packed carry-less multiplication instructions",
 183                                [FeatureSSE2]>;
 184 def FeatureGFNI    : SubtargetFeature<"gfni", "HasGFNI", "true",
 185                          "Enable Galois Field Arithmetic Instructions",
 186                                [FeatureSSE2]>;
 187 def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
 188                                          "Enable vpclmulqdq instructions",
 189                                          [FeatureAVX, FeaturePCLMUL]>;
 190 def FeatureFMA4    : SubtargetFeature<"fma4", "HasFMA4", "true",
 191                                       "Enable four-operand fused multiply-add",
 192                                       [FeatureAVX, FeatureSSE4A]>;
 193 def FeatureXOP     : SubtargetFeature<"xop", "HasXOP", "true",
 194                                       "Enable XOP instructions",
 195                                       [FeatureFMA4]>;
 196 def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
 197                                           "HasSSEUnalignedMem", "true",
 198                       "Allow unaligned memory operands with SSE instructions">;
 199 def FeatureAES     : SubtargetFeature<"aes", "HasAES", "true",
 200                                       "Enable AES instructions",
 201                                       [FeatureSSE2]>;
 202 def FeatureVAES    : SubtargetFeature<"vaes", "HasVAES", "true",
 203                        "Promote selected AES instructions to AVX512/AVX registers",
 204                         [FeatureAVX, FeatureAES]>;
 205 def FeatureTBM     : SubtargetFeature<"tbm", "HasTBM", "true",
 206                                       "Enable TBM instructions">;
 207 def FeatureLWP     : SubtargetFeature<"lwp", "HasLWP", "true",
 208                                       "Enable LWP instructions">;
 209 def FeatureMOVBE   : SubtargetFeature<"movbe", "HasMOVBE", "true",
 210                                       "Support MOVBE instruction">;
 211 def FeatureRDRAND  : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
 212                                       "Support RDRAND instruction">;
 213 def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
 214                                        "Support FS/GS Base instructions">;
 215 def FeatureLZCNT   : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
 216                                       "Support LZCNT instruction">;
 217 def FeatureBMI     : SubtargetFeature<"bmi", "HasBMI", "true",
 218                                       "Support BMI instructions">;
 219 def FeatureBMI2    : SubtargetFeature<"bmi2", "HasBMI2", "true",
 220                                       "Support BMI2 instructions">;
 221 def FeatureRTM     : SubtargetFeature<"rtm", "HasRTM", "true",
 222                                       "Support RTM instructions">;
 223 def FeatureADX     : SubtargetFeature<"adx", "HasADX", "true",
 224                                       "Support ADX instructions">;
 225 def FeatureSHA     : SubtargetFeature<"sha", "HasSHA", "true",
 226                                       "Enable SHA instructions",
 227                                       [FeatureSSE2]>;
 228 def FeatureSHSTK   : SubtargetFeature<"shstk", "HasSHSTK", "true",
 229                        "Support CET Shadow-Stack instructions">;
 230 def FeaturePRFCHW  : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
 231                                       "Support PRFCHW instructions">;
 232 def FeatureRDSEED  : SubtargetFeature<"rdseed", "HasRDSEED", "true",
 233                                       "Support RDSEED instruction">;
 234 def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF64", "true",
 235                            "Support LAHF and SAHF instructions in 64-bit mode">;
 236 def FeatureMWAITX  : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
 237                                       "Enable MONITORX/MWAITX timer functionality">;
 238 def FeatureCLZERO  : SubtargetFeature<"clzero", "HasCLZERO", "true",
 239                                       "Enable Cache Line Zero">;
 240 def FeatureCLDEMOTE  : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
 241                                       "Enable Cache Demote">;
 242 def FeaturePTWRITE  : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
 243                                       "Support ptwrite instruction">;
 244 def FeatureAMXTILE     : SubtargetFeature<"amx-tile", "HasAMXTILE", "true",
 245                                       "Support AMX-TILE instructions">;
 246 def FeatureAMXINT8     : SubtargetFeature<"amx-int8", "HasAMXINT8", "true",
 247                                       "Support AMX-INT8 instructions",
 248                                       [FeatureAMXTILE]>;
 249 def FeatureAMXBF16     : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
 250                                       "Support AMX-BF16 instructions",
 251                                       [FeatureAMXTILE]>;
 252 def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
 253                                       "Invalidate Process-Context Identifier">;
 254 def FeatureSGX     : SubtargetFeature<"sgx", "HasSGX", "true",
 255                                       "Enable Software Guard Extensions">;
 256 def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
 257                                       "Flush A Cache Line Optimized">;
 258 def FeatureCLWB    : SubtargetFeature<"clwb", "HasCLWB", "true",
 259                                       "Cache Line Write Back">;
 260 def FeatureWBNOINVD    : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
 261                                       "Write Back No Invalidate">;
 262 def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
 263                                     "Support RDPID instructions">;
 264 def FeatureWAITPKG  : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
 265                                       "Wait and pause enhancements">;
 266 def FeatureENQCMD : SubtargetFeature<"enqcmd", "HasENQCMD", "true",
 267                                      "Has ENQCMD instructions">;
 268 def FeatureKL  : SubtargetFeature<"kl", "HasKL", "true",
 269                                   "Support Key Locker kl Instructions",
 270                                   [FeatureSSE2]>;
 271 def FeatureWIDEKL  : SubtargetFeature<"widekl", "HasWIDEKL", "true",
 272                                       "Support Key Locker wide Instructions",
 273                                       [FeatureKL]>;
 274 def FeatureHRESET : SubtargetFeature<"hreset", "HasHRESET", "true",
 275                                       "Has hreset instruction">;
 276 def FeatureSERIALIZE : SubtargetFeature<"serialize", "HasSERIALIZE", "true",
 277                                         "Has serialize instruction">;
 278 def FeatureTSXLDTRK : SubtargetFeature<"tsxldtrk", "HasTSXLDTRK", "true",
 279                                        "Support TSXLDTRK instructions">;
 280 def FeatureUINTR : SubtargetFeature<"uintr", "HasUINTR", "true",
 281                                     "Has UINTR Instructions">;
 282 def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
 283                                       "platform configuration instruction">;
 284 def FeatureMOVDIRI  : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
 285                                        "Support movdiri instruction">;
 286 def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
 287                                         "Support movdir64b instruction">;
 288
 289 // Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
 290 // "string operations"). See "REP String Enhancement" in the Intel Software
 291 // Development Manual. This feature essentially means that REP MOVSB will copy
 292 // using the largest available size instead of copying bytes one by one, making
 293 // it at least as fast as REP MOVS{W,D,Q}.
 294 def FeatureERMSB
 295     : SubtargetFeature<
 296           "ermsb", "HasERMSB", "true",
 297           "REP MOVS/STOS are fast">;
 298
 299 // Icelake and newer processors have Fast Short REP MOV.
 300 def FeatureFSRM
 301     : SubtargetFeature<
 302           "fsrm", "HasFSRM", "true",
 303           "REP MOVSB of short lengths is faster">;
 304
 305 def FeatureSoftFloat
 306     : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
 307                        "Use software floating point features">;
 308
 309 //===----------------------------------------------------------------------===//
 310 // X86 Subtarget Security Mitigation features
 311 //===----------------------------------------------------------------------===//
 312
 313 // Lower indirect calls using a special construct called a `retpoline` to
 314 // mitigate potential Spectre v2 attacks against them.
 315 def FeatureRetpolineIndirectCalls
 316     : SubtargetFeature<
 317           "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
 318           "Remove speculation of indirect calls from the generated code">;
 319
 320 // Lower indirect branches and switches either using conditional branch trees
 321 // or using a special construct called a `retpoline` to mitigate potential
 322 // Spectre v2 attacks against them.
 323 def FeatureRetpolineIndirectBranches
 324     : SubtargetFeature<
 325           "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
 326           "Remove speculation of indirect branches from the generated code">;
 327
 328 // Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
 329 // `retpoline-indirect-branches` above.
 330 def FeatureRetpoline
 331     : SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
 332                        "Remove speculation of indirect branches from the "
 333                        "generated code, either by avoiding them entirely or "
 334                        "lowering them with a speculation blocking construct",
 335                        [FeatureRetpolineIndirectCalls,
 336                         FeatureRetpolineIndirectBranches]>;
 337
 338 // Rely on external thunks for the emitted retpoline calls. This allows users
 339 // to provide their own custom thunk definitions in highly specialized
 340 // environments such as a kernel that does boot-time hot patching.
 341 def FeatureRetpolineExternalThunk
 342     : SubtargetFeature<
 343           "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
 344           "When lowering an indirect call or branch using a `retpoline`, rely "
 345           "on the specified user provided thunk rather than emitting one "
 346           "ourselves. Only has effect when combined with some other retpoline "
 347           "feature", [FeatureRetpolineIndirectCalls]>;
 348
 349 // Mitigate LVI attacks against indirect calls/branches and call returns
 350 def FeatureLVIControlFlowIntegrity
 351     : SubtargetFeature<
 352           "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
 353           "Prevent indirect calls/branches from using a memory operand, and "
 354           "precede all indirect calls/branches from a register with an "
 355           "LFENCE instruction to serialize control flow. Also decompose RET "
 356           "instructions into a POP+LFENCE+JMP sequence.">;
 357
 358 // Enable SESES to mitigate speculative execution attacks
 359 def FeatureSpeculativeExecutionSideEffectSuppression
 360     : SubtargetFeature<
 361           "seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
 362           "Prevent speculative execution side channel timing attacks by "
 363           "inserting a speculation barrier before memory reads, memory writes, "
 364           "and conditional branches. Implies LVI Control Flow integrity.",
 365           [FeatureLVIControlFlowIntegrity]>;
 366
 367 // Mitigate LVI attacks against data loads
 368 def FeatureLVILoadHardening
 369     : SubtargetFeature<
 370           "lvi-load-hardening", "UseLVILoadHardening", "true",
 371           "Insert LFENCE instructions to prevent data speculatively injected "
 372           "into loads from being used maliciously.">;
 373
 374 //===----------------------------------------------------------------------===//
 375 // X86 Subtarget Tuning features
 376 //===----------------------------------------------------------------------===//
 377
 378 def TuningSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
 379                                        "SHLD instruction is slow">;
 380
 381 def TuningSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
 382                                         "PMULLD instruction is slow">;
 383
 384 def TuningSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
 385                                           "true",
 386                                           "PMADDWD is slower than PMULLD">;
 387
 388 // FIXME: This should not apply to CPUs that do not have SSE.
 389 def TuningSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
 390                                 "IsUAMem16Slow", "true",
 391                                 "Slow unaligned 16-byte memory access">;
 392
 393 def TuningSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
 394                                 "IsUAMem32Slow", "true",
 395                                 "Slow unaligned 32-byte memory access">;
 396
 397 def TuningLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
 398                                      "Use LEA for adjusting the stack pointer">;
 399
 400 def TuningSlowDivide32 : SubtargetFeature<"idivl-to-divb",
 401                                      "HasSlowDivide32", "true",
 402                                      "Use 8-bit divide for positive values less than 256">;
 403
 404 def TuningSlowDivide64 : SubtargetFeature<"idivq-to-divl",
 405                                      "HasSlowDivide64", "true",
 406                                      "Use 32-bit divide for positive values less than 2^32">;
 407
 408 def TuningPadShortFunctions : SubtargetFeature<"pad-short-functions",
 409                                      "PadShortFunctions", "true",
 410                                      "Pad short functions">;
 411
 412 // On some processors, instructions that implicitly take two memory operands are
 413 // slow. In practice, this means that CALL, PUSH, and POP with memory operands
 414 // should be avoided in favor of a MOV + register CALL/PUSH/POP.
 415 def TuningSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
 416                                      "SlowTwoMemOps", "true",
 417                                      "Two memory operand instructions are slow">;
 418
 419 def TuningLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
 420                                    "LEA instruction needs inputs at AG stage">;
 421
 422 def TuningSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
 423                                    "LEA instruction with certain arguments is slow">;
 424
 425 def TuningSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
 426                                    "LEA instruction with 3 ops or certain registers is slow">;
 427
 428 def TuningSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
 429                                    "INC and DEC instructions are slower than ADD and SUB">;
 430
 431 def TuningPOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
 432                                      "HasPOPCNTFalseDeps", "true",
 433                                      "POPCNT has a false dependency on dest register">;
 434
 435 def TuningLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
 436                                      "HasLZCNTFalseDeps", "true",
 437                                      "LZCNT/TZCNT have a false dependency on dest register">;
 438
 439 // On recent X86 (port bound) processors, it's preferable to combine to a single shuffle
 440 // using a variable mask over multiple fixed shuffles.
 441 def TuningFastVariableCrossLaneShuffle
 442     : SubtargetFeature<"fast-variable-crosslane-shuffle",
 443                        "HasFastVariableCrossLaneShuffle",
 444                        "true", "Cross-lane shuffles with variable masks are fast">;
 445 def TuningFastVariablePerLaneShuffle
 446     : SubtargetFeature<"fast-variable-perlane-shuffle",
 447                        "HasFastVariablePerLaneShuffle",
 448                        "true", "Per-lane shuffles with variable masks are fast">;
 449
 450 // On some X86 processors, a vzeroupper instruction should be inserted after
 451 // using ymm/zmm registers before executing code that may use SSE instructions.
 452 def TuningInsertVZEROUPPER
 453     : SubtargetFeature<"vzeroupper",
 454                        "InsertVZEROUPPER",
 455                        "true", "Should insert vzeroupper instructions">;
 456
 457 // TuningFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
 458 // than the corresponding NR code. TuningFastVectorFSQRT should be enabled if
 459 // vector FSQRT has higher throughput than the corresponding NR code.
 460 // The idea is that throughput bound code is likely to be vectorized, so for
 461 // vectorized code we should care about the throughput of SQRT operations.
 462 // But if the code is scalar that probably means that the code has some kind of
 463 // dependency and we should care more about reducing the latency.
 464 def TuningFastScalarFSQRT
 465     : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
 466                        "true", "Scalar SQRT is fast (disable Newton-Raphson)">;
 467 def TuningFastVectorFSQRT
 468     : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
 469                        "true", "Vector SQRT is fast (disable Newton-Raphson)">;
 470
 471 // If lzcnt has equivalent latency/throughput to most simple integer ops, it can
 472 // be used to replace test/set sequences.
 473 def TuningFastLZCNT
 474     : SubtargetFeature<
 475           "fast-lzcnt", "HasFastLZCNT", "true",
 476           "LZCNT instructions are as fast as most simple integer ops">;
 477
 478 // If the target can efficiently decode NOPs up to 7 bytes in length.
 479 def TuningFast7ByteNOP
 480     : SubtargetFeature<
 481           "fast-7bytenop", "HasFast7ByteNOP", "true",
 482           "Target can quickly decode up to 7 byte NOPs">;
 483
 484 // If the target can efficiently decode NOPs up to 11 bytes in length.
 485 def TuningFast11ByteNOP
 486     : SubtargetFeature<
 487           "fast-11bytenop", "HasFast11ByteNOP", "true",
 488           "Target can quickly decode up to 11 byte NOPs">;
 489
 490 // If the target can efficiently decode NOPs up to 15 bytes in length.
 491 def TuningFast15ByteNOP
 492     : SubtargetFeature<
 493           "fast-15bytenop", "HasFast15ByteNOP", "true",
 494           "Target can quickly decode up to 15 byte NOPs">;
 495
 496 // Sandy Bridge and newer processors can use SHLD with the same source on both
 497 // inputs to implement rotate to avoid the partial flag update of the normal
 498 // rotate instructions.
 499 def TuningFastSHLDRotate
 500     : SubtargetFeature<
 501           "fast-shld-rotate", "HasFastSHLDRotate", "true",
 502           "SHLD can be used as a faster rotate">;
 503
 504 // Bulldozer and newer processors can merge CMP/TEST (but not other
 505 // instructions) with conditional branches.
 506 def TuningBranchFusion
 507     : SubtargetFeature<"branchfusion", "HasBranchFusion", "true",
 508                  "CMP/TEST can be fused with conditional branches">;
 509
 510 // Sandy Bridge and newer processors have many instructions that can be
 511 // fused with conditional branches and pass through the CPU as a single
 512 // operation.
 513 def TuningMacroFusion
 514     : SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
 515                  "Various instructions can be fused with conditional branches">;
 516
 517 // Gather has been available since Haswell (the AVX2 set), so technically
 518 // gathers could be generated on all AVX2 processors, but the overhead on
 519 // HSW is high. The Skylake Client processor has faster gathers than HSW,
 520 // with performance similar to Skylake Server (AVX-512).
 521 def TuningFastGather : SubtargetFeature<
 522     "fast-gather", "HasFastGather", "true",
 523     "Indicates if gather is reasonably fast">;
 524
 525 def TuningPrefer128Bit : SubtargetFeature<
 526     "prefer-128-bit", "Prefer128Bit", "true",
 527     "Prefer 128-bit AVX instructions">;
 528
 529 def TuningPrefer256Bit : SubtargetFeature<
 530     "prefer-256-bit", "Prefer256Bit", "true",
 531     "Prefer 256-bit AVX instructions">;
 532
 533 def TuningPreferMaskRegisters : SubtargetFeature<
 534     "prefer-mask-registers", "PreferMaskRegisters", "true",
 535     "Prefer AVX512 mask registers over PTEST/MOVMSK">;
 536
 537 def TuningFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
 538           "Indicates that the BEXTR instruction is implemented as a single uop "
 539           "with good throughput">;  // adjacent literals form one description
 540
 541 // Fold vector math operations plus shuffles into horizontal math
 542 // instructions (introduced with SSE3) when the CPU implements horizontal
 543 // operations with better latency/throughput than the alternative sequence.
 544 def TuningFastHorizontalOps : SubtargetFeature<
 545     "fast-hops",
 546     "HasFastHorizontalOps", "true",
 547     "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
 548     "normal vector instructions with shuffles">;
 549
 550 def TuningFastScalarShiftMasks : SubtargetFeature<
 551     "fast-scalar-shift-masks",
 552     "HasFastScalarShiftMasks", "true",
 553     "Prefer a left/right scalar logical shift pair over a shift+and pair">;
 554
 555 def TuningFastVectorShiftMasks : SubtargetFeature<
 556     "fast-vector-shift-masks",
 557     "HasFastVectorShiftMasks", "true",
 558     "Prefer a left/right vector logical shift pair over a shift+and pair">;
 559
 560 def TuningFastMOVBE : SubtargetFeature<
 561     "fast-movbe", "HasFastMOVBE", "true",
 562     "Prefer a movbe over a single-use load + bswap / single-use bswap + store">;
 563
 564 def TuningUseGLMDivSqrtCosts : SubtargetFeature<
 565     "use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
 566     "Use Goldmont specific floating point div/sqrt costs">;
 567
 568 // Turns on the use of alias analysis during code generation.
 569 def FeatureUseAA : SubtargetFeature<
 570     "use-aa", "UseAA", "true", "Use alias analysis during codegen">;
 571
 572 //===----------------------------------------------------------------------===//
 573 // X86 CPU Families
 574 // TODO: Remove these - use general tuning features to determine codegen.
 575 //===----------------------------------------------------------------------===//
 576
 577 // Bonnell. Sets X86ProcFamily to IntelAtom; name and description are empty.
 578 def ProcIntelAtom : SubtargetFeature<"", "X86ProcFamily", "IntelAtom", "">;
 579 // Silvermont. Sets X86ProcFamily to IntelSLM; name and description are empty.
 580 def ProcIntelSLM  : SubtargetFeature<"", "X86ProcFamily", "IntelSLM", "">;
 581
 582 //===----------------------------------------------------------------------===//
 583 // Register File Description
 584 //===----------------------------------------------------------------------===//
 585
 586 include "X86RegisterInfo.td"
 587 include "X86RegisterBanks.td"
 588
 589 //===----------------------------------------------------------------------===//
 590 // Instruction Descriptions
 591 //===----------------------------------------------------------------------===//
 592
 593 include "X86Schedule.td"
 594 include "X86InstrInfo.td"
 595 include "X86SchedPredicates.td"
 596
 597 def X86InstrInfo : InstrInfo;  // InstrInfo record with no field overrides.
 598
 599 //===----------------------------------------------------------------------===//
 600 // X86 Scheduler Models
 601 //===----------------------------------------------------------------------===//
 602
 603 include "X86ScheduleAtom.td"
 604 include "X86SchedSandyBridge.td"
 605 include "X86SchedHaswell.td"
 606 include "X86SchedBroadwell.td"
 607 include "X86ScheduleSLM.td"
 608 include "X86ScheduleZnver1.td"
 609 include "X86ScheduleZnver2.td"
 610 include "X86ScheduleZnver3.td"
 611 include "X86ScheduleBdVer2.td"
 612 include "X86ScheduleBtVer2.td"
 613 include "X86SchedSkylakeClient.td"
 614 include "X86SchedSkylakeServer.td"
 615
 616 //===----------------------------------------------------------------------===//
 617 // X86 Processor Feature Lists
 618 //===----------------------------------------------------------------------===//
 619
 620 def ProcessorFeatures {
 621   // x86-64 and x86-64-v[234]
 622   list<SubtargetFeature> X86_64V1Features = [
 623     FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSE2,
 624     FeatureFXSR, FeatureNOPL, Feature64Bit
 625   ];
 626   list<SubtargetFeature> X86_64V2Features = !listconcat(
 627       X86_64V1Features,
 628       [FeatureCMPXCHG16B, FeatureLAHFSAHF, FeaturePOPCNT, FeatureSSE42]);
 629   list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [
 630     FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT,
 631     FeatureMOVBE, FeatureXSAVE
 632   ]);
 633   list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [
 634     FeatureBWI,
 635     FeatureCDI,
 636     FeatureDQI,
 637     FeatureVLX,
 638   ]);
 639
 640   // Nehalem
 641   list<SubtargetFeature> NHMFeatures = X86_64V2Features;
 642   list<SubtargetFeature> NHMTuning = [TuningMacroFusion,
 643                                       TuningInsertVZEROUPPER];
 644
 645   // Westmere
 646   list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
 647   list<SubtargetFeature> WSMTuning = NHMTuning;
 648   list<SubtargetFeature> WSMFeatures =
 649     !listconcat(NHMFeatures, WSMAdditionalFeatures);
 650
 651   // Sandybridge
 652   list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX,
 653                                                   FeatureXSAVE,
 654                                                   FeatureXSAVEOPT];
 655   list<SubtargetFeature> SNBTuning = [TuningMacroFusion,
 656                                       TuningSlow3OpsLEA,
 657                                       TuningSlowDivide64,
 658                                       TuningSlowUAMem32,
 659                                       TuningFastScalarFSQRT,
 660                                       TuningFastSHLDRotate,
 661                                       TuningFast15ByteNOP,
 662                                       TuningPOPCNTFalseDeps,
 663                                       TuningInsertVZEROUPPER];
 664   list<SubtargetFeature> SNBFeatures =
 665     !listconcat(WSMFeatures, SNBAdditionalFeatures);
 666
 667   // Ivybridge
 668   list<SubtargetFeature> IVBAdditionalFeatures = [FeatureRDRAND,
 669                                                   FeatureF16C,
 670                                                   FeatureFSGSBase];
 671   list<SubtargetFeature> IVBTuning = SNBTuning;
 672   list<SubtargetFeature> IVBFeatures =
 673     !listconcat(SNBFeatures, IVBAdditionalFeatures);
 674
 675   // Haswell
 676   list<SubtargetFeature> HSWAdditionalFeatures = [FeatureAVX2,
 677                                                   FeatureBMI,
 678                                                   FeatureBMI2,
 679                                                   FeatureERMSB,
 680                                                   FeatureFMA,
 681                                                   FeatureINVPCID,
 682                                                   FeatureLZCNT,
 683                                                   FeatureMOVBE];
 684   list<SubtargetFeature> HSWTuning = [TuningMacroFusion,
 685                                       TuningSlow3OpsLEA,
 686                                       TuningSlowDivide64,
 687                                       TuningFastScalarFSQRT,
 688                                       TuningFastSHLDRotate,
 689                                       TuningFast15ByteNOP,
 690                                       TuningFastVariableCrossLaneShuffle,
 691                                       TuningFastVariablePerLaneShuffle,
 692                                       TuningPOPCNTFalseDeps,
 693                                       TuningLZCNTFalseDeps,
 694                                       TuningInsertVZEROUPPER];
 695   list<SubtargetFeature> HSWFeatures =
 696     !listconcat(IVBFeatures, HSWAdditionalFeatures);
 697
 698   // Broadwell
 699   list<SubtargetFeature> BDWAdditionalFeatures = [FeatureADX,
 700                                                   FeatureRDSEED,
 701                                                   FeaturePRFCHW];
 702   list<SubtargetFeature> BDWTuning = HSWTuning;
 703   list<SubtargetFeature> BDWFeatures =
 704     !listconcat(HSWFeatures, BDWAdditionalFeatures);
 705
 706   // Skylake
 707   list<SubtargetFeature> SKLAdditionalFeatures = [FeatureAES,
 708                                                   FeatureXSAVEC,
 709                                                   FeatureXSAVES,
 710                                                   FeatureCLFLUSHOPT];
 711   list<SubtargetFeature> SKLTuning = [TuningFastGather,
 712                                       TuningMacroFusion,
 713                                       TuningSlow3OpsLEA,
 714                                       TuningSlowDivide64,
 715                                       TuningFastScalarFSQRT,
 716                                       TuningFastVectorFSQRT,
 717                                       TuningFastSHLDRotate,
 718                                       TuningFast15ByteNOP,
 719                                       TuningFastVariableCrossLaneShuffle,
 720                                       TuningFastVariablePerLaneShuffle,
 721                                       TuningPOPCNTFalseDeps,
 722                                       TuningInsertVZEROUPPER];
 723   list<SubtargetFeature> SKLFeatures =
 724     !listconcat(BDWFeatures, SKLAdditionalFeatures);
 725
 726   // Skylake-AVX512
 727   list<SubtargetFeature> SKXAdditionalFeatures = [FeatureAES,
 728                                                   FeatureXSAVEC,
 729                                                   FeatureXSAVES,
 730                                                   FeatureCLFLUSHOPT,
 731                                                   FeatureAVX512,
 732                                                   FeatureCDI,
 733                                                   FeatureDQI,
 734                                                   FeatureBWI,
 735                                                   FeatureVLX,
 736                                                   FeaturePKU,
 737                                                   FeatureCLWB];
 738   list<SubtargetFeature> SKXTuning = [TuningFastGather,
 739                                       TuningMacroFusion,
 740                                       TuningSlow3OpsLEA,
 741                                       TuningSlowDivide64,
 742                                       TuningFastScalarFSQRT,
 743                                       TuningFastVectorFSQRT,
 744                                       TuningFastSHLDRotate,
 745                                       TuningFast15ByteNOP,
 746                                       TuningFastVariableCrossLaneShuffle,
 747                                       TuningFastVariablePerLaneShuffle,
 748                                       TuningPrefer256Bit,
 749                                       TuningPOPCNTFalseDeps,
 750                                       TuningInsertVZEROUPPER];
 751   list<SubtargetFeature> SKXFeatures =
 752     !listconcat(BDWFeatures, SKXAdditionalFeatures);
 753
 754   // Cascadelake
 755   list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI];
 756   list<SubtargetFeature> CLXTuning = SKXTuning;
 757   list<SubtargetFeature> CLXFeatures =
 758     !listconcat(SKXFeatures, CLXAdditionalFeatures);
 759
 760   // Cooperlake
 761   list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16];
 762   list<SubtargetFeature> CPXTuning = SKXTuning;
 763   list<SubtargetFeature> CPXFeatures =
 764     !listconcat(CLXFeatures, CPXAdditionalFeatures);
 765
 766   // Cannonlake
 767   list<SubtargetFeature> CNLAdditionalFeatures = [FeatureAVX512,
 768                                                   FeatureCDI,
 769                                                   FeatureDQI,
 770                                                   FeatureBWI,
 771                                                   FeatureVLX,
 772                                                   FeaturePKU,
 773                                                   FeatureVBMI,
 774                                                   FeatureIFMA,
 775                                                   FeatureSHA];
 776   list<SubtargetFeature> CNLTuning = [TuningFastGather,
 777                                       TuningMacroFusion,
 778                                       TuningSlow3OpsLEA,
 779                                       TuningSlowDivide64,
 780                                       TuningFastScalarFSQRT,
 781                                       TuningFastVectorFSQRT,
 782                                       TuningFastSHLDRotate,
 783                                       TuningFast15ByteNOP,
 784                                       TuningFastVariableCrossLaneShuffle,
 785                                       TuningFastVariablePerLaneShuffle,
 786                                       TuningPrefer256Bit,
 787                                       TuningInsertVZEROUPPER];
 788   list<SubtargetFeature> CNLFeatures =
 789     !listconcat(SKLFeatures, CNLAdditionalFeatures);
 790
 791   // Icelake
 792   list<SubtargetFeature> ICLAdditionalFeatures = [FeatureBITALG,
 793                                                   FeatureVAES,
 794                                                   FeatureVBMI2,
 795                                                   FeatureVNNI,
 796                                                   FeatureVPCLMULQDQ,
 797                                                   FeatureVPOPCNTDQ,
 798                                                   FeatureGFNI,
 799                                                   FeatureRDPID,
 800                                                   FeatureFSRM];
 801   list<SubtargetFeature> ICLTuning = CNLTuning;
 802   list<SubtargetFeature> ICLFeatures =
 803     !listconcat(CNLFeatures, ICLAdditionalFeatures);
 804
 805   // Icelake Server
 806   list<SubtargetFeature> ICXAdditionalFeatures = [FeaturePCONFIG,
 807                                                   FeatureCLWB,
 808                                                   FeatureWBNOINVD];
 809   list<SubtargetFeature> ICXTuning = CNLTuning;
 810   list<SubtargetFeature> ICXFeatures =
 811     !listconcat(ICLFeatures, ICXAdditionalFeatures);
 812
 813   // Tigerlake
 814   list<SubtargetFeature> TGLAdditionalFeatures = [FeatureVP2INTERSECT,
 815                                                   FeatureCLWB,
 816                                                   FeatureMOVDIRI,
 817                                                   FeatureMOVDIR64B,
 818                                                   FeatureSHSTK];
 819   list<SubtargetFeature> TGLTuning = CNLTuning;
 820   list<SubtargetFeature> TGLFeatures =
 821     !listconcat(ICLFeatures, TGLAdditionalFeatures );
 822
 823   // Sapphirerapids
 824   list<SubtargetFeature> SPRAdditionalFeatures = [FeatureAMXTILE,
 825                                                   FeatureAMXINT8,
 826                                                   FeatureAMXBF16,
 827                                                   FeatureBF16,
 828                                                   FeatureSERIALIZE,
 829                                                   FeatureCLDEMOTE,
 830                                                   FeatureWAITPKG,
 831                                                   FeaturePTWRITE,
 832                                                   FeatureFP16,
 833                                                   FeatureAVXVNNI,
 834                                                   FeatureTSXLDTRK,
 835                                                   FeatureENQCMD,
 836                                                   FeatureSHSTK,
 837                                                   FeatureVP2INTERSECT,
 838                                                   FeatureMOVDIRI,
 839                                                   FeatureMOVDIR64B,
 840                                                   FeatureUINTR];
 841   list<SubtargetFeature> SPRTuning = ICXTuning;
 842   list<SubtargetFeature> SPRFeatures =
 843     !listconcat(ICXFeatures, SPRAdditionalFeatures);
 844
 845   // Atom
 846   list<SubtargetFeature> AtomFeatures = [FeatureX87,
 847                                          FeatureCMPXCHG8B,
 848                                          FeatureCMOV,
 849                                          FeatureMMX,
 850                                          FeatureSSSE3,
 851                                          FeatureFXSR,
 852                                          FeatureNOPL,
 853                                          Feature64Bit,
 854                                          FeatureCMPXCHG16B,
 855                                          FeatureMOVBE,
 856                                          FeatureLAHFSAHF];
 857   list<SubtargetFeature> AtomTuning = [ProcIntelAtom,
 858                                        TuningSlowUAMem16,
 859                                        TuningLEAForSP,
 860                                        TuningSlowDivide32,
 861                                        TuningSlowDivide64,
 862                                        TuningSlowTwoMemOps,
 863                                        TuningLEAUsesAG,
 864                                        TuningPadShortFunctions,
 865                                        TuningInsertVZEROUPPER];
 866
 867   // Silvermont
 868   list<SubtargetFeature> SLMAdditionalFeatures = [FeatureSSE42,
 869                                                   FeaturePOPCNT,
 870                                                   FeaturePCLMUL,
 871                                                   FeaturePRFCHW,
 872                                                   FeatureRDRAND];
 873   list<SubtargetFeature> SLMTuning = [ProcIntelSLM,
 874                                       TuningSlowTwoMemOps,
 875                                       TuningSlowLEA,
 876                                       TuningSlowIncDec,
 877                                       TuningSlowDivide64,
 878                                       TuningSlowPMULLD,
 879                                       TuningFast7ByteNOP,
 880                                       TuningFastMOVBE,
 881                                       TuningPOPCNTFalseDeps,
 882                                       TuningInsertVZEROUPPER];
 883   list<SubtargetFeature> SLMFeatures =
 884     !listconcat(AtomFeatures, SLMAdditionalFeatures);
 885
 886   // Goldmont
 887   list<SubtargetFeature> GLMAdditionalFeatures = [FeatureAES,
 888                                                   FeatureSHA,
 889                                                   FeatureRDSEED,
 890                                                   FeatureXSAVE,
 891                                                   FeatureXSAVEOPT,
 892                                                   FeatureXSAVEC,
 893                                                   FeatureXSAVES,
 894                                                   FeatureCLFLUSHOPT,
 895                                                   FeatureFSGSBase];
 896   list<SubtargetFeature> GLMTuning = [TuningUseGLMDivSqrtCosts,
 897                                       TuningSlowTwoMemOps,
 898                                       TuningSlowLEA,
 899                                       TuningSlowIncDec,
 900                                       TuningFastMOVBE,
 901                                       TuningPOPCNTFalseDeps,
 902                                       TuningInsertVZEROUPPER];
 903   list<SubtargetFeature> GLMFeatures =
 904     !listconcat(SLMFeatures, GLMAdditionalFeatures);
 905
 906   // Goldmont Plus
 907   list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE,
 908                                                   FeatureRDPID];
 909   list<SubtargetFeature> GLPTuning = [TuningUseGLMDivSqrtCosts,
 910                                       TuningSlowTwoMemOps,
 911                                       TuningSlowLEA,
 912                                       TuningSlowIncDec,
 913                                       TuningFastMOVBE,
 914                                       TuningInsertVZEROUPPER];
 915   list<SubtargetFeature> GLPFeatures =
 916     !listconcat(GLMFeatures, GLPAdditionalFeatures);
 917
 918   // Tremont
 919   list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLWB,
 920                                                   FeatureGFNI];
 921   list<SubtargetFeature> TRMTuning = GLPTuning;
 922   list<SubtargetFeature> TRMFeatures =
 923     !listconcat(GLPFeatures, TRMAdditionalFeatures);
 924
 925   // Alderlake
 926   list<SubtargetFeature> ADLAdditionalFeatures = [FeatureSERIALIZE,
 927                                                   FeaturePCONFIG,
 928                                                   FeatureSHSTK,
 929                                                   FeatureWIDEKL,
 930                                                   FeatureINVPCID,
 931                                                   FeatureADX,
 932                                                   FeatureFMA,
 933                                                   FeatureVAES,
 934                                                   FeatureVPCLMULQDQ,
 935                                                   FeatureF16C,
 936                                                   FeatureBMI,
 937                                                   FeatureBMI2,
 938                                                   FeatureLZCNT,
 939                                                   FeatureAVXVNNI,
 940                                                   FeaturePKU,
 941                                                   FeatureHRESET,
 942                                                   FeatureCLDEMOTE,
 943                                                   FeatureMOVDIRI,
 944                                                   FeatureMOVDIR64B,
 945                                                   FeatureWAITPKG];
 946   list<SubtargetFeature> ADLTuning = SKLTuning;
 947   list<SubtargetFeature> ADLFeatures =
 948     !listconcat(TRMFeatures, ADLAdditionalFeatures);
 949
 950   // Knights Landing
 951   list<SubtargetFeature> KNLFeatures = [FeatureX87,
 952                                         FeatureCMPXCHG8B,
 953                                         FeatureCMOV,
 954                                         FeatureMMX,
 955                                         FeatureFXSR,
 956                                         FeatureNOPL,
 957                                         Feature64Bit,
 958                                         FeatureCMPXCHG16B,
 959                                         FeaturePOPCNT,
 960                                         FeaturePCLMUL,
 961                                         FeatureXSAVE,
 962                                         FeatureXSAVEOPT,
 963                                         FeatureLAHFSAHF,
 964                                         FeatureAES,
 965                                         FeatureRDRAND,
 966                                         FeatureF16C,
 967                                         FeatureFSGSBase,
 968                                         FeatureAVX512,
 969                                         FeatureERI,
 970                                         FeatureCDI,
 971                                         FeaturePFI,
 972                                         FeaturePREFETCHWT1,
 973                                         FeatureADX,
 974                                         FeatureRDSEED,
 975                                         FeatureMOVBE,
 976                                         FeatureLZCNT,
 977                                         FeatureBMI,
 978                                         FeatureBMI2,
 979                                         FeatureFMA,
 980                                         FeaturePRFCHW];
 981   list<SubtargetFeature> KNLTuning = [TuningSlowDivide64,
 982                                       TuningSlow3OpsLEA,
 983                                       TuningSlowIncDec,
 984                                       TuningSlowTwoMemOps,
 985                                       TuningPreferMaskRegisters,
 986                                       TuningFastGather,
 987                                       TuningFastMOVBE,
 988                                       TuningSlowPMADDWD];
 989   // TODO Add AVX5124FMAPS/AVX5124VNNIW features
 990   list<SubtargetFeature> KNMFeatures =
 991     !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]);
 992
 993   // Barcelona
 994   list<SubtargetFeature> BarcelonaFeatures = [FeatureX87,
 995                                               FeatureCMPXCHG8B,
 996                                               FeatureSSE4A,
 997                                               Feature3DNowA,
 998                                               FeatureFXSR,
 999                                               FeatureNOPL,
1000                                               FeatureCMPXCHG16B,
1001                                               FeaturePRFCHW,
1002                                               FeatureLZCNT,
1003                                               FeaturePOPCNT,
1004                                               FeatureLAHFSAHF,
1005                                               FeatureCMOV,
1006                                               Feature64Bit];
1007   list<SubtargetFeature> BarcelonaTuning = [TuningFastScalarShiftMasks,
1008                                             TuningSlowSHLD,
1009                                             TuningInsertVZEROUPPER];
1010
1011   // Bobcat
1012   list<SubtargetFeature> BtVer1Features = [FeatureX87,
1013                                            FeatureCMPXCHG8B,
1014                                            FeatureCMOV,
1015                                            FeatureMMX,
1016                                            FeatureSSSE3,
1017                                            FeatureSSE4A,
1018                                            FeatureFXSR,
1019                                            FeatureNOPL,
1020                                            Feature64Bit,
1021                                            FeatureCMPXCHG16B,
1022                                            FeaturePRFCHW,
1023                                            FeatureLZCNT,
1024                                            FeaturePOPCNT,
1025                                            FeatureLAHFSAHF];
1026   list<SubtargetFeature> BtVer1Tuning = [TuningFast15ByteNOP,
1027                                          TuningFastScalarShiftMasks,
1028                                          TuningFastVectorShiftMasks,
1029                                          TuningSlowSHLD,
1030                                          TuningInsertVZEROUPPER];
1031
1032   // Jaguar
1033   list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX,
1034                                                      FeatureAES,
1035                                                      FeaturePCLMUL,
1036                                                      FeatureBMI,
1037                                                      FeatureF16C,
1038                                                      FeatureMOVBE,
1039                                                      FeatureXSAVE,
1040                                                      FeatureXSAVEOPT];
1041   list<SubtargetFeature> BtVer2Tuning = [TuningFastLZCNT,
1042                                          TuningFastBEXTR,
1043                                          TuningFastHorizontalOps,
1044                                          TuningFast15ByteNOP,
1045                                          TuningFastScalarShiftMasks,
1046                                          TuningFastVectorShiftMasks,
1047                                          TuningFastMOVBE,
1048                                          TuningSlowSHLD];
1049   list<SubtargetFeature> BtVer2Features =
1050     !listconcat(BtVer1Features, BtVer2AdditionalFeatures);
1051
1052   // Bulldozer
1053   list<SubtargetFeature> BdVer1Features = [FeatureX87,
1054                                            FeatureCMPXCHG8B,
1055                                            FeatureCMOV,
1056                                            FeatureXOP,
1057                                            Feature64Bit,
1058                                            FeatureCMPXCHG16B,
1059                                            FeatureAES,
1060                                            FeaturePRFCHW,
1061                                            FeaturePCLMUL,
1062                                            FeatureMMX,
1063                                            FeatureFXSR,
1064                                            FeatureNOPL,
1065                                            FeatureLZCNT,
1066                                            FeaturePOPCNT,
1067                                            FeatureXSAVE,
1068                                            FeatureLWP,
1069                                            FeatureLAHFSAHF];
1070   list<SubtargetFeature> BdVer1Tuning = [TuningSlowSHLD,
1071                                          TuningFast11ByteNOP,
1072                                          TuningFastScalarShiftMasks,
1073                                          TuningBranchFusion,
1074                                          TuningInsertVZEROUPPER];
1075
1076   // PileDriver
1077   list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C,
1078                                                      FeatureBMI,
1079                                                      FeatureTBM,
1080                                                      FeatureFMA];
1081   list<SubtargetFeature> BdVer2AdditionalTuning = [TuningFastBEXTR,
1082                                                    TuningFastMOVBE];
1083   list<SubtargetFeature> BdVer2Tuning =
1084     !listconcat(BdVer1Tuning, BdVer2AdditionalTuning);
1085   list<SubtargetFeature> BdVer2Features =
1086     !listconcat(BdVer1Features, BdVer2AdditionalFeatures);
1087
1088   // Steamroller
1089   list<SubtargetFeature> BdVer3AdditionalFeatures = [FeatureXSAVEOPT,
1090                                                      FeatureFSGSBase];
1091   list<SubtargetFeature> BdVer3Tuning = BdVer2Tuning;
1092   list<SubtargetFeature> BdVer3Features =
1093     !listconcat(BdVer2Features, BdVer3AdditionalFeatures);
1094
1095   // Excavator
1096   list<SubtargetFeature> BdVer4AdditionalFeatures = [FeatureAVX2,
1097                                                      FeatureBMI2,
1098                                                      FeatureMOVBE,
1099                                                      FeatureRDRAND,
1100                                                      FeatureMWAITX];
1101   list<SubtargetFeature> BdVer4Tuning = BdVer3Tuning;
1102   list<SubtargetFeature> BdVer4Features =
1103     !listconcat(BdVer3Features, BdVer4AdditionalFeatures);
1104
1105
1106   // AMD Zen Processors common ISAs
1107   list<SubtargetFeature> ZNFeatures = [FeatureADX,
1108                                        FeatureAES,
1109                                        FeatureAVX2,
1110                                        FeatureBMI,
1111                                        FeatureBMI2,
1112                                        FeatureCLFLUSHOPT,
1113                                        FeatureCLZERO,
1114                                        FeatureCMOV,
1115                                        Feature64Bit,
1116                                        FeatureCMPXCHG16B,
1117                                        FeatureF16C,
1118                                        FeatureFMA,
1119                                        FeatureFSGSBase,
1120                                        FeatureFXSR,
1121                                        FeatureNOPL,
1122                                        FeatureLAHFSAHF,
1123                                        FeatureLZCNT,
1124                                        FeatureMMX,
1125                                        FeatureMOVBE,
1126                                        FeatureMWAITX,
1127                                        FeaturePCLMUL,
1128                                        FeaturePOPCNT,
1129                                        FeaturePRFCHW,
1130                                        FeatureRDRAND,
1131                                        FeatureRDSEED,
1132                                        FeatureSHA,
1133                                        FeatureSSE4A,
1134                                        FeatureX87,
1135                                        FeatureXSAVE,
1136                                        FeatureXSAVEC,
1137                                        FeatureXSAVEOPT,
1138                                        FeatureXSAVES];
1139   list<SubtargetFeature> ZNTuning = [TuningFastLZCNT,
1140                                      TuningFastBEXTR,
1141                                      TuningFast15ByteNOP,
1142                                      TuningBranchFusion,
1143                                      TuningFastScalarShiftMasks,
1144                                      TuningFastMOVBE,
1145                                      TuningSlowSHLD,
1146                                      TuningInsertVZEROUPPER];
1147   list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
1148                                                   FeatureRDPID,
1149                                                   FeatureWBNOINVD];
1150   list<SubtargetFeature> ZN2Tuning = ZNTuning;
1151   list<SubtargetFeature> ZN2Features =
1152     !listconcat(ZNFeatures, ZN2AdditionalFeatures);
1153   list<SubtargetFeature> ZN3AdditionalFeatures = [FeatureFSRM,
1154                                                   FeatureINVPCID,
1155                                                   FeaturePKU,
1156                                                   FeatureVAES,
1157                                                   FeatureVPCLMULQDQ];
1158   list<SubtargetFeature> ZN3AdditionalTuning =
1159     [TuningMacroFusion,
1160      TuningFastVariablePerLaneShuffle];
1161   list<SubtargetFeature> ZN3Tuning =
1162     !listconcat(ZNTuning, ZN3AdditionalTuning);
1163   list<SubtargetFeature> ZN3Features =
1164     !listconcat(ZN2Features, ZN3AdditionalFeatures);
1165 }
1166
1167 //===----------------------------------------------------------------------===//
1168 // X86 processors supported.
1169 //===----------------------------------------------------------------------===//
1170
// Processor record that always uses GenericModel as its scheduling model.
//   Name         - processor name handed to ProcessorModel.
//   Features     - subtarget feature list, forwarded unchanged.
//   TuneFeatures - tuning feature list, forwarded unchanged.
class Proc<string Name,
           list<SubtargetFeature> Features,
           list<SubtargetFeature> TuneFeatures>
    : ProcessorModel<Name, GenericModel, Features, TuneFeatures>;
1174
// Processor record with an explicitly chosen scheduling model.
//   Name         - processor name handed to ProcessorModel.
//   Model        - SchedMachineModel to use for this processor.
//   Features     - subtarget feature list, forwarded unchanged.
//   TuneFeatures - tuning feature list, forwarded unchanged.
class ProcModel<string Name,
                SchedMachineModel Model,
                list<SubtargetFeature> Features,
                list<SubtargetFeature> TuneFeatures>
    : ProcessorModel<Name, Model, Features, TuneFeatures>;
1179
// "generic" is the default llc CPU.
// NOTE: CMPXCHG8B is kept for legacy compatibility, so it is disabled only
// when i386/i486 is requested explicitly.
// NOTE: 64Bit is listed because the X86Subtarget constructor checks that any
// CPU used in 64-bit mode has Feature64Bit enabled. It has no effect on code
// generation.
def : ProcModel<"generic", SandyBridgeModel, [
  FeatureX87,
  FeatureCMPXCHG8B,
  Feature64Bit
],
[
  TuningSlow3OpsLEA,
  TuningSlowDivide64,
  TuningSlowIncDec,
  TuningMacroFusion,
  TuningInsertVZEROUPPER
]>;
1192
// i386 and i486 carry only the X87 feature.
foreach P = ["i386", "i486"] in {
  def : Proc<P, [FeatureX87],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

// i586 and pentium add CMPXCHG8B.
foreach P = ["i586", "pentium"] in {
  def : Proc<P, [FeatureX87, FeatureCMPXCHG8B],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}

// pentium-mmx additionally enables MMX.
def : Proc<"pentium-mmx", [
  FeatureX87,
  FeatureCMPXCHG8B,
  FeatureMMX
],
[
  TuningSlowUAMem16,
  TuningInsertVZEROUPPER
]>;
1203
// i686 through pentium3m. Every entry has X87, CMPXCHG8B and CMOV and uses the
// same two tuning flags; later entries add further features.
def : Proc<"i686", [
  FeatureX87,
  FeatureCMPXCHG8B,
  FeatureCMOV
],
[
  TuningSlowUAMem16,
  TuningInsertVZEROUPPER
]>;
def : Proc<"pentiumpro", [
  FeatureX87,
  FeatureCMPXCHG8B,
  FeatureCMOV,
  FeatureNOPL
],
[
  TuningSlowUAMem16,
  TuningInsertVZEROUPPER
]>;

def : Proc<"pentium2", [
  FeatureX87,
  FeatureCMPXCHG8B,
  FeatureMMX,
  FeatureCMOV,
  FeatureFXSR,
  FeatureNOPL
],
[
  TuningSlowUAMem16,
  TuningInsertVZEROUPPER
]>;

foreach P = ["pentium3", "pentium3m"] in {
  def : Proc<P, [
    FeatureX87,
    FeatureCMPXCHG8B,
    FeatureMMX,
    FeatureSSE1,
    FeatureFXSR,
    FeatureNOPL,
    FeatureCMOV
  ],
  [
    TuningSlowUAMem16,
    TuningInsertVZEROUPPER
  ]>;
}
1219
// Enable the PostRAScheduler for SSE2- and SSE3-class CPUs.
// The goal is to turn it on for pentium4, which is the current default
// processor in a vanilla 32-bit clang compilation when no specific
// architecture is given. This generally gives a nice performance increase on
// silvermont and is largely neutral on other contemporary large-core
// processors.
// pentium-m, pentium4m, prescott and nocona are covered as well, as a
// preventative measure against performance surprises should clang's default
// CPU change slightly.

// pentium-m, pentium4 and pentium4m share one feature and tuning set.
foreach P = ["pentium-m", "pentium4", "pentium4m"] in {
  def : ProcModel<P, GenericPostRAModel, [
    FeatureX87,
    FeatureCMPXCHG8B,
    FeatureMMX,
    FeatureSSE2,
    FeatureFXSR,
    FeatureNOPL,
    FeatureCMOV
  ],
  [
    TuningSlowUAMem16,
    TuningInsertVZEROUPPER
  ]>;
}
1241
// Intel Quark. Note that CMPXCHG8B is the only feature enabled here.
def : Proc<"lakemont", [
  FeatureCMPXCHG8B
],
[
  TuningSlowUAMem16,
  TuningInsertVZEROUPPER
]>;
1245
// Intel Core Duo; scheduled with the Sandy Bridge model.
def : ProcModel<"yonah", SandyBridgeModel, [
  FeatureX87,
  FeatureCMPXCHG8B,
  FeatureMMX,
  FeatureSSE3,
  FeatureFXSR,
  FeatureNOPL,
  FeatureCMOV
],
[
  TuningSlowUAMem16,
  TuningInsertVZEROUPPER
]>;
1251
// NetBurst.
def : ProcModel<"prescott", GenericPostRAModel, [
  FeatureX87,
  FeatureCMPXCHG8B,
  FeatureMMX,
  FeatureSSE3,
  FeatureFXSR,
  FeatureNOPL,
  FeatureCMOV
],
[
  TuningSlowUAMem16,
  TuningInsertVZEROUPPER
]>;
// nocona: the SSE3 feature set plus 64-bit support and CMPXCHG16B.
def : ProcModel<"nocona", GenericPostRAModel,
                [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX,
                 FeatureSSE3, FeatureFXSR, FeatureNOPL, Feature64Bit,
                 FeatureCMPXCHG16B],
                [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1272
// Intel Core 2 Solo/Duo.
def : ProcModel<"core2", SandyBridgeModel,
                [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX,
                 FeatureSSSE3, FeatureFXSR, FeatureNOPL, Feature64Bit,
                 FeatureCMPXCHG16B, FeatureLAHFSAHF],
                [TuningMacroFusion, TuningSlowUAMem16,
                 TuningInsertVZEROUPPER]>;
// penryn: the same feature and tuning lists as core2, with SSE4.1 in place
// of SSSE3.
def : ProcModel<"penryn", SandyBridgeModel,
                [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX,
                 FeatureSSE41, FeatureFXSR, FeatureNOPL, Feature64Bit,
                 FeatureCMPXCHG16B, FeatureLAHFSAHF],
                [TuningMacroFusion, TuningSlowUAMem16,
                 TuningInsertVZEROUPPER]>;
1308
// Atom CPUs. "bonnell" and "atom" are two names for the same definition.
foreach P = ["bonnell", "atom"] in {
  def : ProcModel<P,
                  AtomModel,
                  ProcessorFeatures.AtomFeatures,
                  ProcessorFeatures.AtomTuning>;
}
1314
// "silvermont" and "slm" are two names for the same definition.
foreach P = ["silvermont", "slm"] in {
  def : ProcModel<P,
                  SLMModel,
                  ProcessorFeatures.SLMFeatures,
                  ProcessorFeatures.SLMTuning>;
}
1319
// goldmont, goldmont-plus and tremont all use the SLM scheduling model, each
// with its own feature and tuning lists.
def : ProcModel<"goldmont",
                SLMModel,
                ProcessorFeatures.GLMFeatures,
                ProcessorFeatures.GLMTuning>;
def : ProcModel<"goldmont-plus",
                SLMModel,
                ProcessorFeatures.GLPFeatures,
                ProcessorFeatures.GLPTuning>;
def : ProcModel<"tremont",
                SLMModel,
                ProcessorFeatures.TRMFeatures,
                ProcessorFeatures.TRMTuning>;
1326
// "Arrandale" along with corei3 and corei5.
foreach P = ["nehalem", "corei7"] in {
  def : ProcModel<P,
                  SandyBridgeModel,
                  ProcessorFeatures.NHMFeatures,
                  ProcessorFeatures.NHMTuning>;
}

// Westmere is the corei3/i5/i7 step on the path from nehalem to sandybridge.
def : ProcModel<"westmere",
                SandyBridgeModel,
                ProcessorFeatures.WSMFeatures,
                ProcessorFeatures.WSMTuning>;
1336
// Sandy Bridge, Ivy Bridge and Haswell, each registered under two names.
foreach P = ["sandybridge", "corei7-avx"] in {
  def : ProcModel<P,
                  SandyBridgeModel,
                  ProcessorFeatures.SNBFeatures,
                  ProcessorFeatures.SNBTuning>;
}

// Ivy Bridge reuses the Sandy Bridge scheduling model.
foreach P = ["ivybridge", "core-avx-i"] in {
  def : ProcModel<P,
                  SandyBridgeModel,
                  ProcessorFeatures.IVBFeatures,
                  ProcessorFeatures.IVBTuning>;
}

foreach P = ["haswell", "core-avx2"] in {
  def : ProcModel<P,
                  HaswellModel,
                  ProcessorFeatures.HSWFeatures,
                  ProcessorFeatures.HSWTuning>;
}
1351
// Broadwell and client Skylake each have a dedicated scheduling model.
def : ProcModel<"broadwell",
                BroadwellModel,
                ProcessorFeatures.BDWFeatures,
                ProcessorFeatures.BDWTuning>;

def : ProcModel<"skylake",
                SkylakeClientModel,
                ProcessorFeatures.SKLFeatures,
                ProcessorFeatures.SKLTuning>;
1357
// FIXME: define a KNL scheduler model; the Haswell model is used until then.
// knm has its own feature list but shares the KNL tuning list.
def : ProcModel<"knl",
                HaswellModel,
                ProcessorFeatures.KNLFeatures,
                ProcessorFeatures.KNLTuning>;
def : ProcModel<"knm",
                HaswellModel,
                ProcessorFeatures.KNMFeatures,
                ProcessorFeatures.KNLTuning>;
1363
// Skylake server and its successors. Every entry except alderlake is
// scheduled with SkylakeServerModel; alderlake uses SkylakeClientModel.
foreach P = ["skylake-avx512", "skx"] in {
  def : ProcModel<P,
                  SkylakeServerModel,
                  ProcessorFeatures.SKXFeatures,
                  ProcessorFeatures.SKXTuning>;
}

def : ProcModel<"cascadelake",
                SkylakeServerModel,
                ProcessorFeatures.CLXFeatures,
                ProcessorFeatures.CLXTuning>;
def : ProcModel<"cooperlake",
                SkylakeServerModel,
                ProcessorFeatures.CPXFeatures,
                ProcessorFeatures.CPXTuning>;
def : ProcModel<"cannonlake",
                SkylakeServerModel,
                ProcessorFeatures.CNLFeatures,
                ProcessorFeatures.CNLTuning>;
// icelake-client and rocketlake share the ICL feature and tuning lists.
foreach P = ["icelake-client", "rocketlake"] in {
  def : ProcModel<P,
                  SkylakeServerModel,
                  ProcessorFeatures.ICLFeatures,
                  ProcessorFeatures.ICLTuning>;
}
def : ProcModel<"icelake-server",
                SkylakeServerModel,
                ProcessorFeatures.ICXFeatures,
                ProcessorFeatures.ICXTuning>;
def : ProcModel<"tigerlake",
                SkylakeServerModel,
                ProcessorFeatures.TGLFeatures,
                ProcessorFeatures.TGLTuning>;
def : ProcModel<"sapphirerapids",
                SkylakeServerModel,
                ProcessorFeatures.SPRFeatures,
                ProcessorFeatures.SPRTuning>;
def : ProcModel<"alderlake",
                SkylakeClientModel,
                ProcessorFeatures.ADLFeatures,
                ProcessorFeatures.ADLTuning>;
1387
1388 // AMD CPUs.
1389
// k6 enables MMX; k6-2 and k6-3 list 3DNow! instead and are otherwise
// identical to each other.
def : Proc<"k6", [
  FeatureX87,
  FeatureCMPXCHG8B,
  FeatureMMX
],
[
  TuningSlowUAMem16,
  TuningInsertVZEROUPPER
]>;
foreach P = ["k6-2", "k6-3"] in {
  def : Proc<P, [
    FeatureX87,
    FeatureCMPXCHG8B,
    Feature3DNow
  ],
  [
    TuningSlowUAMem16,
    TuningInsertVZEROUPPER
  ]>;
}
1396
// Athlon family. All entries share the same three tuning flags; the later
// names additionally enable SSE1 and FXSR.
foreach P = ["athlon", "athlon-tbird"] in {
  def : Proc<P, [
    FeatureX87,
    FeatureCMPXCHG8B,
    FeatureCMOV,
    Feature3DNowA,
    FeatureNOPL
  ],
  [
    TuningSlowSHLD,
    TuningSlowUAMem16,
    TuningInsertVZEROUPPER
  ]>;
}

foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
  def : Proc<P, [
    FeatureX87,
    FeatureCMPXCHG8B,
    FeatureCMOV,
    FeatureSSE1,
    Feature3DNowA,
    FeatureFXSR,
    FeatureNOPL
  ],
  [
    TuningSlowSHLD,
    TuningSlowUAMem16,
    TuningInsertVZEROUPPER
  ]>;
}
1408
// K8 family: 64-bit capable, SSE2 baseline.
foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
  def : Proc<P, [
    FeatureX87,
    FeatureCMPXCHG8B,
    FeatureSSE2,
    Feature3DNowA,
    FeatureFXSR,
    FeatureNOPL,
    Feature64Bit,
    FeatureCMOV
  ],
  [
    TuningFastScalarShiftMasks,
    TuningSlowSHLD,
    TuningSlowUAMem16,
    TuningInsertVZEROUPPER
  ]>;
}

// The "-sse3" variants raise the baseline to SSE3 and add CMPXCHG16B.
foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
  def : Proc<P, [
    FeatureX87,
    FeatureCMPXCHG8B,
    FeatureSSE3,
    Feature3DNowA,
    FeatureFXSR,
    FeatureNOPL,
    FeatureCMPXCHG16B,
    FeatureCMOV,
    Feature64Bit
  ],
  [
    TuningFastScalarShiftMasks,
    TuningSlowSHLD,
    TuningSlowUAMem16,
    TuningInsertVZEROUPPER
  ]>;
}
1423
// "amdfam10" and "barcelona" are two names for the same definition, which
// uses the generic scheduling model via Proc.
foreach P = ["amdfam10", "barcelona"] in {
  def : Proc<P,
             ProcessorFeatures.BarcelonaFeatures,
             ProcessorFeatures.BarcelonaTuning>;
}
1428
// Bobcat (generic scheduling model).
def : Proc<"btver1",
           ProcessorFeatures.BtVer1Features,
           ProcessorFeatures.BtVer1Tuning>;
// Jaguar (dedicated BtVer2Model scheduling model).
def : ProcModel<"btver2",
                BtVer2Model,
                ProcessorFeatures.BtVer2Features,
                ProcessorFeatures.BtVer2Tuning>;
1435
// Bulldozer; scheduled with the bdver2 model.
def : ProcModel<"bdver1",
                BdVer2Model,
                ProcessorFeatures.BdVer1Features,
                ProcessorFeatures.BdVer1Tuning>;
// Piledriver
def : ProcModel<"bdver2",
                BdVer2Model,
                ProcessorFeatures.BdVer2Features,
                ProcessorFeatures.BdVer2Tuning>;
// Steamroller; uses the generic scheduling model via Proc.
def : Proc<"bdver3",
           ProcessorFeatures.BdVer3Features,
           ProcessorFeatures.BdVer3Tuning>;
// Excavator; uses the generic scheduling model via Proc.
def : Proc<"bdver4",
           ProcessorFeatures.BdVer4Features,
           ProcessorFeatures.BdVer4Tuning>;
1448
// AMD Zen generations; each has its own scheduling model, feature list and
// tuning list.
def : ProcModel<"znver1",
                Znver1Model,
                ProcessorFeatures.ZNFeatures,
                ProcessorFeatures.ZNTuning>;
def : ProcModel<"znver2",
                Znver2Model,
                ProcessorFeatures.ZN2Features,
                ProcessorFeatures.ZN2Tuning>;
def : ProcModel<"znver3",
                Znver3Model,
                ProcessorFeatures.ZN3Features,
                ProcessorFeatures.ZN3Tuning>;
1455
// geode: X87, CMPXCHG8B and 3DNowA.
def : Proc<"geode", [
  FeatureX87,
  FeatureCMPXCHG8B,
  Feature3DNowA
],
[
  TuningSlowUAMem16,
  TuningInsertVZEROUPPER
]>;
1458
// winchip-c6 enables MMX; winchip2 and c3 list 3DNow! instead and are
// otherwise identical to each other. None of the three lists CMPXCHG8B.
def : Proc<"winchip-c6", [
  FeatureX87,
  FeatureMMX
],
[
  TuningSlowUAMem16,
  TuningInsertVZEROUPPER
]>;
foreach P = ["winchip2", "c3"] in {
  def : Proc<P, [
    FeatureX87,
    Feature3DNow
  ],
  [
    TuningSlowUAMem16,
    TuningInsertVZEROUPPER
  ]>;
}
// c3-2 moves to MMX + SSE1 and gains CMPXCHG8B, FXSR and CMOV.
def : Proc<"c3-2", [
  FeatureX87,
  FeatureCMPXCHG8B,
  FeatureMMX,
  FeatureSSE1,
  FeatureFXSR,
  FeatureCMOV
],
[
  TuningSlowUAMem16,
  TuningInsertVZEROUPPER
]>;
1468
// A generic, 64-bit-specific x86 processor model. It aims to work well on
// modern chips without enabling instruction set encodings beyond the basic
// SSE2 and 64-bit ones: it disables anything that is slow on a mainstream,
// modern 64-bit x86 chip and enables features that are generally beneficial.
//
// The Sandy Bridge model is currently the default scheduling model because it
// is shared by Nehalem, Westmere, Sandy Bridge and Ivy Bridge, which together
// cover a huge swath of x86 processors. If specific scheduling knobs turn out
// to need different settings for AMD chips, a common base for those could be
// formed.
def : ProcModel<"x86-64", SandyBridgeModel,
                ProcessorFeatures.X86_64V1Features,
                [TuningSlow3OpsLEA,
                 TuningSlowDivide64,
                 TuningSlowIncDec,
                 TuningMacroFusion,
                 TuningInsertVZEROUPPER]>;
1487
// x86-64 micro-architecture levels.
// v2 borrows the Sandy Bridge scheduling model and tuning list.
def : ProcModel<"x86-64-v2",
                SandyBridgeModel,
                ProcessorFeatures.X86_64V2Features,
                ProcessorFeatures.SNBTuning>;
// Close to Haswell.
def : ProcModel<"x86-64-v3",
                HaswellModel,
                ProcessorFeatures.X86_64V3Features,
                ProcessorFeatures.HSWTuning>;
// Close to the AVX-512 level implemented by Xeon Scalable Processors.
def : ProcModel<"x86-64-v4",
                SkylakeServerModel,
                ProcessorFeatures.X86_64V4Features,
                ProcessorFeatures.SKXTuning>;
1497
1498 //===----------------------------------------------------------------------===//
1499 // Calling Conventions
1500 //===----------------------------------------------------------------------===//
1501
1502 include "X86CallingConv.td"
1503
1504
1505 //===----------------------------------------------------------------------===//
1506 // Assembly Parser
1507 //===----------------------------------------------------------------------===//
1508
// Assembly parser variant 0, named "att".
def ATTAsmParserVariant : AsmParserVariant {
  int Variant = 0;

  // Name of this variant.
  string Name = "att";

  // Comments in assembly strings start with '#' and are discarded.
  string CommentDelimiter = "#";

  // Hard-coded registers are recognized by their '%' prefix.
  string RegisterPrefix = "%";
}
1521
// Assembly parser variant 1, named "intel".
def IntelAsmParserVariant : AsmParserVariant {
  int Variant = 1;

  // Name of this variant.
  string Name = "intel";

  // Comments in assembly strings start with ';' and are discarded.
  string CommentDelimiter = ";";

  // Hard-coded registers are recognized without any prefix.
  string RegisterPrefix = "";
}
1534
1535 //===----------------------------------------------------------------------===//
1536 // Assembly Printers
1537 //===----------------------------------------------------------------------===//
1538
// The X86 target can emit machine code in two different syntaxes; the choice
// is controlled by the -x86-asm-syntax={att|intel} option.

// Writer for variant 0, implemented by ATTInstPrinter.
def ATTAsmWriter : AsmWriter {
  string AsmWriterClassName = "ATTInstPrinter";
  int Variant = 0;
}

// Writer for variant 1, implemented by IntelInstPrinter.
def IntelAsmWriter : AsmWriter {
  string AsmWriterClassName = "IntelInstPrinter";
  int Variant = 1;
}
1549
// Top-level target record: ties together the instruction set, both assembly
// parser variants and both assembly writers, and turns on register renaming.
def X86 : Target {
  // Information about the instructions...
  let InstructionSet         = X86InstrInfo;
  let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
  let AssemblyWriters        = [ATTAsmWriter, IntelAsmWriter];
  let AllowRegisterRenaming  = 1;
}
1557
1558 //===----------------------------------------------------------------------===//
1559 // Pfm Counters
1560 //===----------------------------------------------------------------------===//
1561
1562 include "X86PfmCounters.td"