llvm/lib/Target/X86/X86.td

   1 //===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This is a target description file for the Intel i386 architecture, referred
  10 // to here as the "X86" architecture.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 // Get the target-independent interfaces which we are implementing...
  15 //
  16 include "llvm/Target/Target.td"
  17
  18 //===----------------------------------------------------------------------===//
  19 // X86 Subtarget state
  20 //
  21 // Execution-mode selectors, disregarding the specific ABI / programming model.
     // Each record sets the subtarget field named in its second argument to "true"
     // (argument roles per the SubtargetFeature class in Target.td).
  22 def Is64Bit : SubtargetFeature<"64bit-mode", "Is64Bit", "true",
  23                                "64-bit mode (x86_64)">;
  24 def Is32Bit : SubtargetFeature<"32bit-mode", "Is32Bit", "true",
  25                                "32-bit mode (80386)">;
  26 def Is16Bit : SubtargetFeature<"16bit-mode", "Is16Bit", "true",
  27                                "16-bit mode (i8086)">;
  28
  29 //===----------------------------------------------------------------------===//
  30 // X86 Subtarget ISA features
  31 //===----------------------------------------------------------------------===//
     //
     // Each record is SubtargetFeature<name, field, value, description
     // [, implied features]>; argument roles per the SubtargetFeature class in
     // Target.td. Records without a trailing list imply no other feature.
  32
  33 def FeatureX87     : SubtargetFeature<"x87","HasX87", "true",
  34                                       "Enable X87 float instructions">;
  35
  36 def FeatureNOPL    : SubtargetFeature<"nopl", "HasNOPL", "true",
  37                                       "Enable NOPL instruction (generally pentium pro+)">;
  38
  39 def FeatureCMOV    : SubtargetFeature<"cmov","HasCMOV", "true",
  40                                       "Enable conditional move instructions">;
  41
  42 def FeatureCX8     : SubtargetFeature<"cx8", "HasCX8", "true",
  43                                       "Support CMPXCHG8B instructions">;
  44
     // crc32 is its own feature bit here; this record does not imply sse4.2.
  45 def FeatureCRC32   : SubtargetFeature<"crc32", "HasCRC32", "true",
  46                                       "Enable SSE 4.2 CRC32 instruction (used when SSE4.2 is supported but function is GPR only)">;
  47
  48 def FeaturePOPCNT   : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
  49                                        "Support POPCNT instruction">;
  50
  51 def FeatureFXSR    : SubtargetFeature<"fxsr", "HasFXSR", "true",
  52                                       "Support fxsave/fxrestore instructions">;
  53
  54 def FeatureXSAVE   : SubtargetFeature<"xsave", "HasXSAVE", "true",
  55                                        "Support xsave instructions">;
  56
     // xsaveopt, xsavec and xsaves each imply the base xsave feature.
  57 def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
  58                                        "Support xsaveopt instructions",
  59                                        [FeatureXSAVE]>;
  60
  61 def FeatureXSAVEC  : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
  62                                        "Support xsavec instructions",
  63                                        [FeatureXSAVE]>;
  64
  65 def FeatureXSAVES  : SubtargetFeature<"xsaves", "HasXSAVES", "true",
  66                                        "Support xsaves instructions",
  67                                        [FeatureXSAVE]>;
  68
     // SSE level chain: each record stores its level in X86SSELevel and implies the
     // level directly below it (sse4.2 -> sse4.1 -> ssse3 -> sse3 -> sse2 -> sse).
  69 def FeatureSSE1    : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
  70                                       "Enable SSE instructions">;
  71 def FeatureSSE2    : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
  72                                       "Enable SSE2 instructions",
  73                                       [FeatureSSE1]>;
  74 def FeatureSSE3    : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
  75                                       "Enable SSE3 instructions",
  76                                       [FeatureSSE2]>;
  77 def FeatureSSSE3   : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
  78                                       "Enable SSSE3 instructions",
  79                                       [FeatureSSE3]>;
  80 def FeatureSSE41   : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
  81                                       "Enable SSE 4.1 instructions",
  82                                       [FeatureSSSE3]>;
  83 def FeatureSSE42   : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
  84                                       "Enable SSE 4.2 instructions",
  85                                       [FeatureSSE41]>;
  86 // The MMX subtarget feature is separate from the rest of the SSE features
  87 // because it's important (for odd compatibility reasons) to be able to
  88 // turn it off explicitly while allowing SSE+ to be on.
     // All three records below write X863DNowLevel (not X86SSELevel): 3dnowa
     // implies 3dnow, which in turn implies mmx.
  89 def FeatureMMX     : SubtargetFeature<"mmx","X863DNowLevel", "MMX",
  90                                       "Enable MMX instructions">;
  91 def Feature3DNow   : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
  92                                       "Enable 3DNow! instructions",
  93                                       [FeatureMMX]>;
  94 def Feature3DNowA  : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
  95                                       "Enable 3DNow! Athlon instructions",
  96                                       [Feature3DNow]>;
  97 // All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
  98 // feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
  99 // without disabling 64-bit mode. Nothing should imply this feature bit. It
 100 // is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
 101 def FeatureX86_64   : SubtargetFeature<"64bit", "HasX86_64", "true",
 102                                       "Support 64-bit instructions">;
     // cx16 implies cx8.
 103 def FeatureCX16     : SubtargetFeature<"cx16", "HasCX16", "true",
 104                                        "64-bit with cmpxchg16b (this is true for most x86-64 chips, but not the first AMD chips)",
 105                                        [FeatureCX8]>;
     // sse4a implies sse3 but uses its own boolean field (HasSSE4A) instead of
     // X86SSELevel, so it sits beside the SSE level chain rather than inside it.
 106 def FeatureSSE4A   : SubtargetFeature<"sse4a", "HasSSE4A", "true",
 107                                       "Support SSE 4a instructions",
 108                                       [FeatureSSE3]>;
 109
     // avx and avx2 extend the X86SSELevel chain: avx implies sse4.2 and avx2
     // implies avx.
 110 def FeatureAVX     : SubtargetFeature<"avx", "X86SSELevel", "AVX",
 111                                       "Enable AVX instructions",
 112                                       [FeatureSSE42]>;
 113 def FeatureAVX2    : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
 114                                       "Enable AVX2 instructions",
 115                                       [FeatureAVX]>;
 116 def FeatureFMA     : SubtargetFeature<"fma", "HasFMA", "true",
     // Three-operand FMA: sets HasFMA and implies avx.
 117                                       "Enable three-operand fused multiply-add",
 118                                       [FeatureAVX]>;
 119 def FeatureF16C    : SubtargetFeature<"f16c", "HasF16C", "true",
 120                        "Support 16-bit floating point conversion instructions",
 121                        [FeatureAVX]>;
     // evex512 has no implied features, and avx512f below does not imply it.
 122 def FeatureEVEX512  : SubtargetFeature<"evex512", "HasEVEX512", "true",
 123                         "Support ZMM and 64-bit mask instructions">;
     // avx512f writes X86SSELevel (like the sse*/avx* records) and implies avx2,
     // fma and f16c.
 124 def FeatureAVX512   : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512",
 125                                       "Enable AVX-512 instructions",
 126                                       [FeatureAVX2, FeatureFMA, FeatureF16C]>;
     // AVX-512 extensions. Each one implies avx512f directly, except avx512vbmi
     // and avx512vbmi2, which imply avx512bw. prefetchi and prefetchwt1 imply
     // nothing.
 127 def FeatureERI      : SubtargetFeature<"avx512er", "HasERI", "true",
 128                       "Enable AVX-512 Exponential and Reciprocal Instructions",
 129                                       [FeatureAVX512]>;
 130 def FeatureCDI      : SubtargetFeature<"avx512cd", "HasCDI", "true",
 131                       "Enable AVX-512 Conflict Detection Instructions",
 132                                       [FeatureAVX512]>;
 133 def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
 134                        "true", "Enable AVX-512 Population Count Instructions",
 135                                       [FeatureAVX512]>;
 136 def FeaturePFI      : SubtargetFeature<"avx512pf", "HasPFI", "true",
 137                       "Enable AVX-512 PreFetch Instructions",
 138                                       [FeatureAVX512]>;
 139 def FeaturePREFETCHI  : SubtargetFeature<"prefetchi", "HasPREFETCHI",
 140                                    "true",
 141                                    "Prefetch instruction with T0 or T1 Hint">;
 142 def FeaturePREFETCHWT1  : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1",
 143                                    "true",
 144                                    "Prefetch with Intent to Write and T1 Hint">;
 145 def FeatureDQI     : SubtargetFeature<"avx512dq", "HasDQI", "true",
 146                       "Enable AVX-512 Doubleword and Quadword Instructions",
 147                                       [FeatureAVX512]>;
 148 def FeatureBWI     : SubtargetFeature<"avx512bw", "HasBWI", "true",
 149                       "Enable AVX-512 Byte and Word Instructions",
 150                                       [FeatureAVX512]>;
 151 def FeatureVLX     : SubtargetFeature<"avx512vl", "HasVLX", "true",
 152                       "Enable AVX-512 Vector Length eXtensions",
 153                                       [FeatureAVX512]>;
 154 def FeatureVBMI     : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
 155                       "Enable AVX-512 Vector Byte Manipulation Instructions",
 156                                       [FeatureBWI]>;
 157 def FeatureVBMI2    : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
 158                       "Enable AVX-512 further Vector Byte Manipulation Instructions",
 159                                       [FeatureBWI]>;
     // avxifma implies avx2 rather than avx512f.
 160 def FeatureAVXIFMA    : SubtargetFeature<"avxifma", "HasAVXIFMA", "true",
 161                            "Enable AVX-IFMA",
 162                            [FeatureAVX2]>;
 163 def FeatureIFMA     : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
     // AVX-512 integer FMA: sets HasIFMA and implies avx512f.
 164                       "Enable AVX-512 Integer Fused Multiply-Add",
 165                                       [FeatureAVX512]>;
 166 def FeaturePKU   : SubtargetFeature<"pku", "HasPKU", "true",
 167                       "Enable protection keys">;
     // avx512vnni implies avx512f; the separate avxvnni feature implies only avx2.
 168 def FeatureVNNI    : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
 169                           "Enable AVX-512 Vector Neural Network Instructions",
 170                                       [FeatureAVX512]>;
 171 def FeatureAVXVNNI    : SubtargetFeature<"avxvnni", "HasAVXVNNI", "true",
 172                            "Support AVX_VNNI encoding",
 173                                       [FeatureAVX2]>;
     // avx512bf16 and avx512bitalg imply avx512bw; avx512vp2intersect implies
     // avx512f.
 174 def FeatureBF16    : SubtargetFeature<"avx512bf16", "HasBF16", "true",
 175                            "Support bfloat16 floating point",
 176                                       [FeatureBWI]>;
 177 def FeatureBITALG  : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
 178                        "Enable AVX-512 Bit Algorithms",
 179                         [FeatureBWI]>;
 180 def FeatureVP2INTERSECT  : SubtargetFeature<"avx512vp2intersect",
 181                                             "HasVP2INTERSECT", "true",
 182                                             "Enable AVX-512 vp2intersect",
 183                                             [FeatureAVX512]>;
 184 // FIXME: FP16 scalar intrinsics use the type v8f16, which is supposed to be
 185 // guarded under condition hasVLX. So we imply it in FeatureFP16 currently.
 186 // FIXME: FP16 conversion between f16 and i64 customize type v8i64, which is
 187 // supposed to be guarded under condition hasDQI. So we imply it in FeatureFP16
 188 // currently.
     // Hence the implied list below is avx512bw plus avx512vl and avx512dq.
 189 def FeatureFP16    : SubtargetFeature<"avx512fp16", "HasFP16", "true",
 190                            "Support 16-bit floating point",
 191                            [FeatureBWI, FeatureVLX, FeatureDQI]>;
     // Both AVX-VNNI integer variants imply avx2.
 192 def FeatureAVXVNNIINT8  : SubtargetFeature<"avxvnniint8",
 193                              "HasAVXVNNIINT8", "true",
 194                              "Enable AVX-VNNI-INT8",
 195                              [FeatureAVX2]>;
 196 def FeatureAVXVNNIINT16 : SubtargetFeature<"avxvnniint16",
 197                              "HasAVXVNNIINT16", "true",
 198                              "Enable AVX-VNNI-INT16",
 199                              [FeatureAVX2]>;
     // pclmul and gfni imply sse2; vpclmulqdq implies both avx and pclmul.
 200 def FeaturePCLMUL  : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
 201                          "Enable packed carry-less multiplication instructions",
 202                                [FeatureSSE2]>;
 203 def FeatureGFNI    : SubtargetFeature<"gfni", "HasGFNI", "true",
 204                          "Enable Galois Field Arithmetic Instructions",
 205                                [FeatureSSE2]>;
 206 def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
 207                                          "Enable vpclmulqdq instructions",
 208                                          [FeatureAVX, FeaturePCLMUL]>;
 209 def FeatureFMA4    : SubtargetFeature<"fma4", "HasFMA4", "true",
     // Four-operand FMA: sets HasFMA4 and implies both avx and sse4a.
 210                                       "Enable four-operand fused multiply-add",
 211                                       [FeatureAVX, FeatureSSE4A]>;
 212 def FeatureXOP     : SubtargetFeature<"xop", "HasXOP", "true",
 213                                       "Enable XOP instructions",
 214                                       [FeatureFMA4]>;
 215 def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
 216                                           "HasSSEUnalignedMem", "true",
 217                       "Allow unaligned memory operands with SSE instructions (this may require setting a configuration bit in the processor)">;
     // aes implies sse2; vaes implies both avx2 and aes.
 218 def FeatureAES     : SubtargetFeature<"aes", "HasAES", "true",
 219                                       "Enable AES instructions",
 220                                       [FeatureSSE2]>;
 221 def FeatureVAES    : SubtargetFeature<"vaes", "HasVAES", "true",
 222                        "Promote selected AES instructions to AVX512/AVX registers",
 223                         [FeatureAVX2, FeatureAES]>;
     // The records from tbm through adx are plain boolean features with no
     // implied features.
 224 def FeatureTBM     : SubtargetFeature<"tbm", "HasTBM", "true",
 225                                       "Enable TBM instructions">;
 226 def FeatureLWP     : SubtargetFeature<"lwp", "HasLWP", "true",
 227                                       "Enable LWP instructions">;
 228 def FeatureMOVBE   : SubtargetFeature<"movbe", "HasMOVBE", "true",
 229                                       "Support MOVBE instruction">;
     // Note: the feature string is "rdrnd", while the record and field spell RDRAND.
 230 def FeatureRDRAND  : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
 231                                       "Support RDRAND instruction">;
 232 def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
 233                                        "Support FS/GS Base instructions">;
 234 def FeatureLZCNT   : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
 235                                       "Support LZCNT instruction">;
 236 def FeatureBMI     : SubtargetFeature<"bmi", "HasBMI", "true",
 237                                       "Support BMI instructions">;
 238 def FeatureBMI2    : SubtargetFeature<"bmi2", "HasBMI2", "true",
 239                                       "Support BMI2 instructions">;
 240 def FeatureRTM     : SubtargetFeature<"rtm", "HasRTM", "true",
 241                                       "Support RTM instructions">;
 242 def FeatureADX     : SubtargetFeature<"adx", "HasADX", "true",
 243                                       "Support ADX instructions">;
     // sha implies sse2; sha512 implies avx2.
 244 def FeatureSHA     : SubtargetFeature<"sha", "HasSHA", "true",
 245                                       "Enable SHA instructions",
 246                                       [FeatureSSE2]>;
 247 def FeatureSHA512  : SubtargetFeature<"sha512", "HasSHA512", "true",
 248                                       "Support SHA512 instructions",
 249                                       [FeatureAVX2]>;
 250 // Processor supports CET SHSTK - Control-Flow Enforcement Technology
 251 // using Shadow Stack
 252 def FeatureSHSTK   : SubtargetFeature<"shstk", "HasSHSTK", "true",
 253                        "Support CET Shadow-Stack instructions">;
     // sm3 implies avx; sm4 implies avx2.
 254 def FeatureSM3     : SubtargetFeature<"sm3", "HasSM3", "true",
 255                                       "Support SM3 instructions",
 256                                       [FeatureAVX]>;
 257 def FeatureSM4     : SubtargetFeature<"sm4", "HasSM4", "true",
 258                                       "Support SM4 instructions",
 259                                       [FeatureAVX2]>;
 260 def FeaturePRFCHW  : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
 261                                       "Support PRFCHW instructions">;
 262 def FeatureRDSEED  : SubtargetFeature<"rdseed", "HasRDSEED", "true",
 263                                       "Support RDSEED instruction">;
     // Note: the feature string is just "sahf"; the field is HasLAHFSAHF64.
 264 def FeatureLAHFSAHF64 : SubtargetFeature<"sahf", "HasLAHFSAHF64", "true",
 265                            "Support LAHF and SAHF instructions in 64-bit mode">;
 266 def FeatureMWAITX  : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
 267                                       "Enable MONITORX/MWAITX timer functionality">;
 268 def FeatureCLZERO  : SubtargetFeature<"clzero", "HasCLZERO", "true",
 269                                       "Enable Cache Line Zero">;
 270 def FeatureCLDEMOTE  : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
 271                                       "Enable Cache Line Demote">;
 272 def FeaturePTWRITE  : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
 273                                       "Support ptwrite instruction">;
     // AMX family: amx-tile is the base feature; amx-int8, amx-bf16, amx-fp16 and
     // amx-complex each imply it.
 274 def FeatureAMXTILE     : SubtargetFeature<"amx-tile", "HasAMXTILE", "true",
 275                                       "Support AMX-TILE instructions">;
 276 def FeatureAMXINT8     : SubtargetFeature<"amx-int8", "HasAMXINT8", "true",
 277                                       "Support AMX-INT8 instructions",
 278                                       [FeatureAMXTILE]>;
 279 def FeatureAMXBF16     : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
 280                                       "Support AMX-BF16 instructions",
 281                                       [FeatureAMXTILE]>;
 282 def FeatureAMXFP16     : SubtargetFeature<"amx-fp16", "HasAMXFP16", "true",
 283                                       "Support AMX amx-fp16 instructions",
 284                                       [FeatureAMXTILE]>;
 285 def FeatureAMXCOMPLEX : SubtargetFeature<"amx-complex", "HasAMXCOMPLEX", "true",
 286                                          "Support AMX-COMPLEX instructions",
 287                                          [FeatureAMXTILE]>;
 288 def FeatureCMPCCXADD : SubtargetFeature<"cmpccxadd", "HasCMPCCXADD", "true",
 289                                         "Support CMPCCXADD instructions">;
     // raoint passes an explicit empty implied-features list, i.e. it implies
     // nothing.
 290 def FeatureRAOINT : SubtargetFeature<"raoint", "HasRAOINT", "true",
 291                                      "Support RAO-INT instructions",
 292                                      []>;
     // avxneconvert implies avx2.
 293 def FeatureAVXNECONVERT : SubtargetFeature<"avxneconvert", "HasAVXNECONVERT", "true",
 294                                            "Support AVX-NE-CONVERT instructions",
 295                                            [FeatureAVX2]>;
 296 def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
 297                                       "Invalidate Process-Context Identifier">;
 298 def FeatureSGX     : SubtargetFeature<"sgx", "HasSGX", "true",
 299                                       "Enable Software Guard Extensions">;
 300 def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
 301                                       "Flush A Cache Line Optimized">;
 302 def FeatureCLWB    : SubtargetFeature<"clwb", "HasCLWB", "true",
 303                                       "Cache Line Write Back">;
 304 def FeatureWBNOINVD    : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
 305                                       "Write Back No Invalidate">;
 306 def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
 307                                     "Support RDPID instructions">;
 308 def FeatureRDPRU : SubtargetFeature<"rdpru", "HasRDPRU", "true",
 309                                     "Support RDPRU instructions">;
 310 def FeatureWAITPKG  : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
 311                                       "Wait and pause enhancements">;
 312 def FeatureENQCMD : SubtargetFeature<"enqcmd", "HasENQCMD", "true",
 313                                      "Has ENQCMD instructions">;
     // Key Locker: widekl implies kl, which in turn implies sse2.
 314 def FeatureKL  : SubtargetFeature<"kl", "HasKL", "true",
 315                                   "Support Key Locker kl Instructions",
 316                                   [FeatureSSE2]>;
 317 def FeatureWIDEKL  : SubtargetFeature<"widekl", "HasWIDEKL", "true",
 318                                       "Support Key Locker wide Instructions",
 319                                       [FeatureKL]>;
     // The remaining records in this run are standalone boolean features with no
     // implied features.
 320 def FeatureHRESET : SubtargetFeature<"hreset", "HasHRESET", "true",
 321                                       "Has hreset instruction">;
 322 def FeatureSERIALIZE : SubtargetFeature<"serialize", "HasSERIALIZE", "true",
 323                                         "Has serialize instruction">;
 324 def FeatureTSXLDTRK : SubtargetFeature<"tsxldtrk", "HasTSXLDTRK", "true",
 325                                        "Support TSXLDTRK instructions">;
 326 def FeatureUINTR : SubtargetFeature<"uintr", "HasUINTR", "true",
 327                                     "Has UINTR Instructions">;
 328 def FeatureUSERMSR : SubtargetFeature<"usermsr", "HasUSERMSR", "true",
 329                                       "Support USERMSR instructions">;
 330 def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
 331                                       "platform configuration instruction">;
 332 def FeatureMOVDIRI  : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
 333                                        "Support movdiri instruction (direct store integer)">;
 334 def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
 335                                         "Support movdir64b instruction (direct store 64 bytes)">;
 336 def FeatureAVX10_1 : SubtargetFeature<"avx10.1-256", "HasAVX10_1", "true",
 337                                       "Support AVX10.1 up to 256-bit instruction",
 338                                       [FeatureCDI, FeatureVBMI, FeatureIFMA, FeatureVNNI,
 339                                        FeatureBF16, FeatureVPOPCNTDQ, FeatureVBMI2, FeatureBITALG,
 340                                        FeatureVAES, FeatureVPCLMULQDQ, FeatureFP16]>;
     // avx10.1-256 above bundles the listed features; evex512 is not in that list.
     // avx10.1-512 is avx10.1-256 plus evex512.
 341 def FeatureAVX10_1_512 : SubtargetFeature<"avx10.1-512", "HasAVX10_1_512", "true",
 342                                           "Support AVX10.1 up to 512-bit instruction",
 343                                           [FeatureAVX10_1, FeatureEVEX512]>;
     // NOTE(review): egpr, push2pop2, ppx, ndd, ccmp and cf look like the Intel APX
     // feature set -- confirm. They are independent bits here: none implies
     // another.
 344 def FeatureEGPR : SubtargetFeature<"egpr", "HasEGPR", "true",
 345                                    "Support extended general purpose register">;
 346 def FeaturePush2Pop2 : SubtargetFeature<"push2pop2", "HasPush2Pop2", "true",
 347                                         "Support PUSH2/POP2 instructions">;
 348 def FeaturePPX : SubtargetFeature<"ppx", "HasPPX", "true",
 349                                   "Support Push-Pop Acceleration">;
 350 def FeatureNDD : SubtargetFeature<"ndd", "HasNDD", "true",
 351                                   "Support non-destructive destination">;
 352 def FeatureCCMP : SubtargetFeature<"ccmp", "HasCCMP", "true",
 353                                    "Support conditional cmp & test instructions">;
 354 def FeatureCF : SubtargetFeature<"cf", "HasCF", "true",
 355                                  "Support conditional faulting">;
 356
 357 // Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
 358 // "string operations"). See "REP String Enhancement" in the Intel Software
 359 // Development Manual. This feature essentially means that REP MOVSB will copy
 360 // using the largest available size instead of copying bytes one by one, making
 361 // it at least as fast as REPMOVS{W,D,Q}.
 362 def FeatureERMSB
 363     : SubtargetFeature<
 364           "ermsb", "HasERMSB", "true",
 365           "REP MOVS/STOS are fast">;
 366
 367 // Icelake and newer processors have Fast Short REP MOV.
     // fsrm is an independent bit: the record does not imply ermsb.
 368 def FeatureFSRM
 369     : SubtargetFeature<
 370           "fsrm", "HasFSRM", "true",
 371           "REP MOVSB of short lengths is faster">;
 372
     // soft-float sets UseSoftFloat; unlike the records above, the field is named
     // Use* rather than Has*.
 373 def FeatureSoftFloat
 374     : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
 375                        "Use software floating point features">;
 376
 377 //===----------------------------------------------------------------------===//
 378 // X86 Subtarget Security Mitigation features
 379 //===----------------------------------------------------------------------===//
 380
 381 // Lower indirect calls using a special construct called a `retpoline` to
 382 // mitigate potential Spectre v2 attacks against them.
 383 def FeatureRetpolineIndirectCalls
 384     : SubtargetFeature<
 385           "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
 386           "Remove speculation of indirect calls from the generated code">;
 387
 388 // Lower indirect branches and switches either using conditional branch trees
 389 // or using a special construct called a `retpoline` to mitigate potential
 390 // Spectre v2 attacks against them.
 391 def FeatureRetpolineIndirectBranches
 392     : SubtargetFeature<
 393           "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
 394           "Remove speculation of indirect branches from the generated code">;
 395
 396 // Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
 397 // `retpoline-indirect-branches` above.
     // It sets its own field (DeprecatedUseRetpoline) and turns the two features on
     // through its implied-features list.
 398 def FeatureRetpoline
 399     : SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
 400                        "Remove speculation of indirect branches from the "
 401                        "generated code, either by avoiding them entirely or "
 402                        "lowering them with a speculation blocking construct",
 403                        [FeatureRetpolineIndirectCalls,
 404                         FeatureRetpolineIndirectBranches]>;
 405
 406 // Rely on external thunks for the emitted retpoline calls. This allows users
 407 // to provide their own custom thunk definitions in highly specialized
 408 // environments such as a kernel that does boot-time hot patching.
     // NOTE(review): the implied-features list enables retpoline-indirect-calls, so
     // selecting this feature always brings that one along, even though the
     // description says it only has effect when combined with another retpoline
     // feature.
 409 def FeatureRetpolineExternalThunk
 410     : SubtargetFeature<
 411           "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
 412           "When lowering an indirect call or branch using a `retpoline`, rely "
 413           "on the specified user provided thunk rather than emitting one "
 414           "ourselves. Only has effect when combined with some other retpoline "
 415           "feature", [FeatureRetpolineIndirectCalls]>;
 416
 417 // Mitigate LVI attacks against indirect calls/branches and call returns
 418 def FeatureLVIControlFlowIntegrity
 419     : SubtargetFeature<
 420           "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
 421           "Prevent indirect calls/branches from using a memory operand, and "
 422           "precede all indirect calls/branches from a register with an "
 423           "LFENCE instruction to serialize control flow. Also decompose RET "
 424           "instructions into a POP+LFENCE+JMP sequence.">;
 425
 426 // Enable SESES to mitigate speculative execution attacks
     // seses implies lvi-cfi through its implied-features list.
 427 def FeatureSpeculativeExecutionSideEffectSuppression
 428     : SubtargetFeature<
 429           "seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
 430           "Prevent speculative execution side channel timing attacks by "
 431           "inserting a speculation barrier before memory reads, memory writes, "
 432           "and conditional branches. Implies LVI Control Flow integrity.",
 433           [FeatureLVIControlFlowIntegrity]>;
 434
 435 // Mitigate LVI attacks against data loads
     // Independent of lvi-cfi: neither record implies the other.
 436 def FeatureLVILoadHardening
 437     : SubtargetFeature<
 438           "lvi-load-hardening", "UseLVILoadHardening", "true",
 439           "Insert LFENCE instructions to prevent data speculatively injected "
 440           "into loads from being used maliciously.">;
 441
     // tagged-globals sets AllowTaggedGlobals and implies nothing.
 442 def FeatureTaggedGlobals
 443     : SubtargetFeature<
 444           "tagged-globals", "AllowTaggedGlobals", "true",
 445           "Use an instruction sequence for taking the address of a global "
 446           "that allows a memory tag in the upper address bits.">;
 447
 448 // Control codegen mitigation against Straight Line Speculation vulnerability.
     // Two independent switches: one for RET instructions, one for indirect JMPs.
 449 def FeatureHardenSlsRet
 450     : SubtargetFeature<
 451           "harden-sls-ret", "HardenSlsRet", "true",
 452           "Harden against straight line speculation across RET instructions.">;
 453
 454 def FeatureHardenSlsIJmp
 455     : SubtargetFeature<
 456           "harden-sls-ijmp", "HardenSlsIJmp", "true",
 457           "Harden against straight line speculation across indirect JMP instructions.">;
 458
 459 //===----------------------------------------------------------------------===//
 460 // X86 Subtarget Tuning features
 461 //===----------------------------------------------------------------------===//
     //
     // Records in this section are named Tuning* instead of Feature*, but use the
     // same SubtargetFeature form. None of the records visible here lists implied
     // features.
 462 def TuningPreferMovmskOverVTest : SubtargetFeature<"prefer-movmsk-over-vtest",
 463                                        "PreferMovmskOverVTest", "true",
 464                                        "Prefer movmsk over vtest instruction">;
 465
 466 def TuningSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
 467                                        "SHLD instruction is slow">;
 468
 469 def TuningSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
 470                                         "PMULLD instruction is slow (compared to PMULLW/PMULHW and PMULUDQ)">;
 471
 472 def TuningSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
 473                                           "true",
 474                                           "PMADDWD is slower than PMULLD">;
 475
 476 // FIXME: This should not apply to CPUs that do not have SSE.
 477 def TuningSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
 478                                 "IsUnalignedMem16Slow", "true",
 479                                 "Slow unaligned 16-byte memory access">;
 480
 481 def TuningSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
 482                                 "IsUnalignedMem32Slow", "true",
 483                                 "Slow unaligned 32-byte memory access">;
 484
 485 def TuningLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
 486                                      "Use LEA for adjusting the stack pointer (this is an optimization for Intel Atom processors)">;
 487
 488 // True if 8-bit divisions are significantly faster than
 489 // 32-bit divisions and should be used when possible.
     // Note: the feature string ("idivl-to-divb") names the rewrite, while the
     // record and field names (SlowDivide32) name the slow operation.
 490 def TuningSlowDivide32 : SubtargetFeature<"idivl-to-divb",
 491                                      "HasSlowDivide32", "true",
 492                                      "Use 8-bit divide for positive values less than 256">;
 493
 494 // True if 32-bit divides are significantly faster than
 495 // 64-bit divisions and should be used when possible.
 496 def TuningSlowDivide64 : SubtargetFeature<"idivq-to-divl",
 497                                      "HasSlowDivide64", "true",
 498                                      "Use 32-bit divide for positive values less than 2^32">;
 499
 500 def TuningPadShortFunctions : SubtargetFeature<"pad-short-functions",
 501                                      "PadShortFunctions", "true",
 502                                      "Pad short functions (to prevent a stall when returning too early)">;
 503
 504 // On some processors, instructions that implicitly take two memory operands are
 505 // slow. In practice, this means that CALL, PUSH, and POP with memory operands
 506 // should be avoided in favor of a MOV + register CALL/PUSH/POP.
 507 def TuningSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
 508                                      "SlowTwoMemOps", "true",
 509                                      "Two memory operand instructions are slow">;
 510
 511 // True if the LEA instruction inputs have to be ready at address generation
 512 // (AG) time.
 513 def TuningLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LeaUsesAG", "true",
 514                                    "LEA instruction needs inputs at AG stage">;
 515
 516 def TuningSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
 517                                    "LEA instruction with certain arguments is slow">;
 518
 519 // True if the LEA instruction has all three source operands (base, index,
 520 // and offset), or if it uses base and index registers where the base is
 521 // EBP, RBP, or R13.
 522 def TuningSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
 523                                    "LEA instruction with 3 ops or certain registers is slow">;
 524
 525 // True if INC and DEC instructions are slow when writing to flags
 526 def TuningSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
 527                                    "INC and DEC instructions are slower than ADD and SUB">;
 528
 529 def TuningPOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
 530                                      "HasPOPCNTFalseDeps", "true",
 531                                      "POPCNT has a false dependency on dest register">;
 532
 533 def TuningLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
 534                                      "HasLZCNTFalseDeps", "true",
 535                                      "LZCNT/TZCNT have a false dependency on dest register">;
 536
 537 def TuningMULCFalseDeps : SubtargetFeature<"false-deps-mulc",
 538                                "HasMULCFalseDeps", "true",
 539                                "VF[C]MULCPH/SH has a false dependency on dest register">;
 540
 541 def TuningPERMFalseDeps : SubtargetFeature<"false-deps-perm",
 542                                "HasPERMFalseDeps", "true",
 543                                "VPERMD/Q/PS/PD has a false dependency on dest register">;
 544
 545 def TuningRANGEFalseDeps : SubtargetFeature<"false-deps-range",
 546                                "HasRANGEFalseDeps", "true",
 547                                "VRANGEPD/PS/SD/SS has a false dependency on dest register">;
 548
 549 def TuningGETMANTFalseDeps : SubtargetFeature<"false-deps-getmant",
 550                                "HasGETMANTFalseDeps", "true",
 551                                "VGETMANTSS/SD/SH and VGETMANTPS/PD(memory version) has a"
 552                                " false dependency on dest register">;
 553
 554 def TuningMULLQFalseDeps : SubtargetFeature<"false-deps-mullq",
 555                                "HasMULLQFalseDeps", "true",
 556                                "VPMULLQ has a false dependency on dest register">;
 557
 558 def TuningSBBDepBreaking : SubtargetFeature<"sbb-dep-breaking",
 559                                      "HasSBBDepBreaking", "true",
 560                                      "SBB with same register has no source dependency">;
 561
 562 // On recent X86 (port-bound) processors, it is preferable to combine into a
 563 // single shuffle using a variable mask rather than multiple fixed shuffles.
 564 def TuningFastVariableCrossLaneShuffle
 565     : SubtargetFeature<"fast-variable-crosslane-shuffle",
 566                        "HasFastVariableCrossLaneShuffle",
 567                        "true", "Cross-lane shuffles with variable masks are fast">;
 568 def TuningFastVariablePerLaneShuffle
 569     : SubtargetFeature<"fast-variable-perlane-shuffle",
 570                        "HasFastVariablePerLaneShuffle",
 571                        "true", "Per-lane shuffles with variable masks are fast">;
 572
 573 // Goldmont / Tremont (Atom in general) have no bypass delay.
 574 def TuningNoDomainDelay : SubtargetFeature<"no-bypass-delay",
 575                                    "NoDomainDelay","true",
 576                                    "Has no bypass delay when using the 'wrong' domain">;
 577
 578 // Many processors (Nehalem+ on Intel) have no bypass delay when
 579 // using the wrong mov type.
 580 def TuningNoDomainDelayMov : SubtargetFeature<"no-bypass-delay-mov",
 581                                    "NoDomainDelayMov","true",
 582                                    "Has no bypass delay when using the 'wrong' mov type">;
 583
 584 // Newer processors (Skylake+ on Intel) have no bypass delay when
 585 // using the wrong blend type.
 586 def TuningNoDomainDelayBlend : SubtargetFeature<"no-bypass-delay-blend",
 587                                    "NoDomainDelayBlend","true",
 588                                    "Has no bypass delay when using the 'wrong' blend type">;
 589
 590 // Newer processors (Haswell+ on Intel) have no bypass delay when
 591 // using the wrong shuffle type.
 592 def TuningNoDomainDelayShuffle : SubtargetFeature<"no-bypass-delay-shuffle",
 593                                    "NoDomainDelayShuffle","true",
 594                                    "Has no bypass delay when using the 'wrong' shuffle type">;
 595
 596 // Prefer lowering shuffles on AVX512 targets (e.g. Skylake Server) to
 597 // imm shifts/rotate if they can use more ports than regular shuffles.
 598 def TuningPreferShiftShuffle : SubtargetFeature<"faster-shift-than-shuffle",
 599                                    "PreferLowerShuffleAsShift", "true",
 600                                    "Shifts are faster (or as fast) as shuffle">;
 601
 602 def TuningFastImmVectorShift : SubtargetFeature<"tuning-fast-imm-vector-shift",
 603                                    "FastImmVectorShift", "true",
 604                                    "Vector shifts are fast (2/cycle) as opposed to slow (1/cycle)">;
 605
 606 // On some X86 processors, a vzeroupper instruction should be inserted after
 607 // using ymm/zmm registers before executing code that may use SSE instructions.
 608 def TuningInsertVZEROUPPER
 609     : SubtargetFeature<"vzeroupper",
 610                        "InsertVZEROUPPER",
 611                        "true", "Should insert vzeroupper instructions">;
 612
 613 // TuningFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
 614 // than the corresponding NR code. TuningFastVectorFSQRT should be enabled if
 615 // vector FSQRT has higher throughput than the corresponding NR code.
 616 // The idea is that throughput bound code is likely to be vectorized, so for
 617 // vectorized code we should care about the throughput of SQRT operations.
 618 // But if the code is scalar that probably means that the code has some kind of
 619 // dependency and we should care more about reducing the latency.
 620
 621 // True if hardware SQRTSS instruction is at least as fast (latency) as
 622 // RSQRTSS followed by a Newton-Raphson iteration.
 623 def TuningFastScalarFSQRT
 624     : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
 625                        "true", "Scalar SQRT is fast (disable Newton-Raphson)">;
 626 // True if hardware SQRTPS/VSQRTPS instructions are at least as fast
 627 // (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration.
 628 def TuningFastVectorFSQRT
 629     : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
 630                        "true", "Vector SQRT is fast (disable Newton-Raphson)">;
 631
 632 // If lzcnt has equivalent latency/throughput to most simple integer ops, it can
 633 // be used to replace test/set sequences.
 634 def TuningFastLZCNT
 635     : SubtargetFeature<
 636           "fast-lzcnt", "HasFastLZCNT", "true",
 637           "LZCNT instructions are as fast as most simple integer ops">;
 638
 639 // True if the target can efficiently decode NOPs up to 7 bytes in length.
 640 def TuningFast7ByteNOP
 641     : SubtargetFeature<
 642           "fast-7bytenop", "HasFast7ByteNOP", "true",
 643           "Target can quickly decode up to 7 byte NOPs">;
 644
 645 // True if the target can efficiently decode NOPs up to 11 bytes in length.
 646 def TuningFast11ByteNOP
 647     : SubtargetFeature<
 648           "fast-11bytenop", "HasFast11ByteNOP", "true",
 649           "Target can quickly decode up to 11 byte NOPs">;
 650
 651 // True if the target can efficiently decode NOPs up to 15 bytes in length.
 652 def TuningFast15ByteNOP
 653     : SubtargetFeature<
 654           "fast-15bytenop", "HasFast15ByteNOP", "true",
 655           "Target can quickly decode up to 15 byte NOPs">;
 656
 657 // Sandy Bridge and newer processors can use SHLD with the same source on both
 658 // inputs to implement rotate to avoid the partial flag update of the normal
 659 // rotate instructions.
 660 def TuningFastSHLDRotate
 661     : SubtargetFeature<
 662           "fast-shld-rotate", "HasFastSHLDRotate", "true",
 663           "SHLD can be used as a faster rotate">;
 664
 665 // Bulldozer and newer processors can merge CMP/TEST (but not other
 666 // instructions) with conditional branches.
 667 def TuningBranchFusion
 668     : SubtargetFeature<"branchfusion", "HasBranchFusion", "true",
 669                  "CMP/TEST can be fused with conditional branches">;
 670
 671 // Sandy Bridge and newer processors have many instructions that can be
 672 // fused with conditional branches and pass through the CPU as a single
 673 // operation.
 674 def TuningMacroFusion
 675     : SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
 676                  "Various instructions can be fused with conditional branches">;
 677
 678 // Gather is available since Haswell (AVX2 set). So technically, we can
 679 // generate Gathers on all AVX2 processors. But the overhead on HSW is high.
 680 // Skylake Client processor has faster Gathers than HSW and performance is
 681 // similar to Skylake Server (AVX-512).
 682 def TuningFastGather
 683     : SubtargetFeature<"fast-gather", "HasFastGather", "true",
 684                        "Indicates if gather is reasonably fast (this is true for Skylake client and all AVX-512 CPUs)">;
 685
 686 def TuningPreferNoGather
 687     : SubtargetFeature<"prefer-no-gather", "PreferGather", "false",
 688                        "Prefer no gather instructions">;
 689 def TuningPreferNoScatter
 690     : SubtargetFeature<"prefer-no-scatter", "PreferScatter", "false",
 691                        "Prefer no scatter instructions">;
 692
 693 def TuningPrefer128Bit
 694     : SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true",
 695                        "Prefer 128-bit AVX instructions">;
 696
 697 def TuningPrefer256Bit
 698     : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
 699                        "Prefer 256-bit AVX instructions">;
 700
 701 def TuningAllowLight256Bit
 702     : SubtargetFeature<"allow-light-256-bit", "AllowLight256Bit", "true",
 703                        "Enable generation of 256-bit load/stores even if we prefer 128-bit">;
 704
 705 def TuningPreferMaskRegisters
 706     : SubtargetFeature<"prefer-mask-registers", "PreferMaskRegisters", "true",
 707                        "Prefer AVX512 mask registers over PTEST/MOVMSK">;
 708
 709 def TuningFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
 710           "Indicates that the BEXTR instruction is implemented as a single uop "
 711           "with good throughput">;
 712
 713 // Combine vector math operations with shuffles into horizontal math
 714 // instructions if a CPU implements horizontal operations (introduced with
 715 // SSE3) with better latency/throughput than the alternative sequence.
 716 def TuningFastHorizontalOps
 717     : SubtargetFeature<
 718         "fast-hops", "HasFastHorizontalOps", "true",
 719         "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
 720         "normal vector instructions with shuffles">;
 721
 722 def TuningFastScalarShiftMasks
 723     : SubtargetFeature<
 724         "fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
 725         "Prefer a left/right scalar logical shift pair over a shift+and pair">;
 726
 727 def TuningFastVectorShiftMasks
 728     : SubtargetFeature<
 729         "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
 730         "Prefer a left/right vector logical shift pair over a shift+and pair">;
 731
 732 def TuningFastMOVBE
 733     : SubtargetFeature<"fast-movbe", "HasFastMOVBE", "true",
 734     "Prefer a movbe over a single-use load + bswap / single-use bswap + store">;
 735
 736 def TuningUseSLMArithCosts
 737     : SubtargetFeature<"use-slm-arith-costs", "UseSLMArithCosts", "true",
 738         "Use Silvermont specific arithmetic costs">;
 739
 740 def TuningUseGLMDivSqrtCosts
 741     : SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
 742         "Use Goldmont specific floating point div/sqrt costs">;
 743
 744 //===----------------------------------------------------------------------===//
 745 // X86 CPU Families
 746 // TODO: Remove these - use general tuning features to determine codegen.
 747 //===----------------------------------------------------------------------===//
 748
 749 // Bonnell
 750 def ProcIntelAtom : SubtargetFeature<"", "IsAtom", "true", "Is Intel Atom processor">;
 751
 752 //===----------------------------------------------------------------------===//
 753 // Register File Description
 754 //===----------------------------------------------------------------------===//
 755
 756 include "X86RegisterInfo.td"
 757 include "X86RegisterBanks.td"
 758
 759 //===----------------------------------------------------------------------===//
 760 // Instruction Descriptions
 761 //===----------------------------------------------------------------------===//
 762
 763 include "X86Schedule.td"
 764 include "X86InstrInfo.td"
 765 include "X86SchedPredicates.td"
 766
 767 def X86InstrInfo : InstrInfo;
 768
 769 //===----------------------------------------------------------------------===//
 770 // X86 Scheduler Models
 771 //===----------------------------------------------------------------------===//
 772
 773 include "X86ScheduleAtom.td"
 774 include "X86SchedSandyBridge.td"
 775 include "X86SchedHaswell.td"
 776 include "X86SchedBroadwell.td"
 777 include "X86ScheduleSLM.td"
 778 include "X86ScheduleZnver1.td"
 779 include "X86ScheduleZnver2.td"
 780 include "X86ScheduleZnver3.td"
 781 include "X86ScheduleZnver4.td"
 782 include "X86ScheduleBdVer2.td"
 783 include "X86ScheduleBtVer2.td"
 784 include "X86SchedSkylakeClient.td"
 785 include "X86SchedSkylakeServer.td"
 786 include "X86SchedIceLake.td"
 787 include "X86SchedAlderlakeP.td"
 788 include "X86SchedSapphireRapids.td"
 789
 790 //===----------------------------------------------------------------------===//
 791 // X86 Processor Feature Lists
 792 //===----------------------------------------------------------------------===//
 793
 794 def ProcessorFeatures {
 795   // x86-64 micro-architecture levels: x86-64 and x86-64-v[234]
 796   list<SubtargetFeature> X86_64V1Features = [
 797     FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSE2,
 798     FeatureFXSR, FeatureNOPL, FeatureX86_64,
 799   ];
 800   list<SubtargetFeature> X86_64V1Tuning = [
 801     TuningMacroFusion,
 802     TuningSlow3OpsLEA,
 803     TuningSlowDivide64,
 804     TuningSlowIncDec,
 805     TuningInsertVZEROUPPER
 806   ];
 807
 808   list<SubtargetFeature> X86_64V2Features = !listconcat(X86_64V1Features, [
 809     FeatureCX16, FeatureLAHFSAHF64, FeatureCRC32, FeaturePOPCNT,
 810     FeatureSSE42
 811   ]);
 812   list<SubtargetFeature> X86_64V2Tuning = [
 813     TuningMacroFusion,
 814     TuningSlow3OpsLEA,
 815     TuningSlowDivide64,
 816     TuningSlowUAMem32,
 817     TuningFastScalarFSQRT,
 818     TuningFastSHLDRotate,
 819     TuningFast15ByteNOP,
 820     TuningPOPCNTFalseDeps,
 821     TuningInsertVZEROUPPER
 822   ];
 823
 824   list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [
 825     FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT,
 826     FeatureMOVBE, FeatureXSAVE
 827   ]);
 828   list<SubtargetFeature> X86_64V3Tuning = [
 829     TuningMacroFusion,
 830     TuningSlow3OpsLEA,
 831     TuningSlowDivide64,
 832     TuningFastScalarFSQRT,
 833     TuningFastSHLDRotate,
 834     TuningFast15ByteNOP,
 835     TuningFastVariableCrossLaneShuffle,
 836     TuningFastVariablePerLaneShuffle,
 837     TuningPOPCNTFalseDeps,
 838     TuningLZCNTFalseDeps,
 839     TuningInsertVZEROUPPER,
 840     TuningAllowLight256Bit
 841   ];
 842
 843   list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [
 844     FeatureEVEX512,
 845     FeatureBWI,
 846     FeatureCDI,
 847     FeatureDQI,
 848     FeatureVLX,
 849   ]);
 850   list<SubtargetFeature> X86_64V4Tuning = [
 851     TuningMacroFusion,
 852     TuningSlow3OpsLEA,
 853     TuningSlowDivide64,
 854     TuningFastScalarFSQRT,
 855     TuningFastVectorFSQRT,
 856     TuningFastSHLDRotate,
 857     TuningFast15ByteNOP,
 858     TuningFastVariableCrossLaneShuffle,
 859     TuningFastVariablePerLaneShuffle,
 860     TuningPrefer256Bit,
 861     TuningFastGather,
 862     TuningPOPCNTFalseDeps,
 863     TuningInsertVZEROUPPER,
 864     TuningAllowLight256Bit
 865   ];
 866
 867   // Nehalem
 868   list<SubtargetFeature> NHMFeatures = X86_64V2Features;
 869   list<SubtargetFeature> NHMTuning = [TuningMacroFusion,
 870                                       TuningInsertVZEROUPPER,
 871                                       TuningNoDomainDelayMov];
 872
 873   // Westmere
 874   list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
 875   list<SubtargetFeature> WSMTuning = NHMTuning;
 876   list<SubtargetFeature> WSMFeatures =
 877     !listconcat(NHMFeatures, WSMAdditionalFeatures);
 878
 879   // Sandybridge
 880   list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX,
 881                                                   FeatureXSAVE,
 882                                                   FeatureXSAVEOPT];
 883   list<SubtargetFeature> SNBTuning = [TuningMacroFusion,
 884                                       TuningSlow3OpsLEA,
 885                                       TuningSlowDivide64,
 886                                       TuningSlowUAMem32,
 887                                       TuningFastScalarFSQRT,
 888                                       TuningFastSHLDRotate,
 889                                       TuningFast15ByteNOP,
 890                                       TuningPOPCNTFalseDeps,
 891                                       TuningInsertVZEROUPPER,
 892                                       TuningNoDomainDelayMov];
 893   list<SubtargetFeature> SNBFeatures =
 894     !listconcat(WSMFeatures, SNBAdditionalFeatures);
 895
 896   // Ivybridge
 897   list<SubtargetFeature> IVBAdditionalFeatures = [FeatureRDRAND,
 898                                                   FeatureF16C,
 899                                                   FeatureFSGSBase];
 900   list<SubtargetFeature> IVBTuning = SNBTuning;
 901   list<SubtargetFeature> IVBFeatures =
 902     !listconcat(SNBFeatures, IVBAdditionalFeatures);
 903
 904   // Haswell
 905   list<SubtargetFeature> HSWAdditionalFeatures = [FeatureAVX2,
 906                                                   FeatureBMI,
 907                                                   FeatureBMI2,
 908                                                   FeatureERMSB,
 909                                                   FeatureFMA,
 910                                                   FeatureINVPCID,
 911                                                   FeatureLZCNT,
 912                                                   FeatureMOVBE];
 913   list<SubtargetFeature> HSWTuning = [TuningMacroFusion,
 914                                       TuningSlow3OpsLEA,
 915                                       TuningSlowDivide64,
 916                                       TuningFastScalarFSQRT,
 917                                       TuningFastSHLDRotate,
 918                                       TuningFast15ByteNOP,
 919                                       TuningFastVariableCrossLaneShuffle,
 920                                       TuningFastVariablePerLaneShuffle,
 921                                       TuningPOPCNTFalseDeps,
 922                                       TuningLZCNTFalseDeps,
 923                                       TuningInsertVZEROUPPER,
 924                                       TuningAllowLight256Bit,
 925                                       TuningNoDomainDelayMov,
 926                                       TuningNoDomainDelayShuffle];
 927   list<SubtargetFeature> HSWFeatures =
 928     !listconcat(IVBFeatures, HSWAdditionalFeatures);
 929
 930   // Broadwell
 931   list<SubtargetFeature> BDWAdditionalFeatures = [FeatureADX,
 932                                                   FeatureRDSEED,
 933                                                   FeaturePRFCHW];
 934   list<SubtargetFeature> BDWTuning = HSWTuning;
 935   list<SubtargetFeature> BDWFeatures =
 936     !listconcat(HSWFeatures, BDWAdditionalFeatures);
 937
 938   // Skylake
 939   list<SubtargetFeature> SKLAdditionalFeatures = [FeatureAES,
 940                                                   FeatureXSAVEC,
 941                                                   FeatureXSAVES,
 942                                                   FeatureCLFLUSHOPT];
 943   list<SubtargetFeature> SKLTuning = [TuningFastGather,
 944                                       TuningMacroFusion,
 945                                       TuningSlow3OpsLEA,
 946                                       TuningSlowDivide64,
 947                                       TuningFastScalarFSQRT,
 948                                       TuningFastVectorFSQRT,
 949                                       TuningFastSHLDRotate,
 950                                       TuningFast15ByteNOP,
 951                                       TuningFastVariableCrossLaneShuffle,
 952                                       TuningFastVariablePerLaneShuffle,
 953                                       TuningPOPCNTFalseDeps,
 954                                       TuningInsertVZEROUPPER,
 955                                       TuningAllowLight256Bit,
 956                                       TuningNoDomainDelayMov,
 957                                       TuningNoDomainDelayShuffle,
 958                                       TuningNoDomainDelayBlend];
 959   list<SubtargetFeature> SKLFeatures =
 960     !listconcat(BDWFeatures, SKLAdditionalFeatures);
 961
 962   // Skylake-AVX512
 963   list<SubtargetFeature> SKXAdditionalFeatures = [FeatureAES,
 964                                                   FeatureXSAVEC,
 965                                                   FeatureXSAVES,
 966                                                   FeatureCLFLUSHOPT,
 967                                                   FeatureAVX512,
 968                                                   FeatureEVEX512,
 969                                                   FeatureCDI,
 970                                                   FeatureDQI,
 971                                                   FeatureBWI,
 972                                                   FeatureVLX,
 973                                                   FeaturePKU,
 974                                                   FeatureCLWB];
 975   list<SubtargetFeature> SKXTuning = [TuningFastGather,
 976                                       TuningMacroFusion,
 977                                       TuningSlow3OpsLEA,
 978                                       TuningSlowDivide64,
 979                                       TuningFastScalarFSQRT,
 980                                       TuningFastVectorFSQRT,
 981                                       TuningFastSHLDRotate,
 982                                       TuningFast15ByteNOP,
 983                                       TuningFastVariableCrossLaneShuffle,
 984                                       TuningFastVariablePerLaneShuffle,
 985                                       TuningPrefer256Bit,
 986                                       TuningPOPCNTFalseDeps,
 987                                       TuningInsertVZEROUPPER,
 988                                       TuningAllowLight256Bit,
 989                                       TuningPreferShiftShuffle,
 990                                       TuningNoDomainDelayMov,
 991                                       TuningNoDomainDelayShuffle,
 992                                       TuningNoDomainDelayBlend,
 993                                       TuningFastImmVectorShift];
 994   list<SubtargetFeature> SKXFeatures =
 995     !listconcat(BDWFeatures, SKXAdditionalFeatures);
 996
 997   // Cascadelake
 998   list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI];
 999   list<SubtargetFeature> CLXTuning = SKXTuning;
1000   list<SubtargetFeature> CLXFeatures =
1001     !listconcat(SKXFeatures, CLXAdditionalFeatures);
1002
1003   // Cooperlake
1004   list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16];
1005   list<SubtargetFeature> CPXTuning = SKXTuning;
1006   list<SubtargetFeature> CPXFeatures =
1007     !listconcat(CLXFeatures, CPXAdditionalFeatures);
1008
1009   // Cannonlake
1010   list<SubtargetFeature> CNLAdditionalFeatures = [FeatureAVX512,
1011                                                   FeatureEVEX512,
1012                                                   FeatureCDI,
1013                                                   FeatureDQI,
1014                                                   FeatureBWI,
1015                                                   FeatureVLX,
1016                                                   FeaturePKU,
1017                                                   FeatureVBMI,
1018                                                   FeatureIFMA,
1019                                                   FeatureSHA];
1020   list<SubtargetFeature> CNLTuning = [TuningFastGather,
1021                                       TuningMacroFusion,
1022                                       TuningSlow3OpsLEA,
1023                                       TuningSlowDivide64,
1024                                       TuningFastScalarFSQRT,
1025                                       TuningFastVectorFSQRT,
1026                                       TuningFastSHLDRotate,
1027                                       TuningFast15ByteNOP,
1028                                       TuningFastVariableCrossLaneShuffle,
1029                                       TuningFastVariablePerLaneShuffle,
1030                                       TuningPrefer256Bit,
1031                                       TuningInsertVZEROUPPER,
1032                                       TuningAllowLight256Bit,
1033                                       TuningNoDomainDelayMov,
1034                                       TuningNoDomainDelayShuffle,
1035                                       TuningNoDomainDelayBlend,
1036                                       TuningFastImmVectorShift];
1037   list<SubtargetFeature> CNLFeatures =
1038     !listconcat(SKLFeatures, CNLAdditionalFeatures);
1039
1040   // Icelake
1041   list<SubtargetFeature> ICLAdditionalFeatures = [FeatureBITALG,
1042                                                   FeatureVAES,
1043                                                   FeatureVBMI2,
1044                                                   FeatureVNNI,
1045                                                   FeatureVPCLMULQDQ,
1046                                                   FeatureVPOPCNTDQ,
1047                                                   FeatureGFNI,
1048                                                   FeatureRDPID,
1049                                                   FeatureFSRM];
1050   list<SubtargetFeature> ICLTuning = [TuningFastGather,
1051                                       TuningMacroFusion,
1052                                       TuningSlowDivide64,
1053                                       TuningFastScalarFSQRT,
1054                                       TuningFastVectorFSQRT,
1055                                       TuningFastSHLDRotate,
1056                                       TuningFast15ByteNOP,
1057                                       TuningFastVariableCrossLaneShuffle,
1058                                       TuningFastVariablePerLaneShuffle,
1059                                       TuningPrefer256Bit,
1060                                       TuningInsertVZEROUPPER,
1061                                       TuningAllowLight256Bit,
1062                                       TuningNoDomainDelayMov,
1063                                       TuningNoDomainDelayShuffle,
1064                                       TuningNoDomainDelayBlend,
1065                                       TuningFastImmVectorShift];
1066   list<SubtargetFeature> ICLFeatures =
1067     !listconcat(CNLFeatures, ICLAdditionalFeatures);
1068
1069   // Icelake Server
1070   list<SubtargetFeature> ICXAdditionalFeatures = [FeaturePCONFIG,
1071                                                   FeatureCLWB,
1072                                                   FeatureWBNOINVD];
1073   list<SubtargetFeature> ICXTuning = ICLTuning;
1074   list<SubtargetFeature> ICXFeatures =
1075     !listconcat(ICLFeatures, ICXAdditionalFeatures);
1076
1077   // Tigerlake
1078   list<SubtargetFeature> TGLAdditionalFeatures = [FeatureVP2INTERSECT,
1079                                                   FeatureCLWB,
1080                                                   FeatureMOVDIRI,
1081                                                   FeatureMOVDIR64B,
1082                                                   FeatureSHSTK];
1083   list<SubtargetFeature> TGLTuning = ICLTuning;
1084   list<SubtargetFeature> TGLFeatures =
1085     !listconcat(ICLFeatures, TGLAdditionalFeatures );
1086
1087   // Sapphirerapids: extends the Icelake Server lists (ICXFeatures/ICXTuning), not Tigerlake's.
1088   list<SubtargetFeature> SPRAdditionalFeatures = [FeatureAMXTILE,
1089                                                   FeatureAMXINT8,
1090                                                   FeatureAMXBF16,
1091                                                   FeatureBF16,
1092                                                   FeatureSERIALIZE,
1093                                                   FeatureCLDEMOTE,
1094                                                   FeatureWAITPKG,
1095                                                   FeaturePTWRITE,
1096                                                   FeatureFP16,
1097                                                   FeatureAVXVNNI,
1098                                                   FeatureTSXLDTRK,
1099                                                   FeatureENQCMD,
1100                                                   FeatureSHSTK,
1101                                                   FeatureMOVDIRI,
1102                                                   FeatureMOVDIR64B,
1103                                                   FeatureUINTR];
1104   list<SubtargetFeature> SPRAdditionalTuning = [TuningMULCFalseDeps,
1105                                                 TuningPERMFalseDeps,
1106                                                 TuningRANGEFalseDeps,
1107                                                 TuningGETMANTFalseDeps,
1108                                                 TuningMULLQFalseDeps];
1109   list<SubtargetFeature> SPRTuning = !listconcat(ICXTuning, SPRAdditionalTuning);
1110   list<SubtargetFeature> SPRFeatures =
1111     !listconcat(ICXFeatures, SPRAdditionalFeatures);
1112
1113   // Graniterapids: SPRFeatures plus AMXFP16 and PREFETCHI; no separate GNR tuning list is defined here.
1114   list<SubtargetFeature> GNRAdditionalFeatures = [FeatureAMXFP16,
1115                                                   FeaturePREFETCHI];
1116   list<SubtargetFeature> GNRFeatures =
1117     !listconcat(SPRFeatures, GNRAdditionalFeatures);
1118
1119   // Graniterapids D: GNRFeatures plus FeatureAMXCOMPLEX.
1120   list<SubtargetFeature> GNRDAdditionalFeatures = [FeatureAMXCOMPLEX];
1121   list<SubtargetFeature> GNRDFeatures =
1122     !listconcat(GNRFeatures, GNRDAdditionalFeatures);
1123
1124   // Atom: self-contained lists; AtomFeatures is the base that SLMFeatures extends.
1125   list<SubtargetFeature> AtomFeatures = [FeatureX87,
1126                                          FeatureCX8,
1127                                          FeatureCMOV,
1128                                          FeatureMMX,
1129                                          FeatureSSSE3,
1130                                          FeatureFXSR,
1131                                          FeatureNOPL,
1132                                          FeatureX86_64,
1133                                          FeatureCX16,
1134                                          FeatureMOVBE,
1135                                          FeatureLAHFSAHF64];
1136   list<SubtargetFeature> AtomTuning = [ProcIntelAtom,
1137                                        TuningSlowUAMem16,
1138                                        TuningLEAForSP,
1139                                        TuningSlowDivide32,
1140                                        TuningSlowDivide64,
1141                                        TuningSlowTwoMemOps,
1142                                        TuningLEAUsesAG,
1143                                        TuningPadShortFunctions,
1144                                        TuningInsertVZEROUPPER,
1145                                        TuningNoDomainDelay];
1146
1147   // Silvermont: features extend AtomFeatures; SLMTuning is a standalone list, not derived from AtomTuning.
1148   list<SubtargetFeature> SLMAdditionalFeatures = [FeatureSSE42,
1149                                                   FeatureCRC32,
1150                                                   FeaturePOPCNT,
1151                                                   FeaturePCLMUL,
1152                                                   FeaturePRFCHW,
1153                                                   FeatureRDRAND];
1154   list<SubtargetFeature> SLMTuning = [TuningUseSLMArithCosts,
1155                                       TuningSlowTwoMemOps,
1156                                       TuningSlowLEA,
1157                                       TuningSlowIncDec,
1158                                       TuningSlowDivide64,
1159                                       TuningSlowPMULLD,
1160                                       TuningFast7ByteNOP,
1161                                       TuningFastMOVBE,
1162                                       TuningPOPCNTFalseDeps,
1163                                       TuningInsertVZEROUPPER,
1164                                       TuningNoDomainDelay];
1165   list<SubtargetFeature> SLMFeatures =
1166     !listconcat(AtomFeatures, SLMAdditionalFeatures);
1167
1168   // Goldmont: features extend SLMFeatures; GLMTuning is written out in full rather than derived.
1169   list<SubtargetFeature> GLMAdditionalFeatures = [FeatureAES,
1170                                                   FeatureSHA,
1171                                                   FeatureRDSEED,
1172                                                   FeatureXSAVE,
1173                                                   FeatureXSAVEOPT,
1174                                                   FeatureXSAVEC,
1175                                                   FeatureXSAVES,
1176                                                   FeatureCLFLUSHOPT,
1177                                                   FeatureFSGSBase];
1178   list<SubtargetFeature> GLMTuning = [TuningUseGLMDivSqrtCosts,
1179                                       TuningSlowTwoMemOps,
1180                                       TuningSlowLEA,
1181                                       TuningSlowIncDec,
1182                                       TuningFastMOVBE,
1183                                       TuningPOPCNTFalseDeps,
1184                                       TuningInsertVZEROUPPER,
1185                                       TuningNoDomainDelay];
1186   list<SubtargetFeature> GLMFeatures =
1187     !listconcat(SLMFeatures, GLMAdditionalFeatures);
1188
1189   // Goldmont Plus: features extend GLMFeatures; GLPTuning is GLMTuning minus TuningPOPCNTFalseDeps.
1190   list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE,
1191                                                   FeatureRDPID];
1192   list<SubtargetFeature> GLPTuning = [TuningUseGLMDivSqrtCosts,
1193                                       TuningSlowTwoMemOps,
1194                                       TuningSlowLEA,
1195                                       TuningSlowIncDec,
1196                                       TuningFastMOVBE,
1197                                       TuningInsertVZEROUPPER,
1198                                       TuningNoDomainDelay];
1199   list<SubtargetFeature> GLPFeatures =
1200     !listconcat(GLMFeatures, GLPAdditionalFeatures);
1201
1202   // Tremont: GLPFeatures plus CLWB and GFNI; tuning is GLPTuning unchanged.
1203   list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLWB,
1204                                                   FeatureGFNI];
1205   list<SubtargetFeature> TRMTuning = GLPTuning;
1206   list<SubtargetFeature> TRMFeatures =
1207     !listconcat(GLPFeatures, TRMAdditionalFeatures);
1208
1209   // Alderlake: features extend the Tremont chain (TRMFeatures) while tuning extends SKLTuning.
1210   list<SubtargetFeature> ADLAdditionalFeatures = [FeatureSERIALIZE,
1211                                                   FeaturePCONFIG,
1212                                                   FeatureSHSTK,
1213                                                   FeatureWIDEKL,
1214                                                   FeatureINVPCID,
1215                                                   FeatureADX,
1216                                                   FeatureFMA,
1217                                                   FeatureVAES,
1218                                                   FeatureVPCLMULQDQ,
1219                                                   FeatureF16C,
1220                                                   FeatureBMI,
1221                                                   FeatureBMI2,
1222                                                   FeatureLZCNT,
1223                                                   FeatureAVXVNNI,
1224                                                   FeaturePKU,
1225                                                   FeatureHRESET,
1226                                                   FeatureCLDEMOTE,
1227                                                   FeatureMOVDIRI,
1228                                                   FeatureMOVDIR64B,
1229                                                   FeatureWAITPKG];
1230   list<SubtargetFeature> ADLAdditionalTuning = [TuningPERMFalseDeps,
1231                                                 TuningPreferMovmskOverVTest,
1232                                                 TuningFastImmVectorShift];
1233   list<SubtargetFeature> ADLTuning = !listconcat(SKLTuning, ADLAdditionalTuning);
1234   list<SubtargetFeature> ADLFeatures =
1235     !listconcat(TRMFeatures, ADLAdditionalFeatures);
1236
1237   // Gracemont: tuning list only; no matching GRT feature list is defined next to it.
1238   list<SubtargetFeature> GRTTuning = [TuningMacroFusion,
1239                                       TuningSlow3OpsLEA,
1240                                       TuningSlowDivide32,
1241                                       TuningSlowDivide64,
1242                                       TuningFastScalarFSQRT,
1243                                       TuningFastVectorFSQRT,
1244                                       TuningFast15ByteNOP,
1245                                       TuningFastVariablePerLaneShuffle,
1246                                       TuningPOPCNTFalseDeps,
1247                                       TuningInsertVZEROUPPER];
1248
1249   // Sierraforest: ADLFeatures plus the six features below; no SRF tuning list is defined here.
1250   list<SubtargetFeature> SRFAdditionalFeatures = [FeatureCMPCCXADD,
1251                                                   FeatureAVXIFMA,
1252                                                   FeatureAVXNECONVERT,
1253                                                   FeatureENQCMD,
1254                                                   FeatureUINTR,
1255                                                   FeatureAVXVNNIINT8];
1256   list<SubtargetFeature> SRFFeatures =
1257     !listconcat(ADLFeatures, SRFAdditionalFeatures);
1258
1259   // Arrowlake S: SRFFeatures plus AVXVNNIINT16, SHA512, SM3 and SM4.
1260   list<SubtargetFeature> ARLSAdditionalFeatures = [FeatureAVXVNNIINT16,
1261                                                    FeatureSHA512,
1262                                                    FeatureSM3,
1263                                                    FeatureSM4];
1264   list<SubtargetFeature> ARLSFeatures =
1265     !listconcat(SRFFeatures, ARLSAdditionalFeatures);
1266
1267   // Pantherlake: ARLSFeatures plus PREFETCHI.
1268   list<SubtargetFeature> PTLAdditionalFeatures = [FeaturePREFETCHI];
1269   list<SubtargetFeature> PTLFeatures =
1270     !listconcat(ARLSFeatures, PTLAdditionalFeatures);
1271
1272
1273   // Clearwaterforest: also built on ARLSFeatures (not PTLFeatures); adds PREFETCHI and USERMSR.
1274   list<SubtargetFeature> CWFAdditionalFeatures = [FeaturePREFETCHI,
1275                                                   FeatureUSERMSR];
1276   list<SubtargetFeature> CWFFeatures =
1277     !listconcat(ARLSFeatures, CWFAdditionalFeatures);
1278
1279   // Knights Landing: self-contained lists; KNMFeatures below adds only FeatureVPOPCNTDQ on top.
1280   list<SubtargetFeature> KNLFeatures = [FeatureX87,
1281                                         FeatureCX8,
1282                                         FeatureCMOV,
1283                                         FeatureMMX,
1284                                         FeatureFXSR,
1285                                         FeatureNOPL,
1286                                         FeatureX86_64,
1287                                         FeatureCX16,
1288                                         FeatureCRC32,
1289                                         FeaturePOPCNT,
1290                                         FeaturePCLMUL,
1291                                         FeatureXSAVE,
1292                                         FeatureXSAVEOPT,
1293                                         FeatureLAHFSAHF64,
1294                                         FeatureAES,
1295                                         FeatureRDRAND,
1296                                         FeatureF16C,
1297                                         FeatureFSGSBase,
1298                                         FeatureAVX512,
1299                                         FeatureEVEX512,
1300                                         FeatureERI,
1301                                         FeatureCDI,
1302                                         FeaturePFI,
1303                                         FeaturePREFETCHWT1,
1304                                         FeatureADX,
1305                                         FeatureRDSEED,
1306                                         FeatureMOVBE,
1307                                         FeatureLZCNT,
1308                                         FeatureBMI,
1309                                         FeatureBMI2,
1310                                         FeatureFMA,
1311                                         FeaturePRFCHW];
1312   list<SubtargetFeature> KNLTuning = [TuningSlowDivide64,
1313                                       TuningSlow3OpsLEA,
1314                                       TuningSlowIncDec,
1315                                       TuningSlowTwoMemOps,
1316                                       TuningPreferMaskRegisters,
1317                                       TuningFastGather,
1318                                       TuningFastMOVBE,
1319                                       TuningSlowPMADDWD];
1320   // TODO Add AVX5124FMAPS/AVX5124VNNIW features (no KNM tuning list is defined; only features differ).
1321   list<SubtargetFeature> KNMFeatures =
1322     !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]);
1323
1324   // Barcelona: self-contained lists. FeatureMMX is not listed; presumably implied by Feature3DNowA (verify).
1325   list<SubtargetFeature> BarcelonaFeatures = [FeatureX87,
1326                                               FeatureCX8,
1327                                               FeatureSSE4A,
1328                                               Feature3DNowA,
1329                                               FeatureFXSR,
1330                                               FeatureNOPL,
1331                                               FeatureCX16,
1332                                               FeaturePRFCHW,
1333                                               FeatureLZCNT,
1334                                               FeaturePOPCNT,
1335                                               FeatureLAHFSAHF64,
1336                                               FeatureCMOV,
1337                                               FeatureX86_64];
1338   list<SubtargetFeature> BarcelonaTuning = [TuningFastScalarShiftMasks,
1339                                             TuningSlowSHLD,
1340                                             TuningSBBDepBreaking,
1341                                             TuningInsertVZEROUPPER];
1342
1343   // Bobcat (BtVer1): self-contained lists; BtVer1Features is the base of BtVer2Features.
1344   list<SubtargetFeature> BtVer1Features = [FeatureX87,
1345                                            FeatureCX8,
1346                                            FeatureCMOV,
1347                                            FeatureMMX,
1348                                            FeatureSSSE3,
1349                                            FeatureSSE4A,
1350                                            FeatureFXSR,
1351                                            FeatureNOPL,
1352                                            FeatureX86_64,
1353                                            FeatureCX16,
1354                                            FeaturePRFCHW,
1355                                            FeatureLZCNT,
1356                                            FeaturePOPCNT,
1357                                            FeatureLAHFSAHF64];
1358   list<SubtargetFeature> BtVer1Tuning = [TuningFast15ByteNOP,
1359                                          TuningFastScalarShiftMasks,
1360                                          TuningFastVectorShiftMasks,
1361                                          TuningSlowSHLD,
1362                                          TuningSBBDepBreaking,
1363                                          TuningInsertVZEROUPPER];
1364
1365   // Jaguar (BtVer2): features extend BtVer1; tuning is standalone and omits TuningInsertVZEROUPPER.
1366   list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX,
1367                                                      FeatureAES,
1368                                                      FeatureCRC32,
1369                                                      FeaturePCLMUL,
1370                                                      FeatureBMI,
1371                                                      FeatureF16C,
1372                                                      FeatureMOVBE,
1373                                                      FeatureXSAVE,
1374                                                      FeatureXSAVEOPT];
1375   list<SubtargetFeature> BtVer2Tuning = [TuningFastLZCNT,
1376                                          TuningFastBEXTR,
1377                                          TuningFastHorizontalOps,
1378                                          TuningFast15ByteNOP,
1379                                          TuningFastScalarShiftMasks,
1380                                          TuningFastVectorShiftMasks,
1381                                          TuningFastMOVBE,
1382                                          TuningSBBDepBreaking,
1383                                          TuningSlowSHLD];
1384   list<SubtargetFeature> BtVer2Features =
1385     !listconcat(BtVer1Features, BtVer2AdditionalFeatures);
1386
1387   // Bulldozer (BdVer1): self-contained lists; base of the BdVer2..BdVer4 chains.
1388   list<SubtargetFeature> BdVer1Features = [FeatureX87,
1389                                            FeatureCX8,
1390                                            FeatureCMOV,
1391                                            FeatureXOP,
1392                                            FeatureX86_64,
1393                                            FeatureCX16,
1394                                            FeatureAES,
1395                                            FeatureCRC32,
1396                                            FeaturePRFCHW,
1397                                            FeaturePCLMUL,
1398                                            FeatureMMX,
1399                                            FeatureFXSR,
1400                                            FeatureNOPL,
1401                                            FeatureLZCNT,
1402                                            FeaturePOPCNT,
1403                                            FeatureXSAVE,
1404                                            FeatureLWP,
1405                                            FeatureLAHFSAHF64];
1406   list<SubtargetFeature> BdVer1Tuning = [TuningSlowSHLD,
1407                                          TuningFast11ByteNOP,
1408                                          TuningFastScalarShiftMasks,
1409                                          TuningBranchFusion,
1410                                          TuningSBBDepBreaking,
1411                                          TuningInsertVZEROUPPER];
1412
1413   // PileDriver (BdVer2): extends both BdVer1 lists (features and tuning).
1414   list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C,
1415                                                      FeatureBMI,
1416                                                      FeatureTBM,
1417                                                      FeatureFMA];
1418   list<SubtargetFeature> BdVer2AdditionalTuning = [TuningFastBEXTR,
1419                                                    TuningFastMOVBE];
1420   list<SubtargetFeature> BdVer2Tuning =
1421     !listconcat(BdVer1Tuning, BdVer2AdditionalTuning);
1422   list<SubtargetFeature> BdVer2Features =
1423     !listconcat(BdVer1Features, BdVer2AdditionalFeatures);
1424
1425   // Steamroller (BdVer3): adds XSAVEOPT and FSGSBase; tuning is BdVer2Tuning unchanged.
1426   list<SubtargetFeature> BdVer3AdditionalFeatures = [FeatureXSAVEOPT,
1427                                                      FeatureFSGSBase];
1428   list<SubtargetFeature> BdVer3Tuning = BdVer2Tuning;
1429   list<SubtargetFeature> BdVer3Features =
1430     !listconcat(BdVer2Features, BdVer3AdditionalFeatures);
1431
1432   // Excavator (BdVer4): adds the five features below; tuning is inherited unchanged via BdVer3Tuning.
1433   list<SubtargetFeature> BdVer4AdditionalFeatures = [FeatureAVX2,
1434                                                      FeatureBMI2,
1435                                                      FeatureMOVBE,
1436                                                      FeatureRDRAND,
1437                                                      FeatureMWAITX];
1438   list<SubtargetFeature> BdVer4Tuning = BdVer3Tuning;
1439   list<SubtargetFeature> BdVer4Features =
1440     !listconcat(BdVer3Features, BdVer4AdditionalFeatures);
1441
1442
1443   // AMD Zen Processors common ISAs (ZNFeatures/ZNTuning are the base of the ZN2..ZN4 lists).
1444   list<SubtargetFeature> ZNFeatures = [FeatureADX,
1445                                        FeatureAES,
1446                                        FeatureAVX2,
1447                                        FeatureBMI,
1448                                        FeatureBMI2,
1449                                        FeatureCLFLUSHOPT,
1450                                        FeatureCLZERO,
1451                                        FeatureCMOV,
1452                                        FeatureX86_64,
1453                                        FeatureCX16,
1454                                        FeatureCRC32,
1455                                        FeatureF16C,
1456                                        FeatureFMA,
1457                                        FeatureFSGSBase,
1458                                        FeatureFXSR,
1459                                        FeatureNOPL,
1460                                        FeatureLAHFSAHF64,
1461                                        FeatureLZCNT,
1462                                        FeatureMMX,
1463                                        FeatureMOVBE,
1464                                        FeatureMWAITX,
1465                                        FeaturePCLMUL,
1466                                        FeaturePOPCNT,
1467                                        FeaturePRFCHW,
1468                                        FeatureRDRAND,
1469                                        FeatureRDSEED,
1470                                        FeatureSHA,
1471                                        FeatureSSE4A,
1472                                        FeatureX87,
1473                                        FeatureXSAVE,
1474                                        FeatureXSAVEC,
1475                                        FeatureXSAVEOPT,
1476                                        FeatureXSAVES];
1477   list<SubtargetFeature> ZNTuning = [TuningFastLZCNT,
1478                                      TuningFastBEXTR,
1479                                      TuningFast15ByteNOP,
1480                                      TuningBranchFusion,
1481                                      TuningFastScalarFSQRT,
1482                                      TuningFastVectorFSQRT,
1483                                      TuningFastScalarShiftMasks,
1484                                      TuningFastVariablePerLaneShuffle,
1485                                      TuningFastMOVBE,
1486                                      TuningSlowSHLD,
1487                                      TuningSBBDepBreaking,
1488                                      TuningInsertVZEROUPPER,
1489                                      TuningAllowLight256Bit];
1490   list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
1491                                                   FeatureRDPID,
1492                                                   FeatureRDPRU,
1493                                                   FeatureWBNOINVD];
1494   list<SubtargetFeature> ZN2Tuning = ZNTuning; // ZN2 tuning is ZNTuning unchanged.
1495   list<SubtargetFeature> ZN2Features =
1496     !listconcat(ZNFeatures, ZN2AdditionalFeatures);
1497   list<SubtargetFeature> ZN3AdditionalFeatures = [FeatureFSRM,
1498                                                   FeatureINVPCID,
1499                                                   FeaturePKU,
1500                                                   FeatureVAES,
1501                                                   FeatureVPCLMULQDQ];
1502   list<SubtargetFeature> ZN3AdditionalTuning = [TuningMacroFusion]; // Only tuning change over ZN2.
1503   list<SubtargetFeature> ZN3Tuning =
1504     !listconcat(ZN2Tuning, ZN3AdditionalTuning);
1505   list<SubtargetFeature> ZN3Features =
1506     !listconcat(ZN2Features, ZN3AdditionalFeatures);
1507   list<SubtargetFeature> ZN4Tuning = ZN3Tuning; // No tuning change over ZN3; only features grow.
1508   list<SubtargetFeature> ZN4AdditionalFeatures = [FeatureAVX512,
1509                                                   FeatureEVEX512,
1510                                                   FeatureCDI,
1511                                                   FeatureDQI,
1512                                                   FeatureBWI,
1513                                                   FeatureVLX,
1514                                                   FeatureVBMI,
1515                                                   FeatureVBMI2,
1516                                                   FeatureIFMA,
1517                                                   FeatureVNNI,
1518                                                   FeatureBITALG,
1519                                                   FeatureGFNI,
1520                                                   FeatureBF16,
1521                                                   FeatureSHSTK,
1522                                                   FeatureVPOPCNTDQ];
1523   list<SubtargetFeature> ZN4Features =
1524     !listconcat(ZN3Features, ZN4AdditionalFeatures);
1525 }
1526
1527 //===----------------------------------------------------------------------===//
1528 // X86 processors supported.
1529 //===----------------------------------------------------------------------===//
1530
1531 class Proc<string Name, list<SubtargetFeature> Features,
1532            list<SubtargetFeature> TuneFeatures>
1533  : ProcessorModel<Name, GenericModel, Features, TuneFeatures>; // Model is fixed to GenericModel.
1534
1535 class ProcModel<string Name, SchedMachineModel Model,
1536                 list<SubtargetFeature> Features,
1537                 list<SubtargetFeature> TuneFeatures>
1538  : ProcessorModel<Name, Model, Features, TuneFeatures>; // Like Proc, but the caller picks the model.
1539
1540 // NOTE: CMPXCHG8B (FeatureCX8) is here for legacy compatibility so that it is only disabled
1541 // if i386/i486 is specifically requested.
1542 // NOTE: FeatureX86_64 is here as "generic" is the default llc CPU. The X86Subtarget
1543 // constructor checks that any CPU used in 64-bit mode has FeatureX86_64
1544 // enabled. It has no effect on code generation.
1545 // NOTE: As a default tuning, "generic" aims to produce code optimized for the
1546 // most common X86 processors. The tunings might be changed over time. It is
1547 // recommended to use "tune-cpu"="x86-64" in function attribute for consistency.
1548 def : ProcModel<"generic", SandyBridgeModel,
1549                 [FeatureX87, FeatureCX8, FeatureX86_64], // ISA features (see the first two NOTEs).
1550                 [TuningSlow3OpsLEA, // Default tuning (see the last NOTE).
1551                  TuningSlowDivide64,
1552                  TuningMacroFusion,
1553                  TuningFastScalarFSQRT,
1554                  TuningFast15ByteNOP,
1555                  TuningInsertVZEROUPPER]>;
1556
1557 def : Proc<"i386",            [FeatureX87], // i386/i486: X87 only, no FeatureCX8.
1558                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1559 def : Proc<"i486",            [FeatureX87],
1560                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1561 def : Proc<"i586",            [FeatureX87, FeatureCX8], // i586 and pentium are defined identically.
1562                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1563 def : Proc<"pentium",         [FeatureX87, FeatureCX8],
1564                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1565 foreach P = ["pentium-mmx", "pentium_mmx"] in { // One identical definition per spelling.
1566   def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX],
1567                 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1568 }
1569 def : Proc<"i686", [FeatureX87, FeatureCX8, FeatureCMOV], // Adds CMOV; MMX is not listed.
1570                    [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1571 foreach P = ["pentiumpro", "pentium_pro"] in { // i686 features plus NOPL.
1572   def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV, FeatureNOPL],
1573                 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1574 }
1575 foreach P = ["pentium2", "pentium_ii"] in { // pentiumpro features plus MMX and FXSR.
1576   def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX, FeatureCMOV,
1577                  FeatureFXSR, FeatureNOPL],
1578                 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1579 }
1580 foreach P = ["pentium3", "pentium3m", "pentium_iii_no_xmm_regs", "pentium_iii"] in {
1581   def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX,
1582                  FeatureSSE1, FeatureFXSR, FeatureNOPL, FeatureCMOV], // pentium2 features plus SSE1.
1583                 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1584 }
1585
1586 // Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
1587 // The intent is to enable it for pentium4 which is the current default
1588 // processor in a vanilla 32-bit clang compilation when no specific
1589 // architecture is specified.  This generally gives a nice performance
1590 // increase on silvermont, with largely neutral behavior on other
1591 // contemporary large core processors.
1592 // pentium-m, pentium4m, prescott and nocona are included as a preventative
1593 // measure to avoid performance surprises, in case clang's default cpu
1594 // changes slightly.
1595
1596 foreach P = ["pentium_m", "pentium-m"] in { // One identical definition per spelling.
1597   def : ProcModel<P, GenericPostRAModel,
1598                   [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2,
1599                    FeatureFXSR, FeatureNOPL, FeatureCMOV],
1600                   [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1601 }
1602
1603 foreach P = ["pentium4", "pentium4m", "pentium_4"] in { // Same model, features and tuning as pentium-m.
1604   def : ProcModel<P, GenericPostRAModel,
1605                   [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2,
1606                    FeatureFXSR, FeatureNOPL, FeatureCMOV],
1607                   [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1608 }
1609
1610 // Intel Quark. Only FeatureCX8 is enabled; FeatureX87 is not listed.
1611 def : Proc<"lakemont", [FeatureCX8],
1612                        [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1613
1614 // Intel Core Duo. Uses SandyBridgeModel; features match pentium-m with SSE3 in place of SSE2.
1615 def : ProcModel<"yonah", SandyBridgeModel,
1616                 [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3,
1617                  FeatureFXSR, FeatureNOPL, FeatureCMOV],
1618                 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1619
1620 // NetBurst. Same as the pentium4 definitions except that SSE3 replaces SSE2.
1621 foreach P = ["prescott", "pentium_4_sse3"] in {
1622   def : ProcModel<P, GenericPostRAModel,
1623                   [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3,
1624                    FeatureFXSR, FeatureNOPL, FeatureCMOV],
1625                   [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1626 }
1627 def : ProcModel<"nocona", GenericPostRAModel, [ // prescott features plus X86_64 and CX16.
1628   FeatureX87,
1629   FeatureCX8,
1630   FeatureCMOV,
1631   FeatureMMX,
1632   FeatureSSE3,
1633   FeatureFXSR,
1634   FeatureNOPL,
1635   FeatureX86_64,
1636   FeatureCX16
1637 ],
1638 [
1639   TuningSlowUAMem16,
1640   TuningInsertVZEROUPPER
1641 ]>;
1642
1643 // Intel Core 2 Solo/Duo. One identical definition per spelling.
1644 foreach P = ["core2", "core_2_duo_ssse3"] in {
1645   def : ProcModel<P, SandyBridgeModel, [
1646     FeatureX87,
1647     FeatureCX8,
1648     FeatureCMOV,
1649     FeatureMMX,
1650     FeatureSSSE3,
1651     FeatureFXSR,
1652     FeatureNOPL,
1653     FeatureX86_64,
1654     FeatureCX16,
1655     FeatureLAHFSAHF64
1656   ],
1657   [
1658     TuningMacroFusion,
1659     TuningSlowUAMem16,
1660     TuningInsertVZEROUPPER
1661   ]>;
1662 }
1663 foreach P = ["penryn", "core_2_duo_sse4_1"] in { // Same as core2 with SSE41 in place of SSSE3.
1664   def : ProcModel<P, SandyBridgeModel, [
1665     FeatureX87,
1666     FeatureCX8,
1667     FeatureCMOV,
1668     FeatureMMX,
1669     FeatureSSE41,
1670     FeatureFXSR,
1671     FeatureNOPL,
1672     FeatureX86_64,
1673     FeatureCX16,
1674     FeatureLAHFSAHF64
1675   ],
1676   [
1677     TuningMacroFusion,
1678     TuningSlowUAMem16,
1679     TuningInsertVZEROUPPER
1680   ]>;
1681 }
1682
1683 // Atom CPUs. bonnell/atom pair AtomModel with the Atom feature and tuning lists.
1684 foreach P = ["bonnell", "atom"] in {
1685   def : ProcModel<P, AtomModel, ProcessorFeatures.AtomFeatures,
1686                   ProcessorFeatures.AtomTuning>;
1687 }
1688
1689 foreach P = ["silvermont", "slm", "atom_sse4_2"] in { // One identical definition per spelling.
1690   def : ProcModel<P, SLMModel, ProcessorFeatures.SLMFeatures,
1691                   ProcessorFeatures.SLMTuning>;
1692 }
1693
1694 def : ProcModel<"atom_sse4_2_movbe", SLMModel, ProcessorFeatures.GLMFeatures,
1695                 ProcessorFeatures.SLMTuning>; // Goldmont feature list, but Silvermont tuning.
1696 def : ProcModel<"goldmont", SLMModel, ProcessorFeatures.GLMFeatures,
1697                 ProcessorFeatures.GLMTuning>; // goldmont through tremont all reuse SLMModel.
1698 foreach P = ["goldmont_plus", "goldmont-plus"] in {
1699   def : ProcModel<P, SLMModel, ProcessorFeatures.GLPFeatures,
1700                   ProcessorFeatures.GLPTuning>;
1701 }
1702 def : ProcModel<"tremont", SLMModel, ProcessorFeatures.TRMFeatures,
1703                 ProcessorFeatures.TRMTuning>;
1704 foreach P = ["sierraforest", "grandridge"] in { // Pairs TRMTuning with AlderlakePModel.
1705   def : ProcModel<P, AlderlakePModel, ProcessorFeatures.SRFFeatures,
1706                   ProcessorFeatures.TRMTuning>;
1707 }
1708
1709 // "Arrandale" along with corei3 and corei5.
     // All three names get the NHM feature/tuning lists on SandyBridgeModel.
1710 foreach P = ["nehalem", "corei7", "core_i7_sse4_2"] in {
1711   def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.NHMFeatures,
1712                   ProcessorFeatures.NHMTuning>;
1713 }
1714
1715 // Westmere is the corei3/i5/i7 path from Nehalem to Sandy Bridge.
     // Both names get the WSM feature/tuning lists on SandyBridgeModel.
1716 foreach P = ["westmere", "core_aes_pclmulqdq"] in {
1717   def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.WSMFeatures,
1718                   ProcessorFeatures.WSMTuning>;
1719 }
1720
     // Sandy Bridge and Ivy Bridge names. Both groups are scheduled with
     // SandyBridgeModel and differ only in their SNB vs. IVB feature/tuning lists.
1721 foreach P = ["sandybridge", "corei7-avx", "core_2nd_gen_avx"] in {
1722   def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.SNBFeatures,
1723                   ProcessorFeatures.SNBTuning>;
1724 }
1725
1726 foreach P = ["ivybridge", "core-avx-i", "core_3rd_gen_avx"] in {
1727   def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.IVBFeatures,
1728                   ProcessorFeatures.IVBTuning>;
1729 }
1730
     // Haswell aliases, Broadwell aliases and "skylake". Each group has its own
     // scheduling model (HaswellModel, BroadwellModel, SkylakeClientModel) and
     // its own feature/tuning lists.
1731 foreach P = ["haswell", "core-avx2", "core_4th_gen_avx", "core_4th_gen_avx_tsx"] in {
1732   def : ProcModel<P, HaswellModel, ProcessorFeatures.HSWFeatures,
1733                   ProcessorFeatures.HSWTuning>;
1734 }
1735
1736 foreach P = ["broadwell", "core_5th_gen_avx", "core_5th_gen_avx_tsx"] in {
1737   def : ProcModel<P, BroadwellModel, ProcessorFeatures.BDWFeatures,
1738                   ProcessorFeatures.BDWTuning>;
1739 }
1740
1741 def : ProcModel<"skylake", SkylakeClientModel, ProcessorFeatures.SKLFeatures,
1742                 ProcessorFeatures.SKLTuning>;
1743
1744 // FIXME: define KNL scheduler model
     // Until then "knl"/"mic_avx512" and "knm" are scheduled with HaswellModel.
     // "knm" has its own KNMFeatures list but shares KNLTuning.
1745 foreach P = ["knl", "mic_avx512"] in {
1746   def : ProcModel<P, HaswellModel, ProcessorFeatures.KNLFeatures,
1747                   ProcessorFeatures.KNLTuning>;
1748 }
1749 def : ProcModel<"knm", HaswellModel, ProcessorFeatures.KNMFeatures,
1750                 ProcessorFeatures.KNLTuning>;
1751
     // Three spellings of the same CPU: SKX feature/tuning lists on
     // SkylakeServerModel.
1752 foreach P = ["skylake-avx512", "skx", "skylake_avx512"] in {
1753   def : ProcModel<P, SkylakeServerModel, ProcessorFeatures.SKXFeatures,
1754                   ProcessorFeatures.SKXTuning>;
1755 }
1756
     // cascadelake, cooperlake and cannonlake are all scheduled with
     // SkylakeServerModel, each with its own feature/tuning lists.
1757 def : ProcModel<"cascadelake", SkylakeServerModel,
1758                 ProcessorFeatures.CLXFeatures, ProcessorFeatures.CLXTuning>;
1759 def : ProcModel<"cooperlake", SkylakeServerModel,
1760                 ProcessorFeatures.CPXFeatures, ProcessorFeatures.CPXTuning>;
1761 def : ProcModel<"cannonlake", SkylakeServerModel,
1762                 ProcessorFeatures.CNLFeatures, ProcessorFeatures.CNLTuning>;
     // The records from here through "tigerlake" use IceLakeModel.
1763 foreach P = ["icelake-client", "icelake_client"] in {
1764 def : ProcModel<P, IceLakeModel,
1765                 ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>;
1766 }
     // rocketlake takes the same ICL feature/tuning lists as icelake-client.
1767 def : ProcModel<"rocketlake", IceLakeModel,
1768                 ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>;
1769 foreach P = ["icelake-server", "icelake_server"] in {
1770 def : ProcModel<P, IceLakeModel,
1771                 ProcessorFeatures.ICXFeatures, ProcessorFeatures.ICXTuning>;
1772 }
1773 def : ProcModel<"tigerlake", IceLakeModel,
1774                 ProcessorFeatures.TGLFeatures, ProcessorFeatures.TGLTuning>;
     // sapphirerapids and alderlake each have a dedicated scheduling model.
1775 def : ProcModel<"sapphirerapids", SapphireRapidsModel,
1776                 ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
1777 def : ProcModel<"alderlake", AlderlakePModel,
1778                 ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
1779 // FIXME: Use Gracemont Schedule Model when it is ready.
     // For now gracemont is scheduled with AlderlakePModel and uses ADLFeatures,
     // but has its own GRTTuning list.
1780 def : ProcModel<"gracemont", AlderlakePModel,
1781                 ProcessorFeatures.ADLFeatures, ProcessorFeatures.GRTTuning>;
     // raptorlake and meteorlake take identical arguments: AlderlakePModel,
     // ADLFeatures and ADLTuning.
1782 def : ProcModel<"raptorlake", AlderlakePModel,
1783                 ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
1784 def : ProcModel<"meteorlake", AlderlakePModel,
1785                 ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
     // The records through "clearwaterforest" keep AlderlakePModel and ADLTuning
     // and differ only in the feature list (SRF, ARLS, PTL, CWF).
1786 def : ProcModel<"arrowlake", AlderlakePModel,
1787                 ProcessorFeatures.SRFFeatures, ProcessorFeatures.ADLTuning>;
1788 foreach P = ["arrowlake-s", "arrowlake_s", "lunarlake"] in {
1789 def : ProcModel<P, AlderlakePModel,
1790                 ProcessorFeatures.ARLSFeatures, ProcessorFeatures.ADLTuning>;
1791 }
1792 def : ProcModel<"pantherlake", AlderlakePModel,
1793                 ProcessorFeatures.PTLFeatures, ProcessorFeatures.ADLTuning>;
1794 def : ProcModel<"clearwaterforest", AlderlakePModel,
1795                 ProcessorFeatures.CWFFeatures, ProcessorFeatures.ADLTuning>;
     // The remaining records use SapphireRapidsModel with SPRTuning.
     // emeraldrapids also reuses SPRFeatures.
1796 def : ProcModel<"graniterapids", SapphireRapidsModel,
1797                 ProcessorFeatures.GNRFeatures, ProcessorFeatures.SPRTuning>;
1798 def : ProcModel<"emeraldrapids", SapphireRapidsModel,
1799                 ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
1800 foreach P = ["graniterapids-d", "graniterapids_d"] in {
1801 def : ProcModel<P, SapphireRapidsModel,
1802                 ProcessorFeatures.GNRDFeatures, ProcessorFeatures.SPRTuning>;
1803 }
1804
1805 // AMD CPUs.
1806
     // K6 family, declared with Proc, which is given only a name, a feature
     // list and a tuning list here (no scheduling-model argument).
     // "k6-2" and "k6-3" take identical lists, with Feature3DNow where "k6"
     // has FeatureMMX.
1807 def : Proc<"k6",   [FeatureX87, FeatureCX8, FeatureMMX],
1808                    [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1809 def : Proc<"k6-2", [FeatureX87, FeatureCX8, Feature3DNow],
1810                    [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1811 def : Proc<"k6-3", [FeatureX87, FeatureCX8, Feature3DNow],
1812                    [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1813
     // Athlon variants. The second group adds FeatureSSE1 and FeatureFXSR to
     // the first group's feature list; the tuning lists are identical.
1814 foreach P = ["athlon", "athlon-tbird"] in {
1815   def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV, Feature3DNowA,
1816                  FeatureNOPL],
1817                 [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1818 }
1819
1820 foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
1821   def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV,
1822                  FeatureSSE1, Feature3DNowA, FeatureFXSR, FeatureNOPL],
1823                 [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1824 }
1825
     // K8-class names. The "-sse3" group uses FeatureSSE3 instead of
     // FeatureSSE2 and adds FeatureCX16; the tuning lists are identical.
1826 foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
1827   def : Proc<P, [FeatureX87, FeatureCX8, FeatureSSE2, Feature3DNowA,
1828                  FeatureFXSR, FeatureNOPL, FeatureX86_64, FeatureCMOV],
1829                 [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
1830                  TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
1831 }
1832
1833 foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
1834   def : Proc<P, [FeatureX87, FeatureCX8, FeatureSSE3, Feature3DNowA,
1835                  FeatureFXSR, FeatureNOPL, FeatureCX16, FeatureCMOV,
1836                  FeatureX86_64],
1837                 [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
1838                  TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
1839 }
1840
     // Two names for the same record contents: the Barcelona feature/tuning
     // lists, declared with Proc (no scheduling-model argument).
1841 foreach P = ["amdfam10", "barcelona"] in {
1842   def : Proc<P, ProcessorFeatures.BarcelonaFeatures,
1843              ProcessorFeatures.BarcelonaTuning>;
1844 }
1845
1846 // Bobcat
     // Declared with Proc, i.e. without a scheduling-model argument.
1847 def : Proc<"btver1", ProcessorFeatures.BtVer1Features,
1848            ProcessorFeatures.BtVer1Tuning>;
1849 // Jaguar
     // Declared with ProcModel and scheduled with BtVer2Model.
1850 def : ProcModel<"btver2", BtVer2Model, ProcessorFeatures.BtVer2Features,
1851                 ProcessorFeatures.BtVer2Tuning>;
1852
1853 // Bulldozer
     // Note: bdver1 is scheduled with BdVer2Model, the same model as bdver2.
1854 def : ProcModel<"bdver1", BdVer2Model, ProcessorFeatures.BdVer1Features,
1855                 ProcessorFeatures.BdVer1Tuning>;
1856 // Piledriver
1857 def : ProcModel<"bdver2", BdVer2Model, ProcessorFeatures.BdVer2Features,
1858                 ProcessorFeatures.BdVer2Tuning>;
1859 // Steamroller
     // bdver3 and bdver4 are declared with Proc, without a scheduling-model
     // argument.
1860 def : Proc<"bdver3", ProcessorFeatures.BdVer3Features,
1861            ProcessorFeatures.BdVer3Tuning>;
1862 // Excavator
1863 def : Proc<"bdver4", ProcessorFeatures.BdVer4Features,
1864            ProcessorFeatures.BdVer4Tuning>;
1865
     // znver1 through znver4: each has its own Znver<N>Model scheduling model
     // and its own ZN* feature/tuning lists.
1866 def : ProcModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures,
1867                 ProcessorFeatures.ZNTuning>;
1868 def : ProcModel<"znver2", Znver2Model, ProcessorFeatures.ZN2Features,
1869                 ProcessorFeatures.ZN2Tuning>;
1870 def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features,
1871                 ProcessorFeatures.ZN3Tuning>;
1872 def : ProcModel<"znver4", Znver4Model, ProcessorFeatures.ZN4Features,
1873            ProcessorFeatures.ZN4Tuning>;
1874
     // geode, winchip and c3 names, all declared with Proc and all sharing the
     // tuning list [TuningSlowUAMem16, TuningInsertVZEROUPPER].
1875 def : Proc<"geode",           [FeatureX87, FeatureCX8, Feature3DNowA],
1876                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1877
1878 def : Proc<"winchip-c6",      [FeatureX87, FeatureMMX],
1879                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
     // "winchip2" and "c3" take identical feature lists.
1880 def : Proc<"winchip2",        [FeatureX87, Feature3DNow],
1881                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1882 def : Proc<"c3",              [FeatureX87, Feature3DNow],
1883                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1884 def : Proc<"c3-2",            [FeatureX87, FeatureCX8, FeatureMMX,
1885                                FeatureSSE1, FeatureFXSR, FeatureCMOV],
1886                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1887
1888 // We also provide a generic 64-bit specific x86 processor model which tries to
1889 // be good for modern chips without enabling instruction set encodings past the
1890 // basic SSE2 and 64-bit ones. It disables slow things from any mainstream and
1891 // modern 64-bit x86 chip, and enables features that are generally beneficial.
1892 //
1893 // We currently use the Sandy Bridge model as the default scheduling model as
1894 // we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which
1895 // covers a huge swath of x86 processors. If there are specific scheduling
1896 // knobs which need to be tuned differently for AMD chips, we might consider
1897 // forming a common base for them.
     //
     // The "x86-64" record takes the X86_64V1 feature/tuning lists; the -v2, -v3
     // and -v4 records below each take their own X86_64V<N> lists.
1898 def : ProcModel<"x86-64", SandyBridgeModel, ProcessorFeatures.X86_64V1Features,
1899                 ProcessorFeatures.X86_64V1Tuning>;
1900 // Close to Sandy Bridge, so it is scheduled with SandyBridgeModel.
1901 def : ProcModel<"x86-64-v2", SandyBridgeModel, ProcessorFeatures.X86_64V2Features,
1902                 ProcessorFeatures.X86_64V2Tuning>;
1903 // Close to Haswell, so it is scheduled with HaswellModel.
1904 def : ProcModel<"x86-64-v3", HaswellModel, ProcessorFeatures.X86_64V3Features,
1905                 ProcessorFeatures.X86_64V3Tuning>;
1906 // Close to the AVX-512 level implemented by Xeon Scalable Processors;
     // scheduled with SkylakeServerModel.
1907 def : ProcModel<"x86-64-v4", SkylakeServerModel, ProcessorFeatures.X86_64V4Features,
1908                 ProcessorFeatures.X86_64V4Tuning>;
1909
1910 //===----------------------------------------------------------------------===//
1911 // Calling Conventions
1912 //===----------------------------------------------------------------------===//
1913
1914 include "X86CallingConv.td"
1915
1916
1917 //===----------------------------------------------------------------------===//
1918 // Assembly Parser
1919 //===----------------------------------------------------------------------===//
1920
     // Assembly parser variant for the "att" syntax. It is listed first in the
     // X86 target's AssemblyParserVariants.
1921 def ATTAsmParserVariant : AsmParserVariant {
       // Variant number; ATTAsmWriter is declared with the same value.
1922   int Variant = 0;
1923
1924   // Variant name.
1925   string Name = "att";
1926
1927   // Discard comments in assembly strings; "#" is the comment delimiter.
1928   string CommentDelimiter = "#";
1929
1930   // Recognize hard coded registers by their "%" prefix.
1931   string RegisterPrefix = "%";
1932 }
1933
     // Assembly parser variant for the "intel" syntax. It is listed second in the
     // X86 target's AssemblyParserVariants.
1934 def IntelAsmParserVariant : AsmParserVariant {
       // Variant number; IntelAsmWriter is declared with the same value.
1935   int Variant = 1;
1936
1937   // Variant name.
1938   string Name = "intel";
1939
1940   // Discard comments in assembly strings; ";" is the comment delimiter.
1941   string CommentDelimiter = ";";
1942
1943   // Recognize hard coded registers; this variant uses an empty prefix.
1944   string RegisterPrefix = "";
1945 }
1946
1947 //===----------------------------------------------------------------------===//
1948 // Assembly Printers
1949 //===----------------------------------------------------------------------===//
1950
1951 // The X86 target supports two different syntaxes for emitting machine code.
1952 // This is controlled by the -x86-asm-syntax={att|intel} option.
     // ATTAsmWriter is the Variant 0 writer and names "ATTInstPrinter" as its
     // AsmWriterClassName.
1953 def ATTAsmWriter : AsmWriter {
1954   string AsmWriterClassName  = "ATTInstPrinter";
1955   int Variant = 0;
1956 }
1957 def IntelAsmWriter : AsmWriter {
       // Variant 1 writer; names "IntelInstPrinter" as its AsmWriterClassName.
1958   string AsmWriterClassName  = "IntelInstPrinter";
1959   int Variant = 1;
1960 }
1961
     // Top-level Target record for X86. It ties together the instruction set,
     // the two assembly parser variants and the two assembly writers.
1962 def X86 : Target {
1963   // Information about the instructions...
1964   let InstructionSet = X86InstrInfo;
       // Both lists put the "att" entry (Variant 0) before the "intel" entry
       // (Variant 1).
1965   let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
1966   let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
       // NOTE(review): presumably opts this target into register renaming.
       // Confirm against the Target class in Target.td.
1967   let AllowRegisterRenaming = 1;
1968 }
1969
1970 //===----------------------------------------------------------------------===//
1971 // Pfm Counters
1972 //===----------------------------------------------------------------------===//
1973
1974 include "X86PfmCounters.td"