llvm/lib/Target/X86/X86.td

   1 //===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This is a target description file for the Intel i386 architecture, referred
  10 // to here as the "X86" architecture.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 // Get the target-independent interfaces which we are implementing...
  15 //
  16 include "llvm/Target/Target.td"
  17
  18 //===----------------------------------------------------------------------===//
  19 // X86 Subtarget state
  20 //
  21 // Operating mode of the CPU, disregarding specific ABI / programming model
  22 def Is64Bit : SubtargetFeature<
  23     "64bit-mode", "Is64Bit", "true", "64-bit mode (x86_64)">;
  24 def Is32Bit : SubtargetFeature<
  25     "32bit-mode", "Is32Bit", "true", "32-bit mode (80386)">;
  26 def Is16Bit : SubtargetFeature<
  27     "16bit-mode", "Is16Bit", "true", "16-bit mode (i8086)">;
  28
  29 //===----------------------------------------------------------------------===//
  30 // X86 Subtarget ISA features
  31 //===----------------------------------------------------------------------===//
  32
  33 def FeatureX87 : SubtargetFeature<
  34     "x87", "HasX87", "true", "Enable X87 float instructions">;
  35
  36 def FeatureNOPL : SubtargetFeature<
  37     "nopl", "HasNOPL", "true", "Enable NOPL instruction (generally pentium pro+)">;
  38
  39 def FeatureCMOV : SubtargetFeature<
  40     "cmov", "HasCMOV", "true", "Enable conditional move instructions">;
  41
  42 def FeatureCX8 : SubtargetFeature<
  43     "cx8", "HasCX8", "true", "Support CMPXCHG8B instructions">;
  44
  45 def FeatureCRC32 : SubtargetFeature<
  46     "crc32", "HasCRC32", "true", "Enable SSE 4.2 CRC32 instruction (used when SSE4.2 is supported but function is GPR only)">;
  47
  48 def FeaturePOPCNT : SubtargetFeature<
  49     "popcnt", "HasPOPCNT", "true", "Support POPCNT instruction">;
  50
  51 def FeatureFXSR : SubtargetFeature<
  52     "fxsr", "HasFXSR", "true", "Support fxsave/fxrestore instructions">;
  53
  54 def FeatureXSAVE : SubtargetFeature<
  55     "xsave", "HasXSAVE", "true", "Support xsave instructions">;
  56
  57 def FeatureXSAVEOPT : SubtargetFeature<
  58     "xsaveopt", "HasXSAVEOPT", "true", "Support xsaveopt instructions",
  59     [FeatureXSAVE]>;
  60
  61 def FeatureXSAVEC : SubtargetFeature<
  62     "xsavec", "HasXSAVEC", "true", "Support xsavec instructions",
  63     [FeatureXSAVE]>;
  64
  65 def FeatureXSAVES : SubtargetFeature<
  66     "xsaves", "HasXSAVES", "true", "Support xsaves instructions",
  67     [FeatureXSAVE]>;
  68
  69 def FeatureSSE1 : SubtargetFeature<
  70     "sse", "X86SSELevel", "SSE1", "Enable SSE instructions">;
  71 def FeatureSSE2 : SubtargetFeature<
  72     "sse2", "X86SSELevel", "SSE2", "Enable SSE2 instructions",
  73     [FeatureSSE1]>;
  74 def FeatureSSE3 : SubtargetFeature<
  75     "sse3", "X86SSELevel", "SSE3", "Enable SSE3 instructions",
  76     [FeatureSSE2]>;
  77 def FeatureSSSE3 : SubtargetFeature<
  78     "ssse3", "X86SSELevel", "SSSE3", "Enable SSSE3 instructions",
  79     [FeatureSSE3]>;
  80 def FeatureSSE41 : SubtargetFeature<
  81     "sse4.1", "X86SSELevel", "SSE41", "Enable SSE 4.1 instructions",
  82     [FeatureSSSE3]>;
  83 def FeatureSSE42 : SubtargetFeature<
  84     "sse4.2", "X86SSELevel", "SSE42", "Enable SSE 4.2 instructions",
  85     [FeatureSSE41]>;
  86 // MMX is kept as its own feature bit instead of being chained into the SSE
  87 // levels: for (odd) compatibility reasons it must be possible to switch MMX
  88 // off explicitly while SSE and later extensions stay enabled.
  89 def FeatureMMX : SubtargetFeature<
  90     "mmx", "HasMMX", "true", "Enable MMX instructions">;
  91 // Every x86-64 CPU provides SSE2, yet SSE2 is deliberately not implied here:
  92 // it may be turned off (e.g. when compiling OS kernels) while staying in
  93 // 64-bit mode. No other feature should imply this bit; it exists to enforce
  94 // that 64-bit mode is only used with 64-bit capable CPUs.
  95 def FeatureX86_64 : SubtargetFeature<
  96     "64bit", "HasX86_64", "true", "Support 64-bit instructions">;
  97 def FeatureCX16 : SubtargetFeature<
  98     "cx16", "HasCX16", "true", "64-bit with cmpxchg16b (this is true for most x86-64 chips, but not the first AMD chips)",
  99     [FeatureCX8]>;
 100 def FeatureSSE4A : SubtargetFeature<
 101     "sse4a", "HasSSE4A", "true", "Support SSE 4a instructions",
 102     [FeatureSSE3]>;
 103
 104 def FeatureAVX : SubtargetFeature<
 105     "avx", "X86SSELevel", "AVX", "Enable AVX instructions",
 106     [FeatureSSE42]>;
 107 def FeatureAVX2 : SubtargetFeature<
 108     "avx2", "X86SSELevel", "AVX2", "Enable AVX2 instructions",
 109     [FeatureAVX]>;
 110 def FeatureFMA     : SubtargetFeature<"fma", "HasFMA", "true",
 111                                       "Enable three-operand fused multiply-add",
 112                                       [FeatureAVX]>;
 113 def FeatureF16C : SubtargetFeature<
 114     "f16c", "HasF16C", "true", "Support 16-bit floating point conversion instructions",
 115     [FeatureAVX]>;
 116 def FeatureEVEX512 : SubtargetFeature<
 117     "evex512", "HasEVEX512", "true", "Support ZMM and 64-bit mask instructions">;
 118 def FeatureAVX512 : SubtargetFeature<
 119     "avx512f", "X86SSELevel", "AVX512", "Enable AVX-512 instructions",
 120     [FeatureAVX2, FeatureFMA, FeatureF16C]>;
 121 def FeatureCDI : SubtargetFeature<
 122     "avx512cd", "HasCDI", "true", "Enable AVX-512 Conflict Detection Instructions",
 123     [FeatureAVX512]>;
 124 def FeatureVPOPCNTDQ : SubtargetFeature<
 125     "avx512vpopcntdq", "HasVPOPCNTDQ", "true", "Enable AVX-512 Population Count Instructions",
 126     [FeatureAVX512]>;
 127 def FeaturePREFETCHI : SubtargetFeature<
 128     "prefetchi", "HasPREFETCHI", "true",
 129     "Prefetch instruction with T0 or T1 Hint">;
 130 def FeatureDQI : SubtargetFeature<
 131     "avx512dq", "HasDQI", "true", "Enable AVX-512 Doubleword and Quadword Instructions",
 132     [FeatureAVX512]>;
 133 def FeatureBWI : SubtargetFeature<
 134     "avx512bw", "HasBWI", "true", "Enable AVX-512 Byte and Word Instructions",
 135     [FeatureAVX512]>;
 136 def FeatureVLX : SubtargetFeature<
 137     "avx512vl", "HasVLX", "true", "Enable AVX-512 Vector Length eXtensions",
 138     [FeatureAVX512]>;
 139 def FeatureVBMI : SubtargetFeature<
 140     "avx512vbmi", "HasVBMI", "true", "Enable AVX-512 Vector Byte Manipulation Instructions",
 141     [FeatureBWI]>;
 142 def FeatureVBMI2 : SubtargetFeature<
 143     "avx512vbmi2", "HasVBMI2", "true", "Enable AVX-512 further Vector Byte Manipulation Instructions",
 144     [FeatureBWI]>;
 145 def FeatureAVXIFMA : SubtargetFeature<
 146     "avxifma", "HasAVXIFMA", "true", "Enable AVX-IFMA",
 147     [FeatureAVX2]>;
 148 def FeatureIFMA     : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
 149                       "Enable AVX-512 Integer Fused Multiply-Add",
 150                                       [FeatureAVX512]>;
 151 def FeaturePKU : SubtargetFeature<
 152     "pku", "HasPKU", "true", "Enable protection keys">;
 153 def FeatureVNNI : SubtargetFeature<
 154     "avx512vnni", "HasVNNI", "true", "Enable AVX-512 Vector Neural Network Instructions",
 155     [FeatureAVX512]>;
 156 def FeatureAVXVNNI : SubtargetFeature<
 157     "avxvnni", "HasAVXVNNI", "true", "Support AVX_VNNI encoding",
 158     [FeatureAVX2]>;
 159 def FeatureBF16 : SubtargetFeature<
 160     "avx512bf16", "HasBF16", "true", "Support bfloat16 floating point",
 161     [FeatureBWI]>;
 162 def FeatureBITALG : SubtargetFeature<
 163     "avx512bitalg", "HasBITALG", "true", "Enable AVX-512 Bit Algorithms",
 164     [FeatureBWI]>;
 165 def FeatureVP2INTERSECT : SubtargetFeature<
 166     "avx512vp2intersect", "HasVP2INTERSECT", "true",
 167     "Enable AVX-512 vp2intersect",
 168     [FeatureAVX512]>;
 169 // FIXME: FP16 scalar intrinsics use the type v8f16, which should be guarded
 170 // by hasVLX; until that is sorted out, FeatureFP16 implies FeatureVLX.
 171 // FIXME: FP16 conversion between f16 and i64 customizes the type v8i64,
 172 // which should be guarded by hasDQI; until that is sorted out, FeatureFP16
 173 // implies FeatureDQI.
 174 def FeatureFP16 : SubtargetFeature<
 175     "avx512fp16", "HasFP16", "true", "Support 16-bit floating point",
 176     [FeatureBWI, FeatureVLX, FeatureDQI]>;
 177 def FeatureAVXVNNIINT8 : SubtargetFeature<
 178     "avxvnniint8", "HasAVXVNNIINT8", "true",
 179     "Enable AVX-VNNI-INT8",
 180     [FeatureAVX2]>;
 181 def FeatureAVXVNNIINT16 : SubtargetFeature<
 182     "avxvnniint16", "HasAVXVNNIINT16", "true",
 183     "Enable AVX-VNNI-INT16",
 184     [FeatureAVX2]>;
 185 def FeaturePCLMUL : SubtargetFeature<
 186     "pclmul", "HasPCLMUL", "true", "Enable packed carry-less multiplication instructions",
 187     [FeatureSSE2]>;
 188 def FeatureGFNI : SubtargetFeature<
 189     "gfni", "HasGFNI", "true", "Enable Galois Field Arithmetic Instructions",
 190     [FeatureSSE2]>;
 191 def FeatureVPCLMULQDQ : SubtargetFeature<
 192     "vpclmulqdq", "HasVPCLMULQDQ", "true", "Enable vpclmulqdq instructions",
 193     [FeatureAVX, FeaturePCLMUL]>;
 194 def FeatureFMA4    : SubtargetFeature<"fma4", "HasFMA4", "true",
 195                                       "Enable four-operand fused multiply-add",
 196                                       [FeatureAVX, FeatureSSE4A]>;
 197 def FeatureXOP : SubtargetFeature<
 198     "xop", "HasXOP", "true", "Enable XOP instructions",
 199     [FeatureFMA4]>;
 200 def FeatureSSEUnalignedMem : SubtargetFeature<
 201     "sse-unaligned-mem", "HasSSEUnalignedMem", "true",
 202     "Allow unaligned memory operands with SSE instructions (this may require setting a configuration bit in the processor)">;
 203 def FeatureAES : SubtargetFeature<
 204     "aes", "HasAES", "true", "Enable AES instructions",
 205     [FeatureSSE2]>;
 206 def FeatureVAES : SubtargetFeature<
 207     "vaes", "HasVAES", "true", "Promote selected AES instructions to AVX512/AVX registers",
 208     [FeatureAVX2, FeatureAES]>;
 209 def FeatureTBM : SubtargetFeature<
 210     "tbm", "HasTBM", "true", "Enable TBM instructions">;
 211 def FeatureLWP : SubtargetFeature<
 212     "lwp", "HasLWP", "true", "Enable LWP instructions">;
 213 def FeatureMOVBE : SubtargetFeature<
 214     "movbe", "HasMOVBE", "true", "Support MOVBE instruction">;
 215 def FeatureRDRAND : SubtargetFeature<
 216     "rdrnd", "HasRDRAND", "true", "Support RDRAND instruction">;
 217 def FeatureFSGSBase : SubtargetFeature<
 218     "fsgsbase", "HasFSGSBase", "true", "Support FS/GS Base instructions">;
 219 def FeatureLZCNT : SubtargetFeature<
 220     "lzcnt", "HasLZCNT", "true", "Support LZCNT instruction">;
 221 def FeatureBMI : SubtargetFeature<
 222     "bmi", "HasBMI", "true", "Support BMI instructions">;
 223 def FeatureBMI2 : SubtargetFeature<
 224     "bmi2", "HasBMI2", "true", "Support BMI2 instructions">;
 225 def FeatureRTM : SubtargetFeature<
 226     "rtm", "HasRTM", "true", "Support RTM instructions">;
 227 def FeatureADX : SubtargetFeature<
 228     "adx", "HasADX", "true", "Support ADX instructions">;
 229 def FeatureSHA : SubtargetFeature<
 230     "sha", "HasSHA", "true", "Enable SHA instructions",
 231     [FeatureSSE2]>;
 232 def FeatureSHA512 : SubtargetFeature<
 233     "sha512", "HasSHA512", "true", "Support SHA512 instructions",
 234     [FeatureAVX2]>;
 235 // CET SHSTK: the processor supports Control-Flow Enforcement Technology
 236 // using Shadow Stack.
 237 def FeatureSHSTK : SubtargetFeature<
 238     "shstk", "HasSHSTK", "true", "Support CET Shadow-Stack instructions">;
 239 def FeatureSM3 : SubtargetFeature<
 240     "sm3", "HasSM3", "true", "Support SM3 instructions",
 241     [FeatureAVX]>;
 242 def FeatureSM4 : SubtargetFeature<
 243     "sm4", "HasSM4", "true", "Support SM4 instructions",
 244     [FeatureAVX2]>;
 245 def FeaturePRFCHW : SubtargetFeature<
 246     "prfchw", "HasPRFCHW", "true", "Support PRFCHW instructions">;
 247 def FeatureRDSEED : SubtargetFeature<
 248     "rdseed", "HasRDSEED", "true", "Support RDSEED instruction">;
 249 def FeatureLAHFSAHF64 : SubtargetFeature<
 250     "sahf", "HasLAHFSAHF64", "true", "Support LAHF and SAHF instructions in 64-bit mode">;
 251 def FeatureMWAITX : SubtargetFeature<
 252     "mwaitx", "HasMWAITX", "true", "Enable MONITORX/MWAITX timer functionality">;
 253 def FeatureCLZERO : SubtargetFeature<
 254     "clzero", "HasCLZERO", "true", "Enable Cache Line Zero">;
 255 def FeatureCLDEMOTE : SubtargetFeature<
 256     "cldemote", "HasCLDEMOTE", "true", "Enable Cache Line Demote">;
 257 def FeaturePTWRITE : SubtargetFeature<
 258     "ptwrite", "HasPTWRITE", "true", "Support ptwrite instruction">;
 259 def FeatureAMXTILE : SubtargetFeature<
 260     "amx-tile", "HasAMXTILE", "true", "Support AMX-TILE instructions">;
 261 def FeatureAMXINT8 : SubtargetFeature<
 262     "amx-int8", "HasAMXINT8", "true", "Support AMX-INT8 instructions",
 263     [FeatureAMXTILE]>;
 264 def FeatureAMXBF16 : SubtargetFeature<
 265     "amx-bf16", "HasAMXBF16", "true", "Support AMX-BF16 instructions",
 266     [FeatureAMXTILE]>;
 267 def FeatureAMXFP16 : SubtargetFeature<
 268     "amx-fp16", "HasAMXFP16", "true", "Support AMX amx-fp16 instructions",
 269     [FeatureAMXTILE]>;
 270 def FeatureAMXCOMPLEX : SubtargetFeature<
 271     "amx-complex", "HasAMXCOMPLEX", "true", "Support AMX-COMPLEX instructions",
 272     [FeatureAMXTILE]>;
 273 def FeatureAMXFP8 : SubtargetFeature<
 274     "amx-fp8", "HasAMXFP8", "true", "Support AMX-FP8 instructions",
 275     [FeatureAMXTILE]>;
 276 def FeatureAMXMOVRS : SubtargetFeature<
 277     "amx-movrs", "HasAMXMOVRS", "true", "Support AMX-MOVRS instructions",
 278     [FeatureAMXTILE]>;
 279 def FeatureAMXTRANSPOSE : SubtargetFeature<
 280     "amx-transpose", "HasAMXTRANSPOSE", "true", "Support AMX amx-transpose instructions",
 281     [FeatureAMXTILE]>;
 282 def FeatureAMXAVX512 : SubtargetFeature<
 283     "amx-avx512", "HasAMXAVX512", "true",
 284     "Support AMX-AVX512 instructions",
 285     [FeatureAMXTILE]>;
 286 def FeatureAMXTF32 : SubtargetFeature<
 287     "amx-tf32", "HasAMXTF32", "true", "Support AMX-TF32 instructions",
 288     [FeatureAMXTILE]>;
 289 def FeatureCMPCCXADD : SubtargetFeature<
 290     "cmpccxadd", "HasCMPCCXADD", "true", "Support CMPCCXADD instructions">;
 291 def FeatureRAOINT : SubtargetFeature<
 292     "raoint", "HasRAOINT", "true", "Support RAO-INT instructions",
 293     []>;
 294 def FeatureAVXNECONVERT : SubtargetFeature<
 295     "avxneconvert", "HasAVXNECONVERT", "true", "Support AVX-NE-CONVERT instructions",
 296     [FeatureAVX2]>;
 297 def FeatureINVPCID : SubtargetFeature<
 298     "invpcid", "HasINVPCID", "true", "Invalidate Process-Context Identifier">;
 299 def FeatureSGX : SubtargetFeature<
 300     "sgx", "HasSGX", "true", "Enable Software Guard Extensions">;
 301 def FeatureCLFLUSHOPT : SubtargetFeature<
 302     "clflushopt", "HasCLFLUSHOPT", "true", "Flush A Cache Line Optimized">;
 303 def FeatureCLWB : SubtargetFeature<
 304     "clwb", "HasCLWB", "true", "Cache Line Write Back">;
 305 def FeatureWBNOINVD : SubtargetFeature<
 306     "wbnoinvd", "HasWBNOINVD", "true", "Write Back No Invalidate">;
 307 def FeatureRDPID : SubtargetFeature<
 308     "rdpid", "HasRDPID", "true", "Support RDPID instructions">;
 309 def FeatureRDPRU : SubtargetFeature<
 310     "rdpru", "HasRDPRU", "true", "Support RDPRU instructions">;
 311 def FeatureWAITPKG : SubtargetFeature<
 312     "waitpkg", "HasWAITPKG", "true", "Wait and pause enhancements">;
 313 def FeatureENQCMD : SubtargetFeature<
 314     "enqcmd", "HasENQCMD", "true", "Has ENQCMD instructions">;
 315 def FeatureKL : SubtargetFeature<
 316     "kl", "HasKL", "true", "Support Key Locker kl Instructions",
 317     [FeatureSSE2]>;
 318 def FeatureWIDEKL : SubtargetFeature<
 319     "widekl", "HasWIDEKL", "true", "Support Key Locker wide Instructions",
 320     [FeatureKL]>;
 321 def FeatureHRESET : SubtargetFeature<
 322     "hreset", "HasHRESET", "true", "Has hreset instruction">;
 323 def FeatureSERIALIZE : SubtargetFeature<
 324     "serialize", "HasSERIALIZE", "true", "Has serialize instruction">;
 325 def FeatureTSXLDTRK : SubtargetFeature<
 326     "tsxldtrk", "HasTSXLDTRK", "true", "Support TSXLDTRK instructions">;
 327 def FeatureUINTR : SubtargetFeature<
 328     "uintr", "HasUINTR", "true", "Has UINTR Instructions">;
 329 def FeatureUSERMSR : SubtargetFeature<
 330     "usermsr", "HasUSERMSR", "true", "Support USERMSR instructions">;
 331 def FeaturePCONFIG : SubtargetFeature<
 332     "pconfig", "HasPCONFIG", "true", "platform configuration instruction">;
 333 def FeatureMOVDIRI : SubtargetFeature<
 334     "movdiri", "HasMOVDIRI", "true", "Support movdiri instruction (direct store integer)">;
 335 def FeatureMOVDIR64B : SubtargetFeature<
 336     "movdir64b", "HasMOVDIR64B", "true", "Support movdir64b instruction (direct store 64 bytes)">;
 337 def FeatureAVX10_1 : SubtargetFeature<
 338     "avx10.1-256", "HasAVX10_1", "true", "Support AVX10.1 up to 256-bit instruction",
 339     [FeatureCDI, FeatureVBMI, FeatureIFMA, FeatureVNNI,
 340      FeatureBF16, FeatureVPOPCNTDQ, FeatureVBMI2, FeatureBITALG,
 341      FeatureVAES, FeatureVPCLMULQDQ, FeatureFP16]>;
 342 def FeatureAVX10_1_512 : SubtargetFeature<
 343     "avx10.1-512", "HasAVX10_1_512", "true", "Support AVX10.1 up to 512-bit instruction",
 344     [FeatureAVX10_1, FeatureEVEX512]>;
 345 def FeatureAVX10_2 : SubtargetFeature<
 346     "avx10.2-256", "HasAVX10_2", "true", "Support AVX10.2 up to 256-bit instruction",
 347     [FeatureAVX10_1]>;
 348 def FeatureAVX10_2_512 : SubtargetFeature<
 349     "avx10.2-512", "HasAVX10_2_512", "true", "Support AVX10.2 up to 512-bit instruction",
 350     [FeatureAVX10_2, FeatureAVX10_1_512]>;
 351 def FeatureEGPR : SubtargetFeature<
 352     "egpr", "HasEGPR", "true", "Support extended general purpose register">;
 353 def FeaturePush2Pop2 : SubtargetFeature<
 354     "push2pop2", "HasPush2Pop2", "true", "Support PUSH2/POP2 instructions">;
 355 def FeaturePPX : SubtargetFeature<
 356     "ppx", "HasPPX", "true", "Support Push-Pop Acceleration">;
 357 def FeatureNDD : SubtargetFeature<
 358     "ndd", "HasNDD", "true", "Support non-destructive destination">;
 359 def FeatureCCMP : SubtargetFeature<
 360     "ccmp", "HasCCMP", "true", "Support conditional cmp & test instructions">;
 361 def FeatureNF : SubtargetFeature<
 362     "nf", "HasNF", "true", "Support status flags update suppression">;
 363 def FeatureCF : SubtargetFeature<
 364     "cf", "HasCF", "true", "Support conditional faulting">;
 365 def FeatureZU : SubtargetFeature<
 366     "zu", "HasZU", "true", "Support zero-upper SETcc/IMUL">;
 367 def FeatureUseGPR32InInlineAsm : SubtargetFeature<
 368     "inline-asm-use-gpr32", "UseInlineAsmGPR32", "true",
 369     "Enable use of GPR32 in inline assembly for APX">;
 370 def FeatureMOVRS : SubtargetFeature<
 371     "movrs", "HasMOVRS", "true", "Enable MOVRS", []>;
 372
 373 // Enhanced REP MOVSB and STOSB (aka "string operations"), available on Ivy
 374 // Bridge and newer processors. See "REP String Enhancement" in the Intel
 375 // Software Development Manual. In essence, REP MOVSB copies using the largest
 376 // available size rather than one byte at a time, which makes it at least as
 377 // fast as REPMOVS{W,D,Q}.
 378 def FeatureERMSB : SubtargetFeature<
 379     "ermsb", "HasERMSB", "true",
 380     "REP MOVS/STOS are fast"
 381 >;
 382
 383 // Fast Short REP MOV, available on Icelake and newer processors.
 384 def FeatureFSRM : SubtargetFeature<
 385     "fsrm", "HasFSRM", "true",
 386     "REP MOVSB of short lengths is faster"
 387 >;
 388
 389 def FeatureSoftFloat : SubtargetFeature<
 390     "soft-float", "UseSoftFloat", "true",
 391     "Use software floating point features">;
 392
 393 //===----------------------------------------------------------------------===//
 394 // X86 Subtarget Security Mitigation features
 395 //===----------------------------------------------------------------------===//
 396
 397 // Mitigate potential Spectre v2 attacks against indirect calls by lowering
 398 // them using a special construct called a `retpoline`.
 399 def FeatureRetpolineIndirectCalls : SubtargetFeature<
 400     "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
 401     "Remove speculation of indirect calls from the generated code"
 402 >;
 403
 404 // Mitigate potential Spectre v2 attacks against indirect branches and
 405 // switches by lowering them either using conditional branch trees or using a
 406 // special construct called a `retpoline`.
 407 def FeatureRetpolineIndirectBranches : SubtargetFeature<
 408     "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
 409     "Remove speculation of indirect branches from the generated code"
 410 >;
 411
 412 // Deprecated umbrella feature: turning it on enables both
 413 // `retpoline-indirect-calls` and `retpoline-indirect-branches` above.
 414 def FeatureRetpoline : SubtargetFeature<
 415     "retpoline", "DeprecatedUseRetpoline", "true",
 416     "Remove speculation of indirect branches from the "
 417     "generated code, either by avoiding them entirely or "
 418     "lowering them with a speculation blocking construct",
 419     [FeatureRetpolineIndirectCalls,
 420      FeatureRetpolineIndirectBranches]>;
 421
 422 // Use externally provided thunks for the emitted retpoline calls, so that
 423 // users in highly specialized environments (such as a kernel that does
 424 // boot-time hot patching) can supply their own custom thunk definitions.
 425 def FeatureRetpolineExternalThunk : SubtargetFeature<
 426     "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
 427     "When lowering an indirect call or branch using a `retpoline`, rely "
 428     "on the specified user provided thunk rather than emitting one "
 429     "ourselves. Only has effect when combined with some other retpoline "
 430     "feature",
 431     [FeatureRetpolineIndirectCalls]>;
 432
 433 // LVI mitigation covering indirect calls/branches and call returns.
 434 def FeatureLVIControlFlowIntegrity : SubtargetFeature<
 435     "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
 436     "Prevent indirect calls/branches from using a memory operand, and "
 437     "precede all indirect calls/branches from a register with an "
 438     "LFENCE instruction to serialize control flow. Also decompose RET "
 439     "instructions into a POP+LFENCE+JMP sequence."
 440 >;
 441
 442 // SESES: mitigation against speculative execution attacks.
 443 def FeatureSpeculativeExecutionSideEffectSuppression : SubtargetFeature<
 444     "seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
 445     "Prevent speculative execution side channel timing attacks by "
 446     "inserting a speculation barrier before memory reads, memory writes, "
 447     "and conditional branches. Implies LVI Control Flow integrity.",
 448     [FeatureLVIControlFlowIntegrity]
 449 >;
 450
 451 // LVI mitigation covering data loads.
 452 def FeatureLVILoadHardening : SubtargetFeature<
 453     "lvi-load-hardening", "UseLVILoadHardening", "true",
 454     "Insert LFENCE instructions to prevent data speculatively injected "
 455     "into loads from being used maliciously."
 456 >;
 457
 458 def FeatureTaggedGlobals : SubtargetFeature<
 459     "tagged-globals", "AllowTaggedGlobals", "true",
 460     "Use an instruction sequence for taking the address of a global "
 461     "that allows a memory tag in the upper address bits."
 462 >;
 463
 464 // Codegen mitigations against the Straight Line Speculation vulnerability.
 465 def FeatureHardenSlsRet : SubtargetFeature<
 466     "harden-sls-ret", "HardenSlsRet", "true",
 467     "Harden against straight line speculation across RET instructions."
 468 >;
 469
 470 def FeatureHardenSlsIJmp : SubtargetFeature<
 471     "harden-sls-ijmp", "HardenSlsIJmp", "true",
 472     "Harden against straight line speculation across indirect JMP instructions."
 473 >;
 474
 475 //===----------------------------------------------------------------------===//
 476 // X86 Subtarget Tuning features
 477 //===----------------------------------------------------------------------===//
 478 def TuningPreferMovmskOverVTest : SubtargetFeature<
 479     "prefer-movmsk-over-vtest", "PreferMovmskOverVTest", "true",
 480     "Prefer movmsk over vtest instruction">;
 481
 482 def TuningSlowSHLD : SubtargetFeature<
 483     "slow-shld", "IsSHLDSlow", "true", "SHLD instruction is slow">;
 484
 485 def TuningSlowPMULLD : SubtargetFeature<
 486     "slow-pmulld", "IsPMULLDSlow", "true", "PMULLD instruction is slow (compared to PMULLW/PMULHW and PMULUDQ)">;
 487
 488 def TuningSlowPMADDWD : SubtargetFeature<
 489     "slow-pmaddwd", "IsPMADDWDSlow", "true",
 490     "PMADDWD is slower than PMULLD">;
 491
 492 // FIXME: CPUs that do not have SSE should not be subject to this.
 493 def TuningSlowUAMem16 : SubtargetFeature<
 494     "slow-unaligned-mem-16", "IsUnalignedMem16Slow", "true",
 495     "Slow unaligned 16-byte memory access">;
 496
 497 def TuningSlowUAMem32 : SubtargetFeature<
 498     "slow-unaligned-mem-32", "IsUnalignedMem32Slow", "true",
 499     "Slow unaligned 32-byte memory access">;
 500
 501 def TuningLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
 502                                      "Use LEA for adjusting the stack pointer (this is an optimization for Intel Atom processors)">;
 503
 504 // True if 8-bit divisions are significantly faster than
 505 // 32-bit divisions and should be used when possible.
 506 def TuningSlowDivide32 : SubtargetFeature<"idivl-to-divb",
 507                                      "HasSlowDivide32", "true",
 508                                      "Use 8-bit divide for positive values less than 256">;
 509
 510 // True if 32-bit divides are significantly faster than
 511 // 64-bit divisions and should be used when possible.
 512 def TuningSlowDivide64 : SubtargetFeature<"idivq-to-divl",
 513                                      "HasSlowDivide64", "true",
 514                                      "Use 32-bit divide for positive values less than 2^32">;
 515
 516 def TuningPadShortFunctions : SubtargetFeature<"pad-short-functions",
 517                                      "PadShortFunctions", "true",
 518                                      "Pad short functions (to prevent a stall when returning too early)">;
 519
 520 // On some processors, instructions that implicitly take two memory operands are
 521 // slow. In practice, this means that CALL, PUSH, and POP with memory operands
 522 // should be avoided in favor of a MOV + register CALL/PUSH/POP.
 523 def TuningSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
 524                                      "SlowTwoMemOps", "true",
 525                                      "Two memory operand instructions are slow">;
 526
 527 // True if the LEA instruction inputs have to be ready at address generation
 528 // (AG) time.
 529 def TuningLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LeaUsesAG", "true",
 530                                    "LEA instruction needs inputs at AG stage">;
 531
 532 def TuningSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
 533                                    "LEA instruction with certain arguments is slow">;
 534
 535 // True if LEA is slow when it has all three source operands (base, index,
 536 // and offset), or when it uses base and index registers where the base is
 537 // EBP, RBP, or R13.
 538 def TuningSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
 539                                    "LEA instruction with 3 ops or certain registers is slow">;
 540
 541 // True if INC and DEC instructions are slow when writing to flags
 542 def TuningSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
 543                                    "INC and DEC instructions are slower than ADD and SUB">;
 544
 545 def TuningPOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
 546                                      "HasPOPCNTFalseDeps", "true",
 547                                      "POPCNT has a false dependency on dest register">;
 548
 549 def TuningLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
 550                                      "HasLZCNTFalseDeps", "true",
 551                                      "LZCNT/TZCNT have a false dependency on dest register">;
 552
 553 def TuningMULCFalseDeps : SubtargetFeature<"false-deps-mulc",
 554                                "HasMULCFalseDeps", "true",
 555                                "VF[C]MULCPH/SH has a false dependency on dest register">;
 556
 557 def TuningPERMFalseDeps : SubtargetFeature<"false-deps-perm",
 558                                "HasPERMFalseDeps", "true",
 559                                "VPERMD/Q/PS/PD has a false dependency on dest register">;
 560
 561 def TuningRANGEFalseDeps : SubtargetFeature<"false-deps-range",
 562                                "HasRANGEFalseDeps", "true",
 563                                "VRANGEPD/PS/SD/SS has a false dependency on dest register">;
 564
 565 def TuningGETMANTFalseDeps : SubtargetFeature<"false-deps-getmant",
 566                                "HasGETMANTFalseDeps", "true",
 567                                "VGETMANTSS/SD/SH and VGETMANTPS/PD(memory version) has a"
 568                                " false dependency on dest register">;
 569
 570 def TuningMULLQFalseDeps : SubtargetFeature<"false-deps-mullq",
 571                                "HasMULLQFalseDeps", "true",
 572                                "VPMULLQ has a false dependency on dest register">;
 573
 574 def TuningSBBDepBreaking : SubtargetFeature<"sbb-dep-breaking",
 575                                      "HasSBBDepBreaking", "true",
 576                                      "SBB with same register has no source dependency">;
 577
 578 // On recent X86 (port bound) processors, it's preferable to combine to a
 579 // single shuffle using a variable mask over multiple fixed shuffles.
 580 def TuningFastVariableCrossLaneShuffle
 581     : SubtargetFeature<"fast-variable-crosslane-shuffle",
 582                        "HasFastVariableCrossLaneShuffle",
 583                        "true", "Cross-lane shuffles with variable masks are fast">;
 584 def TuningFastVariablePerLaneShuffle
 585     : SubtargetFeature<"fast-variable-perlane-shuffle",
 586                        "HasFastVariablePerLaneShuffle",
 587                        "true", "Per-lane shuffles with variable masks are fast">;
 588
 589 // Goldmont / Tremont (Atom in general) have no bypass delay.
 590 def TuningNoDomainDelay : SubtargetFeature<"no-bypass-delay",
 591                                    "NoDomainDelay","true",
 592                                    "Has no bypass delay when using the 'wrong' domain">;
 593
 594 // Many processors (Nehalem+ on Intel) have no bypass delay when
 595 // using the wrong mov type.
 596 def TuningNoDomainDelayMov : SubtargetFeature<"no-bypass-delay-mov",
 597                                    "NoDomainDelayMov","true",
 598                                    "Has no bypass delay when using the 'wrong' mov type">;
 599
 600 // Newer processors (Skylake+ on Intel) have no bypass delay when
 601 // using the wrong blend type.
 602 def TuningNoDomainDelayBlend : SubtargetFeature<"no-bypass-delay-blend",
 603                                    "NoDomainDelayBlend","true",
 604                                    "Has no bypass delay when using the 'wrong' blend type">;
 605
 606 // Newer processors (Haswell+ on Intel) have no bypass delay when
 607 // using the wrong shuffle type.
 608 def TuningNoDomainDelayShuffle : SubtargetFeature<"no-bypass-delay-shuffle",
 609                                    "NoDomainDelayShuffle","true",
 610                                    "Has no bypass delay when using the 'wrong' shuffle type">;
 611
 612 // Prefer lowering shuffles on AVX512 targets (e.g. Skylake Server) to
 613 // imm shifts/rotate if they can use more ports than regular shuffles.
 614 def TuningPreferShiftShuffle : SubtargetFeature<"faster-shift-than-shuffle",
 615                                    "PreferLowerShuffleAsShift", "true",
 616                                    "Shifts are faster (or as fast) as shuffle">;
 617
 618 def TuningFastImmVectorShift : SubtargetFeature<"tuning-fast-imm-vector-shift",
 619                                    "FastImmVectorShift", "true",
 620                                    "Vector shifts are fast (2/cycle) as opposed to slow (1/cycle)">;
 621
 622 // On some X86 processors, a vzeroupper instruction should be inserted after
 623 // using ymm/zmm registers before executing code that may use SSE instructions.
 624 def TuningInsertVZEROUPPER
 625     : SubtargetFeature<"vzeroupper",
 626                        "InsertVZEROUPPER",
 627                        "true", "Should insert vzeroupper instructions">;
 628
 629 // TuningFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
 630 // than the corresponding NR code. TuningFastVectorFSQRT should be enabled if
 631 // vector FSQRT has higher throughput than the corresponding NR code.
 632 // The idea is that throughput bound code is likely to be vectorized, so for
 633 // vectorized code we should care about the throughput of SQRT operations.
 634 // But if the code is scalar that probably means that the code has some kind of
 635 // dependency and we should care more about reducing the latency.
 636
 637 // True if hardware SQRTSS instruction is at least as fast (latency) as
 638 // RSQRTSS followed by a Newton-Raphson iteration.
 639 def TuningFastScalarFSQRT
 640     : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
 641                        "true", "Scalar SQRT is fast (disable Newton-Raphson)">;
 642 // True if hardware SQRTPS/VSQRTPS instructions are at least as fast
 643 // (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration.
 644 def TuningFastVectorFSQRT
 645     : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
 646                        "true", "Vector SQRT is fast (disable Newton-Raphson)">;
 647
 648 // If lzcnt has equivalent latency/throughput to most simple integer ops, it can
 649 // be used to replace test/set sequences.
 650 def TuningFastLZCNT
 651     : SubtargetFeature<
 652           "fast-lzcnt", "HasFastLZCNT", "true",
 653           "LZCNT instructions are as fast as most simple integer ops">;
 654
 655 // True if the target can efficiently decode NOPs up to 7 bytes in length.
 656 def TuningFast7ByteNOP
 657     : SubtargetFeature<
 658           "fast-7bytenop", "HasFast7ByteNOP", "true",
 659           "Target can quickly decode up to 7 byte NOPs">;
 660
 661 // True if the target can efficiently decode NOPs up to 11 bytes in length.
 662 def TuningFast11ByteNOP
 663     : SubtargetFeature<
 664           "fast-11bytenop", "HasFast11ByteNOP", "true",
 665           "Target can quickly decode up to 11 byte NOPs">;
 666
 667 // True if the target can efficiently decode NOPs up to 15 bytes in length.
 668 def TuningFast15ByteNOP
 669     : SubtargetFeature<
 670           "fast-15bytenop", "HasFast15ByteNOP", "true",
 671           "Target can quickly decode up to 15 byte NOPs">;
 672
 673 // Sandy Bridge and newer processors can use SHLD with the same source on both
 674 // inputs to implement rotate to avoid the partial flag update of the normal
 675 // rotate instructions.
 676 def TuningFastSHLDRotate
 677     : SubtargetFeature<
 678           "fast-shld-rotate", "HasFastSHLDRotate", "true",
 679           "SHLD can be used as a faster rotate">;
 680
 681 // Bulldozer and newer processors can merge CMP/TEST (but not other
 682 // instructions) with conditional branches.
 683 def TuningBranchFusion
 684     : SubtargetFeature<"branchfusion", "HasBranchFusion", "true",
 685                  "CMP/TEST can be fused with conditional branches">;
 686
 687 // Sandy Bridge and newer processors have many instructions that can be
 688 // fused with conditional branches and pass through the CPU as a single
 689 // operation.
 690 def TuningMacroFusion
 691     : SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
 692                  "Various instructions can be fused with conditional branches">;
 693
 694 // Gather is available since Haswell (AVX2 set). So technically, we can
 695 // generate Gathers on all AVX2 processors. But the overhead on HSW is high.
 696 // Skylake Client processor has faster Gathers than HSW and performance is
 697 // similar to Skylake Server (AVX-512).
 698 def TuningFastGather
 699     : SubtargetFeature<"fast-gather", "HasFastGather", "true",
 700                        "Indicates if gather is reasonably fast (this is true for Skylake client and all AVX-512 CPUs)">;
 701
 702 // Generate vpdpwssd instead of vpmaddwd+vpaddd sequence.
 703 def TuningFastDPWSSD
 704     : SubtargetFeature<
 705           "fast-dpwssd", "HasFastDPWSSD", "true",
 706           "Prefer vpdpwssd instruction over vpmaddwd+vpaddd instruction sequence">;
 707
 708 def TuningPreferNoGather
 709     : SubtargetFeature<"prefer-no-gather", "PreferGather", "false",
 710                        "Prefer no gather instructions">;
 711 def TuningPreferNoScatter
 712     : SubtargetFeature<"prefer-no-scatter", "PreferScatter", "false",
 713                        "Prefer no scatter instructions">;
 714
 715 def TuningPrefer128Bit
 716     : SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true",
 717                        "Prefer 128-bit AVX instructions">;
 718
 719 def TuningPrefer256Bit
 720     : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
 721                        "Prefer 256-bit AVX instructions">;
 722
 723 def TuningAllowLight256Bit
 724     : SubtargetFeature<"allow-light-256-bit", "AllowLight256Bit", "true",
 725                        "Enable generation of 256-bit load/stores even if we prefer 128-bit">;
 726
 727 def TuningPreferMaskRegisters
 728     : SubtargetFeature<"prefer-mask-registers", "PreferMaskRegisters", "true",
 729                        "Prefer AVX512 mask registers over PTEST/MOVMSK">;
 730
 731 def TuningFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
 732           "Indicates that the BEXTR instruction is implemented as a single uop "
 733           "with good throughput">;
 734
 735 // Combine vector math operations with shuffles into horizontal math
 736 // instructions if a CPU implements horizontal operations (introduced with
 737 // SSE3) with better latency/throughput than the alternative sequence.
 738 def TuningFastHorizontalOps
 739     : SubtargetFeature<
 740         "fast-hops", "HasFastHorizontalOps", "true",
 741         "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
 742         "normal vector instructions with shuffles">;
 743
 744 def TuningFastScalarShiftMasks
 745     : SubtargetFeature<
 746         "fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
 747         "Prefer a left/right scalar logical shift pair over a shift+and pair">;
 748
 749 def TuningFastVectorShiftMasks
 750     : SubtargetFeature<
 751         "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
 752         "Prefer a left/right vector logical shift pair over a shift+and pair">;
 753
 754 def TuningFastMOVBE
 755     : SubtargetFeature<"fast-movbe", "HasFastMOVBE", "true",
 756     "Prefer a movbe over a single-use load + bswap / single-use bswap + store">;
 757
 758 def TuningFastImm16
 759     : SubtargetFeature<"fast-imm16", "HasFastImm16", "true",
 760     "Prefer a i16 instruction with i16 immediate over extension to i32">;
 761
 762 def TuningUseSLMArithCosts
 763     : SubtargetFeature<"use-slm-arith-costs", "UseSLMArithCosts", "true",
 764         "Use Silvermont specific arithmetic costs">;
 765
 766 def TuningUseGLMDivSqrtCosts
 767     : SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
 768         "Use Goldmont specific floating point div/sqrt costs">;
 769
 770 // Starting with the Redwood Cove architecture, branches support a
 771 // branch-taken hint (i.e., instruction prefix 3EH).
 772 def TuningBranchHint: SubtargetFeature<"branch-hint", "HasBranchHint", "true",
 773                                         "Target has branch hint feature">;
 774
 775 //===----------------------------------------------------------------------===//
 776 // X86 CPU Families
 777 // TODO: Remove these - use general tuning features to determine codegen.
 778 //===----------------------------------------------------------------------===//
 779
 780 // Bonnell
 781 def ProcIntelAtom : SubtargetFeature<"", "IsAtom", "true", "Is Intel Atom processor">;
 782
 783 //===----------------------------------------------------------------------===//
 784 // Register File Description
 785 //===----------------------------------------------------------------------===//
 786
 787 include "X86RegisterInfo.td"
 788 include "X86RegisterBanks.td"
 789
 790 //===----------------------------------------------------------------------===//
 791 // Instruction Descriptions
 792 //===----------------------------------------------------------------------===//
 793
 794 include "X86Schedule.td"
 795 include "X86InstrInfo.td"
 796 include "X86SchedPredicates.td"
 797
 798 def X86InstrInfo : InstrInfo;
 799
 800 //===----------------------------------------------------------------------===//
 801 // X86 Scheduler Models
 802 //===----------------------------------------------------------------------===//
 803
 804 include "X86ScheduleAtom.td"
 805 include "X86SchedSandyBridge.td"
 806 include "X86SchedHaswell.td"
 807 include "X86SchedBroadwell.td"
 808 include "X86ScheduleSLM.td"
 809 include "X86ScheduleZnver1.td"
 810 include "X86ScheduleZnver2.td"
 811 include "X86ScheduleZnver3.td"
 812 include "X86ScheduleZnver4.td"
 813 include "X86ScheduleBdVer2.td"
 814 include "X86ScheduleBtVer2.td"
 815 include "X86SchedSkylakeClient.td"
 816 include "X86SchedSkylakeServer.td"
 817 include "X86SchedIceLake.td"
 818 include "X86SchedAlderlakeP.td"
 819 include "X86SchedSapphireRapids.td"
 820
 821 //===----------------------------------------------------------------------===//
 822 // X86 Processor Feature Lists
 823 //===----------------------------------------------------------------------===//
 824
 825 def ProcessorFeatures {
 826   // x86-64 micro-architecture levels: x86-64 and x86-64-v[234]
 827   list<SubtargetFeature> X86_64V1Features = [
 828     FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSE2,
 829     FeatureFXSR, FeatureNOPL, FeatureX86_64,
 830   ];
 831   list<SubtargetFeature> X86_64V1Tuning = [
 832     TuningMacroFusion,
 833     TuningSlow3OpsLEA,
 834     TuningSlowDivide64,
 835     TuningSlowIncDec,
 836     TuningInsertVZEROUPPER
 837   ];
 838
 839   list<SubtargetFeature> X86_64V2Features = !listconcat(X86_64V1Features, [
 840     FeatureCX16, FeatureLAHFSAHF64, FeatureCRC32, FeaturePOPCNT,
 841     FeatureSSE42
 842   ]);
 843   list<SubtargetFeature> X86_64V2Tuning = [
 844     TuningMacroFusion,
 845     TuningSlow3OpsLEA,
 846     TuningSlowDivide64,
 847     TuningSlowUAMem32,
 848     TuningFastScalarFSQRT,
 849     TuningFastSHLDRotate,
 850     TuningFast15ByteNOP,
 851     TuningPOPCNTFalseDeps,
 852     TuningInsertVZEROUPPER
 853   ];
 854
 855   list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [
 856     FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT,
 857     FeatureMOVBE, FeatureXSAVE
 858   ]);
 859   list<SubtargetFeature> X86_64V3Tuning = [
 860     TuningMacroFusion,
 861     TuningSlow3OpsLEA,
 862     TuningSlowDivide64,
 863     TuningFastScalarFSQRT,
 864     TuningFastSHLDRotate,
 865     TuningFast15ByteNOP,
 866     TuningFastVariableCrossLaneShuffle,
 867     TuningFastVariablePerLaneShuffle,
 868     TuningPOPCNTFalseDeps,
 869     TuningLZCNTFalseDeps,
 870     TuningInsertVZEROUPPER,
 871     TuningAllowLight256Bit
 872   ];
 873
 874   list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [
 875     FeatureEVEX512,
 876     FeatureBWI,
 877     FeatureCDI,
 878     FeatureDQI,
 879     FeatureVLX,
 880   ]);
 881   list<SubtargetFeature> X86_64V4Tuning = [
 882     TuningMacroFusion,
 883     TuningSlow3OpsLEA,
 884     TuningSlowDivide64,
 885     TuningFastScalarFSQRT,
 886     TuningFastVectorFSQRT,
 887     TuningFastSHLDRotate,
 888     TuningFast15ByteNOP,
 889     TuningFastVariableCrossLaneShuffle,
 890     TuningFastVariablePerLaneShuffle,
 891     TuningPrefer256Bit,
 892     TuningFastGather,
 893     TuningPOPCNTFalseDeps,
 894     TuningInsertVZEROUPPER,
 895     TuningAllowLight256Bit
 896   ];
 897
 898   // Nehalem
 899   list<SubtargetFeature> NHMFeatures = X86_64V2Features;
 900   list<SubtargetFeature> NHMTuning = [TuningMacroFusion,
 901                                       TuningSlowDivide64,
 902                                       TuningInsertVZEROUPPER,
 903                                       TuningNoDomainDelayMov];
 904
 905   // Westmere
 906   list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
 907   list<SubtargetFeature> WSMTuning = NHMTuning;
 908   list<SubtargetFeature> WSMFeatures =
 909     !listconcat(NHMFeatures, WSMAdditionalFeatures);
 910
 911   // Sandybridge
 912   list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX,
 913                                                   FeatureXSAVE,
 914                                                   FeatureXSAVEOPT];
 915   list<SubtargetFeature> SNBTuning = [TuningMacroFusion,
 916                                       TuningSlow3OpsLEA,
 917                                       TuningSlowDivide64,
 918                                       TuningSlowUAMem32,
 919                                       TuningFastScalarFSQRT,
 920                                       TuningFastSHLDRotate,
 921                                       TuningFast15ByteNOP,
 922                                       TuningPOPCNTFalseDeps,
 923                                       TuningInsertVZEROUPPER,
 924                                       TuningNoDomainDelayMov];
 925   list<SubtargetFeature> SNBFeatures =
 926     !listconcat(WSMFeatures, SNBAdditionalFeatures);
 927
 928   // Ivybridge
 929   list<SubtargetFeature> IVBAdditionalFeatures = [FeatureRDRAND,
 930                                                   FeatureF16C,
 931                                                   FeatureFSGSBase];
 932   list<SubtargetFeature> IVBTuning = SNBTuning;
 933   list<SubtargetFeature> IVBFeatures =
 934     !listconcat(SNBFeatures, IVBAdditionalFeatures);
 935
 936   // Haswell
 937   list<SubtargetFeature> HSWAdditionalFeatures = [FeatureAVX2,
 938                                                   FeatureBMI,
 939                                                   FeatureBMI2,
 940                                                   FeatureERMSB,
 941                                                   FeatureFMA,
 942                                                   FeatureINVPCID,
 943                                                   FeatureLZCNT,
 944                                                   FeatureMOVBE];
 945   list<SubtargetFeature> HSWTuning = [TuningMacroFusion,
 946                                       TuningSlow3OpsLEA,
 947                                       TuningSlowDivide64,
 948                                       TuningFastScalarFSQRT,
 949                                       TuningFastSHLDRotate,
 950                                       TuningFast15ByteNOP,
 951                                       TuningFastVariableCrossLaneShuffle,
 952                                       TuningFastVariablePerLaneShuffle,
 953                                       TuningPOPCNTFalseDeps,
 954                                       TuningLZCNTFalseDeps,
 955                                       TuningInsertVZEROUPPER,
 956                                       TuningAllowLight256Bit,
 957                                       TuningNoDomainDelayMov,
 958                                       TuningNoDomainDelayShuffle];
 959   list<SubtargetFeature> HSWFeatures =
 960     !listconcat(IVBFeatures, HSWAdditionalFeatures);
 961
 962   // Broadwell
 963   list<SubtargetFeature> BDWAdditionalFeatures = [FeatureADX,
 964                                                   FeatureRDSEED,
 965                                                   FeaturePRFCHW];
 966   list<SubtargetFeature> BDWTuning = HSWTuning;
 967   list<SubtargetFeature> BDWFeatures =
 968     !listconcat(HSWFeatures, BDWAdditionalFeatures);
 969
 970   // Skylake
 971   list<SubtargetFeature> SKLAdditionalFeatures = [FeatureAES,
 972                                                   FeatureXSAVEC,
 973                                                   FeatureXSAVES,
 974                                                   FeatureCLFLUSHOPT];
 975   list<SubtargetFeature> SKLTuning = [TuningFastGather,
 976                                       TuningMacroFusion,
 977                                       TuningSlow3OpsLEA,
 978                                       TuningSlowDivide64,
 979                                       TuningFastScalarFSQRT,
 980                                       TuningFastVectorFSQRT,
 981                                       TuningFastSHLDRotate,
 982                                       TuningFast15ByteNOP,
 983                                       TuningFastVariableCrossLaneShuffle,
 984                                       TuningFastVariablePerLaneShuffle,
 985                                       TuningPOPCNTFalseDeps,
 986                                       TuningInsertVZEROUPPER,
 987                                       TuningAllowLight256Bit,
 988                                       TuningNoDomainDelayMov,
 989                                       TuningNoDomainDelayShuffle,
 990                                       TuningNoDomainDelayBlend];
 991   list<SubtargetFeature> SKLFeatures =
 992     !listconcat(BDWFeatures, SKLAdditionalFeatures);
 993
 994   // Skylake-AVX512
 995   list<SubtargetFeature> SKXAdditionalFeatures = [FeatureAES,
 996                                                   FeatureXSAVEC,
 997                                                   FeatureXSAVES,
 998                                                   FeatureCLFLUSHOPT,
 999                                                   FeatureAVX512,
1000                                                   FeatureEVEX512,
1001                                                   FeatureCDI,
1002                                                   FeatureDQI,
1003                                                   FeatureBWI,
1004                                                   FeatureVLX,
1005                                                   FeaturePKU,
1006                                                   FeatureCLWB];
1007   list<SubtargetFeature> SKXTuning = [TuningFastGather,
1008                                       TuningMacroFusion,
1009                                       TuningSlow3OpsLEA,
1010                                       TuningSlowDivide64,
1011                                       TuningFastScalarFSQRT,
1012                                       TuningFastVectorFSQRT,
1013                                       TuningFastSHLDRotate,
1014                                       TuningFast15ByteNOP,
1015                                       TuningFastVariableCrossLaneShuffle,
1016                                       TuningFastVariablePerLaneShuffle,
1017                                       TuningPrefer256Bit,
1018                                       TuningPOPCNTFalseDeps,
1019                                       TuningInsertVZEROUPPER,
1020                                       TuningAllowLight256Bit,
1021                                       TuningPreferShiftShuffle,
1022                                       TuningNoDomainDelayMov,
1023                                       TuningNoDomainDelayShuffle,
1024                                       TuningNoDomainDelayBlend,
1025                                       TuningFastImmVectorShift];
1026   list<SubtargetFeature> SKXFeatures =
1027     !listconcat(BDWFeatures, SKXAdditionalFeatures);
1028
1029   // Cascadelake
1030   list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI];
1031   list<SubtargetFeature> CLXTuning = SKXTuning;
1032   list<SubtargetFeature> CLXFeatures =
1033     !listconcat(SKXFeatures, CLXAdditionalFeatures);
1034
1035   // Cooperlake
1036   list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16];
1037   list<SubtargetFeature> CPXTuning = SKXTuning;
1038   list<SubtargetFeature> CPXFeatures =
1039     !listconcat(CLXFeatures, CPXAdditionalFeatures);
1040
1041   // Cannonlake
1042   list<SubtargetFeature> CNLAdditionalFeatures = [FeatureAVX512,
1043                                                   FeatureEVEX512,
1044                                                   FeatureCDI,
1045                                                   FeatureDQI,
1046                                                   FeatureBWI,
1047                                                   FeatureVLX,
1048                                                   FeaturePKU,
1049                                                   FeatureVBMI,
1050                                                   FeatureIFMA,
1051                                                   FeatureSHA];
1052   list<SubtargetFeature> CNLTuning = [TuningFastGather,
1053                                       TuningMacroFusion,
1054                                       TuningSlow3OpsLEA,
1055                                       TuningSlowDivide64,
1056                                       TuningFastScalarFSQRT,
1057                                       TuningFastVectorFSQRT,
1058                                       TuningFastSHLDRotate,
1059                                       TuningFast15ByteNOP,
1060                                       TuningFastVariableCrossLaneShuffle,
1061                                       TuningFastVariablePerLaneShuffle,
1062                                       TuningPrefer256Bit,
1063                                       TuningInsertVZEROUPPER,
1064                                       TuningAllowLight256Bit,
1065                                       TuningNoDomainDelayMov,
1066                                       TuningNoDomainDelayShuffle,
1067                                       TuningNoDomainDelayBlend,
1068                                       TuningFastImmVectorShift];
1069   list<SubtargetFeature> CNLFeatures =
1070     !listconcat(SKLFeatures, CNLAdditionalFeatures);
1071
1072   // Icelake
1073   list<SubtargetFeature> ICLAdditionalFeatures = [FeatureBITALG,
1074                                                   FeatureVAES,
1075                                                   FeatureVBMI2,
1076                                                   FeatureVNNI,
1077                                                   FeatureVPCLMULQDQ,
1078                                                   FeatureVPOPCNTDQ,
1079                                                   FeatureGFNI,
1080                                                   FeatureRDPID,
1081                                                   FeatureFSRM];
1082   list<SubtargetFeature> ICLTuning = [TuningFastGather,
1083                                       TuningMacroFusion,
1084                                       TuningSlowDivide64,
1085                                       TuningFastScalarFSQRT,
1086                                       TuningFastVectorFSQRT,
1087                                       TuningFastSHLDRotate,
1088                                       TuningFast15ByteNOP,
1089                                       TuningFastVariableCrossLaneShuffle,
1090                                       TuningFastVariablePerLaneShuffle,
1091                                       TuningPrefer256Bit,
1092                                       TuningInsertVZEROUPPER,
1093                                       TuningAllowLight256Bit,
1094                                       TuningNoDomainDelayMov,
1095                                       TuningNoDomainDelayShuffle,
1096                                       TuningNoDomainDelayBlend,
1097                                       TuningFastImmVectorShift];
1098   list<SubtargetFeature> ICLFeatures =
1099     !listconcat(CNLFeatures, ICLAdditionalFeatures);
1100
1101   // Icelake Server: ICLFeatures plus three additions; tuning is ICLTuning unchanged.
1102   list<SubtargetFeature> ICXAdditionalFeatures = [FeaturePCONFIG,
1103                                                   FeatureCLWB,
1104                                                   FeatureWBNOINVD];
1105   list<SubtargetFeature> ICXTuning = ICLTuning;
1106   list<SubtargetFeature> ICXFeatures =
1107     !listconcat(ICLFeatures, ICXAdditionalFeatures);
1108
1109   // Tigerlake: ICLFeatures plus five additions; tuning is ICLTuning unchanged.
1110   list<SubtargetFeature> TGLAdditionalFeatures = [FeatureVP2INTERSECT,
1111                                                   FeatureCLWB,
1112                                                   FeatureMOVDIRI,
1113                                                   FeatureMOVDIR64B,
1114                                                   FeatureSHSTK];
1115   list<SubtargetFeature> TGLTuning = ICLTuning;
1116   list<SubtargetFeature> TGLFeatures =
1117     !listconcat(ICLFeatures, TGLAdditionalFeatures );
1118
1119   // Sapphirerapids: extends the Icelake Server (ICX) feature and tuning lists.
1120   list<SubtargetFeature> SPRAdditionalFeatures = [FeatureAMXTILE,
1121                                                   FeatureAMXINT8,
1122                                                   FeatureAMXBF16,
1123                                                   FeatureBF16,
1124                                                   FeatureSERIALIZE,
1125                                                   FeatureCLDEMOTE,
1126                                                   FeatureWAITPKG,
1127                                                   FeaturePTWRITE,
1128                                                   FeatureFP16,
1129                                                   FeatureAVXVNNI,
1130                                                   FeatureTSXLDTRK,
1131                                                   FeatureENQCMD,
1132                                                   FeatureSHSTK,
1133                                                   FeatureMOVDIRI,
1134                                                   FeatureMOVDIR64B,
1135                                                   FeatureUINTR];
1136   list<SubtargetFeature> SPRAdditionalTuning = [TuningMULCFalseDeps,
1137                                                 TuningPERMFalseDeps,
1138                                                 TuningRANGEFalseDeps,
1139                                                 TuningGETMANTFalseDeps,
1140                                                 TuningMULLQFalseDeps];
1141   list<SubtargetFeature> SPRTuning = !listconcat(ICXTuning, SPRAdditionalTuning);
1142   list<SubtargetFeature> SPRFeatures =
1143     !listconcat(ICXFeatures, SPRAdditionalFeatures);
1144
1145   // Graniterapids: extends the SPR feature and tuning lists.
1146   list<SubtargetFeature> GNRAdditionalFeatures = [FeatureAMXFP16,
1147                                                   FeaturePREFETCHI];
1148   list<SubtargetFeature> GNRFeatures =
1149     !listconcat(SPRFeatures, GNRAdditionalFeatures);
1150   list<SubtargetFeature> GNRAdditionalTuning = [TuningBranchHint];
1151   list<SubtargetFeature> GNRTuning = !listconcat(SPRTuning, GNRAdditionalTuning);
1152
1153   // Graniterapids D: GNRFeatures plus FeatureAMXCOMPLEX; no tuning list in this section.
1154   list<SubtargetFeature> GNRDAdditionalFeatures = [FeatureAMXCOMPLEX];
1155   list<SubtargetFeature> GNRDFeatures =
1156     !listconcat(GNRFeatures, GNRDAdditionalFeatures);
1157
1158   // Diamond Rapids: GNRDFeatures plus the additions below; no tuning list in this section.
1159   list<SubtargetFeature> DMRAdditionalFeatures = [FeatureAVX10_2_512,
1160                                                   FeatureSM4,
1161                                                   FeatureCMPCCXADD,
1162                                                   FeatureAVXIFMA,
1163                                                   FeatureAVXNECONVERT,
1164                                                   FeatureAVXVNNIINT8,
1165                                                   FeatureAVXVNNIINT16,
1166                                                   FeatureUSERMSR,
1167                                                   FeatureSHA512,
1168                                                   FeatureSM3,
1169                                                   FeatureEGPR,
1170                                                   FeatureZU,
1171                                                   FeatureCCMP,
1172                                                   FeaturePush2Pop2,
1173                                                   FeaturePPX,
1174                                                   FeatureNDD,
1175                                                   FeatureNF,
1176                                                   FeatureCF,
1177                                                   FeatureMOVRS,
1178                                                   FeatureAMXMOVRS,
1179                                                   FeatureAMXAVX512,
1180                                                   FeatureAMXFP8,
1181                                                   FeatureAMXTF32,
1182                                                   FeatureAMXTRANSPOSE];
1183   list<SubtargetFeature> DMRFeatures =
1184     !listconcat(GNRDFeatures, DMRAdditionalFeatures);
1185
1186   // Atom: base feature list that SLMFeatures extends; AtomTuning is a standalone list.
1187   list<SubtargetFeature> AtomFeatures = [FeatureX87,
1188                                          FeatureCX8,
1189                                          FeatureCMOV,
1190                                          FeatureMMX,
1191                                          FeatureSSSE3,
1192                                          FeatureFXSR,
1193                                          FeatureNOPL,
1194                                          FeatureX86_64,
1195                                          FeatureCX16,
1196                                          FeatureMOVBE,
1197                                          FeatureLAHFSAHF64];
1198   list<SubtargetFeature> AtomTuning = [ProcIntelAtom,
1199                                        TuningSlowUAMem16,
1200                                        TuningLEAForSP,
1201                                        TuningSlowDivide32,
1202                                        TuningSlowDivide64,
1203                                        TuningSlowTwoMemOps,
1204                                        TuningFastImm16,
1205                                        TuningLEAUsesAG,
1206                                        TuningPadShortFunctions,
1207                                        TuningInsertVZEROUPPER,
1208                                        TuningNoDomainDelay];
1209
1210   // Silvermont: AtomFeatures plus the SLM additions; SLMTuning does not reuse AtomTuning.
1211   list<SubtargetFeature> SLMAdditionalFeatures = [FeatureSSE42,
1212                                                   FeatureCRC32,
1213                                                   FeaturePOPCNT,
1214                                                   FeaturePCLMUL,
1215                                                   FeaturePRFCHW,
1216                                                   FeatureRDRAND];
1217   list<SubtargetFeature> SLMTuning = [TuningUseSLMArithCosts,
1218                                       TuningSlowTwoMemOps,
1219                                       TuningSlowLEA,
1220                                       TuningSlowIncDec,
1221                                       TuningSlowDivide64,
1222                                       TuningSlowPMULLD,
1223                                       TuningFast7ByteNOP,
1224                                       TuningFastMOVBE,
1225                                       TuningFastImm16,
1226                                       TuningPOPCNTFalseDeps,
1227                                       TuningInsertVZEROUPPER,
1228                                       TuningNoDomainDelay];
1229   list<SubtargetFeature> SLMFeatures =
1230     !listconcat(AtomFeatures, SLMAdditionalFeatures);
1231
1232   // Goldmont: SLMFeatures plus the GLM additions; GLMTuning is a standalone list.
1233   list<SubtargetFeature> GLMAdditionalFeatures = [FeatureAES,
1234                                                   FeatureSHA,
1235                                                   FeatureRDSEED,
1236                                                   FeatureXSAVE,
1237                                                   FeatureXSAVEOPT,
1238                                                   FeatureXSAVEC,
1239                                                   FeatureXSAVES,
1240                                                   FeatureCLFLUSHOPT,
1241                                                   FeatureFSGSBase];
1242   list<SubtargetFeature> GLMTuning = [TuningUseGLMDivSqrtCosts,
1243                                       TuningSlowTwoMemOps,
1244                                       TuningSlowLEA,
1245                                       TuningSlowIncDec,
1246                                       TuningFastMOVBE,
1247                                       TuningFastImm16,
1248                                       TuningPOPCNTFalseDeps,
1249                                       TuningInsertVZEROUPPER,
1250                                       TuningNoDomainDelay];
1251   list<SubtargetFeature> GLMFeatures =
1252     !listconcat(SLMFeatures, GLMAdditionalFeatures);
1253
1254   // Goldmont Plus: GLMFeatures + 2 additions; tuning is GLMTuning minus TuningPOPCNTFalseDeps.
1255   list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE,
1256                                                   FeatureRDPID];
1257   list<SubtargetFeature> GLPTuning = [TuningUseGLMDivSqrtCosts,
1258                                       TuningSlowTwoMemOps,
1259                                       TuningSlowLEA,
1260                                       TuningSlowIncDec,
1261                                       TuningFastMOVBE,
1262                                       TuningFastImm16,
1263                                       TuningInsertVZEROUPPER,
1264                                       TuningNoDomainDelay];
1265   list<SubtargetFeature> GLPFeatures =
1266     !listconcat(GLMFeatures, GLPAdditionalFeatures);
1267
1268   // Tremont: GLPFeatures plus two additions; tuning is GLPTuning unchanged.
1269   list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLWB,
1270                                                   FeatureGFNI];
1271   list<SubtargetFeature> TRMTuning = GLPTuning;
1272   list<SubtargetFeature> TRMFeatures =
1273     !listconcat(GLPFeatures, TRMAdditionalFeatures);
1274
1275   // Alderlake: the ISA list extends TRMFeatures, while the tuning list extends SKLTuning.
1276   list<SubtargetFeature> ADLAdditionalFeatures = [FeatureSERIALIZE,
1277                                                   FeaturePCONFIG,
1278                                                   FeatureSHSTK,
1279                                                   FeatureWIDEKL,
1280                                                   FeatureINVPCID,
1281                                                   FeatureADX,
1282                                                   FeatureFMA,
1283                                                   FeatureVAES,
1284                                                   FeatureVPCLMULQDQ,
1285                                                   FeatureF16C,
1286                                                   FeatureBMI,
1287                                                   FeatureBMI2,
1288                                                   FeatureLZCNT,
1289                                                   FeatureAVXVNNI,
1290                                                   FeaturePKU,
1291                                                   FeatureHRESET,
1292                                                   FeatureCLDEMOTE,
1293                                                   FeatureMOVDIRI,
1294                                                   FeatureMOVDIR64B,
1295                                                   FeatureWAITPKG];
1296   list<SubtargetFeature> ADLAdditionalTuning = [TuningPERMFalseDeps,
1297                                                 TuningPreferMovmskOverVTest,
1298                                                 TuningFastImmVectorShift];
1299   list<SubtargetFeature> ADLTuning = !listconcat(SKLTuning, ADLAdditionalTuning);
1300   list<SubtargetFeature> ADLFeatures =
1301     !listconcat(TRMFeatures, ADLAdditionalFeatures);
1302
1303   // Gracemont: tuning list only; no feature list is defined in this section.
1304   list<SubtargetFeature> GRTTuning = [TuningMacroFusion,
1305                                       TuningSlow3OpsLEA,
1306                                       TuningFastScalarFSQRT,
1307                                       TuningFastVectorFSQRT,
1308                                       TuningFast15ByteNOP,
1309                                       TuningFastVariablePerLaneShuffle,
1310                                       TuningPOPCNTFalseDeps,
1311                                       TuningInsertVZEROUPPER];
1312
1313   // Sierraforest: ADLFeatures plus the additions below; no tuning list in this section.
1314   list<SubtargetFeature> SRFAdditionalFeatures = [FeatureCMPCCXADD,
1315                                                   FeatureAVXIFMA,
1316                                                   FeatureAVXNECONVERT,
1317                                                   FeatureENQCMD,
1318                                                   FeatureUINTR,
1319                                                   FeatureAVXVNNIINT8];
1320   list<SubtargetFeature> SRFFeatures =
1321     !listconcat(ADLFeatures, SRFAdditionalFeatures);
1322
1323   // Arrowlake S: SRFFeatures plus the additions below; base list for PTLFeatures and CWFFeatures.
1324   list<SubtargetFeature> ARLSAdditionalFeatures = [FeatureAVXVNNIINT16,
1325                                                    FeatureSHA512,
1326                                                    FeatureSM3,
1327                                                    FeatureSM4];
1328   list<SubtargetFeature> ARLSFeatures =
1329     !listconcat(SRFFeatures, ARLSAdditionalFeatures);
1330
1331   // Pantherlake: ARLSFeatures plus FeaturePREFETCHI.
1332   list<SubtargetFeature> PTLAdditionalFeatures = [FeaturePREFETCHI];
1333   list<SubtargetFeature> PTLFeatures =
1334     !listconcat(ARLSFeatures, PTLAdditionalFeatures);
1335
1336
1337   // Clearwaterforest: ARLSFeatures plus FeaturePREFETCHI and FeatureUSERMSR.
1338   list<SubtargetFeature> CWFAdditionalFeatures = [FeaturePREFETCHI,
1339                                                   FeatureUSERMSR];
1340   list<SubtargetFeature> CWFFeatures =
1341     !listconcat(ARLSFeatures, CWFAdditionalFeatures);
1342
1343   // Knights Landing: standalone feature and tuning lists; KNMFeatures extends KNLFeatures.
1344   list<SubtargetFeature> KNLFeatures = [FeatureX87,
1345                                         FeatureCX8,
1346                                         FeatureCMOV,
1347                                         FeatureMMX,
1348                                         FeatureFXSR,
1349                                         FeatureNOPL,
1350                                         FeatureX86_64,
1351                                         FeatureCX16,
1352                                         FeatureCRC32,
1353                                         FeaturePOPCNT,
1354                                         FeaturePCLMUL,
1355                                         FeatureXSAVE,
1356                                         FeatureXSAVEOPT,
1357                                         FeatureLAHFSAHF64,
1358                                         FeatureAES,
1359                                         FeatureRDRAND,
1360                                         FeatureF16C,
1361                                         FeatureFSGSBase,
1362                                         FeatureAVX512,
1363                                         FeatureEVEX512,
1364                                         FeatureCDI,
1365                                         FeatureADX,
1366                                         FeatureRDSEED,
1367                                         FeatureMOVBE,
1368                                         FeatureLZCNT,
1369                                         FeatureBMI,
1370                                         FeatureBMI2,
1371                                         FeatureFMA,
1372                                         FeaturePRFCHW];
1373   list<SubtargetFeature> KNLTuning = [TuningSlowDivide64,
1374                                       TuningSlow3OpsLEA,
1375                                       TuningSlowIncDec,
1376                                       TuningSlowTwoMemOps,
1377                                       TuningPreferMaskRegisters,
1378                                       TuningFastGather,
1379                                       TuningFastMOVBE,
1380                                       TuningFastImm16,
1381                                       TuningSlowPMADDWD];
1382   // TODO: Add AVX5124FMAPS/AVX5124VNNIW features. KNM currently adds only FeatureVPOPCNTDQ.
1383   list<SubtargetFeature> KNMFeatures =
1384     !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]);
1385
1386   // Barcelona: standalone feature and tuning lists (neither is built with !listconcat).
1387   list<SubtargetFeature> BarcelonaFeatures = [FeatureX87,
1388                                               FeatureCX8,
1389                                               FeatureSSE4A,
1390                                               FeatureFXSR,
1391                                               FeatureNOPL,
1392                                               FeatureCX16,
1393                                               FeaturePRFCHW,
1394                                               FeatureLZCNT,
1395                                               FeaturePOPCNT,
1396                                               FeatureLAHFSAHF64,
1397                                               FeatureCMOV,
1398                                               FeatureX86_64];
1399   list<SubtargetFeature> BarcelonaTuning = [TuningFastScalarShiftMasks,
1400                                             TuningSlowDivide64,
1401                                             TuningSlowSHLD,
1402                                             TuningSBBDepBreaking,
1403                                             TuningInsertVZEROUPPER];
1404
1405   // Bobcat (BtVer1): base feature list that BtVer2Features extends; BtVer1Tuning is standalone.
1406   list<SubtargetFeature> BtVer1Features = [FeatureX87,
1407                                            FeatureCX8,
1408                                            FeatureCMOV,
1409                                            FeatureMMX,
1410                                            FeatureSSSE3,
1411                                            FeatureSSE4A,
1412                                            FeatureFXSR,
1413                                            FeatureNOPL,
1414                                            FeatureX86_64,
1415                                            FeatureCX16,
1416                                            FeaturePRFCHW,
1417                                            FeatureLZCNT,
1418                                            FeaturePOPCNT,
1419                                            FeatureLAHFSAHF64];
1420   list<SubtargetFeature> BtVer1Tuning = [TuningFast15ByteNOP,
1421                                          TuningFastScalarShiftMasks,
1422                                          TuningFastVectorShiftMasks,
1423                                          TuningSlowDivide64,
1424                                          TuningSlowSHLD,
1425                                          TuningFastImm16,
1426                                          TuningSBBDepBreaking,
1427                                          TuningInsertVZEROUPPER];
1428
1429   // Jaguar (BtVer2): BtVer1Features plus the additions below; BtVer2Tuning is a standalone list.
1430   list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX,
1431                                                      FeatureAES,
1432                                                      FeatureCRC32,
1433                                                      FeaturePCLMUL,
1434                                                      FeatureBMI,
1435                                                      FeatureF16C,
1436                                                      FeatureMOVBE,
1437                                                      FeatureXSAVE,
1438                                                      FeatureXSAVEOPT];
1439   list<SubtargetFeature> BtVer2Tuning = [TuningFastLZCNT,
1440                                          TuningFastBEXTR,
1441                                          TuningFastHorizontalOps,
1442                                          TuningFast15ByteNOP,
1443                                          TuningFastScalarShiftMasks,
1444                                          TuningFastVectorShiftMasks,
1445                                          TuningFastMOVBE,
1446                                          TuningFastImm16,
1447                                          TuningSBBDepBreaking,
1448                                          TuningSlowDivide64,
1449                                          TuningSlowSHLD];
1450   list<SubtargetFeature> BtVer2Features =
1451     !listconcat(BtVer1Features, BtVer2AdditionalFeatures);
1452
1453   // Bulldozer (BdVer1): base feature and tuning lists that the BdVer2 lists extend.
1454   list<SubtargetFeature> BdVer1Features = [FeatureX87,
1455                                            FeatureCX8,
1456                                            FeatureCMOV,
1457                                            FeatureXOP,
1458                                            FeatureX86_64,
1459                                            FeatureCX16,
1460                                            FeatureAES,
1461                                            FeatureCRC32,
1462                                            FeaturePRFCHW,
1463                                            FeaturePCLMUL,
1464                                            FeatureMMX,
1465                                            FeatureFXSR,
1466                                            FeatureNOPL,
1467                                            FeatureLZCNT,
1468                                            FeaturePOPCNT,
1469                                            FeatureXSAVE,
1470                                            FeatureLWP,
1471                                            FeatureLAHFSAHF64];
1472   list<SubtargetFeature> BdVer1Tuning = [TuningSlowSHLD,
1473                                          TuningSlowDivide64,
1474                                          TuningFast11ByteNOP,
1475                                          TuningFastScalarShiftMasks,
1476                                          TuningBranchFusion,
1477                                          TuningSBBDepBreaking,
1478                                          TuningInsertVZEROUPPER];
1479
1480   // PileDriver (BdVer2): BdVer1 feature and tuning lists, each with the additions below.
1481   list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C,
1482                                                      FeatureBMI,
1483                                                      FeatureTBM,
1484                                                      FeatureFMA];
1485   list<SubtargetFeature> BdVer2AdditionalTuning = [TuningFastBEXTR,
1486                                                    TuningFastMOVBE];
1487   list<SubtargetFeature> BdVer2Tuning =
1488     !listconcat(BdVer1Tuning, BdVer2AdditionalTuning);
1489   list<SubtargetFeature> BdVer2Features =
1490     !listconcat(BdVer1Features, BdVer2AdditionalFeatures);
1491
1492   // Steamroller (BdVer3): BdVer2Features plus two additions; tuning is BdVer2Tuning unchanged.
1493   list<SubtargetFeature> BdVer3AdditionalFeatures = [FeatureXSAVEOPT,
1494                                                      FeatureFSGSBase];
1495   list<SubtargetFeature> BdVer3Tuning = BdVer2Tuning;
1496   list<SubtargetFeature> BdVer3Features =
1497     !listconcat(BdVer2Features, BdVer3AdditionalFeatures);
1498
1499   // Excavator (BdVer4): BdVer3Features plus the additions below; tuning is BdVer3Tuning unchanged.
1500   list<SubtargetFeature> BdVer4AdditionalFeatures = [FeatureAVX2,
1501                                                      FeatureBMI2,
1502                                                      FeatureMOVBE,
1503                                                      FeatureRDRAND,
1504                                                      FeatureMWAITX];
1505   list<SubtargetFeature> BdVer4Tuning = BdVer3Tuning;
1506   list<SubtargetFeature> BdVer4Features =
1507     !listconcat(BdVer3Features, BdVer4AdditionalFeatures);
1508
1509
1510   // AMD Zen processors: ISA and tuning lists common to the ZN2-ZN5 lists that follow.
1511   list<SubtargetFeature> ZNFeatures = [FeatureADX,
1512                                        FeatureAES,
1513                                        FeatureAVX2,
1514                                        FeatureBMI,
1515                                        FeatureBMI2,
1516                                        FeatureCLFLUSHOPT,
1517                                        FeatureCLZERO,
1518                                        FeatureCMOV,
1519                                        FeatureX86_64,
1520                                        FeatureCX16,
1521                                        FeatureCRC32,
1522                                        FeatureF16C,
1523                                        FeatureFMA,
1524                                        FeatureFSGSBase,
1525                                        FeatureFXSR,
1526                                        FeatureNOPL,
1527                                        FeatureLAHFSAHF64,
1528                                        FeatureLZCNT,
1529                                        FeatureMMX,
1530                                        FeatureMOVBE,
1531                                        FeatureMWAITX,
1532                                        FeaturePCLMUL,
1533                                        FeaturePOPCNT,
1534                                        FeaturePRFCHW,
1535                                        FeatureRDRAND,
1536                                        FeatureRDSEED,
1537                                        FeatureSHA,
1538                                        FeatureSSE4A,
1539                                        FeatureX87,
1540                                        FeatureXSAVE,
1541                                        FeatureXSAVEC,
1542                                        FeatureXSAVEOPT,
1543                                        FeatureXSAVES];
1544   list<SubtargetFeature> ZNTuning = [TuningFastLZCNT,
1545                                      TuningFastBEXTR,
1546                                      TuningFast15ByteNOP,
1547                                      TuningBranchFusion,
1548                                      TuningFastScalarFSQRT,
1549                                      TuningFastVectorFSQRT,
1550                                      TuningFastScalarShiftMasks,
1551                                      TuningFastVariablePerLaneShuffle,
1552                                      TuningFastMOVBE,
1553                                      TuningFastImm16,
1554                                      TuningSlowDivide64,
1555                                      TuningSlowSHLD,
1556                                      TuningSBBDepBreaking,
1557                                      TuningInsertVZEROUPPER,
1558                                      TuningAllowLight256Bit];
1559   list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,  // Appended to ZNFeatures below.
1560                                                   FeatureRDPID,
1561                                                   FeatureRDPRU,
1562                                                   FeatureWBNOINVD];
1563   list<SubtargetFeature> ZN2Tuning = ZNTuning;  // No tuning change relative to ZNTuning.
1564   list<SubtargetFeature> ZN2Features =
1565     !listconcat(ZNFeatures, ZN2AdditionalFeatures);
1566   list<SubtargetFeature> ZN3AdditionalFeatures = [FeatureFSRM,  // Appended to ZN2Features below.
1567                                                   FeatureINVPCID,
1568                                                   FeaturePKU,
1569                                                   FeatureVAES,
1570                                                   FeatureVPCLMULQDQ];
1571   list<SubtargetFeature> ZN3AdditionalTuning = [TuningMacroFusion];  // Appended to ZN2Tuning below.
1572   list<SubtargetFeature> ZN3Tuning =
1573     !listconcat(ZN2Tuning, ZN3AdditionalTuning);
1574   list<SubtargetFeature> ZN3Features =
1575     !listconcat(ZN2Features, ZN3AdditionalFeatures);
1576
1577   list<SubtargetFeature> ZN4AdditionalTuning = [TuningFastDPWSSD];  // Appended to ZN3Tuning below.
1578   list<SubtargetFeature> ZN4Tuning =
1579     !listconcat(ZN3Tuning, ZN4AdditionalTuning);
1580   list<SubtargetFeature> ZN4AdditionalFeatures = [FeatureAVX512,  // Appended to ZN3Features below.
1581                                                   FeatureEVEX512,
1582                                                   FeatureCDI,
1583                                                   FeatureDQI,
1584                                                   FeatureBWI,
1585                                                   FeatureVLX,
1586                                                   FeatureVBMI,
1587                                                   FeatureVBMI2,
1588                                                   FeatureIFMA,
1589                                                   FeatureVNNI,
1590                                                   FeatureBITALG,
1591                                                   FeatureGFNI,
1592                                                   FeatureBF16,
1593                                                   FeatureSHSTK,
1594                                                   FeatureVPOPCNTDQ];
1595   list<SubtargetFeature> ZN4Features =
1596     !listconcat(ZN3Features, ZN4AdditionalFeatures);
1597
1598   list<SubtargetFeature> ZN5Tuning = ZN4Tuning;  // No tuning change relative to ZN4Tuning.
1599   list<SubtargetFeature> ZN5AdditionalFeatures = [FeatureVNNI,  // NOTE(review): also in ZN4AdditionalFeatures, so it already reaches ZN5Features via ZN4Features; confirm whether repeating it is intended.
1600                                                   FeatureMOVDIRI,
1601                                                   FeatureMOVDIR64B,
1602                                                   FeatureVP2INTERSECT,
1603                                                   FeaturePREFETCHI,
1604                                                   FeatureAVXVNNI
1605                                                   ];
1606   list<SubtargetFeature> ZN5Features =
1607     !listconcat(ZN4Features, ZN5AdditionalFeatures);
1608 }
1609
1610 //===----------------------------------------------------------------------===//
1611 // X86 processors supported.
1612 //===----------------------------------------------------------------------===//
1613
1614 class Proc<string Name, list<SubtargetFeature> Features,
1615            list<SubtargetFeature> TuneFeatures>
1616  : ProcessorModel<Name, GenericModel, Features, TuneFeatures>;  // Same as ProcModel, with the model fixed to GenericModel.
1617
1618 class ProcModel<string Name, SchedMachineModel Model,
1619                 list<SubtargetFeature> Features,
1620                 list<SubtargetFeature> TuneFeatures>
1621  : ProcessorModel<Name, Model, Features, TuneFeatures>;  // Forwards all four arguments; the caller picks the SchedMachineModel.
1622
1623 // NOTE: CMPXCHG8B is here for legacy compatibility so that it is only disabled
1624 // if i386/i486 is specifically requested.
1625 // NOTE: FeatureX86_64 is here because "generic" is the default llc CPU. The
1626 // X86Subtarget constructor checks that any CPU used in 64-bit mode has
1627 // FeatureX86_64 enabled. It has no effect on code generation.
1628 // NOTE: As a default tuning, "generic" aims to produce code optimized for the
1629 // most common X86 processors. The tunings might change over time. It is
1630 // recommended to use "tune-cpu"="x86-64" in the function attributes for consistency.
1631 def : ProcModel<"generic", SandyBridgeModel,
1632                 [FeatureX87, FeatureCX8, FeatureX86_64],
1633                 [TuningSlow3OpsLEA,
1634                  TuningSlowDivide64,
1635                  TuningMacroFusion,
1636                  TuningFastScalarFSQRT,
1637                  TuningFast15ByteNOP,
1638                  TuningInsertVZEROUPPER]>;
1639
1640 def : Proc<"i386",            [FeatureX87],  // No FeatureCX8 (presumably the CMPXCHG8B case in the NOTE above "generic").
1641                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1642 def : Proc<"i486",            [FeatureX87],  // Same lists as i386.
1643                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1644 def : Proc<"i586",            [FeatureX87, FeatureCX8],  // i486 lists plus FeatureCX8.
1645                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1646 def : Proc<"pentium",         [FeatureX87, FeatureCX8],  // Same lists as i586.
1647                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1648 foreach P = ["pentium-mmx", "pentium_mmx"] in {  // Each listed name gets the same definition.
1649   def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX],  // pentium's features plus FeatureMMX.
1650                 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1651 }
// i686: X87, CX8 and CMOV only (no MMX and no NOPL in this list).
1652 def : Proc<"i686", [FeatureX87, FeatureCX8, FeatureCMOV],
1653                    [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// One record per accepted spelling; the feature list is X87, CX8, CMOV and
// NOPL.
1654 foreach P = ["pentiumpro", "pentium_pro"] in {
1655   def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV, FeatureNOPL],
1656                 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1657 }
// One record per accepted spelling; the feature list is X87, CX8, MMX, CMOV,
// FXSR and NOPL.
1658 foreach P = ["pentium2", "pentium_ii"] in {
1659   def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX, FeatureCMOV,
1660                           FeatureFXSR, FeatureNOPL],
1661                         [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1662 }
// All four names map to the same configuration, which is the first in this
// group to enable FeatureSSE1.
1663 foreach P = ["pentium3", "pentium3m", "pentium_iii_no_xmm_regs", "pentium_iii"] in {
1664   def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX,
1665                  FeatureSSE1, FeatureFXSR, FeatureNOPL, FeatureCMOV],
1666                 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1667 }
1668
1669 // Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
1670 // The intent is to enable it for pentium4 which is the current default
1671 // processor in a vanilla 32-bit clang compilation when no specific
1672 // architecture is specified.  This generally gives a nice performance
1673 // increase on silvermont, with largely neutral behavior on other
1674 // contemporary large core processors.
1675 // pentium-m, pentium4m, prescott and nocona are included as a preventative
1676 // measure to avoid performance surprises, in case clang's default cpu
1677 // changes slightly.
1678
// One record per accepted spelling. Uses ProcModel with GenericPostRAModel
// rather than Proc/GenericModel, and enables FeatureSSE2.
1679 foreach P = ["pentium_m", "pentium-m"] in {
1680 def : ProcModel<P, GenericPostRAModel,
1681                 [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2,
1682                 FeatureFXSR, FeatureNOPL, FeatureCMOV],
1683                 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1684 }
1685
// All three names share one configuration: GenericPostRAModel with an
// SSE2-level feature list.
1686 foreach P = ["pentium4", "pentium4m", "pentium_4"] in {
1687   def : ProcModel<P, GenericPostRAModel,
1688                   [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2,
1689                    FeatureFXSR, FeatureNOPL, FeatureCMOV],
1690                   [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1691 }
1692
1693 // Intel Quark.
// Note: the feature list contains only FeatureCX8; unlike the other CPU
// records in this file it does not enable FeatureX87.
1694 def : Proc<"lakemont", [FeatureCX8],
1695                        [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1696
1697 // Intel Core Duo.
// Scheduled with SandyBridgeModel; SSE3-level feature list without
// FeatureX86_64.
1698 def : ProcModel<"yonah", SandyBridgeModel,
1699                 [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3,
1700                  FeatureFXSR, FeatureNOPL, FeatureCMOV],
1701                 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1702
1703 // NetBurst.
// One record per accepted spelling; GenericPostRAModel with an SSE3-level
// feature list and no FeatureX86_64.
1704 foreach P = ["prescott", "pentium_4_sse3"] in {
1705   def : ProcModel<P, GenericPostRAModel,
1706                   [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3,
1707                   FeatureFXSR, FeatureNOPL, FeatureCMOV],
1708                   [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1709 }
// nocona: GenericPostRAModel with an SSE3-level feature list that also
// enables FeatureX86_64 and FeatureCX16.
1710 def : ProcModel<"nocona", GenericPostRAModel, [
1711   FeatureX87,
1712   FeatureCX8,
1713   FeatureCMOV,
1714   FeatureMMX,
1715   FeatureSSE3,
1716   FeatureFXSR,
1717   FeatureNOPL,
1718   FeatureX86_64,
1719   FeatureCX16,
1720 ],
1721 [
1722   TuningSlowUAMem16,
1723   TuningInsertVZEROUPPER
1724 ]>;
1725
1726 // Intel Core 2 Solo/Duo.
// One record per accepted spelling. Scheduled with SandyBridgeModel; the
// feature list tops out at FeatureSSSE3 and includes FeatureLAHFSAHF64, and
// the tuning list adds TuningMacroFusion.
1727 foreach P = ["core2", "core_2_duo_ssse3"] in {
1728 def : ProcModel<P, SandyBridgeModel, [
1729   FeatureX87,
1730   FeatureCX8,
1731   FeatureCMOV,
1732   FeatureMMX,
1733   FeatureSSSE3,
1734   FeatureFXSR,
1735   FeatureNOPL,
1736   FeatureX86_64,
1737   FeatureCX16,
1738   FeatureLAHFSAHF64
1739 ],
1740 [
1741   TuningMacroFusion,
1742   TuningSlowUAMem16,
1743   TuningInsertVZEROUPPER
1744 ]>;
1745 }
// One record per accepted spelling. Same model and tuning list as the core2
// records; the feature list differs only in using FeatureSSE41 where core2
// uses FeatureSSSE3.
1746 foreach P = ["penryn", "core_2_duo_sse4_1"] in {
1747 def : ProcModel<P, SandyBridgeModel, [
1748   FeatureX87,
1749   FeatureCX8,
1750   FeatureCMOV,
1751   FeatureMMX,
1752   FeatureSSE41,
1753   FeatureFXSR,
1754   FeatureNOPL,
1755   FeatureX86_64,
1756   FeatureCX16,
1757   FeatureLAHFSAHF64
1758 ],
1759 [
1760   TuningMacroFusion,
1761   TuningSlowUAMem16,
1762   TuningInsertVZEROUPPER
1763 ]>;
1764 }
1765
1766 // Atom CPUs.
// Feature and tuning lists come from the ProcessorFeatures record rather than
// being spelled out inline. bonnell/atom use AtomModel; every later entry in
// this group is scheduled with SLMModel.
1767 foreach P = ["bonnell", "atom"] in {
1768   def : ProcModel<P, AtomModel, ProcessorFeatures.AtomFeatures,
1769                   ProcessorFeatures.AtomTuning>;
1770 }
1771
1772 foreach P = ["silvermont", "slm", "atom_sse4_2"] in {
1773   def : ProcModel<P, SLMModel, ProcessorFeatures.SLMFeatures,
1774                   ProcessorFeatures.SLMTuning>;
1775 }
1776
// Note the deliberate-looking mix below: atom_sse4_2_movbe takes the GLM
// feature list but keeps the SLM tuning list, whereas goldmont uses GLM for
// both.
1777 def : ProcModel<"atom_sse4_2_movbe", SLMModel, ProcessorFeatures.GLMFeatures,
1778                 ProcessorFeatures.SLMTuning>;
1779 def : ProcModel<"goldmont", SLMModel, ProcessorFeatures.GLMFeatures,
1780                 ProcessorFeatures.GLMTuning>;
1781 foreach P = ["goldmont_plus", "goldmont-plus"] in {
1782   def : ProcModel<P, SLMModel, ProcessorFeatures.GLPFeatures,
1783                   ProcessorFeatures.GLPTuning>;
1784 }
1785 def : ProcModel<"tremont", SLMModel, ProcessorFeatures.TRMFeatures,
1786                 ProcessorFeatures.TRMTuning>;
1787
// The four groups below (nehalem, westmere, sandybridge, ivybridge) are all
// scheduled with SandyBridgeModel; each group lists every accepted spelling
// of the CPU name and pulls its own feature/tuning lists from
// ProcessorFeatures.
1788 // "Arrandale" along with corei3 and corei5
1789 foreach P = ["nehalem", "corei7", "core_i7_sse4_2"] in {
1790   def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.NHMFeatures,
1791                   ProcessorFeatures.NHMTuning>;
1792 }
1793
1794 // Westmere is the corei3/i5/i7 path from nehalem to sandybridge
1795 foreach P = ["westmere", "core_aes_pclmulqdq"] in {
1796   def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.WSMFeatures,
1797                   ProcessorFeatures.WSMTuning>;
1798 }
1799
1800 foreach P = ["sandybridge", "corei7-avx", "core_2nd_gen_avx"] in {
1801   def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.SNBFeatures,
1802                   ProcessorFeatures.SNBTuning>;
1803 }
1804
1805 foreach P = ["ivybridge", "core-avx-i", "core_3rd_gen_avx"] in {
1806   def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.IVBFeatures,
1807                   ProcessorFeatures.IVBTuning>;
1808 }
1809
// From here each generation has its own scheduling model: HaswellModel,
// BroadwellModel and SkylakeClientModel respectively.
1810 foreach P = ["haswell", "core-avx2", "core_4th_gen_avx", "core_4th_gen_avx_tsx"] in {
1811   def : ProcModel<P, HaswellModel, ProcessorFeatures.HSWFeatures,
1812                   ProcessorFeatures.HSWTuning>;
1813 }
1814
1815 foreach P = ["broadwell", "core_5th_gen_avx", "core_5th_gen_avx_tsx"] in {
1816   def : ProcModel<P, BroadwellModel, ProcessorFeatures.BDWFeatures,
1817                   ProcessorFeatures.BDWTuning>;
1818 }
1819
1820 def : ProcModel<"skylake", SkylakeClientModel, ProcessorFeatures.SKLFeatures,
1821                 ProcessorFeatures.SKLTuning>;
1822
1823 // FIXME: define KNL scheduler model
// Until that exists, knl/mic_avx512 and knm are both scheduled with
// HaswellModel. knm has its own feature list (KNMFeatures) but reuses the
// KNL tuning list.
1824 foreach P = ["knl", "mic_avx512"] in {
1825   def : ProcModel<P, HaswellModel, ProcessorFeatures.KNLFeatures,
1826                   ProcessorFeatures.KNLTuning>;
1827 }
1828 def : ProcModel<"knm", HaswellModel, ProcessorFeatures.KNMFeatures,
1829                 ProcessorFeatures.KNLTuning>;
1830
// Everything in this group is scheduled with SkylakeServerModel, including
// cannonlake; each CPU has its own feature and tuning lists.
1831 foreach P = ["skylake-avx512", "skx", "skylake_avx512"] in {
1832   def : ProcModel<P, SkylakeServerModel, ProcessorFeatures.SKXFeatures,
1833                   ProcessorFeatures.SKXTuning>;
1834 }
1835
1836 def : ProcModel<"cascadelake", SkylakeServerModel,
1837                 ProcessorFeatures.CLXFeatures, ProcessorFeatures.CLXTuning>;
1838 def : ProcModel<"cooperlake", SkylakeServerModel,
1839                 ProcessorFeatures.CPXFeatures, ProcessorFeatures.CPXTuning>;
1840 def : ProcModel<"cannonlake", SkylakeServerModel,
1841                 ProcessorFeatures.CNLFeatures, ProcessorFeatures.CNLTuning>;
// Everything in this group is scheduled with IceLakeModel. rocketlake reuses
// the icelake-client feature and tuning lists (ICL) unchanged; icelake-server
// and tigerlake have their own (ICX, TGL).
1842 foreach P = ["icelake-client", "icelake_client"] in {
1843 def : ProcModel<P, IceLakeModel,
1844                 ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>;
1845 }
1846 def : ProcModel<"rocketlake", IceLakeModel,
1847                 ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>;
1848 foreach P = ["icelake-server", "icelake_server"] in {
1849 def : ProcModel<P, IceLakeModel,
1850                 ProcessorFeatures.ICXFeatures, ProcessorFeatures.ICXTuning>;
1851 }
1852 def : ProcModel<"tigerlake", IceLakeModel,
1853                 ProcessorFeatures.TGLFeatures, ProcessorFeatures.TGLTuning>;
// sapphirerapids: has a dedicated scheduling model (SapphireRapidsModel) and
// its own SPR feature and tuning lists.
1854 def : ProcModel<"sapphirerapids", SapphireRapidsModel,
1855                 ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
// Every record in this group is scheduled with AlderlakePModel. Tuning is
// GRTTuning for gracemont, sierraforest and grandridge, and ADLTuning for all
// the others. Several CPUs share a feature list: raptorlake and meteorlake
// reuse ADLFeatures, and arrowlake reuses SRFFeatures.
1856 def : ProcModel<"alderlake", AlderlakePModel,
1857                 ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
1858 // FIXME: Use Gracemont Schedule Model when it is ready.
1859 def : ProcModel<"gracemont", AlderlakePModel,
1860                 ProcessorFeatures.ADLFeatures, ProcessorFeatures.GRTTuning>;
1861 foreach P = ["sierraforest", "grandridge"] in {
1862   def : ProcModel<P, AlderlakePModel, ProcessorFeatures.SRFFeatures,
1863                 ProcessorFeatures.GRTTuning>;
1864 }
1865 def : ProcModel<"raptorlake", AlderlakePModel,
1866                 ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
1867 def : ProcModel<"meteorlake", AlderlakePModel,
1868                 ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
1869 def : ProcModel<"arrowlake", AlderlakePModel,
1870                 ProcessorFeatures.SRFFeatures, ProcessorFeatures.ADLTuning>;
1871 foreach P = ["arrowlake-s", "arrowlake_s", "lunarlake"] in {
1872 def : ProcModel<P, AlderlakePModel,
1873                 ProcessorFeatures.ARLSFeatures, ProcessorFeatures.ADLTuning>;
1874 }
1875 def : ProcModel<"pantherlake", AlderlakePModel,
1876                 ProcessorFeatures.PTLFeatures, ProcessorFeatures.ADLTuning>;
1877 def : ProcModel<"clearwaterforest", AlderlakePModel,
1878                 ProcessorFeatures.CWFFeatures, ProcessorFeatures.ADLTuning>;
// Every record in this group is scheduled with SapphireRapidsModel.
// emeraldrapids uses the SPR feature and tuning lists; graniterapids-d and
// diamondrapids have their own feature lists but reuse GNRTuning.
1879 def : ProcModel<"emeraldrapids", SapphireRapidsModel,
1880                 ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
1881 def : ProcModel<"graniterapids", SapphireRapidsModel,
1882                 ProcessorFeatures.GNRFeatures, ProcessorFeatures.GNRTuning>;
1883 foreach P = ["graniterapids-d", "graniterapids_d"] in {
1884 def : ProcModel<P, SapphireRapidsModel,
1885                 ProcessorFeatures.GNRDFeatures, ProcessorFeatures.GNRTuning>;
1886 }
1887 def : ProcModel<"diamondrapids", SapphireRapidsModel,
1888                 ProcessorFeatures.DMRFeatures, ProcessorFeatures.GNRTuning>;
1889
1890 // AMD CPUs.
1891
// K6 family, all defined with Proc (GenericModel). k6-2 and k6-3 have
// identical lists, which add FeaturePRFCHW to the k6 feature list.
1892 def : Proc<"k6",   [FeatureX87, FeatureCX8, FeatureMMX],
1893                    [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1894 def : Proc<"k6-2", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
1895                    [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1896 def : Proc<"k6-3", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
1897                    [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1898
// Athlon variants, defined with Proc (GenericModel). Both groups add
// TuningSlowSHLD to the tuning list; the second group additionally enables
// FeatureSSE1 and FeatureFXSR.
1899 foreach P = ["athlon", "athlon-tbird"] in {
1900   def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeaturePRFCHW,
1901                  FeatureNOPL],
1902                 [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1903 }
1904
1905 foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
1906   def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV,
1907                  FeatureSSE1, FeatureMMX, FeaturePRFCHW, FeatureFXSR, FeatureNOPL],
1908                 [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1909 }
1910
// K8 variants, defined with Proc (GenericModel); both groups enable
// FeatureX86_64 and share one tuning list. The "-sse3" group uses
// FeatureSSE3 instead of FeatureSSE2 and additionally enables FeatureCX16.
1911 foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
1912   def : Proc<P, [FeatureX87, FeatureCX8, FeatureSSE2, FeatureMMX, FeaturePRFCHW,
1913                  FeatureFXSR, FeatureNOPL, FeatureX86_64, FeatureCMOV],
1914                 [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
1915                  TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
1916 }
1917
1918 foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
1919   def : Proc<P, [FeatureX87, FeatureCX8, FeatureSSE3, FeatureMMX, FeaturePRFCHW,
1920                  FeatureFXSR, FeatureNOPL, FeatureCX16, FeatureCMOV,
1921                  FeatureX86_64],
1922                 [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
1923                  TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
1924 }
1925
// amdfam10/barcelona and btver1 are defined with Proc (GenericModel); btver2
// is the first AMD entry here with a dedicated scheduling model
// (BtVer2Model). Feature and tuning lists come from ProcessorFeatures.
1926 foreach P = ["amdfam10", "barcelona"] in {
1927   def : Proc<P, ProcessorFeatures.BarcelonaFeatures,
1928              ProcessorFeatures.BarcelonaTuning>;
1929 }
1930
1931 // Bobcat
1932 def : Proc<"btver1", ProcessorFeatures.BtVer1Features,
1933            ProcessorFeatures.BtVer1Tuning>;
1934 // Jaguar
1935 def : ProcModel<"btver2", BtVer2Model, ProcessorFeatures.BtVer2Features,
1936                 ProcessorFeatures.BtVer2Tuning>;
1937
// bdver1 through bdver3 are all scheduled with BdVer2Model; bdver4 borrows
// Znver1Model. Each has its own feature and tuning lists.
1938 // Bulldozer
1939 def : ProcModel<"bdver1", BdVer2Model, ProcessorFeatures.BdVer1Features,
1940                 ProcessorFeatures.BdVer1Tuning>;
1941 // Piledriver
1942 def : ProcModel<"bdver2", BdVer2Model, ProcessorFeatures.BdVer2Features,
1943                 ProcessorFeatures.BdVer2Tuning>;
1944 // Steamroller
1945 // NOTE: BdVer2Model is only an approx model for Steamroller.
1946 def : ProcModel<"bdver3", BdVer2Model, ProcessorFeatures.BdVer3Features,
1947                 ProcessorFeatures.BdVer3Tuning>;
1948 // Excavator
1949 // NOTE: Znver1Model is only an approx model for Excavator (with AVX2).
1950 def : ProcModel<"bdver4", Znver1Model, ProcessorFeatures.BdVer4Features,
1951                 ProcessorFeatures.BdVer4Tuning>;
1952
// Zen family. znver1 through znver4 each use the matching ZnverNModel.
// NOTE(review): znver5 is scheduled with Znver4Model while using its own ZN5
// feature and tuning lists - presumably a stand-in until a dedicated model
// exists; confirm.
1953 def : ProcModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures,
1954                 ProcessorFeatures.ZNTuning>;
1955 def : ProcModel<"znver2", Znver2Model, ProcessorFeatures.ZN2Features,
1956                 ProcessorFeatures.ZN2Tuning>;
1957 def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features,
1958                 ProcessorFeatures.ZN3Tuning>;
1959 def : ProcModel<"znver4", Znver4Model, ProcessorFeatures.ZN4Features,
1960                 ProcessorFeatures.ZN4Tuning>;
1961 def : ProcModel<"znver5", Znver4Model, ProcessorFeatures.ZN5Features,
1962                 ProcessorFeatures.ZN5Tuning>;
1963
// Remaining CPUs, all defined with Proc (GenericModel) and the same tuning
// list. Note that winchip-c6, winchip2 and c3 do not enable FeatureCX8, and
// that c3-2 is the only one of these with FeatureSSE1, FeatureFXSR and
// FeatureCMOV.
1964 def : Proc<"geode",           [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
1965                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1966
1967 def : Proc<"winchip-c6",      [FeatureX87, FeatureMMX],
1968                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1969 def : Proc<"winchip2",        [FeatureX87, FeatureMMX, FeaturePRFCHW],
1970                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1971 def : Proc<"c3",              [FeatureX87, FeatureMMX, FeaturePRFCHW],
1972                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1973 def : Proc<"c3-2",            [FeatureX87, FeatureCX8, FeatureMMX,
1974                                FeatureSSE1, FeatureFXSR, FeatureCMOV],
1975                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1976
1977 // We also provide a generic 64-bit specific x86 processor model which tries to
1978 // be good for modern chips without enabling instruction set encodings past the
1979 // basic SSE2 and 64-bit ones. It disables slow things from any mainstream and
1980 // modern 64-bit x86 chip, and enables features that are generally beneficial.
1981 //
1982 // We currently use the Sandy Bridge model as the default scheduling model as
1983 // we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which
1984 // covers a huge swath of x86 processors. If there are specific scheduling
1985 // knobs which need to be tuned differently for AMD chips, we might consider
1986 // forming a common base for them.
//
// The four records below are the "x86-64" baseline and its -v2/-v3/-v4
// levels. Each level takes its feature and tuning lists from the matching
// X86_64V<N> entries of ProcessorFeatures, and the scheduling model steps up
// with the level: SandyBridgeModel (v1, v2), HaswellModel (v3),
// SkylakeServerModel (v4).
1987 def : ProcModel<"x86-64", SandyBridgeModel, ProcessorFeatures.X86_64V1Features,
1988                 ProcessorFeatures.X86_64V1Tuning>;
1989 // Close to Sandybridge.
1990 def : ProcModel<"x86-64-v2", SandyBridgeModel, ProcessorFeatures.X86_64V2Features,
1991                 ProcessorFeatures.X86_64V2Tuning>;
1992 // Close to Haswell.
1993 def : ProcModel<"x86-64-v3", HaswellModel, ProcessorFeatures.X86_64V3Features,
1994                 ProcessorFeatures.X86_64V3Tuning>;
1995 // Close to the AVX-512 level implemented by Xeon Scalable Processors.
1996 def : ProcModel<"x86-64-v4", SkylakeServerModel, ProcessorFeatures.X86_64V4Features,
1997                 ProcessorFeatures.X86_64V4Tuning>;
1998
1999 //===----------------------------------------------------------------------===//
2000 // Calling Conventions
2001 //===----------------------------------------------------------------------===//
2002
2003 include "X86CallingConv.td"
2004
2005
2006 //===----------------------------------------------------------------------===//
2007 // Assembly Parser
2008 //===----------------------------------------------------------------------===//
2009
// Assembly parser variant 0, named "att": comments start with '#' and
// register names carry a '%' prefix.
2010 def ATTAsmParserVariant : AsmParserVariant {
2011   int Variant = 0;
2012
2013   // Variant name.
2014   string Name = "att";
2015
2016   // Discard comments in assembly strings.
2017   string CommentDelimiter = "#";
2018
2019   // Recognize hard coded registers.
2020   string RegisterPrefix = "%";
2021 }
2022
// Assembly parser variant 1, named "intel": comments start with ';' and
// register names have no prefix (RegisterPrefix is the empty string).
2023 def IntelAsmParserVariant : AsmParserVariant {
2024   int Variant = 1;
2025
2026   // Variant name.
2027   string Name = "intel";
2028
2029   // Discard comments in assembly strings.
2030   string CommentDelimiter = ";";
2031
2032   // Recognize hard coded registers.
2033   string RegisterPrefix = "";
2034 }
2035
2036 //===----------------------------------------------------------------------===//
2037 // Assembly Printers
2038 //===----------------------------------------------------------------------===//
2039
2040 // The X86 target supports two different syntaxes for emitting machine code.
2041 // This is controlled by the -x86-asm-syntax={att|intel}
// Writer for variant 0; its printer class is named "ATTInstPrinter". The
// variant index is the same one ATTAsmParserVariant uses.
2042 def ATTAsmWriter : AsmWriter {
2043   string AsmWriterClassName  = "ATTInstPrinter";
2044   int Variant = 0;
2045 }
// Writer for variant 1; its printer class is named "IntelInstPrinter". The
// variant index is the same one IntelAsmParserVariant uses.
2046 def IntelAsmWriter : AsmWriter {
2047   string AsmWriterClassName  = "IntelInstPrinter";
2048   int Variant = 1;
2049 }
2050
// Top-level target record. It names the instruction set (X86InstrInfo) and
// registers both assembly parser variants and both assembly writers, listing
// the "att" entry first in each case.
2051 def X86 : Target {
2052   // Information about the instructions...
2053   let InstructionSet = X86InstrInfo;
2054   let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
2055   let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
  // NOTE(review): presumably opts the target in to post-RA register
  // renaming; confirm against the Target class definition in Target.td.
2056   let AllowRegisterRenaming = 1;
2057 }
2058
2059 //===----------------------------------------------------------------------===//
2060 // Pfm Counters
2061 //===----------------------------------------------------------------------===//
2062
2063 include "X86PfmCounters.td"