AMDGPU: Mark test as XFAIL in expensive_checks builds
[llvm-project.git] / llvm / lib / Target / X86 / X86.td
blob38761e1fd7eecc5e89944871230c78c693cd3593
1 //===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This is a target description file for the Intel i386 architecture, referred
10 // to here as the "X86" architecture.
12 //===----------------------------------------------------------------------===//
14 // Get the target-independent interfaces which we are implementing...
16 include "llvm/Target/Target.td"
18 //===----------------------------------------------------------------------===//
19 // X86 Subtarget state
21 // disregarding specific ABI / programming model
// Execution-mode selectors: one record each for 64-, 32- and 16-bit mode.
// Each record names its own Is*Bit field and carries no trailing feature
// list, so none of them names another feature.
22 def Is64Bit : SubtargetFeature<"64bit-mode", "Is64Bit", "true",
23                                "64-bit mode (x86_64)">;
24 def Is32Bit : SubtargetFeature<"32bit-mode", "Is32Bit", "true",
25                                "32-bit mode (80386)">;
26 def Is16Bit : SubtargetFeature<"16bit-mode", "Is16Bit", "true",
27                                "16-bit mode (i8086)">;
29 //===----------------------------------------------------------------------===//
30 // X86 Subtarget ISA features
31 //===----------------------------------------------------------------------===//
// Basic ISA feature bits (x87, nopl, cmov, cx8, crc32, popcnt, fxsr).
// Every record here is standalone: each has its own Has* flag string and no
// trailing list of other features.
33 def FeatureX87     : SubtargetFeature<"x87","HasX87", "true",
34                                       "Enable X87 float instructions">;
36 def FeatureNOPL    : SubtargetFeature<"nopl", "HasNOPL", "true",
37                                       "Enable NOPL instruction (generally pentium pro+)">;
39 def FeatureCMOV    : SubtargetFeature<"cmov","HasCMOV", "true",
40                                       "Enable conditional move instructions">;
42 def FeatureCX8     : SubtargetFeature<"cx8", "HasCX8", "true",
43                                       "Support CMPXCHG8B instructions">;
// Kept separate from the SSE level chain; per its description it covers the
// case where SSE4.2 is supported but the function is GPR only.
45 def FeatureCRC32   : SubtargetFeature<"crc32", "HasCRC32", "true",
46                                       "Enable SSE 4.2 CRC32 instruction (used when SSE4.2 is supported but function is GPR only)">;
48 def FeaturePOPCNT   : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
49                                        "Support POPCNT instruction">;
51 def FeatureFXSR    : SubtargetFeature<"fxsr", "HasFXSR", "true",
52                                       "Support fxsave/fxrestore instructions">;
// XSAVE family. FeatureXSAVE is the base record; the xsaveopt, xsavec and
// xsaves variants each list FeatureXSAVE as an implied feature.
54 def FeatureXSAVE   : SubtargetFeature<"xsave", "HasXSAVE", "true",
55                                        "Support xsave instructions">;
57 def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
58                                        "Support xsaveopt instructions",
59                                        [FeatureXSAVE]>;
61 def FeatureXSAVEC  : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
62                                        "Support xsavec instructions",
63                                        [FeatureXSAVE]>;
65 def FeatureXSAVES  : SubtargetFeature<"xsaves", "HasXSAVES", "true",
66                                        "Support xsaves instructions",
67                                        [FeatureXSAVE]>;
// SSE level chain. All six records name the same "X86SSELevel" field string
// with a different level value (SSE1 .. SSE42), and every record after the
// first lists its predecessor, so the levels are cumulative:
//   sse <- sse2 <- sse3 <- ssse3 <- sse4.1 <- sse4.2
69 def FeatureSSE1    : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
70                                       "Enable SSE instructions">;
71 def FeatureSSE2    : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
72                                       "Enable SSE2 instructions",
73                                       [FeatureSSE1]>;
74 def FeatureSSE3    : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
75                                       "Enable SSE3 instructions",
76                                       [FeatureSSE2]>;
77 def FeatureSSSE3   : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
78                                       "Enable SSSE3 instructions",
79                                       [FeatureSSE3]>;
80 def FeatureSSE41   : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
81                                       "Enable SSE 4.1 instructions",
82                                       [FeatureSSSE3]>;
83 def FeatureSSE42   : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
84                                       "Enable SSE 4.2 instructions",
85                                       [FeatureSSE41]>;
86 // The MMX subtarget feature is separate from the rest of the SSE features
87 // because it's important (for odd compatibility reasons) to be able to
88 // turn it off explicitly while allowing SSE+ to be on.
89 def FeatureMMX     : SubtargetFeature<"mmx","HasMMX", "true",
90                                       "Enable MMX instructions">;
91 // All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
92 // feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
93 // without disabling 64-bit mode. Nothing should imply this feature bit. It
94 // is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
95 def FeatureX86_64   : SubtargetFeature<"64bit", "HasX86_64", "true",
96                                       "Support 64-bit instructions">;
// cmpxchg16b support; lists FeatureCX8 (CMPXCHG8B) as an implied feature.
97 def FeatureCX16     : SubtargetFeature<"cx16", "HasCX16", "true",
98                                        "64-bit with cmpxchg16b (this is true for most x86-64 chips, but not the first AMD chips)",
99                                        [FeatureCX8]>;
// sse4a uses its own HasSSE4A flag string rather than "X86SSELevel"; it
// lists FeatureSSE3 as an implied feature.
100 def FeatureSSE4A   : SubtargetFeature<"sse4a", "HasSSE4A", "true",
101                                       "Support SSE 4a instructions",
102                                       [FeatureSSE3]>;
// AVX levels reuse the "X86SSELevel" field string used by the SSE records:
// avx lists FeatureSSE42 and avx2 lists FeatureAVX, extending that chain.
104 def FeatureAVX     : SubtargetFeature<"avx", "X86SSELevel", "AVX",
105                                       "Enable AVX instructions",
106                                       [FeatureSSE42]>;
107 def FeatureAVX2    : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
108                                       "Enable AVX2 instructions",
109                                       [FeatureAVX]>;
// Three-operand fused multiply-add; lists FeatureAVX as an implied feature.
// (Description previously misspelled the operation as "multiple-add".)
110 def FeatureFMA     : SubtargetFeature<"fma", "HasFMA", "true",
111                                       "Enable three-operand fused multiply-add",
112                                       [FeatureAVX]>;
// f16c lists FeatureAVX as an implied feature.
113 def FeatureF16C    : SubtargetFeature<"f16c", "HasF16C", "true",
114                        "Support 16-bit floating point conversion instructions",
115                        [FeatureAVX]>;
// evex512 is a standalone record: it has no trailing feature list.
116 def FeatureEVEX512  : SubtargetFeature<"evex512", "HasEVEX512", "true",
117                         "Support ZMM and 64-bit mask instructions">;
// avx512f uses the same "X86SSELevel" field string as the SSE/AVX records
// (value AVX512) and lists AVX2, FMA and F16C as implied features.
118 def FeatureAVX512   : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512",
119                                       "Enable AVX-512 instructions",
120                                       [FeatureAVX2, FeatureFMA, FeatureF16C]>;
// AVX-512 sub-features. avx512cd, avx512vpopcntdq, avx512dq, avx512bw and
// avx512vl each list FeatureAVX512; avx512vbmi and avx512vbmi2 list
// FeatureBWI instead.
121 def FeatureCDI      : SubtargetFeature<"avx512cd", "HasCDI", "true",
122                       "Enable AVX-512 Conflict Detection Instructions",
123                                       [FeatureAVX512]>;
124 def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
125                        "true", "Enable AVX-512 Population Count Instructions",
126                                       [FeatureAVX512]>;
// prefetchi is not an AVX-512 sub-feature: it has no trailing feature list.
127 def FeaturePREFETCHI  : SubtargetFeature<"prefetchi", "HasPREFETCHI",
128                                    "true",
129                                    "Prefetch instruction with T0 or T1 Hint">;
130 def FeatureDQI     : SubtargetFeature<"avx512dq", "HasDQI", "true",
131                       "Enable AVX-512 Doubleword and Quadword Instructions",
132                                       [FeatureAVX512]>;
133 def FeatureBWI     : SubtargetFeature<"avx512bw", "HasBWI", "true",
134                       "Enable AVX-512 Byte and Word Instructions",
135                                       [FeatureAVX512]>;
136 def FeatureVLX     : SubtargetFeature<"avx512vl", "HasVLX", "true",
137                       "Enable AVX-512 Vector Length eXtensions",
138                                       [FeatureAVX512]>;
139 def FeatureVBMI     : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
140                       "Enable AVX-512 Vector Byte Manipulation Instructions",
141                                       [FeatureBWI]>;
142 def FeatureVBMI2    : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
143                       "Enable AVX-512 further Vector Byte Manipulation Instructions",
144                                       [FeatureBWI]>;
// avxifma lists only FeatureAVX2; it does not list FeatureAVX512.
145 def FeatureAVXIFMA    : SubtargetFeature<"avxifma", "HasAVXIFMA", "true",
146                            "Enable AVX-IFMA",
147                            [FeatureAVX2]>;
// AVX-512 integer fused multiply-add; lists FeatureAVX512 as an implied
// feature. (Description previously misspelled the operation as
// "Multiple-Add".)
148 def FeatureIFMA     : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
149                       "Enable AVX-512 Integer Fused Multiply-Add",
150                                       [FeatureAVX512]>;
// pku is standalone (no trailing feature list).
151 def FeaturePKU   : SubtargetFeature<"pku", "HasPKU", "true",
152                       "Enable protection keys">;
// Two VNNI records with different prerequisites: avx512vnni lists
// FeatureAVX512, while avxvnni lists only FeatureAVX2.
153 def FeatureVNNI    : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
154                           "Enable AVX-512 Vector Neural Network Instructions",
155                                       [FeatureAVX512]>;
156 def FeatureAVXVNNI    : SubtargetFeature<"avxvnni", "HasAVXVNNI", "true",
157                            "Support AVX_VNNI encoding",
158                                       [FeatureAVX2]>;
// avx512bf16 and avx512bitalg both list FeatureBWI.
159 def FeatureBF16    : SubtargetFeature<"avx512bf16", "HasBF16", "true",
160                            "Support bfloat16 floating point",
161                                       [FeatureBWI]>;
162 def FeatureBITALG  : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
163                        "Enable AVX-512 Bit Algorithms",
164                         [FeatureBWI]>;
// avx512vp2intersect lists FeatureAVX512.
165 def FeatureVP2INTERSECT  : SubtargetFeature<"avx512vp2intersect",
166                                             "HasVP2INTERSECT", "true",
167                                             "Enable AVX-512 vp2intersect",
168                                             [FeatureAVX512]>;
169 // FIXME: FP16 scalar intrinsics use the type v8f16, which is supposed to be
170 // guarded under condition hasVLX. So we imply it in FeatureFP16 currently.
171 // FIXME: FP16 conversion between f16 and i64 customize type v8i64, which is
172 // supposed to be guarded under condition hasDQI. So we imply it in FeatureFP16
173 // currently.
// Resulting implied list: FeatureBWI plus the two workaround entries
// (FeatureVLX and FeatureDQI) described by the FIXMEs.
174 def FeatureFP16    : SubtargetFeature<"avx512fp16", "HasFP16", "true",
175                            "Support 16-bit floating point",
176                            [FeatureBWI, FeatureVLX, FeatureDQI]>;
// AVX-VNNI integer variants (INT8 and INT16). Both list FeatureAVX2 as an
// implied feature.
177 def FeatureAVXVNNIINT8  : SubtargetFeature<"avxvnniint8",
178                              "HasAVXVNNIINT8", "true",
179                              "Enable AVX-VNNI-INT8",
180                              [FeatureAVX2]>;
181 def FeatureAVXVNNIINT16 : SubtargetFeature<"avxvnniint16",
182                              "HasAVXVNNIINT16", "true",
183                              "Enable AVX-VNNI-INT16",
184                              [FeatureAVX2]>;
// pclmul and gfni each list FeatureSSE2. vpclmulqdq lists both FeatureAVX
// and FeaturePCLMUL.
185 def FeaturePCLMUL  : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
186                          "Enable packed carry-less multiplication instructions",
187                                [FeatureSSE2]>;
188 def FeatureGFNI    : SubtargetFeature<"gfni", "HasGFNI", "true",
189                          "Enable Galois Field Arithmetic Instructions",
190                                [FeatureSSE2]>;
191 def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
192                                          "Enable vpclmulqdq instructions",
193                                          [FeatureAVX, FeaturePCLMUL]>;
// Four-operand fused multiply-add; lists FeatureAVX and FeatureSSE4A as
// implied features. (Description previously misspelled the operation as
// "multiple-add".)
194 def FeatureFMA4    : SubtargetFeature<"fma4", "HasFMA4", "true",
195                                       "Enable four-operand fused multiply-add",
196                                       [FeatureAVX, FeatureSSE4A]>;
// xop lists FeatureFMA4 as an implied feature.
197 def FeatureXOP     : SubtargetFeature<"xop", "HasXOP", "true",
198                                       "Enable XOP instructions",
199                                       [FeatureFMA4]>;
// Standalone record; per its description the hardware may need a
// configuration bit set for unaligned SSE memory operands to be allowed.
200 def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
201                                           "HasSSEUnalignedMem", "true",
202                       "Allow unaligned memory operands with SSE instructions (this may require setting a configuration bit in the processor)">;
// aes lists FeatureSSE2; vaes lists FeatureAVX2 and FeatureAES.
203 def FeatureAES     : SubtargetFeature<"aes", "HasAES", "true",
204                                       "Enable AES instructions",
205                                       [FeatureSSE2]>;
206 def FeatureVAES    : SubtargetFeature<"vaes", "HasVAES", "true",
207                        "Promote selected AES instructions to AVX512/AVX registers",
208                         [FeatureAVX2, FeatureAES]>;
// Standalone instruction-set feature bits (tbm .. adx): none of these ten
// records has a trailing feature list.
209 def FeatureTBM     : SubtargetFeature<"tbm", "HasTBM", "true",
210                                       "Enable TBM instructions">;
211 def FeatureLWP     : SubtargetFeature<"lwp", "HasLWP", "true",
212                                       "Enable LWP instructions">;
213 def FeatureMOVBE   : SubtargetFeature<"movbe", "HasMOVBE", "true",
214                                       "Support MOVBE instruction">;
// Note the feature name string is "rdrnd", while the record and flag names
// spell it RDRAND.
215 def FeatureRDRAND  : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
216                                       "Support RDRAND instruction">;
217 def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
218                                        "Support FS/GS Base instructions">;
219 def FeatureLZCNT   : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
220                                       "Support LZCNT instruction">;
221 def FeatureBMI     : SubtargetFeature<"bmi", "HasBMI", "true",
222                                       "Support BMI instructions">;
223 def FeatureBMI2    : SubtargetFeature<"bmi2", "HasBMI2", "true",
224                                       "Support BMI2 instructions">;
225 def FeatureRTM     : SubtargetFeature<"rtm", "HasRTM", "true",
226                                       "Support RTM instructions">;
227 def FeatureADX     : SubtargetFeature<"adx", "HasADX", "true",
228                                       "Support ADX instructions">;
// SHA records: sha lists FeatureSSE2, sha512 lists FeatureAVX2.
229 def FeatureSHA     : SubtargetFeature<"sha", "HasSHA", "true",
230                                       "Enable SHA instructions",
231                                       [FeatureSSE2]>;
232 def FeatureSHA512  : SubtargetFeature<"sha512", "HasSHA512", "true",
233                                       "Support SHA512 instructions",
234                                       [FeatureAVX2]>;
235 // Processor supports CET SHSTK - Control-Flow Enforcement Technology
236 // using Shadow Stack
237 def FeatureSHSTK   : SubtargetFeature<"shstk", "HasSHSTK", "true",
238                        "Support CET Shadow-Stack instructions">;
// sm3 lists FeatureAVX; sm4 lists FeatureAVX2.
239 def FeatureSM3     : SubtargetFeature<"sm3", "HasSM3", "true",
240                                       "Support SM3 instructions",
241                                       [FeatureAVX]>;
242 def FeatureSM4     : SubtargetFeature<"sm4", "HasSM4", "true",
243                                       "Support SM4 instructions",
244                                       [FeatureAVX2]>;
// prfchw and rdseed are standalone records.
245 def FeaturePRFCHW  : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
246                                       "Support PRFCHW instructions">;
247 def FeatureRDSEED  : SubtargetFeature<"rdseed", "HasRDSEED", "true",
248                                       "Support RDSEED instruction">;
// Feature name string is "sahf"; the description scopes it to LAHF and SAHF
// availability in 64-bit mode.
249 def FeatureLAHFSAHF64 : SubtargetFeature<"sahf", "HasLAHFSAHF64", "true",
250                            "Support LAHF and SAHF instructions in 64-bit mode">;
// Standalone feature bits (mwaitx, clzero, cldemote, ptwrite); none has a
// trailing feature list.
251 def FeatureMWAITX  : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
252                                       "Enable MONITORX/MWAITX timer functionality">;
253 def FeatureCLZERO  : SubtargetFeature<"clzero", "HasCLZERO", "true",
254                                       "Enable Cache Line Zero">;
255 def FeatureCLDEMOTE  : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
256                                       "Enable Cache Line Demote">;
257 def FeaturePTWRITE  : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
258                                       "Support ptwrite instruction">;
// AMX family. amx-tile is the base record; every other amx-* record below
// lists FeatureAMXTILE (and nothing else) as an implied feature.
259 def FeatureAMXTILE     : SubtargetFeature<"amx-tile", "HasAMXTILE", "true",
260                                       "Support AMX-TILE instructions">;
261 def FeatureAMXINT8     : SubtargetFeature<"amx-int8", "HasAMXINT8", "true",
262                                       "Support AMX-INT8 instructions",
263                                       [FeatureAMXTILE]>;
264 def FeatureAMXBF16     : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
265                                       "Support AMX-BF16 instructions",
266                                       [FeatureAMXTILE]>;
267 def FeatureAMXFP16     : SubtargetFeature<"amx-fp16", "HasAMXFP16", "true",
268                                       "Support AMX amx-fp16 instructions",
269                                       [FeatureAMXTILE]>;
270 def FeatureAMXCOMPLEX : SubtargetFeature<"amx-complex", "HasAMXCOMPLEX", "true",
271                                          "Support AMX-COMPLEX instructions",
272                                          [FeatureAMXTILE]>;
273 def FeatureAMXFP8 : SubtargetFeature<"amx-fp8", "HasAMXFP8", "true",
274                                      "Support AMX-FP8 instructions",
275                                      [FeatureAMXTILE]>;
276 def FeatureAMXMOVRS : SubtargetFeature<"amx-movrs", "HasAMXMOVRS", "true",
277                                        "Support AMX-MOVRS instructions",
278                                        [FeatureAMXTILE]>;
279 def FeatureAMXTRANSPOSE : SubtargetFeature<"amx-transpose", "HasAMXTRANSPOSE", "true",
280                                            "Support AMX amx-transpose instructions",
281                                            [FeatureAMXTILE]>;
// Despite the name, amx-avx512 lists only FeatureAMXTILE here; no AVX-512
// feature appears in its list.
282 def FeatureAMXAVX512 : SubtargetFeature<"amx-avx512",
283                                         "HasAMXAVX512", "true",
284                                         "Support AMX-AVX512 instructions",
285                                         [FeatureAMXTILE]>;
286 def FeatureAMXTF32 : SubtargetFeature<"amx-tf32", "HasAMXTF32", "true",
287                                       "Support AMX-TF32 instructions",
288                                       [FeatureAMXTILE]>;
// cmpccxadd is standalone (no trailing feature list).
289 def FeatureCMPCCXADD : SubtargetFeature<"cmpccxadd", "HasCMPCCXADD", "true",
290                                         "Support CMPCCXADD instructions">;
// raoint passes an explicit empty list, so it likewise names no other
// feature.
291 def FeatureRAOINT : SubtargetFeature<"raoint", "HasRAOINT", "true",
292                                      "Support RAO-INT instructions",
293                                      []>;
// avxneconvert lists FeatureAVX2 as an implied feature.
294 def FeatureAVXNECONVERT : SubtargetFeature<"avxneconvert", "HasAVXNECONVERT", "true",
295                                            "Support AVX-NE-CONVERT instructions",
296                                            [FeatureAVX2]>;
// Miscellaneous instruction feature bits (invpcid .. movdir64b). Apart from
// the two Key Locker records, every record in this run is standalone.
297 def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
298                                       "Invalidate Process-Context Identifier">;
299 def FeatureSGX     : SubtargetFeature<"sgx", "HasSGX", "true",
300                                       "Enable Software Guard Extensions">;
301 def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
302                                       "Flush A Cache Line Optimized">;
303 def FeatureCLWB    : SubtargetFeature<"clwb", "HasCLWB", "true",
304                                       "Cache Line Write Back">;
305 def FeatureWBNOINVD    : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
306                                       "Write Back No Invalidate">;
307 def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
308                                     "Support RDPID instructions">;
309 def FeatureRDPRU : SubtargetFeature<"rdpru", "HasRDPRU", "true",
310                                     "Support RDPRU instructions">;
311 def FeatureWAITPKG  : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
312                                       "Wait and pause enhancements">;
313 def FeatureENQCMD : SubtargetFeature<"enqcmd", "HasENQCMD", "true",
314                                      "Has ENQCMD instructions">;
// Key Locker: kl lists FeatureSSE2, and widekl lists FeatureKL.
315 def FeatureKL  : SubtargetFeature<"kl", "HasKL", "true",
316                                   "Support Key Locker kl Instructions",
317                                   [FeatureSSE2]>;
318 def FeatureWIDEKL  : SubtargetFeature<"widekl", "HasWIDEKL", "true",
319                                       "Support Key Locker wide Instructions",
320                                       [FeatureKL]>;
321 def FeatureHRESET : SubtargetFeature<"hreset", "HasHRESET", "true",
322                                       "Has hreset instruction">;
323 def FeatureSERIALIZE : SubtargetFeature<"serialize", "HasSERIALIZE", "true",
324                                         "Has serialize instruction">;
325 def FeatureTSXLDTRK : SubtargetFeature<"tsxldtrk", "HasTSXLDTRK", "true",
326                                        "Support TSXLDTRK instructions">;
327 def FeatureUINTR : SubtargetFeature<"uintr", "HasUINTR", "true",
328                                     "Has UINTR Instructions">;
329 def FeatureUSERMSR : SubtargetFeature<"usermsr", "HasUSERMSR", "true",
330                                       "Support USERMSR instructions">;
331 def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
332                                       "platform configuration instruction">;
333 def FeatureMOVDIRI  : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
334                                        "Support movdiri instruction (direct store integer)">;
335 def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
336                                         "Support movdir64b instruction (direct store 64 bytes)">;
// AVX10 family. The -256 records are the base of each version and the -512
// records layer on top of them:
//   avx10.1-256 lists eleven AVX-512-era features directly (CDI .. FP16);
//   avx10.1-512 lists avx10.1-256 plus FeatureEVEX512;
//   avx10.2-256 lists avx10.1-256;
//   avx10.2-512 lists avx10.2-256 and avx10.1-512.
337 def FeatureAVX10_1 : SubtargetFeature<"avx10.1-256", "HasAVX10_1", "true",
338                                       "Support AVX10.1 up to 256-bit instruction",
339                                       [FeatureCDI, FeatureVBMI, FeatureIFMA, FeatureVNNI,
340                                        FeatureBF16, FeatureVPOPCNTDQ, FeatureVBMI2, FeatureBITALG,
341                                        FeatureVAES, FeatureVPCLMULQDQ, FeatureFP16]>;
342 def FeatureAVX10_1_512 : SubtargetFeature<"avx10.1-512", "HasAVX10_1_512", "true",
343                                           "Support AVX10.1 up to 512-bit instruction",
344                                           [FeatureAVX10_1, FeatureEVEX512]>;
345 def FeatureAVX10_2 : SubtargetFeature<"avx10.2-256", "HasAVX10_2", "true",
346                                       "Support AVX10.2 up to 256-bit instruction",
347                                       [FeatureAVX10_1]>;
348 def FeatureAVX10_2_512 : SubtargetFeature<"avx10.2-512", "HasAVX10_2_512", "true",
349                                           "Support AVX10.2 up to 512-bit instruction",
350                                           [FeatureAVX10_2, FeatureAVX10_1_512]>;
// Independent feature bits egpr .. zu: each is standalone, so they can be
// toggled separately. NOTE(review): these look like the individual APX
// sub-features (only inline-asm-use-gpr32 below mentions APX by name) --
// confirm before relying on that grouping.
351 def FeatureEGPR : SubtargetFeature<"egpr", "HasEGPR", "true",
352                                    "Support extended general purpose register">;
353 def FeaturePush2Pop2 : SubtargetFeature<"push2pop2", "HasPush2Pop2", "true",
354                                         "Support PUSH2/POP2 instructions">;
355 def FeaturePPX : SubtargetFeature<"ppx", "HasPPX", "true",
356                                   "Support Push-Pop Acceleration">;
357 def FeatureNDD : SubtargetFeature<"ndd", "HasNDD", "true",
358                                   "Support non-destructive destination">;
359 def FeatureCCMP : SubtargetFeature<"ccmp", "HasCCMP", "true",
360                                    "Support conditional cmp & test instructions">;
361 def FeatureNF : SubtargetFeature<"nf", "HasNF", "true",
362                                  "Support status flags update suppression">;
363 def FeatureCF : SubtargetFeature<"cf", "HasCF", "true",
364                                  "Support conditional faulting">;
365 def FeatureZU : SubtargetFeature<"zu", "HasZU", "true",
366                                  "Support zero-upper SETcc/IMUL">;
// Unlike the Has* flags above, this record names a Use* field
// ("UseInlineAsmGPR32"); it has no trailing feature list.
367 def FeatureUseGPR32InInlineAsm
368     : SubtargetFeature<"inline-asm-use-gpr32", "UseInlineAsmGPR32", "true",
369                        "Enable use of GPR32 in inline assembly for APX">;
// movrs passes an explicit empty list, i.e. it names no other feature.
370 def FeatureMOVRS   : SubtargetFeature<"movrs", "HasMOVRS", "true",
371                            "Enable MOVRS", []>;
373 // Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
374 // "string operations"). See "REP String Enhancement" in the Intel Software
375 // Development Manual. This feature essentially means that REP MOVSB will copy
376 // using the largest available size instead of copying bytes one by one, making
377 // it at least as fast as REPMOVS{W,D,Q}.
378 def FeatureERMSB
379     : SubtargetFeature<
380           "ermsb", "HasERMSB", "true",
381           "REP MOVS/STOS are fast">;
383 // Icelake and newer processors have Fast Short REP MOV.
// fsrm is declared independently of ermsb: it has no trailing feature list.
384 def FeatureFSRM
385     : SubtargetFeature<
386           "fsrm", "HasFSRM", "true",
387           "REP MOVSB of short lengths is faster">;
// soft-float names a Use* field ("UseSoftFloat") rather than a Has* flag.
389 def FeatureSoftFloat
390     : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
391                        "Use software floating point features">;
393 //===----------------------------------------------------------------------===//
394 // X86 Subtarget Security Mitigation features
395 //===----------------------------------------------------------------------===//
397 // Lower indirect calls using a special construct called a `retpoline` to
398 // mitigate potential Spectre v2 attacks against them.
399 def FeatureRetpolineIndirectCalls
400     : SubtargetFeature<
401           "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
402           "Remove speculation of indirect calls from the generated code">;
404 // Lower indirect branches and switches either using conditional branch trees
405 // or using a special construct called a `retpoline` to mitigate potential
406 // Spectre v2 attacks against them.
407 def FeatureRetpolineIndirectBranches
408     : SubtargetFeature<
409           "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
410           "Remove speculation of indirect branches from the generated code">;
412 // Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
413 // `retpoline-indirect-branches` above.
// It names its own "DeprecatedUseRetpoline" field and lists both of the
// finer-grained retpoline records.
414 def FeatureRetpoline
415     : SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
416                        "Remove speculation of indirect branches from the "
417                        "generated code, either by avoiding them entirely or "
418                        "lowering them with a speculation blocking construct",
419                        [FeatureRetpolineIndirectCalls,
420                         FeatureRetpolineIndirectBranches]>;
422 // Rely on external thunks for the emitted retpoline calls. This allows users
423 // to provide their own custom thunk definitions in highly specialized
424 // environments such as a kernel that does boot-time hot patching.
// Lists FeatureRetpolineIndirectCalls only; the indirect-branches record is
// not in its list.
425 def FeatureRetpolineExternalThunk
426     : SubtargetFeature<
427           "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
428           "When lowering an indirect call or branch using a `retpoline`, rely "
429           "on the specified user provided thunk rather than emitting one "
430           "ourselves. Only has effect when combined with some other retpoline "
431           "feature", [FeatureRetpolineIndirectCalls]>;
433 // Mitigate LVI attacks against indirect calls/branches and call returns
434 def FeatureLVIControlFlowIntegrity
435     : SubtargetFeature<
436           "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
437           "Prevent indirect calls/branches from using a memory operand, and "
438           "precede all indirect calls/branches from a register with an "
439           "LFENCE instruction to serialize control flow. Also decompose RET "
440           "instructions into a POP+LFENCE+JMP sequence.">;
442 // Enable SESES to mitigate speculative execution attacks
// Lists FeatureLVIControlFlowIntegrity, matching the "Implies LVI Control
// Flow integrity" sentence in its description.
443 def FeatureSpeculativeExecutionSideEffectSuppression
444     : SubtargetFeature<
445           "seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
446           "Prevent speculative execution side channel timing attacks by "
447           "inserting a speculation barrier before memory reads, memory writes, "
448           "and conditional branches. Implies LVI Control Flow integrity.",
449           [FeatureLVIControlFlowIntegrity]>;
451 // Mitigate LVI attacks against data loads
// Declared independently of lvi-cfi: it has no trailing feature list.
452 def FeatureLVILoadHardening
453     : SubtargetFeature<
454           "lvi-load-hardening", "UseLVILoadHardening", "true",
455           "Insert LFENCE instructions to prevent data speculatively injected "
456           "into loads from being used maliciously.">;
// Opt-in handling for global addresses that carry a memory tag in their
// upper bits (per the description string). Standalone record.
458 def FeatureTaggedGlobals
459     : SubtargetFeature<
460           "tagged-globals", "AllowTaggedGlobals", "true",
461           "Use an instruction sequence for taking the address of a global "
462           "that allows a memory tag in the upper address bits.">;
464 // Control codegen mitigation against Straight Line Speculation vulnerability.
// Two independent switches: one for RET instructions, one for indirect JMP.
// Neither lists the other.
465 def FeatureHardenSlsRet
466     : SubtargetFeature<
467           "harden-sls-ret", "HardenSlsRet", "true",
468           "Harden against straight line speculation across RET instructions.">;
470 def FeatureHardenSlsIJmp
471     : SubtargetFeature<
472           "harden-sls-ijmp", "HardenSlsIJmp", "true",
473           "Harden against straight line speculation across indirect JMP instructions.">;
475 //===----------------------------------------------------------------------===//
476 // X86 Subtarget Tuning features
477 //===----------------------------------------------------------------------===//
// Tuning records use the Tuning* name prefix instead of Feature*, but are
// still SubtargetFeature instances. None in this run has a trailing list.
478 def TuningPreferMovmskOverVTest : SubtargetFeature<"prefer-movmsk-over-vtest",
479                                        "PreferMovmskOverVTest", "true",
480                                        "Prefer movmsk over vtest instruction">;
482 def TuningSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
483                                        "SHLD instruction is slow">;
485 def TuningSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
486                                         "PMULLD instruction is slow (compared to PMULLW/PMULHW and PMULUDQ)">;
488 def TuningSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
489                                           "true",
490                                           "PMADDWD is slower than PMULLD">;
492 // FIXME: This should not apply to CPUs that do not have SSE.
493 def TuningSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
494                                 "IsUnalignedMem16Slow", "true",
495                                 "Slow unaligned 16-byte memory access">;
// 32-byte counterpart of the record above; the two are independent.
497 def TuningSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
498                                 "IsUnalignedMem32Slow", "true",
499                                 "Slow unaligned 32-byte memory access">;
501 def TuningLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
502                                      "Use LEA for adjusting the stack pointer (this is an optimization for Intel Atom processors)">;
504 // True if 8-bit divisions are significantly faster than
505 // 32-bit divisions and should be used when possible.
// Feature string is "idivl-to-divb"; the backing field is HasSlowDivide32.
506 def TuningSlowDivide32 : SubtargetFeature<"idivl-to-divb",
507                                      "HasSlowDivide32", "true",
508                                      "Use 8-bit divide for positive values less than 256">;
510 // True if 32-bit divisions are significantly faster than
511 // 64-bit divisions and should be used when possible.
// Feature string is "idivq-to-divl"; the backing field is HasSlowDivide64.
512 def TuningSlowDivide64 : SubtargetFeature<"idivq-to-divl",
513                                      "HasSlowDivide64", "true",
514                                      "Use 32-bit divide for positive values less than 2^32">;
// Tuning flag "pad-short-functions": sets the PadShortFunctions subtarget
// field to "true".
516 def TuningPadShortFunctions : SubtargetFeature<"pad-short-functions",
517                                      "PadShortFunctions", "true",
518                                      "Pad short functions (to prevent a stall when returning too early)">;
520 // On some processors, instructions that implicitly take two memory operands are
521 // slow. In practice, this means that CALL, PUSH, and POP with memory operands
522 // should be avoided in favor of a MOV + register CALL/PUSH/POP.
// Feature string "slow-two-mem-ops" sets the SlowTwoMemOps field to "true".
523 def TuningSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
524                                      "SlowTwoMemOps", "true",
525                                      "Two memory operand instructions are slow">;
527 // True if the LEA instruction inputs have to be ready at address generation
528 // (AG) time.
// Feature string "lea-uses-ag" sets the LeaUsesAG field to "true".
529 def TuningLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LeaUsesAG", "true",
530                                    "LEA instruction needs inputs at AG stage">;
// Tuning flag "slow-lea": sets the SlowLEA subtarget field to "true".
532 def TuningSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
533                                    "LEA instruction with certain arguments is slow">;
535 // True if the LEA instruction has all three source operands: base, index,
536 // and offset or if the LEA instruction uses base and index registers where
537 // the base is EBP, RBP, or R13.
// Feature string "slow-3ops-lea" sets the Slow3OpsLEA field to "true".
538 def TuningSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
539                                    "LEA instruction with 3 ops or certain registers is slow">;
541 // True if INC and DEC instructions are slow when writing to flags.
// Feature string "slow-incdec" sets the SlowIncDec field to "true".
542 def TuningSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
543                                    "INC and DEC instructions are slower than ADD and SUB">;
// False-dependency tuning flags. Each def below marks one instruction group as
// having a false dependency on its destination register and sets the matching
// Has*FalseDeps subtarget field to "true".

// "false-deps-popcnt" -> HasPOPCNTFalseDeps.
545 def TuningPOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
546                                      "HasPOPCNTFalseDeps", "true",
547                                      "POPCNT has a false dependency on dest register">;
// "false-deps-lzcnt-tzcnt" -> HasLZCNTFalseDeps (covers both LZCNT and TZCNT).
549 def TuningLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
550                                      "HasLZCNTFalseDeps", "true",
551                                      "LZCNT/TZCNT have a false dependency on dest register">;
// "false-deps-mulc" -> HasMULCFalseDeps.
553 def TuningMULCFalseDeps : SubtargetFeature<"false-deps-mulc",
554                                "HasMULCFalseDeps", "true",
555                                "VF[C]MULCPH/SH has a false dependency on dest register">;
// "false-deps-perm" -> HasPERMFalseDeps.
557 def TuningPERMFalseDeps : SubtargetFeature<"false-deps-perm",
558                                "HasPERMFalseDeps", "true",
559                                "VPERMD/Q/PS/PD has a false dependency on dest register">;
// "false-deps-range" -> HasRANGEFalseDeps.
561 def TuningRANGEFalseDeps : SubtargetFeature<"false-deps-range",
562                                "HasRANGEFalseDeps", "true",
563                                "VRANGEPD/PS/SD/SS has a false dependency on dest register">;
// Tuning flag "false-deps-getmant": sets the HasGETMANTFalseDeps subtarget
// field to "true". The description is written as two adjacent string literals,
// which TableGen concatenates into a single string.
565 def TuningGETMANTFalseDeps : SubtargetFeature<"false-deps-getmant",
566                                "HasGETMANTFalseDeps", "true",
567                                "VGETMANTSS/SD/SH and VGETMANTPS/PD(memory version) has a"
568                                " false dependency on dest register">;
// Tuning flag "false-deps-mullq": sets the HasMULLQFalseDeps subtarget field
// to "true".
570 def TuningMULLQFalseDeps : SubtargetFeature<"false-deps-mullq",
571                                "HasMULLQFalseDeps", "true",
572                                "VPMULLQ has a false dependency on dest register">;
// Tuning flag "sbb-dep-breaking": sets the HasSBBDepBreaking subtarget field
// to "true".
574 def TuningSBBDepBreaking : SubtargetFeature<"sbb-dep-breaking",
575                                      "HasSBBDepBreaking", "true",
576                                      "SBB with same register has no source dependency">;
578 // On recent X86 (port bound) processors, it's preferable to combine to a single shuffle
579 // using a variable mask over multiple fixed shuffles.
// Cross-lane variant: "fast-variable-crosslane-shuffle" sets
// HasFastVariableCrossLaneShuffle to "true".
580 def TuningFastVariableCrossLaneShuffle
581     : SubtargetFeature<"fast-variable-crosslane-shuffle",
582                        "HasFastVariableCrossLaneShuffle",
583                        "true", "Cross-lane shuffles with variable masks are fast">;
// Per-lane variant: "fast-variable-perlane-shuffle" sets
// HasFastVariablePerLaneShuffle to "true".
584 def TuningFastVariablePerLaneShuffle
585     : SubtargetFeature<"fast-variable-perlane-shuffle",
586                        "HasFastVariablePerLaneShuffle",
587                        "true", "Per-lane shuffles with variable masks are fast">;
// Bypass-delay tuning flags: one general flag, then per-operation flags for
// mov, blend and shuffle. Each sets its NoDomainDelay* field to "true".

589 // Goldmont / Tremont (Atom in general) have no bypass delay.
590 def TuningNoDomainDelay : SubtargetFeature<"no-bypass-delay",
591                                    "NoDomainDelay","true",
592                                    "Has no bypass delay when using the 'wrong' domain">;
594 // Many processors (Nehalem+ on Intel) have no bypass delay when
595 // using the wrong mov type.
596 def TuningNoDomainDelayMov : SubtargetFeature<"no-bypass-delay-mov",
597                                    "NoDomainDelayMov","true",
598                                    "Has no bypass delay when using the 'wrong' mov type">;
600 // Newer processors (Skylake+ on Intel) have no bypass delay when
601 // using the wrong blend type.
602 def TuningNoDomainDelayBlend : SubtargetFeature<"no-bypass-delay-blend",
603                                    "NoDomainDelayBlend","true",
604                                    "Has no bypass delay when using the 'wrong' blend type">;
606 // Newer processors (Haswell+ on Intel) have no bypass delay when
607 // using the wrong shuffle type.
608 def TuningNoDomainDelayShuffle : SubtargetFeature<"no-bypass-delay-shuffle",
609                                    "NoDomainDelayShuffle","true",
610                                    "Has no bypass delay when using the 'wrong' shuffle type">;
612 // Prefer lowering shuffles on AVX512 targets (e.g. Skylake Server) to
613 // imm shifts/rotate if they can use more ports than regular shuffles.
// Feature string "faster-shift-than-shuffle" sets PreferLowerShuffleAsShift
// to "true".
614 def TuningPreferShiftShuffle : SubtargetFeature<"faster-shift-than-shuffle",
615                                    "PreferLowerShuffleAsShift", "true",
616                                    "Shifts are faster (or as fast) as shuffle">;
// Tuning flag "tuning-fast-imm-vector-shift": sets the FastImmVectorShift
// subtarget field to "true".
// NOTE(review): this is the only feature string in this section that carries a
// "tuning-" prefix; renaming it would change the accepted feature string.
618 def TuningFastImmVectorShift : SubtargetFeature<"tuning-fast-imm-vector-shift",
619                                    "FastImmVectorShift", "true",
620                                    "Vector shifts are fast (2/cycle) as opposed to slow (1/cycle)">;
622 // On some X86 processors, a vzeroupper instruction should be inserted after
623 // using ymm/zmm registers before executing code that may use SSE instructions.
// Feature string "vzeroupper" sets the InsertVZEROUPPER field to "true".
624 def TuningInsertVZEROUPPER
625     : SubtargetFeature<"vzeroupper",
626                        "InsertVZEROUPPER",
627                        "true", "Should insert vzeroupper instructions">;
629 // TuningFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
630 // than the corresponding Newton-Raphson (NR) code. TuningFastVectorFSQRT should
631 // be enabled if vector FSQRT has higher throughput than the corresponding NR code.
632 // The idea is that throughput bound code is likely to be vectorized, so for
633 // vectorized code we should care about the throughput of SQRT operations.
634 // But if the code is scalar that probably means that the code has some kind of
635 // dependency and we should care more about reducing the latency.
637 // True if hardware SQRTSS instruction is at least as fast (latency) as
638 // RSQRTSS followed by a Newton-Raphson iteration.
// Feature string "fast-scalar-fsqrt" sets HasFastScalarFSQRT to "true".
639 def TuningFastScalarFSQRT
640     : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
641                        "true", "Scalar SQRT is fast (disable Newton-Raphson)">;
642 // True if hardware SQRTPS/VSQRTPS instructions are at least as fast
643 // (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration.
// Feature string "fast-vector-fsqrt" sets HasFastVectorFSQRT to "true".
644 def TuningFastVectorFSQRT
645     : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
646                        "true", "Vector SQRT is fast (disable Newton-Raphson)">;
648 // If lzcnt has equivalent latency/throughput to most simple integer ops, it can
649 // be used to replace test/set sequences.
// Feature string "fast-lzcnt" sets the HasFastLZCNT field to "true".
650 def TuningFastLZCNT
651     : SubtargetFeature<
652           "fast-lzcnt", "HasFastLZCNT", "true",
653           "LZCNT instructions are as fast as most simple integer ops">;
// NOP-length tuning flags: three separate flags for 7-, 11- and 15-byte NOPs,
// each setting its own HasFast*ByteNOP field to "true".

655 // If the target can efficiently decode NOPs up to 7 bytes in length.
656 def TuningFast7ByteNOP
657     : SubtargetFeature<
658           "fast-7bytenop", "HasFast7ByteNOP", "true",
659           "Target can quickly decode up to 7 byte NOPs">;
661 // If the target can efficiently decode NOPs up to 11 bytes in length.
662 def TuningFast11ByteNOP
663     : SubtargetFeature<
664           "fast-11bytenop", "HasFast11ByteNOP", "true",
665           "Target can quickly decode up to 11 byte NOPs">;
667 // If the target can efficiently decode NOPs up to 15 bytes in length.
668 def TuningFast15ByteNOP
669     : SubtargetFeature<
670           "fast-15bytenop", "HasFast15ByteNOP", "true",
671           "Target can quickly decode up to 15 byte NOPs">;
673 // Sandy Bridge and newer processors can use SHLD with the same source on both
674 // inputs to implement rotate to avoid the partial flag update of the normal
675 // rotate instructions.
// Feature string "fast-shld-rotate" sets the HasFastSHLDRotate field to "true".
676 def TuningFastSHLDRotate
677     : SubtargetFeature<
678           "fast-shld-rotate", "HasFastSHLDRotate", "true",
679           "SHLD can be used as a faster rotate">;
681 // Bulldozer and newer processors can merge CMP/TEST (but not other
682 // instructions) with conditional branches.
// Feature string "branchfusion" sets the HasBranchFusion field to "true".
683 def TuningBranchFusion
684     : SubtargetFeature<"branchfusion", "HasBranchFusion", "true",
685                  "CMP/TEST can be fused with conditional branches">;
687 // Sandy Bridge and newer processors have many instructions that can be
688 // fused with conditional branches and pass through the CPU as a single
689 // operation.
// Feature string "macrofusion" sets the HasMacroFusion field to "true".
690 def TuningMacroFusion
691     : SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
692                  "Various instructions can be fused with conditional branches">;
694 // Gather is available since Haswell (AVX2 set). So technically, we can
695 // generate Gathers on all AVX2 processors. But the overhead on HSW is high.
696 // Skylake Client processor has faster Gathers than HSW and performance is
697 // similar to Skylake Server (AVX-512).
// Feature string "fast-gather" sets the HasFastGather field to "true".
698 def TuningFastGather
699     : SubtargetFeature<"fast-gather", "HasFastGather", "true",
700                        "Indicates if gather is reasonably fast (this is true for Skylake client and all AVX-512 CPUs)">;
702 // Generate vpdpwssd instead of vpmaddwd+vpaddd sequence.
// Feature string "fast-dpwssd" sets the HasFastDPWSSD field to "true".
703 def TuningFastDPWSSD
704     : SubtargetFeature<
705           "fast-dpwssd", "HasFastDPWSSD", "true",
706           "Prefer vpdpwssd instruction over vpmaddwd+vpaddd instruction sequence">;
// Inverted-sense flags: unlike the other tuning defs here, enabling one of
// these sets its field to "false" rather than "true". "prefer-no-gather"
// clears PreferGather and "prefer-no-scatter" clears PreferScatter.
708 def TuningPreferNoGather
709     : SubtargetFeature<"prefer-no-gather", "PreferGather", "false",
710                        "Prefer no gather instructions">;
711 def TuningPreferNoScatter
712     : SubtargetFeature<"prefer-no-scatter", "PreferScatter", "false",
713                        "Prefer no scatter instructions">;
// Vector-width preference flags.

// "prefer-128-bit" sets the Prefer128Bit field to "true".
715 def TuningPrefer128Bit
716     : SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true",
717                        "Prefer 128-bit AVX instructions">;
// "prefer-256-bit" sets the Prefer256Bit field to "true".
719 def TuningPrefer256Bit
720     : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
721                        "Prefer 256-bit AVX instructions">;
// "allow-light-256-bit" sets AllowLight256Bit to "true"; per its description
// it permits 256-bit loads/stores even when 128-bit is otherwise preferred.
723 def TuningAllowLight256Bit
724     : SubtargetFeature<"allow-light-256-bit", "AllowLight256Bit", "true",
725                        "Enable generation of 256-bit load/stores even if we prefer 128-bit">;
// Tuning flag "prefer-mask-registers": sets the PreferMaskRegisters subtarget
// field to "true".
727 def TuningPreferMaskRegisters
728     : SubtargetFeature<"prefer-mask-registers", "PreferMaskRegisters", "true",
729                        "Prefer AVX512 mask registers over PTEST/MOVMSK">;
// Tuning flag "fast-bextr": sets the HasFastBEXTR subtarget field to "true".
// The description is split across two adjacent string literals.
731 def TuningFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
732           "Indicates that the BEXTR instruction is implemented as a single uop "
733           "with good throughput">;
735 // Combine vector math operations with shuffles into horizontal math
736 // instructions if a CPU implements horizontal operations (introduced with
737 // SSE3) with better latency/throughput than the alternative sequence.
// Feature string "fast-hops" sets the HasFastHorizontalOps field to "true".
738 def TuningFastHorizontalOps
739     : SubtargetFeature<
740         "fast-hops", "HasFastHorizontalOps", "true",
741         "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
742         "normal vector instructions with shuffles">;
// Shift-mask tuning flags: prefer a left/right logical shift pair over a
// shift+and pair. There is one flag for scalar and one for vector operations.

// "fast-scalar-shift-masks" sets HasFastScalarShiftMasks to "true".
744 def TuningFastScalarShiftMasks
745     : SubtargetFeature<
746         "fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
747         "Prefer a left/right scalar logical shift pair over a shift+and pair">;
// "fast-vector-shift-masks" sets HasFastVectorShiftMasks to "true".
749 def TuningFastVectorShiftMasks
750     : SubtargetFeature<
751         "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
752         "Prefer a left/right vector logical shift pair over a shift+and pair">;
// Tuning flag "fast-movbe": sets the HasFastMOVBE subtarget field to "true".
754 def TuningFastMOVBE
755     : SubtargetFeature<"fast-movbe", "HasFastMOVBE", "true",
756     "Prefer a movbe over a single-use load + bswap / single-use bswap + store">;
// Tuning flag "fast-imm16": sets the HasFastImm16 subtarget field to "true".
758 def TuningFastImm16
759     : SubtargetFeature<"fast-imm16", "HasFastImm16", "true",
760     "Prefer a i16 instruction with i16 immediate over extension to i32">;
// Cost-table selection flags.

// "use-slm-arith-costs" sets UseSLMArithCosts to "true" (Silvermont costs).
762 def TuningUseSLMArithCosts
763     : SubtargetFeature<"use-slm-arith-costs", "UseSLMArithCosts", "true",
764         "Use Silvermont specific arithmetic costs">;
// "use-glm-div-sqrt-costs" sets UseGLMDivSqrtCosts to "true" (Goldmont
// floating point div/sqrt costs).
766 def TuningUseGLMDivSqrtCosts
767     : SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
768         "Use Goldmont specific floating point div/sqrt costs">;
770 // Starting with the Redwood Cove architecture, branches have a branch-taken
771 // hint (i.e., instruction prefix 3EH).
// Feature string "branch-hint" sets the HasBranchHint field to "true".
772 def TuningBranchHint: SubtargetFeature<"branch-hint", "HasBranchHint", "true",
773                                         "Target has branch hint feature">;
775 //===----------------------------------------------------------------------===//
776 // X86 CPU Families
777 // TODO: Remove these - use general tuning features to determine codegen.
778 //===----------------------------------------------------------------------===//
780 // Bonnell
// Sets the IsAtom subtarget field to "true". The feature-name string is empty.
// NOTE(review): presumably the empty name keeps this from being selected by
// name on the command line; confirm against the SubtargetFeature emitter.
781 def ProcIntelAtom : SubtargetFeature<"", "IsAtom", "true", "Is Intel Atom processor">;
783 //===----------------------------------------------------------------------===//
784 // Register File Description
785 //===----------------------------------------------------------------------===//
787 include "X86RegisterInfo.td"
788 include "X86RegisterBanks.td"
790 //===----------------------------------------------------------------------===//
791 // Instruction Descriptions
792 //===----------------------------------------------------------------------===//
794 include "X86Schedule.td"
795 include "X86InstrInfo.td"
796 include "X86SchedPredicates.td"
// Instantiates the InstrInfo class as X86InstrInfo with no field overrides.
798 def X86InstrInfo : InstrInfo;
800 //===----------------------------------------------------------------------===//
801 // X86 Scheduler Models
802 //===----------------------------------------------------------------------===//
804 include "X86ScheduleAtom.td"
805 include "X86SchedSandyBridge.td"
806 include "X86SchedHaswell.td"
807 include "X86SchedBroadwell.td"
808 include "X86ScheduleSLM.td"
809 include "X86ScheduleZnver1.td"
810 include "X86ScheduleZnver2.td"
811 include "X86ScheduleZnver3.td"
812 include "X86ScheduleZnver4.td"
813 include "X86ScheduleBdVer2.td"
814 include "X86ScheduleBtVer2.td"
815 include "X86SchedSkylakeClient.td"
816 include "X86SchedSkylakeServer.td"
817 include "X86SchedIceLake.td"
818 include "X86SchedAlderlakeP.td"
819 include "X86SchedSapphireRapids.td"
821 //===----------------------------------------------------------------------===//
822 // X86 Processor Feature Lists
823 //===----------------------------------------------------------------------===//
825 def ProcessorFeatures {
826   // x86-64 micro-architecture levels: x86-64 and x86-64-v[234]
827   list<SubtargetFeature> X86_64V1Features = [
828     FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSE2,
829     FeatureFXSR, FeatureNOPL, FeatureX86_64,
830   ];
831   list<SubtargetFeature> X86_64V1Tuning = [
832     TuningMacroFusion,
833     TuningSlow3OpsLEA,
834     TuningSlowDivide64,
835     TuningSlowIncDec,
836     TuningInsertVZEROUPPER
837   ];
839   list<SubtargetFeature> X86_64V2Features = !listconcat(X86_64V1Features, [
840     FeatureCX16, FeatureLAHFSAHF64, FeatureCRC32, FeaturePOPCNT,
841     FeatureSSE42
842   ]);
843   list<SubtargetFeature> X86_64V2Tuning = [
844     TuningMacroFusion,
845     TuningSlow3OpsLEA,
846     TuningSlowDivide64,
847     TuningSlowUAMem32,
848     TuningFastScalarFSQRT,
849     TuningFastSHLDRotate,
850     TuningFast15ByteNOP,
851     TuningPOPCNTFalseDeps,
852     TuningInsertVZEROUPPER
853   ];
855   list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [
856     FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT,
857     FeatureMOVBE, FeatureXSAVE
858   ]);
859   list<SubtargetFeature> X86_64V3Tuning = [
860     TuningMacroFusion,
861     TuningSlow3OpsLEA,
862     TuningSlowDivide64,
863     TuningFastScalarFSQRT,
864     TuningFastSHLDRotate,
865     TuningFast15ByteNOP,
866     TuningFastVariableCrossLaneShuffle,
867     TuningFastVariablePerLaneShuffle,
868     TuningPOPCNTFalseDeps,
869     TuningLZCNTFalseDeps,
870     TuningInsertVZEROUPPER,
871     TuningAllowLight256Bit
872   ];
874   list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [
875     FeatureEVEX512,
876     FeatureBWI,
877     FeatureCDI,
878     FeatureDQI,
879     FeatureVLX,
880   ]);
881   list<SubtargetFeature> X86_64V4Tuning = [
882     TuningMacroFusion,
883     TuningSlow3OpsLEA,
884     TuningSlowDivide64,
885     TuningFastScalarFSQRT,
886     TuningFastVectorFSQRT,
887     TuningFastSHLDRotate,
888     TuningFast15ByteNOP,
889     TuningFastVariableCrossLaneShuffle,
890     TuningFastVariablePerLaneShuffle,
891     TuningPrefer256Bit,
892     TuningFastGather,
893     TuningPOPCNTFalseDeps,
894     TuningInsertVZEROUPPER,
895     TuningAllowLight256Bit
896   ];
898   // Nehalem
899   list<SubtargetFeature> NHMFeatures = X86_64V2Features;
900   list<SubtargetFeature> NHMTuning = [TuningMacroFusion,
901                                       TuningSlowDivide64,
902                                       TuningInsertVZEROUPPER,
903                                       TuningNoDomainDelayMov];
905   // Westmere
906   list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
907   list<SubtargetFeature> WSMTuning = NHMTuning;
908   list<SubtargetFeature> WSMFeatures =
909     !listconcat(NHMFeatures, WSMAdditionalFeatures);
911   // Sandybridge
912   list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX,
913                                                   FeatureXSAVE,
914                                                   FeatureXSAVEOPT];
915   list<SubtargetFeature> SNBTuning = [TuningMacroFusion,
916                                       TuningSlow3OpsLEA,
917                                       TuningSlowDivide64,
918                                       TuningSlowUAMem32,
919                                       TuningFastScalarFSQRT,
920                                       TuningFastSHLDRotate,
921                                       TuningFast15ByteNOP,
922                                       TuningPOPCNTFalseDeps,
923                                       TuningInsertVZEROUPPER,
924                                       TuningNoDomainDelayMov];
925   list<SubtargetFeature> SNBFeatures =
926     !listconcat(WSMFeatures, SNBAdditionalFeatures);
928   // Ivybridge
929   list<SubtargetFeature> IVBAdditionalFeatures = [FeatureRDRAND,
930                                                   FeatureF16C,
931                                                   FeatureFSGSBase];
932   list<SubtargetFeature> IVBTuning = SNBTuning;
933   list<SubtargetFeature> IVBFeatures =
934     !listconcat(SNBFeatures, IVBAdditionalFeatures);
936   // Haswell
937   list<SubtargetFeature> HSWAdditionalFeatures = [FeatureAVX2,
938                                                   FeatureBMI,
939                                                   FeatureBMI2,
940                                                   FeatureERMSB,
941                                                   FeatureFMA,
942                                                   FeatureINVPCID,
943                                                   FeatureLZCNT,
944                                                   FeatureMOVBE];
945   list<SubtargetFeature> HSWTuning = [TuningMacroFusion,
946                                       TuningSlow3OpsLEA,
947                                       TuningSlowDivide64,
948                                       TuningFastScalarFSQRT,
949                                       TuningFastSHLDRotate,
950                                       TuningFast15ByteNOP,
951                                       TuningFastVariableCrossLaneShuffle,
952                                       TuningFastVariablePerLaneShuffle,
953                                       TuningPOPCNTFalseDeps,
954                                       TuningLZCNTFalseDeps,
955                                       TuningInsertVZEROUPPER,
956                                       TuningAllowLight256Bit,
957                                       TuningNoDomainDelayMov,
958                                       TuningNoDomainDelayShuffle];
959   list<SubtargetFeature> HSWFeatures =
960     !listconcat(IVBFeatures, HSWAdditionalFeatures);
962   // Broadwell
963   list<SubtargetFeature> BDWAdditionalFeatures = [FeatureADX,
964                                                   FeatureRDSEED,
965                                                   FeaturePRFCHW];
966   list<SubtargetFeature> BDWTuning = HSWTuning;
967   list<SubtargetFeature> BDWFeatures =
968     !listconcat(HSWFeatures, BDWAdditionalFeatures);
970   // Skylake
971   list<SubtargetFeature> SKLAdditionalFeatures = [FeatureAES,
972                                                   FeatureXSAVEC,
973                                                   FeatureXSAVES,
974                                                   FeatureCLFLUSHOPT];
975   list<SubtargetFeature> SKLTuning = [TuningFastGather,
976                                       TuningMacroFusion,
977                                       TuningSlow3OpsLEA,
978                                       TuningSlowDivide64,
979                                       TuningFastScalarFSQRT,
980                                       TuningFastVectorFSQRT,
981                                       TuningFastSHLDRotate,
982                                       TuningFast15ByteNOP,
983                                       TuningFastVariableCrossLaneShuffle,
984                                       TuningFastVariablePerLaneShuffle,
985                                       TuningPOPCNTFalseDeps,
986                                       TuningInsertVZEROUPPER,
987                                       TuningAllowLight256Bit,
988                                       TuningNoDomainDelayMov,
989                                       TuningNoDomainDelayShuffle,
990                                       TuningNoDomainDelayBlend];
991   list<SubtargetFeature> SKLFeatures =
992     !listconcat(BDWFeatures, SKLAdditionalFeatures);
994   // Skylake-AVX512
995   list<SubtargetFeature> SKXAdditionalFeatures = [FeatureAES,
996                                                   FeatureXSAVEC,
997                                                   FeatureXSAVES,
998                                                   FeatureCLFLUSHOPT,
999                                                   FeatureAVX512,
1000                                                   FeatureEVEX512,
1001                                                   FeatureCDI,
1002                                                   FeatureDQI,
1003                                                   FeatureBWI,
1004                                                   FeatureVLX,
1005                                                   FeaturePKU,
1006                                                   FeatureCLWB];
1007   list<SubtargetFeature> SKXTuning = [TuningFastGather,
1008                                       TuningMacroFusion,
1009                                       TuningSlow3OpsLEA,
1010                                       TuningSlowDivide64,
1011                                       TuningFastScalarFSQRT,
1012                                       TuningFastVectorFSQRT,
1013                                       TuningFastSHLDRotate,
1014                                       TuningFast15ByteNOP,
1015                                       TuningFastVariableCrossLaneShuffle,
1016                                       TuningFastVariablePerLaneShuffle,
1017                                       TuningPrefer256Bit,
1018                                       TuningPOPCNTFalseDeps,
1019                                       TuningInsertVZEROUPPER,
1020                                       TuningAllowLight256Bit,
1021                                       TuningPreferShiftShuffle,
1022                                       TuningNoDomainDelayMov,
1023                                       TuningNoDomainDelayShuffle,
1024                                       TuningNoDomainDelayBlend,
1025                                       TuningFastImmVectorShift];
1026   list<SubtargetFeature> SKXFeatures =
1027     !listconcat(BDWFeatures, SKXAdditionalFeatures);
1029   // Cascadelake
1030   list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI];
1031   list<SubtargetFeature> CLXTuning = SKXTuning;
1032   list<SubtargetFeature> CLXFeatures =
1033     !listconcat(SKXFeatures, CLXAdditionalFeatures);
1035   // Cooperlake
1036   list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16];
1037   list<SubtargetFeature> CPXTuning = SKXTuning;
1038   list<SubtargetFeature> CPXFeatures =
1039     !listconcat(CLXFeatures, CPXAdditionalFeatures);
1041   // Cannonlake
1042   list<SubtargetFeature> CNLAdditionalFeatures = [FeatureAVX512,
1043                                                   FeatureEVEX512,
1044                                                   FeatureCDI,
1045                                                   FeatureDQI,
1046                                                   FeatureBWI,
1047                                                   FeatureVLX,
1048                                                   FeaturePKU,
1049                                                   FeatureVBMI,
1050                                                   FeatureIFMA,
1051                                                   FeatureSHA];
1052   list<SubtargetFeature> CNLTuning = [TuningFastGather,
1053                                       TuningMacroFusion,
1054                                       TuningSlow3OpsLEA,
1055                                       TuningSlowDivide64,
1056                                       TuningFastScalarFSQRT,
1057                                       TuningFastVectorFSQRT,
1058                                       TuningFastSHLDRotate,
1059                                       TuningFast15ByteNOP,
1060                                       TuningFastVariableCrossLaneShuffle,
1061                                       TuningFastVariablePerLaneShuffle,
1062                                       TuningPrefer256Bit,
1063                                       TuningInsertVZEROUPPER,
1064                                       TuningAllowLight256Bit,
1065                                       TuningNoDomainDelayMov,
1066                                       TuningNoDomainDelayShuffle,
1067                                       TuningNoDomainDelayBlend,
1068                                       TuningFastImmVectorShift];
1069   list<SubtargetFeature> CNLFeatures =
1070     !listconcat(SKLFeatures, CNLAdditionalFeatures);
1072   // Icelake
1073   list<SubtargetFeature> ICLAdditionalFeatures = [FeatureBITALG,
1074                                                   FeatureVAES,
1075                                                   FeatureVBMI2,
1076                                                   FeatureVNNI,
1077                                                   FeatureVPCLMULQDQ,
1078                                                   FeatureVPOPCNTDQ,
1079                                                   FeatureGFNI,
1080                                                   FeatureRDPID,
1081                                                   FeatureFSRM];
1082   list<SubtargetFeature> ICLTuning = [TuningFastGather,
1083                                       TuningMacroFusion,
1084                                       TuningSlowDivide64,
1085                                       TuningFastScalarFSQRT,
1086                                       TuningFastVectorFSQRT,
1087                                       TuningFastSHLDRotate,
1088                                       TuningFast15ByteNOP,
1089                                       TuningFastVariableCrossLaneShuffle,
1090                                       TuningFastVariablePerLaneShuffle,
1091                                       TuningPrefer256Bit,
1092                                       TuningInsertVZEROUPPER,
1093                                       TuningAllowLight256Bit,
1094                                       TuningNoDomainDelayMov,
1095                                       TuningNoDomainDelayShuffle,
1096                                       TuningNoDomainDelayBlend,
1097                                       TuningFastImmVectorShift];
1098   list<SubtargetFeature> ICLFeatures =
1099     !listconcat(CNLFeatures, ICLAdditionalFeatures);
1101   // Icelake Server
1102   list<SubtargetFeature> ICXAdditionalFeatures = [FeaturePCONFIG,
1103                                                   FeatureCLWB,
1104                                                   FeatureWBNOINVD];
1105   list<SubtargetFeature> ICXTuning = ICLTuning;
1106   list<SubtargetFeature> ICXFeatures =
1107     !listconcat(ICLFeatures, ICXAdditionalFeatures);
1109   // Tigerlake
1110   list<SubtargetFeature> TGLAdditionalFeatures = [FeatureVP2INTERSECT,
1111                                                   FeatureCLWB,
1112                                                   FeatureMOVDIRI,
1113                                                   FeatureMOVDIR64B,
1114                                                   FeatureSHSTK];
1115   list<SubtargetFeature> TGLTuning = ICLTuning;
1116   list<SubtargetFeature> TGLFeatures =
1117     !listconcat(ICLFeatures, TGLAdditionalFeatures );
1119   // Sapphirerapids: features extend ICXFeatures; tuning extends ICXTuning.
1120   list<SubtargetFeature> SPRAdditionalFeatures = [FeatureAMXTILE,
1121                                                   FeatureAMXINT8,
1122                                                   FeatureAMXBF16,
1123                                                   FeatureBF16,
1124                                                   FeatureSERIALIZE,
1125                                                   FeatureCLDEMOTE,
1126                                                   FeatureWAITPKG,
1127                                                   FeaturePTWRITE,
1128                                                   FeatureFP16,
1129                                                   FeatureAVXVNNI,
1130                                                   FeatureTSXLDTRK,
1131                                                   FeatureENQCMD,
1132                                                   FeatureSHSTK,
1133                                                   FeatureMOVDIRI,
1134                                                   FeatureMOVDIR64B,
1135                                                   FeatureUINTR];
1136   list<SubtargetFeature> SPRAdditionalTuning = [TuningMULCFalseDeps,
1137                                                 TuningPERMFalseDeps,
1138                                                 TuningRANGEFalseDeps,
1139                                                 TuningGETMANTFalseDeps,
1140                                                 TuningMULLQFalseDeps];
1141   list<SubtargetFeature> SPRTuning = !listconcat(ICXTuning, SPRAdditionalTuning);
1142   list<SubtargetFeature> SPRFeatures =
1143     !listconcat(ICXFeatures, SPRAdditionalFeatures);
1145   // Graniterapids: SPR features and SPR tuning plus the additions below.
1146   list<SubtargetFeature> GNRAdditionalFeatures = [FeatureAMXFP16,
1147                                                   FeaturePREFETCHI];
1148   list<SubtargetFeature> GNRFeatures =
1149     !listconcat(SPRFeatures, GNRAdditionalFeatures);
1150   list<SubtargetFeature> GNRAdditionalTuning = [TuningBranchHint];
1151   list<SubtargetFeature> GNRTuning = !listconcat(SPRTuning, GNRAdditionalTuning);
1153   // Graniterapids D: GNR features plus FeatureAMXCOMPLEX (features only).
1154   list<SubtargetFeature> GNRDAdditionalFeatures = [FeatureAMXCOMPLEX];
1155   list<SubtargetFeature> GNRDFeatures =
1156     !listconcat(GNRFeatures, GNRDAdditionalFeatures);
1158   // Diamond Rapids: GNRD features plus the additions below (features only).
1159   list<SubtargetFeature> DMRAdditionalFeatures = [FeatureAVX10_2_512,
1160                                                   FeatureSM4,
1161                                                   FeatureCMPCCXADD,
1162                                                   FeatureAVXIFMA,
1163                                                   FeatureAVXNECONVERT,
1164                                                   FeatureAVXVNNIINT8,
1165                                                   FeatureAVXVNNIINT16,
1166                                                   FeatureUSERMSR,
1167                                                   FeatureSHA512,
1168                                                   FeatureSM3,
1169                                                   FeatureEGPR,
1170                                                   FeatureZU,
1171                                                   FeatureCCMP,
1172                                                   FeaturePush2Pop2,
1173                                                   FeaturePPX,
1174                                                   FeatureNDD,
1175                                                   FeatureNF,
1176                                                   FeatureCF,
1177                                                   FeatureMOVRS,
1178                                                   FeatureAMXMOVRS,
1179                                                   FeatureAMXAVX512,
1180                                                   FeatureAMXFP8,
1181                                                   FeatureAMXTF32,
1182                                                   FeatureAMXTRANSPOSE];
1183   list<SubtargetFeature> DMRFeatures =
1184     !listconcat(GNRDFeatures, DMRAdditionalFeatures);
1186   // Atom: literal base lists; SLMFeatures is built on AtomFeatures.
1187   list<SubtargetFeature> AtomFeatures = [FeatureX87,
1188                                          FeatureCX8,
1189                                          FeatureCMOV,
1190                                          FeatureMMX,
1191                                          FeatureSSSE3,
1192                                          FeatureFXSR,
1193                                          FeatureNOPL,
1194                                          FeatureX86_64,
1195                                          FeatureCX16,
1196                                          FeatureMOVBE,
1197                                          FeatureLAHFSAHF64];
1198   list<SubtargetFeature> AtomTuning = [ProcIntelAtom,
1199                                        TuningSlowUAMem16,
1200                                        TuningLEAForSP,
1201                                        TuningSlowDivide32,
1202                                        TuningSlowDivide64,
1203                                        TuningSlowTwoMemOps,
1204                                        TuningFastImm16,
1205                                        TuningLEAUsesAG,
1206                                        TuningPadShortFunctions,
1207                                        TuningInsertVZEROUPPER,
1208                                        TuningNoDomainDelay];
1210   // Silvermont: features extend AtomFeatures; SLMTuning is a literal list.
1211   list<SubtargetFeature> SLMAdditionalFeatures = [FeatureSSE42,
1212                                                   FeatureCRC32,
1213                                                   FeaturePOPCNT,
1214                                                   FeaturePCLMUL,
1215                                                   FeaturePRFCHW,
1216                                                   FeatureRDRAND];
1217   list<SubtargetFeature> SLMTuning = [TuningUseSLMArithCosts,
1218                                       TuningSlowTwoMemOps,
1219                                       TuningSlowLEA,
1220                                       TuningSlowIncDec,
1221                                       TuningSlowDivide64,
1222                                       TuningSlowPMULLD,
1223                                       TuningFast7ByteNOP,
1224                                       TuningFastMOVBE,
1225                                       TuningFastImm16,
1226                                       TuningPOPCNTFalseDeps,
1227                                       TuningInsertVZEROUPPER,
1228                                       TuningNoDomainDelay];
1229   list<SubtargetFeature> SLMFeatures =
1230     !listconcat(AtomFeatures, SLMAdditionalFeatures);
1232   // Goldmont: features extend SLMFeatures; GLMTuning is a literal list.
1233   list<SubtargetFeature> GLMAdditionalFeatures = [FeatureAES,
1234                                                   FeatureSHA,
1235                                                   FeatureRDSEED,
1236                                                   FeatureXSAVE,
1237                                                   FeatureXSAVEOPT,
1238                                                   FeatureXSAVEC,
1239                                                   FeatureXSAVES,
1240                                                   FeatureCLFLUSHOPT,
1241                                                   FeatureFSGSBase];
1242   list<SubtargetFeature> GLMTuning = [TuningUseGLMDivSqrtCosts,
1243                                       TuningSlowTwoMemOps,
1244                                       TuningSlowLEA,
1245                                       TuningSlowIncDec,
1246                                       TuningFastMOVBE,
1247                                       TuningFastImm16,
1248                                       TuningPOPCNTFalseDeps,
1249                                       TuningInsertVZEROUPPER,
1250                                       TuningNoDomainDelay];
1251   list<SubtargetFeature> GLMFeatures =
1252     !listconcat(SLMFeatures, GLMAdditionalFeatures);
1254   // Goldmont Plus: features extend GLMFeatures. GLPTuning repeats the
1254   // GLMTuning entries except that TuningPOPCNTFalseDeps is not listed.
1255   list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE,
1256                                                   FeatureRDPID];
1257   list<SubtargetFeature> GLPTuning = [TuningUseGLMDivSqrtCosts,
1258                                       TuningSlowTwoMemOps,
1259                                       TuningSlowLEA,
1260                                       TuningSlowIncDec,
1261                                       TuningFastMOVBE,
1262                                       TuningFastImm16,
1263                                       TuningInsertVZEROUPPER,
1264                                       TuningNoDomainDelay];
1265   list<SubtargetFeature> GLPFeatures =
1266     !listconcat(GLMFeatures, GLPAdditionalFeatures);
1268   // Tremont: GLP features plus CLWB and GFNI.
1269   list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLWB,
1270                                                   FeatureGFNI];
1271   list<SubtargetFeature> TRMTuning = GLPTuning; // Tuning is identical to GLP.
1272   list<SubtargetFeature> TRMFeatures =
1273     !listconcat(GLPFeatures, TRMAdditionalFeatures);
1275   // Alderlake: features extend TRMFeatures, while tuning extends SKLTuning.
1276   list<SubtargetFeature> ADLAdditionalFeatures = [FeatureSERIALIZE,
1277                                                   FeaturePCONFIG,
1278                                                   FeatureSHSTK,
1279                                                   FeatureWIDEKL,
1280                                                   FeatureINVPCID,
1281                                                   FeatureADX,
1282                                                   FeatureFMA,
1283                                                   FeatureVAES,
1284                                                   FeatureVPCLMULQDQ,
1285                                                   FeatureF16C,
1286                                                   FeatureBMI,
1287                                                   FeatureBMI2,
1288                                                   FeatureLZCNT,
1289                                                   FeatureAVXVNNI,
1290                                                   FeaturePKU,
1291                                                   FeatureHRESET,
1292                                                   FeatureCLDEMOTE,
1293                                                   FeatureMOVDIRI,
1294                                                   FeatureMOVDIR64B,
1295                                                   FeatureWAITPKG];
1296   list<SubtargetFeature> ADLAdditionalTuning = [TuningPERMFalseDeps,
1297                                                 TuningPreferMovmskOverVTest,
1298                                                 TuningFastImmVectorShift];
1299   list<SubtargetFeature> ADLTuning = !listconcat(SKLTuning, ADLAdditionalTuning);
1300   list<SubtargetFeature> ADLFeatures =
1301     !listconcat(TRMFeatures, ADLAdditionalFeatures);
1303   // Gracemont: a literal tuning list only; this block defines no feature list.
1304   list<SubtargetFeature> GRTTuning = [TuningMacroFusion,
1305                                       TuningSlow3OpsLEA,
1306                                       TuningFastScalarFSQRT,
1307                                       TuningFastVectorFSQRT,
1308                                       TuningFast15ByteNOP,
1309                                       TuningFastVariablePerLaneShuffle,
1310                                       TuningPOPCNTFalseDeps,
1311                                       TuningInsertVZEROUPPER];
1313   // Sierraforest: features extend ADLFeatures (features only).
1314   list<SubtargetFeature> SRFAdditionalFeatures = [FeatureCMPCCXADD,
1315                                                   FeatureAVXIFMA,
1316                                                   FeatureAVXNECONVERT,
1317                                                   FeatureENQCMD,
1318                                                   FeatureUINTR,
1319                                                   FeatureAVXVNNIINT8];
1320   list<SubtargetFeature> SRFFeatures =
1321     !listconcat(ADLFeatures, SRFAdditionalFeatures);
1323   // Arrowlake S: features extend SRFFeatures (features only).
1324   list<SubtargetFeature> ARLSAdditionalFeatures = [FeatureAVXVNNIINT16,
1325                                                    FeatureSHA512,
1326                                                    FeatureSM3,
1327                                                    FeatureSM4];
1328   list<SubtargetFeature> ARLSFeatures =
1329     !listconcat(SRFFeatures, ARLSAdditionalFeatures);
1331   // Pantherlake: ARLS features plus FeaturePREFETCHI.
1332   list<SubtargetFeature> PTLAdditionalFeatures = [FeaturePREFETCHI];
1333   list<SubtargetFeature> PTLFeatures =
1334     !listconcat(ARLSFeatures, PTLAdditionalFeatures);
1337   // Clearwaterforest: built on ARLSFeatures (not PTLFeatures).
1338   list<SubtargetFeature> CWFAdditionalFeatures = [FeaturePREFETCHI,
1339                                                   FeatureUSERMSR];
1340   list<SubtargetFeature> CWFFeatures =
1341     !listconcat(ARLSFeatures, CWFAdditionalFeatures);
1343   // Knights Landing: literal feature and tuning lists.
1344   list<SubtargetFeature> KNLFeatures = [FeatureX87,
1345                                         FeatureCX8,
1346                                         FeatureCMOV,
1347                                         FeatureMMX,
1348                                         FeatureFXSR,
1349                                         FeatureNOPL,
1350                                         FeatureX86_64,
1351                                         FeatureCX16,
1352                                         FeatureCRC32,
1353                                         FeaturePOPCNT,
1354                                         FeaturePCLMUL,
1355                                         FeatureXSAVE,
1356                                         FeatureXSAVEOPT,
1357                                         FeatureLAHFSAHF64,
1358                                         FeatureAES,
1359                                         FeatureRDRAND,
1360                                         FeatureF16C,
1361                                         FeatureFSGSBase,
1362                                         FeatureAVX512,
1363                                         FeatureEVEX512,
1364                                         FeatureCDI,
1365                                         FeatureADX,
1366                                         FeatureRDSEED,
1367                                         FeatureMOVBE,
1368                                         FeatureLZCNT,
1369                                         FeatureBMI,
1370                                         FeatureBMI2,
1371                                         FeatureFMA,
1372                                         FeaturePRFCHW];
1373   list<SubtargetFeature> KNLTuning = [TuningSlowDivide64,
1374                                       TuningSlow3OpsLEA,
1375                                       TuningSlowIncDec,
1376                                       TuningSlowTwoMemOps,
1377                                       TuningPreferMaskRegisters,
1378                                       TuningFastGather,
1379                                       TuningFastMOVBE,
1380                                       TuningFastImm16,
1381                                       TuningSlowPMADDWD];
1382   // TODO Add AVX5124FMAPS/AVX5124VNNIW features
1383   list<SubtargetFeature> KNMFeatures =
1384     !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]); // KNL features plus VPOPCNTDQ.
1386   // Barcelona: literal feature and tuning lists.
1387   list<SubtargetFeature> BarcelonaFeatures = [FeatureX87,
1388                                               FeatureCX8,
1389                                               FeatureSSE4A,
1390                                               FeatureFXSR,
1391                                               FeatureNOPL,
1392                                               FeatureCX16,
1393                                               FeaturePRFCHW,
1394                                               FeatureLZCNT,
1395                                               FeaturePOPCNT,
1396                                               FeatureLAHFSAHF64,
1397                                               FeatureCMOV,
1398                                               FeatureX86_64];
1399   list<SubtargetFeature> BarcelonaTuning = [TuningFastScalarShiftMasks,
1400                                             TuningSlowDivide64,
1401                                             TuningSlowSHLD,
1402                                             TuningSBBDepBreaking,
1403                                             TuningInsertVZEROUPPER];
1405   // Bobcat (BtVer1): literal lists; BtVer2Features is built on BtVer1Features.
1406   list<SubtargetFeature> BtVer1Features = [FeatureX87,
1407                                            FeatureCX8,
1408                                            FeatureCMOV,
1409                                            FeatureMMX,
1410                                            FeatureSSSE3,
1411                                            FeatureSSE4A,
1412                                            FeatureFXSR,
1413                                            FeatureNOPL,
1414                                            FeatureX86_64,
1415                                            FeatureCX16,
1416                                            FeaturePRFCHW,
1417                                            FeatureLZCNT,
1418                                            FeaturePOPCNT,
1419                                            FeatureLAHFSAHF64];
1420   list<SubtargetFeature> BtVer1Tuning = [TuningFast15ByteNOP,
1421                                          TuningFastScalarShiftMasks,
1422                                          TuningFastVectorShiftMasks,
1423                                          TuningSlowDivide64,
1424                                          TuningSlowSHLD,
1425                                          TuningFastImm16,
1426                                          TuningSBBDepBreaking,
1427                                          TuningInsertVZEROUPPER];
1429   // Jaguar (BtVer2): features extend BtVer1Features; tuning is a literal list.
1430   list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX,
1431                                                      FeatureAES,
1432                                                      FeatureCRC32,
1433                                                      FeaturePCLMUL,
1434                                                      FeatureBMI,
1435                                                      FeatureF16C,
1436                                                      FeatureMOVBE,
1437                                                      FeatureXSAVE,
1438                                                      FeatureXSAVEOPT];
1439   list<SubtargetFeature> BtVer2Tuning = [TuningFastLZCNT,
1440                                          TuningFastBEXTR,
1441                                          TuningFastHorizontalOps,
1442                                          TuningFast15ByteNOP,
1443                                          TuningFastScalarShiftMasks,
1444                                          TuningFastVectorShiftMasks,
1445                                          TuningFastMOVBE,
1446                                          TuningFastImm16,
1447                                          TuningSBBDepBreaking,
1448                                          TuningSlowDivide64,
1449                                          TuningSlowSHLD]; // Unlike BtVer1Tuning, no TuningInsertVZEROUPPER.
1450   list<SubtargetFeature> BtVer2Features =
1451     !listconcat(BtVer1Features, BtVer2AdditionalFeatures);
1453   // Bulldozer (BdVer1): literal base lists that the BdVer2 lists extend.
1454   list<SubtargetFeature> BdVer1Features = [FeatureX87,
1455                                            FeatureCX8,
1456                                            FeatureCMOV,
1457                                            FeatureXOP,
1458                                            FeatureX86_64,
1459                                            FeatureCX16,
1460                                            FeatureAES,
1461                                            FeatureCRC32,
1462                                            FeaturePRFCHW,
1463                                            FeaturePCLMUL,
1464                                            FeatureMMX,
1465                                            FeatureFXSR,
1466                                            FeatureNOPL,
1467                                            FeatureLZCNT,
1468                                            FeaturePOPCNT,
1469                                            FeatureXSAVE,
1470                                            FeatureLWP,
1471                                            FeatureLAHFSAHF64];
1472   list<SubtargetFeature> BdVer1Tuning = [TuningSlowSHLD,
1473                                          TuningSlowDivide64,
1474                                          TuningFast11ByteNOP,
1475                                          TuningFastScalarShiftMasks,
1476                                          TuningBranchFusion,
1477                                          TuningSBBDepBreaking,
1478                                          TuningInsertVZEROUPPER];
1480   // PileDriver (BdVer2): BdVer1 features and tuning plus the additions below.
1481   list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C,
1482                                                      FeatureBMI,
1483                                                      FeatureTBM,
1484                                                      FeatureFMA];
1485   list<SubtargetFeature> BdVer2AdditionalTuning = [TuningFastBEXTR,
1486                                                    TuningFastMOVBE];
1487   list<SubtargetFeature> BdVer2Tuning =
1488     !listconcat(BdVer1Tuning, BdVer2AdditionalTuning);
1489   list<SubtargetFeature> BdVer2Features =
1490     !listconcat(BdVer1Features, BdVer2AdditionalFeatures);
1492   // Steamroller (BdVer3): BdVer2 features plus XSAVEOPT and FSGSBase.
1493   list<SubtargetFeature> BdVer3AdditionalFeatures = [FeatureXSAVEOPT,
1494                                                      FeatureFSGSBase];
1495   list<SubtargetFeature> BdVer3Tuning = BdVer2Tuning; // Tuning is identical to BdVer2.
1496   list<SubtargetFeature> BdVer3Features =
1497     !listconcat(BdVer2Features, BdVer3AdditionalFeatures);
1499   // Excavator (BdVer4): features extend BdVer3Features.
1500   list<SubtargetFeature> BdVer4AdditionalFeatures = [FeatureAVX2,
1501                                                      FeatureBMI2,
1502                                                      FeatureMOVBE,
1503                                                      FeatureRDRAND,
1504                                                      FeatureMWAITX];
1505   list<SubtargetFeature> BdVer4Tuning = BdVer3Tuning; // Tuning is identical to BdVer3.
1506   list<SubtargetFeature> BdVer4Features =
1507     !listconcat(BdVer3Features, BdVer4AdditionalFeatures);
1510   // AMD Zen Processors common ISAs: base lists that the ZN2 lists extend.
1511   list<SubtargetFeature> ZNFeatures = [FeatureADX,
1512                                        FeatureAES,
1513                                        FeatureAVX2,
1514                                        FeatureBMI,
1515                                        FeatureBMI2,
1516                                        FeatureCLFLUSHOPT,
1517                                        FeatureCLZERO,
1518                                        FeatureCMOV,
1519                                        FeatureX86_64,
1520                                        FeatureCX16,
1521                                        FeatureCRC32,
1522                                        FeatureF16C,
1523                                        FeatureFMA,
1524                                        FeatureFSGSBase,
1525                                        FeatureFXSR,
1526                                        FeatureNOPL,
1527                                        FeatureLAHFSAHF64,
1528                                        FeatureLZCNT,
1529                                        FeatureMMX,
1530                                        FeatureMOVBE,
1531                                        FeatureMWAITX,
1532                                        FeaturePCLMUL,
1533                                        FeaturePOPCNT,
1534                                        FeaturePRFCHW,
1535                                        FeatureRDRAND,
1536                                        FeatureRDSEED,
1537                                        FeatureSHA,
1538                                        FeatureSSE4A,
1539                                        FeatureX87,
1540                                        FeatureXSAVE,
1541                                        FeatureXSAVEC,
1542                                        FeatureXSAVEOPT,
1543                                        FeatureXSAVES];
1544   list<SubtargetFeature> ZNTuning = [TuningFastLZCNT,
1545                                      TuningFastBEXTR,
1546                                      TuningFast15ByteNOP,
1547                                      TuningBranchFusion,
1548                                      TuningFastScalarFSQRT,
1549                                      TuningFastVectorFSQRT,
1550                                      TuningFastScalarShiftMasks,
1551                                      TuningFastVariablePerLaneShuffle,
1552                                      TuningFastMOVBE,
1553                                      TuningFastImm16,
1554                                      TuningSlowDivide64,
1555                                      TuningSlowSHLD,
1556                                      TuningSBBDepBreaking,
1557                                      TuningInsertVZEROUPPER,
1558                                      TuningAllowLight256Bit];
1559   list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
1560                                                   FeatureRDPID,
1561                                                   FeatureRDPRU,
1562                                                   FeatureWBNOINVD];
1563   list<SubtargetFeature> ZN2Tuning = ZNTuning; // Tuning is identical to ZN.
1564   list<SubtargetFeature> ZN2Features =
1565     !listconcat(ZNFeatures, ZN2AdditionalFeatures); // ZN features plus the ZN2 additions.
1566   list<SubtargetFeature> ZN3AdditionalFeatures = [FeatureFSRM,
1567                                                   FeatureINVPCID,
1568                                                   FeaturePKU,
1569                                                   FeatureVAES,
1570                                                   FeatureVPCLMULQDQ];
1571   list<SubtargetFeature> ZN3AdditionalTuning = [TuningMacroFusion]; // Sole tuning change over ZN2.
1572   list<SubtargetFeature> ZN3Tuning =
1573     !listconcat(ZN2Tuning, ZN3AdditionalTuning);
1574   list<SubtargetFeature> ZN3Features =
1575     !listconcat(ZN2Features, ZN3AdditionalFeatures); // ZN2 features plus the ZN3 additions.
1577   list<SubtargetFeature> ZN4AdditionalTuning = [TuningFastDPWSSD]; // Sole tuning change over ZN3.
1578   list<SubtargetFeature> ZN4Tuning =
1579     !listconcat(ZN3Tuning, ZN4AdditionalTuning);
1580   list<SubtargetFeature> ZN4AdditionalFeatures = [FeatureAVX512,
1581                                                   FeatureEVEX512,
1582                                                   FeatureCDI,
1583                                                   FeatureDQI,
1584                                                   FeatureBWI,
1585                                                   FeatureVLX,
1586                                                   FeatureVBMI,
1587                                                   FeatureVBMI2,
1588                                                   FeatureIFMA,
1589                                                   FeatureVNNI,
1590                                                   FeatureBITALG,
1591                                                   FeatureGFNI,
1592                                                   FeatureBF16,
1593                                                   FeatureSHSTK,
1594                                                   FeatureVPOPCNTDQ];
1595   list<SubtargetFeature> ZN4Features =
1596     !listconcat(ZN3Features, ZN4AdditionalFeatures); // ZN3 features plus the ZN4 additions.
1598   list<SubtargetFeature> ZN5Tuning = ZN4Tuning; // Tuning is identical to ZN4.
1599   list<SubtargetFeature> ZN5AdditionalFeatures = [FeatureVNNI, // NOTE(review): also listed in ZN4AdditionalFeatures.
1600                                                   FeatureMOVDIRI,
1601                                                   FeatureMOVDIR64B,
1602                                                   FeatureVP2INTERSECT,
1603                                                   FeaturePREFETCHI,
1604                                                   FeatureAVXVNNI
1605                                                   ];
1606   list<SubtargetFeature> ZN5Features =
1607     !listconcat(ZN4Features, ZN5AdditionalFeatures); // ZN4 features plus the ZN5 additions.
1610 //===----------------------------------------------------------------------===//
1611 // X86 processors supported.
1612 //===----------------------------------------------------------------------===//
1614 class Proc<string Name, list<SubtargetFeature> Features, // Processor entry that always passes GenericModel.
1615            list<SubtargetFeature> TuneFeatures>
1616  : ProcessorModel<Name, GenericModel, Features, TuneFeatures>;
1618 class ProcModel<string Name, SchedMachineModel Model, // Processor entry with a caller-chosen Model.
1619                 list<SubtargetFeature> Features,
1620                 list<SubtargetFeature> TuneFeatures>
1621  : ProcessorModel<Name, Model, Features, TuneFeatures>; // Arguments are forwarded unchanged.
1623 // NOTE: CMPXCHG8B is here for legacy compatibility so that it is only disabled
1624 // if i386/i486 is specifically requested.
1625 // NOTE: 64Bit is here as "generic" is the default llc CPU. The X86Subtarget
1626 // constructor checks that any CPU used in 64-bit mode has FeatureX86_64
1627 // enabled. It has no effect on code generation.
1628 // NOTE: As a default tuning, "generic" aims to produce code optimized for the
1629 // most common X86 processors. The tunings might be changed over time. It is
1630 // recommended to use "tune-cpu"="x86-64" in function attribute for consistency.
1631 def : ProcModel<"generic", SandyBridgeModel,
1632                 [FeatureX87, FeatureCX8, FeatureX86_64], // ISA features; the NOTEs above explain CX8 and 64Bit.
1633                 [TuningSlow3OpsLEA, // Default tuning list for "generic" starts here.
1634                  TuningSlowDivide64,
1635                  TuningMacroFusion,
1636                  TuningFastScalarFSQRT,
1637                  TuningFast15ByteNOP,
1638                  TuningInsertVZEROUPPER]>;
1640 def : Proc<"i386",            [FeatureX87], // No FeatureCX8 for i386.
1641                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1642 def : Proc<"i486",            [FeatureX87], // No FeatureCX8 for i486.
1643                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1644 def : Proc<"i586",            [FeatureX87, FeatureCX8], // Same lists as "pentium".
1645                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1646 def : Proc<"pentium",         [FeatureX87, FeatureCX8], // Same lists as "i586".
1647                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1648 foreach P = ["pentium-mmx", "pentium_mmx"] in { // One identical Proc per name spelling.
1649   def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX],
1650                 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1652 def : Proc<"i686", [FeatureX87, FeatureCX8, FeatureCMOV], // The "pentium" feature list plus CMOV.
1653                    [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1654 foreach P = ["pentiumpro", "pentium_pro"] in { // One identical Proc per name spelling.
1655   def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV, FeatureNOPL],
1656                 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1658 foreach P = ["pentium2", "pentium_ii"] in { // One identical Proc per name spelling.
1659   def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX, FeatureCMOV,
1660                           FeatureFXSR, FeatureNOPL],
1661                         [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1663 foreach P = ["pentium3", "pentium3m", "pentium_iii_no_xmm_regs", "pentium_iii"] in { // All four names get the same lists.
1664   def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX,
1665                  FeatureSSE1, FeatureFXSR, FeatureNOPL, FeatureCMOV],
1666                 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1669 // Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
1670 // The intent is to enable it for pentium4 which is the current default
1671 // processor in a vanilla 32-bit clang compilation when no specific
1672 // architecture is specified.  This generally gives a nice performance
1673 // increase on silvermont, with largely neutral behavior on other
1674 // contemporary large core processors.
1675 // pentium-m, pentium4m, prescott and nocona are included as a preventative
1676 // measure to avoid performance surprises, in case clang's default cpu
1677 // changes slightly.
1679 foreach P = ["pentium_m", "pentium-m"] in { // One identical ProcModel per name spelling.
1680 def : ProcModel<P, GenericPostRAModel, // Uses GenericPostRAModel, per the comment above.
1681                 [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2,
1682                 FeatureFXSR, FeatureNOPL, FeatureCMOV],
1683                 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1686 foreach P = ["pentium4", "pentium4m", "pentium_4"] in {
1687   def : ProcModel<P, GenericPostRAModel,
1688                   [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2,
1689                    FeatureFXSR, FeatureNOPL, FeatureCMOV],
1690                   [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1693 // Intel Quark.
// The feature list contains only FeatureCX8; unlike the neighbouring entries,
// FeatureX87 is not listed for this CPU name.
1694 def : Proc<"lakemont", [FeatureCX8],
1695                        [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1697 // Intel Core Duo.
// Uses SandyBridgeModel as its scheduling model together with an SSE3-level
// feature list; FeatureX86_64 is not part of this list.
1698 def : ProcModel<"yonah", SandyBridgeModel,
1699                 [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3,
1700                  FeatureFXSR, FeatureNOPL, FeatureCMOV],
1701                 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1703 // NetBurst.
// "prescott" and its alias use GenericPostRAModel with an SSE3-level feature
// list. "nocona" uses the same model and lists the same features plus
// FeatureX86_64 and FeatureCX16, followed by the same two tuning flags.
1704 foreach P = ["prescott", "pentium_4_sse3"] in {
1705   def : ProcModel<P, GenericPostRAModel,
1706                   [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3,
1707                   FeatureFXSR, FeatureNOPL, FeatureCMOV],
1708                   [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1710 def : ProcModel<"nocona", GenericPostRAModel, [
1711   FeatureX87,
1712   FeatureCX8,
1713   FeatureCMOV,
1714   FeatureMMX,
1715   FeatureSSE3,
1716   FeatureFXSR,
1717   FeatureNOPL,
1718   FeatureX86_64,
1719   FeatureCX16,
1722   TuningSlowUAMem16,
1723   TuningInsertVZEROUPPER
1726 // Intel Core 2 Solo/Duo.
// The "core2" and "penryn" loops both use SandyBridgeModel and list the same
// entries, except for the SSE level: FeatureSSSE3 for the core2 names and
// FeatureSSE41 for the penryn names. Both add TuningMacroFusion ahead of the
// two tuning flags used by the older entries.
1727 foreach P = ["core2", "core_2_duo_ssse3"] in {
1728 def : ProcModel<P, SandyBridgeModel, [
1729   FeatureX87,
1730   FeatureCX8,
1731   FeatureCMOV,
1732   FeatureMMX,
1733   FeatureSSSE3,
1734   FeatureFXSR,
1735   FeatureNOPL,
1736   FeatureX86_64,
1737   FeatureCX16,
1738   FeatureLAHFSAHF64
1741   TuningMacroFusion,
1742   TuningSlowUAMem16,
1743   TuningInsertVZEROUPPER
1746 foreach P = ["penryn", "core_2_duo_sse4_1"] in {
1747 def : ProcModel<P, SandyBridgeModel, [
1748   FeatureX87,
1749   FeatureCX8,
1750   FeatureCMOV,
1751   FeatureMMX,
1752   FeatureSSE41,
1753   FeatureFXSR,
1754   FeatureNOPL,
1755   FeatureX86_64,
1756   FeatureCX16,
1757   FeatureLAHFSAHF64
1760   TuningMacroFusion,
1761   TuningSlowUAMem16,
1762   TuningInsertVZEROUPPER
1766 // Atom CPUs.
// From here on the feature and tuning lists are taken from fields of
// ProcessorFeatures (defined outside this section) instead of being spelled
// out inline. Only the bonnell/atom names use AtomModel; every other entry
// in this group uses SLMModel as its scheduling model.
1767 foreach P = ["bonnell", "atom"] in {
1768   def : ProcModel<P, AtomModel, ProcessorFeatures.AtomFeatures,
1769                   ProcessorFeatures.AtomTuning>;
1772 foreach P = ["silvermont", "slm", "atom_sse4_2"] in {
1773   def : ProcModel<P, SLMModel, ProcessorFeatures.SLMFeatures,
1774                   ProcessorFeatures.SLMTuning>;
// NOTE(review): this entry pairs GLMFeatures with SLMTuning, whereas
// "goldmont" below pairs GLMFeatures with GLMTuning -- presumably
// intentional; confirm.
1777 def : ProcModel<"atom_sse4_2_movbe", SLMModel, ProcessorFeatures.GLMFeatures,
1778                 ProcessorFeatures.SLMTuning>;
1779 def : ProcModel<"goldmont", SLMModel, ProcessorFeatures.GLMFeatures,
1780                 ProcessorFeatures.GLMTuning>;
1781 foreach P = ["goldmont_plus", "goldmont-plus"] in {
1782   def : ProcModel<P, SLMModel, ProcessorFeatures.GLPFeatures,
1783                   ProcessorFeatures.GLPTuning>;
1785 def : ProcModel<"tremont", SLMModel, ProcessorFeatures.TRMFeatures,
1786                 ProcessorFeatures.TRMTuning>;
// Intel Core-family names from nehalem through skylake, plus knl/knm.
// Scheduling models used in this group: SandyBridgeModel for the nehalem,
// westmere, sandybridge and ivybridge names; HaswellModel, BroadwellModel and
// SkylakeClientModel for haswell, broadwell and skylake respectively; and
// HaswellModel again for knl and knm.
1788 // "Arrandale" along with corei3 and corei5
1789 foreach P = ["nehalem", "corei7", "core_i7_sse4_2"] in {
1790   def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.NHMFeatures,
1791                   ProcessorFeatures.NHMTuning>;
1794 // Westmere is the corei3/i5/i7 path from nehalem to sandybridge
1795 foreach P = ["westmere", "core_aes_pclmulqdq"] in {
1796   def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.WSMFeatures,
1797                   ProcessorFeatures.WSMTuning>;
1800 foreach P = ["sandybridge", "corei7-avx", "core_2nd_gen_avx"] in {
1801   def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.SNBFeatures,
1802                   ProcessorFeatures.SNBTuning>;
1805 foreach P = ["ivybridge", "core-avx-i", "core_3rd_gen_avx"] in {
1806   def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.IVBFeatures,
1807                   ProcessorFeatures.IVBTuning>;
1810 foreach P = ["haswell", "core-avx2", "core_4th_gen_avx", "core_4th_gen_avx_tsx"] in {
1811   def : ProcModel<P, HaswellModel, ProcessorFeatures.HSWFeatures,
1812                   ProcessorFeatures.HSWTuning>;
1815 foreach P = ["broadwell", "core_5th_gen_avx", "core_5th_gen_avx_tsx"] in {
1816   def : ProcModel<P, BroadwellModel, ProcessorFeatures.BDWFeatures,
1817                   ProcessorFeatures.BDWTuning>;
1820 def : ProcModel<"skylake", SkylakeClientModel, ProcessorFeatures.SKLFeatures,
1821                 ProcessorFeatures.SKLTuning>;
1823 // FIXME: define KNL scheduler model
1824 foreach P = ["knl", "mic_avx512"] in {
1825   def : ProcModel<P, HaswellModel, ProcessorFeatures.KNLFeatures,
1826                   ProcessorFeatures.KNLTuning>;
// "knm" has its own feature list (KNMFeatures) but reuses KNLTuning.
1828 def : ProcModel<"knm", HaswellModel, ProcessorFeatures.KNMFeatures,
1829                 ProcessorFeatures.KNLTuning>;
// Names from skylake-avx512 through sapphirerapids. Scheduling models used:
// SkylakeServerModel (skylake-avx512 aliases, cascadelake, cooperlake,
// cannonlake), IceLakeModel (icelake-client, rocketlake, icelake-server,
// tigerlake) and SapphireRapidsModel (sapphirerapids).
1831 foreach P = ["skylake-avx512", "skx", "skylake_avx512"] in {
1832   def : ProcModel<P, SkylakeServerModel, ProcessorFeatures.SKXFeatures,
1833                   ProcessorFeatures.SKXTuning>;
1836 def : ProcModel<"cascadelake", SkylakeServerModel,
1837                 ProcessorFeatures.CLXFeatures, ProcessorFeatures.CLXTuning>;
1838 def : ProcModel<"cooperlake", SkylakeServerModel,
1839                 ProcessorFeatures.CPXFeatures, ProcessorFeatures.CPXTuning>;
1840 def : ProcModel<"cannonlake", SkylakeServerModel,
1841                 ProcessorFeatures.CNLFeatures, ProcessorFeatures.CNLTuning>;
1842 foreach P = ["icelake-client", "icelake_client"] in {
1843 def : ProcModel<P, IceLakeModel,
1844                 ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>;
// "rocketlake" takes exactly the same arguments as the icelake-client names.
1846 def : ProcModel<"rocketlake", IceLakeModel,
1847                 ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>;
1848 foreach P = ["icelake-server", "icelake_server"] in {
1849 def : ProcModel<P, IceLakeModel,
1850                 ProcessorFeatures.ICXFeatures, ProcessorFeatures.ICXTuning>;
1852 def : ProcModel<"tigerlake", IceLakeModel,
1853                 ProcessorFeatures.TGLFeatures, ProcessorFeatures.TGLTuning>;
1854 def : ProcModel<"sapphirerapids", SapphireRapidsModel,
1855                 ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
// Names from alderlake through clearwaterforest. Every entry in this group
// uses AlderlakePModel as its scheduling model. Tuning is either ADLTuning or
// GRTTuning (gracemont, sierraforest, grandridge); raptorlake and meteorlake
// take exactly the same arguments as alderlake.
1856 def : ProcModel<"alderlake", AlderlakePModel,
1857                 ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
1858 // FIXME: Use Gracemont Schedule Model when it is ready.
1859 def : ProcModel<"gracemont", AlderlakePModel,
1860                 ProcessorFeatures.ADLFeatures, ProcessorFeatures.GRTTuning>;
1861 foreach P = ["sierraforest", "grandridge"] in {
1862   def : ProcModel<P, AlderlakePModel, ProcessorFeatures.SRFFeatures,
1863                 ProcessorFeatures.GRTTuning>;
1865 def : ProcModel<"raptorlake", AlderlakePModel,
1866                 ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
1867 def : ProcModel<"meteorlake", AlderlakePModel,
1868                 ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
// NOTE(review): "arrowlake" pairs SRFFeatures with ADLTuning -- presumably
// intentional; confirm against the ProcessorFeatures definitions.
1869 def : ProcModel<"arrowlake", AlderlakePModel,
1870                 ProcessorFeatures.SRFFeatures, ProcessorFeatures.ADLTuning>;
1871 foreach P = ["arrowlake-s", "arrowlake_s", "lunarlake"] in {
1872 def : ProcModel<P, AlderlakePModel,
1873                 ProcessorFeatures.ARLSFeatures, ProcessorFeatures.ADLTuning>;
1875 def : ProcModel<"pantherlake", AlderlakePModel,
1876                 ProcessorFeatures.PTLFeatures, ProcessorFeatures.ADLTuning>;
1877 def : ProcModel<"clearwaterforest", AlderlakePModel,
1878                 ProcessorFeatures.CWFFeatures, ProcessorFeatures.ADLTuning>;
// emeraldrapids through diamondrapids. All four definitions use
// SapphireRapidsModel. "emeraldrapids" passes SPRFeatures and SPRTuning;
// the graniterapids-d aliases and "diamondrapids" have their own feature
// lists but reuse GNRTuning.
1879 def : ProcModel<"emeraldrapids", SapphireRapidsModel,
1880                 ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
1881 def : ProcModel<"graniterapids", SapphireRapidsModel,
1882                 ProcessorFeatures.GNRFeatures, ProcessorFeatures.GNRTuning>;
1883 foreach P = ["graniterapids-d", "graniterapids_d"] in {
1884 def : ProcModel<P, SapphireRapidsModel,
1885                 ProcessorFeatures.GNRDFeatures, ProcessorFeatures.GNRTuning>;
1887 def : ProcModel<"diamondrapids", SapphireRapidsModel,
1888                 ProcessorFeatures.DMRFeatures, ProcessorFeatures.GNRTuning>;
1890 // AMD CPUs.
// k6 through barcelona, all defined with Proc (no scheduling-model argument).
// Feature progression visible below:
//   k6            : X87, CX8, MMX
//   k6-2, k6-3    : k6 + PRFCHW (the two entries are identical)
//   athlon names  : k6-2 + CMOV + NOPL, and TuningSlowSHLD is added
//   athlon-4/xp/mp: athlon + SSE1 + FXSR
//   k8 names      : SSE2 and X86_64, plus TuningFastScalarShiftMasks and
//                   TuningSBBDepBreaking
//   k8-sse3 names : as k8 but SSE3 instead of SSE2, plus CX16
//   amdfam10      : lists come from ProcessorFeatures.Barcelona*
1892 def : Proc<"k6",   [FeatureX87, FeatureCX8, FeatureMMX],
1893                    [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1894 def : Proc<"k6-2", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
1895                    [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1896 def : Proc<"k6-3", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
1897                    [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1899 foreach P = ["athlon", "athlon-tbird"] in {
1900   def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeaturePRFCHW,
1901                  FeatureNOPL],
1902                 [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1905 foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
1906   def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV,
1907                  FeatureSSE1, FeatureMMX, FeaturePRFCHW, FeatureFXSR, FeatureNOPL],
1908                 [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1911 foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
1912   def : Proc<P, [FeatureX87, FeatureCX8, FeatureSSE2, FeatureMMX, FeaturePRFCHW,
1913                  FeatureFXSR, FeatureNOPL, FeatureX86_64, FeatureCMOV],
1914                 [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
1915                  TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
1918 foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
1919   def : Proc<P, [FeatureX87, FeatureCX8, FeatureSSE3, FeatureMMX, FeaturePRFCHW,
1920                  FeatureFXSR, FeatureNOPL, FeatureCX16, FeatureCMOV,
1921                  FeatureX86_64],
1922                 [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
1923                  TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
1926 foreach P = ["amdfam10", "barcelona"] in {
1927   def : Proc<P, ProcessorFeatures.BarcelonaFeatures,
1928              ProcessorFeatures.BarcelonaTuning>;
// btver, bdver and znver names. "btver1" is defined with Proc; every other
// entry in this group uses ProcModel with an explicit scheduling model.
// Feature and tuning lists all come from ProcessorFeatures fields.
1931 // Bobcat
1932 def : Proc<"btver1", ProcessorFeatures.BtVer1Features,
1933            ProcessorFeatures.BtVer1Tuning>;
1934 // Jaguar
1935 def : ProcModel<"btver2", BtVer2Model, ProcessorFeatures.BtVer2Features,
1936                 ProcessorFeatures.BtVer2Tuning>;
1938 // Bulldozer
// bdver1 is given BdVer2Model, the same model used for bdver2 below.
1939 def : ProcModel<"bdver1", BdVer2Model, ProcessorFeatures.BdVer1Features,
1940                 ProcessorFeatures.BdVer1Tuning>;
1941 // Piledriver
1942 def : ProcModel<"bdver2", BdVer2Model, ProcessorFeatures.BdVer2Features,
1943                 ProcessorFeatures.BdVer2Tuning>;
1944 // Steamroller
1945 // NOTE: BdVer2Model is only an approx model for Steamroller.
1946 def : ProcModel<"bdver3", BdVer2Model, ProcessorFeatures.BdVer3Features,
1947                 ProcessorFeatures.BdVer3Tuning>;
1948 // Excavator
1949 // NOTE: Znver1Model is only an approx model for Excavator (with AVX2).
1950 def : ProcModel<"bdver4", Znver1Model, ProcessorFeatures.BdVer4Features,
1951                 ProcessorFeatures.BdVer4Tuning>;
1953 def : ProcModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures,
1954                 ProcessorFeatures.ZNTuning>;
1955 def : ProcModel<"znver2", Znver2Model, ProcessorFeatures.ZN2Features,
1956                 ProcessorFeatures.ZN2Tuning>;
1957 def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features,
1958                 ProcessorFeatures.ZN3Tuning>;
1959 def : ProcModel<"znver4", Znver4Model, ProcessorFeatures.ZN4Features,
1960                 ProcessorFeatures.ZN4Tuning>;
// NOTE(review): znver5 is given Znver4Model while using its own ZN5 feature
// and tuning lists -- presumably a stand-in scheduling model; confirm.
1961 def : ProcModel<"znver5", Znver4Model, ProcessorFeatures.ZN5Features,
1962                 ProcessorFeatures.ZN5Tuning>;
// geode, winchip and c3 names, all defined with Proc and the common two-entry
// tuning list. FeatureCX8 is listed for "geode" and "c3-2" but not for
// "winchip-c6", "winchip2" or "c3"; "c3-2" is the only entry here that lists
// SSE1, FXSR and CMOV.
1964 def : Proc<"geode",           [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
1965                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1967 def : Proc<"winchip-c6",      [FeatureX87, FeatureMMX],
1968                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1969 def : Proc<"winchip2",        [FeatureX87, FeatureMMX, FeaturePRFCHW],
1970                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1971 def : Proc<"c3",              [FeatureX87, FeatureMMX, FeaturePRFCHW],
1972                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1973 def : Proc<"c3-2",            [FeatureX87, FeatureCX8, FeatureMMX,
1974                                FeatureSSE1, FeatureFXSR, FeatureCMOV],
1975                               [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1977 // We also provide a generic 64-bit specific x86 processor model which tries to
1978 // be good for modern chips without enabling instruction set encodings past the
1979 // basic SSE2 and 64-bit ones. It disables slow things from any mainstream and
1980 // modern 64-bit x86 chip, and enables features that are generally beneficial.
1982 // We currently use the Sandy Bridge model as the default scheduling model as
1983 // we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which
1984 // covers a huge swath of x86 processors. If there are specific scheduling
1985 // knobs which need to be tuned differently for AMD chips, we might consider
1986 // forming a common base for them.
// Four generic names follow: "x86-64" and "x86-64-v2" through "x86-64-v4".
// Each takes its feature and tuning lists from the ProcessorFeatures fields
// X86_64V1 through X86_64V4; the scheduling model is SandyBridgeModel for the
// first two, then HaswellModel and SkylakeServerModel.
1987 def : ProcModel<"x86-64", SandyBridgeModel, ProcessorFeatures.X86_64V1Features,
1988                 ProcessorFeatures.X86_64V1Tuning>;
1989 // Close to Sandybridge.
1990 def : ProcModel<"x86-64-v2", SandyBridgeModel, ProcessorFeatures.X86_64V2Features,
1991                 ProcessorFeatures.X86_64V2Tuning>;
1992 // Close to Haswell.
1993 def : ProcModel<"x86-64-v3", HaswellModel, ProcessorFeatures.X86_64V3Features,
1994                 ProcessorFeatures.X86_64V3Tuning>;
1995 // Close to the AVX-512 level implemented by Xeon Scalable Processors.
1996 def : ProcModel<"x86-64-v4", SkylakeServerModel, ProcessorFeatures.X86_64V4Features,
1997                 ProcessorFeatures.X86_64V4Tuning>;
1999 //===----------------------------------------------------------------------===//
2000 // Calling Conventions
2001 //===----------------------------------------------------------------------===//
2003 include "X86CallingConv.td"
2006 //===----------------------------------------------------------------------===//
2007 // Assembly Parser
2008 //===----------------------------------------------------------------------===//
// Assembly parser variant 0, named "att": "#" is the comment delimiter and
// register names carry a "%" prefix. The Variant number matches the one used
// by ATTAsmWriter.
2010 def ATTAsmParserVariant : AsmParserVariant {
2011   int Variant = 0;
2013   // Variant name.
2014   string Name = "att";
2016   // Discard comments in assembly strings.
2017   string CommentDelimiter = "#";
2019   // Recognize hard coded registers.
2020   string RegisterPrefix = "%";
// Assembly parser variant 1, named "intel": ";" is the comment delimiter and
// the register prefix is the empty string. The Variant number matches the one
// used by IntelAsmWriter.
2023 def IntelAsmParserVariant : AsmParserVariant {
2024   int Variant = 1;
2026   // Variant name.
2027   string Name = "intel";
2029   // Discard comments in assembly strings.
2030   string CommentDelimiter = ";";
2032   // Recognize hard coded registers.
2033   string RegisterPrefix = "";
2036 //===----------------------------------------------------------------------===//
2037 // Assembly Printers
2038 //===----------------------------------------------------------------------===//
2040 // The X86 target supports two different syntaxes for emitting machine code.
2041 // This is controlled by the -x86-asm-syntax={att|intel} option.
// Assembly writer for variant 0; names ATTInstPrinter as the printer class.
2042 def ATTAsmWriter : AsmWriter {
2043   string AsmWriterClassName  = "ATTInstPrinter";
2044   int Variant = 0;
// Assembly writer for variant 1; names IntelInstPrinter as the printer class.
2046 def IntelAsmWriter : AsmWriter {
2047   string AsmWriterClassName  = "IntelInstPrinter";
2048   int Variant = 1;
// Top-level Target record for X86. It names X86InstrInfo as the instruction
// set and registers both assembly parser variants and both assembly writers
// (AT&T first, Intel second in each list). AllowRegisterRenaming is set to 1.
2051 def X86 : Target {
2052   // Information about the instructions...
2053   let InstructionSet = X86InstrInfo;
2054   let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
2055   let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
2056   let AllowRegisterRenaming = 1;
2059 //===----------------------------------------------------------------------===//
2060 // Pfm Counters
2061 //===----------------------------------------------------------------------===//
2063 include "X86PfmCounters.td"