1 //===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This is a target description file for the Intel i386 architecture, referred
10 // to here as the "X86" architecture.
12 //===----------------------------------------------------------------------===//
14 // Get the target-independent interfaces which we are implementing...
16 include "llvm/Target/Target.td"
18 //===----------------------------------------------------------------------===//
19 // X86 Subtarget state
21 // disregarding specific ABI / programming model
// Execution-mode selectors. Each record sets the like-named subtarget field
// to "true"; the help strings name the mode that is being selected.
def Is64Bit
    : SubtargetFeature<"64bit-mode", "Is64Bit", "true", "64-bit mode (x86_64)">;
def Is32Bit
    : SubtargetFeature<"32bit-mode", "Is32Bit", "true", "32-bit mode (80386)">;
def Is16Bit
    : SubtargetFeature<"16bit-mode", "Is16Bit", "true", "16-bit mode (i8086)">;
29 //===----------------------------------------------------------------------===//
30 // X86 Subtarget ISA features
31 //===----------------------------------------------------------------------===//
// x87 floating-point instructions ("x87").
def FeatureX87
    : SubtargetFeature<"x87", "HasX87", "true",
                       "Enable X87 float instructions">;

// NOPL instruction ("nopl").
def FeatureNOPL
    : SubtargetFeature<"nopl", "HasNOPL", "true",
                       "Enable NOPL instruction (generally pentium pro+)">;

// Conditional moves ("cmov").
def FeatureCMOV
    : SubtargetFeature<"cmov", "HasCMOV", "true",
                       "Enable conditional move instructions">;

// CMPXCHG8B ("cx8").
def FeatureCX8
    : SubtargetFeature<"cx8", "HasCX8", "true",
                       "Support CMPXCHG8B instructions">;
// CRC32 as a stand-alone feature ("crc32"); per the help text it covers the
// case where SSE4.2 is supported but the function is GPR only.
def FeatureCRC32
    : SubtargetFeature<"crc32", "HasCRC32", "true",
                       "Enable SSE 4.2 CRC32 instruction (used when SSE4.2 is supported but function is GPR only)">;

// POPCNT ("popcnt").
def FeaturePOPCNT
    : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
                       "Support POPCNT instruction">;

// fxsave/fxrestore ("fxsr").
def FeatureFXSR
    : SubtargetFeature<"fxsr", "HasFXSR", "true",
                       "Support fxsave/fxrestore instructions">;

// xsave ("xsave").
def FeatureXSAVE
    : SubtargetFeature<"xsave", "HasXSAVE", "true",
                       "Support xsave instructions">;
57 def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
58 "Support xsaveopt instructions",
61 def FeatureXSAVEC : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
62 "Support xsavec instructions",
65 def FeatureXSAVES : SubtargetFeature<"xsaves", "HasXSAVES", "true",
66 "Support xsaves instructions",
// Sets X86SSELevel to SSE1; this record lists no implied features.
def FeatureSSE1
    : SubtargetFeature<"sse", "X86SSELevel", "SSE1", "Enable SSE instructions">;
71 def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
72 "Enable SSE2 instructions",
74 def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
75 "Enable SSE3 instructions",
77 def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
78 "Enable SSSE3 instructions",
80 def FeatureSSE41 : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
81 "Enable SSE 4.1 instructions",
83 def FeatureSSE42 : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
84 "Enable SSE 4.2 instructions",
// MMX is kept as its own subtarget feature, apart from the SSE features: for
// odd compatibility reasons it must be possible to switch MMX off explicitly
// while SSE and later remain enabled.
def FeatureMMX
    : SubtargetFeature<"mmx", "X863DNowLevel", "MMX",
                       "Enable MMX instructions">;
91 def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
92 "Enable 3DNow! instructions",
94 def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
95 "Enable 3DNow! Athlon instructions",
// Every x86-64 CPU has SSE2, yet SSE2 is deliberately not an implied feature
// here: it can be turned off (e.g. when compiling OS kernels) without leaving
// 64-bit mode. No other feature should imply this bit. Its purpose is to
// enforce that only 64-bit capable CPUs are used in 64-bit mode.
def FeatureX86_64
    : SubtargetFeature<"64bit", "HasX86_64", "true",
                       "Support 64-bit instructions">;
103 def FeatureCX16 : SubtargetFeature<"cx16", "HasCX16", "true",
104 "64-bit with cmpxchg16b (this is true for most x86-64 chips, but not the first AMD chips)",
106 def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
107 "Support SSE 4a instructions",
110 def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX",
111 "Enable AVX instructions",
113 def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
114 "Enable AVX2 instructions",
116 def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true",
117 "Enable three-operand fused multiple-add",
119 def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true",
120 "Support 16-bit floating point conversion instructions",
// ZMM registers and 64-bit mask instructions ("evex512").
def FeatureEVEX512
    : SubtargetFeature<"evex512", "HasEVEX512", "true",
                       "Support ZMM and 64-bit mask instructions">;

// AVX-512 foundation ("avx512f"): sets X86SSELevel to AVX512 and implies
// AVX2, FMA and F16C.
def FeatureAVX512
    : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512",
                       "Enable AVX-512 instructions",
                       [FeatureAVX2, FeatureFMA, FeatureF16C]>;
127 def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true",
128 "Enable AVX-512 Exponential and Reciprocal Instructions",
130 def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true",
131 "Enable AVX-512 Conflict Detection Instructions",
133 def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
134 "true", "Enable AVX-512 Population Count Instructions",
136 def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
137 "Enable AVX-512 PreFetch Instructions",
139 def FeaturePREFETCHI : SubtargetFeature<"prefetchi", "HasPREFETCHI",
141 "Prefetch instruction with T0 or T1 Hint">;
142 def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1",
144 "Prefetch with Intent to Write and T1 Hint">;
145 def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
146 "Enable AVX-512 Doubleword and Quadword Instructions",
148 def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true",
149 "Enable AVX-512 Byte and Word Instructions",
151 def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true",
152 "Enable AVX-512 Vector Length eXtensions",
154 def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
155 "Enable AVX-512 Vector Byte Manipulation Instructions",
157 def FeatureVBMI2 : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
158 "Enable AVX-512 further Vector Byte Manipulation Instructions",
160 def FeatureAVXIFMA : SubtargetFeature<"avxifma", "HasAVXIFMA", "true",
163 def FeatureIFMA : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
164 "Enable AVX-512 Integer Fused Multiple-Add",
// Protection keys ("pku").
def FeaturePKU
    : SubtargetFeature<"pku", "HasPKU", "true", "Enable protection keys">;
168 def FeatureVNNI : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
169 "Enable AVX-512 Vector Neural Network Instructions",
171 def FeatureAVXVNNI : SubtargetFeature<"avxvnni", "HasAVXVNNI", "true",
172 "Support AVX_VNNI encoding",
174 def FeatureBF16 : SubtargetFeature<"avx512bf16", "HasBF16", "true",
175 "Support bfloat16 floating point",
177 def FeatureBITALG : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
178 "Enable AVX-512 Bit Algorithms",
180 def FeatureVP2INTERSECT : SubtargetFeature<"avx512vp2intersect",
181 "HasVP2INTERSECT", "true",
182 "Enable AVX-512 vp2intersect",
// FIXME: FP16 scalar intrinsics use the type v8f16, which is supposed to be
// guarded by hasVLX, so FeatureFP16 currently implies FeatureVLX.
// FIXME: FP16 conversion between f16 and i64 customizes the type v8i64, which
// is supposed to be guarded by hasDQI, so FeatureFP16 currently implies
// FeatureDQI too.
def FeatureFP16
    : SubtargetFeature<"avx512fp16", "HasFP16", "true",
                       "Support 16-bit floating point",
                       [FeatureBWI, FeatureVLX, FeatureDQI]>;
192 def FeatureAVXVNNIINT8 : SubtargetFeature<"avxvnniint8",
193 "HasAVXVNNIINT8", "true",
194 "Enable AVX-VNNI-INT8",
196 def FeatureAVXVNNIINT16 : SubtargetFeature<"avxvnniint16",
197 "HasAVXVNNIINT16", "true",
198 "Enable AVX-VNNI-INT16",
200 def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
201 "Enable packed carry-less multiplication instructions",
203 def FeatureGFNI : SubtargetFeature<"gfni", "HasGFNI", "true",
204 "Enable Galois Field Arithmetic Instructions",
// vpclmulqdq instructions; implies AVX and PCLMUL.
def FeatureVPCLMULQDQ
    : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
                       "Enable vpclmulqdq instructions",
                       [FeatureAVX, FeaturePCLMUL]>;

// Four-operand FMA; implies AVX and SSE4A.
// NOTE(review): the help text reads "multiple-add"; "multiply-add" is
// presumably intended. The string is left untouched here.
def FeatureFMA4
    : SubtargetFeature<"fma4", "HasFMA4", "true",
                       "Enable four-operand fused multiple-add",
                       [FeatureAVX, FeatureSSE4A]>;
212 def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
213 "Enable XOP instructions",
// Permits unaligned memory operands on SSE instructions
// ("sse-unaligned-mem").
def FeatureSSEUnalignedMem
    : SubtargetFeature<"sse-unaligned-mem", "HasSSEUnalignedMem", "true",
                       "Allow unaligned memory operands with SSE instructions (this may require setting a configuration bit in the processor)">;
218 def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
219 "Enable AES instructions",
// Vector AES ("vaes"); implies AVX2 and AES.
def FeatureVAES
    : SubtargetFeature<"vaes", "HasVAES", "true",
                       "Promote selected AES instructions to AVX512/AVX registers",
                       [FeatureAVX2, FeatureAES]>;
// TBM instructions ("tbm").
def FeatureTBM
    : SubtargetFeature<"tbm", "HasTBM", "true", "Enable TBM instructions">;

// LWP instructions ("lwp").
def FeatureLWP
    : SubtargetFeature<"lwp", "HasLWP", "true", "Enable LWP instructions">;

// MOVBE instruction ("movbe").
def FeatureMOVBE
    : SubtargetFeature<"movbe", "HasMOVBE", "true",
                       "Support MOVBE instruction">;

// RDRAND instruction. Note that the feature string is spelled "rdrnd".
def FeatureRDRAND
    : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
                       "Support RDRAND instruction">;

// FS/GS base instructions ("fsgsbase").
def FeatureFSGSBase
    : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
                       "Support FS/GS Base instructions">;
// LZCNT instruction ("lzcnt").
def FeatureLZCNT
    : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
                       "Support LZCNT instruction">;

// BMI instructions ("bmi").
def FeatureBMI
    : SubtargetFeature<"bmi", "HasBMI", "true", "Support BMI instructions">;

// BMI2 instructions ("bmi2").
def FeatureBMI2
    : SubtargetFeature<"bmi2", "HasBMI2", "true", "Support BMI2 instructions">;

// RTM instructions ("rtm").
def FeatureRTM
    : SubtargetFeature<"rtm", "HasRTM", "true", "Support RTM instructions">;

// ADX instructions ("adx").
def FeatureADX
    : SubtargetFeature<"adx", "HasADX", "true", "Support ADX instructions">;
244 def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true",
245 "Enable SHA instructions",
247 def FeatureSHA512 : SubtargetFeature<"sha512", "HasSHA512", "true",
248 "Support SHA512 instructions",
// The processor supports CET SHSTK, i.e. Control-Flow Enforcement Technology
// using a shadow stack.
def FeatureSHSTK
    : SubtargetFeature<"shstk", "HasSHSTK", "true",
                       "Support CET Shadow-Stack instructions">;
254 def FeatureSM3 : SubtargetFeature<"sm3", "HasSM3", "true",
255 "Support SM3 instructions",
257 def FeatureSM4 : SubtargetFeature<"sm4", "HasSM4", "true",
258 "Support SM4 instructions",
// PRFCHW instructions ("prfchw").
def FeaturePRFCHW
    : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
                       "Support PRFCHW instructions">;

// RDSEED instruction ("rdseed").
def FeatureRDSEED
    : SubtargetFeature<"rdseed", "HasRDSEED", "true",
                       "Support RDSEED instruction">;

// LAHF/SAHF in 64-bit mode. Note that the feature string is "sahf".
def FeatureLAHFSAHF64
    : SubtargetFeature<"sahf", "HasLAHFSAHF64", "true",
                       "Support LAHF and SAHF instructions in 64-bit mode">;

// MONITORX/MWAITX ("mwaitx").
def FeatureMWAITX
    : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
                       "Enable MONITORX/MWAITX timer functionality">;
// Cache line zero ("clzero").
def FeatureCLZERO
    : SubtargetFeature<"clzero", "HasCLZERO", "true", "Enable Cache Line Zero">;

// Cache line demote ("cldemote").
def FeatureCLDEMOTE
    : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
                       "Enable Cache Line Demote">;

// ptwrite instruction ("ptwrite").
def FeaturePTWRITE
    : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
                       "Support ptwrite instruction">;

// AMX tile instructions ("amx-tile").
def FeatureAMXTILE
    : SubtargetFeature<"amx-tile", "HasAMXTILE", "true",
                       "Support AMX-TILE instructions">;
276 def FeatureAMXINT8 : SubtargetFeature<"amx-int8", "HasAMXINT8", "true",
277 "Support AMX-INT8 instructions",
279 def FeatureAMXBF16 : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
280 "Support AMX-BF16 instructions",
282 def FeatureAMXFP16 : SubtargetFeature<"amx-fp16", "HasAMXFP16", "true",
283 "Support AMX amx-fp16 instructions",
285 def FeatureAMXCOMPLEX : SubtargetFeature<"amx-complex", "HasAMXCOMPLEX", "true",
286 "Support AMX-COMPLEX instructions",
// CMPCCXADD instructions ("cmpccxadd").
def FeatureCMPCCXADD
    : SubtargetFeature<"cmpccxadd", "HasCMPCCXADD", "true",
                       "Support CMPCCXADD instructions">;
290 def FeatureRAOINT : SubtargetFeature<"raoint", "HasRAOINT", "true",
291 "Support RAO-INT instructions",
293 def FeatureAVXNECONVERT : SubtargetFeature<"avxneconvert", "HasAVXNECONVERT", "true",
294 "Support AVX-NE-CONVERT instructions",
// INVPCID ("invpcid").
def FeatureINVPCID
    : SubtargetFeature<"invpcid", "HasINVPCID", "true",
                       "Invalidate Process-Context Identifier">;

// Software Guard Extensions ("sgx").
def FeatureSGX
    : SubtargetFeature<"sgx", "HasSGX", "true",
                       "Enable Software Guard Extensions">;

// CLFLUSHOPT ("clflushopt").
def FeatureCLFLUSHOPT
    : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
                       "Flush A Cache Line Optimized">;

// CLWB ("clwb").
def FeatureCLWB
    : SubtargetFeature<"clwb", "HasCLWB", "true", "Cache Line Write Back">;

// WBNOINVD ("wbnoinvd").
def FeatureWBNOINVD
    : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
                       "Write Back No Invalidate">;
// RDPID ("rdpid").
def FeatureRDPID
    : SubtargetFeature<"rdpid", "HasRDPID", "true",
                       "Support RDPID instructions">;

// RDPRU ("rdpru").
def FeatureRDPRU
    : SubtargetFeature<"rdpru", "HasRDPRU", "true",
                       "Support RDPRU instructions">;

// Wait and pause enhancements ("waitpkg").
def FeatureWAITPKG
    : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
                       "Wait and pause enhancements">;

// ENQCMD ("enqcmd").
def FeatureENQCMD
    : SubtargetFeature<"enqcmd", "HasENQCMD", "true",
                       "Has ENQCMD instructions">;
314 def FeatureKL : SubtargetFeature<"kl", "HasKL", "true",
315 "Support Key Locker kl Instructions",
317 def FeatureWIDEKL : SubtargetFeature<"widekl", "HasWIDEKL", "true",
318 "Support Key Locker wide Instructions",
// hreset instruction ("hreset").
def FeatureHRESET
    : SubtargetFeature<"hreset", "HasHRESET", "true", "Has hreset instruction">;

// serialize instruction ("serialize").
def FeatureSERIALIZE
    : SubtargetFeature<"serialize", "HasSERIALIZE", "true",
                       "Has serialize instruction">;

// TSXLDTRK instructions ("tsxldtrk").
def FeatureTSXLDTRK
    : SubtargetFeature<"tsxldtrk", "HasTSXLDTRK", "true",
                       "Support TSXLDTRK instructions">;

// UINTR instructions ("uintr").
def FeatureUINTR
    : SubtargetFeature<"uintr", "HasUINTR", "true", "Has UINTR Instructions">;
// USERMSR instructions ("usermsr").
def FeatureUSERMSR
    : SubtargetFeature<"usermsr", "HasUSERMSR", "true",
                       "Support USERMSR instructions">;

// Platform configuration instruction ("pconfig").
def FeaturePCONFIG
    : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
                       "platform configuration instruction">;

// movdiri: direct store of an integer ("movdiri").
def FeatureMOVDIRI
    : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
                       "Support movdiri instruction (direct store integer)">;

// movdir64b: direct store of 64 bytes ("movdir64b").
def FeatureMOVDIR64B
    : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
                       "Support movdir64b instruction (direct store 64 bytes)">;
// AVX10.1 limited to 256-bit vectors ("avx10.1-256"). Enabling it implies the
// AVX-512 sub-features and vector crypto features listed below.
def FeatureAVX10_1
    : SubtargetFeature<"avx10.1-256", "HasAVX10_1", "true",
                       "Support AVX10.1 up to 256-bit instruction",
                       [FeatureCDI, FeatureVBMI, FeatureIFMA, FeatureVNNI,
                        FeatureBF16, FeatureVPOPCNTDQ, FeatureVBMI2,
                        FeatureBITALG, FeatureVAES, FeatureVPCLMULQDQ,
                        FeatureFP16]>;

// AVX10.1 with 512-bit vectors ("avx10.1-512"): the 256-bit variant plus
// EVEX512.
def FeatureAVX10_1_512
    : SubtargetFeature<"avx10.1-512", "HasAVX10_1_512", "true",
                       "Support AVX10.1 up to 512-bit instruction",
                       [FeatureAVX10_1, FeatureEVEX512]>;
// Extended general purpose registers ("egpr").
def FeatureEGPR
    : SubtargetFeature<"egpr", "HasEGPR", "true",
                       "Support extended general purpose register">;

// PUSH2/POP2 ("push2pop2").
def FeaturePush2Pop2
    : SubtargetFeature<"push2pop2", "HasPush2Pop2", "true",
                       "Support PUSH2/POP2 instructions">;

// Push-pop acceleration ("ppx").
def FeaturePPX
    : SubtargetFeature<"ppx", "HasPPX", "true",
                       "Support Push-Pop Acceleration">;

// Non-destructive destination ("ndd").
def FeatureNDD
    : SubtargetFeature<"ndd", "HasNDD", "true",
                       "Support non-destructive destination">;

// Conditional cmp & test ("ccmp").
def FeatureCCMP
    : SubtargetFeature<"ccmp", "HasCCMP", "true",
                       "Support conditional cmp & test instructions">;

// Conditional faulting ("cf").
def FeatureCF
    : SubtargetFeature<"cf", "HasCF", "true", "Support conditional faulting">;
357 // Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
358 // "string operations"). See "REP String Enhancement" in the Intel Software
359 // Development Manual. This feature essentially means that REP MOVSB will copy
360 // using the largest available size instead of copying bytes one by one, making
361 // it at least as fast as REPMOVS{W,D,Q}.
364 "ermsb", "HasERMSB", "true",
365 "REP MOVS/STOS are fast">;
367 // Icelake and newer processors have Fast Short REP MOV.
370 "fsrm", "HasFSRM", "true",
371 "REP MOVSB of short lengths is faster">;
374 : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
375 "Use software floating point features">;
377 //===----------------------------------------------------------------------===//
378 // X86 Subtarget Security Mitigation features
379 //===----------------------------------------------------------------------===//
381 // Lower indirect calls using a special construct called a `retpoline` to
382 // mitigate potential Spectre v2 attacks against them.
383 def FeatureRetpolineIndirectCalls
385 "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
386 "Remove speculation of indirect calls from the generated code">;
388 // Lower indirect branches and switches either using conditional branch trees
389 // or using a special construct called a `retpoline` to mitigate potential
390 // Spectre v2 attacks against them.
391 def FeatureRetpolineIndirectBranches
393 "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
394 "Remove speculation of indirect branches from the generated code">;
396 // Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
397 // `retpoline-indirect-branches` above.
399 : SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
400 "Remove speculation of indirect branches from the "
401 "generated code, either by avoiding them entirely or "
402 "lowering them with a speculation blocking construct",
403 [FeatureRetpolineIndirectCalls,
404 FeatureRetpolineIndirectBranches]>;
406 // Rely on external thunks for the emitted retpoline calls. This allows users
407 // to provide their own custom thunk definitions in highly specialized
408 // environments such as a kernel that does boot-time hot patching.
409 def FeatureRetpolineExternalThunk
411 "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
412 "When lowering an indirect call or branch using a `retpoline`, rely "
413 "on the specified user provided thunk rather than emitting one "
414 "ourselves. Only has effect when combined with some other retpoline "
415 "feature", [FeatureRetpolineIndirectCalls]>;
417 // Mitigate LVI attacks against indirect calls/branches and call returns
418 def FeatureLVIControlFlowIntegrity
420 "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
421 "Prevent indirect calls/branches from using a memory operand, and "
422 "precede all indirect calls/branches from a register with an "
423 "LFENCE instruction to serialize control flow. Also decompose RET "
424 "instructions into a POP+LFENCE+JMP sequence.">;
426 // Enable SESES to mitigate speculative execution attacks
427 def FeatureSpeculativeExecutionSideEffectSuppression
429 "seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
430 "Prevent speculative execution side channel timing attacks by "
431 "inserting a speculation barrier before memory reads, memory writes, "
432 "and conditional branches. Implies LVI Control Flow integrity.",
433 [FeatureLVIControlFlowIntegrity]>;
435 // Mitigate LVI attacks against data loads
436 def FeatureLVILoadHardening
438 "lvi-load-hardening", "UseLVILoadHardening", "true",
439 "Insert LFENCE instructions to prevent data speculatively injected "
440 "into loads from being used maliciously.">;
442 def FeatureTaggedGlobals
444 "tagged-globals", "AllowTaggedGlobals", "true",
445 "Use an instruction sequence for taking the address of a global "
446 "that allows a memory tag in the upper address bits.">;
448 // Control codegen mitigation against Straight Line Speculation vulnerability.
449 def FeatureHardenSlsRet
451 "harden-sls-ret", "HardenSlsRet", "true",
452 "Harden against straight line speculation across RET instructions.">;
454 def FeatureHardenSlsIJmp
456 "harden-sls-ijmp", "HardenSlsIJmp", "true",
457 "Harden against straight line speculation across indirect JMP instructions.">;
459 //===----------------------------------------------------------------------===//
460 // X86 Subtarget Tuning features
461 //===----------------------------------------------------------------------===//
// Tuning: favour movmsk over vtest.
def TuningPreferMovmskOverVTest
    : SubtargetFeature<"prefer-movmsk-over-vtest", "PreferMovmskOverVTest",
                       "true", "Prefer movmsk over vtest instruction">;

// Tuning: SHLD is slow on this target.
def TuningSlowSHLD
    : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
                       "SHLD instruction is slow">;

// Tuning: PMULLD is slow relative to PMULLW/PMULHW and PMULUDQ.
def TuningSlowPMULLD
    : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
                       "PMULLD instruction is slow (compared to PMULLW/PMULHW and PMULUDQ)">;
472 def TuningSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
474 "PMADDWD is slower than PMULLD">;
// Tuning: unaligned 16-byte memory accesses are slow.
// FIXME: This should not apply to CPUs that do not have SSE.
def TuningSlowUAMem16
    : SubtargetFeature<"slow-unaligned-mem-16", "IsUnalignedMem16Slow", "true",
                       "Slow unaligned 16-byte memory access">;

// Tuning: unaligned 32-byte memory accesses are slow.
def TuningSlowUAMem32
    : SubtargetFeature<"slow-unaligned-mem-32", "IsUnalignedMem32Slow", "true",
                       "Slow unaligned 32-byte memory access">;
// Tuning: adjust the stack pointer with LEA.
def TuningLEAForSP
    : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
                       "Use LEA for adjusting the stack pointer (this is an optimization for Intel Atom processors)">;

// Set when 8-bit divisions are significantly faster than 32-bit divisions and
// should be used whenever possible.
def TuningSlowDivide32
    : SubtargetFeature<"idivl-to-divb", "HasSlowDivide32", "true",
                       "Use 8-bit divide for positive values less than 256">;

// Set when 32-bit divisions are significantly faster than 64-bit divisions
// and should be used whenever possible.
def TuningSlowDivide64
    : SubtargetFeature<"idivq-to-divl", "HasSlowDivide64", "true",
                       "Use 32-bit divide for positive values less than 2^32">;

// Tuning: pad short functions.
def TuningPadShortFunctions
    : SubtargetFeature<"pad-short-functions", "PadShortFunctions", "true",
                       "Pad short functions (to prevent a stall when returning too early)">;
// Some processors are slow on instructions that implicitly take two memory
// operands. In practice that means CALL, PUSH and POP with a memory operand
// should be avoided in favor of a MOV followed by a register CALL/PUSH/POP.
def TuningSlowTwoMemOps
    : SubtargetFeature<"slow-two-mem-ops", "SlowTwoMemOps", "true",
                       "Two memory operand instructions are slow">;
// Set when the inputs of an LEA instruction have to be ready at the address
// generation (AG) stage.
def TuningLEAUsesAG
    : SubtargetFeature<"lea-uses-ag", "LeaUsesAG", "true",
                       "LEA instruction needs inputs at AG stage">;

// Tuning: LEA is slow for certain argument combinations.
def TuningSlowLEA
    : SubtargetFeature<"slow-lea", "SlowLEA", "true",
                       "LEA instruction with certain arguments is slow">;
// Set when LEA is slow if it has all three source operands (base, index and
// offset), or if it uses base and index registers where the base is EBP, RBP,
// or R13.
def TuningSlow3OpsLEA
    : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
                       "LEA instruction with 3 ops or certain registers is slow">;

// Set when INC and DEC are slow when writing to flags.
def TuningSlowIncDec
    : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
                       "INC and DEC instructions are slower than ADD and SUB">;
// False-dependency tunings: each marks a group of instructions as having a
// false dependency on the destination register.
def TuningPOPCNTFalseDeps
    : SubtargetFeature<"false-deps-popcnt", "HasPOPCNTFalseDeps", "true",
                       "POPCNT has a false dependency on dest register">;

def TuningLZCNTFalseDeps
    : SubtargetFeature<"false-deps-lzcnt-tzcnt", "HasLZCNTFalseDeps", "true",
                       "LZCNT/TZCNT have a false dependency on dest register">;

def TuningMULCFalseDeps
    : SubtargetFeature<"false-deps-mulc", "HasMULCFalseDeps", "true",
                       "VF[C]MULCPH/SH has a false dependency on dest register">;

def TuningPERMFalseDeps
    : SubtargetFeature<"false-deps-perm", "HasPERMFalseDeps", "true",
                       "VPERMD/Q/PS/PD has a false dependency on dest register">;
// VRANGE* has a false dependency on the destination register.
def TuningRANGEFalseDeps
    : SubtargetFeature<"false-deps-range", "HasRANGEFalseDeps", "true",
                       "VRANGEPD/PS/SD/SS has a false dependency on dest register">;

// VGETMANT* has a false dependency on the destination register.
// NOTE(review): the help text spells "VGETMANDPS"; "VGETMANTPS" is presumably
// intended. The string is left untouched here.
def TuningGETMANTFalseDeps
    : SubtargetFeature<"false-deps-getmant", "HasGETMANTFalseDeps", "true",
                       "VGETMANTSS/SD/SH and VGETMANDPS/PD(memory version) has a"
                       " false dependency on dest register">;

// VPMULLQ has a false dependency on the destination register.
def TuningMULLQFalseDeps
    : SubtargetFeature<"false-deps-mullq", "HasMULLQFalseDeps", "true",
                       "VPMULLQ has a false dependency on dest register">;

// SBB of a register with itself carries no source dependency.
def TuningSBBDepBreaking
    : SubtargetFeature<"sbb-dep-breaking", "HasSBBDepBreaking", "true",
                       "SBB with same register has no source dependency">;
// On recent (port-bound) X86 processors it is preferable to combine into a
// single shuffle with a variable mask rather than use multiple fixed shuffles.
def TuningFastVariableCrossLaneShuffle : SubtargetFeature<
    "fast-variable-crosslane-shuffle", "HasFastVariableCrossLaneShuffle",
    "true", "Cross-lane shuffles with variable masks are fast">;
def TuningFastVariablePerLaneShuffle : SubtargetFeature<
    "fast-variable-perlane-shuffle", "HasFastVariablePerLaneShuffle",
    "true", "Per-lane shuffles with variable masks are fast">;
// Goldmont / Tremont (Atom in general) have no bypass delay.
def TuningNoDomainDelay
    : SubtargetFeature<"no-bypass-delay", "NoDomainDelay", "true",
                       "Has no bypass delay when using the 'wrong' domain">;

// Many processors (Nehalem and later on Intel) have no bypass delay when the
// wrong mov type is used.
def TuningNoDomainDelayMov
    : SubtargetFeature<"no-bypass-delay-mov", "NoDomainDelayMov", "true",
                       "Has no bypass delay when using the 'wrong' mov type">;

// Newer processors (Skylake and later on Intel) have no bypass delay when the
// wrong blend type is used.
def TuningNoDomainDelayBlend
    : SubtargetFeature<"no-bypass-delay-blend", "NoDomainDelayBlend", "true",
                       "Has no bypass delay when using the 'wrong' blend type">;

// Newer processors (Haswell and later on Intel) have no bypass delay when the
// wrong shuffle type is used.
def TuningNoDomainDelayShuffle
    : SubtargetFeature<"no-bypass-delay-shuffle", "NoDomainDelayShuffle", "true",
                       "Has no bypass delay when using the 'wrong' shuffle type">;
// On AVX512 targets (e.g. Skylake Server), prefer lowering shuffles to
// immediate shifts/rotates when those can use more ports than regular
// shuffles.
def TuningPreferShiftShuffle
    : SubtargetFeature<"faster-shift-than-shuffle", "PreferLowerShuffleAsShift",
                       "true", "Shifts are faster (or as fast) as shuffle">;

// Tuning: vector shifts by immediate are fast.
def TuningFastImmVectorShift
    : SubtargetFeature<"tuning-fast-imm-vector-shift", "FastImmVectorShift",
                       "true",
                       "Vector shifts are fast (2/cycle) as opposed to slow (1/cycle)">;
606 // On some X86 processors, a vzeroupper instruction should be inserted after
607 // using ymm/zmm registers before executing code that may use SSE instructions.
608 def TuningInsertVZEROUPPER
609 : SubtargetFeature<"vzeroupper",
611 "true", "Should insert vzeroupper instructions">;
// Enable TuningFastScalarFSQRT when scalar FSQRT has a shorter latency than
// the equivalent Newton-Raphson (NR) code, and TuningFastVectorFSQRT when
// vector FSQRT has a higher throughput than the equivalent NR code.
// Rationale: throughput-bound code is likely to be vectorized, so vectorized
// code should care about SQRT throughput. Scalar code probably has some kind
// of dependency, so there reducing latency matters more.

// Set when the hardware SQRTSS instruction is at least as fast (in latency) as
// RSQRTSS followed by one Newton-Raphson iteration.
def TuningFastScalarFSQRT : SubtargetFeature<
    "fast-scalar-fsqrt", "HasFastScalarFSQRT", "true",
    "Scalar SQRT is fast (disable Newton-Raphson)">;

// Set when the hardware SQRTPS/VSQRTPS instructions are at least as fast (in
// throughput) as RSQRTPS/VRSQRTPS followed by one Newton-Raphson iteration.
def TuningFastVectorFSQRT : SubtargetFeature<
    "fast-vector-fsqrt", "HasFastVectorFSQRT", "true",
    "Vector SQRT is fast (disable Newton-Raphson)">;
632 // If lzcnt has equivalent latency/throughput to most simple integer ops, it can
633 // be used to replace test/set sequences.
636 "fast-lzcnt", "HasFastLZCNT", "true",
637 "LZCNT instructions are as fast as most simple integer ops">;
// If the target can efficiently decode NOPs up to 7 bytes in length.
640 def TuningFast7ByteNOP
642 "fast-7bytenop", "HasFast7ByteNOP", "true",
643 "Target can quickly decode up to 7 byte NOPs">;
// If the target can efficiently decode NOPs up to 11 bytes in length.
646 def TuningFast11ByteNOP
648 "fast-11bytenop", "HasFast11ByteNOP", "true",
649 "Target can quickly decode up to 11 byte NOPs">;
// If the target can efficiently decode NOPs up to 15 bytes in length.
652 def TuningFast15ByteNOP
654 "fast-15bytenop", "HasFast15ByteNOP", "true",
655 "Target can quickly decode up to 15 byte NOPs">;
657 // Sandy Bridge and newer processors can use SHLD with the same source on both
658 // inputs to implement rotate to avoid the partial flag update of the normal
659 // rotate instructions.
660 def TuningFastSHLDRotate
662 "fast-shld-rotate", "HasFastSHLDRotate", "true",
663 "SHLD can be used as a faster rotate">;
// Bulldozer and newer processors can merge CMP/TEST, but no other
// instructions, with conditional branches.
def TuningBranchFusion : SubtargetFeature<
    "branchfusion", "HasBranchFusion", "true",
    "CMP/TEST can be fused with conditional branches">;
// Sandy Bridge and newer processors can fuse many instructions with a
// conditional branch, so that the pair passes through the CPU as a single
// operation.
def TuningMacroFusion : SubtargetFeature<
    "macrofusion", "HasMacroFusion", "true",
    "Various instructions can be fused with conditional branches">;
678 // Gather is available since Haswell (AVX2 set). So technically, we can
679 // generate Gathers on all AVX2 processors. But the overhead on HSW is high.
680 // Skylake Client processor has faster Gathers than HSW and performance is
681 // similar to Skylake Server (AVX-512).
683 : SubtargetFeature<"fast-gather", "HasFastGather", "true",
684 "Indicates if gather is reasonably fast (this is true for Skylake client and all AVX-512 CPUs)">;
// Negative tunings: enabling either one sets the corresponding Prefer* field
// to "false" rather than "true".
def TuningPreferNoGather : SubtargetFeature<
    "prefer-no-gather", "PreferGather", "false",
    "Prefer no gather instructions">;
def TuningPreferNoScatter : SubtargetFeature<
    "prefer-no-scatter", "PreferScatter", "false",
    "Prefer no scatter instructions">;
// Tuning: prefer 128-bit AVX instructions.
def TuningPrefer128Bit : SubtargetFeature<
    "prefer-128-bit", "Prefer128Bit", "true",
    "Prefer 128-bit AVX instructions">;

// Tuning: prefer 256-bit AVX instructions.
def TuningPrefer256Bit : SubtargetFeature<
    "prefer-256-bit", "Prefer256Bit", "true",
    "Prefer 256-bit AVX instructions">;

// Tuning: still allow 256-bit loads/stores when 128-bit is preferred.
def TuningAllowLight256Bit : SubtargetFeature<
    "allow-light-256-bit", "AllowLight256Bit", "true",
    "Enable generation of 256-bit load/stores even if we prefer 128-bit">;

// Tuning: prefer AVX512 mask registers to PTEST/MOVMSK.
def TuningPreferMaskRegisters : SubtargetFeature<
    "prefer-mask-registers", "PreferMaskRegisters", "true",
    "Prefer AVX512 mask registers over PTEST/MOVMSK">;
// Tuning: BEXTR is a single uop with good throughput. The help text is built
// from two adjacent string literals.
def TuningFastBEXTR
    : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
                       "Indicates that the BEXTR instruction is implemented as a single uop "
                       "with good throughput">;
713 // Combine vector math operations with shuffles into horizontal math
714 // instructions if a CPU implements horizontal operations (introduced with
715 // SSE3) with better latency/throughput than the alternative sequence.
// Sets HasFastHorizontalOps; selected by BtVer2Tuning.
716 def TuningFastHorizontalOps
717 : SubtargetFeature<
718 "fast-hops", "HasFastHorizontalOps", "true",
719 "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
720 "normal vector instructions with shuffles">;
// Scalar counterpart of fast-vector-shift-masks. Selected by the AMD tuning
// lists (Barcelona, BtVer1/2, BdVer1, Zen) and by the k8 processor entries.
722 def TuningFastScalarShiftMasks
723 : SubtargetFeature<
724 "fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
725 "Prefer a left/right scalar logical shift pair over a shift+and pair">;
// Vector counterpart of fast-scalar-shift-masks. Selected by BtVer1Tuning and
// BtVer2Tuning.
727 def TuningFastVectorShiftMasks
728 : SubtargetFeature<
729 "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
730 "Prefer a left/right vector logical shift pair over a shift+and pair">;
// Sets HasFastMOVBE. The record name follows the Tuning<Field> convention of
// the neighbouring records (e.g. TuningFastBEXTR / HasFastBEXTR).
732 def TuningFastMOVBE
733 : SubtargetFeature<"fast-movbe", "HasFastMOVBE", "true",
734 "Prefer a movbe over a single-use load + bswap / single-use bswap + store">;
// Sets UseSLMArithCosts; it is the first entry of SLMTuning.
736 def TuningUseSLMArithCosts
737 : SubtargetFeature<"use-slm-arith-costs", "UseSLMArithCosts", "true",
738 "Use Silvermont specific arithmetic costs">;
// Sets UseGLMDivSqrtCosts; it is the first entry of both GLMTuning and
// GLPTuning (and therefore of TRMTuning, which aliases GLPTuning).
740 def TuningUseGLMDivSqrtCosts
741 : SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
742 "Use Goldmont specific floating point div/sqrt costs">;
744 //===----------------------------------------------------------------------===//
745 // X86 CPU Families
746 // TODO: Remove these - use general tuning features to determine codegen.
747 //===----------------------------------------------------------------------===//
// Sets the IsAtom field. The feature name is the empty string and the record
// is only listed in AtomTuning.
// NOTE(review): presumably the empty name keeps it from being requested by
// name on the command line - confirm.
750 def ProcIntelAtom : SubtargetFeature<"", "IsAtom", "true", "Is Intel Atom processor">;
752 //===----------------------------------------------------------------------===//
753 // Register File Description
754 //===----------------------------------------------------------------------===//
756 include "X86RegisterInfo.td"
757 include "X86RegisterBanks.td"
759 //===----------------------------------------------------------------------===//
760 // Instruction Descriptions
761 //===----------------------------------------------------------------------===//
763 include "X86Schedule.td"
764 include "X86InstrInfo.td"
765 include "X86SchedPredicates.td"
// Instruction-set record for X86; it is the value assigned to InstructionSet
// in the target description near the end of this file.
767 def X86InstrInfo : InstrInfo;
769 //===----------------------------------------------------------------------===//
770 // X86 Scheduler Models
771 //===----------------------------------------------------------------------===//
773 include "X86ScheduleAtom.td"
774 include "X86SchedSandyBridge.td"
775 include "X86SchedHaswell.td"
776 include "X86SchedBroadwell.td"
777 include "X86ScheduleSLM.td"
778 include "X86ScheduleZnver1.td"
779 include "X86ScheduleZnver2.td"
780 include "X86ScheduleZnver3.td"
781 include "X86ScheduleZnver4.td"
782 include "X86ScheduleBdVer2.td"
783 include "X86ScheduleBtVer2.td"
784 include "X86SchedSkylakeClient.td"
785 include "X86SchedSkylakeServer.td"
786 include "X86SchedIceLake.td"
787 include "X86SchedAlderlakeP.td"
788 include "X86SchedSapphireRapids.td"
790 //===----------------------------------------------------------------------===//
791 // X86 Processor Feature Lists
792 //===----------------------------------------------------------------------===//
794 def ProcessorFeatures {
795 // x86-64 micro-architecture levels: x86-64 and x86-64-v[234]
796 list<SubtargetFeature> X86_64V1Features = [
797 FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSE2,
798 FeatureFXSR, FeatureNOPL, FeatureX86_64,
800 list<SubtargetFeature> X86_64V1Tuning = [
805 TuningInsertVZEROUPPER
808 list<SubtargetFeature> X86_64V2Features = !listconcat(X86_64V1Features, [
809 FeatureCX16, FeatureLAHFSAHF64, FeatureCRC32, FeaturePOPCNT,
812 list<SubtargetFeature> X86_64V2Tuning = [
817 TuningFastScalarFSQRT,
818 TuningFastSHLDRotate,
820 TuningPOPCNTFalseDeps,
821 TuningInsertVZEROUPPER
824 list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [
825 FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT,
826 FeatureMOVBE, FeatureXSAVE
828 list<SubtargetFeature> X86_64V3Tuning = [
832 TuningFastScalarFSQRT,
833 TuningFastSHLDRotate,
835 TuningFastVariableCrossLaneShuffle,
836 TuningFastVariablePerLaneShuffle,
837 TuningPOPCNTFalseDeps,
838 TuningLZCNTFalseDeps,
839 TuningInsertVZEROUPPER,
840 TuningAllowLight256Bit
843 list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [
850 list<SubtargetFeature> X86_64V4Tuning = [
854 TuningFastScalarFSQRT,
855 TuningFastVectorFSQRT,
856 TuningFastSHLDRotate,
858 TuningFastVariableCrossLaneShuffle,
859 TuningFastVariablePerLaneShuffle,
862 TuningPOPCNTFalseDeps,
863 TuningInsertVZEROUPPER,
864 TuningAllowLight256Bit
// Nehalem ("nehalem", "corei7", "core_i7_sse4_2"): the ISA list is exactly the
// x86-64-v2 feature level; only the tuning list is Nehalem-specific.
868 list<SubtargetFeature> NHMFeatures = X86_64V2Features;
869 list<SubtargetFeature> NHMTuning = [TuningMacroFusion,
870 TuningInsertVZEROUPPER,
871 TuningNoDomainDelayMov];
// Westmere: Nehalem plus PCLMUL, with Nehalem's tuning list reused unchanged.
874 list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
875 list<SubtargetFeature> WSMTuning = NHMTuning;
876 list<SubtargetFeature> WSMFeatures =
877 !listconcat(NHMFeatures, WSMAdditionalFeatures);
880 list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX,
883 list<SubtargetFeature> SNBTuning = [TuningMacroFusion,
887 TuningFastScalarFSQRT,
888 TuningFastSHLDRotate,
890 TuningPOPCNTFalseDeps,
891 TuningInsertVZEROUPPER,
892 TuningNoDomainDelayMov];
893 list<SubtargetFeature> SNBFeatures =
894 !listconcat(WSMFeatures, SNBAdditionalFeatures);
897 list<SubtargetFeature> IVBAdditionalFeatures = [FeatureRDRAND,
900 list<SubtargetFeature> IVBTuning = SNBTuning;
901 list<SubtargetFeature> IVBFeatures =
902 !listconcat(SNBFeatures, IVBAdditionalFeatures);
905 list<SubtargetFeature> HSWAdditionalFeatures = [FeatureAVX2,
913 list<SubtargetFeature> HSWTuning = [TuningMacroFusion,
916 TuningFastScalarFSQRT,
917 TuningFastSHLDRotate,
919 TuningFastVariableCrossLaneShuffle,
920 TuningFastVariablePerLaneShuffle,
921 TuningPOPCNTFalseDeps,
922 TuningLZCNTFalseDeps,
923 TuningInsertVZEROUPPER,
924 TuningAllowLight256Bit,
925 TuningNoDomainDelayMov,
926 TuningNoDomainDelayShuffle];
927 list<SubtargetFeature> HSWFeatures =
928 !listconcat(IVBFeatures, HSWAdditionalFeatures);
931 list<SubtargetFeature> BDWAdditionalFeatures = [FeatureADX,
934 list<SubtargetFeature> BDWTuning = HSWTuning;
935 list<SubtargetFeature> BDWFeatures =
936 !listconcat(HSWFeatures, BDWAdditionalFeatures);
939 list<SubtargetFeature> SKLAdditionalFeatures = [FeatureAES,
943 list<SubtargetFeature> SKLTuning = [TuningFastGather,
947 TuningFastScalarFSQRT,
948 TuningFastVectorFSQRT,
949 TuningFastSHLDRotate,
951 TuningFastVariableCrossLaneShuffle,
952 TuningFastVariablePerLaneShuffle,
953 TuningPOPCNTFalseDeps,
954 TuningInsertVZEROUPPER,
955 TuningAllowLight256Bit,
956 TuningNoDomainDelayMov,
957 TuningNoDomainDelayShuffle,
958 TuningNoDomainDelayBlend];
959 list<SubtargetFeature> SKLFeatures =
960 !listconcat(BDWFeatures, SKLAdditionalFeatures);
963 list<SubtargetFeature> SKXAdditionalFeatures = [FeatureAES,
975 list<SubtargetFeature> SKXTuning = [TuningFastGather,
979 TuningFastScalarFSQRT,
980 TuningFastVectorFSQRT,
981 TuningFastSHLDRotate,
983 TuningFastVariableCrossLaneShuffle,
984 TuningFastVariablePerLaneShuffle,
986 TuningPOPCNTFalseDeps,
987 TuningInsertVZEROUPPER,
988 TuningAllowLight256Bit,
989 TuningPreferShiftShuffle,
990 TuningNoDomainDelayMov,
991 TuningNoDomainDelayShuffle,
992 TuningNoDomainDelayBlend,
993 TuningFastImmVectorShift];
// Skylake server extends Broadwell directly rather than SKLFeatures, so every
// feature it needs beyond Broadwell has to be listed in SKXAdditionalFeatures.
994 list<SubtargetFeature> SKXFeatures =
995 !listconcat(BDWFeatures, SKXAdditionalFeatures);
// Cascade Lake: Skylake server plus VNNI; tuning is shared with Skylake server.
998 list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI];
999 list<SubtargetFeature> CLXTuning = SKXTuning;
1000 list<SubtargetFeature> CLXFeatures =
1001 !listconcat(SKXFeatures, CLXAdditionalFeatures);
// Cooper Lake: Cascade Lake plus BF16; tuning is still the Skylake server list.
1004 list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16];
1005 list<SubtargetFeature> CPXTuning = SKXTuning;
1006 list<SubtargetFeature> CPXFeatures =
1007 !listconcat(CLXFeatures, CPXAdditionalFeatures);
1010 list<SubtargetFeature> CNLAdditionalFeatures = [FeatureAVX512,
1020 list<SubtargetFeature> CNLTuning = [TuningFastGather,
1024 TuningFastScalarFSQRT,
1025 TuningFastVectorFSQRT,
1026 TuningFastSHLDRotate,
1027 TuningFast15ByteNOP,
1028 TuningFastVariableCrossLaneShuffle,
1029 TuningFastVariablePerLaneShuffle,
1031 TuningInsertVZEROUPPER,
1032 TuningAllowLight256Bit,
1033 TuningNoDomainDelayMov,
1034 TuningNoDomainDelayShuffle,
1035 TuningNoDomainDelayBlend,
1036 TuningFastImmVectorShift];
// Cannon Lake extends the Skylake client list (SKLFeatures), not SKXFeatures.
1037 list<SubtargetFeature> CNLFeatures =
1038 !listconcat(SKLFeatures, CNLAdditionalFeatures);
1041 list<SubtargetFeature> ICLAdditionalFeatures = [FeatureBITALG,
1050 list<SubtargetFeature> ICLTuning = [TuningFastGather,
1053 TuningFastScalarFSQRT,
1054 TuningFastVectorFSQRT,
1055 TuningFastSHLDRotate,
1056 TuningFast15ByteNOP,
1057 TuningFastVariableCrossLaneShuffle,
1058 TuningFastVariablePerLaneShuffle,
1060 TuningInsertVZEROUPPER,
1061 TuningAllowLight256Bit,
1062 TuningNoDomainDelayMov,
1063 TuningNoDomainDelayShuffle,
1064 TuningNoDomainDelayBlend,
1065 TuningFastImmVectorShift];
1066 list<SubtargetFeature> ICLFeatures =
1067 !listconcat(CNLFeatures, ICLAdditionalFeatures);
1070 list<SubtargetFeature> ICXAdditionalFeatures = [FeaturePCONFIG,
1073 list<SubtargetFeature> ICXTuning = ICLTuning;
1074 list<SubtargetFeature> ICXFeatures =
1075 !listconcat(ICLFeatures, ICXAdditionalFeatures);
1078 list<SubtargetFeature> TGLAdditionalFeatures = [FeatureVP2INTERSECT,
1083 list<SubtargetFeature> TGLTuning = ICLTuning;
1084 list<SubtargetFeature> TGLFeatures =
1085 !listconcat(ICLFeatures, TGLAdditionalFeatures );
1088 list<SubtargetFeature> SPRAdditionalFeatures = [FeatureAMXTILE,
// Sapphire Rapids: both lists are layered on Ice Lake server (ICX). The tuning
// list adds five false-dependency tunings on top of ICXTuning.
1104 list<SubtargetFeature> SPRAdditionalTuning = [TuningMULCFalseDeps,
1105 TuningPERMFalseDeps,
1106 TuningRANGEFalseDeps,
1107 TuningGETMANTFalseDeps,
1108 TuningMULLQFalseDeps];
1109 list<SubtargetFeature> SPRTuning = !listconcat(ICXTuning, SPRAdditionalTuning);
1110 list<SubtargetFeature> SPRFeatures =
1111 !listconcat(ICXFeatures, SPRAdditionalFeatures);
1114 list<SubtargetFeature> GNRAdditionalFeatures = [FeatureAMXFP16,
1116 list<SubtargetFeature> GNRFeatures =
1117 !listconcat(SPRFeatures, GNRAdditionalFeatures);
// Granite Rapids D: Granite Rapids plus AMX-COMPLEX. No tuning list is defined
// for it; the "graniterapids-d" processor entries use SPRTuning.
1120 list<SubtargetFeature> GNRDAdditionalFeatures = [FeatureAMXCOMPLEX];
1121 list<SubtargetFeature> GNRDFeatures =
1122 !listconcat(GNRFeatures, GNRDAdditionalFeatures);
1125 list<SubtargetFeature> AtomFeatures = [FeatureX87,
1136 list<SubtargetFeature> AtomTuning = [ProcIntelAtom,
1141 TuningSlowTwoMemOps,
1143 TuningPadShortFunctions,
1144 TuningInsertVZEROUPPER,
1145 TuningNoDomainDelay];
1148 list<SubtargetFeature> SLMAdditionalFeatures = [FeatureSSE42,
1154 list<SubtargetFeature> SLMTuning = [TuningUseSLMArithCosts,
1155 TuningSlowTwoMemOps,
1162 TuningPOPCNTFalseDeps,
1163 TuningInsertVZEROUPPER,
1164 TuningNoDomainDelay];
1165 list<SubtargetFeature> SLMFeatures =
1166 !listconcat(AtomFeatures, SLMAdditionalFeatures);
1169 list<SubtargetFeature> GLMAdditionalFeatures = [FeatureAES,
1178 list<SubtargetFeature> GLMTuning = [TuningUseGLMDivSqrtCosts,
1179 TuningSlowTwoMemOps,
1183 TuningPOPCNTFalseDeps,
1184 TuningInsertVZEROUPPER,
1185 TuningNoDomainDelay];
1186 list<SubtargetFeature> GLMFeatures =
1187 !listconcat(SLMFeatures, GLMAdditionalFeatures);
1190 list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE,
1192 list<SubtargetFeature> GLPTuning = [TuningUseGLMDivSqrtCosts,
1193 TuningSlowTwoMemOps,
1197 TuningInsertVZEROUPPER,
1198 TuningNoDomainDelay];
1199 list<SubtargetFeature> GLPFeatures =
1200 !listconcat(GLMFeatures, GLPAdditionalFeatures);
1203 list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLWB,
1205 list<SubtargetFeature> TRMTuning = GLPTuning;
1206 list<SubtargetFeature> TRMFeatures =
1207 !listconcat(GLPFeatures, TRMAdditionalFeatures);
1210 list<SubtargetFeature> ADLAdditionalFeatures = [FeatureSERIALIZE,
// Alder Lake mixes two lineages: the tuning list is layered on the Skylake
// client list (SKLTuning), while the ISA list is layered on Tremont
// (TRMFeatures).
1230 list<SubtargetFeature> ADLAdditionalTuning = [TuningPERMFalseDeps,
1231 TuningPreferMovmskOverVTest,
1232 TuningFastImmVectorShift];
1233 list<SubtargetFeature> ADLTuning = !listconcat(SKLTuning, ADLAdditionalTuning);
1234 list<SubtargetFeature> ADLFeatures =
1235 !listconcat(TRMFeatures, ADLAdditionalFeatures);
1238 list<SubtargetFeature> GRTTuning = [TuningMacroFusion,
1242 TuningFastScalarFSQRT,
1243 TuningFastVectorFSQRT,
1244 TuningFast15ByteNOP,
1245 TuningFastVariablePerLaneShuffle,
1246 TuningPOPCNTFalseDeps,
1247 TuningInsertVZEROUPPER];
1250 list<SubtargetFeature> SRFAdditionalFeatures = [FeatureCMPCCXADD,
1252 FeatureAVXNECONVERT,
1255 FeatureAVXVNNIINT8];
1256 list<SubtargetFeature> SRFFeatures =
1257 !listconcat(ADLFeatures, SRFAdditionalFeatures);
1260 list<SubtargetFeature> ARLSAdditionalFeatures = [FeatureAVXVNNIINT16,
1264 list<SubtargetFeature> ARLSFeatures =
1265 !listconcat(SRFFeatures, ARLSAdditionalFeatures);
// Panther Lake: Arrow Lake S plus PREFETCHI. No tuning list is defined for it;
// the "pantherlake" processor entry uses ADLTuning.
1268 list<SubtargetFeature> PTLAdditionalFeatures = [FeaturePREFETCHI];
1269 list<SubtargetFeature> PTLFeatures =
1270 !listconcat(ARLSFeatures, PTLAdditionalFeatures);
1274 list<SubtargetFeature> CWFAdditionalFeatures = [FeaturePREFETCHI,
1276 list<SubtargetFeature> CWFFeatures =
1277 !listconcat(ARLSFeatures, CWFAdditionalFeatures);
1280 list<SubtargetFeature> KNLFeatures = [FeatureX87,
1312 list<SubtargetFeature> KNLTuning = [TuningSlowDivide64,
1315 TuningSlowTwoMemOps,
1316 TuningPreferMaskRegisters,
// Knights Mill: Knights Landing plus VPOPCNTDQ. No tuning list is defined for
// it; the "knm" processor entry reuses KNLTuning.
1320 // TODO Add AVX5124FMAPS/AVX5124VNNIW features
1321 list<SubtargetFeature> KNMFeatures =
1322 !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]);
1325 list<SubtargetFeature> BarcelonaFeatures = [FeatureX87,
1338 list<SubtargetFeature> BarcelonaTuning = [TuningFastScalarShiftMasks,
1340 TuningSBBDepBreaking,
1341 TuningInsertVZEROUPPER];
1344 list<SubtargetFeature> BtVer1Features = [FeatureX87,
1358 list<SubtargetFeature> BtVer1Tuning = [TuningFast15ByteNOP,
1359 TuningFastScalarShiftMasks,
1360 TuningFastVectorShiftMasks,
1362 TuningSBBDepBreaking,
1363 TuningInsertVZEROUPPER];
1366 list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX,
1375 list<SubtargetFeature> BtVer2Tuning = [TuningFastLZCNT,
1377 TuningFastHorizontalOps,
1378 TuningFast15ByteNOP,
1379 TuningFastScalarShiftMasks,
1380 TuningFastVectorShiftMasks,
1382 TuningSBBDepBreaking,
1384 list<SubtargetFeature> BtVer2Features =
1385 !listconcat(BtVer1Features, BtVer2AdditionalFeatures);
1388 list<SubtargetFeature> BdVer1Features = [FeatureX87,
1406 list<SubtargetFeature> BdVer1Tuning = [TuningSlowSHLD,
1407 TuningFast11ByteNOP,
1408 TuningFastScalarShiftMasks,
1410 TuningSBBDepBreaking,
1411 TuningInsertVZEROUPPER];
1414 list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C,
1418 list<SubtargetFeature> BdVer2AdditionalTuning = [TuningFastBEXTR,
1420 list<SubtargetFeature> BdVer2Tuning =
1421 !listconcat(BdVer1Tuning, BdVer2AdditionalTuning);
1422 list<SubtargetFeature> BdVer2Features =
1423 !listconcat(BdVer1Features, BdVer2AdditionalFeatures);
1426 list<SubtargetFeature> BdVer3AdditionalFeatures = [FeatureXSAVEOPT,
1428 list<SubtargetFeature> BdVer3Tuning = BdVer2Tuning;
1429 list<SubtargetFeature> BdVer3Features =
1430 !listconcat(BdVer2Features, BdVer3AdditionalFeatures);
1433 list<SubtargetFeature> BdVer4AdditionalFeatures = [FeatureAVX2,
1438 list<SubtargetFeature> BdVer4Tuning = BdVer3Tuning;
1439 list<SubtargetFeature> BdVer4Features =
1440 !listconcat(BdVer3Features, BdVer4AdditionalFeatures);
1443 // AMD Zen Processors common ISAs
1444 list<SubtargetFeature> ZNFeatures = [FeatureADX,
1477 list<SubtargetFeature> ZNTuning = [TuningFastLZCNT,
1479 TuningFast15ByteNOP,
1481 TuningFastScalarFSQRT,
1482 TuningFastVectorFSQRT,
1483 TuningFastScalarShiftMasks,
1484 TuningFastVariablePerLaneShuffle,
1487 TuningSBBDepBreaking,
1488 TuningInsertVZEROUPPER,
1489 TuningAllowLight256Bit];
1490 list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
// Zen 2: tuning is identical to Zen 1; only the ISA list grows.
1494 list<SubtargetFeature> ZN2Tuning = ZNTuning;
1495 list<SubtargetFeature> ZN2Features =
1496 !listconcat(ZNFeatures, ZN2AdditionalFeatures);
1497 list<SubtargetFeature> ZN3AdditionalFeatures = [FeatureFSRM,
// Zen 3 adds macro-fusion to the Zen 2 tuning list; Zen 4 reuses the Zen 3
// tuning list unchanged.
1502 list<SubtargetFeature> ZN3AdditionalTuning = [TuningMacroFusion];
1503 list<SubtargetFeature> ZN3Tuning =
1504 !listconcat(ZN2Tuning, ZN3AdditionalTuning);
1505 list<SubtargetFeature> ZN3Features =
1506 !listconcat(ZN2Features, ZN3AdditionalFeatures);
1507 list<SubtargetFeature> ZN4Tuning = ZN3Tuning;
1508 list<SubtargetFeature> ZN4AdditionalFeatures = [FeatureAVX512,
1523 list<SubtargetFeature> ZN4Features =
1524 !listconcat(ZN3Features, ZN4AdditionalFeatures);
1527 //===----------------------------------------------------------------------===//
1528 // X86 processors supported.
1529 //===----------------------------------------------------------------------===//
// Processor entry that always uses GenericModel as its scheduling model.
//   Name         - CPU name string.
//   Features     - ISA feature list, forwarded to ProcessorModel.
//   TuneFeatures - tuning feature list, forwarded to ProcessorModel.
1531 class Proc<string Name, list<SubtargetFeature> Features,
1532 list<SubtargetFeature> TuneFeatures>
1533 : ProcessorModel<Name, GenericModel, Features, TuneFeatures>;
// Processor entry with an explicitly chosen scheduling model. Apart from the
// extra Model argument it forwards the same arguments as Proc.
1535 class ProcModel<string Name, SchedMachineModel Model,
1536 list<SubtargetFeature> Features,
1537 list<SubtargetFeature> TuneFeatures>
1538 : ProcessorModel<Name, Model, Features, TuneFeatures>;
1540 // NOTE: CMPXCHG8B is here for legacy compatibility so that it is only disabled
1541 // if i386/i486 is specifically requested.
1542 // NOTE: 64Bit is here as "generic" is the default llc CPU. The X86Subtarget
1543 // constructor checks that any CPU used in 64-bit mode has FeatureX86_64
1544 // enabled. It has no effect on code generation.
1545 // NOTE: As a default tuning, "generic" aims to produce code optimized for the
1546 // most common X86 processors. The tunings might be changed over time. It is
1547 // recommended to use "tune-cpu"="x86-64" in function attribute for consistency.
1548 def : ProcModel<"generic", SandyBridgeModel,
1549 [FeatureX87, FeatureCX8, FeatureX86_64],
1553 TuningFastScalarFSQRT,
1554 TuningFast15ByteNOP,
1555 TuningInsertVZEROUPPER]>;
// Early 32-bit processors. All four share the same tuning list; i386/i486
// have X87 only, while i586/pentium also enable CX8.
1557 def : Proc<"i386", [FeatureX87],
1558 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1559 def : Proc<"i486", [FeatureX87],
1560 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1561 def : Proc<"i586", [FeatureX87, FeatureCX8],
1562 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1563 def : Proc<"pentium", [FeatureX87, FeatureCX8],
1564 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// Pentium MMX through Pentium III. Each foreach defines one anonymous
// processor record per alias in its name list; all share one tuning list.
1565 foreach P = ["pentium-mmx", "pentium_mmx"] in {
1566 def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX],
1567 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1568 }
1569 def : Proc<"i686", [FeatureX87, FeatureCX8, FeatureCMOV],
1570 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1571 foreach P = ["pentiumpro", "pentium_pro"] in {
1572 def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV, FeatureNOPL],
1573 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1574 }
1575 foreach P = ["pentium2", "pentium_ii"] in {
1576 def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX, FeatureCMOV,
1577 FeatureFXSR, FeatureNOPL],
1578 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1579 }
1580 foreach P = ["pentium3", "pentium3m", "pentium_iii_no_xmm_regs", "pentium_iii"] in {
1581 def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX,
1582 FeatureSSE1, FeatureFXSR, FeatureNOPL, FeatureCMOV],
1583 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1584 }
1586 // Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
1587 // The intent is to enable it for pentium4 which is the current default
1588 // processor in a vanilla 32-bit clang compilation when no specific
1589 // architecture is specified. This generally gives a nice performance
1590 // increase on silvermont, with largely neutral behavior on other
1591 // contemporary large core processors.
1592 // pentium-m, pentium4m, prescott and nocona are included as a preventative
1593 // measure to avoid performance surprises, in case clang's default cpu
1594 // changes slightly.
// Pentium M and Pentium 4 aliases. Both groups use GenericPostRAModel and have
// identical feature and tuning lists.
1596 foreach P = ["pentium_m", "pentium-m"] in {
1597 def : ProcModel<P, GenericPostRAModel,
1598 [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2,
1599 FeatureFXSR, FeatureNOPL, FeatureCMOV],
1600 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1601 }
1603 foreach P = ["pentium4", "pentium4m", "pentium_4"] in {
1604 def : ProcModel<P, GenericPostRAModel,
1605 [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2,
1606 FeatureFXSR, FeatureNOPL, FeatureCMOV],
1607 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1608 }
// "lakemont" enables CX8 only; unlike the neighbouring 32-bit entries its
// feature list omits FeatureX87.
1611 def : Proc<"lakemont", [FeatureCX8],
1612 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// "yonah" is an SSE3-level 32-bit processor scheduled with SandyBridgeModel.
1615 def : ProcModel<"yonah", SandyBridgeModel,
1616 [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3,
1617 FeatureFXSR, FeatureNOPL, FeatureCMOV],
1618 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// Prescott aliases: same feature list as "yonah" but scheduled with
// GenericPostRAModel.
1621 foreach P = ["prescott", "pentium_4_sse3"] in {
1622 def : ProcModel<P, GenericPostRAModel,
1623 [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3,
1624 FeatureFXSR, FeatureNOPL, FeatureCMOV],
1625 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1626 }
1627 def : ProcModel<"nocona", GenericPostRAModel, [
1640 TuningInsertVZEROUPPER
1643 // Intel Core 2 Solo/Duo.
1644 foreach P = ["core2", "core_2_duo_ssse3"] in {
1645 def : ProcModel<P, SandyBridgeModel, [
1660 TuningInsertVZEROUPPER
1663 foreach P = ["penryn", "core_2_duo_sse4_1"] in {
1664 def : ProcModel<P, SandyBridgeModel, [
1679 TuningInsertVZEROUPPER
// Intel Atom line. Points worth knowing when editing:
//  - "atom_sse4_2_movbe" pairs the Goldmont feature list with SLMTuning.
//  - goldmont, goldmont-plus and tremont are all scheduled with SLMModel.
//  - sierraforest/grandridge use AlderlakePModel but the Tremont tuning list.
1684 foreach P = ["bonnell", "atom"] in {
1685 def : ProcModel<P, AtomModel, ProcessorFeatures.AtomFeatures,
1686 ProcessorFeatures.AtomTuning>;
1687 }
1689 foreach P = ["silvermont", "slm", "atom_sse4_2"] in {
1690 def : ProcModel<P, SLMModel, ProcessorFeatures.SLMFeatures,
1691 ProcessorFeatures.SLMTuning>;
1692 }
1694 def : ProcModel<"atom_sse4_2_movbe", SLMModel, ProcessorFeatures.GLMFeatures,
1695 ProcessorFeatures.SLMTuning>;
1696 def : ProcModel<"goldmont", SLMModel, ProcessorFeatures.GLMFeatures,
1697 ProcessorFeatures.GLMTuning>;
1698 foreach P = ["goldmont_plus", "goldmont-plus"] in {
1699 def : ProcModel<P, SLMModel, ProcessorFeatures.GLPFeatures,
1700 ProcessorFeatures.GLPTuning>;
1701 }
1702 def : ProcModel<"tremont", SLMModel, ProcessorFeatures.TRMFeatures,
1703 ProcessorFeatures.TRMTuning>;
1704 foreach P = ["sierraforest", "grandridge"] in {
1705 def : ProcModel<P, AlderlakePModel, ProcessorFeatures.SRFFeatures,
1706 ProcessorFeatures.TRMTuning>;
1707 }
// Nehalem through Broadwell. Nehalem, Westmere, Sandy Bridge and Ivy Bridge
// all share SandyBridgeModel; Haswell and Broadwell have their own models.
1709 // "Arrandale" along with corei3 and corei5
1710 foreach P = ["nehalem", "corei7", "core_i7_sse4_2"] in {
1711 def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.NHMFeatures,
1712 ProcessorFeatures.NHMTuning>;
1713 }
1715 // Westmere is the corei3/i5/i7 path from nehalem to sandybridge
1716 foreach P = ["westmere", "core_aes_pclmulqdq"] in {
1717 def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.WSMFeatures,
1718 ProcessorFeatures.WSMTuning>;
1719 }
1721 foreach P = ["sandybridge", "corei7-avx", "core_2nd_gen_avx"] in {
1722 def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.SNBFeatures,
1723 ProcessorFeatures.SNBTuning>;
1724 }
1726 foreach P = ["ivybridge", "core-avx-i", "core_3rd_gen_avx"] in {
1727 def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.IVBFeatures,
1728 ProcessorFeatures.IVBTuning>;
1729 }
1731 foreach P = ["haswell", "core-avx2", "core_4th_gen_avx", "core_4th_gen_avx_tsx"] in {
1732 def : ProcModel<P, HaswellModel, ProcessorFeatures.HSWFeatures,
1733 ProcessorFeatures.HSWTuning>;
1734 }
1736 foreach P = ["broadwell", "core_5th_gen_avx", "core_5th_gen_avx_tsx"] in {
1737 def : ProcModel<P, BroadwellModel, ProcessorFeatures.BDWFeatures,
1738 ProcessorFeatures.BDWTuning>;
1739 }
// Skylake client; the only entry in this file that uses SkylakeClientModel.
1741 def : ProcModel<"skylake", SkylakeClientModel, ProcessorFeatures.SKLFeatures,
1742 ProcessorFeatures.SKLTuning>;
// Knights Landing / Knights Mill borrow HaswellModel until a dedicated model
// exists (see FIXME). "knm" has its own feature list but reuses KNLTuning.
1744 // FIXME: define KNL scheduler model
1745 foreach P = ["knl", "mic_avx512"] in {
1746 def : ProcModel<P, HaswellModel, ProcessorFeatures.KNLFeatures,
1747 ProcessorFeatures.KNLTuning>;
1748 }
1749 def : ProcModel<"knm", HaswellModel, ProcessorFeatures.KNMFeatures,
1750 ProcessorFeatures.KNLTuning>;
// Skylake server and its aliases.
1752 foreach P = ["skylake-avx512", "skx", "skylake_avx512"] in {
1753 def : ProcModel<P, SkylakeServerModel, ProcessorFeatures.SKXFeatures,
1754 ProcessorFeatures.SKXTuning>;
1755 }
// Cascade Lake through Sapphire Rapids. cascadelake, cooperlake and
// cannonlake are scheduled with SkylakeServerModel. "rocketlake" is defined
// with exactly the Ice Lake client model, features and tuning.
1757 def : ProcModel<"cascadelake", SkylakeServerModel,
1758 ProcessorFeatures.CLXFeatures, ProcessorFeatures.CLXTuning>;
1759 def : ProcModel<"cooperlake", SkylakeServerModel,
1760 ProcessorFeatures.CPXFeatures, ProcessorFeatures.CPXTuning>;
1761 def : ProcModel<"cannonlake", SkylakeServerModel,
1762 ProcessorFeatures.CNLFeatures, ProcessorFeatures.CNLTuning>;
1763 foreach P = ["icelake-client", "icelake_client"] in {
1764 def : ProcModel<P, IceLakeModel,
1765 ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>;
1766 }
1767 def : ProcModel<"rocketlake", IceLakeModel,
1768 ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>;
1769 foreach P = ["icelake-server", "icelake_server"] in {
1770 def : ProcModel<P, IceLakeModel,
1771 ProcessorFeatures.ICXFeatures, ProcessorFeatures.ICXTuning>;
1772 }
1773 def : ProcModel<"tigerlake", IceLakeModel,
1774 ProcessorFeatures.TGLFeatures, ProcessorFeatures.TGLTuning>;
1775 def : ProcModel<"sapphirerapids", SapphireRapidsModel,
1776 ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
// Alder Lake and later. Points worth knowing when editing:
//  - raptorlake and meteorlake are defined identically to alderlake.
//  - gracemont shares the Alder Lake feature list but uses GRTTuning.
//  - arrowlake uses the Sierra Forest feature list (SRFFeatures).
//  - emeraldrapids is defined identically to sapphirerapids, and the Granite
//    Rapids entries reuse SapphireRapidsModel and SPRTuning.
1777 def : ProcModel<"alderlake", AlderlakePModel,
1778 ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
1779 // FIXME: Use Gracemont Schedule Model when it is ready.
1780 def : ProcModel<"gracemont", AlderlakePModel,
1781 ProcessorFeatures.ADLFeatures, ProcessorFeatures.GRTTuning>;
1782 def : ProcModel<"raptorlake", AlderlakePModel,
1783 ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
1784 def : ProcModel<"meteorlake", AlderlakePModel,
1785 ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
1786 def : ProcModel<"arrowlake", AlderlakePModel,
1787 ProcessorFeatures.SRFFeatures, ProcessorFeatures.ADLTuning>;
1788 foreach P = ["arrowlake-s", "arrowlake_s", "lunarlake"] in {
1789 def : ProcModel<P, AlderlakePModel,
1790 ProcessorFeatures.ARLSFeatures, ProcessorFeatures.ADLTuning>;
1791 }
1792 def : ProcModel<"pantherlake", AlderlakePModel,
1793 ProcessorFeatures.PTLFeatures, ProcessorFeatures.ADLTuning>;
1794 def : ProcModel<"clearwaterforest", AlderlakePModel,
1795 ProcessorFeatures.CWFFeatures, ProcessorFeatures.ADLTuning>;
1796 def : ProcModel<"graniterapids", SapphireRapidsModel,
1797 ProcessorFeatures.GNRFeatures, ProcessorFeatures.SPRTuning>;
1798 def : ProcModel<"emeraldrapids", SapphireRapidsModel,
1799 ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
1800 foreach P = ["graniterapids-d", "graniterapids_d"] in {
1801 def : ProcModel<P, SapphireRapidsModel,
1802 ProcessorFeatures.GNRDFeatures, ProcessorFeatures.SPRTuning>;
1803 }
// AMD K6 family. "k6" lists MMX; "k6-2" and "k6-3" are defined identically to
// each other and list Feature3DNow instead.
1807 def : Proc<"k6", [FeatureX87, FeatureCX8, FeatureMMX],
1808 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1809 def : Proc<"k6-2", [FeatureX87, FeatureCX8, Feature3DNow],
1810 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1811 def : Proc<"k6-3", [FeatureX87, FeatureCX8, Feature3DNow],
1812 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1814 foreach P = ["athlon", "athlon-tbird"] in {
1815 def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV, Feature3DNowA,
1817 [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// SSE1-capable Athlon variants; tuned with slow SHLD and slow unaligned
// 16-byte memory access.
1820 foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
1821 def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV,
1822 FeatureSSE1, Feature3DNowA, FeatureFXSR, FeatureNOPL],
1823 [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1824 }
// K8 family: SSE2 with FeatureX86_64. The tuning list adds fast scalar shift
// masks and SBB dependency breaking to the Athlon tunings.
1826 foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
1827 def : Proc<P, [FeatureX87, FeatureCX8, FeatureSSE2, Feature3DNowA,
1828 FeatureFXSR, FeatureNOPL, FeatureX86_64, FeatureCMOV],
1829 [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
1830 TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
1831 }
1833 foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
1834 def : Proc<P, [FeatureX87, FeatureCX8, FeatureSSE3, Feature3DNowA,
1835 FeatureFXSR, FeatureNOPL, FeatureCX16, FeatureCMOV,
1837 [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
1838 TuningSBBDepBreaking, TuningInsertVZEROUPPER]>;
// Family 10h: both names take their lists from ProcessorFeatures and use the
// default scheduling model through Proc.
1841 foreach P = ["amdfam10", "barcelona"] in {
1842 def : Proc<P, ProcessorFeatures.BarcelonaFeatures,
1843 ProcessorFeatures.BarcelonaTuning>;
1844 }
// AMD Bobcat/Jaguar, Bulldozer and Zen entries. Scheduling models:
//  - btver1, bdver3 and bdver4 go through Proc and so use GenericModel.
//  - bdver1 shares BdVer2Model with bdver2.
//  - btver2 and each znverN have a dedicated model.
1847 def : Proc<"btver1", ProcessorFeatures.BtVer1Features,
1848 ProcessorFeatures.BtVer1Tuning>;
1850 def : ProcModel<"btver2", BtVer2Model, ProcessorFeatures.BtVer2Features,
1851 ProcessorFeatures.BtVer2Tuning>;
1854 def : ProcModel<"bdver1", BdVer2Model, ProcessorFeatures.BdVer1Features,
1855 ProcessorFeatures.BdVer1Tuning>;
1857 def : ProcModel<"bdver2", BdVer2Model, ProcessorFeatures.BdVer2Features,
1858 ProcessorFeatures.BdVer2Tuning>;
1860 def : Proc<"bdver3", ProcessorFeatures.BdVer3Features,
1861 ProcessorFeatures.BdVer3Tuning>;
1863 def : Proc<"bdver4", ProcessorFeatures.BdVer4Features,
1864 ProcessorFeatures.BdVer4Tuning>;
1866 def : ProcModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures,
1867 ProcessorFeatures.ZNTuning>;
1868 def : ProcModel<"znver2", Znver2Model, ProcessorFeatures.ZN2Features,
1869 ProcessorFeatures.ZN2Tuning>;
1870 def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features,
1871 ProcessorFeatures.ZN3Tuning>;
1872 def : ProcModel<"znver4", Znver4Model, ProcessorFeatures.ZN4Features,
1873 ProcessorFeatures.ZN4Tuning>;
// Remaining 32-bit processors. All use the default scheduling model and the
// same tuning list. winchip-c6, winchip2 and c3 do not list FeatureCX8.
1875 def : Proc<"geode", [FeatureX87, FeatureCX8, Feature3DNowA],
1876 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1878 def : Proc<"winchip-c6", [FeatureX87, FeatureMMX],
1879 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1880 def : Proc<"winchip2", [FeatureX87, Feature3DNow],
1881 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1882 def : Proc<"c3", [FeatureX87, Feature3DNow],
1883 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1884 def : Proc<"c3-2", [FeatureX87, FeatureCX8, FeatureMMX,
1885 FeatureSSE1, FeatureFXSR, FeatureCMOV],
1886 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1888 // We also provide a generic 64-bit specific x86 processor model which tries to
1889 // be good for modern chips without enabling instruction set encodings past the
1890 // basic SSE2 and 64-bit ones. It disables slow things from any mainstream and
1891 // modern 64-bit x86 chip, and enables features that are generally beneficial.
1893 // We currently use the Sandy Bridge model as the default scheduling model as
1894 // we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which
1895 // covers a huge swath of x86 processors. If there are specific scheduling
1896 // knobs which need to be tuned differently for AMD chips, we might consider
1897 // forming a common base for them.
1898 def : ProcModel<"x86-64", SandyBridgeModel, ProcessorFeatures.X86_64V1Features,
1899 ProcessorFeatures.X86_64V1Tuning>;
1900 // Close to Sandybridge.
1901 def : ProcModel<"x86-64-v2", SandyBridgeModel, ProcessorFeatures.X86_64V2Features,
1902 ProcessorFeatures.X86_64V2Tuning>;
1903 // Close to Haswell.
1904 def : ProcModel<"x86-64-v3", HaswellModel, ProcessorFeatures.X86_64V3Features,
1905 ProcessorFeatures.X86_64V3Tuning>;
1906 // Close to the AVX-512 level implemented by Xeon Scalable Processors.
1907 def : ProcModel<"x86-64-v4", SkylakeServerModel, ProcessorFeatures.X86_64V4Features,
1908 ProcessorFeatures.X86_64V4Tuning>;
1910 //===----------------------------------------------------------------------===//
1911 // Calling Conventions
1912 //===----------------------------------------------------------------------===//
1914 include "X86CallingConv.td"
1917 //===----------------------------------------------------------------------===//
1918 // Assembly Parser
1919 //===----------------------------------------------------------------------===//
// AT&T-syntax parser variant: "#" starts a comment and registers carry a "%"
// prefix. It is the first entry of the target's AssemblyParserVariants list.
1921 def ATTAsmParserVariant : AsmParserVariant {
1922 int Variant = 0;
// Variant name.
1925 string Name = "att";
1927 // Discard comments in assembly strings.
1928 string CommentDelimiter = "#";
1930 // Recognize hard coded registers.
1931 string RegisterPrefix = "%";
1932 }
// Intel-syntax parser variant: ";" starts a comment and registers have no
// prefix. Its Variant index must differ from the AT&T variant's index.
1934 def IntelAsmParserVariant : AsmParserVariant {
1935 int Variant = 1;
// Variant name.
1938 string Name = "intel";
1940 // Discard comments in assembly strings.
1941 string CommentDelimiter = ";";
1943 // Recognize hard coded registers.
1944 string RegisterPrefix = "";
1945 }
1947 //===----------------------------------------------------------------------===//
1948 // Assembly Printers
1949 //===----------------------------------------------------------------------===//
1951 // The X86 target supports two different syntaxes for emitting machine code.
1952 // This is controlled by the -x86-asm-syntax={att|intel}
// AT&T-syntax printer; the Variant index matches ATTAsmParserVariant.
1953 def ATTAsmWriter : AsmWriter {
1954 string AsmWriterClassName = "ATTInstPrinter";
1955 int Variant = 0;
1956 }
// Intel-syntax printer; the Variant index matches IntelAsmParserVariant.
1957 def IntelAsmWriter : AsmWriter {
1958 string AsmWriterClassName = "IntelInstPrinter";
1959 int Variant = 1;
1960 }
// Top-level target record. It ties together the instruction set, the two
// assembly parser variants and the two assembly writers defined above.
1962 def X86 : Target {
1963 // Information about the instructions...
1964 let InstructionSet = X86InstrInfo;
1965 let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
1966 let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
1967 let AllowRegisterRenaming = 1;
1968 }
1970 //===----------------------------------------------------------------------===//
1971 // Pfm Counters
1972 //===----------------------------------------------------------------------===//
1974 include "X86PfmCounters.td"