1 //===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This is a target description file for the Intel i386 architecture, referred
10 // to here as the "X86" architecture.
12 //===----------------------------------------------------------------------===//
14 // Get the target-independent interfaces which we are implementing...
16 include "llvm/Target/Target.td"
18 //===----------------------------------------------------------------------===//
19 // X86 Subtarget state
// Execution-mode selectors. Following the SubtargetFeature argument order used
// throughout this file (feature string, subtarget field, value, description),
// each def below sets one In*BitMode field to "true" and lists no implied
// features.
22 def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
23 "64-bit mode (x86_64)">;
24 def Mode32Bit : SubtargetFeature<"32bit-mode", "In32BitMode", "true",
25 "32-bit mode (80386)">;
26 def Mode16Bit : SubtargetFeature<"16bit-mode", "In16BitMode", "true",
27 "16-bit mode (i8086)">;
29 //===----------------------------------------------------------------------===//
30 // X86 Subtarget ISA features
31 //===----------------------------------------------------------------------===//
// Baseline ISA feature flags. Each def is a boolean flag: it names the feature
// string, the subtarget field it sets, and the value "true". None of the defs
// in this run lists implied features.
33 def FeatureX87 : SubtargetFeature<"x87","HasX87", "true",
34 "Enable X87 float instructions">;
36 def FeatureNOPL : SubtargetFeature<"nopl", "HasNOPL", "true",
37 "Enable NOPL instruction">;
39 def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
40 "Enable conditional move instructions">;
// Note the feature string is "cx8" while the def and field spell out CMPXCHG8B.
42 def FeatureCMPXCHG8B : SubtargetFeature<"cx8", "HasCmpxchg8b", "true",
43 "Support CMPXCHG8B instructions">;
45 def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
46 "Support POPCNT instruction">;
48 def FeatureFXSR : SubtargetFeature<"fxsr", "HasFXSR", "true",
49 "Support fxsave/fxrestore instructions">;
51 def FeatureXSAVE : SubtargetFeature<"xsave", "HasXSAVE", "true",
52 "Support xsave instructions">;
54 def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
55 "Support xsaveopt instructions",
58 def FeatureXSAVEC : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
59 "Support xsavec instructions",
62 def FeatureXSAVES : SubtargetFeature<"xsaves", "HasXSAVES", "true",
63 "Support xsaves instructions",
// Unlike the boolean flags above, this def assigns the named level "SSE1" to
// the X86SSELevel field; the later SSE/AVX defs in this file assign other level
// names (SSE2, SSE3, ..., AVX512F) to the same field.
66 def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
67 "Enable SSE instructions">;
68 def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
69 "Enable SSE2 instructions",
71 def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
72 "Enable SSE3 instructions",
74 def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
75 "Enable SSSE3 instructions",
77 def FeatureSSE41 : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
78 "Enable SSE 4.1 instructions",
80 def FeatureSSE42 : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
81 "Enable SSE 4.2 instructions",
83 // The MMX subtarget feature is separate from the rest of the SSE features
84 // because it's important (for odd compatibility reasons) to be able to
85 // turn it off explicitly while allowing SSE+ to be on.
// It is recorded as the "MMX" value of X863DNowLevel, the same field that the
// "3dnow" and "3dnowa" features set, and it lists no implied features.
86 def FeatureMMX : SubtargetFeature<"mmx","X863DNowLevel", "MMX",
87 "Enable MMX instructions">;
88 def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
89 "Enable 3DNow! instructions",
91 def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
92 "Enable 3DNow! Athlon instructions",
94 // All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
95 // feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
96 // without disabling 64-bit mode. Nothing should imply this feature bit. It
97 // is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
// Accordingly the def carries no implied-feature list; it only sets the
// HasX86_64 field to "true".
98 def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
99 "Support 64-bit instructions">;
100 def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
101 "64-bit with cmpxchg16b",
103 def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
104 "Support SSE 4a instructions",
107 def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX",
108 "Enable AVX instructions",
110 def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
111 "Enable AVX2 instructions",
113 def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true",
114 "Enable three-operand fused multiple-add",
116 def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true",
117 "Support 16-bit floating point conversion instructions",
// AVX-512 foundation: assigns the "AVX512F" level to X86SSELevel (feature
// string "avx512f"). The trailing list names the features this one implies:
// AVX2, FMA and F16C.
119 def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F",
120 "Enable AVX-512 instructions",
121 [FeatureAVX2, FeatureFMA, FeatureF16C]>;
122 def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true",
123 "Enable AVX-512 Exponential and Reciprocal Instructions",
125 def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true",
126 "Enable AVX-512 Conflict Detection Instructions",
128 def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
129 "true", "Enable AVX-512 Population Count Instructions",
131 def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
132 "Enable AVX-512 PreFetch Instructions",
134 def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1",
136 "Prefetch with Intent to Write and T1 Hint">;
137 def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
138 "Enable AVX-512 Doubleword and Quadword Instructions",
140 def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true",
141 "Enable AVX-512 Byte and Word Instructions",
143 def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true",
144 "Enable AVX-512 Vector Length eXtensions",
146 def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
147 "Enable AVX-512 Vector Byte Manipulation Instructions",
149 def FeatureVBMI2 : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
150 "Enable AVX-512 further Vector Byte Manipulation Instructions",
152 def FeatureIFMA : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
153 "Enable AVX-512 Integer Fused Multiple-Add",
// Protection keys: boolean HasPKU flag with no implied features (unlike the
// avx512* defs around it, it is not tied to an AVX-512 level here).
155 def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true",
156 "Enable protection keys">;
157 def FeatureVNNI : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
158 "Enable AVX-512 Vector Neural Network Instructions",
160 def FeatureAVXVNNI : SubtargetFeature<"avxvnni", "HasAVXVNNI", "true",
161 "Support AVX_VNNI encoding",
163 def FeatureBF16 : SubtargetFeature<"avx512bf16", "HasBF16", "true",
164 "Support bfloat16 floating point",
166 def FeatureBITALG : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
167 "Enable AVX-512 Bit Algorithms",
169 def FeatureVP2INTERSECT : SubtargetFeature<"avx512vp2intersect",
170 "HasVP2INTERSECT", "true",
171 "Enable AVX-512 vp2intersect",
173 // FIXME: FP16 scalar intrinsics use the type v8f16, which is supposed to be
174 // guarded under condition hasVLX. So we imply it in FeatureFP16 currently.
175 // FIXME: FP16 conversion between f16 and i64 customize type v8i64, which is
176 // supposed to be guarded under condition hasDQI. So we imply it in FeatureFP16
// currently.
// As a result the implied list below carries FeatureVLX and FeatureDQI in
// addition to FeatureBWI.
178 def FeatureFP16 : SubtargetFeature<"avx512fp16", "HasFP16", "true",
179 "Support 16-bit floating point",
180 [FeatureBWI, FeatureVLX, FeatureDQI]>;
181 def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
182 "Enable packed carry-less multiplication instructions",
184 def FeatureGFNI : SubtargetFeature<"gfni", "HasGFNI", "true",
185 "Enable Galois Field Arithmetic Instructions",
// Vector carry-less multiply: boolean flag that implies both AVX and the
// scalar PCLMUL feature.
187 def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
188 "Enable vpclmulqdq instructions",
189 [FeatureAVX, FeaturePCLMUL]>;
// Four-operand FMA: boolean flag that implies AVX and SSE4A.
// NOTE(review): "multiple-add" in the description string looks like a typo for
// "multiply-add"; it is user-visible text, so it is left unchanged here.
190 def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
191 "Enable four-operand fused multiple-add",
192 [FeatureAVX, FeatureSSE4A]>;
193 def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
194 "Enable XOP instructions",
// Boolean flag (HasSSEUnalignedMem) stating that SSE instructions may take
// unaligned memory operands; lists no implied features.
196 def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
197 "HasSSEUnalignedMem", "true",
198 "Allow unaligned memory operands with SSE instructions">;
199 def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
200 "Enable AES instructions",
// Vector AES: boolean flag that implies both AVX and the base AES feature.
202 def FeatureVAES : SubtargetFeature<"vaes", "HasVAES", "true",
203 "Promote selected AES instructions to AVX512/AVX registers",
204 [FeatureAVX, FeatureAES]>;
// Standalone ISA extension flags. Every def in this run sets one boolean
// Has* field to "true" and lists no implied features.
205 def FeatureTBM : SubtargetFeature<"tbm", "HasTBM", "true",
206 "Enable TBM instructions">;
207 def FeatureLWP : SubtargetFeature<"lwp", "HasLWP", "true",
208 "Enable LWP instructions">;
209 def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true",
210 "Support MOVBE instruction">;
// The feature string is spelled "rdrnd" (no 'a'), unlike the def/field name.
211 def FeatureRDRAND : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
212 "Support RDRAND instruction">;
213 def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
214 "Support FS/GS Base instructions">;
215 def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
216 "Support LZCNT instruction">;
217 def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true",
218 "Support BMI instructions">;
219 def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true",
220 "Support BMI2 instructions">;
221 def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true",
222 "Support RTM instructions">;
223 def FeatureADX : SubtargetFeature<"adx", "HasADX", "true",
224 "Support ADX instructions">;
225 def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true",
226 "Enable SHA instructions",
// More standalone boolean ISA flags; none of the defs in this run lists
// implied features.
228 def FeatureSHSTK : SubtargetFeature<"shstk", "HasSHSTK", "true",
229 "Support CET Shadow-Stack instructions">;
230 def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
231 "Support PRFCHW instructions">;
232 def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
233 "Support RDSEED instruction">;
// The feature string is just "sahf", but the field (HasLAHFSAHF64) and the
// description cover both LAHF and SAHF, specifically in 64-bit mode.
234 def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF64", "true",
235 "Support LAHF and SAHF instructions in 64-bit mode">;
236 def FeatureMWAITX : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
237 "Enable MONITORX/MWAITX timer functionality">;
238 def FeatureCLZERO : SubtargetFeature<"clzero", "HasCLZERO", "true",
239 "Enable Cache Line Zero">;
240 def FeatureCLDEMOTE : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
241 "Enable Cache Demote">;
242 def FeaturePTWRITE : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
243 "Support ptwrite instruction">;
// Base AMX flag; it lists no implied features itself.
244 def FeatureAMXTILE : SubtargetFeature<"amx-tile", "HasAMXTILE", "true",
245 "Support AMX-TILE instructions">;
246 def FeatureAMXINT8 : SubtargetFeature<"amx-int8", "HasAMXINT8", "true",
247 "Support AMX-INT8 instructions",
249 def FeatureAMXBF16 : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
250 "Support AMX-BF16 instructions",
// System, cache-control and wait-related ISA flags. Each def sets one boolean
// Has* field to "true"; none lists implied features.
252 def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
253 "Invalidate Process-Context Identifier">;
254 def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true",
255 "Enable Software Guard Extensions">;
256 def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
257 "Flush A Cache Line Optimized">;
258 def FeatureCLWB : SubtargetFeature<"clwb", "HasCLWB", "true",
259 "Cache Line Write Back">;
260 def FeatureWBNOINVD : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
261 "Write Back No Invalidate">;
262 def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
263 "Support RDPID instructions">;
264 def FeatureWAITPKG : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
265 "Wait and pause enhancements">;
266 def FeatureENQCMD : SubtargetFeature<"enqcmd", "HasENQCMD", "true",
267 "Has ENQCMD instructions">;
268 def FeatureKL : SubtargetFeature<"kl", "HasKL", "true",
269 "Support Key Locker kl Instructions",
271 def FeatureWIDEKL : SubtargetFeature<"widekl", "HasWIDEKL", "true",
272 "Support Key Locker wide Instructions",
// Further standalone boolean ISA flags; none of the defs in this run lists
// implied features.
274 def FeatureHRESET : SubtargetFeature<"hreset", "HasHRESET", "true",
275 "Has hreset instruction">;
276 def FeatureSERIALIZE : SubtargetFeature<"serialize", "HasSERIALIZE", "true",
277 "Has serialize instruction">;
278 def FeatureTSXLDTRK : SubtargetFeature<"tsxldtrk", "HasTSXLDTRK", "true",
279 "Support TSXLDTRK instructions">;
280 def FeatureUINTR : SubtargetFeature<"uintr", "HasUINTR", "true",
281 "Has UINTR Instructions">;
282 def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
283 "platform configuration instruction">;
284 def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
285 "Support movdiri instruction">;
286 def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
287 "Support movdir64b instruction">;
289 // Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
290 // "string operations"). See "REP String Enhancement" in the Intel Software
291 // Development Manual. This feature essentially means that REP MOVSB will copy
292 // using the largest available size instead of copying bytes one by one, making
293 // it at least as fast as REP MOVS{W,D,Q}.
296 "ermsb", "HasERMSB", "true",
297 "REP MOVS/STOS are fast">;
299 // Icelake and newer processors have Fast Short REP MOV.
302 "fsrm", "HasFSRM", "true",
303 "REP MOVSB of short lengths is faster">;
306 : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
307 "Use software floating point features">;
309 //===----------------------------------------------------------------------===//
310 // X86 Subtarget Security Mitigation features
311 //===----------------------------------------------------------------------===//
313 // Lower indirect calls using a special construct called a `retpoline` to
314 // mitigate potential Spectre v2 attacks against them.
315 def FeatureRetpolineIndirectCalls
317 "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
318 "Remove speculation of indirect calls from the generated code">;
320 // Lower indirect branches and switches either using conditional branch trees
321 // or using a special construct called a `retpoline` to mitigate potential
322 // Spectre v2 attacks against them.
323 def FeatureRetpolineIndirectBranches
325 "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
326 "Remove speculation of indirect branches from the generated code">;
328 // Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
329 // `retpoline-indirect-branches` above.
331 : SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
332 "Remove speculation of indirect branches from the "
333 "generated code, either by avoiding them entirely or "
334 "lowering them with a speculation blocking construct",
335 [FeatureRetpolineIndirectCalls,
336 FeatureRetpolineIndirectBranches]>;
338 // Rely on external thunks for the emitted retpoline calls. This allows users
339 // to provide their own custom thunk definitions in highly specialized
340 // environments such as a kernel that does boot-time hot patching.
341 def FeatureRetpolineExternalThunk
343 "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
344 "When lowering an indirect call or branch using a `retpoline`, rely "
345 "on the specified user provided thunk rather than emitting one "
346 "ourselves. Only has effect when combined with some other retpoline "
347 "feature", [FeatureRetpolineIndirectCalls]>;
349 // Mitigate LVI attacks against indirect calls/branches and call returns
350 def FeatureLVIControlFlowIntegrity
352 "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
353 "Prevent indirect calls/branches from using a memory operand, and "
354 "precede all indirect calls/branches from a register with an "
355 "LFENCE instruction to serialize control flow. Also decompose RET "
356 "instructions into a POP+LFENCE+JMP sequence.">;
358 // Enable SESES to mitigate speculative execution attacks
359 def FeatureSpeculativeExecutionSideEffectSuppression
361 "seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
362 "Prevent speculative execution side channel timing attacks by "
363 "inserting a speculation barrier before memory reads, memory writes, "
364 "and conditional branches. Implies LVI Control Flow integrity.",
365 [FeatureLVIControlFlowIntegrity]>;
367 // Mitigate LVI attacks against data loads
368 def FeatureLVILoadHardening
370 "lvi-load-hardening", "UseLVILoadHardening", "true",
371 "Insert LFENCE instructions to prevent data speculatively injected "
372 "into loads from being used maliciously.">;
374 //===----------------------------------------------------------------------===//
375 // X86 Subtarget Tuning features
376 //===----------------------------------------------------------------------===//
// Tuning flags use the same SubtargetFeature record shape as the ISA flags
// above. These two mark an instruction as slow by setting an Is*Slow field to
// "true".
378 def TuningSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
379 "SHLD instruction is slow">;
381 def TuningSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
382 "PMULLD instruction is slow">;
384 def TuningSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
386 "PMADDWD is slower than PMULLD">;
// Unaligned-access tuning flags, one per access width (16 and 32 bytes).
388 // FIXME: This should not apply to CPUs that do not have SSE.
389 def TuningSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
390 "IsUAMem16Slow", "true",
391 "Slow unaligned 16-byte memory access">;
393 def TuningSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
394 "IsUAMem32Slow", "true",
395 "Slow unaligned 32-byte memory access">;
// Stack-adjustment tuning: sets UseLeaForSP.
397 def TuningLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
398 "Use LEA for adjusting the stack pointer">;
// For the two divide flags the feature string names the rewrite
// ("idivl-to-divb", "idivq-to-divl") while the def and field names
// (HasSlowDivide32/64) name the hardware property that motivates it.
400 def TuningSlowDivide32 : SubtargetFeature<"idivl-to-divb",
401 "HasSlowDivide32", "true",
402 "Use 8-bit divide for positive values less than 256">;
404 def TuningSlowDivide64 : SubtargetFeature<"idivq-to-divl",
405 "HasSlowDivide64", "true",
406 "Use 32-bit divide for positive values less than 2^32">;
// Sets PadShortFunctions; the description gives no further detail on what
// counts as "short".
408 def TuningPadShortFunctions : SubtargetFeature<"pad-short-functions",
409 "PadShortFunctions", "true",
410 "Pad short functions">;
412 // On some processors, instructions that implicitly take two memory operands are
413 // slow. In practice, this means that CALL, PUSH, and POP with memory operands
414 // should be avoided in favor of a MOV + register CALL/PUSH/POP.
415 def TuningSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
416 "SlowTwoMemOps", "true",
417 "Two memory operand instructions are slow">;
// LEA-related tuning flags: three independent booleans (LEAUsesAG, SlowLEA,
// Slow3OpsLEA); none implies another.
419 def TuningLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
420 "LEA instruction needs inputs at AG stage">;
422 def TuningSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
423 "LEA instruction with certain arguments is slow">;
425 def TuningSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
426 "LEA instruction with 3 ops or certain registers is slow">;
428 def TuningSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
429 "INC and DEC instructions are slower than ADD and SUB">;
// False-dependency flags: POPCNT has its own flag, while LZCNT and TZCNT share
// one (feature string "false-deps-lzcnt-tzcnt", field HasLZCNTFalseDeps).
431 def TuningPOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
432 "HasPOPCNTFalseDeps", "true",
433 "POPCNT has a false dependency on dest register">;
435 def TuningLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
436 "HasLZCNTFalseDeps", "true",
437 "LZCNT/TZCNT have a false dependency on dest register">;
439 // On recent X86 (port bound) processors, it's preferable to combine to a single
440 // shuffle using a variable mask rather than use multiple fixed shuffles.
// Two independent boolean tuning flags for variable-mask shuffles: one for
// cross-lane shuffles and one for per-lane shuffles. Neither implies the other.
441 def TuningFastVariableCrossLaneShuffle
442 : SubtargetFeature<"fast-variable-crosslane-shuffle",
443 "HasFastVariableCrossLaneShuffle",
444 "true", "Cross-lane shuffles with variable masks are fast">;
445 def TuningFastVariablePerLaneShuffle
446 : SubtargetFeature<"fast-variable-perlane-shuffle",
447 "HasFastVariablePerLaneShuffle",
448 "true", "Per-lane shuffles with variable masks are fast">;
450 // On some X86 processors, a vzeroupper instruction should be inserted after
451 // using ymm/zmm registers before executing code that may use SSE instructions.
452 def TuningInsertVZEROUPPER
453 : SubtargetFeature<"vzeroupper",
455 "true", "Should insert vzeroupper instructions">;
457 // TuningFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
458 // than the corresponding NR code. TuningFastVectorFSQRT should be enabled if
459 // vector FSQRT has higher throughput than the corresponding NR code.
460 // The idea is that throughput bound code is likely to be vectorized, so for
461 // vectorized code we should care about the throughput of SQRT operations.
462 // But if the code is scalar that probably means that the code has some kind of
463 // dependency and we should care more about reducing the latency.
// "NR" above is the Newton-Raphson sequence named in the description strings
// below. The scalar and vector flags are separate booleans with no implied
// features.
464 def TuningFastScalarFSQRT
465 : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
466 "true", "Scalar SQRT is fast (disable Newton-Raphson)">;
467 def TuningFastVectorFSQRT
468 : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
469 "true", "Vector SQRT is fast (disable Newton-Raphson)">;
471 // If lzcnt has equivalent latency/throughput to most simple integer ops, it can
472 // be used to replace test/set sequences.
475 "fast-lzcnt", "HasFastLZCNT", "true",
476 "LZCNT instructions are as fast as most simple integer ops">;
478 // If the target can efficiently decode NOPs up to 7 bytes in length.
479 def TuningFast7ByteNOP
481 "fast-7bytenop", "HasFast7ByteNOP", "true",
482 "Target can quickly decode up to 7 byte NOPs">;
484 // If the target can efficiently decode NOPs up to 11 bytes in length.
485 def TuningFast11ByteNOP
487 "fast-11bytenop", "HasFast11ByteNOP", "true",
488 "Target can quickly decode up to 11 byte NOPs">;
490 // If the target can efficiently decode NOPs up to 15 bytes in length.
491 def TuningFast15ByteNOP
493 "fast-15bytenop", "HasFast15ByteNOP", "true",
494 "Target can quickly decode up to 15 byte NOPs">;
496 // Sandy Bridge and newer processors can use SHLD with the same source on both
497 // inputs to implement rotate to avoid the partial flag update of the normal
498 // rotate instructions.
499 def TuningFastSHLDRotate
501 "fast-shld-rotate", "HasFastSHLDRotate", "true",
502 "SHLD can be used as a faster rotate">;
504 // Bulldozer and newer processors can merge CMP/TEST (but not other
505 // instructions) with conditional branches.
// Sets HasBranchFusion; this is a separate flag from the "macrofusion" tuning
// feature defined further down in this file.
506 def TuningBranchFusion
507 : SubtargetFeature<"branchfusion", "HasBranchFusion", "true",
508 "CMP/TEST can be fused with conditional branches">;
510 // Sandy Bridge and newer processors have many instructions that can be
511 // fused with conditional branches and pass through the CPU as a single
// operation.
// Sets HasMacroFusion; this is a separate flag from the narrower "branchfusion"
// tuning feature (CMP/TEST only) defined earlier in this file.
513 def TuningMacroFusion
514 : SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
515 "Various instructions can be fused with conditional branches">;
517 // Gather is available since Haswell (AVX2 set). So technically, we can
518 // generate Gathers on all AVX2 processors. But the overhead on HSW is high.
519 // Skylake Client processor has faster Gathers than HSW and performance is
520 // similar to Skylake Server (AVX-512).
522 : SubtargetFeature<"fast-gather", "HasFastGather", "true",
523 "Indicates if gather is reasonably fast">;
// Vector-width preference flags. The 128-bit and 256-bit preferences are
// separate booleans (Prefer128Bit, Prefer256Bit); nothing in these defs makes
// them mutually exclusive.
525 def TuningPrefer128Bit
526 : SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true",
527 "Prefer 128-bit AVX instructions">;
529 def TuningPrefer256Bit
530 : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
531 "Prefer 256-bit AVX instructions">;
// Sets PreferMaskRegisters: favour AVX512 mask registers over PTEST/MOVMSK.
533 def TuningPreferMaskRegisters
534 : SubtargetFeature<"prefer-mask-registers", "PreferMaskRegisters", "true",
535 "Prefer AVX512 mask registers over PTEST/MOVMSK">;
// The description is split across two adjacent string literals, so the trailing
// space inside the first literal is significant.
537 def TuningFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
538 "Indicates that the BEXTR instruction is implemented as a single uop "
539 "with good throughput">;
541 // Combine vector math operations with shuffles into horizontal math
542 // instructions if a CPU implements horizontal operations (introduced with
543 // SSE3) with better latency/throughput than the alternative sequence.
544 def TuningFastHorizontalOps
546 "fast-hops", "HasFastHorizontalOps", "true",
547 "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
548 "normal vector instructions with shuffles">;
550 def TuningFastScalarShiftMasks
552 "fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
553 "Prefer a left/right scalar logical shift pair over a shift+and pair">;
555 def TuningFastVectorShiftMasks
557 "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
558 "Prefer a left/right vector logical shift pair over a shift+and pair">;
561 : SubtargetFeature<"fast-movbe", "HasFastMOVBE", "true",
562 "Prefer a movbe over a single-use load + bswap / single-use bswap + store">;
// Sets UseGLMDivSqrtCosts: selects the Goldmont-specific floating point
// div/sqrt costs named in the description string.
564 def TuningUseGLMDivSqrtCosts
565 : SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
566 "Use Goldmont specific floating point div/sqrt costs">;
568 // Enable use of alias analysis during code generation.
// Note this def uses the Feature* prefix although it sits in the tuning
// section of the file.
569 def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
570 "Use alias analysis during codegen">;
572 //===----------------------------------------------------------------------===//
574 // TODO: Remove these - use general tuning features to determine codegen.
575 //===----------------------------------------------------------------------===//
// Processor-family markers. Both defs have an empty feature string and an empty
// description; each assigns a family name to the X86ProcFamily field. Per the
// TODO above, they are meant to be replaced by general tuning features.
578 def ProcIntelAtom : SubtargetFeature<"", "X86ProcFamily", "IntelAtom", "">;
580 def ProcIntelSLM : SubtargetFeature<"", "X86ProcFamily", "IntelSLM", "">;
582 //===----------------------------------------------------------------------===//
583 // Register File Description
584 //===----------------------------------------------------------------------===//
586 include "X86RegisterInfo.td"
587 include "X86RegisterBanks.td"
589 //===----------------------------------------------------------------------===//
590 // Instruction Descriptions
591 //===----------------------------------------------------------------------===//
593 include "X86Schedule.td"
594 include "X86InstrInfo.td"
595 include "X86SchedPredicates.td"
// The target's InstrInfo record, instantiated with no field overrides.
597 def X86InstrInfo : InstrInfo;
599 //===----------------------------------------------------------------------===//
600 // X86 Scheduler Models
601 //===----------------------------------------------------------------------===//
603 include "X86ScheduleAtom.td"
604 include "X86SchedSandyBridge.td"
605 include "X86SchedHaswell.td"
606 include "X86SchedBroadwell.td"
607 include "X86ScheduleSLM.td"
608 include "X86ScheduleZnver1.td"
609 include "X86ScheduleZnver2.td"
610 include "X86ScheduleZnver3.td"
611 include "X86ScheduleBdVer2.td"
612 include "X86ScheduleBtVer2.td"
613 include "X86SchedSkylakeClient.td"
614 include "X86SchedSkylakeServer.td"
616 //===----------------------------------------------------------------------===//
617 // X86 Processor Feature Lists
618 //===----------------------------------------------------------------------===//
620 def ProcessorFeatures {
621 // x86-64 and x86-64-v[234]
622 list<SubtargetFeature> X86_64V1Features = [
623 FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSE2,
624 FeatureFXSR, FeatureNOPL, Feature64Bit
626 list<SubtargetFeature> X86_64V2Features = !listconcat(
628 [FeatureCMPXCHG16B, FeatureLAHFSAHF, FeaturePOPCNT, FeatureSSE42]);
629 list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [
630 FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT,
631 FeatureMOVBE, FeatureXSAVE
633 list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [
641 list<SubtargetFeature> NHMFeatures = X86_64V2Features;
642 list<SubtargetFeature> NHMTuning = [TuningMacroFusion,
643 TuningInsertVZEROUPPER];
646 list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
647 list<SubtargetFeature> WSMTuning = NHMTuning;
648 list<SubtargetFeature> WSMFeatures =
649 !listconcat(NHMFeatures, WSMAdditionalFeatures);
652 list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX,
655 list<SubtargetFeature> SNBTuning = [TuningMacroFusion,
659 TuningFastScalarFSQRT,
660 TuningFastSHLDRotate,
662 TuningPOPCNTFalseDeps,
663 TuningInsertVZEROUPPER];
664 list<SubtargetFeature> SNBFeatures =
665 !listconcat(WSMFeatures, SNBAdditionalFeatures);
668 list<SubtargetFeature> IVBAdditionalFeatures = [FeatureRDRAND,
671 list<SubtargetFeature> IVBTuning = SNBTuning;
672 list<SubtargetFeature> IVBFeatures =
673 !listconcat(SNBFeatures, IVBAdditionalFeatures);
676 list<SubtargetFeature> HSWAdditionalFeatures = [FeatureAVX2,
684 list<SubtargetFeature> HSWTuning = [TuningMacroFusion,
687 TuningFastScalarFSQRT,
688 TuningFastSHLDRotate,
690 TuningFastVariableCrossLaneShuffle,
691 TuningFastVariablePerLaneShuffle,
692 TuningPOPCNTFalseDeps,
693 TuningLZCNTFalseDeps,
694 TuningInsertVZEROUPPER];
695 list<SubtargetFeature> HSWFeatures =
696 !listconcat(IVBFeatures, HSWAdditionalFeatures);
699 list<SubtargetFeature> BDWAdditionalFeatures = [FeatureADX,
702 list<SubtargetFeature> BDWTuning = HSWTuning;
703 list<SubtargetFeature> BDWFeatures =
704 !listconcat(HSWFeatures, BDWAdditionalFeatures);
707 list<SubtargetFeature> SKLAdditionalFeatures = [FeatureAES,
711 list<SubtargetFeature> SKLTuning = [TuningFastGather,
715 TuningFastScalarFSQRT,
716 TuningFastVectorFSQRT,
717 TuningFastSHLDRotate,
719 TuningFastVariableCrossLaneShuffle,
720 TuningFastVariablePerLaneShuffle,
721 TuningPOPCNTFalseDeps,
722 TuningInsertVZEROUPPER];
723 list<SubtargetFeature> SKLFeatures =
724 !listconcat(BDWFeatures, SKLAdditionalFeatures);
727 list<SubtargetFeature> SKXAdditionalFeatures = [FeatureAES,
738 list<SubtargetFeature> SKXTuning = [TuningFastGather,
742 TuningFastScalarFSQRT,
743 TuningFastVectorFSQRT,
744 TuningFastSHLDRotate,
746 TuningFastVariableCrossLaneShuffle,
747 TuningFastVariablePerLaneShuffle,
749 TuningPOPCNTFalseDeps,
750 TuningInsertVZEROUPPER];
751 list<SubtargetFeature> SKXFeatures =
752 !listconcat(BDWFeatures, SKXAdditionalFeatures);
755 list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI];
756 list<SubtargetFeature> CLXTuning = SKXTuning;
757 list<SubtargetFeature> CLXFeatures =
758 !listconcat(SKXFeatures, CLXAdditionalFeatures);
761 list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16];
762 list<SubtargetFeature> CPXTuning = SKXTuning;
763 list<SubtargetFeature> CPXFeatures =
764 !listconcat(CLXFeatures, CPXAdditionalFeatures);
767 list<SubtargetFeature> CNLAdditionalFeatures = [FeatureAVX512,
776 list<SubtargetFeature> CNLTuning = [TuningFastGather,
780 TuningFastScalarFSQRT,
781 TuningFastVectorFSQRT,
782 TuningFastSHLDRotate,
784 TuningFastVariableCrossLaneShuffle,
785 TuningFastVariablePerLaneShuffle,
787 TuningInsertVZEROUPPER];
788 list<SubtargetFeature> CNLFeatures =
789 !listconcat(SKLFeatures, CNLAdditionalFeatures);
792 list<SubtargetFeature> ICLAdditionalFeatures = [FeatureBITALG,
801 list<SubtargetFeature> ICLTuning = CNLTuning;
802 list<SubtargetFeature> ICLFeatures =
803 !listconcat(CNLFeatures, ICLAdditionalFeatures);
806 list<SubtargetFeature> ICXAdditionalFeatures = [FeaturePCONFIG,
809 list<SubtargetFeature> ICXTuning = CNLTuning;
810 list<SubtargetFeature> ICXFeatures =
811 !listconcat(ICLFeatures, ICXAdditionalFeatures);
814 list<SubtargetFeature> TGLAdditionalFeatures = [FeatureVP2INTERSECT,
819 list<SubtargetFeature> TGLTuning = CNLTuning;
820 list<SubtargetFeature> TGLFeatures =
821 !listconcat(ICLFeatures, TGLAdditionalFeatures );
824 list<SubtargetFeature> SPRAdditionalFeatures = [FeatureAMXTILE,
841 list<SubtargetFeature> SPRTuning = ICXTuning;
842 list<SubtargetFeature> SPRFeatures =
843 !listconcat(ICXFeatures, SPRAdditionalFeatures);
846 list<SubtargetFeature> AtomFeatures = [FeatureX87,
857 list<SubtargetFeature> AtomTuning = [ProcIntelAtom,
864 TuningPadShortFunctions,
865 TuningInsertVZEROUPPER];
868 list<SubtargetFeature> SLMAdditionalFeatures = [FeatureSSE42,
873 list<SubtargetFeature> SLMTuning = [ProcIntelSLM,
881 TuningPOPCNTFalseDeps,
882 TuningInsertVZEROUPPER];
883 list<SubtargetFeature> SLMFeatures =
884 !listconcat(AtomFeatures, SLMAdditionalFeatures);
887 list<SubtargetFeature> GLMAdditionalFeatures = [FeatureAES,
896 list<SubtargetFeature> GLMTuning = [TuningUseGLMDivSqrtCosts,
901 TuningPOPCNTFalseDeps,
902 TuningInsertVZEROUPPER];
// ISA set used by "goldmont": the SLM set plus the GLM additions.
list<SubtargetFeature> GLMFeatures = !listconcat(
    SLMFeatures,
    GLMAdditionalFeatures);
907 list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE,
909 list<SubtargetFeature> GLPTuning = [TuningUseGLMDivSqrtCosts,
914 TuningInsertVZEROUPPER];
// ISA set used by "goldmont-plus": the GLM set plus the GLP additions.
list<SubtargetFeature> GLPFeatures = !listconcat(
    GLMFeatures,
    GLPAdditionalFeatures);
919 list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLWB,
// TRM lists (used by "tremont"): tuning is taken unchanged from GLP; the ISA
// set is the GLP set plus the TRM additions.
list<SubtargetFeature> TRMTuning = GLPTuning;
list<SubtargetFeature> TRMFeatures = !listconcat(
    GLPFeatures,
    TRMAdditionalFeatures);
926 list<SubtargetFeature> ADLAdditionalFeatures = [FeatureSERIALIZE,
// ADL lists (used by "alderlake"): tuning is taken unchanged from SKL, while
// the ISA set is built on top of the TRM set plus the ADL additions.
list<SubtargetFeature> ADLTuning = SKLTuning;
list<SubtargetFeature> ADLFeatures = !listconcat(
    TRMFeatures,
    ADLAdditionalFeatures);
951 list<SubtargetFeature> KNLFeatures = [FeatureX87,
981 list<SubtargetFeature> KNLTuning = [TuningSlowDivide64,
985 TuningPreferMaskRegisters,
// ISA set used by "knm": the KNL set plus VPOPCNTDQ.
// TODO: Add the AVX5124FMAPS/AVX5124VNNIW features.
list<SubtargetFeature> KNMFeatures = !listconcat(
    KNLFeatures,
    [FeatureVPOPCNTDQ]);
994 list<SubtargetFeature> BarcelonaFeatures = [FeatureX87,
1007 list<SubtargetFeature> BarcelonaTuning = [TuningFastScalarShiftMasks,
1009 TuningInsertVZEROUPPER];
1012 list<SubtargetFeature> BtVer1Features = [FeatureX87,
1026 list<SubtargetFeature> BtVer1Tuning = [TuningFast15ByteNOP,
1027 TuningFastScalarShiftMasks,
1028 TuningFastVectorShiftMasks,
1030 TuningInsertVZEROUPPER];
1033 list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX,
1041 list<SubtargetFeature> BtVer2Tuning = [TuningFastLZCNT,
1043 TuningFastHorizontalOps,
1044 TuningFast15ByteNOP,
1045 TuningFastScalarShiftMasks,
1046 TuningFastVectorShiftMasks,
// ISA set used by "btver2": the BtVer1 set plus the BtVer2 additions.
list<SubtargetFeature> BtVer2Features = !listconcat(
    BtVer1Features,
    BtVer2AdditionalFeatures);
1053 list<SubtargetFeature> BdVer1Features = [FeatureX87,
1070 list<SubtargetFeature> BdVer1Tuning = [TuningSlowSHLD,
1071 TuningFast11ByteNOP,
1072 TuningFastScalarShiftMasks,
1074 TuningInsertVZEROUPPER];
1077 list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C,
1081 list<SubtargetFeature> BdVer2AdditionalTuning = [TuningFastBEXTR,
// BdVer2 lists (used by "bdver2"): both the tuning list and the ISA set are
// the BdVer1 lists extended with the BdVer2-specific additions.
list<SubtargetFeature> BdVer2Tuning = !listconcat(
    BdVer1Tuning,
    BdVer2AdditionalTuning);
list<SubtargetFeature> BdVer2Features = !listconcat(
    BdVer1Features,
    BdVer2AdditionalFeatures);
1089 list<SubtargetFeature> BdVer3AdditionalFeatures = [FeatureXSAVEOPT,
// BdVer3 lists (used by "bdver3"): tuning is taken unchanged from BdVer2; the
// ISA set is the BdVer2 set plus the BdVer3 additions.
list<SubtargetFeature> BdVer3Tuning = BdVer2Tuning;
list<SubtargetFeature> BdVer3Features = !listconcat(
    BdVer2Features,
    BdVer3AdditionalFeatures);
1096 list<SubtargetFeature> BdVer4AdditionalFeatures = [FeatureAVX2,
// BdVer4 lists (used by "bdver4"): tuning is taken unchanged from BdVer3; the
// ISA set is the BdVer3 set plus the BdVer4 additions.
list<SubtargetFeature> BdVer4Tuning = BdVer3Tuning;
list<SubtargetFeature> BdVer4Features = !listconcat(
    BdVer3Features,
    BdVer4AdditionalFeatures);
1106 // AMD Zen Processors common ISAs
1107 list<SubtargetFeature> ZNFeatures = [FeatureADX,
1139 list<SubtargetFeature> ZNTuning = [TuningFastLZCNT,
1141 TuningFast15ByteNOP,
1143 TuningFastScalarShiftMasks,
1146 TuningInsertVZEROUPPER];
1147 list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
// ZN2 lists (used by "znver2"): tuning is taken unchanged from ZN; the ISA
// set is the ZN set plus the ZN2 additions.
list<SubtargetFeature> ZN2Tuning = ZNTuning;
list<SubtargetFeature> ZN2Features = !listconcat(
    ZNFeatures,
    ZN2AdditionalFeatures);
1153 list<SubtargetFeature> ZN3AdditionalFeatures = [FeatureFSRM,
1158 list<SubtargetFeature> ZN3AdditionalTuning =
1160 TuningFastVariablePerLaneShuffle];
// ZN3 lists (used by "znver3"): tuning extends the base ZN tuning (not ZN2),
// while the ISA set extends the ZN2 set.
list<SubtargetFeature> ZN3Tuning = !listconcat(
    ZNTuning,
    ZN3AdditionalTuning);
list<SubtargetFeature> ZN3Features = !listconcat(
    ZN2Features,
    ZN3AdditionalFeatures);
1167 //===----------------------------------------------------------------------===//
1168 // X86 processors supported.
1169 //===----------------------------------------------------------------------===//
// Processor definition whose scheduling model is fixed to GenericModel.
//   Name         - CPU name string.
//   Features     - ISA feature list, forwarded to ProcessorModel.
//   TuneFeatures - tuning feature list, forwarded to ProcessorModel.
class Proc<
  string Name,
  list<SubtargetFeature> Features,
  list<SubtargetFeature> TuneFeatures
> : ProcessorModel<Name, GenericModel, Features, TuneFeatures>;
// Processor definition with an explicitly chosen scheduling model.
//   Name         - CPU name string.
//   Model        - scheduling machine model, forwarded to ProcessorModel.
//   Features     - ISA feature list, forwarded to ProcessorModel.
//   TuneFeatures - tuning feature list, forwarded to ProcessorModel.
class ProcModel<
  string Name,
  SchedMachineModel Model,
  list<SubtargetFeature> Features,
  list<SubtargetFeature> TuneFeatures
> : ProcessorModel<Name, Model, Features, TuneFeatures>;
1180 // NOTE: CMPXCHG8B is here for legacy compatibility so that it is only disabled
1181 // if i386/i486 is specifically requested.
1182 // NOTE: 64Bit is here as "generic" is the default llc CPU. The X86Subtarget
1183 // constructor checks that any CPU used in 64-bit mode has Feature64Bit enabled.
1184 // It has no effect on code generation.
1185 def : ProcModel<"generic", SandyBridgeModel,
1186 [FeatureX87, FeatureCMPXCHG8B, Feature64Bit],
1191 TuningInsertVZEROUPPER]>;
// i386 through pentium-mmx. i386 and i486 are the only entries here without
// CMPXCHG8B; all five share the same tuning list.
def : Proc<
  "i386",
  [FeatureX87],
  [TuningSlowUAMem16, TuningInsertVZEROUPPER]
>;
def : Proc<
  "i486",
  [FeatureX87],
  [TuningSlowUAMem16, TuningInsertVZEROUPPER]
>;
def : Proc<
  "i586",
  [FeatureX87, FeatureCMPXCHG8B],
  [TuningSlowUAMem16, TuningInsertVZEROUPPER]
>;
def : Proc<
  "pentium",
  [FeatureX87, FeatureCMPXCHG8B],
  [TuningSlowUAMem16, TuningInsertVZEROUPPER]
>;
def : Proc<
  "pentium-mmx",
  [FeatureX87, FeatureCMPXCHG8B, FeatureMMX],
  [TuningSlowUAMem16, TuningInsertVZEROUPPER]
>;
// i686: x87, CMPXCHG8B and CMOV with the GenericModel scheduler.
def : Proc<
  "i686",
  [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV],
  [TuningSlowUAMem16, TuningInsertVZEROUPPER]
>;
1206 def : Proc<"pentiumpro", [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV,
1208 [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// pentium2: adds MMX, FXSR and NOPL on top of x87/CMPXCHG8B/CMOV.
def : Proc<
  "pentium2",
  [
    FeatureX87,
    FeatureCMPXCHG8B,
    FeatureMMX,
    FeatureCMOV,
    FeatureFXSR,
    FeatureNOPL
  ],
  [TuningSlowUAMem16, TuningInsertVZEROUPPER]
>;
1214 foreach P = ["pentium3", "pentium3m"] in {
// One record per CPU name P supplied by the enclosing foreach.
def : Proc<
  P,
  [
    FeatureX87,
    FeatureCMPXCHG8B,
    FeatureMMX,
    FeatureSSE1,
    FeatureFXSR,
    FeatureNOPL,
    FeatureCMOV
  ],
  [TuningSlowUAMem16, TuningInsertVZEROUPPER]
>;
1220 // Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
1221 // The intent is to enable it for pentium4 which is the current default
1222 // processor in a vanilla 32-bit clang compilation when no specific
1223 // architecture is specified. This generally gives a nice performance
1224 // increase on silvermont, with largely neutral behavior on other
1225 // contemporary large core processors.
1226 // pentium-m, pentium4m, prescott and nocona are included as a preventative
1227 // measure to avoid performance surprises, in case clang's default cpu
1228 // changes slightly.
// pentium-m: SSE2-level feature set, scheduled with GenericPostRAModel.
def : ProcModel<
  "pentium-m",
  GenericPostRAModel,
  [
    FeatureX87,
    FeatureCMPXCHG8B,
    FeatureMMX,
    FeatureSSE2,
    FeatureFXSR,
    FeatureNOPL,
    FeatureCMOV
  ],
  [TuningSlowUAMem16, TuningInsertVZEROUPPER]
>;
1235 foreach P = ["pentium4", "pentium4m"] in {
// One record per CPU name P supplied by the enclosing foreach; SSE2-level
// feature set, scheduled with GenericPostRAModel.
def : ProcModel<
  P,
  GenericPostRAModel,
  [
    FeatureX87,
    FeatureCMPXCHG8B,
    FeatureMMX,
    FeatureSSE2,
    FeatureFXSR,
    FeatureNOPL,
    FeatureCMOV
  ],
  [TuningSlowUAMem16, TuningInsertVZEROUPPER]
>;
// lakemont: CMPXCHG8B is the only ISA feature enabled (no FeatureX87).
def : Proc<
  "lakemont",
  [FeatureCMPXCHG8B],
  [TuningSlowUAMem16, TuningInsertVZEROUPPER]
>;
// yonah: SSE3-level feature set, scheduled with SandyBridgeModel.
def : ProcModel<
  "yonah",
  SandyBridgeModel,
  [
    FeatureX87,
    FeatureCMPXCHG8B,
    FeatureMMX,
    FeatureSSE3,
    FeatureFXSR,
    FeatureNOPL,
    FeatureCMOV
  ],
  [TuningSlowUAMem16, TuningInsertVZEROUPPER]
>;
// prescott: SSE3-level feature set, scheduled with GenericPostRAModel.
def : ProcModel<
  "prescott",
  GenericPostRAModel,
  [
    FeatureX87,
    FeatureCMPXCHG8B,
    FeatureMMX,
    FeatureSSE3,
    FeatureFXSR,
    FeatureNOPL,
    FeatureCMOV
  ],
  [TuningSlowUAMem16, TuningInsertVZEROUPPER]
>;
1257 def : ProcModel<"nocona", GenericPostRAModel, [
1270 TuningInsertVZEROUPPER
1273 // Intel Core 2 Solo/Duo.
1274 def : ProcModel<"core2", SandyBridgeModel, [
1289 TuningInsertVZEROUPPER
1291 def : ProcModel<"penryn", SandyBridgeModel, [
1306 TuningInsertVZEROUPPER
1310 foreach P = ["bonnell", "atom"] in {
// One record per CPU name P supplied by the enclosing foreach; uses the Atom
// scheduling model and the Atom feature/tuning lists.
def : ProcModel<
  P,
  AtomModel,
  ProcessorFeatures.AtomFeatures,
  ProcessorFeatures.AtomTuning
>;
1315 foreach P = ["silvermont", "slm"] in {
// One record per CPU name P supplied by the enclosing foreach; uses the SLM
// scheduling model and the SLM feature/tuning lists.
def : ProcModel<
  P,
  SLMModel,
  ProcessorFeatures.SLMFeatures,
  ProcessorFeatures.SLMTuning
>;
// goldmont, goldmont-plus and tremont each have their own feature/tuning
// lists but all reuse SLMModel for scheduling.
def : ProcModel<
  "goldmont",
  SLMModel,
  ProcessorFeatures.GLMFeatures,
  ProcessorFeatures.GLMTuning
>;
def : ProcModel<
  "goldmont-plus",
  SLMModel,
  ProcessorFeatures.GLPFeatures,
  ProcessorFeatures.GLPTuning
>;
def : ProcModel<
  "tremont",
  SLMModel,
  ProcessorFeatures.TRMFeatures,
  ProcessorFeatures.TRMTuning
>;
1327 // "Arrandale" along with corei3 and corei5
1328 foreach P = ["nehalem", "corei7"] in {
// One record per CPU name P supplied by the enclosing foreach; NHM lists,
// scheduled with SandyBridgeModel.
def : ProcModel<
  P,
  SandyBridgeModel,
  ProcessorFeatures.NHMFeatures,
  ProcessorFeatures.NHMTuning
>;
// Westmere is the corei3/i5/i7 step between nehalem and sandybridge. It is
// scheduled with SandyBridgeModel and uses the WSM feature/tuning lists.
def : ProcModel<
  "westmere",
  SandyBridgeModel,
  ProcessorFeatures.WSMFeatures,
  ProcessorFeatures.WSMTuning
>;
1337 foreach P = ["sandybridge", "corei7-avx"] in {
// One record per CPU name P supplied by the enclosing foreach; SNB lists,
// scheduled with SandyBridgeModel.
def : ProcModel<
  P,
  SandyBridgeModel,
  ProcessorFeatures.SNBFeatures,
  ProcessorFeatures.SNBTuning
>;
1342 foreach P = ["ivybridge", "core-avx-i"] in {
// One record per CPU name P supplied by the enclosing foreach; IVB lists,
// scheduled with SandyBridgeModel.
def : ProcModel<
  P,
  SandyBridgeModel,
  ProcessorFeatures.IVBFeatures,
  ProcessorFeatures.IVBTuning
>;
1347 foreach P = ["haswell", "core-avx2"] in {
// One record per CPU name P supplied by the enclosing foreach; HSW lists,
// scheduled with HaswellModel.
def : ProcModel<
  P,
  HaswellModel,
  ProcessorFeatures.HSWFeatures,
  ProcessorFeatures.HSWTuning
>;
// broadwell: BDW lists with its own BroadwellModel scheduler.
def : ProcModel<
  "broadwell",
  BroadwellModel,
  ProcessorFeatures.BDWFeatures,
  ProcessorFeatures.BDWTuning
>;
// skylake: SKL lists, scheduled with SkylakeClientModel.
def : ProcModel<
  "skylake",
  SkylakeClientModel,
  ProcessorFeatures.SKLFeatures,
  ProcessorFeatures.SKLTuning
>;
// FIXME: define KNL scheduler model
// For now both "knl" and "knm" are scheduled with HaswellModel, and "knm"
// reuses the KNL tuning list while having its own feature list.
def : ProcModel<
  "knl",
  HaswellModel,
  ProcessorFeatures.KNLFeatures,
  ProcessorFeatures.KNLTuning
>;
def : ProcModel<
  "knm",
  HaswellModel,
  ProcessorFeatures.KNMFeatures,
  ProcessorFeatures.KNLTuning
>;
1364 foreach P = ["skylake-avx512", "skx"] in {
// One record per CPU name P supplied by the enclosing foreach; SKX lists,
// scheduled with SkylakeServerModel.
def : ProcModel<
  P,
  SkylakeServerModel,
  ProcessorFeatures.SKXFeatures,
  ProcessorFeatures.SKXTuning
>;
// CPUs from cascadelake through alderlake. All are scheduled with
// SkylakeServerModel except "alderlake", which uses SkylakeClientModel.
// "rocketlake" shares the ICL feature and tuning lists with "icelake-client".
def : ProcModel<
  "cascadelake",
  SkylakeServerModel,
  ProcessorFeatures.CLXFeatures,
  ProcessorFeatures.CLXTuning
>;
def : ProcModel<
  "cooperlake",
  SkylakeServerModel,
  ProcessorFeatures.CPXFeatures,
  ProcessorFeatures.CPXTuning
>;
def : ProcModel<
  "cannonlake",
  SkylakeServerModel,
  ProcessorFeatures.CNLFeatures,
  ProcessorFeatures.CNLTuning
>;
def : ProcModel<
  "icelake-client",
  SkylakeServerModel,
  ProcessorFeatures.ICLFeatures,
  ProcessorFeatures.ICLTuning
>;
def : ProcModel<
  "rocketlake",
  SkylakeServerModel,
  ProcessorFeatures.ICLFeatures,
  ProcessorFeatures.ICLTuning
>;
def : ProcModel<
  "icelake-server",
  SkylakeServerModel,
  ProcessorFeatures.ICXFeatures,
  ProcessorFeatures.ICXTuning
>;
def : ProcModel<
  "tigerlake",
  SkylakeServerModel,
  ProcessorFeatures.TGLFeatures,
  ProcessorFeatures.TGLTuning
>;
def : ProcModel<
  "sapphirerapids",
  SkylakeServerModel,
  ProcessorFeatures.SPRFeatures,
  ProcessorFeatures.SPRTuning
>;
def : ProcModel<
  "alderlake",
  SkylakeClientModel,
  ProcessorFeatures.ADLFeatures,
  ProcessorFeatures.ADLTuning
>;
// k6 family: "k6" enables MMX, while "k6-2" and "k6-3" enable 3DNow instead.
def : Proc<
  "k6",
  [FeatureX87, FeatureCMPXCHG8B, FeatureMMX],
  [TuningSlowUAMem16, TuningInsertVZEROUPPER]
>;
def : Proc<
  "k6-2",
  [FeatureX87, FeatureCMPXCHG8B, Feature3DNow],
  [TuningSlowUAMem16, TuningInsertVZEROUPPER]
>;
def : Proc<
  "k6-3",
  [FeatureX87, FeatureCMPXCHG8B, Feature3DNow],
  [TuningSlowUAMem16, TuningInsertVZEROUPPER]
>;
1397 foreach P = ["athlon", "athlon-tbird"] in {
1398 def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, Feature3DNowA,
1400 [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
1403 foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
// One record per CPU name P supplied by the enclosing foreach; SSE1-level
// feature set with 3DNowA.
def : Proc<
  P,
  [
    FeatureX87,
    FeatureCMPXCHG8B,
    FeatureCMOV,
    FeatureSSE1,
    Feature3DNowA,
    FeatureFXSR,
    FeatureNOPL
  ],
  [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]
>;
1409 foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
// One record per CPU name P supplied by the enclosing foreach; SSE2-level
// feature set with 3DNowA and Feature64Bit.
def : Proc<
  P,
  [
    FeatureX87,
    FeatureCMPXCHG8B,
    FeatureSSE2,
    Feature3DNowA,
    FeatureFXSR,
    FeatureNOPL,
    Feature64Bit,
    FeatureCMOV
  ],
  [
    TuningFastScalarShiftMasks,
    TuningSlowSHLD,
    TuningSlowUAMem16,
    TuningInsertVZEROUPPER
  ]
>;
1416 foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
1417 def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureSSE3, Feature3DNowA,
1418 FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureCMOV,
1420 [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
1421 TuningInsertVZEROUPPER]>;
1424 foreach P = ["amdfam10", "barcelona"] in {
// One record per CPU name P supplied by the enclosing foreach; Barcelona
// feature/tuning lists with the GenericModel scheduler.
def : Proc<
  P,
  ProcessorFeatures.BarcelonaFeatures,
  ProcessorFeatures.BarcelonaTuning
>;
// btver1: BtVer1 lists with the GenericModel scheduler.
def : Proc<
  "btver1",
  ProcessorFeatures.BtVer1Features,
  ProcessorFeatures.BtVer1Tuning
>;
// btver2: BtVer2 lists with its own BtVer2Model scheduler.
def : ProcModel<
  "btver2",
  BtVer2Model,
  ProcessorFeatures.BtVer2Features,
  ProcessorFeatures.BtVer2Tuning
>;
// bdver1: BdVer1 lists, scheduled with BdVer2Model.
def : ProcModel<
  "bdver1",
  BdVer2Model,
  ProcessorFeatures.BdVer1Features,
  ProcessorFeatures.BdVer1Tuning
>;
// bdver2: BdVer2 lists, scheduled with BdVer2Model.
def : ProcModel<
  "bdver2",
  BdVer2Model,
  ProcessorFeatures.BdVer2Features,
  ProcessorFeatures.BdVer2Tuning
>;
// bdver3: BdVer3 lists with the GenericModel scheduler.
def : Proc<
  "bdver3",
  ProcessorFeatures.BdVer3Features,
  ProcessorFeatures.BdVer3Tuning
>;
// bdver4: BdVer4 lists with the GenericModel scheduler.
def : Proc<
  "bdver4",
  ProcessorFeatures.BdVer4Features,
  ProcessorFeatures.BdVer4Tuning
>;
// znver1..znver3: each generation has its own scheduling model and its own
// feature/tuning lists.
def : ProcModel<
  "znver1",
  Znver1Model,
  ProcessorFeatures.ZNFeatures,
  ProcessorFeatures.ZNTuning
>;
def : ProcModel<
  "znver2",
  Znver2Model,
  ProcessorFeatures.ZN2Features,
  ProcessorFeatures.ZN2Tuning
>;
def : ProcModel<
  "znver3",
  Znver3Model,
  ProcessorFeatures.ZN3Features,
  ProcessorFeatures.ZN3Tuning
>;
// geode: x87, CMPXCHG8B and 3DNowA with the GenericModel scheduler.
def : Proc<
  "geode",
  [FeatureX87, FeatureCMPXCHG8B, Feature3DNowA],
  [TuningSlowUAMem16, TuningInsertVZEROUPPER]
>;
// winchip and c3 parts. Only "c3-2" enables CMPXCHG8B here; it also carries
// SSE1, FXSR and CMOV. All four share the same tuning list.
def : Proc<
  "winchip-c6",
  [FeatureX87, FeatureMMX],
  [TuningSlowUAMem16, TuningInsertVZEROUPPER]
>;
def : Proc<
  "winchip2",
  [FeatureX87, Feature3DNow],
  [TuningSlowUAMem16, TuningInsertVZEROUPPER]
>;
def : Proc<
  "c3",
  [FeatureX87, Feature3DNow],
  [TuningSlowUAMem16, TuningInsertVZEROUPPER]
>;
def : Proc<
  "c3-2",
  [
    FeatureX87,
    FeatureCMPXCHG8B,
    FeatureMMX,
    FeatureSSE1,
    FeatureFXSR,
    FeatureCMOV
  ],
  [TuningSlowUAMem16, TuningInsertVZEROUPPER]
>;
1469 // We also provide a generic 64-bit specific x86 processor model which tries to
1470 // be good for modern chips without enabling instruction set encodings past the
1471 // basic SSE2 and 64-bit ones. It disables slow things from any mainstream and
1472 // modern 64-bit x86 chip, and enables features that are generally beneficial.
1474 // We currently use the Sandy Bridge model as the default scheduling model as
1475 // we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which
1476 // covers a huge swath of x86 processors. If there are specific scheduling
1477 // knobs which need to be tuned differently for AMD chips, we might consider
1478 // forming a common base for them.
1479 def : ProcModel<"x86-64", SandyBridgeModel, ProcessorFeatures.X86_64V1Features,
1485 TuningInsertVZEROUPPER
// x86-64 micro-architecture levels. Each level has its own feature list and
// borrows the scheduling model and tuning list of an existing CPU.
def : ProcModel<
  "x86-64-v2",
  SandyBridgeModel,
  ProcessorFeatures.X86_64V2Features,
  ProcessorFeatures.SNBTuning
>;
// Close to Haswell.
def : ProcModel<
  "x86-64-v3",
  HaswellModel,
  ProcessorFeatures.X86_64V3Features,
  ProcessorFeatures.HSWTuning
>;
// Close to the AVX-512 level implemented by Xeon Scalable Processors.
def : ProcModel<
  "x86-64-v4",
  SkylakeServerModel,
  ProcessorFeatures.X86_64V4Features,
  ProcessorFeatures.SKXTuning
>;
1498 //===----------------------------------------------------------------------===//
1499 // Calling Conventions
1500 //===----------------------------------------------------------------------===//
1502 include "X86CallingConv.td"
1505 //===----------------------------------------------------------------------===//
1507 //===----------------------------------------------------------------------===//
1509 def ATTAsmParserVariant : AsmParserVariant {
1513 string Name = "att";
1515 // Discard comments in assembly strings.
1516 string CommentDelimiter = "#";
1518 // Recognize hard coded registers.
1519 string RegisterPrefix = "%";
1522 def IntelAsmParserVariant : AsmParserVariant {
1526 string Name = "intel";
1528 // Discard comments in assembly strings.
1529 string CommentDelimiter = ";";
1531 // Recognize hard coded registers.
1532 string RegisterPrefix = "";
1535 //===----------------------------------------------------------------------===//
1536 // Assembly Printers
1537 //===----------------------------------------------------------------------===//
// The X86 target supports two different syntaxes for emitting machine code.
// The syntax is selected with the -x86-asm-syntax={att|intel} option.
1541 def ATTAsmWriter : AsmWriter {
1542 string AsmWriterClassName = "ATTInstPrinter";
1545 def IntelAsmWriter : AsmWriter {
1546 string AsmWriterClassName = "IntelInstPrinter";
// Information about the instructions...
let InstructionSet = X86InstrInfo;
// Assembly parser variants and assembly writers for this target; in both
// lists the ATT entry precedes the Intel entry.
let AssemblyParserVariants = [
  ATTAsmParserVariant,
  IntelAsmParserVariant
];
let AssemblyWriters = [
  ATTAsmWriter,
  IntelAsmWriter
];
let AllowRegisterRenaming = 1;
1558 //===----------------------------------------------------------------------===//
1560 //===----------------------------------------------------------------------===//
1562 include "X86PfmCounters.td"