1 //===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This is a target description file for the Intel i386 architecture, referred
10 // to here as the "X86" architecture.
12 //===----------------------------------------------------------------------===//
14 // Get the target-independent interfaces which we are implementing...
16 include "llvm/Target/Target.td"
18 //===----------------------------------------------------------------------===//
19 // X86 Subtarget state
// Operating-mode selectors. Each record names a feature string ("64bit-mode",
// "32bit-mode", "16bit-mode") and pairs it with the In64BitMode / In32BitMode /
// In16BitMode attribute and the value "true". None of them lists implied
// features.
22 def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
23 "64-bit mode (x86_64)">;
24 def Mode32Bit : SubtargetFeature<"32bit-mode", "In32BitMode", "true",
25 "32-bit mode (80386)">;
26 def Mode16Bit : SubtargetFeature<"16bit-mode", "In16BitMode", "true",
27 "16-bit mode (i8086)">;
29 //===----------------------------------------------------------------------===//
30 // X86 Subtarget features
31 //===----------------------------------------------------------------------===//
// Basic ISA features. Each record below pairs a feature string with its own
// boolean Has* attribute (value "true") and lists no implied features.
33 def FeatureX87 : SubtargetFeature<"x87","HasX87", "true",
34 "Enable X87 float instructions">;
36 def FeatureNOPL : SubtargetFeature<"nopl", "HasNOPL", "true",
37 "Enable NOPL instruction">;
39 def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
40 "Enable conditional move instructions">;
42 def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
43 "Support POPCNT instruction">;
45 def FeatureFXSR : SubtargetFeature<"fxsr", "HasFXSR", "true",
46 "Support fxsave/fxrestore instructions">;
48 def FeatureXSAVE : SubtargetFeature<"xsave", "HasXSAVE", "true",
49 "Support xsave instructions">;
51 def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
52 "Support xsaveopt instructions">;
54 def FeatureXSAVEC : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
55 "Support xsavec instructions">;
57 def FeatureXSAVES : SubtargetFeature<"xsaves", "HasXSAVES", "true",
58 "Support xsaves instructions">;
// Unlike the boolean features above, SSE does not get its own Has* attribute:
// it assigns the value SSE1 to the X86SSELevel attribute, which the later
// SSE/AVX feature records also target.
60 def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
61 "Enable SSE instructions">;
62 def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
63 "Enable SSE2 instructions",
65 def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
66 "Enable SSE3 instructions",
68 def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
69 "Enable SSSE3 instructions",
71 def FeatureSSE41 : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
72 "Enable SSE 4.1 instructions",
74 def FeatureSSE42 : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
75 "Enable SSE 4.2 instructions",
77 // The MMX subtarget feature is separate from the rest of the SSE features
78 // because it's important (for odd compatibility reasons) to be able to
79 // turn it off explicitly while allowing SSE+ to be on.
80 def FeatureMMX : SubtargetFeature<"mmx","X863DNowLevel", "MMX",
81 "Enable MMX instructions">;
82 def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
83 "Enable 3DNow! instructions",
85 def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
86 "Enable 3DNow! Athlon instructions",
88 // All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
89 // feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
90 // without disabling 64-bit mode. Nothing should imply this feature bit. It
91 // is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
92 def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
93 "Support 64-bit instructions">;
94 def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
95 "64-bit with cmpxchg16b">;
96 def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
97 "SHLD instruction is slow">;
98 def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
99 "PMULLD instruction is slow">;
100 def FeatureSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
102 "PMADDWD is slower than PMULLD">;
103 // FIXME: This should not apply to CPUs that do not have SSE.
104 def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
105 "IsUAMem16Slow", "true",
106 "Slow unaligned 16-byte memory access">;
107 def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
108 "IsUAMem32Slow", "true",
109 "Slow unaligned 32-byte memory access">;
110 def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
111 "Support SSE 4a instructions",
114 def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX",
115 "Enable AVX instructions",
117 def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
118 "Enable AVX2 instructions",
120 def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true",
121 "Enable three-operand fused multiple-add",
123 def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true",
124 "Support 16-bit floating point conversion instructions",
126 def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F",
127 "Enable AVX-512 instructions",
128 [FeatureAVX2, FeatureFMA, FeatureF16C]>;
129 def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true",
130 "Enable AVX-512 Exponential and Reciprocal Instructions",
132 def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true",
133 "Enable AVX-512 Conflict Detection Instructions",
135 def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
136 "true", "Enable AVX-512 Population Count Instructions",
138 def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
139 "Enable AVX-512 PreFetch Instructions",
141 def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1",
143 "Prefetch with Intent to Write and T1 Hint">;
144 def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
145 "Enable AVX-512 Doubleword and Quadword Instructions",
147 def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true",
148 "Enable AVX-512 Byte and Word Instructions",
150 def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true",
151 "Enable AVX-512 Vector Length eXtensions",
153 def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
154 "Enable AVX-512 Vector Byte Manipulation Instructions",
156 def FeatureVBMI2 : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
157 "Enable AVX-512 further Vector Byte Manipulation Instructions",
159 def FeatureIFMA : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
160 "Enable AVX-512 Integer Fused Multiple-Add",
162 def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true",
163 "Enable protection keys">;
164 def FeatureVNNI : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
165 "Enable AVX-512 Vector Neural Network Instructions",
167 def FeatureBITALG : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
168 "Enable AVX-512 Bit Algorithms",
170 def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
171 "Enable packed carry-less multiplication instructions",
173 def FeatureGFNI : SubtargetFeature<"gfni", "HasGFNI", "true",
174 "Enable Galois Field Arithmetic Instructions",
176 def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
177 "Enable vpclmulqdq instructions",
178 [FeatureAVX, FeaturePCLMUL]>;
179 def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
180 "Enable four-operand fused multiple-add",
181 [FeatureAVX, FeatureSSE4A]>;
182 def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
183 "Enable XOP instructions",
185 def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
186 "HasSSEUnalignedMem", "true",
187 "Allow unaligned memory operands with SSE instructions">;
188 def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
189 "Enable AES instructions",
191 def FeatureVAES : SubtargetFeature<"vaes", "HasVAES", "true",
192 "Promote selected AES instructions to AVX512/AVX registers",
193 [FeatureAVX, FeatureAES]>;
// Stand-alone ISA extensions. Every record in this run sets its own boolean
// Has* attribute to "true" and gives no implied-feature list, so turning one
// on does not turn on any other feature.
194 def FeatureTBM : SubtargetFeature<"tbm", "HasTBM", "true",
195 "Enable TBM instructions">;
196 def FeatureLWP : SubtargetFeature<"lwp", "HasLWP", "true",
197 "Enable LWP instructions">;
198 def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true",
199 "Support MOVBE instruction">;
// Note: the feature string is spelled "rdrnd", not "rdrand".
200 def FeatureRDRAND : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
201 "Support RDRAND instruction">;
202 def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
203 "Support FS/GS Base instructions">;
204 def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
205 "Support LZCNT instruction">;
206 def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true",
207 "Support BMI instructions">;
208 def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true",
209 "Support BMI2 instructions">;
210 def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true",
211 "Support RTM instructions">;
212 def FeatureADX : SubtargetFeature<"adx", "HasADX", "true",
213 "Support ADX instructions">;
214 def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true",
215 "Enable SHA instructions",
217 def FeatureSHSTK : SubtargetFeature<"shstk", "HasSHSTK", "true",
218 "Support CET Shadow-Stack instructions">;
219 def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
220 "Support PRFCHW instructions">;
221 def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
222 "Support RDSEED instruction">;
223 def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true",
224 "Support LAHF and SAHF instructions">;
225 def FeatureMWAITX : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
226 "Enable MONITORX/MWAITX timer functionality">;
227 def FeatureCLZERO : SubtargetFeature<"clzero", "HasCLZERO", "true",
228 "Enable Cache Line Zero">;
229 def FeatureCLDEMOTE : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
230 "Enable Cache Demote">;
231 def FeaturePTWRITE : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
232 "Support ptwrite instruction">;
233 def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true",
234 "Support MPX instructions">;
// Code-generation preferences. Going by their description strings, these
// records select how code is generated (stack-pointer adjustment, narrower
// divides, function padding) rather than advertising an instruction-set
// extension.
235 def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
236 "Use LEA for adjusting the stack pointer">;
// The feature strings of the next two records name the rewrite
// ("idivl-to-divb", "idivq-to-divl"); the attributes are HasSlowDivide32 and
// HasSlowDivide64.
237 def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
238 "HasSlowDivide32", "true",
239 "Use 8-bit divide for positive values less than 256">;
240 def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl",
241 "HasSlowDivide64", "true",
242 "Use 32-bit divide for positive values less than 2^32">;
243 def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
244 "PadShortFunctions", "true",
245 "Pad short functions">;
246 def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
247 "Invalidate Process-Context Identifier">;
248 def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true",
249 "Enable Software Guard Extensions">;
250 def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
251 "Flush A Cache Line Optimized">;
252 def FeatureCLWB : SubtargetFeature<"clwb", "HasCLWB", "true",
253 "Cache Line Write Back">;
254 def FeatureWBNOINVD : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
255 "Write Back No Invalidate">;
256 def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
257 "Support RDPID instructions">;
258 def FeatureWAITPKG : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
259 "Wait and pause enhancements">;
260 // On some processors, instructions that implicitly take two memory operands are
261 // slow. In practice, this means that CALL, PUSH, and POP with memory operands
262 // should be avoided in favor of a MOV + register CALL/PUSH/POP.
263 def FeatureSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
264 "SlowTwoMemOps", "true",
265 "Two memory operand instructions are slow">;
// Performance characteristics of LEA and INC/DEC. Each record sets a boolean
// attribute named after the feature (LEAUsesAG, SlowLEA, Slow3OpsLEA,
// SlowIncDec) and implies no other feature.
266 def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
267 "LEA instruction needs inputs at AG stage">;
268 def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
269 "LEA instruction with certain arguments is slow">;
270 def FeatureSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
271 "LEA instruction with 3 ops or certain registers is slow">;
272 def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
273 "INC and DEC instructions are slower than ADD and SUB">;
275 : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
276 "Use software floating point features.">;
277 def FeaturePOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
278 "HasPOPCNTFalseDeps", "true",
279 "POPCNT has a false dependency on dest register">;
280 def FeatureLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
281 "HasLZCNTFalseDeps", "true",
282 "LZCNT/TZCNT have a false dependency on dest register">;
283 def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
284 "platform configuration instruction">;
285 // On recent X86 (port bound) processors, it is preferable to combine to a
286 // single shuffle using a variable mask over multiple fixed shuffles.
287 def FeatureFastVariableShuffle
288 : SubtargetFeature<"fast-variable-shuffle",
289 "HasFastVariableShuffle",
290 "true", "Shuffles with variable masks are fast">;
291 // On some X86 processors, there is no performance hazard to writing only the
292 // lower parts of a YMM or ZMM register without clearing the upper part.
293 def FeatureFastPartialYMMorZMMWrite
294 : SubtargetFeature<"fast-partial-ymm-or-zmm-write",
295 "HasFastPartialYMMorZMMWrite",
296 "true", "Partial writes to YMM/ZMM registers are fast">;
297 // FeatureFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
298 // than the corresponding NR code. FeatureFastVectorFSQRT should be enabled if
299 // vector FSQRT has higher throughput than the corresponding NR code.
300 // The idea is that throughput bound code is likely to be vectorized, so for
301 // vectorized code we should care about the throughput of SQRT operations.
302 // But if the code is scalar that probably means that the code has some kind of
303 // dependency and we should care more about reducing the latency.
304 def FeatureFastScalarFSQRT
305 : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
306 "true", "Scalar SQRT is fast (disable Newton-Raphson)">;
307 def FeatureFastVectorFSQRT
308 : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
309 "true", "Vector SQRT is fast (disable Newton-Raphson)">;
310 // If lzcnt has equivalent latency/throughput to most simple integer ops, it can
311 // be used to replace test/set sequences.
314 "fast-lzcnt", "HasFastLZCNT", "true",
315 "LZCNT instructions are as fast as most simple integer ops">;
316 // If the target can efficiently decode NOPs up to 11 bytes in length.
317 def FeatureFast11ByteNOP
319 "fast-11bytenop", "HasFast11ByteNOP", "true",
320 "Target can quickly decode up to 11 byte NOPs">;
321 // If the target can efficiently decode NOPs up to 15 bytes in length.
322 def FeatureFast15ByteNOP
324 "fast-15bytenop", "HasFast15ByteNOP", "true",
325 "Target can quickly decode up to 15 byte NOPs">;
326 // Sandy Bridge and newer processors can use SHLD with the same source on both
327 // inputs to implement rotate to avoid the partial flag update of the normal
328 // rotate instructions.
329 def FeatureFastSHLDRotate
331 "fast-shld-rotate", "HasFastSHLDRotate", "true",
332 "SHLD can be used as a faster rotate">;
334 // Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
335 // "string operations"). See "REP String Enhancement" in the Intel Software
336 // Development Manual. This feature essentially means that REP MOVSB will copy
337 // using the largest available size instead of copying bytes one by one, making
338 // it at least as fast as REP MOVS{W,D,Q}.
341 "ermsb", "HasERMSB", "true",
342 "REP MOVS/STOS are fast">;
344 // Sandy Bridge and newer processors have many instructions that can be fused
345 // with conditional branches and pass through the CPU as a single operation.
347 def FeatureMacroFusion
348 : SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
349 "Various instructions can be fused with conditional branches">;
351 // Gather is available since Haswell (AVX2 set). So technically, we can
352 // generate Gathers on all AVX2 processors. But the overhead on HSW is high.
353 // Skylake Client processor has faster Gathers than HSW and performance is
354 // similar to Skylake Server (AVX-512).
355 def FeatureHasFastGather
356 : SubtargetFeature<"fast-gather", "HasFastGather", "true",
357 "Indicates if gather is reasonably fast.">;
// Preference flag rather than an ISA extension: sets the Prefer256Bit
// attribute to "true" and implies no other feature.
359 def FeaturePrefer256Bit
360 : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
361 "Prefer 256-bit AVX instructions">;
363 // Lower indirect calls using a special construct called a `retpoline` to
364 // mitigate potential Spectre v2 attacks against them.
365 def FeatureRetpolineIndirectCalls
367 "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
368 "Remove speculation of indirect calls from the generated code.">;
370 // Lower indirect branches and switches either using conditional branch trees
371 // or using a special construct called a `retpoline` to mitigate potential
372 // Spectre v2 attacks against them.
373 def FeatureRetpolineIndirectBranches
375 "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
376 "Remove speculation of indirect branches from the generated code.">;
378 // Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
379 // `retpoline-indirect-branches` above.
381 : SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
382 "Remove speculation of indirect branches from the "
383 "generated code, either by avoiding them entirely or "
384 "lowering them with a speculation blocking construct.",
385 [FeatureRetpolineIndirectCalls,
386 FeatureRetpolineIndirectBranches]>;
388 // Rely on external thunks for the emitted retpoline calls. This allows users
389 // to provide their own custom thunk definitions in highly specialized
390 // environments such as a kernel that does boot-time hot patching.
391 def FeatureRetpolineExternalThunk
393 "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
394 "When lowering an indirect call or branch using a `retpoline`, rely "
395 "on the specified user provided thunk rather than emitting one "
396 "ourselves. Only has effect when combined with some other retpoline "
397 "feature.", [FeatureRetpolineIndirectCalls]>;
399 // Direct Move instructions.
400 def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
401 "Support movdiri instruction">;
402 def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
403 "Support movdir64b instruction">;
// Performance hint for BEXTR: sets HasFastBEXTR to "true". It lists no implied
// features, so it does not by itself enable any instruction-set feature.
405 def FeatureFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
406 "Indicates that the BEXTR instruction is implemented as a single uop "
407 "with good throughput.">;
409 // Combine vector math operations with shuffles into horizontal math
410 // instructions if a CPU implements horizontal operations (introduced with
411 // SSE3) with better latency/throughput than the alternative sequence.
412 def FeatureFastHorizontalOps
414 "fast-hops", "HasFastHorizontalOps", "true",
415 "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
416 "normal vector instructions with shuffles", [FeatureSSE3]>;
418 // Merge branches using three-way conditional code.
419 def FeatureMergeToThreeWayBranch : SubtargetFeature<"merge-to-threeway-branch",
420 "ThreewayBranchProfitable", "true",
421 "Merge branches to a three-way "
422 "conditional branch">;
424 //===----------------------------------------------------------------------===//
425 // Register File Description
426 //===----------------------------------------------------------------------===//
428 include "X86RegisterInfo.td"
429 include "X86RegisterBanks.td"
431 //===----------------------------------------------------------------------===//
432 // Instruction Descriptions
433 //===----------------------------------------------------------------------===//
435 include "X86Schedule.td"
436 include "X86InstrInfo.td"
437 include "X86SchedPredicates.td"
// Instruction-set record for the X86 target. It derives from InstrInfo and
// overrides none of its fields.
439 def X86InstrInfo : InstrInfo;
441 //===----------------------------------------------------------------------===//
442 // X86 processors supported.
443 //===----------------------------------------------------------------------===//
445 include "X86ScheduleAtom.td"
446 include "X86SchedSandyBridge.td"
447 include "X86SchedHaswell.td"
448 include "X86SchedBroadwell.td"
449 include "X86ScheduleSLM.td"
450 include "X86ScheduleZnver1.td"
451 include "X86ScheduleBdVer2.td"
452 include "X86ScheduleBtVer2.td"
453 include "X86SchedSkylakeClient.td"
454 include "X86SchedSkylakeServer.td"
// Processor-family markers. Unlike the boolean features, these all target the
// single X86ProcFamily attribute, each assigning it a different family value
// (IntelAtom, IntelSLM, IntelGLM, IntelGLP, IntelTRM).
456 def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom",
457 "Intel Atom processors">;
458 def ProcIntelSLM : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM",
459 "Intel Silvermont processors">;
460 def ProcIntelGLM : SubtargetFeature<"glm", "X86ProcFamily", "IntelGLM",
461 "Intel Goldmont processors">;
462 def ProcIntelGLP : SubtargetFeature<"glp", "X86ProcFamily", "IntelGLP",
463 "Intel Goldmont Plus processors">;
464 def ProcIntelTRM : SubtargetFeature<"tremont", "X86ProcFamily", "IntelTRM",
465 "Intel Tremont processors">;
// Convenience wrapper around ProcessorModel that always uses GenericModel as
// the scheduling model, so a processor is described by just its name and
// feature list.
467 class Proc<string Name, list<SubtargetFeature> Features>
468 : ProcessorModel<Name, GenericModel, Features>;
// "generic" through "pentium" share the same two-feature list
// (FeatureX87, FeatureSlowUAMem16); "pentium-mmx" adds FeatureMMX and "i686"
// adds FeatureCMOV.
470 def : Proc<"generic", [FeatureX87, FeatureSlowUAMem16]>;
471 def : Proc<"i386", [FeatureX87, FeatureSlowUAMem16]>;
472 def : Proc<"i486", [FeatureX87, FeatureSlowUAMem16]>;
473 def : Proc<"i586", [FeatureX87, FeatureSlowUAMem16]>;
474 def : Proc<"pentium", [FeatureX87, FeatureSlowUAMem16]>;
475 def : Proc<"pentium-mmx", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
477 def : Proc<"i686", [FeatureX87, FeatureSlowUAMem16, FeatureCMOV]>;
478 def : Proc<"pentiumpro", [FeatureX87, FeatureSlowUAMem16, FeatureCMOV,
481 def : Proc<"pentium2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
482 FeatureCMOV, FeatureFXSR, FeatureNOPL]>;
484 foreach P = ["pentium3", "pentium3m"] in {
485 def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE1,
486 FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
489 // Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
490 // The intent is to enable it for pentium4 which is the current default
491 // processor in a vanilla 32-bit clang compilation when no specific
492 // architecture is specified. This generally gives a nice performance
493 // increase on silvermont, with largely neutral behavior on other
494 // contemporary large core processors.
495 // pentium-m, pentium4m, prescott and nocona are included as a preventative
496 // measure to avoid performance surprises, in case clang's default cpu changes.
499 def : ProcessorModel<"pentium-m", GenericPostRAModel,
500 [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
501 FeatureSSE2, FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
503 foreach P = ["pentium4", "pentium4m"] in {
504 def : ProcessorModel<P, GenericPostRAModel,
505 [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
506 FeatureSSE2, FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
510 def : Proc<"lakemont", []>;
513 def : ProcessorModel<"yonah", SandyBridgeModel,
514 [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
515 FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
518 def : ProcessorModel<"prescott", GenericPostRAModel,
519 [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
520 FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
521 def : ProcessorModel<"nocona", GenericPostRAModel, [
533 // Intel Core 2 Solo/Duo.
534 def : ProcessorModel<"core2", SandyBridgeModel, [
547 def : ProcessorModel<"penryn", SandyBridgeModel, [
562 class BonnellProc<string Name> : ProcessorModel<Name, AtomModel, [
577 FeatureSlowTwoMemOps,
579 FeaturePadShortFunctions,
582 def : BonnellProc<"bonnell">;
583 def : BonnellProc<"atom">; // Pin the generic name to the baseline.
585 class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [
599 FeatureSlowTwoMemOps,
606 FeaturePOPCNTFalseDeps
608 def : SilvermontProc<"silvermont">;
609 def : SilvermontProc<"slm">; // Legacy alias.
611 class ProcessorFeatures<list<SubtargetFeature> Inherited,
612 list<SubtargetFeature> NewFeatures> {
613 list<SubtargetFeature> Value = !listconcat(Inherited, NewFeatures);
// ProcessorModel variant that takes its features in two lists and passes their
// concatenation (ProcFeatures first, then OtherFeatures) to ProcessorModel.
// Name and Model are forwarded unchanged.
616 class ProcModel<string Name, SchedMachineModel Model,
617 list<SubtargetFeature> ProcFeatures,
618 list<SubtargetFeature> OtherFeatures> :
619 ProcessorModel<Name, Model, !listconcat(ProcFeatures, OtherFeatures)>;
621 def GLMFeatures : ProcessorFeatures<[], [
635 FeatureSlowTwoMemOps,
651 class GoldmontProc<string Name> : ProcModel<Name, SLMModel,
654 FeaturePOPCNTFalseDeps
656 def : GoldmontProc<"goldmont">;
658 def GLPFeatures : ProcessorFeatures<GLMFeatures.Value, [
664 class GoldmontPlusProc<string Name> : ProcModel<Name, SLMModel,
668 def : GoldmontPlusProc<"goldmont-plus">;
670 class TremontProc<string Name> : ProcModel<Name, SLMModel,
679 def : TremontProc<"tremont">;
681 // "Arrandale" along with corei3 and corei5
682 class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
695 def : NehalemProc<"nehalem">;
696 def : NehalemProc<"corei7">;
698 // Westmere is a similar machine to nehalem with some additional features.
699 // Westmere is the corei3/i5/i7 path from nehalem to sandybridge
700 class WestmereProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
714 def : WestmereProc<"westmere">;
716 // SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
717 // rather than a superset.
718 def SNBFeatures : ProcessorFeatures<[], [
734 FeatureFastScalarFSQRT,
735 FeatureFastSHLDRotate,
737 FeatureMergeToThreeWayBranch,
741 class SandyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,
744 FeaturePOPCNTFalseDeps
746 def : SandyBridgeProc<"sandybridge">;
747 def : SandyBridgeProc<"corei7-avx">; // Legacy alias.
749 def IVBFeatures : ProcessorFeatures<SNBFeatures.Value, [
755 class IvyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,
758 FeaturePOPCNTFalseDeps
760 def : IvyBridgeProc<"ivybridge">;
761 def : IvyBridgeProc<"core-avx-i">; // Legacy alias.
763 def HSWFeatures : ProcessorFeatures<IVBFeatures.Value, [
772 FeatureFastVariableShuffle
775 class HaswellProc<string Name> : ProcModel<Name, HaswellModel,
777 FeaturePOPCNTFalseDeps,
778 FeatureLZCNTFalseDeps
780 def : HaswellProc<"haswell">;
781 def : HaswellProc<"core-avx2">; // Legacy alias.
783 def BDWFeatures : ProcessorFeatures<HSWFeatures.Value, [
788 class BroadwellProc<string Name> : ProcModel<Name, BroadwellModel,
790 FeaturePOPCNTFalseDeps,
791 FeatureLZCNTFalseDeps
793 def : BroadwellProc<"broadwell">;
795 def SKLFeatures : ProcessorFeatures<BDWFeatures.Value, [
801 FeatureFastVectorFSQRT
804 class SkylakeClientProc<string Name> : ProcModel<Name, SkylakeClientModel,
806 FeatureHasFastGather,
807 FeaturePOPCNTFalseDeps,
810 def : SkylakeClientProc<"skylake">;
812 def KNLFeatures : ProcessorFeatures<[], [
847 // FIXME: define KNL model
848 class KnightsLandingProc<string Name> : ProcModel<Name, HaswellModel,
850 FeatureSlowTwoMemOps,
851 FeatureFastPartialYMMorZMMWrite,
852 FeatureHasFastGather,
855 def : KnightsLandingProc<"knl">;
857 class KnightsMillProc<string Name> : ProcModel<Name, HaswellModel,
859 FeatureSlowTwoMemOps,
860 FeatureFastPartialYMMorZMMWrite,
861 FeatureHasFastGather,
865 def : KnightsMillProc<"knm">; // TODO Add AVX5124FMAPS/AVX5124VNNIW features
867 def SKXFeatures : ProcessorFeatures<SKLFeatures.Value, [
877 class SkylakeServerProc<string Name> : ProcModel<Name, SkylakeServerModel,
879 FeatureHasFastGather,
880 FeaturePOPCNTFalseDeps
882 def : SkylakeServerProc<"skylake-avx512">;
883 def : SkylakeServerProc<"skx">; // Legacy alias.
885 def CLXFeatures : ProcessorFeatures<SKXFeatures.Value, [
889 class CascadelakeProc<string Name> : ProcModel<Name, SkylakeServerModel,
891 FeatureHasFastGather,
892 FeaturePOPCNTFalseDeps
894 def : CascadelakeProc<"cascadelake">;
896 def CNLFeatures : ProcessorFeatures<SKLFeatures.Value, [
909 class CannonlakeProc<string Name> : ProcModel<Name, SkylakeServerModel,
913 def : CannonlakeProc<"cannonlake">;
915 def ICLFeatures : ProcessorFeatures<CNLFeatures.Value, [
927 class IcelakeClientProc<string Name> : ProcModel<Name, SkylakeServerModel,
931 def : IcelakeClientProc<"icelake-client">;
933 class IcelakeServerProc<string Name> : ProcModel<Name, SkylakeServerModel,
939 def : IcelakeServerProc<"icelake-server">;
// K6-family processors, all on the generic scheduling model via Proc.
// "k6-2" and "k6-3" have identical feature lists; they differ from "k6" only
// in listing Feature3DNow where "k6" lists FeatureMMX.
943 def : Proc<"k6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
944 def : Proc<"k6-2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
945 def : Proc<"k6-3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
947 foreach P = ["athlon", "athlon-tbird"] in {
948 def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMOV, Feature3DNowA,
949 FeatureNOPL, FeatureSlowSHLD]>;
952 foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
953 def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMOV, FeatureSSE1,
954 Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureSlowSHLD]>;
957 foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
958 def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
959 FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureSlowSHLD,
963 foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
964 def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
965 FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureSlowSHLD,
966 FeatureCMOV, Feature64Bit]>;
969 foreach P = ["amdfam10", "barcelona"] in {
970 def : Proc<P, [FeatureX87, FeatureSSE4A, Feature3DNowA, FeatureFXSR,
971 FeatureNOPL, FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT,
972 FeatureSlowSHLD, FeatureLAHFSAHF, FeatureCMOV, Feature64Bit]>;
976 def : Proc<"btver1", [
995 def : ProcessorModel<"btver2", BtVer2Model, [
1018 FeatureFast15ByteNOP,
1020 FeatureFastPartialYMMorZMMWrite,
1021 FeatureFastHorizontalOps
1025 def : ProcessorModel<"bdver1", BdVer2Model, [
1046 FeatureFast11ByteNOP,
1050 def : ProcessorModel<"bdver2", BdVer2Model, [
1075 FeatureFast11ByteNOP,
1081 def : Proc<"bdver3", [
1108 FeatureFast11ByteNOP,
1114 def : Proc<"bdver4", [
1142 FeatureFast11ByteNOP,
1148 def: ProcessorModel<"znver1", Znver1Model, [
1168 FeatureFast15ByteNOP,
// Remaining 32-bit processors, all on the generic scheduling model via Proc.
// Every entry lists FeatureX87 and FeatureSlowUAMem16; the entries differ only
// in the MMX/3DNow!/SSE level they add. "winchip2" and "c3" have identical
// feature lists.
1187 def : Proc<"geode", [FeatureX87, FeatureSlowUAMem16, Feature3DNowA]>;
1189 def : Proc<"winchip-c6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
1190 def : Proc<"winchip2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
1191 def : Proc<"c3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
// "c3-2" is the only entry in this group that lists FeatureSSE1, FeatureFXSR
// and FeatureCMOV.
1192 def : Proc<"c3-2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
1193 FeatureSSE1, FeatureFXSR, FeatureCMOV]>;
1195 // We also provide a generic 64-bit specific x86 processor model which tries to
1196 // be good for modern chips without enabling instruction set encodings past the
1197 // basic SSE2 and 64-bit ones. It disables slow things from any mainstream and
1198 // modern 64-bit x86 chip, and enables features that are generally beneficial.
1200 // We currently use the Sandy Bridge model as the default scheduling model as
1201 // we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which
1202 // covers a huge swath of x86 processors. If there are specific scheduling
1203 // knobs which need to be tuned differently for AMD chips, we might consider
1204 // forming a common base for them.
1205 def : ProcessorModel<"x86-64", SandyBridgeModel, [
1218 //===----------------------------------------------------------------------===//
1219 // Calling Conventions
1220 //===----------------------------------------------------------------------===//
1222 include "X86CallingConv.td"
1225 //===----------------------------------------------------------------------===//
1227 //===----------------------------------------------------------------------===//
1229 def ATTAsmParserVariant : AsmParserVariant {
1233 string Name = "att";
1235 // Discard comments in assembly strings.
1236 string CommentDelimiter = "#";
1238 // Recognize hard coded registers.
1239 string RegisterPrefix = "%";
1242 def IntelAsmParserVariant : AsmParserVariant {
1246 string Name = "intel";
1248 // Discard comments in assembly strings.
1249 string CommentDelimiter = ";";
1251 // Recognize hard coded registers.
1252 string RegisterPrefix = "";
1255 //===----------------------------------------------------------------------===//
1256 // Assembly Printers
1257 //===----------------------------------------------------------------------===//
1259 // The X86 target supports two different syntaxes for emitting machine code.
1260 // This is controlled by the -x86-asm-syntax={att|intel} option.
1261 def ATTAsmWriter : AsmWriter {
1262 string AsmWriterClassName = "ATTInstPrinter";
1265 def IntelAsmWriter : AsmWriter {
1266 string AsmWriterClassName = "IntelInstPrinter";
1271 // Information about the instructions...
1272 let InstructionSet = X86InstrInfo;
1273 let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
1274 let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
1275 let AllowRegisterRenaming = 1;
1278 //===----------------------------------------------------------------------===//
1280 //===----------------------------------------------------------------------===//
1282 include "X86PfmCounters.td"