[x86] fix assert with horizontal math + broadcast of vector (PR43402)
[llvm-core.git] / lib / Target / AMDGPU / AMDGPU.td
blob42b477e07b3b70708a62d914ff69b5b67face80a
1 //===-- AMDGPU.td - AMDGPU Tablegen files --------*- tablegen -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===------------------------------------------------------------===//
9 include "llvm/TableGen/SearchableTable.td"
10 include "llvm/Target/Target.td"
11 include "AMDGPUFeatures.td"
// Pointer value types p0..p6, one def per trailing index 0-6. The first
// argument gives the scalar width: i64 for p0, p1 and p4; i32 for p2, p3, p5
// and p6. NOTE(review): the second argument is presumably the address-space
// number — confirm against PtrValueType's definition.
13 def p0 : PtrValueType<i64, 0>;
14 def p1 : PtrValueType<i64, 1>;
15 def p2 : PtrValueType<i32, 2>;
16 def p3 : PtrValueType<i32, 3>;
17 def p4 : PtrValueType<i64, 4>;
18 def p5 : PtrValueType<i32, 5>;
19 def p6 : PtrValueType<i32, 6>;
22 class BoolToList<bit Value> {
23   list<int> ret = !if(Value, [1]<int>, []<int>);
26 //===------------------------------------------------------------===//
27 // Subtarget Features (device properties)
28 //===------------------------------------------------------------===//
30 def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
31   "FastFMAF32",
32   "true",
33   "Assuming f32 fma is at least as fast as mul + add"
36 def FeatureMIMG_R128 : SubtargetFeature<"mimg-r128",
37   "MIMG_R128",
38   "true",
39   "Support 128-bit texture resources"
42 def HalfRate64Ops : SubtargetFeature<"half-rate-64-ops",
43   "HalfRate64Ops",
44   "true",
45   "Most fp64 instructions are half rate instead of quarter"
48 def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
49   "FlatAddressSpace",
50   "true",
51   "Support flat address space"
54 def FeatureFlatInstOffsets : SubtargetFeature<"flat-inst-offsets",
55   "FlatInstOffsets",
56   "true",
57   "Flat instructions have immediate offset addressing mode"
60 def FeatureFlatGlobalInsts : SubtargetFeature<"flat-global-insts",
61   "FlatGlobalInsts",
62   "true",
63   "Have global_* flat memory instructions"
66 def FeatureFlatScratchInsts : SubtargetFeature<"flat-scratch-insts",
67   "FlatScratchInsts",
68   "true",
69   "Have scratch_* flat memory instructions"
72 def FeatureScalarFlatScratchInsts : SubtargetFeature<"scalar-flat-scratch-insts",
73   "ScalarFlatScratchInsts",
74   "true",
75   "Have s_scratch_* flat memory instructions"
78 def FeatureAddNoCarryInsts : SubtargetFeature<"add-no-carry-insts",
79   "AddNoCarryInsts",
80   "true",
81   "Have VALU add/sub instructions without carry out"
84 def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access",
85   "UnalignedBufferAccess",
86   "true",
87   "Support unaligned global loads and stores"
90 def FeatureTrapHandler: SubtargetFeature<"trap-handler",
91   "TrapHandler",
92   "true",
93   "Trap handler support"
96 def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access",
97   "UnalignedScratchAccess",
98   "true",
99   "Support unaligned scratch loads and stores"
102 def FeatureApertureRegs : SubtargetFeature<"aperture-regs",
103   "HasApertureRegs",
104   "true",
105   "Has Memory Aperture Base and Size Registers"
108 def FeatureMadMixInsts : SubtargetFeature<"mad-mix-insts",
109   "HasMadMixInsts",
110   "true",
111   "Has v_mad_mix_f32, v_mad_mixlo_f16, v_mad_mixhi_f16 instructions"
114 def FeatureFmaMixInsts : SubtargetFeature<"fma-mix-insts",
115   "HasFmaMixInsts",
116   "true",
117   "Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions"
120 def FeatureDoesNotSupportXNACK : SubtargetFeature<"no-xnack-support",
121   "DoesNotSupportXNACK",
122   "true",
123   "Hardware does not support XNACK"
126 // XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support
127 // XNACK. The current default kernel driver setting is:
128 // - graphics ring: XNACK disabled
129 // - compute ring: XNACK enabled
131 // If XNACK is enabled, the VMEM latency can be worse.
132 // If XNACK is disabled, the 2 SGPRs can be used for general purposes.
133 def FeatureXNACK : SubtargetFeature<"xnack",
134   "EnableXNACK",
135   "true",
136   "Enable XNACK support"
139 def FeatureCuMode : SubtargetFeature<"cumode",
140   "EnableCuMode",
141   "true",
142   "Enable CU wavefront execution mode"
145 def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
146   "SGPRInitBug",
147   "true",
148   "VI SGPR initialization bug requiring a fixed SGPR allocation size"
151 def FeatureLdsMisalignedBug : SubtargetFeature<"lds-misaligned-bug",
152   "LDSMisalignedBug",
153   "true",
154   "Some GFX10 bug with misaligned multi-dword LDS access in WGP mode"
157 def FeatureMFMAInlineLiteralBug : SubtargetFeature<"mfma-inline-literal-bug",
158   "HasMFMAInlineLiteralBug",
159   "true",
160   "MFMA cannot use inline literal as SrcC"
163 def FeatureVcmpxPermlaneHazard : SubtargetFeature<"vcmpx-permlane-hazard",
164   "HasVcmpxPermlaneHazard",
165   "true",
166   "TODO: describe me"
169 def FeatureVMEMtoScalarWriteHazard : SubtargetFeature<"vmem-to-scalar-write-hazard",
170   "HasVMEMtoScalarWriteHazard",
171   "true",
172   "VMEM instruction followed by scalar writing to EXEC mask, M0 or SGPR leads to incorrect execution."
175 def FeatureSMEMtoVectorWriteHazard : SubtargetFeature<"smem-to-vector-write-hazard",
176   "HasSMEMtoVectorWriteHazard",
177   "true",
178   "s_load_dword followed by v_cmp page faults"
181 def FeatureInstFwdPrefetchBug : SubtargetFeature<"inst-fwd-prefetch-bug",
182   "HasInstFwdPrefetchBug",
183   "true",
184   "S_INST_PREFETCH instruction causes shader to hang"
187 def FeatureVcmpxExecWARHazard : SubtargetFeature<"vcmpx-exec-war-hazard",
188   "HasVcmpxExecWARHazard",
189   "true",
190   "V_CMPX WAR hazard on EXEC (V_CMPX issue ONLY)"
193 def FeatureLdsBranchVmemWARHazard : SubtargetFeature<"lds-branch-vmem-war-hazard",
194   "HasLdsBranchVmemWARHazard",
195   "true",
196   "Switching between LDS and VMEM-tex not waiting VM_VSRC=0"
199 def FeatureNSAtoVMEMBug : SubtargetFeature<"nsa-to-vmem-bug",
200   "HasNSAtoVMEMBug",
201   "true",
202   "MIMG-NSA followed by VMEM fail if EXEC_LO or EXEC_HI equals zero"
205 def FeatureFlatSegmentOffsetBug : SubtargetFeature<"flat-segment-offset-bug",
206   "HasFlatSegmentOffsetBug",
207   "true",
208   "GFX10 bug, inst_offset ignored in flat segment"
211 def FeatureOffset3fBug : SubtargetFeature<"offset-3f-bug",
212   "HasOffset3fBug",
213   "true",
214   "Branch offset of 3f hardware bug"
217 class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
218   "ldsbankcount"#Value,
219   "LDSBankCount",
220   !cast<string>(Value),
221   "The number of LDS banks per compute unit."
224 def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
225 def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;
227 def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
228   "GCN3Encoding",
229   "true",
230   "Encoding format for VI"
233 def FeatureCIInsts : SubtargetFeature<"ci-insts",
234   "CIInsts",
235   "true",
236   "Additional instructions for CI+"
239 def FeatureGFX8Insts : SubtargetFeature<"gfx8-insts",
240   "GFX8Insts",
241   "true",
242   "Additional instructions for GFX8+"
245 def FeatureGFX9Insts : SubtargetFeature<"gfx9-insts",
246   "GFX9Insts",
247   "true",
248   "Additional instructions for GFX9+"
251 def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
252   "GFX10Insts",
253   "true",
254   "Additional instructions for GFX10+"
257 def FeatureGFX7GFX8GFX9Insts : SubtargetFeature<"gfx7-gfx8-gfx9-insts",
258   "GFX7GFX8GFX9Insts",
259   "true",
260   "Instructions shared in GFX7, GFX8, GFX9"
263 def FeatureSMemRealTime : SubtargetFeature<"s-memrealtime",
264   "HasSMemRealTime",
265   "true",
266   "Has s_memrealtime instruction"
269 def FeatureInv2PiInlineImm : SubtargetFeature<"inv-2pi-inline-imm",
270   "HasInv2PiInlineImm",
271   "true",
272   "Has 1 / (2 * pi) as inline immediate"
275 def Feature16BitInsts : SubtargetFeature<"16-bit-insts",
276   "Has16BitInsts",
277   "true",
278   "Has i16/f16 instructions"
281 def FeatureVOP3P : SubtargetFeature<"vop3p",
282   "HasVOP3PInsts",
283   "true",
284   "Has VOP3P packed instructions"
287 def FeatureMovrel : SubtargetFeature<"movrel",
288   "HasMovrel",
289   "true",
290   "Has v_movrel*_b32 instructions"
293 def FeatureVGPRIndexMode : SubtargetFeature<"vgpr-index-mode",
294   "HasVGPRIndexMode",
295   "true",
296   "Has VGPR mode register indexing"
299 def FeatureScalarStores : SubtargetFeature<"scalar-stores",
300   "HasScalarStores",
301   "true",
302   "Has store scalar memory instructions"
305 def FeatureScalarAtomics : SubtargetFeature<"scalar-atomics",
306   "HasScalarAtomics",
307   "true",
308   "Has atomic scalar memory instructions"
311 def FeatureSDWA : SubtargetFeature<"sdwa",
312   "HasSDWA",
313   "true",
314   "Support SDWA (Sub-DWORD Addressing) extension"
317 def FeatureSDWAOmod : SubtargetFeature<"sdwa-omod",
318   "HasSDWAOmod",
319   "true",
320   "Support OMod with SDWA (Sub-DWORD Addressing) extension"
323 def FeatureSDWAScalar : SubtargetFeature<"sdwa-scalar",
324   "HasSDWAScalar",
325   "true",
326   "Support scalar register with SDWA (Sub-DWORD Addressing) extension"
329 def FeatureSDWASdst : SubtargetFeature<"sdwa-sdst",
330   "HasSDWASdst",
331   "true",
332   "Support scalar dst for VOPC with SDWA (Sub-DWORD Addressing) extension"
// NOTE(review): the feature string is "sdwa-mav" although the def and the field
// are named SDWAMac; this looks like a typo for "sdwa-mac". It is left as is
// because the string is the externally visible feature name — confirm that no
// user depends on the current spelling before renaming.
335 def FeatureSDWAMac : SubtargetFeature<"sdwa-mav",
336   "HasSDWAMac",
337   "true",
338   "Support v_mac_f32/f16 with SDWA (Sub-DWORD Addressing) extension"
341 def FeatureSDWAOutModsVOPC : SubtargetFeature<"sdwa-out-mods-vopc",
342   "HasSDWAOutModsVOPC",
343   "true",
344   "Support clamp for VOPC with SDWA (Sub-DWORD Addressing) extension"
347 def FeatureDPP : SubtargetFeature<"dpp",
348   "HasDPP",
349   "true",
350   "Support DPP (Data Parallel Primitives) extension"
353 // DPP8 allows arbitrary cross-lane swizzling within groups of 8 lanes.
354 def FeatureDPP8 : SubtargetFeature<"dpp8",
355   "HasDPP8",
356   "true",
357   "Support DPP8 (Data Parallel Primitives) extension"
// Feature "r128-a16": sets the HasR128A16 field to "true". The last string is
// the human-readable description of the feature.
360 def FeatureR128A16 : SubtargetFeature<"r128-a16",
361   "HasR128A16",
362   "true",
363   "Support 16 bit coordinates/gradients/lod/clamp/mip types on gfx9"
366 def FeatureNSAEncoding : SubtargetFeature<"nsa-encoding",
367   "HasNSAEncoding",
368   "true",
369   "Support NSA encoding for image instructions"
372 def FeatureIntClamp : SubtargetFeature<"int-clamp-insts",
373   "HasIntClamp",
374   "true",
375   "Support clamp for integer destination"
378 def FeatureUnpackedD16VMem : SubtargetFeature<"unpacked-d16-vmem",
379   "HasUnpackedD16VMem",
380   "true",
381   "Has unpacked d16 vmem instructions"
384 def FeatureDLInsts : SubtargetFeature<"dl-insts",
385   "HasDLInsts",
386   "true",
387   "Has v_fmac_f32 and v_xnor_b32 instructions"
390 def FeatureDot1Insts : SubtargetFeature<"dot1-insts",
391   "HasDot1Insts",
392   "true",
393   "Has v_dot4_i32_i8 and v_dot8_i32_i4 instructions"
396 def FeatureDot2Insts : SubtargetFeature<"dot2-insts",
397   "HasDot2Insts",
398   "true",
399   "Has v_dot2_f32_f16, v_dot2_i32_i16, v_dot2_u32_u16, v_dot4_u32_u8, v_dot8_u32_u4 instructions"
402 def FeatureDot3Insts : SubtargetFeature<"dot3-insts",
403   "HasDot3Insts",
404   "true",
405   "Has v_dot8c_i32_i4 instruction"
408 def FeatureDot4Insts : SubtargetFeature<"dot4-insts",
409   "HasDot4Insts",
410   "true",
411   "Has v_dot2c_i32_i16 instruction"
414 def FeatureDot5Insts : SubtargetFeature<"dot5-insts",
415   "HasDot5Insts",
416   "true",
417   "Has v_dot2c_f32_f16 instruction"
420 def FeatureDot6Insts : SubtargetFeature<"dot6-insts",
421   "HasDot6Insts",
422   "true",
423   "Has v_dot4c_i32_i8 instruction"
426 def FeatureMAIInsts : SubtargetFeature<"mai-insts",
427   "HasMAIInsts",
428   "true",
429   "Has mAI instructions"
432 def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst",
433   "HasPkFmacF16Inst",
434   "true",
435   "Has v_pk_fmac_f16 instruction"
438 def FeatureAtomicFaddInsts : SubtargetFeature<"atomic-fadd-insts",
439   "HasAtomicFaddInsts",
440   "true",
441   "Has buffer_atomic_add_f32, buffer_atomic_pk_add_f16, global_atomic_add_f32, "
442   "global_atomic_pk_add_f16 instructions"
445 def FeatureDoesNotSupportSRAMECC : SubtargetFeature<"no-sram-ecc-support",
446   "DoesNotSupportSRAMECC",
447   "true",
448   "Hardware does not support SRAM ECC"
451 def FeatureSRAMECC : SubtargetFeature<"sram-ecc",
452   "EnableSRAMECC",
453   "true",
454   "Enable SRAM ECC"
457 def FeatureNoSdstCMPX : SubtargetFeature<"no-sdst-cmpx",
458   "HasNoSdstCMPX",
459   "true",
460   "V_CMPX does not write VCC/SGPR in addition to EXEC"
463 def FeatureVscnt : SubtargetFeature<"vscnt",
464   "HasVscnt",
465   "true",
466   "Has separate store vscnt counter"
469 def FeatureRegisterBanking : SubtargetFeature<"register-banking",
470   "HasRegisterBanking",
471   "true",
472   "Has register banking"
475 def FeatureVOP3Literal : SubtargetFeature<"vop3-literal",
476   "HasVOP3Literal",
477   "true",
478   "Can use one literal in VOP3"
481 def FeatureNoDataDepHazard : SubtargetFeature<"no-data-dep-hazard",
482   "HasNoDataDepHazard",
483   "true",
484   "Does not need SW waitstates"
487 //===------------------------------------------------------------===//
488 // Subtarget Features (options and debugging)
489 //===------------------------------------------------------------===//
491 // Denormal handling for fp64 and fp16 is controlled by the same
492 // config register when fp16 supported.
493 // TODO: Do we need a separate f16 setting when not legal?
494 def FeatureFP64FP16Denormals : SubtargetFeature<"fp64-fp16-denormals",
495   "FP64FP16Denormals",
496   "true",
497   "Enable double and half precision denormal handling",
498   [FeatureFP64]
// Feature "fp64-denormals". It writes the same FP64FP16Denormals field and
// carries the same description as FeatureFP64FP16Denormals, and names that
// feature together with FeatureFP64 in its trailing feature list, so it is not
// an independent fp64-only switch.
501 def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
502   "FP64FP16Denormals",
503   "true",
504   "Enable double and half precision denormal handling",
505   [FeatureFP64, FeatureFP64FP16Denormals]
// Feature "fp16-denormals". Despite the half-precision-only description, it
// writes the shared FP64FP16Denormals field and names
// FeatureFP64FP16Denormals in its trailing feature list.
508 def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
509   "FP64FP16Denormals",
510   "true",
511   "Enable half precision denormal handling",
512   [FeatureFP64FP16Denormals]
515 def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
516   "FPExceptions",
517   "true",
518   "Enable floating point exceptions"
521 class FeatureMaxPrivateElementSize<int size> : SubtargetFeature<
522   "max-private-element-size-"#size,
523   "MaxPrivateElementSize",
524   !cast<string>(size),
525   "Maximum private access size may be "#size
528 def FeatureMaxPrivateElementSize4 : FeatureMaxPrivateElementSize<4>;
529 def FeatureMaxPrivateElementSize8 : FeatureMaxPrivateElementSize<8>;
530 def FeatureMaxPrivateElementSize16 : FeatureMaxPrivateElementSize<16>;
532 def FeatureDumpCode : SubtargetFeature <"DumpCode",
533   "DumpCode",
534   "true",
535   "Dump MachineInstrs in the CodeEmitter"
// Lower-case spelling "dumpcode" of the "DumpCode" feature: it sets the same
// DumpCode field to "true" and uses the same description as FeatureDumpCode.
538 def FeatureDumpCodeLower : SubtargetFeature <"dumpcode",
539   "DumpCode",
540   "true",
541   "Dump MachineInstrs in the CodeEmitter"
544 // XXX - This should probably be removed once enabled by default
545 def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt",
546   "EnableLoadStoreOpt",
547   "true",
548   "Enable SI load/store optimizer pass"
551 // Performance debugging feature. Allow using DS instruction immediate
552 // offsets even if the base pointer can't be proven to be base. On SI,
553 // base pointer values that won't give the same result as a 16-bit add
554 // are not safe to fold, but this will override the conservative test
555 // for the base pointer.
556 def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <
557   "unsafe-ds-offset-folding",
558   "EnableUnsafeDSOffsetFolding",
559   "true",
560   "Force using DS instruction immediate offsets on SI"
563 def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler",
564   "EnableSIScheduler",
565   "true",
566   "Enable SI Machine Scheduler"
569 def FeatureEnableDS128 : SubtargetFeature<"enable-ds128",
570   "EnableDS128",
571   "true",
572   "Use ds_{read|write}_b128"
575 // Sparse texture support requires that all result registers are zeroed when
576 // PRTStrictNull is set to true. This feature is turned on for all architectures
577 // but is enabled as a feature in case there are situations where PRTStrictNull
578 // is disabled by the driver.
579 def FeatureEnablePRTStrictNull : SubtargetFeature<"enable-prt-strict-null",
580   "EnablePRTStrictNull",
581   "true",
582   "Enable zeroing of result registers for sparse texture fetches"
585 // Unless +-flat-for-global is specified, turn on FlatForGlobal for
586 // all OS-es on VI and newer hardware to avoid assertion failures due
587 // to missing ADDR64 variants of MUBUF instructions.
588 // FIXME: moveToVALU should be able to handle converting addr64 MUBUF
589 // instructions.
591 def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
592   "FlatForGlobal",
593   "true",
594   "Force to generate flat instruction for global"
597 def FeatureAutoWaitcntBeforeBarrier : SubtargetFeature <
598   "auto-waitcnt-before-barrier",
599   "AutoWaitcntBeforeBarrier",
600   "true",
601   "Hardware automatically inserts waitcnt before barrier"
604 def FeatureCodeObjectV3 : SubtargetFeature <
605   "code-object-v3",
606   "CodeObjectV3",
607   "true",
608   "Generate code object version 3"
611 def FeatureTrigReducedRange : SubtargetFeature<"trig-reduced-range",
612   "HasTrigReducedRange",
613   "true",
614   "Requires use of fract on arguments to trig instructions"
617 // Dummy feature used to disable assembler instructions.
618 def FeatureDisable : SubtargetFeature<"",
619   "FeatureDisable","true",
620   "Dummy feature to disable assembler instructions"
// Thin wrapper over SubtargetFeatureGeneration used by the generation features
// that follow. Value, FeatureName and Implies are forwarded unchanged; the
// third base-class argument is fixed to "GCNSubtarget".
//   Value       - generation identifier string (e.g. "SOUTHERN_ISLANDS").
//   FeatureName - feature string (e.g. "southern-islands").
//   Implies     - list of SubtargetFeature records passed through to the base.
623 class GCNSubtargetFeatureGeneration <string Value,
624                                      string FeatureName,
625                                      list<SubtargetFeature> Implies> :
626         SubtargetFeatureGeneration <Value, FeatureName, "GCNSubtarget", Implies>;
628 def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
629     "southern-islands",
630   [FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
631   FeatureWavefrontSize64,
632   FeatureLDSBankCount32, FeatureMovrel, FeatureTrigReducedRange,
633   FeatureDoesNotSupportSRAMECC, FeatureDoesNotSupportXNACK]
636 def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
637     "sea-islands",
638   [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
639   FeatureWavefrontSize64, FeatureFlatAddressSpace,
640   FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange,
641   FeatureGFX7GFX8GFX9Insts, FeatureDoesNotSupportSRAMECC]
644 def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
645   "volcanic-islands",
646   [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
647    FeatureWavefrontSize64, FeatureFlatAddressSpace,
648    FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
649    FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
650    FeatureScalarStores, FeatureInv2PiInlineImm,
651    FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP,
652    FeatureIntClamp, FeatureTrigReducedRange, FeatureDoesNotSupportSRAMECC,
653    FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts
654   ]
657 def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
658   "gfx9",
659   [FeatureFP64, FeatureLocalMemorySize65536,
660    FeatureWavefrontSize64, FeatureFlatAddressSpace,
661    FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
662    FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,
663    FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode,
664    FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
665    FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
666    FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
667    FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts,
668    FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16
669   ]
672 def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
673   "gfx10",
674   [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
675    FeatureFlatAddressSpace,
676    FeatureCIInsts, Feature16BitInsts,
677    FeatureSMemRealTime, FeatureInv2PiInlineImm,
678    FeatureApertureRegs, FeatureGFX9Insts, FeatureGFX10Insts, FeatureVOP3P,
679    FeatureMovrel, FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
680    FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
681    FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
682    FeatureAddNoCarryInsts, FeatureFmaMixInsts, FeatureGFX8Insts,
683    FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking,
684    FeatureVOP3Literal, FeatureDPP8,
685    FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC
686   ]
689 class FeatureSet<list<SubtargetFeature> Features_> {
690   list<SubtargetFeature> Features = Features_;
// Feature set for ISA version 6.0.0: the FeatureSouthernIslands generation plus
// per-chip features. NOTE(review): FeatureLDSBankCount32 and
// FeatureDoesNotSupportXNACK also appear in FeatureSouthernIslands' own list,
// so they are repeated here.
693 def FeatureISAVersion6_0_0 : FeatureSet<[FeatureSouthernIslands,
694    FeatureFastFMAF32,
695    HalfRate64Ops,
696    FeatureLDSBankCount32,
697    FeatureDoesNotSupportXNACK,
698    FeatureCodeObjectV3]>;
700 def FeatureISAVersion6_0_1 : FeatureSet<
701   [FeatureSouthernIslands,
702    FeatureLDSBankCount32,
703    FeatureDoesNotSupportXNACK,
704    FeatureCodeObjectV3]>;
706 def FeatureISAVersion7_0_0 : FeatureSet<
707   [FeatureSeaIslands,
708    FeatureLDSBankCount32,
709    FeatureDoesNotSupportXNACK,
710    FeatureCodeObjectV3]>;
712 def FeatureISAVersion7_0_1 : FeatureSet<
713   [FeatureSeaIslands,
714    HalfRate64Ops,
715    FeatureLDSBankCount32,
716    FeatureFastFMAF32,
717    FeatureDoesNotSupportXNACK,
718    FeatureCodeObjectV3]>;
720 def FeatureISAVersion7_0_2 : FeatureSet<
721   [FeatureSeaIslands,
722    FeatureLDSBankCount16,
723    FeatureFastFMAF32,
724    FeatureDoesNotSupportXNACK,
725    FeatureCodeObjectV3]>;
727 def FeatureISAVersion7_0_3 : FeatureSet<
728   [FeatureSeaIslands,
729    FeatureLDSBankCount16,
730    FeatureDoesNotSupportXNACK,
731    FeatureCodeObjectV3]>;
733 def FeatureISAVersion7_0_4 : FeatureSet<
734   [FeatureSeaIslands,
735    FeatureLDSBankCount32,
736    FeatureDoesNotSupportXNACK,
737    FeatureCodeObjectV3]>;
739 def FeatureISAVersion8_0_1 : FeatureSet<
740   [FeatureVolcanicIslands,
741    FeatureFastFMAF32,
742    HalfRate64Ops,
743    FeatureLDSBankCount32,
744    FeatureXNACK,
745    FeatureUnpackedD16VMem,
746    FeatureCodeObjectV3]>;
748 def FeatureISAVersion8_0_2 : FeatureSet<
749   [FeatureVolcanicIslands,
750    FeatureLDSBankCount32,
751    FeatureSGPRInitBug,
752    FeatureUnpackedD16VMem,
753    FeatureDoesNotSupportXNACK,
754    FeatureCodeObjectV3]>;
756 def FeatureISAVersion8_0_3 : FeatureSet<
757   [FeatureVolcanicIslands,
758    FeatureLDSBankCount32,
759    FeatureUnpackedD16VMem,
760    FeatureDoesNotSupportXNACK,
761    FeatureCodeObjectV3]>;
763 def FeatureISAVersion8_1_0 : FeatureSet<
764   [FeatureVolcanicIslands,
765    FeatureLDSBankCount16,
766    FeatureXNACK,
767    FeatureCodeObjectV3]>;
769 def FeatureISAVersion9_0_0 : FeatureSet<
770   [FeatureGFX9,
771    FeatureMadMixInsts,
772    FeatureLDSBankCount32,
773    FeatureCodeObjectV3,
774    FeatureDoesNotSupportXNACK,
775    FeatureDoesNotSupportSRAMECC]>;
777 def FeatureISAVersion9_0_2 : FeatureSet<
778   [FeatureGFX9,
779    FeatureMadMixInsts,
780    FeatureLDSBankCount32,
781    FeatureXNACK,
782    FeatureDoesNotSupportSRAMECC,
783    FeatureCodeObjectV3]>;
785 def FeatureISAVersion9_0_4 : FeatureSet<
786   [FeatureGFX9,
787    FeatureLDSBankCount32,
788    FeatureFmaMixInsts,
789    FeatureDoesNotSupportXNACK,
790    FeatureDoesNotSupportSRAMECC,
791    FeatureCodeObjectV3]>;
793 def FeatureISAVersion9_0_6 : FeatureSet<
794   [FeatureGFX9,
795    HalfRate64Ops,
796    FeatureFmaMixInsts,
797    FeatureLDSBankCount32,
798    FeatureDLInsts,
799    FeatureDot1Insts,
800    FeatureDot2Insts,
801    FeatureDoesNotSupportXNACK,
802    FeatureCodeObjectV3]>;
804 def FeatureISAVersion9_0_8 : FeatureSet<
805   [FeatureGFX9,
806    HalfRate64Ops,
807    FeatureFmaMixInsts,
808    FeatureLDSBankCount32,
809    FeatureDLInsts,
810    FeatureDot1Insts,
811    FeatureDot2Insts,
812    FeatureDot3Insts,
813    FeatureDot4Insts,
814    FeatureDot5Insts,
815    FeatureDot6Insts,
816    FeatureMAIInsts,
817    FeaturePkFmacF16Inst,
818    FeatureAtomicFaddInsts,
819    FeatureSRAMECC,
820    FeatureMFMAInlineLiteralBug,
821    FeatureCodeObjectV3]>;
823 def FeatureISAVersion9_0_9 : FeatureSet<
824   [FeatureGFX9,
825    FeatureMadMixInsts,
826    FeatureLDSBankCount32,
827    FeatureXNACK,
828    FeatureCodeObjectV3]>;
830 // TODO: Organize more features into groups.
831 def FeatureGroup {
832   // Bugs present on gfx10.1.
833   list<SubtargetFeature> GFX10_1_Bugs = [
834     FeatureVcmpxPermlaneHazard,
835     FeatureVMEMtoScalarWriteHazard,
836     FeatureSMEMtoVectorWriteHazard,
837     FeatureInstFwdPrefetchBug,
838     FeatureVcmpxExecWARHazard,
839     FeatureLdsBranchVmemWARHazard,
840     FeatureNSAtoVMEMBug,
841     FeatureOffset3fBug,
842     FeatureFlatSegmentOffsetBug
843    ];
846 def FeatureISAVersion10_1_0 : FeatureSet<
847   !listconcat(FeatureGroup.GFX10_1_Bugs,
848     [FeatureGFX10,
849      FeatureLDSBankCount32,
850      FeatureDLInsts,
851      FeatureNSAEncoding,
852      FeatureWavefrontSize32,
853      FeatureScalarStores,
854      FeatureScalarAtomics,
855      FeatureScalarFlatScratchInsts,
856      FeatureLdsMisalignedBug,
857      FeatureDoesNotSupportXNACK,
858      FeatureCodeObjectV3])>;
860 def FeatureISAVersion10_1_1 : FeatureSet<
861   !listconcat(FeatureGroup.GFX10_1_Bugs,
862     [FeatureGFX10,
863      FeatureLDSBankCount32,
864      FeatureDLInsts,
865      FeatureDot1Insts,
866      FeatureDot2Insts,
867      FeatureDot5Insts,
868      FeatureDot6Insts,
869      FeatureNSAEncoding,
870      FeatureWavefrontSize32,
871      FeatureScalarStores,
872      FeatureScalarAtomics,
873      FeatureScalarFlatScratchInsts,
874      FeatureDoesNotSupportXNACK,
875      FeatureCodeObjectV3])>;
877 def FeatureISAVersion10_1_2 : FeatureSet<
878   !listconcat(FeatureGroup.GFX10_1_Bugs,
879     [FeatureGFX10,
880      FeatureLDSBankCount32,
881      FeatureDLInsts,
882      FeatureDot1Insts,
883      FeatureDot2Insts,
884      FeatureDot5Insts,
885      FeatureDot6Insts,
886      FeatureNSAEncoding,
887      FeatureWavefrontSize32,
888      FeatureScalarStores,
889      FeatureScalarAtomics,
890      FeatureScalarFlatScratchInsts,
891      FeatureLdsMisalignedBug,
892      FeatureDoesNotSupportXNACK,
893      FeatureCodeObjectV3])>;
895 //===----------------------------------------------------------------------===//
897 def AMDGPUInstrInfo : InstrInfo {
898   let guessInstructionProperties = 1;
899   let noNamedPositionallyEncodedOperands = 1;
902 def AMDGPUAsmParser : AsmParser {
903   // Some of the R600 registers have the same name, so this crashes.
904   // For example T0_XYZW and T0_XY both have the asm name T0.
905   let ShouldEmitMatchRegisterName = 0;
908 def AMDGPUAsmWriter : AsmWriter {
909   int PassSubtarget = 1;
912 def AMDGPUAsmVariants {
913   string Default = "Default";
914   int Default_ID = 0;
915   string VOP3 = "VOP3";
916   int VOP3_ID = 1;
917   string SDWA = "SDWA";
918   int SDWA_ID = 2;
919   string SDWA9 = "SDWA9";
920   int SDWA9_ID = 3;
921   string DPP = "DPP";
922   int DPP_ID = 4;
923   string Disable = "Disable";
924   int Disable_ID = 5;
927 def DefaultAMDGPUAsmParserVariant : AsmParserVariant {
928   let Variant = AMDGPUAsmVariants.Default_ID;
929   let Name = AMDGPUAsmVariants.Default;
932 def VOP3AsmParserVariant : AsmParserVariant {
933   let Variant = AMDGPUAsmVariants.VOP3_ID;
934   let Name = AMDGPUAsmVariants.VOP3;
937 def SDWAAsmParserVariant : AsmParserVariant {
938   let Variant = AMDGPUAsmVariants.SDWA_ID;
939   let Name = AMDGPUAsmVariants.SDWA;
942 def SDWA9AsmParserVariant : AsmParserVariant {
943   let Variant = AMDGPUAsmVariants.SDWA9_ID;
944   let Name = AMDGPUAsmVariants.SDWA9;
948 def DPPAsmParserVariant : AsmParserVariant {
949   let Variant = AMDGPUAsmVariants.DPP_ID;
950   let Name = AMDGPUAsmVariants.DPP;
953 def AMDGPU : Target {
954   // Pull in Instruction Info:
955   let InstructionSet = AMDGPUInstrInfo;
956   let AssemblyParsers = [AMDGPUAsmParser];
957   let AssemblyParserVariants = [DefaultAMDGPUAsmParserVariant,
958                                 VOP3AsmParserVariant,
959                                 SDWAAsmParserVariant,
960                                 SDWA9AsmParserVariant,
961                                 DPPAsmParserVariant];
962   let AssemblyWriters = [AMDGPUAsmWriter];
963   let AllowRegisterRenaming = 1;
966 // Dummy Instruction itineraries for pseudo instructions
967 def ALU_NULL : FuncUnit;
968 def NullALU : InstrItinClass;
970 //===----------------------------------------------------------------------===//
971 // Predicate helper class
972 //===----------------------------------------------------------------------===//
// True only for the SOUTHERN_ISLANDS generation. The assembler side tests the
// generation feature FeatureSouthernIslands directly.
974 def isGFX6 :
975   Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS">,
976   AssemblerPredicate<"FeatureSouthernIslands">;
// True for the SOUTHERN_ISLANDS and SEA_ISLANDS generations. The assembler
// condition expresses the pair by exclusion: no GCN3 encoding (listed by
// FeatureVolcanicIslands and FeatureGFX9) and no GFX10 instructions (listed by
// FeatureGFX10).
978 def isGFX6GFX7 :
979   Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
980             "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
981   AssemblerPredicate<"!FeatureGCN3Encoding,!FeatureGFX10Insts">;
// True for SOUTHERN_ISLANDS, SEA_ISLANDS and GFX10. On the assembler side this
// is "not GCN3 encoding": of the generation features in this file only
// FeatureVolcanicIslands and FeatureGFX9 list FeatureGCN3Encoding.
983 def isGFX6GFX7GFX10 :
984   Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
985             "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
986             "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
987   AssemblerPredicate<"!FeatureGCN3Encoding">;
// True only for the SEA_ISLANDS generation. Assembler condition: FeatureCIInsts
// rules out SOUTHERN_ISLANDS (which does not list it), !FeatureGCN3Encoding
// rules out VOLCANIC_ISLANDS and GFX9, and !FeatureGFX10Insts rules out GFX10.
989 def isGFX7Only :
990   Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
991   AssemblerPredicate<"!FeatureGCN3Encoding,FeatureCIInsts,!FeatureGFX10Insts">;
// True for SEA_ISLANDS and GFX10. Assembler condition: has CI instructions but
// not the GCN3 encoding, which leaves out SOUTHERN_ISLANDS (no FeatureCIInsts)
// and VOLCANIC_ISLANDS/GFX9 (FeatureGCN3Encoding).
993 def isGFX7GFX10 :
994   Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
995             "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
996   AssemblerPredicate<"!FeatureGCN3Encoding,FeatureCIInsts">;
// True for SEA_ISLANDS, VOLCANIC_ISLANDS and GFX9. The assembler side uses the
// dedicated FeatureGFX7GFX8GFX9Insts, which exactly those three generation
// features list.
998 def isGFX7GFX8GFX9 :
999   Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
1000             "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
1001             "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
1002   AssemblerPredicate<"FeatureGFX7GFX8GFX9Insts">;
// True for every generation listed here except GFX10. The assembler side
// expresses this as the absence of FeatureGFX10Insts.
1004 def isGFX6GFX7GFX8GFX9 :
1005   Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
1006             "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
1007             "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
1008             "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
1009   AssemblerPredicate<"!FeatureGFX10Insts">;
1011 def isGFX7Plus :
1012   Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">,
1013   AssemblerPredicate<"FeatureCIInsts">;
1015 def isGFX8Plus :
1016   Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
1017   AssemblerPredicate<"FeatureGFX8Insts">;
// True only for the VOLCANIC_ISLANDS generation. The condition string is split
// across two adjacent literals; the assembler side tests the generation
// feature FeatureVolcanicIslands directly.
1019 def isGFX8Only : Predicate<"Subtarget->getGeneration() =="
1020                            "AMDGPUSubtarget::VOLCANIC_ISLANDS">,
1021   AssemblerPredicate <"FeatureVolcanicIslands">;
1023 def isGFX9Plus :
1024   Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
1025   AssemblerPredicate<"FeatureGFX9Insts">;
// True only for the GFX9 generation. FeatureGFX9Insts alone would also match
// GFX10 (FeatureGFX10 lists it), so the assembler condition additionally
// requires FeatureGCN3Encoding, which FeatureGFX10 does not list.
1027 def isGFX9Only : Predicate <
1028   "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
1029   AssemblerPredicate<"FeatureGCN3Encoding,FeatureGFX9Insts">;
// True for VOLCANIC_ISLANDS and GFX9. FeatureGFX8Insts is also listed by
// FeatureGFX10, so the assembler condition pairs it with FeatureGCN3Encoding
// to leave GFX10 out.
1031 def isGFX8GFX9 :
1032   Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
1033             "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
1034   AssemblerPredicate<"FeatureGFX8Insts,FeatureGCN3Encoding">;
// Generation predicate: GFX10 or any later generation.
// Assembler requires FeatureGFX10Insts.
1036 def isGFX10Plus :
1037   Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">,
1038   AssemblerPredicate<"FeatureGFX10Insts">;
// True when Subtarget->hasFlatAddressSpace(); the assembler requires
// FeatureFlatAddressSpace ("Support flat address space").
1040 def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
1041   AssemblerPredicate<"FeatureFlatAddressSpace">;
// True when Subtarget->hasFlatGlobalInsts(); the assembler requires
// FeatureFlatGlobalInsts.
1043 def HasFlatGlobalInsts : Predicate<"Subtarget->hasFlatGlobalInsts()">,
1044   AssemblerPredicate<"FeatureFlatGlobalInsts">;
// True when Subtarget->hasFlatScratchInsts(); the assembler requires
// FeatureFlatScratchInsts.
1045 def HasFlatScratchInsts : Predicate<"Subtarget->hasFlatScratchInsts()">,
1046   AssemblerPredicate<"FeatureFlatScratchInsts">;
// True when Subtarget->hasScalarFlatScratchInsts(); the assembler requires
// FeatureScalarFlatScratchInsts.
1047 def HasScalarFlatScratchInsts : Predicate<"Subtarget->hasScalarFlatScratchInsts()">,
1048   AssemblerPredicate<"FeatureScalarFlatScratchInsts">;
// True when Subtarget->hasD16LoadStore().
// Note the assembler side is gated on the generic FeatureGFX9Insts rather
// than on a dedicated D16 feature.
1049 def HasD16LoadStore : Predicate<"Subtarget->hasD16LoadStore()">,
1050   AssemblerPredicate<"FeatureGFX9Insts">;
// True when Subtarget->hasUnpackedD16VMem(); the assembler requires
// FeatureUnpackedD16VMem. HasPackedD16VMem is the exact negation.
1052 def HasUnpackedD16VMem : Predicate<"Subtarget->hasUnpackedD16VMem()">,
1053   AssemblerPredicate<"FeatureUnpackedD16VMem">;
// Negation of HasUnpackedD16VMem: true when Subtarget->hasUnpackedD16VMem()
// is false; the assembler requires FeatureUnpackedD16VMem to be absent.
1054 def HasPackedD16VMem : Predicate<"!Subtarget->hasUnpackedD16VMem()">,
1055   AssemblerPredicate<"!FeatureUnpackedD16VMem">;
// True when Subtarget->d16PreservesUnusedBits().
// On the assembler side this is spelled out as FeatureGFX9Insts being set
// while FeatureSRAMECC is not set.
1057 def D16PreservesUnusedBits :
1058   Predicate<"Subtarget->d16PreservesUnusedBits()">,
1059   AssemblerPredicate<"FeatureGFX9Insts,!FeatureSRAMECC">;
// Codegen-only predicate (no AssemblerPredicate): true when
// Subtarget->ldsRequiresM0Init().
1061 def LDSRequiresM0Init : Predicate<"Subtarget->ldsRequiresM0Init()">;
// Codegen-only negation of LDSRequiresM0Init: true when
// Subtarget->ldsRequiresM0Init() is false.
1062 def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">;
// Unlike most Has* predicates here, this one does not call a dedicated
// subtarget query: it is a plain generation check (GFX9 or later), with
// FeatureGFX9Insts as the assembler requirement.
1064 def HasDSAddTid : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
1065   AssemblerPredicate<"FeatureGFX9Insts">;
// True when Subtarget->hasAddNoCarry(); the assembler requires
// FeatureAddNoCarryInsts.
1067 def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarry()">,
1068   AssemblerPredicate<"FeatureAddNoCarryInsts">;
// Codegen-only negation of HasAddNoCarryInsts (no AssemblerPredicate):
// true when Subtarget->hasAddNoCarry() is false.
1070 def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarry()">;
// True when Subtarget->has16BitInsts(); the assembler requires
// Feature16BitInsts.
1072 def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">,
1073   AssemblerPredicate<"Feature16BitInsts">;
// True when Subtarget->hasVOP3PInsts(); the assembler requires FeatureVOP3P.
1074 def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
1075   AssemblerPredicate<"FeatureVOP3P">;
// SDWA predicate whose assembler side is restricted to Volcanic Islands.
// HasSDWA, HasSDWA9 and HasSDWA10 all share the same codegen condition
// (Subtarget->hasSDWA()); they differ only in the assembler feature list.
// Here: FeatureSDWA and FeatureVolcanicIslands.
1077 def HasSDWA : Predicate<"Subtarget->hasSDWA()">,
1078   AssemblerPredicate<"FeatureSDWA,FeatureVolcanicIslands">;
// SDWA predicate for the GFX9 assembler variant. Codegen condition is the
// same Subtarget->hasSDWA() used by HasSDWA/HasSDWA10; the assembler requires
// FeatureGCN3Encoding, FeatureGFX9Insts and FeatureSDWA.
1080 def HasSDWA9 :
1081   Predicate<"Subtarget->hasSDWA()">,
1082   AssemblerPredicate<"FeatureGCN3Encoding,FeatureGFX9Insts,FeatureSDWA">;
// SDWA predicate for the GFX10 assembler variant. Codegen condition is the
// same Subtarget->hasSDWA() used by HasSDWA/HasSDWA9; the assembler requires
// FeatureGFX10Insts and FeatureSDWA with FeatureGCN3Encoding absent.
1084 def HasSDWA10 :
1085   Predicate<"Subtarget->hasSDWA()">,
1086   AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureSDWA">;
// DPP predicate for targets using the GCN3 encoding. Shares the codegen
// condition Subtarget->hasDPP() with HasDPP16; the assembler requires
// FeatureGCN3Encoding and FeatureDPP.
1088 def HasDPP : Predicate<"Subtarget->hasDPP()">,
1089   AssemblerPredicate<"FeatureGCN3Encoding,FeatureDPP">;
// True when Subtarget->hasDPP8(); the assembler requires FeatureGFX10Insts
// and FeatureDPP8 with FeatureGCN3Encoding absent.
1091 def HasDPP8 : Predicate<"Subtarget->hasDPP8()">,
1092   AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureDPP8">;
// True when Subtarget->hasR128A16(); the assembler requires FeatureR128A16.
1094 def HasR128A16 : Predicate<"Subtarget->hasR128A16()">,
1095   AssemblerPredicate<"FeatureR128A16">;
// DPP predicate for the GFX10 assembler variant. Codegen condition is the
// same Subtarget->hasDPP() used by HasDPP; the assembler requires
// FeatureGFX10Insts and FeatureDPP with FeatureGCN3Encoding absent.
1097 def HasDPP16 : Predicate<"Subtarget->hasDPP()">,
1098   AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureDPP">;
// True when Subtarget->hasIntClamp(); the assembler requires FeatureIntClamp.
1100 def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">,
1101   AssemblerPredicate<"FeatureIntClamp">;
// True when Subtarget->hasMadMixInsts(); the assembler requires
// FeatureMadMixInsts.
1103 def HasMadMixInsts : Predicate<"Subtarget->hasMadMixInsts()">,
1104   AssemblerPredicate<"FeatureMadMixInsts">;
// True when Subtarget->hasScalarStores(); the assembler requires
// FeatureScalarStores.
1106 def HasScalarStores : Predicate<"Subtarget->hasScalarStores()">,
1107   AssemblerPredicate<"FeatureScalarStores">;
// True when Subtarget->hasScalarAtomics(); the assembler requires
// FeatureScalarAtomics.
1109 def HasScalarAtomics : Predicate<"Subtarget->hasScalarAtomics()">,
1110   AssemblerPredicate<"FeatureScalarAtomics">;
// True when Subtarget->hasNoSdstCMPX(); the assembler requires
// FeatureNoSdstCMPX. HasSdstCMPX is the exact negation on both sides.
1112 def HasNoSdstCMPX : Predicate<"Subtarget->hasNoSdstCMPX()">,
1113   AssemblerPredicate<"FeatureNoSdstCMPX">;
// Negation of HasNoSdstCMPX: true when Subtarget->hasNoSdstCMPX() is false;
// the assembler requires FeatureNoSdstCMPX to be absent.
1115 def HasSdstCMPX : Predicate<"!Subtarget->hasNoSdstCMPX()">,
1116   AssemblerPredicate<"!FeatureNoSdstCMPX">;
// Codegen-only predicate (no AssemblerPredicate): true when
// Subtarget->getLDSBankCount() is exactly 16.
1118 def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">;
// Codegen-only predicate (no AssemblerPredicate): true when
// Subtarget->getLDSBankCount() is exactly 32.
1119 def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">;
// True when Subtarget->hasVGPRIndexMode(); the assembler requires
// FeatureVGPRIndexMode.
1120 def HasVGPRIndexMode : Predicate<"Subtarget->hasVGPRIndexMode()">,
1121                       AssemblerPredicate<"FeatureVGPRIndexMode">;
// True when Subtarget->hasMovrel(); the assembler requires FeatureMovrel.
1122 def HasMovrel : Predicate<"Subtarget->hasMovrel()">,
1123                 AssemblerPredicate<"FeatureMovrel">;
// True when Subtarget->hasFmaMixInsts(); the assembler requires
// FeatureFmaMixInsts.
1125 def HasFmaMixInsts : Predicate<"Subtarget->hasFmaMixInsts()">,
1126   AssemblerPredicate<"FeatureFmaMixInsts">;
// True when Subtarget->hasDLInsts(); the assembler requires FeatureDLInsts.
1128 def HasDLInsts : Predicate<"Subtarget->hasDLInsts()">,
1129   AssemblerPredicate<"FeatureDLInsts">;
// True when Subtarget->hasDot1Insts(); the assembler requires
// FeatureDot1Insts. One of six parallel HasDotNInsts predicates (N = 1..6).
1131 def HasDot1Insts : Predicate<"Subtarget->hasDot1Insts()">,
1132   AssemblerPredicate<"FeatureDot1Insts">;
// True when Subtarget->hasDot2Insts(); the assembler requires
// FeatureDot2Insts.
1134 def HasDot2Insts : Predicate<"Subtarget->hasDot2Insts()">,
1135   AssemblerPredicate<"FeatureDot2Insts">;
// True when Subtarget->hasDot3Insts(); the assembler requires
// FeatureDot3Insts.
1137 def HasDot3Insts : Predicate<"Subtarget->hasDot3Insts()">,
1138   AssemblerPredicate<"FeatureDot3Insts">;
// True when Subtarget->hasDot4Insts(); the assembler requires
// FeatureDot4Insts.
1140 def HasDot4Insts : Predicate<"Subtarget->hasDot4Insts()">,
1141   AssemblerPredicate<"FeatureDot4Insts">;
// True when Subtarget->hasDot5Insts(); the assembler requires
// FeatureDot5Insts.
1143 def HasDot5Insts : Predicate<"Subtarget->hasDot5Insts()">,
1144   AssemblerPredicate<"FeatureDot5Insts">;
// True when Subtarget->hasDot6Insts(); the assembler requires
// FeatureDot6Insts.
1146 def HasDot6Insts : Predicate<"Subtarget->hasDot6Insts()">,
1147   AssemblerPredicate<"FeatureDot6Insts">;
// True when Subtarget->hasMAIInsts(); the assembler requires FeatureMAIInsts.
1149 def HasMAIInsts : Predicate<"Subtarget->hasMAIInsts()">,
1150   AssemblerPredicate<"FeatureMAIInsts">;
// True when Subtarget->hasPkFmacF16Inst(); the assembler requires
// FeaturePkFmacF16Inst.
1152 def HasPkFmacF16Inst : Predicate<"Subtarget->hasPkFmacF16Inst()">,
1153   AssemblerPredicate<"FeaturePkFmacF16Inst">;
// True when Subtarget->hasAtomicFaddInsts(); the assembler requires
// FeatureAtomicFaddInsts.
1155 def HasAtomicFaddInsts : Predicate<"Subtarget->hasAtomicFaddInsts()">,
1156   AssemblerPredicate<"FeatureAtomicFaddInsts">;
// True when Subtarget->hasOffset3fBug(); the assembler requires
// FeatureOffset3fBug.
// The codegen condition was previously negated ("!Subtarget->..."), which
// contradicted both the record name and the AssemblerPredicate: the two sides
// of the same predicate selected opposite sets of subtargets. Both sides now
// mean "the bug is present".
1158 def HasOffset3fBug : Predicate<"Subtarget->hasOffset3fBug()">,
1159   AssemblerPredicate<"FeatureOffset3fBug">;
// Codegen-only predicate (no AssemblerPredicate). Unlike the others it does
// not query Subtarget: the condition is the bare C++ expression
// EnableLateStructurizeCFG (note the word order differs from the record name).
// NOTE(review): presumably a flag visible wherever the generated selector is
// included - confirm against the C++ side.
1161 def EnableLateCFGStructurize : Predicate<
1162   "EnableLateStructurizeCFG">;
1164 // Include AMDGPU TD files
1165 include "SISchedule.td"
1166 include "GCNProcessors.td"
1167 include "AMDGPUInstrInfo.td"
1168 include "AMDGPURegisterInfo.td"
1169 include "AMDGPURegisterBanks.td"
1170 include "AMDGPUInstructions.td"
1171 include "SIInstrInfo.td"
1172 include "AMDGPUCallingConv.td"
1173 include "AMDGPUSearchableTables.td"