[ARM] Better patterns for fp <> predicate vectors
[llvm-complete.git] / lib / Target / AMDGPU / AMDGPU.td
blobbaeba534012ca3c3d382de1e2fafe5a9a862a1e6
1 //===-- AMDGPU.td - AMDGPU Tablegen files --------*- tablegen -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===------------------------------------------------------------===//
9 include "llvm/TableGen/SearchableTable.td"
10 include "llvm/Target/Target.td"
11 include "AMDGPUFeatures.td"
13 class BoolToList<bit Value> {
14   list<int> ret = !if(Value, [1]<int>, []<int>);
17 //===------------------------------------------------------------===//
18 // Subtarget Features (device properties)
19 //===------------------------------------------------------------===//
21 def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
22   "FastFMAF32",
23   "true",
24   "Assuming f32 fma is at least as fast as mul + add"
27 def FeatureMIMG_R128 : SubtargetFeature<"mimg-r128",
28   "MIMG_R128",
29   "true",
30   "Support 128-bit texture resources"
33 def HalfRate64Ops : SubtargetFeature<"half-rate-64-ops",
34   "HalfRate64Ops",
35   "true",
36   "Most fp64 instructions are half rate instead of quarter"
39 def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
40   "FlatAddressSpace",
41   "true",
42   "Support flat address space"
45 def FeatureFlatInstOffsets : SubtargetFeature<"flat-inst-offsets",
46   "FlatInstOffsets",
47   "true",
48   "Flat instructions have immediate offset addressing mode"
51 def FeatureFlatGlobalInsts : SubtargetFeature<"flat-global-insts",
52   "FlatGlobalInsts",
53   "true",
54   "Have global_* flat memory instructions"
57 def FeatureFlatScratchInsts : SubtargetFeature<"flat-scratch-insts",
58   "FlatScratchInsts",
59   "true",
60   "Have scratch_* flat memory instructions"
63 def FeatureScalarFlatScratchInsts : SubtargetFeature<"scalar-flat-scratch-insts",
64   "ScalarFlatScratchInsts",
65   "true",
66   "Have s_scratch_* flat memory instructions"
69 def FeatureAddNoCarryInsts : SubtargetFeature<"add-no-carry-insts",
70   "AddNoCarryInsts",
71   "true",
72   "Have VALU add/sub instructions without carry out"
75 def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access",
76   "UnalignedBufferAccess",
77   "true",
78   "Support unaligned global loads and stores"
81 def FeatureTrapHandler: SubtargetFeature<"trap-handler",
82   "TrapHandler",
83   "true",
84   "Trap handler support"
87 def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access",
88   "UnalignedScratchAccess",
89   "true",
90   "Support unaligned scratch loads and stores"
93 def FeatureApertureRegs : SubtargetFeature<"aperture-regs",
94   "HasApertureRegs",
95   "true",
96   "Has Memory Aperture Base and Size Registers"
99 def FeatureMadMixInsts : SubtargetFeature<"mad-mix-insts",
100   "HasMadMixInsts",
101   "true",
102   "Has v_mad_mix_f32, v_mad_mixlo_f16, v_mad_mixhi_f16 instructions"
105 def FeatureFmaMixInsts : SubtargetFeature<"fma-mix-insts",
106   "HasFmaMixInsts",
107   "true",
108   "Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions"
111 def FeatureDoesNotSupportXNACK : SubtargetFeature<"no-xnack-support",
112   "DoesNotSupportXNACK",
113   "true",
114   "Hardware does not support XNACK"
117 // XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support
118 // XNACK. The current default kernel driver setting is:
119 // - graphics ring: XNACK disabled
120 // - compute ring: XNACK enabled
122 // If XNACK is enabled, the VMEM latency can be worse.
123 // If XNACK is disabled, the 2 SGPRs can be used for general purposes.
124 def FeatureXNACK : SubtargetFeature<"xnack",
125   "EnableXNACK",
126   "true",
127   "Enable XNACK support"
130 def FeatureCuMode : SubtargetFeature<"cumode",
131   "EnableCuMode",
132   "true",
133   "Enable CU wavefront execution mode"
136 def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
137   "SGPRInitBug",
138   "true",
139   "VI SGPR initialization bug requiring a fixed SGPR allocation size"
142 def FeatureLdsMisalignedBug : SubtargetFeature<"lds-misaligned-bug",
143   "LDSMisalignedBug",
144   "true",
145   "Some GFX10 bug with misaligned multi-dword LDS access in WGP mode"
148 def FeatureVcmpxPermlaneHazard : SubtargetFeature<"vcmpx-permlane-hazard",
149   "HasVcmpxPermlaneHazard",
150   "true",
151   "TODO: describe me"
154 def FeatureVMEMtoScalarWriteHazard : SubtargetFeature<"vmem-to-scalar-write-hazard",
155   "HasVMEMtoScalarWriteHazard",
156   "true",
157   "VMEM instruction followed by scalar writing to EXEC mask, M0 or SGPR leads to incorrect execution."
160 def FeatureSMEMtoVectorWriteHazard : SubtargetFeature<"smem-to-vector-write-hazard",
161   "HasSMEMtoVectorWriteHazard",
162   "true",
163   "s_load_dword followed by v_cmp page faults"
166 def FeatureInstFwdPrefetchBug : SubtargetFeature<"inst-fwd-prefetch-bug",
167   "HasInstFwdPrefetchBug",
168   "true",
169   "S_INST_PREFETCH instruction causes shader to hang"
172 def FeatureVcmpxExecWARHazard : SubtargetFeature<"vcmpx-exec-war-hazard",
173   "HasVcmpxExecWARHazard",
174   "true",
175   "V_CMPX WAR hazard on EXEC (V_CMPX issue ONLY)"
178 def FeatureLdsBranchVmemWARHazard : SubtargetFeature<"lds-branch-vmem-war-hazard",
179   "HasLdsBranchVmemWARHazard",
180   "true",
181   "Switching between LDS and VMEM-tex not waiting VM_VSRC=0"
184 def FeatureNSAtoVMEMBug : SubtargetFeature<"nsa-to-vmem-bug",
185   "HasNSAtoVMEMBug",
186   "true",
187   "MIMG-NSA followed by VMEM fail if EXEC_LO or EXEC_HI equals zero"
190 def FeatureFlatSegmentOffsetBug : SubtargetFeature<"flat-segment-offset-bug",
191   "HasFlatSegmentOffsetBug",
192   "true",
193   "GFX10 bug, inst_offset ignored in flat segment"
196 def FeatureOffset3fBug : SubtargetFeature<"offset-3f-bug",
197   "HasOffset3fBug",
198   "true",
199   "Branch offset of 3f hardware bug"
202 class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
203   "ldsbankcount"#Value,
204   "LDSBankCount",
205   !cast<string>(Value),
206   "The number of LDS banks per compute unit."
// Instantiates SubtargetFeatureLDSBankCount with Value = 16: feature name
// "ldsbankcount16", which sets the LDSBankCount attribute to "16".
209 def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
// Instantiates SubtargetFeatureLDSBankCount with Value = 32: feature name
// "ldsbankcount32", which sets the LDSBankCount attribute to "32".
210 def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;
212 def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
213   "GCN3Encoding",
214   "true",
215   "Encoding format for VI"
218 def FeatureCIInsts : SubtargetFeature<"ci-insts",
219   "CIInsts",
220   "true",
221   "Additional instructions for CI+"
224 def FeatureGFX8Insts : SubtargetFeature<"gfx8-insts",
225   "GFX8Insts",
226   "true",
227   "Additional instructions for GFX8+"
230 def FeatureGFX9Insts : SubtargetFeature<"gfx9-insts",
231   "GFX9Insts",
232   "true",
233   "Additional instructions for GFX9+"
236 def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
237   "GFX10Insts",
238   "true",
239   "Additional instructions for GFX10+"
242 def FeatureGFX7GFX8GFX9Insts : SubtargetFeature<"gfx7-gfx8-gfx9-insts",
243   "GFX7GFX8GFX9Insts",
244   "true",
245   "Instructions shared in GFX7, GFX8, GFX9"
248 def FeatureSMemRealTime : SubtargetFeature<"s-memrealtime",
249   "HasSMemRealTime",
250   "true",
251   "Has s_memrealtime instruction"
254 def FeatureInv2PiInlineImm : SubtargetFeature<"inv-2pi-inline-imm",
255   "HasInv2PiInlineImm",
256   "true",
257   "Has 1 / (2 * pi) as inline immediate"
260 def Feature16BitInsts : SubtargetFeature<"16-bit-insts",
261   "Has16BitInsts",
262   "true",
263   "Has i16/f16 instructions"
266 def FeatureVOP3P : SubtargetFeature<"vop3p",
267   "HasVOP3PInsts",
268   "true",
269   "Has VOP3P packed instructions"
272 def FeatureMovrel : SubtargetFeature<"movrel",
273   "HasMovrel",
274   "true",
275   "Has v_movrel*_b32 instructions"
278 def FeatureVGPRIndexMode : SubtargetFeature<"vgpr-index-mode",
279   "HasVGPRIndexMode",
280   "true",
281   "Has VGPR mode register indexing"
284 def FeatureScalarStores : SubtargetFeature<"scalar-stores",
285   "HasScalarStores",
286   "true",
287   "Has store scalar memory instructions"
290 def FeatureScalarAtomics : SubtargetFeature<"scalar-atomics",
291   "HasScalarAtomics",
292   "true",
293   "Has atomic scalar memory instructions"
296 def FeatureSDWA : SubtargetFeature<"sdwa",
297   "HasSDWA",
298   "true",
299   "Support SDWA (Sub-DWORD Addressing) extension"
302 def FeatureSDWAOmod : SubtargetFeature<"sdwa-omod",
303   "HasSDWAOmod",
304   "true",
305   "Support OMod with SDWA (Sub-DWORD Addressing) extension"
308 def FeatureSDWAScalar : SubtargetFeature<"sdwa-scalar",
309   "HasSDWAScalar",
310   "true",
311   "Support scalar register with SDWA (Sub-DWORD Addressing) extension"
314 def FeatureSDWASdst : SubtargetFeature<"sdwa-sdst",
315   "HasSDWASdst",
316   "true",
317   "Support scalar dst for VOPC with SDWA (Sub-DWORD Addressing) extension"
320 def FeatureSDWAMac : SubtargetFeature<"sdwa-mav",
321   "HasSDWAMac",
322   "true",
323   "Support v_mac_f32/f16 with SDWA (Sub-DWORD Addressing) extension"
326 def FeatureSDWAOutModsVOPC : SubtargetFeature<"sdwa-out-mods-vopc",
327   "HasSDWAOutModsVOPC",
328   "true",
329   "Support clamp for VOPC with SDWA (Sub-DWORD Addressing) extension"
332 def FeatureDPP : SubtargetFeature<"dpp",
333   "HasDPP",
334   "true",
335   "Support DPP (Data Parallel Primitives) extension"
338 // DPP8 allows arbitrary cross-lane swizzling within groups of 8 lanes.
339 def FeatureDPP8 : SubtargetFeature<"dpp8",
340   "HasDPP8",
341   "true",
342   "Support DPP8 (Data Parallel Primitives) extension"
// Subtarget feature "r128-a16": sets the HasR128A16 attribute to "true".
// The last string is the user-visible feature description.
345 def FeatureR128A16 : SubtargetFeature<"r128-a16",
346   "HasR128A16",
347   "true",
348   "Support 16 bit coordinates/gradients/lod/clamp/mip types on gfx9"
351 def FeatureNSAEncoding : SubtargetFeature<"nsa-encoding",
352   "HasNSAEncoding",
353   "true",
354   "Support NSA encoding for image instructions"
357 def FeatureIntClamp : SubtargetFeature<"int-clamp-insts",
358   "HasIntClamp",
359   "true",
360   "Support clamp for integer destination"
363 def FeatureUnpackedD16VMem : SubtargetFeature<"unpacked-d16-vmem",
364   "HasUnpackedD16VMem",
365   "true",
366   "Has unpacked d16 vmem instructions"
369 def FeatureDLInsts : SubtargetFeature<"dl-insts",
370   "HasDLInsts",
371   "true",
372   "Has v_fmac_f32 and v_xnor_b32 instructions"
375 def FeatureDot1Insts : SubtargetFeature<"dot1-insts",
376   "HasDot1Insts",
377   "true",
378   "Has v_dot4_i32_i8 and v_dot8_i32_i4 instructions"
381 def FeatureDot2Insts : SubtargetFeature<"dot2-insts",
382   "HasDot2Insts",
383   "true",
384   "Has v_dot2_f32_f16, v_dot2_i32_i16, v_dot2_u32_u16, v_dot4_u32_u8, v_dot8_u32_u4 instructions"
387 def FeatureDot3Insts : SubtargetFeature<"dot3-insts",
388   "HasDot3Insts",
389   "true",
390   "Has v_dot8c_i32_i4 instruction"
393 def FeatureDot4Insts : SubtargetFeature<"dot4-insts",
394   "HasDot4Insts",
395   "true",
396   "Has v_dot2c_i32_i16 instruction"
399 def FeatureDot5Insts : SubtargetFeature<"dot5-insts",
400   "HasDot5Insts",
401   "true",
402   "Has v_dot2c_f32_f16 instruction"
405 def FeatureDot6Insts : SubtargetFeature<"dot6-insts",
406   "HasDot6Insts",
407   "true",
408   "Has v_dot4c_i32_i8 instruction"
411 def FeatureMAIInsts : SubtargetFeature<"mai-insts",
412   "HasMAIInsts",
413   "true",
414   "Has mAI instructions"
417 def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst",
418   "HasPkFmacF16Inst",
419   "true",
420   "Has v_pk_fmac_f16 instruction"
423 def FeatureAtomicFaddInsts : SubtargetFeature<"atomic-fadd-insts",
424   "HasAtomicFaddInsts",
425   "true",
426   "Has buffer_atomic_add_f32, buffer_atomic_pk_add_f16, global_atomic_add_f32, "
427   "global_atomic_pk_add_f16 instructions"
430 def FeatureDoesNotSupportSRAMECC : SubtargetFeature<"no-sram-ecc-support",
431   "DoesNotSupportSRAMECC",
432   "true",
433   "Hardware does not support SRAM ECC"
436 def FeatureSRAMECC : SubtargetFeature<"sram-ecc",
437   "EnableSRAMECC",
438   "true",
439   "Enable SRAM ECC"
442 def FeatureNoSdstCMPX : SubtargetFeature<"no-sdst-cmpx",
443   "HasNoSdstCMPX",
444   "true",
445   "V_CMPX does not write VCC/SGPR in addition to EXEC"
448 def FeatureVscnt : SubtargetFeature<"vscnt",
449   "HasVscnt",
450   "true",
451   "Has separate store vscnt counter"
454 def FeatureRegisterBanking : SubtargetFeature<"register-banking",
455   "HasRegisterBanking",
456   "true",
457   "Has register banking"
460 def FeatureVOP3Literal : SubtargetFeature<"vop3-literal",
461   "HasVOP3Literal",
462   "true",
463   "Can use one literal in VOP3"
466 def FeatureNoDataDepHazard : SubtargetFeature<"no-data-dep-hazard",
467   "HasNoDataDepHazard",
468   "true",
469   "Does not need SW waitstates"
472 //===------------------------------------------------------------===//
473 // Subtarget Features (options and debugging)
474 //===------------------------------------------------------------===//
476 // Denormal handling for fp64 and fp16 is controlled by the same
477 // config register when fp16 supported.
478 // TODO: Do we need a separate f16 setting when not legal?
479 def FeatureFP64FP16Denormals : SubtargetFeature<"fp64-fp16-denormals",
480   "FP64FP16Denormals",
481   "true",
482   "Enable double and half precision denormal handling",
483   [FeatureFP64]
486 def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
487   "FP64FP16Denormals",
488   "true",
489   "Enable double and half precision denormal handling",
490   [FeatureFP64, FeatureFP64FP16Denormals]
493 def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
494   "FP64FP16Denormals",
495   "true",
496   "Enable half precision denormal handling",
497   [FeatureFP64FP16Denormals]
500 def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
501   "FPExceptions",
502   "true",
503   "Enable floating point exceptions"
506 class FeatureMaxPrivateElementSize<int size> : SubtargetFeature<
507   "max-private-element-size-"#size,
508   "MaxPrivateElementSize",
509   !cast<string>(size),
510   "Maximum private access size may be "#size
// Feature "max-private-element-size-4": sets MaxPrivateElementSize to "4".
513 def FeatureMaxPrivateElementSize4 : FeatureMaxPrivateElementSize<4>;
// Feature "max-private-element-size-8": sets MaxPrivateElementSize to "8".
514 def FeatureMaxPrivateElementSize8 : FeatureMaxPrivateElementSize<8>;
// Feature "max-private-element-size-16": sets MaxPrivateElementSize to "16".
515 def FeatureMaxPrivateElementSize16 : FeatureMaxPrivateElementSize<16>;
517 def FeatureDumpCode : SubtargetFeature <"DumpCode",
518   "DumpCode",
519   "true",
520   "Dump MachineInstrs in the CodeEmitter"
523 def FeatureDumpCodeLower : SubtargetFeature <"dumpcode",
524   "DumpCode",
525   "true",
526   "Dump MachineInstrs in the CodeEmitter"
529 // XXX - This should probably be removed once enabled by default
530 def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt",
531   "EnableLoadStoreOpt",
532   "true",
533   "Enable SI load/store optimizer pass"
536 // Performance debugging feature. Allow using DS instruction immediate
537 // offsets even if the base pointer can't be proven to be base. On SI,
538 // base pointer values that won't give the same result as a 16-bit add
539 // are not safe to fold, but this will override the conservative test
540 // for the base pointer.
541 def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <
542   "unsafe-ds-offset-folding",
543   "EnableUnsafeDSOffsetFolding",
544   "true",
545   "Force using DS instruction immediate offsets on SI"
548 def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler",
549   "EnableSIScheduler",
550   "true",
551   "Enable SI Machine Scheduler"
554 def FeatureEnableDS128 : SubtargetFeature<"enable-ds128",
555   "EnableDS128",
556   "true",
557   "Use ds_{read|write}_b128"
560 // Sparse texture support requires that all result registers are zeroed when
561 // PRTStrictNull is set to true. This feature is turned on for all architectures
562 // but is enabled as a feature in case there are situations where PRTStrictNull
563 // is disabled by the driver.
564 def FeatureEnablePRTStrictNull : SubtargetFeature<"enable-prt-strict-null",
565   "EnablePRTStrictNull",
566   "true",
567   "Enable zeroing of result registers for sparse texture fetches"
570 // Unless +flat-for-global or -flat-for-global is specified explicitly, turn
571 // on FlatForGlobal for all OS-es on VI and newer hardware to avoid assertion
572 // failures due to missing ADDR64 variants of MUBUF instructions.
573 // FIXME: moveToVALU should be able to handle converting addr64 MUBUF
574 // instructions.
576 def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
577   "FlatForGlobal",
578   "true",
579   "Force to generate flat instruction for global"
582 def FeatureAutoWaitcntBeforeBarrier : SubtargetFeature <
583   "auto-waitcnt-before-barrier",
584   "AutoWaitcntBeforeBarrier",
585   "true",
586   "Hardware automatically inserts waitcnt before barrier"
589 def FeatureCodeObjectV3 : SubtargetFeature <
590   "code-object-v3",
591   "CodeObjectV3",
592   "true",
593   "Generate code object version 3"
596 def FeatureTrigReducedRange : SubtargetFeature<"trig-reduced-range",
597   "HasTrigReducedRange",
598   "true",
599   "Requires use of fract on arguments to trig instructions"
602 // Dummy feature used to disable assembler instructions.
603 def FeatureDisable : SubtargetFeature<"",
604   "FeatureDisable","true",
605   "Dummy feature to disable assembler instructions"
// Convenience wrapper around SubtargetFeatureGeneration that fixes the third
// argument to "GCNSubtarget" and forwards Value, FeatureName and Implies
// unchanged. SubtargetFeatureGeneration itself is not defined in this file
// (presumably it comes from the included AMDGPUFeatures.td -- confirm).
608 class GCNSubtargetFeatureGeneration <string Value,
609                                      string FeatureName,
610                                      list<SubtargetFeature> Implies> :
611         SubtargetFeatureGeneration <Value, FeatureName, "GCNSubtarget", Implies>;
613 def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
614     "southern-islands",
615   [FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
616   FeatureWavefrontSize64,
617   FeatureLDSBankCount32, FeatureMovrel, FeatureTrigReducedRange,
618   FeatureDoesNotSupportSRAMECC, FeatureDoesNotSupportXNACK]
621 def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
622     "sea-islands",
623   [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
624   FeatureWavefrontSize64, FeatureFlatAddressSpace,
625   FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange,
626   FeatureGFX7GFX8GFX9Insts, FeatureDoesNotSupportSRAMECC]
629 def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
630   "volcanic-islands",
631   [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
632    FeatureWavefrontSize64, FeatureFlatAddressSpace,
633    FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
634    FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
635    FeatureScalarStores, FeatureInv2PiInlineImm,
636    FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP,
637    FeatureIntClamp, FeatureTrigReducedRange, FeatureDoesNotSupportSRAMECC,
638    FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts
639   ]
642 def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
643   "gfx9",
644   [FeatureFP64, FeatureLocalMemorySize65536,
645    FeatureWavefrontSize64, FeatureFlatAddressSpace,
646    FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
647    FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,
648    FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode,
649    FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
650    FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
651    FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
652    FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts,
653    FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16
654   ]
657 def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
658   "gfx10",
659   [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
660    FeatureFlatAddressSpace,
661    FeatureCIInsts, Feature16BitInsts,
662    FeatureSMemRealTime, FeatureInv2PiInlineImm,
663    FeatureApertureRegs, FeatureGFX9Insts, FeatureGFX10Insts, FeatureVOP3P,
664    FeatureMovrel, FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
665    FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
666    FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
667    FeatureAddNoCarryInsts, FeatureFmaMixInsts, FeatureGFX8Insts,
668    FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking,
669    FeatureVOP3Literal, FeatureDPP8,
670    FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC
671   ]
674 class FeatureSet<list<SubtargetFeature> Features_> {
675   list<SubtargetFeature> Features = Features_;
// Feature set for ISA version 6.0.0: Southern Islands plus fast f32 FMA,
// half-rate fp64 and code object v3. FeatureLDSBankCount32 and
// FeatureDoesNotSupportXNACK are already in FeatureSouthernIslands' implied
// list, so listing them here is a repetition, not an addition.
678 def FeatureISAVersion6_0_0 : FeatureSet<[FeatureSouthernIslands,
679    FeatureFastFMAF32,
680    HalfRate64Ops,
681    FeatureLDSBankCount32,
682    FeatureDoesNotSupportXNACK,
683    FeatureCodeObjectV3]>;
// Feature set for ISA version 6.0.1: Southern Islands plus code object v3.
// FeatureLDSBankCount32 and FeatureDoesNotSupportXNACK repeat entries that
// FeatureSouthernIslands already implies. Unlike 6.0.0, neither
// FeatureFastFMAF32 nor HalfRate64Ops is listed.
685 def FeatureISAVersion6_0_1 : FeatureSet<
686   [FeatureSouthernIslands,
687    FeatureLDSBankCount32,
688    FeatureDoesNotSupportXNACK,
689    FeatureCodeObjectV3]>;
// Feature set for ISA version 7.0.0: Sea Islands with 32 LDS banks, no XNACK
// support, and code object v3.
691 def FeatureISAVersion7_0_0 : FeatureSet<
692   [FeatureSeaIslands,
693    FeatureLDSBankCount32,
694    FeatureDoesNotSupportXNACK,
695    FeatureCodeObjectV3]>;
// Feature set for ISA version 7.0.1: Sea Islands with 32 LDS banks, plus
// half-rate fp64 and fast f32 FMA; no XNACK support; code object v3.
697 def FeatureISAVersion7_0_1 : FeatureSet<
698   [FeatureSeaIslands,
699    HalfRate64Ops,
700    FeatureLDSBankCount32,
701    FeatureFastFMAF32,
702    FeatureDoesNotSupportXNACK,
703    FeatureCodeObjectV3]>;
// Feature set for ISA version 7.0.2: Sea Islands with 16 LDS banks and fast
// f32 FMA; no XNACK support; code object v3.
705 def FeatureISAVersion7_0_2 : FeatureSet<
706   [FeatureSeaIslands,
707    FeatureLDSBankCount16,
708    FeatureFastFMAF32,
709    FeatureDoesNotSupportXNACK,
710    FeatureCodeObjectV3]>;
// Feature set for ISA version 7.0.3: Sea Islands with 16 LDS banks; no XNACK
// support; code object v3. Same as 7.0.2 minus FeatureFastFMAF32.
712 def FeatureISAVersion7_0_3 : FeatureSet<
713   [FeatureSeaIslands,
714    FeatureLDSBankCount16,
715    FeatureDoesNotSupportXNACK,
716    FeatureCodeObjectV3]>;
// Feature set for ISA version 7.0.4: Sea Islands with 32 LDS banks; no XNACK
// support; code object v3. The list is identical to FeatureISAVersion7_0_0.
718 def FeatureISAVersion7_0_4 : FeatureSet<
719   [FeatureSeaIslands,
720    FeatureLDSBankCount32,
721    FeatureDoesNotSupportXNACK,
722    FeatureCodeObjectV3]>;
// Feature set for ISA version 8.0.1: Volcanic Islands with fast f32 FMA,
// half-rate fp64, 32 LDS banks, XNACK enabled (FeatureXNACK), unpacked d16
// VMEM instructions, and code object v3.
724 def FeatureISAVersion8_0_1 : FeatureSet<
725   [FeatureVolcanicIslands,
726    FeatureFastFMAF32,
727    HalfRate64Ops,
728    FeatureLDSBankCount32,
729    FeatureXNACK,
730    FeatureUnpackedD16VMem,
731    FeatureCodeObjectV3]>;
// Feature set for ISA version 8.0.2: Volcanic Islands with 32 LDS banks,
// unpacked d16 VMEM instructions, no XNACK support, and code object v3.
// This is the only ISA version in this list that carries FeatureSGPRInitBug.
733 def FeatureISAVersion8_0_2 : FeatureSet<
734   [FeatureVolcanicIslands,
735    FeatureLDSBankCount32,
736    FeatureSGPRInitBug,
737    FeatureUnpackedD16VMem,
738    FeatureDoesNotSupportXNACK,
739    FeatureCodeObjectV3]>;
// Feature set for ISA version 8.0.3: Volcanic Islands with 32 LDS banks,
// unpacked d16 VMEM instructions, no XNACK support, and code object v3.
// Same as 8.0.2 minus FeatureSGPRInitBug.
741 def FeatureISAVersion8_0_3 : FeatureSet<
742   [FeatureVolcanicIslands,
743    FeatureLDSBankCount32,
744    FeatureUnpackedD16VMem,
745    FeatureDoesNotSupportXNACK,
746    FeatureCodeObjectV3]>;
// Feature set for ISA version 8.1.0: Volcanic Islands with 16 LDS banks,
// XNACK enabled, and code object v3. FeatureUnpackedD16VMem is not listed,
// unlike the 8.0.x sets.
748 def FeatureISAVersion8_1_0 : FeatureSet<
749   [FeatureVolcanicIslands,
750    FeatureLDSBankCount16,
751    FeatureXNACK,
752    FeatureCodeObjectV3]>;
// Feature set for ISA version 9.0.0: GFX9 with the v_mad_mix* instructions
// (FeatureMadMixInsts), 32 LDS banks, code object v3, and neither XNACK nor
// SRAM ECC support.
754 def FeatureISAVersion9_0_0 : FeatureSet<
755   [FeatureGFX9,
756    FeatureMadMixInsts,
757    FeatureLDSBankCount32,
758    FeatureCodeObjectV3,
759    FeatureDoesNotSupportXNACK,
760    FeatureDoesNotSupportSRAMECC]>;
// Feature set for ISA version 9.0.2: GFX9 with the v_mad_mix* instructions,
// 32 LDS banks, XNACK enabled, no SRAM ECC support, and code object v3.
762 def FeatureISAVersion9_0_2 : FeatureSet<
763   [FeatureGFX9,
764    FeatureMadMixInsts,
765    FeatureLDSBankCount32,
766    FeatureXNACK,
767    FeatureDoesNotSupportSRAMECC,
768    FeatureCodeObjectV3]>;
// Feature set for ISA version 9.0.4: GFX9 with the v_fma_mix* instructions
// (FeatureFmaMixInsts rather than FeatureMadMixInsts), 32 LDS banks, neither
// XNACK nor SRAM ECC support, and code object v3.
770 def FeatureISAVersion9_0_4 : FeatureSet<
771   [FeatureGFX9,
772    FeatureLDSBankCount32,
773    FeatureFmaMixInsts,
774    FeatureDoesNotSupportXNACK,
775    FeatureDoesNotSupportSRAMECC,
776    FeatureCodeObjectV3]>;
// Feature set for ISA version 9.0.6: GFX9 with half-rate fp64, v_fma_mix*,
// 32 LDS banks, the DL instructions, dot1/dot2 instructions, no XNACK
// support, and code object v3. Neither FeatureSRAMECC nor
// FeatureDoesNotSupportSRAMECC appears in this list.
778 def FeatureISAVersion9_0_6 : FeatureSet<
779   [FeatureGFX9,
780    HalfRate64Ops,
781    FeatureFmaMixInsts,
782    FeatureLDSBankCount32,
783    FeatureDLInsts,
784    FeatureDot1Insts,
785    FeatureDot2Insts,
786    FeatureDoesNotSupportXNACK,
787    FeatureCodeObjectV3]>;
// Feature set for ISA version 9.0.8: everything listed for 9.0.6 except
// FeatureDoesNotSupportXNACK, plus dot3..dot6, the MAI instructions,
// v_pk_fmac_f16, the atomic fadd instructions, and SRAM ECC enabled
// (FeatureSRAMECC). Neither XNACK feature appears in this list.
789 def FeatureISAVersion9_0_8 : FeatureSet<
790   [FeatureGFX9,
791    HalfRate64Ops,
792    FeatureFmaMixInsts,
793    FeatureLDSBankCount32,
794    FeatureDLInsts,
795    FeatureDot1Insts,
796    FeatureDot2Insts,
797    FeatureDot3Insts,
798    FeatureDot4Insts,
799    FeatureDot5Insts,
800    FeatureDot6Insts,
801    FeatureMAIInsts,
802    FeaturePkFmacF16Inst,
803    FeatureAtomicFaddInsts,
804    FeatureSRAMECC,
805    FeatureCodeObjectV3]>;
// Feature set for ISA version 9.0.9: GFX9 with the v_mad_mix* instructions,
// 32 LDS banks, XNACK enabled, and code object v3. Same as 9.0.2 minus
// FeatureDoesNotSupportSRAMECC.
807 def FeatureISAVersion9_0_9 : FeatureSet<
808   [FeatureGFX9,
809    FeatureMadMixInsts,
810    FeatureLDSBankCount32,
811    FeatureXNACK,
812    FeatureCodeObjectV3]>;
814 // TODO: Organize more features into groups.
815 def FeatureGroup {
816   // Bugs present on gfx10.1.
817   list<SubtargetFeature> GFX10_1_Bugs = [
818     FeatureVcmpxPermlaneHazard,
819     FeatureVMEMtoScalarWriteHazard,
820     FeatureSMEMtoVectorWriteHazard,
821     FeatureInstFwdPrefetchBug,
822     FeatureVcmpxExecWARHazard,
823     FeatureLdsBranchVmemWARHazard,
824     FeatureNSAtoVMEMBug,
825     FeatureOffset3fBug,
826     FeatureFlatSegmentOffsetBug
827    ];
// Feature set for ISA version 10.1.0: the shared FeatureGroup.GFX10_1_Bugs
// list concatenated with GFX10, 32 LDS banks, DL instructions, NSA encoding,
// wavefront size 32, scalar stores/atomics/flat-scratch instructions, the
// LDS misaligned-access bug, no XNACK support, and code object v3.
// No dot-product features are listed for this version.
830 def FeatureISAVersion10_1_0 : FeatureSet<
831   !listconcat(FeatureGroup.GFX10_1_Bugs,
832     [FeatureGFX10,
833      FeatureLDSBankCount32,
834      FeatureDLInsts,
835      FeatureNSAEncoding,
836      FeatureWavefrontSize32,
837      FeatureScalarStores,
838      FeatureScalarAtomics,
839      FeatureScalarFlatScratchInsts,
840      FeatureLdsMisalignedBug,
841      FeatureDoesNotSupportXNACK,
842      FeatureCodeObjectV3])>;
// Feature set for ISA version 10.1.1: FeatureGroup.GFX10_1_Bugs concatenated
// with the 10.1.0 list, adding dot1/dot2/dot5/dot6 instructions and dropping
// FeatureLdsMisalignedBug.
844 def FeatureISAVersion10_1_1 : FeatureSet<
845   !listconcat(FeatureGroup.GFX10_1_Bugs,
846     [FeatureGFX10,
847      FeatureLDSBankCount32,
848      FeatureDLInsts,
849      FeatureDot1Insts,
850      FeatureDot2Insts,
851      FeatureDot5Insts,
852      FeatureDot6Insts,
853      FeatureNSAEncoding,
854      FeatureWavefrontSize32,
855      FeatureScalarStores,
856      FeatureScalarAtomics,
857      FeatureScalarFlatScratchInsts,
858      FeatureDoesNotSupportXNACK,
859      FeatureCodeObjectV3])>;
// Feature set for ISA version 10.1.2: FeatureGroup.GFX10_1_Bugs concatenated
// with the same list as 10.1.1 plus FeatureLdsMisalignedBug.
861 def FeatureISAVersion10_1_2 : FeatureSet<
862   !listconcat(FeatureGroup.GFX10_1_Bugs,
863     [FeatureGFX10,
864      FeatureLDSBankCount32,
865      FeatureDLInsts,
866      FeatureDot1Insts,
867      FeatureDot2Insts,
868      FeatureDot5Insts,
869      FeatureDot6Insts,
870      FeatureNSAEncoding,
871      FeatureWavefrontSize32,
872      FeatureScalarStores,
873      FeatureScalarAtomics,
874      FeatureScalarFlatScratchInsts,
875      FeatureLdsMisalignedBug,
876      FeatureDoesNotSupportXNACK,
877      FeatureCodeObjectV3])>;
879 //===----------------------------------------------------------------------===//
881 def AMDGPUInstrInfo : InstrInfo {
882   let guessInstructionProperties = 1;
883   let noNamedPositionallyEncodedOperands = 1;
886 def AMDGPUAsmParser : AsmParser {
887   // Some of the R600 registers have the same name, so this crashes.
888   // For example T0_XYZW and T0_XY both have the asm name T0.
889   let ShouldEmitMatchRegisterName = 0;
892 def AMDGPUAsmWriter : AsmWriter {
893   int PassSubtarget = 1;
896 def AMDGPUAsmVariants {
897   string Default = "Default";
898   int Default_ID = 0;
899   string VOP3 = "VOP3";
900   int VOP3_ID = 1;
901   string SDWA = "SDWA";
902   int SDWA_ID = 2;
903   string SDWA9 = "SDWA9";
904   int SDWA9_ID = 3;
905   string DPP = "DPP";
906   int DPP_ID = 4;
907   string Disable = "Disable";
908   int Disable_ID = 5;
911 def DefaultAMDGPUAsmParserVariant : AsmParserVariant {
912   let Variant = AMDGPUAsmVariants.Default_ID;
913   let Name = AMDGPUAsmVariants.Default;
916 def VOP3AsmParserVariant : AsmParserVariant {
917   let Variant = AMDGPUAsmVariants.VOP3_ID;
918   let Name = AMDGPUAsmVariants.VOP3;
921 def SDWAAsmParserVariant : AsmParserVariant {
922   let Variant = AMDGPUAsmVariants.SDWA_ID;
923   let Name = AMDGPUAsmVariants.SDWA;
926 def SDWA9AsmParserVariant : AsmParserVariant {
927   let Variant = AMDGPUAsmVariants.SDWA9_ID;
928   let Name = AMDGPUAsmVariants.SDWA9;
932 def DPPAsmParserVariant : AsmParserVariant {
933   let Variant = AMDGPUAsmVariants.DPP_ID;
934   let Name = AMDGPUAsmVariants.DPP;
937 def AMDGPU : Target {
938   // Pull in Instruction Info:
939   let InstructionSet = AMDGPUInstrInfo;
940   let AssemblyParsers = [AMDGPUAsmParser];
941   let AssemblyParserVariants = [DefaultAMDGPUAsmParserVariant,
942                                 VOP3AsmParserVariant,
943                                 SDWAAsmParserVariant,
944                                 SDWA9AsmParserVariant,
945                                 DPPAsmParserVariant];
946   let AssemblyWriters = [AMDGPUAsmWriter];
947   let AllowRegisterRenaming = 1;
950 // Dummy Instruction itineraries for pseudo instructions
// Placeholder functional unit with no properties of its own.
951 def ALU_NULL : FuncUnit;
// Placeholder itinerary class with no properties of its own.
952 def NullALU : InstrItinClass;
954 //===----------------------------------------------------------------------===//
955 // Predicate helper class
956 //===----------------------------------------------------------------------===//
// Southern Islands only. The Predicate string compares the subtarget
// generation; the AssemblerPredicate tests the FeatureSouthernIslands bit.
958 def isGFX6 :
959   Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS">,
960   AssemblerPredicate<"FeatureSouthernIslands">;
// Southern Islands or Sea Islands. The two adjacent string literals form one
// condition. The assembler side has no positive feature to test, so it
// requires that both FeatureGCN3Encoding and FeatureGFX10Insts are absent.
962 def isGFX6GFX7 :
963   Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
964             "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
965   AssemblerPredicate<"!FeatureGCN3Encoding,!FeatureGFX10Insts">;
// Southern Islands, Sea Islands or GFX10. On the assembler side this is
// expressed as "FeatureGCN3Encoding is absent"; of the generation feature
// lists in this file only Volcanic Islands and GFX9 include that feature.
967 def isGFX6GFX7GFX10 :
968   Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
969             "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
970             "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
971   AssemblerPredicate<"!FeatureGCN3Encoding">;
// Sea Islands only. Assembler side: FeatureCIInsts present, with
// FeatureGCN3Encoding and FeatureGFX10Insts both absent.
973 def isGFX7Only :
974   Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
975   AssemblerPredicate<"!FeatureGCN3Encoding,FeatureCIInsts,!FeatureGFX10Insts">;
// Sea Islands or GFX10. Assembler side: FeatureCIInsts present and
// FeatureGCN3Encoding absent.
977 def isGFX7GFX10 :
978   Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
979             "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
980   AssemblerPredicate<"!FeatureGCN3Encoding,FeatureCIInsts">;
// Sea Islands, Volcanic Islands or GFX9. The assembler side uses the
// dedicated FeatureGFX7GFX8GFX9Insts bit, which those three generation
// feature lists include.
982 def isGFX7GFX8GFX9 :
983   Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
984             "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
985             "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
986   AssemblerPredicate<"FeatureGFX7GFX8GFX9Insts">;
// Any of Southern Islands, Sea Islands, Volcanic Islands or GFX9. On the
// assembler side this is simply "FeatureGFX10Insts is absent".
988 def isGFX6GFX7GFX8GFX9 :
989   Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
990             "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
991             "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
992             "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
993   AssemblerPredicate<"!FeatureGFX10Insts">;
// Generation is Sea Islands or later (ordered comparison on the generation
// value). Assembler side: FeatureCIInsts present.
995 def isGFX7Plus :
996   Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">,
997   AssemblerPredicate<"FeatureCIInsts">;
// Generation is Volcanic Islands or later. Assembler side: FeatureGFX8Insts
// present.
999 def isGFX8Plus :
1000   Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
1001   AssemblerPredicate<"FeatureGFX8Insts">;
// Volcanic Islands only. The condition is split across two adjacent string
// literals. Assembler side: the FeatureVolcanicIslands generation bit.
1003 def isGFX8Only : Predicate<"Subtarget->getGeneration() =="
1004                            "AMDGPUSubtarget::VOLCANIC_ISLANDS">,
1005   AssemblerPredicate <"FeatureVolcanicIslands">;
// Generation is GFX9 or later. Assembler side: FeatureGFX9Insts present
// (both the GFX9 and GFX10 generation lists include it).
1007 def isGFX9Plus :
1008   Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
1009   AssemblerPredicate<"FeatureGFX9Insts">;
// GFX9 only. Assembler side: FeatureGFX9Insts together with
// FeatureGCN3Encoding; the GFX10 generation list has the former but not the
// latter, which is what excludes GFX10 here.
1011 def isGFX9Only : Predicate <
1012   "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
1013   AssemblerPredicate<"FeatureGCN3Encoding,FeatureGFX9Insts">;
// Volcanic Islands or GFX9. Assembler side: FeatureGFX8Insts together with
// FeatureGCN3Encoding.
1015 def isGFX8GFX9 :
1016   Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
1017             "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
1018   AssemblerPredicate<"FeatureGFX8Insts,FeatureGCN3Encoding">;
// Generation is GFX10 or later. Assembler side: FeatureGFX10Insts present.
1020 def isGFX10Plus :
1021   Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">,
1022   AssemblerPredicate<"FeatureGFX10Insts">;
// True when the subtarget reports hasFlatAddressSpace(); the assembler tests
// FeatureFlatAddressSpace.
1024 def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
1025   AssemblerPredicate<"FeatureFlatAddressSpace">;
// True when the subtarget reports hasFlatGlobalInsts(); the assembler tests
// FeatureFlatGlobalInsts.
1027 def HasFlatGlobalInsts : Predicate<"Subtarget->hasFlatGlobalInsts()">,
1028   AssemblerPredicate<"FeatureFlatGlobalInsts">;
// True when the subtarget reports hasFlatScratchInsts(); the assembler tests
// FeatureFlatScratchInsts.
1029 def HasFlatScratchInsts : Predicate<"Subtarget->hasFlatScratchInsts()">,
1030   AssemblerPredicate<"FeatureFlatScratchInsts">;
1031 def HasScalarFlatScratchInsts : Predicate<"Subtarget->hasScalarFlatScratchInsts()">,
1032   AssemblerPredicate<"FeatureScalarFlatScratchInsts">;
1033 def HasD16LoadStore : Predicate<"Subtarget->hasD16LoadStore()">,
1034   AssemblerPredicate<"FeatureGFX9Insts">;
// Complementary pair keyed on the same query and feature:
// HasUnpackedD16VMem is Subtarget->hasUnpackedD16VMem() with
// FeatureUnpackedD16VMem; HasPackedD16VMem is the negation of both.
1036 def HasUnpackedD16VMem : Predicate<"Subtarget->hasUnpackedD16VMem()">,
1037   AssemblerPredicate<"FeatureUnpackedD16VMem">;
1038 def HasPackedD16VMem : Predicate<"!Subtarget->hasUnpackedD16VMem()">,
1039   AssemblerPredicate<"!FeatureUnpackedD16VMem">;
// True when Subtarget->d16PreservesUnusedBits() holds. The assembler
// condition lists FeatureGFX9Insts together with the negated feature
// "!FeatureSRAMECC".
1041 def D16PreservesUnusedBits :
1042   Predicate<"Subtarget->d16PreservesUnusedBits()">,
1043   AssemblerPredicate<"FeatureGFX9Insts,!FeatureSRAMECC">;
// Complementary pair on Subtarget->ldsRequiresM0Init() and its negation.
// Neither def carries an AssemblerPredicate.
1045 def LDSRequiresM0Init : Predicate<"Subtarget->ldsRequiresM0Init()">;
1046 def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">;
// Expressed directly as a generation check (getGeneration() >= GFX9) with
// the FeatureGFX9Insts assembler condition; there is no dedicated subtarget
// query or feature for it. Both strings are identical to those of isGFX9Plus.
1048 def HasDSAddTid : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
1049   AssemblerPredicate<"FeatureGFX9Insts">;
// Complementary pair on Subtarget->hasAddNoCarry(). Only the positive form
// carries an assembler condition (FeatureAddNoCarryInsts); the negated form
// is a plain Predicate.
1051 def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarry()">,
1052   AssemblerPredicate<"FeatureAddNoCarryInsts">;
1054 def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarry()">;
// Subtarget->has16BitInsts() / Feature16BitInsts.
1056 def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">,
1057   AssemblerPredicate<"Feature16BitInsts">;
// Subtarget->hasVOP3PInsts() / FeatureVOP3P.
1058 def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
1059   AssemblerPredicate<"FeatureVOP3P">;
// First of three SDWA predicates (HasSDWA, HasSDWA9, HasSDWA10) that share
// the same Predicate string, Subtarget->hasSDWA(), and differ only in their
// assembler condition. This one lists FeatureSDWA and FeatureVolcanicIslands.
1061 def HasSDWA : Predicate<"Subtarget->hasSDWA()">,
1062   AssemblerPredicate<"FeatureSDWA,FeatureVolcanicIslands">;
// Same Predicate string as HasSDWA (Subtarget->hasSDWA()); the assembler
// condition lists FeatureGCN3Encoding, FeatureGFX9Insts and FeatureSDWA.
1064 def HasSDWA9 :
1065   Predicate<"Subtarget->hasSDWA()">,
1066   AssemblerPredicate<"FeatureGCN3Encoding,FeatureGFX9Insts,FeatureSDWA">;
// Same Predicate string as HasSDWA (Subtarget->hasSDWA()); the assembler
// condition lists the negated "!FeatureGCN3Encoding" together with
// FeatureGFX10Insts and FeatureSDWA.
1068 def HasSDWA10 :
1069   Predicate<"Subtarget->hasSDWA()">,
1070   AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureSDWA">;
// Subtarget->hasDPP(); the assembler condition lists FeatureGCN3Encoding and
// FeatureDPP. HasDPP16 uses the same Predicate string with a different
// assembler condition.
1072 def HasDPP : Predicate<"Subtarget->hasDPP()">,
1073   AssemblerPredicate<"FeatureGCN3Encoding,FeatureDPP">;
// Subtarget->hasDPP8(); the assembler condition lists the negated
// "!FeatureGCN3Encoding" together with FeatureGFX10Insts and FeatureDPP8.
1075 def HasDPP8 : Predicate<"Subtarget->hasDPP8()">,
1076   AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureDPP8">;
// Subtarget->hasR128A16() / FeatureR128A16.
1078 def HasR128A16 : Predicate<"Subtarget->hasR128A16()">,
1079   AssemblerPredicate<"FeatureR128A16">;
// Uses the same Predicate string as HasDPP (Subtarget->hasDPP(), not a
// DPP16-specific query); the assembler condition lists the negated
// "!FeatureGCN3Encoding" together with FeatureGFX10Insts and FeatureDPP.
1081 def HasDPP16 : Predicate<"Subtarget->hasDPP()">,
1082   AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureDPP">;
// Subtarget->hasIntClamp() / FeatureIntClamp.
1084 def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">,
1085   AssemblerPredicate<"FeatureIntClamp">;
// Subtarget->hasMadMixInsts() / FeatureMadMixInsts.
1087 def HasMadMixInsts : Predicate<"Subtarget->hasMadMixInsts()">,
1088   AssemblerPredicate<"FeatureMadMixInsts">;
// Subtarget->hasScalarStores() / FeatureScalarStores.
1090 def HasScalarStores : Predicate<"Subtarget->hasScalarStores()">,
1091   AssemblerPredicate<"FeatureScalarStores">;
// Subtarget->hasScalarAtomics() / FeatureScalarAtomics.
1093 def HasScalarAtomics : Predicate<"Subtarget->hasScalarAtomics()">,
1094   AssemblerPredicate<"FeatureScalarAtomics">;
// Complementary pair keyed on Subtarget->hasNoSdstCMPX() and
// FeatureNoSdstCMPX. HasNoSdstCMPX is the positive form of both strings;
// HasSdstCMPX negates both.
1096 def HasNoSdstCMPX : Predicate<"Subtarget->hasNoSdstCMPX()">,
1097   AssemblerPredicate<"FeatureNoSdstCMPX">;
1099 def HasSdstCMPX : Predicate<"!Subtarget->hasNoSdstCMPX()">,
1100   AssemblerPredicate<"!FeatureNoSdstCMPX">;
// LDS bank-count predicates: Subtarget->getLDSBankCount() compared against
// 16 and 32. Neither carries an AssemblerPredicate. Note the lower-case
// "has" spelling, unlike the neighbouring "Has*" defs.
1102 def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">;
1103 def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">;
// Subtarget->hasVGPRIndexMode() / FeatureVGPRIndexMode.
1104 def HasVGPRIndexMode : Predicate<"Subtarget->hasVGPRIndexMode()">,
1105                       AssemblerPredicate<"FeatureVGPRIndexMode">;
// Subtarget->hasMovrel() / FeatureMovrel.
1106 def HasMovrel : Predicate<"Subtarget->hasMovrel()">,
1107                 AssemblerPredicate<"FeatureMovrel">;
// Subtarget->hasFmaMixInsts() / FeatureFmaMixInsts.
1109 def HasFmaMixInsts : Predicate<"Subtarget->hasFmaMixInsts()">,
1110   AssemblerPredicate<"FeatureFmaMixInsts">;
// Subtarget->hasDLInsts() / FeatureDLInsts.
1112 def HasDLInsts : Predicate<"Subtarget->hasDLInsts()">,
1113   AssemblerPredicate<"FeatureDLInsts">;
// Six parallel predicates, HasDot<N>Insts for N = 1..6. Each pairs
// Subtarget->hasDot<N>Insts() with the assembler feature FeatureDot<N>Insts.
1115 def HasDot1Insts : Predicate<"Subtarget->hasDot1Insts()">,
1116   AssemblerPredicate<"FeatureDot1Insts">;
1118 def HasDot2Insts : Predicate<"Subtarget->hasDot2Insts()">,
1119   AssemblerPredicate<"FeatureDot2Insts">;
1121 def HasDot3Insts : Predicate<"Subtarget->hasDot3Insts()">,
1122   AssemblerPredicate<"FeatureDot3Insts">;
1124 def HasDot4Insts : Predicate<"Subtarget->hasDot4Insts()">,
1125   AssemblerPredicate<"FeatureDot4Insts">;
1127 def HasDot5Insts : Predicate<"Subtarget->hasDot5Insts()">,
1128   AssemblerPredicate<"FeatureDot5Insts">;
1130 def HasDot6Insts : Predicate<"Subtarget->hasDot6Insts()">,
1131   AssemblerPredicate<"FeatureDot6Insts">;
// Subtarget->hasMAIInsts() / FeatureMAIInsts.
1133 def HasMAIInsts : Predicate<"Subtarget->hasMAIInsts()">,
1134   AssemblerPredicate<"FeatureMAIInsts">;
// Subtarget->hasPkFmacF16Inst() / FeaturePkFmacF16Inst.
1136 def HasPkFmacF16Inst : Predicate<"Subtarget->hasPkFmacF16Inst()">,
1137   AssemblerPredicate<"FeaturePkFmacF16Inst">;
// Subtarget->hasAtomicFaddInsts() / FeatureAtomicFaddInsts.
1139 def HasAtomicFaddInsts : Predicate<"Subtarget->hasAtomicFaddInsts()">,
1140   AssemblerPredicate<"FeatureAtomicFaddInsts">;
// True when the subtarget reports the offset-3f bug
// (Subtarget->hasOffset3fBug()); the assembler condition is the matching
// feature FeatureOffset3fBug. The Predicate string must stay un-negated so
// that it agrees with both the def's name and its AssemblerPredicate: a
// leading '!' here would make the two conditions of this record select
// opposite sets of subtargets.
1142 def HasOffset3fBug : Predicate<"Subtarget->hasOffset3fBug()">,
1143   AssemblerPredicate<"FeatureOffset3fBug">;
// Unlike the other predicates here, the condition is not a Subtarget query:
// the Predicate string is the bare identifier EnableLateStructurizeCFG
// (presumably a flag visible where the generated predicate code is compiled
// -- confirm in the C++ sources). Note the word order differs between the
// def name (LateCFGStructurize) and the identifier (LateStructurizeCFG).
// No AssemblerPredicate is attached.
1145 def EnableLateCFGStructurize : Predicate<
1146   "EnableLateStructurizeCFG">;
1148 // Include AMDGPU TD files
1149 include "SISchedule.td"
1150 include "GCNProcessors.td"
1151 include "AMDGPUInstrInfo.td"
1152 include "AMDGPURegisterInfo.td"
1153 include "AMDGPURegisterBanks.td"
1154 include "AMDGPUInstructions.td"
1155 include "SIInstrInfo.td"
1156 include "AMDGPUCallingConv.td"
1157 include "AMDGPUSearchableTables.td"