[AMDGPU] New gfx940 mfma instructions
[llvm-project.git] / llvm / lib / Target / AMDGPU / AMDGPUSearchableTables.td
blob354fea5a52fa0b4a9d1ed4420d9d3bb967963ad0
1 //===-- AMDGPUSearchableTables.td - ------------------------*- tablegen -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 //===----------------------------------------------------------------------===//
10 // Resource intrinsics table.
11 //===----------------------------------------------------------------------===//
// Row type for the resource-intrinsic table. Stores the intrinsic itself
// (cast to Intrinsic) together with the RsrcArg and IsImage values copied
// from the AMDGPURsrcIntrinsic definition passed as `intr`.
13 class RsrcIntrinsic<AMDGPURsrcIntrinsic intr> {
14   Intrinsic Intr = !cast<Intrinsic>(intr);
15   bits<8> RsrcArg = intr.RsrcArg;
16   bit IsImage = intr.IsImage;
// Searchable table collecting every record of class RsrcIntrinsic. Rows carry
// the Intr, RsrcArg and IsImage fields; Intr is the primary key and the
// primary-key lookup is named lookupRsrcIntrinsic.
19 def RsrcIntrinsics : GenericTable {
20   let FilterClass = "RsrcIntrinsic";
21   let Fields = ["Intr", "RsrcArg", "IsImage"];
23   let PrimaryKey = ["Intr"];
24   let PrimaryKeyName = "lookupRsrcIntrinsic";
// Emit one anonymous RsrcIntrinsic row for every entry of the three
// concatenated lists: AMDGPUBufferIntrinsics, AMDGPUImageDimIntrinsics and
// AMDGPUImageDimAtomicIntrinsics. Each entry is cast to AMDGPURsrcIntrinsic
// so that the row class can read its RsrcArg / IsImage fields.
27 foreach intr = !listconcat(AMDGPUBufferIntrinsics,
28                            AMDGPUImageDimIntrinsics,
29                            AMDGPUImageDimAtomicIntrinsics) in {
30   def : RsrcIntrinsic<!cast<AMDGPURsrcIntrinsic>(intr)>;
// Common row layout shared by the buffer-format tables. Template arguments
// map one-to-one onto the fields, in this order:
//   f    -> Format
//   bpc  -> BitsPerComp
//   numc -> NumComponents
//   nfmt -> NumFormat
//   dfmt -> DataFormat
33 class GcnBufferFormatBase<bits<8> f, bits<8> bpc, bits<8> numc, bits<8> nfmt, bits<8> dfmt> {
34   bits<8> Format = f;
35   bits<8> BitsPerComp = bpc;
36   bits<8> NumComponents = numc;
37   bits<8> NumFormat = nfmt;
38   bits<8> DataFormat = dfmt;
// Per-generation row classes. Both forward all five arguments unchanged to
// GcnBufferFormatBase; they exist as distinct classes so that each table can
// select its own rows through FilterClass.
41 class Gfx9BufferFormat<bits<8> f, bits<8> bpc, bits<8> numc, bits<8> nfmt, bits<8> dfmt> : GcnBufferFormatBase<f, bpc, numc, nfmt, dfmt>;
42 class Gfx10PlusBufferFormat<bits<8> f, bits<8> bpc, bits<8> numc, bits<8> nfmt, bits<8> dfmt> : GcnBufferFormatBase<f, bpc, numc, nfmt, dfmt>;
// Settings shared by the buffer-format tables: the C++ row type is named
// GcnBufferFormatInfo, all five row fields are emitted, and the primary key
// is the triple (BitsPerComp, NumComponents, NumFormat).
44 class GcnBufferFormatTable  : GenericTable {
45   let CppTypeName = "GcnBufferFormatInfo";
46   let Fields = ["Format", "BitsPerComp", "NumComponents", "NumFormat", "DataFormat"];
47   let PrimaryKey = ["BitsPerComp", "NumComponents", "NumFormat"];
// Table built from all Gfx9BufferFormat rows; its primary-key lookup is
// named getGfx9BufferFormatInfo.
50 def Gfx9BufferFormat : GcnBufferFormatTable {
51   let FilterClass = "Gfx9BufferFormat";
52   let PrimaryKeyName = "getGfx9BufferFormatInfo";
// Table built from all Gfx10PlusBufferFormat rows; its primary-key lookup is
// named getGfx10PlusBufferFormatInfo.
54 def Gfx10PlusBufferFormat : GcnBufferFormatTable {
55   let FilterClass = "Gfx10PlusBufferFormat";
56   let PrimaryKeyName = "getGfx10PlusBufferFormatInfo";
// Additional search index over the Gfx9BufferFormat table, keyed by the
// Format field alone. It shares its name with that table's primary-key
// lookup, which is keyed by (BitsPerComp, NumComponents, NumFormat).
59 def getGfx9BufferFormatInfo : SearchIndex {
60   let Table = Gfx9BufferFormat;
61   let Key = ["Format"];
// Additional search index over the Gfx10PlusBufferFormat table, keyed by the
// Format field alone. It shares its name with that table's primary-key
// lookup, which is keyed by (BitsPerComp, NumComponents, NumFormat).
63 def getGfx10PlusBufferFormatInfo : SearchIndex {
64   let Table = Gfx10PlusBufferFormat;
65   let Key = ["Format"];
68 // Buffer formats with equal component sizes (GFX9 and earlier)
// Row arguments, in order: Format, BitsPerComp, NumComponents, NumFormat,
// DataFormat. In every row below Format equals (NumFormat << 4) | DataFormat,
// e.g. 0x72 = NumFormat 7, DataFormat 2.
69 def : Gfx9BufferFormat< /*FORMAT_8_UNORM*/              0x01,  8, 1, /*NUM_FORMAT_UNORM*/   0, /*DATA_FORMAT_8*/            1>;
70 def : Gfx9BufferFormat< /*FORMAT_8_SNORM*/              0x11,  8, 1, /*NUM_FORMAT_SNORM*/   1, /*DATA_FORMAT_8*/            1>;
71 def : Gfx9BufferFormat< /*FORMAT_8_USCALED*/            0x21,  8, 1, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8*/            1>;
72 def : Gfx9BufferFormat< /*FORMAT_8_SSCALED*/            0x31,  8, 1, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8*/            1>;
73 def : Gfx9BufferFormat< /*FORMAT_8_UINT*/               0x41,  8, 1, /*NUM_FORMAT_UINT*/    4, /*DATA_FORMAT_8*/            1>;
74 def : Gfx9BufferFormat< /*FORMAT_8_SINT*/               0x51,  8, 1, /*NUM_FORMAT_SINT*/    5, /*DATA_FORMAT_8*/            1>;
75 def : Gfx9BufferFormat< /*FORMAT_16_UNORM*/             0x02, 16, 1, /*NUM_FORMAT_UNORM*/   0, /*DATA_FORMAT_16*/           2>;
76 def : Gfx9BufferFormat< /*FORMAT_16_SNORM*/             0x12, 16, 1, /*NUM_FORMAT_SNORM*/   1, /*DATA_FORMAT_16*/           2>;
77 def : Gfx9BufferFormat< /*FORMAT_16_USCALED*/           0x22, 16, 1, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16*/           2>;
78 def : Gfx9BufferFormat< /*FORMAT_16_SSCALED*/           0x32, 16, 1, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16*/           2>;
79 def : Gfx9BufferFormat< /*FORMAT_16_UINT*/              0x42, 16, 1, /*NUM_FORMAT_UINT*/    4, /*DATA_FORMAT_16*/           2>;
80 def : Gfx9BufferFormat< /*FORMAT_16_SINT*/              0x52, 16, 1, /*NUM_FORMAT_SINT*/    5, /*DATA_FORMAT_16*/           2>;
81 def : Gfx9BufferFormat< /*FORMAT_16_FLOAT*/             0x72, 16, 1, /*NUM_FORMAT_FLOAT*/   7, /*DATA_FORMAT_16*/           2>;
82 def : Gfx9BufferFormat< /*FORMAT_8_8_UNORM*/            0x03,  8, 2, /*NUM_FORMAT_UNORM*/   0, /*DATA_FORMAT_8_8*/          3>;
83 def : Gfx9BufferFormat< /*FORMAT_8_8_SNORM*/            0x13,  8, 2, /*NUM_FORMAT_SNORM*/   1, /*DATA_FORMAT_8_8*/          3>;
84 def : Gfx9BufferFormat< /*FORMAT_8_8_USCALED*/          0x23,  8, 2, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8_8*/          3>;
85 def : Gfx9BufferFormat< /*FORMAT_8_8_SSCALED*/          0x33,  8, 2, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8_8*/          3>;
86 def : Gfx9BufferFormat< /*FORMAT_8_8_UINT*/             0x43,  8, 2, /*NUM_FORMAT_UINT*/    4, /*DATA_FORMAT_8_8*/          3>;
87 def : Gfx9BufferFormat< /*FORMAT_8_8_SINT*/             0x53,  8, 2, /*NUM_FORMAT_SINT*/    5, /*DATA_FORMAT_8_8*/          3>;
88 def : Gfx9BufferFormat< /*FORMAT_32_UINT*/              0x44, 32, 1, /*NUM_FORMAT_UINT*/    4, /*DATA_FORMAT_32*/           4>;
89 def : Gfx9BufferFormat< /*FORMAT_32_SINT*/              0x54, 32, 1, /*NUM_FORMAT_SINT*/    5, /*DATA_FORMAT_32*/           4>;
90 def : Gfx9BufferFormat< /*FORMAT_32_FLOAT*/             0x74, 32, 1, /*NUM_FORMAT_FLOAT*/   7, /*DATA_FORMAT_32*/           4>;
91 def : Gfx9BufferFormat< /*FORMAT_16_16_UNORM*/          0x05, 16, 2, /*NUM_FORMAT_UNORM*/   0, /*DATA_FORMAT_16_16*/        5>;
92 def : Gfx9BufferFormat< /*FORMAT_16_16_SNORM*/          0x15, 16, 2, /*NUM_FORMAT_SNORM*/   1, /*DATA_FORMAT_16_16*/        5>;
93 def : Gfx9BufferFormat< /*FORMAT_16_16_USCALED*/        0x25, 16, 2, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16_16*/        5>;
94 def : Gfx9BufferFormat< /*FORMAT_16_16_SSCALED*/        0x35, 16, 2, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16_16*/        5>;
95 def : Gfx9BufferFormat< /*FORMAT_16_16_UINT*/           0x45, 16, 2, /*NUM_FORMAT_UINT*/    4, /*DATA_FORMAT_16_16*/        5>;
96 def : Gfx9BufferFormat< /*FORMAT_16_16_SINT*/           0x55, 16, 2, /*NUM_FORMAT_SINT*/    5, /*DATA_FORMAT_16_16*/        5>;
97 def : Gfx9BufferFormat< /*FORMAT_16_16_FLOAT*/          0x75, 16, 2, /*NUM_FORMAT_FLOAT*/   7, /*DATA_FORMAT_16_16*/        5>;
98 def : Gfx9BufferFormat< /*FORMAT_8_8_8_8_UNORM*/        0x0A,  8, 4, /*NUM_FORMAT_UNORM*/   0, /*DATA_FORMAT_8_8_8_8*/     10>;
99 def : Gfx9BufferFormat< /*FORMAT_8_8_8_8_SNORM*/        0x1A,  8, 4, /*NUM_FORMAT_SNORM*/   1, /*DATA_FORMAT_8_8_8_8*/     10>;
100 def : Gfx9BufferFormat< /*FORMAT_8_8_8_8_USCALED*/      0x2A,  8, 4, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8_8_8_8*/     10>;
101 def : Gfx9BufferFormat< /*FORMAT_8_8_8_8_SSCALED*/      0x3A,  8, 4, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8_8_8_8*/     10>;
102 def : Gfx9BufferFormat< /*FORMAT_8_8_8_8_UINT*/         0x4A,  8, 4, /*NUM_FORMAT_UINT*/    4, /*DATA_FORMAT_8_8_8_8*/     10>;
103 def : Gfx9BufferFormat< /*FORMAT_8_8_8_8_SINT*/         0x5A,  8, 4, /*NUM_FORMAT_SINT*/    5, /*DATA_FORMAT_8_8_8_8*/     10>;
104 def : Gfx9BufferFormat< /*FORMAT_32_32_UINT*/           0x4B, 32, 2, /*NUM_FORMAT_UINT*/    4, /*DATA_FORMAT_32_32*/       11>;
105 def : Gfx9BufferFormat< /*FORMAT_32_32_SINT*/           0x5B, 32, 2, /*NUM_FORMAT_SINT*/    5, /*DATA_FORMAT_32_32*/       11>;
106 def : Gfx9BufferFormat< /*FORMAT_32_32_FLOAT*/          0x7B, 32, 2, /*NUM_FORMAT_FLOAT*/   7, /*DATA_FORMAT_32_32*/       11>;
107 def : Gfx9BufferFormat< /*FORMAT_16_16_16_16_UNORM*/    0x0C, 16, 4, /*NUM_FORMAT_UNORM*/   0, /*DATA_FORMAT_16_16_16_16*/ 12>;
108 def : Gfx9BufferFormat< /*FORMAT_16_16_16_16_SNORM*/    0x1C, 16, 4, /*NUM_FORMAT_SNORM*/   1, /*DATA_FORMAT_16_16_16_16*/ 12>;
109 def : Gfx9BufferFormat< /*FORMAT_16_16_16_16_USCALED*/  0x2C, 16, 4, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16_16_16_16*/ 12>;
110 def : Gfx9BufferFormat< /*FORMAT_16_16_16_16_SSCALED*/  0x3C, 16, 4, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16_16_16_16*/ 12>;
111 def : Gfx9BufferFormat< /*FORMAT_16_16_16_16_UINT*/     0x4C, 16, 4, /*NUM_FORMAT_UINT*/    4, /*DATA_FORMAT_16_16_16_16*/ 12>;
112 def : Gfx9BufferFormat< /*FORMAT_16_16_16_16_SINT*/     0x5C, 16, 4, /*NUM_FORMAT_SINT*/    5, /*DATA_FORMAT_16_16_16_16*/ 12>;
113 def : Gfx9BufferFormat< /*FORMAT_16_16_16_16_FLOAT*/    0x7C, 16, 4, /*NUM_FORMAT_FLOAT*/   7, /*DATA_FORMAT_16_16_16_16*/ 12>;
114 def : Gfx9BufferFormat< /*FORMAT_32_32_32_UINT*/        0x4D, 32, 3, /*NUM_FORMAT_UINT*/    4, /*DATA_FORMAT_32_32_32*/    13>;
115 def : Gfx9BufferFormat< /*FORMAT_32_32_32_SINT*/        0x5D, 32, 3, /*NUM_FORMAT_SINT*/    5, /*DATA_FORMAT_32_32_32*/    13>;
116 def : Gfx9BufferFormat< /*FORMAT_32_32_32_FLOAT*/       0x7D, 32, 3, /*NUM_FORMAT_FLOAT*/   7, /*DATA_FORMAT_32_32_32*/    13>;
117 def : Gfx9BufferFormat< /*FORMAT_32_32_32_32_UINT*/     0x4E, 32, 4, /*NUM_FORMAT_UINT*/    4, /*DATA_FORMAT_32_32_32_32*/ 14>;
118 def : Gfx9BufferFormat< /*FORMAT_32_32_32_32_SINT*/     0x5E, 32, 4, /*NUM_FORMAT_SINT*/    5, /*DATA_FORMAT_32_32_32_32*/ 14>;
119 def : Gfx9BufferFormat< /*FORMAT_32_32_32_32_FLOAT*/    0x7E, 32, 4, /*NUM_FORMAT_FLOAT*/   7, /*DATA_FORMAT_32_32_32_32*/ 14>;
121 // Buffer formats with equal component sizes (GFX10 and later)
// Row arguments, in order: Format, BitsPerComp, NumComponents, NumFormat,
// DataFormat. Format here is a standalone value rather than a packing of
// NumFormat and DataFormat (e.g. 0x02 pairs with NumFormat 1, DataFormat 1);
// the values run 0x01-0x1D and then 0x38-0x4D.
122 def : Gfx10PlusBufferFormat< /*FORMAT_8_UNORM*/              0x01,  8, 1, /*NUM_FORMAT_UNORM*/   0, /*DATA_FORMAT_8*/            1>;
123 def : Gfx10PlusBufferFormat< /*FORMAT_8_SNORM*/              0x02,  8, 1, /*NUM_FORMAT_SNORM*/   1, /*DATA_FORMAT_8*/            1>;
124 def : Gfx10PlusBufferFormat< /*FORMAT_8_USCALED*/            0x03,  8, 1, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8*/            1>;
125 def : Gfx10PlusBufferFormat< /*FORMAT_8_SSCALED*/            0x04,  8, 1, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8*/            1>;
126 def : Gfx10PlusBufferFormat< /*FORMAT_8_UINT*/               0x05,  8, 1, /*NUM_FORMAT_UINT*/    4, /*DATA_FORMAT_8*/            1>;
127 def : Gfx10PlusBufferFormat< /*FORMAT_8_SINT*/               0x06,  8, 1, /*NUM_FORMAT_SINT*/    5, /*DATA_FORMAT_8*/            1>;
128 def : Gfx10PlusBufferFormat< /*FORMAT_16_UNORM*/             0x07, 16, 1, /*NUM_FORMAT_UNORM*/   0, /*DATA_FORMAT_16*/           2>;
129 def : Gfx10PlusBufferFormat< /*FORMAT_16_SNORM*/             0x08, 16, 1, /*NUM_FORMAT_SNORM*/   1, /*DATA_FORMAT_16*/           2>;
130 def : Gfx10PlusBufferFormat< /*FORMAT_16_USCALED*/           0x09, 16, 1, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16*/           2>;
131 def : Gfx10PlusBufferFormat< /*FORMAT_16_SSCALED*/           0x0A, 16, 1, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16*/           2>;
132 def : Gfx10PlusBufferFormat< /*FORMAT_16_UINT*/              0x0B, 16, 1, /*NUM_FORMAT_UINT*/    4, /*DATA_FORMAT_16*/           2>;
133 def : Gfx10PlusBufferFormat< /*FORMAT_16_SINT*/              0x0C, 16, 1, /*NUM_FORMAT_SINT*/    5, /*DATA_FORMAT_16*/           2>;
134 def : Gfx10PlusBufferFormat< /*FORMAT_16_FLOAT*/             0x0D, 16, 1, /*NUM_FORMAT_FLOAT*/   7, /*DATA_FORMAT_16*/           2>;
135 def : Gfx10PlusBufferFormat< /*FORMAT_8_8_UNORM*/            0x0E,  8, 2, /*NUM_FORMAT_UNORM*/   0, /*DATA_FORMAT_8_8*/          3>;
136 def : Gfx10PlusBufferFormat< /*FORMAT_8_8_SNORM*/            0x0F,  8, 2, /*NUM_FORMAT_SNORM*/   1, /*DATA_FORMAT_8_8*/          3>;
137 def : Gfx10PlusBufferFormat< /*FORMAT_8_8_USCALED*/          0x10,  8, 2, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8_8*/          3>;
138 def : Gfx10PlusBufferFormat< /*FORMAT_8_8_SSCALED*/          0x11,  8, 2, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8_8*/          3>;
139 def : Gfx10PlusBufferFormat< /*FORMAT_8_8_UINT*/             0x12,  8, 2, /*NUM_FORMAT_UINT*/    4, /*DATA_FORMAT_8_8*/          3>;
140 def : Gfx10PlusBufferFormat< /*FORMAT_8_8_SINT*/             0x13,  8, 2, /*NUM_FORMAT_SINT*/    5, /*DATA_FORMAT_8_8*/          3>;
141 def : Gfx10PlusBufferFormat< /*FORMAT_32_UINT*/              0x14, 32, 1, /*NUM_FORMAT_UINT*/    4, /*DATA_FORMAT_32*/           4>;
142 def : Gfx10PlusBufferFormat< /*FORMAT_32_SINT*/              0x15, 32, 1, /*NUM_FORMAT_SINT*/    5, /*DATA_FORMAT_32*/           4>;
143 def : Gfx10PlusBufferFormat< /*FORMAT_32_FLOAT*/             0x16, 32, 1, /*NUM_FORMAT_FLOAT*/   7, /*DATA_FORMAT_32*/           4>;
144 def : Gfx10PlusBufferFormat< /*FORMAT_16_16_UNORM*/          0x17, 16, 2, /*NUM_FORMAT_UNORM*/   0, /*DATA_FORMAT_16_16*/        5>;
145 def : Gfx10PlusBufferFormat< /*FORMAT_16_16_SNORM*/          0x18, 16, 2, /*NUM_FORMAT_SNORM*/   1, /*DATA_FORMAT_16_16*/        5>;
146 def : Gfx10PlusBufferFormat< /*FORMAT_16_16_USCALED*/        0x19, 16, 2, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16_16*/        5>;
147 def : Gfx10PlusBufferFormat< /*FORMAT_16_16_SSCALED*/        0x1A, 16, 2, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16_16*/        5>;
148 def : Gfx10PlusBufferFormat< /*FORMAT_16_16_UINT*/           0x1B, 16, 2, /*NUM_FORMAT_UINT*/    4, /*DATA_FORMAT_16_16*/        5>;
149 def : Gfx10PlusBufferFormat< /*FORMAT_16_16_SINT*/           0x1C, 16, 2, /*NUM_FORMAT_SINT*/    5, /*DATA_FORMAT_16_16*/        5>;
150 def : Gfx10PlusBufferFormat< /*FORMAT_16_16_FLOAT*/          0x1D, 16, 2, /*NUM_FORMAT_FLOAT*/   7, /*DATA_FORMAT_16_16*/        5>;
151 def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_UNORM*/        0x38,  8, 4, /*NUM_FORMAT_UNORM*/   0, /*DATA_FORMAT_8_8_8_8*/     10>;
152 def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_SNORM*/        0x39,  8, 4, /*NUM_FORMAT_SNORM*/   1, /*DATA_FORMAT_8_8_8_8*/     10>;
153 def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_USCALED*/      0x3A,  8, 4, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8_8_8_8*/     10>;
154 def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_SSCALED*/      0x3B,  8, 4, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8_8_8_8*/     10>;
155 def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_UINT*/         0x3C,  8, 4, /*NUM_FORMAT_UINT*/    4, /*DATA_FORMAT_8_8_8_8*/     10>;
156 def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_SINT*/         0x3D,  8, 4, /*NUM_FORMAT_SINT*/    5, /*DATA_FORMAT_8_8_8_8*/     10>;
157 def : Gfx10PlusBufferFormat< /*FORMAT_32_32_UINT*/           0x3E, 32, 2, /*NUM_FORMAT_UINT*/    4, /*DATA_FORMAT_32_32*/       11>;
158 def : Gfx10PlusBufferFormat< /*FORMAT_32_32_SINT*/           0x3F, 32, 2, /*NUM_FORMAT_SINT*/    5, /*DATA_FORMAT_32_32*/       11>;
159 def : Gfx10PlusBufferFormat< /*FORMAT_32_32_FLOAT*/          0x40, 32, 2, /*NUM_FORMAT_FLOAT*/   7, /*DATA_FORMAT_32_32*/       11>;
160 def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_UNORM*/    0x41, 16, 4, /*NUM_FORMAT_UNORM*/   0, /*DATA_FORMAT_16_16_16_16*/ 12>;
161 def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_SNORM*/    0x42, 16, 4, /*NUM_FORMAT_SNORM*/   1, /*DATA_FORMAT_16_16_16_16*/ 12>;
162 def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_USCALED*/  0x43, 16, 4, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16_16_16_16*/ 12>;
163 def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_SSCALED*/  0x44, 16, 4, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16_16_16_16*/ 12>;
164 def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_UINT*/     0x45, 16, 4, /*NUM_FORMAT_UINT*/    4, /*DATA_FORMAT_16_16_16_16*/ 12>;
165 def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_SINT*/     0x46, 16, 4, /*NUM_FORMAT_SINT*/    5, /*DATA_FORMAT_16_16_16_16*/ 12>;
166 def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_FLOAT*/    0x47, 16, 4, /*NUM_FORMAT_FLOAT*/   7, /*DATA_FORMAT_16_16_16_16*/ 12>;
167 def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_UINT*/        0x48, 32, 3, /*NUM_FORMAT_UINT*/    4, /*DATA_FORMAT_32_32_32*/    13>;
168 def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_SINT*/        0x49, 32, 3, /*NUM_FORMAT_SINT*/    5, /*DATA_FORMAT_32_32_32*/    13>;
169 def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_FLOAT*/       0x4A, 32, 3, /*NUM_FORMAT_FLOAT*/   7, /*DATA_FORMAT_32_32_32*/    13>;
170 def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_32_UINT*/     0x4B, 32, 4, /*NUM_FORMAT_UINT*/    4, /*DATA_FORMAT_32_32_32_32*/ 14>;
171 def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_32_SINT*/     0x4C, 32, 4, /*NUM_FORMAT_SINT*/    5, /*DATA_FORMAT_32_32_32_32*/ 14>;
172 def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_32_FLOAT*/    0x4D, 32, 4, /*NUM_FORMAT_FLOAT*/   7, /*DATA_FORMAT_32_32_32_32*/ 14>;
// Row type for the divergence table: a single field holding the intrinsic
// passed as `intr`.
174 class SourceOfDivergence<Intrinsic intr> {
175   Intrinsic Intr = intr;
// Searchable table collecting every record of class SourceOfDivergence. The
// only field, Intr, is also the primary key; the primary-key lookup is named
// lookupSourceOfDivergence.
178 def SourcesOfDivergence : GenericTable {
179   let FilterClass = "SourceOfDivergence";
180   let Fields = ["Intr"];
182   let PrimaryKey = ["Intr"];
183   let PrimaryKeyName = "lookupSourceOfDivergence";
// Rows of the SourcesOfDivergence table. Each anonymous def registers one
// intrinsic; the table is keyed on the intrinsic, so every intrinsic should
// appear at most once.
186 def : SourceOfDivergence<int_amdgcn_workitem_id_x>;
187 def : SourceOfDivergence<int_amdgcn_workitem_id_y>;
188 def : SourceOfDivergence<int_amdgcn_workitem_id_z>;
189 def : SourceOfDivergence<int_amdgcn_interp_mov>;
190 def : SourceOfDivergence<int_amdgcn_interp_p1>;
191 def : SourceOfDivergence<int_amdgcn_interp_p2>;
192 def : SourceOfDivergence<int_amdgcn_interp_p1_f16>;
193 def : SourceOfDivergence<int_amdgcn_interp_p2_f16>;
194 def : SourceOfDivergence<int_amdgcn_mbcnt_hi>;
195 def : SourceOfDivergence<int_amdgcn_mbcnt_lo>;
196 def : SourceOfDivergence<int_r600_read_tidig_x>;
197 def : SourceOfDivergence<int_r600_read_tidig_y>;
198 def : SourceOfDivergence<int_r600_read_tidig_z>;
199 def : SourceOfDivergence<int_amdgcn_atomic_inc>;
200 def : SourceOfDivergence<int_amdgcn_atomic_dec>;
201 def : SourceOfDivergence<int_amdgcn_global_atomic_csub>;
202 def : SourceOfDivergence<int_amdgcn_global_atomic_fadd>;
203 def : SourceOfDivergence<int_amdgcn_global_atomic_fmin>;
204 def : SourceOfDivergence<int_amdgcn_global_atomic_fmax>;
205 def : SourceOfDivergence<int_amdgcn_flat_atomic_fadd>;
206 def : SourceOfDivergence<int_amdgcn_flat_atomic_fmin>;
207 def : SourceOfDivergence<int_amdgcn_flat_atomic_fmax>;
208 def : SourceOfDivergence<int_amdgcn_global_atomic_fadd_v2bf16>;
209 def : SourceOfDivergence<int_amdgcn_flat_atomic_fadd_v2bf16>;
210 def : SourceOfDivergence<int_amdgcn_ds_fadd>;
211 def : SourceOfDivergence<int_amdgcn_ds_fmin>;
212 def : SourceOfDivergence<int_amdgcn_ds_fmax>;
213 def : SourceOfDivergence<int_amdgcn_ds_fadd_v2bf16>;
214 def : SourceOfDivergence<int_amdgcn_buffer_atomic_swap>;
215 def : SourceOfDivergence<int_amdgcn_buffer_atomic_add>;
216 def : SourceOfDivergence<int_amdgcn_buffer_atomic_sub>;
217 def : SourceOfDivergence<int_amdgcn_buffer_atomic_smin>;
218 def : SourceOfDivergence<int_amdgcn_buffer_atomic_umin>;
219 def : SourceOfDivergence<int_amdgcn_buffer_atomic_smax>;
220 def : SourceOfDivergence<int_amdgcn_buffer_atomic_umax>;
221 def : SourceOfDivergence<int_amdgcn_buffer_atomic_and>;
222 def : SourceOfDivergence<int_amdgcn_buffer_atomic_or>;
223 def : SourceOfDivergence<int_amdgcn_buffer_atomic_xor>;
224 def : SourceOfDivergence<int_amdgcn_buffer_atomic_cmpswap>;
225 def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_swap>;
226 def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_add>;
227 def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_sub>;
228 def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_smin>;
229 def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_umin>;
230 def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_smax>;
231 def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_umax>;
232 def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_and>;
233 def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_or>;
234 def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_xor>;
235 def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_inc>;
236 def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_dec>;
237 def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_fadd>;
238 def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_fmin>;
239 def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_fmax>;
240 def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_cmpswap>;
241 def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_swap>;
242 def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_add>;
243 def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_sub>;
244 def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_smin>;
245 def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_umin>;
246 def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_smax>;
247 def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_umax>;
248 def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_and>;
249 def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_or>;
250 def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_xor>;
251 def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_inc>;
252 def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_dec>;
253 def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_fadd>;
254 def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_fmin>;
255 def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_fmax>;
256 def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_cmpswap>;
257 def : SourceOfDivergence<int_amdgcn_buffer_atomic_csub>;
258 def : SourceOfDivergence<int_amdgcn_ps_live>;
259 def : SourceOfDivergence<int_amdgcn_live_mask>;
260 def : SourceOfDivergence<int_amdgcn_ds_swizzle>;
261 def : SourceOfDivergence<int_amdgcn_ds_ordered_add>;
262 def : SourceOfDivergence<int_amdgcn_ds_ordered_swap>;
263 def : SourceOfDivergence<int_amdgcn_permlane16>;
264 def : SourceOfDivergence<int_amdgcn_permlanex16>;
265 def : SourceOfDivergence<int_amdgcn_mov_dpp>;
266 def : SourceOfDivergence<int_amdgcn_mov_dpp8>;
267 def : SourceOfDivergence<int_amdgcn_update_dpp>;
268 def : SourceOfDivergence<int_amdgcn_writelane>;
// MFMA intrinsics registered as sources of divergence. Each intrinsic is
// listed exactly once: the SourcesOfDivergence table uses Intr as its primary
// key, so a repeated row would only duplicate an existing key.
270 def : SourceOfDivergence<int_amdgcn_mfma_f32_4x4x1f32>;
272 def : SourceOfDivergence<int_amdgcn_mfma_f32_4x4x4f16>;
273 def : SourceOfDivergence<int_amdgcn_mfma_i32_4x4x4i8>;
274 def : SourceOfDivergence<int_amdgcn_mfma_f32_4x4x2bf16>;
275 def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x1f32>;
276 def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x4f32>;
277 def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x4f16>;
278 def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x16f16>;
279 def : SourceOfDivergence<int_amdgcn_mfma_i32_16x16x4i8>;
280 def : SourceOfDivergence<int_amdgcn_mfma_i32_16x16x16i8>;
281 def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x2bf16>;
282 def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x8bf16>;
283 def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x1f32>;
284 def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x2f32>;
285 def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x4f16>;
286 def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x8f16>;
287 def : SourceOfDivergence<int_amdgcn_mfma_i32_32x32x4i8>;
288 def : SourceOfDivergence<int_amdgcn_mfma_i32_32x32x8i8>;
289 def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x2bf16>;
290 def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x4bf16>;
291 def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x4bf16_1k>;
292 def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x4bf16_1k>;
293 def : SourceOfDivergence<int_amdgcn_mfma_f32_4x4x4bf16_1k>;
294 def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x8bf16_1k>;
295 def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x16bf16_1k>;
296 def : SourceOfDivergence<int_amdgcn_mfma_f64_16x16x4f64>;
297 def : SourceOfDivergence<int_amdgcn_mfma_f64_4x4x4f64>;
298 def : SourceOfDivergence<int_amdgcn_mfma_i32_16x16x32_i8>;
299 def : SourceOfDivergence<int_amdgcn_mfma_i32_32x32x16_i8>;
300 def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x8_xf32>;
301 def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x4_xf32>;
303 // The dummy boolean output is divergent from the IR's perspective,
304 // but the mask results are uniform. These produce a divergent and
305 // uniform result, so the returned struct is collectively divergent.
306 // isAlwaysUniform can override the extract of the uniform component.
307 def : SourceOfDivergence<int_amdgcn_if>;
308 def : SourceOfDivergence<int_amdgcn_else>;
309 def : SourceOfDivergence<int_amdgcn_loop>;
// Register every intrinsic in AMDGPUImageDimAtomicIntrinsics as a source of
// divergence, one anonymous row per list entry.
311 foreach intr = AMDGPUImageDimAtomicIntrinsics in
312 def : SourceOfDivergence<intr>;