// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx908 | FileCheck %s --check-prefixes=CHECK,GFX9,GFX908
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx90a | FileCheck %s --check-prefixes=CHECK,GFX9,GFX90A
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1030 | FileCheck %s --check-prefixes=CHECK,GFX10,RDNA
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1100 | FileCheck %s --check-prefixes=CHECK,GFX11,RDNA
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1201 | FileCheck %s --check-prefixes=CHECK,GFX12,RDNA

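// Note on the constants checked below: numRecords is the buffer extent in
// bytes (4 for memref<i32>, 256 for memref<64xi32>, 128 for memref<64xf16>,
// and so on), and flags is the chipset-dependent flags word of the buffer
// descriptor: 159744 (0x27000) on GFX9 versus 822243328 (0x31027000) on the
// RDNA targets.
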
// CHECK-LABEL: func @gpu_gcn_raw_buffer_load_scalar_i32
func.func @gpu_gcn_raw_buffer_load_scalar_i32(%buf: memref<i32>) -> i32 {
  // CHECK: %[[stride:.*]] = llvm.mlir.constant(0 : i16)
  // CHECK: %[[numRecords:.*]] = llvm.mlir.constant(4 : i32)
  // GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
  // RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
  // CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %[[stride]], %[[numRecords]], %[[flags]] : !llvm.ptr to <8>
  // CHECK: %[[ret:.*]] = rocdl.raw.ptr.buffer.load %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i32
  // CHECK: return %[[ret]]
  %0 = amdgpu.raw_buffer_load {boundsCheck = true} %buf[] : memref<i32> -> i32
  func.return %0 : i32
}

// CHECK-LABEL: func @gpu_gcn_raw_buffer_load_i32
func.func @gpu_gcn_raw_buffer_load_i32(%buf: memref<64xi32>, %idx: i32) -> i32 {
  // CHECK: %[[stride:.*]] = llvm.mlir.constant(0 : i16)
  // CHECK: %[[numRecords:.*]] = llvm.mlir.constant(256 : i32)
  // GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
  // RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
  // CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %[[stride]], %[[numRecords]], %[[flags]] : !llvm.ptr to <8>
  // CHECK: %[[ret:.*]] = rocdl.raw.ptr.buffer.load %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i32
  // CHECK: return %[[ret]]
  %0 = amdgpu.raw_buffer_load {boundsCheck = true} %buf[%idx] : memref<64xi32>, i32 -> i32
  func.return %0 : i32
}

// CHECK-LABEL: func @gpu_gcn_raw_buffer_load_i32_oob_off
func.func @gpu_gcn_raw_buffer_load_i32_oob_off(%buf: memref<64xi32>, %idx: i32) -> i32 {
  // GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
  // RDNA: %[[flags:.*]] = llvm.mlir.constant(553807872 : i32)
  // CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %{{.*}}, %[[flags]]
  // CHECK: %[[ret:.*]] = rocdl.raw.ptr.buffer.load %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i32
  // CHECK: return %[[ret]]
  %0 = amdgpu.raw_buffer_load {boundsCheck = false} %buf[%idx] : memref<64xi32>, i32 -> i32
  func.return %0 : i32
}

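// Note: disabling boundsCheck only changes the RDNA flags word -- 553807872
// (0x21027000) versus 822243328 (0x31027000), a difference of bit 28, which
// sits in the descriptor's out-of-bounds-select field -- while GFX9 keeps
// the same constant either way.
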
// CHECK-LABEL: func @gpu_gcn_raw_buffer_load_2xi32
func.func @gpu_gcn_raw_buffer_load_2xi32(%buf: memref<64xi32>, %idx: i32) -> vector<2xi32> {
  // CHECK: %[[ret:.*]] = rocdl.raw.ptr.buffer.load %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : vector<2xi32>
  // CHECK: return %[[ret]]
  %0 = amdgpu.raw_buffer_load {boundsCheck = true} %buf[%idx] : memref<64xi32>, i32 -> vector<2xi32>
  func.return %0 : vector<2xi32>
}

// CHECK-LABEL: func @gpu_gcn_raw_buffer_load_i8
func.func @gpu_gcn_raw_buffer_load_i8(%buf: memref<64xi8>, %idx: i32) -> i8 {
  // CHECK: %[[numRecords:.*]] = llvm.mlir.constant(64 : i32)
  // CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %[[numRecords]], %{{.*}}
  // CHECK: %[[ret:.*]] = rocdl.raw.ptr.buffer.load %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i8
  // CHECK: return %[[ret]]
  %0 = amdgpu.raw_buffer_load {boundsCheck = true} %buf[%idx] : memref<64xi8>, i32 -> i8
  func.return %0 : i8
}

// CHECK-LABEL: func @gpu_gcn_raw_buffer_load_2xi8
func.func @gpu_gcn_raw_buffer_load_2xi8(%buf: memref<64xi8>, %idx: i32) -> vector<2xi8> {
  // CHECK: %[[numRecords:.*]] = llvm.mlir.constant(64 : i32)
  // CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %[[numRecords]], %{{.*}}
  // CHECK: %[[loaded:.*]] = rocdl.raw.ptr.buffer.load %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i16
  // CHECK: %[[ret:.*]] = llvm.bitcast %[[loaded]] : i16 to vector<2xi8>
  // CHECK: return %[[ret]]
  %0 = amdgpu.raw_buffer_load {boundsCheck = true} %buf[%idx] : memref<64xi8>, i32 -> vector<2xi8>
  func.return %0 : vector<2xi8>
}

// CHECK-LABEL: func @gpu_gcn_raw_buffer_load_16xi8
func.func @gpu_gcn_raw_buffer_load_16xi8(%buf: memref<64xi8>, %idx: i32) -> vector<16xi8> {
  // CHECK: %[[loaded:.*]] = rocdl.raw.ptr.buffer.load %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : vector<4xi32>
  // CHECK: %[[ret:.*]] = llvm.bitcast %[[loaded]] : vector<4xi32> to vector<16xi8>
  // CHECK: return %[[ret]]
  %0 = amdgpu.raw_buffer_load {boundsCheck = true} %buf[%idx] : memref<64xi8>, i32 -> vector<16xi8>
  func.return %0 : vector<16xi8>
}

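// Note: as the two tests above check, byte-vector loads are performed on a
// machine-native integer type and bitcast afterwards -- vector<2xi8> travels
// through the buffer intrinsic as i16, and vector<16xi8> as vector<4xi32>.
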
// CHECK-LABEL: func @gpu_gcn_raw_buffer_load_f8E5M2FNUZ
func.func @gpu_gcn_raw_buffer_load_f8E5M2FNUZ(%buf: memref<64xf8E5M2FNUZ>, %idx: i32) -> f8E5M2FNUZ {
  // CHECK: %[[numRecords:.*]] = llvm.mlir.constant(64 : i32)
  // CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %[[numRecords]], %{{.*}}
  // CHECK: %[[loaded:.*]] = rocdl.raw.ptr.buffer.load %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i8
  // CHECK: %[[ret:.*]] = builtin.unrealized_conversion_cast %[[loaded]] : i8 to f8E5M2FNUZ
  // CHECK: return %[[ret]]
  %0 = amdgpu.raw_buffer_load {boundsCheck = true} %buf[%idx] : memref<64xf8E5M2FNUZ>, i32 -> f8E5M2FNUZ
  func.return %0 : f8E5M2FNUZ
}

// CHECK-LABEL: func @gpu_gcn_raw_buffer_load_4xf8E4M3FNUZ
func.func @gpu_gcn_raw_buffer_load_4xf8E4M3FNUZ(%buf: memref<64xf8E4M3FNUZ>, %idx: i32) -> vector<4xf8E4M3FNUZ> {
  // CHECK: %[[numRecords:.*]] = llvm.mlir.constant(64 : i32)
  // CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %[[numRecords]], %{{.*}}
  // CHECK: %[[loaded:.*]] = rocdl.raw.ptr.buffer.load %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i32
  // CHECK: %[[cast:.*]] = llvm.bitcast %[[loaded]] : i32 to vector<4xi8>
  // CHECK: %[[ret:.*]] = builtin.unrealized_conversion_cast %[[cast]] : vector<4xi8> to vector<4xf8E4M3FNUZ>
  // CHECK: return %[[ret]]
  %0 = amdgpu.raw_buffer_load {boundsCheck = true} %buf[%idx] : memref<64xf8E4M3FNUZ>, i32 -> vector<4xf8E4M3FNUZ>
  func.return %0 : vector<4xf8E4M3FNUZ>
}

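// Note: the 8-bit float element types have no LLVM IR equivalent, so the
// loaded bits stay in i8 / vector<4xi8> form and are reinterpreted back to
// f8E5M2FNUZ / f8E4M3FNUZ through builtin.unrealized_conversion_cast.
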
// Since the lowering logic is shared with loads, only the bitcasts need to be
// rechecked.
// CHECK-LABEL: func @gpu_gcn_raw_buffer_store_scalar_i32
func.func @gpu_gcn_raw_buffer_store_scalar_i32(%value: i32, %buf: memref<i32>) {
  // GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
  // RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
  // CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %{{.*}}, %[[flags]]
  // CHECK: rocdl.raw.ptr.buffer.store %{{.*}}, %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i32
  amdgpu.raw_buffer_store {boundsCheck = true} %value -> %buf[] : i32 -> memref<i32>
  func.return
}

// CHECK-LABEL: func @gpu_gcn_raw_buffer_store_i32
func.func @gpu_gcn_raw_buffer_store_i32(%value: i32, %buf: memref<64xi32>, %idx: i32) {
  // CHECK: %[[numRecords:.*]] = llvm.mlir.constant(256 : i32)
  // GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
  // RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
  // CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %[[numRecords]], %[[flags]]
  // CHECK: rocdl.raw.ptr.buffer.store %{{.*}}, %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i32
  amdgpu.raw_buffer_store {boundsCheck = true} %value -> %buf[%idx] : i32 -> memref<64xi32>, i32
  func.return
}

// CHECK-LABEL: func @gpu_gcn_raw_buffer_store_2xi8
func.func @gpu_gcn_raw_buffer_store_2xi8(%value: vector<2xi8>, %buf: memref<64xi8>, %idx: i32) {
  // CHECK: %[[cast:.*]] = llvm.bitcast %{{.*}} : vector<2xi8> to i16
  // CHECK: rocdl.raw.ptr.buffer.store %[[cast]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : i16
  amdgpu.raw_buffer_store {boundsCheck = true} %value -> %buf[%idx] : vector<2xi8> -> memref<64xi8>, i32
  func.return
}

// CHECK-LABEL: func @gpu_gcn_raw_buffer_store_16xi8
func.func @gpu_gcn_raw_buffer_store_16xi8(%value: vector<16xi8>, %buf: memref<64xi8>, %idx: i32) {
  // CHECK: %[[cast:.*]] = llvm.bitcast %{{.*}} : vector<16xi8> to vector<4xi32>
  // CHECK: rocdl.raw.ptr.buffer.store %[[cast]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : vector<4xi32>
  amdgpu.raw_buffer_store {boundsCheck = true} %value -> %buf[%idx] : vector<16xi8> -> memref<64xi8>, i32
  func.return
}

// The same shared lowering logic applies to the atomic ops.
// CHECK-LABEL: func @gpu_gcn_raw_buffer_atomic_fadd_f32
func.func @gpu_gcn_raw_buffer_atomic_fadd_f32(%value: f32, %buf: memref<64xf32>, %idx: i32) {
  // CHECK: %[[numRecords:.*]] = llvm.mlir.constant(256 : i32)
  // GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
  // RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
  // CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %[[numRecords]], %[[flags]]
  // CHECK: rocdl.raw.ptr.buffer.atomic.fadd %{{.*}}, %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : f32
  amdgpu.raw_buffer_atomic_fadd {boundsCheck = true} %value -> %buf[%idx] : f32 -> memref<64xf32>, i32
  func.return
}

// CHECK-LABEL: func @gpu_gcn_raw_buffer_atomic_fadd_v2f16
func.func @gpu_gcn_raw_buffer_atomic_fadd_v2f16(%value: vector<2xf16>, %buf: memref<64xf16>, %idx: i32) {
  // CHECK: %[[numRecords:.*]] = llvm.mlir.constant(128 : i32)
  // GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
  // RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
  // CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %[[numRecords]], %[[flags]]
  // CHECK: rocdl.raw.ptr.buffer.atomic.fadd %{{.*}}, %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : vector<2xf16>
  amdgpu.raw_buffer_atomic_fadd {boundsCheck = true} %value -> %buf[%idx] : vector<2xf16> -> memref<64xf16>, i32
  func.return
}

// CHECK-LABEL: func @gpu_gcn_raw_buffer_atomic_fadd_v2bf16
func.func @gpu_gcn_raw_buffer_atomic_fadd_v2bf16(%value: vector<2xbf16>, %buf: memref<64xbf16>, %idx: i32) {
  // CHECK: %[[numRecords:.*]] = llvm.mlir.constant(128 : i32)
  // GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
  // RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
  // CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %[[numRecords]], %[[flags]]
  // CHECK: rocdl.raw.ptr.buffer.atomic.fadd %{{.*}}, %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : vector<2xbf16>
  amdgpu.raw_buffer_atomic_fadd {boundsCheck = true} %value -> %buf[%idx] : vector<2xbf16> -> memref<64xbf16>, i32
  func.return
}

// CHECK-LABEL: func @gpu_gcn_raw_buffer_atomic_fmax_f32
func.func @gpu_gcn_raw_buffer_atomic_fmax_f32(%value: f32, %buf: memref<64xf32>, %idx: i32) {
  // CHECK: %[[numRecords:.*]] = llvm.mlir.constant(256 : i32)
  // GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
  // RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
  // CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %[[numRecords]], %[[flags]]
  // CHECK: rocdl.raw.ptr.buffer.atomic.fmax %{{.*}}, %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : f32
  amdgpu.raw_buffer_atomic_fmax {boundsCheck = true} %value -> %buf[%idx] : f32 -> memref<64xf32>, i32
  func.return
}

// CHECK-LABEL: func @gpu_gcn_raw_buffer_atomic_smax_i32
func.func @gpu_gcn_raw_buffer_atomic_smax_i32(%value: i32, %buf: memref<64xi32>, %idx: i32) {
  // CHECK: %[[numRecords:.*]] = llvm.mlir.constant(256 : i32)
  // GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
  // RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
  // CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %[[numRecords]], %[[flags]]
  // CHECK: rocdl.raw.ptr.buffer.atomic.smax %{{.*}}, %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i32
  amdgpu.raw_buffer_atomic_smax {boundsCheck = true} %value -> %buf[%idx] : i32 -> memref<64xi32>, i32
  func.return
}

// CHECK-LABEL: func @gpu_gcn_raw_buffer_atomic_umin_i32
func.func @gpu_gcn_raw_buffer_atomic_umin_i32(%value: i32, %buf: memref<64xi32>, %idx: i32) {
  // CHECK: %[[numRecords:.*]] = llvm.mlir.constant(256 : i32)
  // GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
  // RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
  // CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %[[numRecords]], %[[flags]]
  // CHECK: rocdl.raw.ptr.buffer.atomic.umin %{{.*}}, %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i32
  amdgpu.raw_buffer_atomic_umin {boundsCheck = true} %value -> %buf[%idx] : i32 -> memref<64xi32>, i32
  func.return
}

// CHECK-LABEL: func @amdgpu_raw_buffer_atomic_cmpswap_f32
// CHECK-SAME: (%[[src:.*]]: f32, %[[cmp:.*]]: f32, {{.*}})
func.func @amdgpu_raw_buffer_atomic_cmpswap_f32(%src : f32, %cmp : f32, %buf : memref<64xf32>, %idx: i32) -> f32 {
  // CHECK: %[[srcCast:.*]] = llvm.bitcast %[[src]] : f32 to i32
  // CHECK: %[[cmpCast:.*]] = llvm.bitcast %[[cmp]] : f32 to i32
  // CHECK: %[[numRecords:.*]] = llvm.mlir.constant(256 : i32)
  // GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
  // RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
  // CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %[[numRecords]], %[[flags]]
  // CHECK: %[[dst:.*]] = rocdl.raw.ptr.buffer.atomic.cmpswap %[[srcCast]], %[[cmpCast]], %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i32
  // CHECK: %[[dstCast:.*]] = llvm.bitcast %[[dst]] : i32 to f32
  // CHECK: return %[[dstCast]]
  %dst = amdgpu.raw_buffer_atomic_cmpswap {boundsCheck = true} %src, %cmp -> %buf[%idx] : f32 -> memref<64xf32>, i32
  func.return %dst : f32
}

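// Note: the f32 compare-and-swap above round-trips through i32 bitcasts
// because the buffer cmpswap intrinsic operates on integer values; the i64
// case below already has the right type and needs no casts.
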
// CHECK-LABEL: func @amdgpu_raw_buffer_atomic_cmpswap_i64
// CHECK-SAME: (%[[src:.*]]: i64, %[[cmp:.*]]: i64, {{.*}})
func.func @amdgpu_raw_buffer_atomic_cmpswap_i64(%src : i64, %cmp : i64, %buf : memref<64xi64>, %idx: i32) -> i64 {
  // CHECK: %[[numRecords:.*]] = llvm.mlir.constant(512 : i32)
  // GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
  // RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
  // CHECK: %[[resource:.*]] = rocdl.make.buffer.rsrc %{{.*}}, %{{.*}}, %[[numRecords]], %[[flags]]
  // CHECK: %[[dst:.*]] = rocdl.raw.ptr.buffer.atomic.cmpswap %[[src]], %[[cmp]], %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i64
  // CHECK: return %[[dst]]
  %dst = amdgpu.raw_buffer_atomic_cmpswap {boundsCheck = true} %src, %cmp -> %buf[%idx] : i64 -> memref<64xi64>, i32
  func.return %dst : i64
}

// CHECK-LABEL: func @amdgpu_raw_buffer_atomic_cmpswap_v2f16
// CHECK-SAME: (%[[src:.*]]: vector<2xf16>, %[[cmp:.*]]: vector<2xf16>, {{.*}})
func.func @amdgpu_raw_buffer_atomic_cmpswap_v2f16(%src : vector<2xf16>, %cmp : vector<2xf16>, %buf : memref<64xf16>, %idx: i32) -> vector<2xf16> {
  // CHECK-DAG: %[[srcBits:.+]] = llvm.bitcast %[[src]] : vector<2xf16> to i32
  // CHECK-DAG: %[[cmpBits:.+]] = llvm.bitcast %[[cmp]] : vector<2xf16> to i32
  // CHECK: %[[dstBits:.+]] = rocdl.raw.ptr.buffer.atomic.cmpswap %[[srcBits]], %[[cmpBits]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : i32
  // CHECK: %[[dst:.+]] = llvm.bitcast %[[dstBits]] : i32 to vector<2xf16>
  // CHECK: return %[[dst]]
  %dst = amdgpu.raw_buffer_atomic_cmpswap {boundsCheck = true} %src, %cmp -> %buf[%idx] : vector<2xf16> -> memref<64xf16>, i32
  func.return %dst : vector<2xf16>
}

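// The LDS barrier lowering varies by chipset, as the checks below show:
// gfx908 and gfx11 emit inline asm (note the embedded debug-watch warning),
// gfx90a and gfx10 emit rocdl.waitcnt followed by rocdl.s.barrier, and gfx12
// uses the split barrier signal/wait ops.
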
// CHECK-LABEL: func @lds_barrier
func.func @lds_barrier() {
  // GFX908: llvm.inline_asm has_side_effects asm_dialect = att
  // GFX908-SAME: ";;;WARNING: BREAKS DEBUG WATCHES\0As_waitcnt lgkmcnt(0)\0As_barrier"
  // GFX90A: rocdl.waitcnt -7937
  // GFX90A-NEXT: rocdl.s.barrier
  // GFX10: rocdl.waitcnt -16129
  // GFX10-NEXT: rocdl.s.barrier
  // GFX11: llvm.inline_asm has_side_effects asm_dialect = att
  // GFX11-SAME: ";;;WARNING: BREAKS DEBUG WATCHES\0As_waitcnt lgkmcnt(0)\0As_barrier"
  // GFX12: rocdl.s.wait.dscnt 0
  // GFX12-NEXT: rocdl.s.barrier.signal -1
  // GFX12-NEXT: rocdl.s.barrier.wait -1
  amdgpu.lds_barrier
  func.return
}

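// The sched_barrier allow masks map directly onto rocdl.sched.barrier bitmask
// operands; combined variants OR the individual bits (e.g. valu|all_vmem is
// 2 | 16 = 18).
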
// CHECK-LABEL: func @sched_barrier
func.func @sched_barrier() {
  // CHECK: rocdl.sched.barrier 0
  amdgpu.sched_barrier allow = <none>
  // CHECK: rocdl.sched.barrier 1
  amdgpu.sched_barrier allow = <non_mem_non_sideffect>
  // CHECK: rocdl.sched.barrier 2
  amdgpu.sched_barrier allow = <valu>
  // CHECK: rocdl.sched.barrier 4
  amdgpu.sched_barrier allow = <salu>
  // CHECK: rocdl.sched.barrier 8
  amdgpu.sched_barrier allow = <mfma_wmma>
  // CHECK: rocdl.sched.barrier 16
  amdgpu.sched_barrier allow = <all_vmem>
  // CHECK: rocdl.sched.barrier 32
  amdgpu.sched_barrier allow = <vmem_read>
  // CHECK: rocdl.sched.barrier 64
  amdgpu.sched_barrier allow = <vmem_write>
  // CHECK: rocdl.sched.barrier 128
  amdgpu.sched_barrier allow = <all_ds>
  // CHECK: rocdl.sched.barrier 256
  amdgpu.sched_barrier allow = <ds_read>
  // CHECK: rocdl.sched.barrier 512
  amdgpu.sched_barrier allow = <ds_write>
  // CHECK: rocdl.sched.barrier 1024
  amdgpu.sched_barrier allow = <transcendental>
  // CHECK: rocdl.sched.barrier 18
  amdgpu.sched_barrier allow = <valu|all_vmem>
  func.return
}