[libc++] Refactor the sequence container benchmarks (#119763)
[llvm-project.git] / clang / test / CodeGenOpenCL / builtins-amdgcn-gfx12.cl
blob234ad4fd8cde617d268b5f692872e36ff7ca221b
1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2 // RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -emit-llvm -o - %s | FileCheck %s
4 // REQUIRES: amdgpu-registered-target
// Local alias for unsigned int; used in the signatures of the
// test_permlane16_var and test_permlanex16_var functions in this file.
6 typedef unsigned int uint;
8 // CHECK-LABEL: @test_s_sleep_var(
9 // CHECK-NEXT: entry:
10 // CHECK-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
11 // CHECK-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
12 // CHECK-NEXT: store i32 [[D:%.*]], ptr [[D_ADDR_ASCAST]], align 4
13 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[D_ADDR_ASCAST]], align 4
14 // CHECK-NEXT: call void @llvm.amdgcn.s.sleep.var(i32 [[TMP0]])
15 // CHECK-NEXT: call void @llvm.amdgcn.s.sleep.var(i32 15)
16 // CHECK-NEXT: ret void
// Calls __builtin_amdgcn_s_sleep_var twice: once with the parameter d and
// once with the literal 15. The CHECK lines for this function expect a
// matching pair of @llvm.amdgcn.s.sleep.var calls.
18 void test_s_sleep_var(int d)
20 __builtin_amdgcn_s_sleep_var(d);
21 __builtin_amdgcn_s_sleep_var(15);
24 // CHECK-LABEL: @test_permlane16_var(
25 // CHECK-NEXT: entry:
26 // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
27 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
28 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
29 // CHECK-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
30 // CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
31 // CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
32 // CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
33 // CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
34 // CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
35 // CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
36 // CHECK-NEXT: store i32 [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 4
37 // CHECK-NEXT: store i32 [[C:%.*]], ptr [[C_ADDR_ASCAST]], align 4
38 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
39 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR_ASCAST]], align 4
40 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[C_ADDR_ASCAST]], align 4
41 // CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.permlane16.var(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i1 false, i1 false)
42 // CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
43 // CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
44 // CHECK-NEXT: ret void
// Writes the result of __builtin_amdgcn_permlane16_var(a, b, c, 0, 0) through
// out. The two trailing 0 arguments show up as `i1 false, i1 false` in the
// expected @llvm.amdgcn.permlane16.var call.
46 void test_permlane16_var(global uint* out, uint a, uint b, uint c) {
47 *out = __builtin_amdgcn_permlane16_var(a, b, c, 0, 0);
50 // CHECK-LABEL: @test_permlanex16_var(
51 // CHECK-NEXT: entry:
52 // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
53 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
54 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
55 // CHECK-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
56 // CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
57 // CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
58 // CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
59 // CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
60 // CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
61 // CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
62 // CHECK-NEXT: store i32 [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 4
63 // CHECK-NEXT: store i32 [[C:%.*]], ptr [[C_ADDR_ASCAST]], align 4
64 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
65 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR_ASCAST]], align 4
66 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[C_ADDR_ASCAST]], align 4
67 // CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.permlanex16.var(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i1 false, i1 false)
68 // CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
69 // CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
70 // CHECK-NEXT: ret void
// Writes the result of __builtin_amdgcn_permlanex16_var(a, b, c, 0, 0) through
// out. The two trailing 0 arguments show up as `i1 false, i1 false` in the
// expected @llvm.amdgcn.permlanex16.var call.
72 void test_permlanex16_var(global uint* out, uint a, uint b, uint c) {
73 *out = __builtin_amdgcn_permlanex16_var(a, b, c, 0, 0);
76 // CHECK-LABEL: @test_s_barrier_signal(
77 // CHECK-NEXT: entry:
78 // CHECK-NEXT: call void @llvm.amdgcn.s.barrier.signal(i32 -1)
79 // CHECK-NEXT: call void @llvm.amdgcn.s.barrier.wait(i16 -1)
80 // CHECK-NEXT: ret void
// Issues __builtin_amdgcn_s_barrier_signal(-1) followed by
// __builtin_amdgcn_s_barrier_wait(-1). The expected IR passes -1 as an i32 to
// the signal intrinsic and as an i16 to the wait intrinsic.
82 void test_s_barrier_signal()
84 __builtin_amdgcn_s_barrier_signal(-1);
85 __builtin_amdgcn_s_barrier_wait(-1);
88 // CHECK-LABEL: @test_s_barrier_signal_var(
89 // CHECK-NEXT: entry:
90 // CHECK-NEXT: [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
91 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
92 // CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
93 // CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
94 // CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
95 // CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
96 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
97 // CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
98 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
99 // CHECK-NEXT: call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) [[TMP1]], i32 [[TMP2]])
100 // CHECK-NEXT: ret void
// Passes a void* and an int to __builtin_amdgcn_s_barrier_signal_var. The
// expected IR addrspacecasts the loaded pointer to addrspace(3) before calling
// @llvm.amdgcn.s.barrier.signal.var.
102 void test_s_barrier_signal_var(void *bar, int a)
104 __builtin_amdgcn_s_barrier_signal_var(bar, a);
107 // CHECK-LABEL: @test_s_barrier_signal_isfirst(
108 // CHECK-NEXT: entry:
109 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
110 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
111 // CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
112 // CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
113 // CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
114 // CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
115 // CHECK-NEXT: store ptr [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 8
116 // CHECK-NEXT: store ptr [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 8
117 // CHECK-NEXT: store ptr [[C:%.*]], ptr [[C_ADDR_ASCAST]], align 8
118 // CHECK-NEXT: [[TMP0:%.*]] = call i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32 1)
119 // CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
120 // CHECK: if.then:
121 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
122 // CHECK-NEXT: store ptr [[TMP1]], ptr [[A_ADDR_ASCAST]], align 8
123 // CHECK-NEXT: br label [[IF_END:%.*]]
124 // CHECK: if.else:
125 // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8
126 // CHECK-NEXT: store ptr [[TMP2]], ptr [[A_ADDR_ASCAST]], align 8
127 // CHECK-NEXT: br label [[IF_END]]
128 // CHECK: if.end:
129 // CHECK-NEXT: call void @llvm.amdgcn.s.barrier.wait(i16 1)
130 // CHECK-NEXT: ret void
// Branches on the result of __builtin_amdgcn_s_barrier_signal_isfirst(1):
// assigns b to a when it is true and c to a otherwise, then calls
// __builtin_amdgcn_s_barrier_wait(1). The expected IR feeds the intrinsic's i1
// result directly into the conditional branch.
132 void test_s_barrier_signal_isfirst(int* a, int* b, int *c)
134 if(__builtin_amdgcn_s_barrier_signal_isfirst(1))
135 a = b;
136 else
137 a = c;
139 __builtin_amdgcn_s_barrier_wait(1);
142 // CHECK-LABEL: @test_s_barrier_init(
143 // CHECK-NEXT: entry:
144 // CHECK-NEXT: [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
145 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
146 // CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
147 // CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
148 // CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
149 // CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
150 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
151 // CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
152 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
153 // CHECK-NEXT: call void @llvm.amdgcn.s.barrier.init(ptr addrspace(3) [[TMP1]], i32 [[TMP2]])
154 // CHECK-NEXT: ret void
// Passes a void* and an int to __builtin_amdgcn_s_barrier_init. The expected
// IR addrspacecasts the loaded pointer to addrspace(3) before calling
// @llvm.amdgcn.s.barrier.init.
156 void test_s_barrier_init(void *bar, int a)
158 __builtin_amdgcn_s_barrier_init(bar, a);
161 // CHECK-LABEL: @test_s_barrier_join(
162 // CHECK-NEXT: entry:
163 // CHECK-NEXT: [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
164 // CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
165 // CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
166 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
167 // CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
168 // CHECK-NEXT: call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) [[TMP1]])
169 // CHECK-NEXT: ret void
// Passes a void* to __builtin_amdgcn_s_barrier_join. The expected IR
// addrspacecasts the loaded pointer to addrspace(3) before calling
// @llvm.amdgcn.s.barrier.join.
171 void test_s_barrier_join(void *bar)
173 __builtin_amdgcn_s_barrier_join(bar);
176 // CHECK-LABEL: @test_s_barrier_leave(
177 // CHECK-NEXT: entry:
178 // CHECK-NEXT: call void @llvm.amdgcn.s.barrier.leave(i16 1)
179 // CHECK-NEXT: ret void
// Calls __builtin_amdgcn_s_barrier_leave with the literal 1; the expected IR
// passes it to @llvm.amdgcn.s.barrier.leave as an i16.
181 void test_s_barrier_leave()
183 __builtin_amdgcn_s_barrier_leave(1);
186 // CHECK-LABEL: @test_s_get_barrier_state(
187 // CHECK-NEXT: entry:
188 // CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
189 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
190 // CHECK-NEXT: [[STATE:%.*]] = alloca i32, align 4, addrspace(5)
191 // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
192 // CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
193 // CHECK-NEXT: [[STATE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[STATE]] to ptr
194 // CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
195 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
196 // CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.s.get.barrier.state(i32 [[TMP0]])
197 // CHECK-NEXT: store i32 [[TMP1]], ptr [[STATE_ASCAST]], align 4
198 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[STATE_ASCAST]], align 4
199 // CHECK-NEXT: ret i32 [[TMP2]]
// Stores the result of __builtin_amdgcn_s_get_barrier_state(a) in the local
// State and returns it. The expected IR calls
// @llvm.amdgcn.s.get.barrier.state with the loaded i32 argument.
201 unsigned test_s_get_barrier_state(int a)
203 unsigned State = __builtin_amdgcn_s_get_barrier_state(a);
204 return State;
207 // CHECK-LABEL: @test_s_get_named_barrier_state(
208 // CHECK-NEXT: entry:
209 // CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
210 // CHECK-NEXT: [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
211 // CHECK-NEXT: [[STATE:%.*]] = alloca i32, align 4, addrspace(5)
212 // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
213 // CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
214 // CHECK-NEXT: [[STATE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[STATE]] to ptr
215 // CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
216 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
217 // CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
218 // CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) [[TMP1]])
219 // CHECK-NEXT: store i32 [[TMP2]], ptr [[STATE_ASCAST]], align 4
220 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[STATE_ASCAST]], align 4
221 // CHECK-NEXT: ret i32 [[TMP3]]
// Stores the result of __builtin_amdgcn_s_get_named_barrier_state(bar) in the
// local State and returns it. The expected IR addrspacecasts the loaded
// pointer to addrspace(3) before calling
// @llvm.amdgcn.s.get.named.barrier.state.
223 unsigned test_s_get_named_barrier_state(void *bar)
225 unsigned State = __builtin_amdgcn_s_get_named_barrier_state(bar);
226 return State;
229 // CHECK-LABEL: @test_s_ttracedata(
230 // CHECK-NEXT: entry:
231 // CHECK-NEXT: call void @llvm.amdgcn.s.ttracedata(i32 1)
232 // CHECK-NEXT: ret void
// Calls __builtin_amdgcn_s_ttracedata with the literal 1; the expected IR
// passes it to @llvm.amdgcn.s.ttracedata as an i32.
234 void test_s_ttracedata()
236 __builtin_amdgcn_s_ttracedata(1);
239 // CHECK-LABEL: @test_s_ttracedata_imm(
240 // CHECK-NEXT: entry:
241 // CHECK-NEXT: call void @llvm.amdgcn.s.ttracedata.imm(i16 1)
242 // CHECK-NEXT: ret void
// Calls __builtin_amdgcn_s_ttracedata_imm with the literal 1; the expected IR
// passes it to @llvm.amdgcn.s.ttracedata.imm as an i16.
244 void test_s_ttracedata_imm()
246 __builtin_amdgcn_s_ttracedata_imm(1);
249 // CHECK-LABEL: @test_s_prefetch_data(
250 // CHECK-NEXT: entry:
251 // CHECK-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
252 // CHECK-NEXT: [[GP_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
253 // CHECK-NEXT: [[CP_ADDR:%.*]] = alloca ptr addrspace(4), align 8, addrspace(5)
254 // CHECK-NEXT: [[LEN_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
255 // CHECK-NEXT: [[FP_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FP_ADDR]] to ptr
256 // CHECK-NEXT: [[GP_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GP_ADDR]] to ptr
257 // CHECK-NEXT: [[CP_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CP_ADDR]] to ptr
258 // CHECK-NEXT: [[LEN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LEN_ADDR]] to ptr
259 // CHECK-NEXT: store ptr [[FP:%.*]], ptr [[FP_ADDR_ASCAST]], align 8
260 // CHECK-NEXT: store ptr addrspace(1) [[GP:%.*]], ptr [[GP_ADDR_ASCAST]], align 8
261 // CHECK-NEXT: store ptr addrspace(4) [[CP:%.*]], ptr [[CP_ADDR_ASCAST]], align 8
262 // CHECK-NEXT: store i32 [[LEN:%.*]], ptr [[LEN_ADDR_ASCAST]], align 4
263 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[FP_ADDR_ASCAST]], align 8
264 // CHECK-NEXT: call void @llvm.amdgcn.s.prefetch.data.p0(ptr [[TMP0]], i32 0)
265 // CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[GP_ADDR_ASCAST]], align 8
266 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[LEN_ADDR_ASCAST]], align 4
267 // CHECK-NEXT: call void @llvm.amdgcn.s.prefetch.data.p1(ptr addrspace(1) [[TMP1]], i32 [[TMP2]])
268 // CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(4), ptr [[CP_ADDR_ASCAST]], align 8
269 // CHECK-NEXT: call void @llvm.amdgcn.s.prefetch.data.p4(ptr addrspace(4) [[TMP3]], i32 31)
270 // CHECK-NEXT: ret void
// Calls __builtin_amdgcn_s_prefetch_data with three differently qualified
// pointers: an unqualified int* with length 0, a global float* with the
// runtime value len, and a constant char* with length 31. The expected IR
// selects the .p0, .p1 and .p4 overloads of @llvm.amdgcn.s.prefetch.data
// respectively.
272 void test_s_prefetch_data(int *fp, global float *gp, constant char *cp, unsigned int len)
274 __builtin_amdgcn_s_prefetch_data(fp, 0);
275 __builtin_amdgcn_s_prefetch_data(gp, len);
276 __builtin_amdgcn_s_prefetch_data(cp, 31);
279 // CHECK-LABEL: @test_s_buffer_prefetch_data(
280 // CHECK-NEXT: entry:
281 // CHECK-NEXT: [[RSRC_ADDR:%.*]] = alloca ptr addrspace(8), align 16, addrspace(5)
282 // CHECK-NEXT: [[LEN_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
283 // CHECK-NEXT: [[RSRC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RSRC_ADDR]] to ptr
284 // CHECK-NEXT: [[LEN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LEN_ADDR]] to ptr
285 // CHECK-NEXT: store ptr addrspace(8) [[RSRC:%.*]], ptr [[RSRC_ADDR_ASCAST]], align 16
286 // CHECK-NEXT: store i32 [[LEN:%.*]], ptr [[LEN_ADDR_ASCAST]], align 4
287 // CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(8), ptr [[RSRC_ADDR_ASCAST]], align 16
288 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[LEN_ADDR_ASCAST]], align 4
289 // CHECK-NEXT: call void @llvm.amdgcn.s.buffer.prefetch.data(ptr addrspace(8) [[TMP0]], i32 128, i32 [[TMP1]])
290 // CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(8), ptr [[RSRC_ADDR_ASCAST]], align 16
291 // CHECK-NEXT: call void @llvm.amdgcn.s.buffer.prefetch.data(ptr addrspace(8) [[TMP2]], i32 0, i32 31)
292 // CHECK-NEXT: ret void
// Calls __builtin_amdgcn_s_buffer_prefetch_data on the same rsrc twice: first
// with (128, len), where the last argument is a runtime value, then with
// (0, 31), where both are literals. The expected IR passes rsrc as
// ptr addrspace(8) and the remaining arguments as i32.
294 void test_s_buffer_prefetch_data(__amdgpu_buffer_rsrc_t rsrc, unsigned int len)
296 __builtin_amdgcn_s_buffer_prefetch_data(rsrc, 128, len);
297 __builtin_amdgcn_s_buffer_prefetch_data(rsrc, 0, 31);
300 // CHECK-LABEL: @test_ds_bpermute_fi_b32(
301 // CHECK-NEXT: entry:
302 // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
303 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
304 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
305 // CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
306 // CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
307 // CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
308 // CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
309 // CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
310 // CHECK-NEXT: store i32 [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 4
311 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
312 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR_ASCAST]], align 4
313 // CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.ds.bpermute.fi.b32(i32 [[TMP0]], i32 [[TMP1]])
314 // CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
315 // CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
316 // CHECK-NEXT: ret void
// Writes the result of __builtin_amdgcn_ds_bpermute_fi_b32(a, b) through out.
// The expected IR calls @llvm.amdgcn.ds.bpermute.fi.b32 with the two loaded
// i32 arguments and stores the i32 result to the addrspace(1) pointer.
318 void test_ds_bpermute_fi_b32(global int* out, int a, int b)
320 *out = __builtin_amdgcn_ds_bpermute_fi_b32(a, b);