// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

typedef unsigned long ulong;
typedef unsigned int uint;
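
// Note: the {{.*}} patterns in the CHECK lines below deliberately tolerate
// differences in call attributes and calling conventions between the amdgcn
// and spirv64-amd-amdhsa runs.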

// CHECK-LABEL: @test_div_fixup_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.div.fixup.f16
void test_div_fixup_f16(global half* out, half a, half b, half c)
{
  *out = __builtin_amdgcn_div_fixuph(a, b, c);
}

// CHECK-LABEL: @test_rcp_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.rcp.f16
void test_rcp_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_rcph(a);
}

// CHECK-LABEL: @test_sqrt_f16
// CHECK: {{.*}}call{{.*}} half @llvm.{{((amdgcn.){0,1})}}sqrt.f16
void test_sqrt_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_sqrth(a);
}

// CHECK-LABEL: @test_rsq_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.rsq.f16
void test_rsq_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_rsqh(a);
}

// CHECK-LABEL: @test_sin_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.sin.f16
void test_sin_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_sinh(a);
}

// CHECK-LABEL: @test_cos_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.cos.f16
void test_cos_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_cosh(a);
}
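
// The builtin takes a 32-bit int exponent, but the f16 intrinsic expects i16,
// so codegen truncates the argument first (matched by [[TRUNC]] below).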

// CHECK-LABEL: @test_ldexp_f16
// CHECK: [[TRUNC:%[0-9a-z]+]] = trunc i32
// CHECK: {{.*}}call{{.*}} half @llvm.ldexp.f16.i16(half %a, i16 [[TRUNC]])
void test_ldexp_f16(global half* out, half a, int b)
{
  *out = __builtin_amdgcn_ldexph(a, b);
}

// CHECK-LABEL: @test_frexp_mant_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.frexp.mant.f16
void test_frexp_mant_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_frexp_manth(a);
}

// CHECK-LABEL: @test_frexp_exp_f16
// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.frexp.exp.i16.f16
void test_frexp_exp_f16(global short* out, half a)
{
  *out = __builtin_amdgcn_frexp_exph(a);
}

// CHECK-LABEL: @test_fract_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.fract.f16
void test_fract_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_fracth(a);
}

// CHECK-LABEL: @test_class_f16
// CHECK: {{.*}}call{{.*}} i1 @llvm.amdgcn.class.f16
void test_class_f16(global half* out, half a, int b)
{
  *out = __builtin_amdgcn_classh(a, b);
}

// CHECK-LABEL: @test_s_memrealtime
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.s.memrealtime()
void test_s_memrealtime(global ulong* out)
{
  *out = __builtin_amdgcn_s_memrealtime();
}

// CHECK-LABEL: @test_s_dcache_wb()
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.dcache.wb()
void test_s_dcache_wb()
{
  __builtin_amdgcn_s_dcache_wb();
}
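
// DPP tests. __builtin_amdgcn_mov_dpp takes (src, dpp_ctrl, row_mask,
// bank_mask, bound_ctrl) and lowers to llvm.amdgcn.update.dpp with a poison
// bound value, as the CHECK lines below show. The 0x101 control selects
// row_shl:1 in the DPP encoding, with 0xf masks enabling all rows and banks.
// Sub-dword types are zero-extended to i32 and truncated back; float and
// double are bitcast to i32 and i64.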

// CHECK-LABEL: @test_mov_dpp_int
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %src, i32 0, i32 0, i32 0, i1 false)
void test_mov_dpp_int(global int* out, int src)
{
  *out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false);
}

// CHECK-LABEL: @test_mov_dpp_long
// CHECK: %0 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 poison, i64 %x, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i64 %0,
void test_mov_dpp_long(long x, global long *p) {
  *p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_mov_dpp_float
// CHECK: %0 = bitcast float %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i32 %1,
void test_mov_dpp_float(float x, global float *p) {
  *p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_mov_dpp_double
// CHECK: %0 = bitcast double %x to i64
// CHECK-NEXT: %1 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 poison, i64 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i64 %1,
void test_mov_dpp_double(double x, global double *p) {
  *p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_mov_dpp_short
// CHECK: %0 = zext i16 %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %2 = trunc i32 %1 to i16
// CHECK-NEXT: store i16 %2,
void test_mov_dpp_short(short x, global short *p) {
  *p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_mov_dpp_char
// CHECK: %0 = zext i8 %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %2 = trunc i32 %1 to i8
// CHECK-NEXT: store i8 %2,
void test_mov_dpp_char(char x, global char *p) {
  *p = __builtin_amdgcn_mov_dpp(x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_mov_dpp_half
// CHECK: %0 = load i16,
// CHECK: %1 = zext i16 %0 to i32
// CHECK-NEXT: %2 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %1, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %3 = trunc i32 %2 to i16
// CHECK-NEXT: store i16 %3,
void test_mov_dpp_half(half *x, global half *p) {
  *p = __builtin_amdgcn_mov_dpp(*x, 0x101, 0xf, 0xf, 0);
}
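
// __builtin_amdgcn_update_dpp generalizes mov_dpp: it takes an explicit old
// value as its first argument, where mov_dpp leaves the bound value poison.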

// CHECK-LABEL: @test_update_dpp_int
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %arg1, i32 %arg2, i32 0, i32 0, i32 0, i1 false)
void test_update_dpp_int(global int* out, int arg1, int arg2)
{
  *out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0, false);
}

// CHECK-LABEL: @test_update_dpp_long
// CHECK: %0 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 %x, i64 %x, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i64 %0,
void test_update_dpp_long(long x, global long *p) {
  *p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_update_dpp_float
// CHECK: %0 = bitcast float %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i32 %1,
void test_update_dpp_float(float x, global float *p) {
  *p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_update_dpp_double
// CHECK: %0 = bitcast double %x to i64
// CHECK-NEXT: %1 = tail call{{.*}} i64 @llvm.amdgcn.update.dpp.i64(i64 %0, i64 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: store i64 %1,
void test_update_dpp_double(double x, global double *p) {
  *p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_update_dpp_short
// CHECK: %0 = zext i16 %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %2 = trunc i32 %1 to i16
// CHECK-NEXT: store i16 %2,
void test_update_dpp_short(short x, global short *p) {
  *p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_update_dpp_char
// CHECK: %0 = zext i8 %x to i32
// CHECK-NEXT: %1 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %0, i32 %0, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %2 = trunc i32 %1 to i8
// CHECK-NEXT: store i8 %2,
void test_update_dpp_char(char x, global char *p) {
  *p = __builtin_amdgcn_update_dpp(x, x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_update_dpp_half
// CHECK: %0 = load i16,
// CHECK: %1 = zext i16 %0 to i32
// CHECK-NEXT: %2 = tail call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %1, i32 257, i32 15, i32 15, i1 false)
// CHECK-NEXT: %3 = trunc i32 %2 to i16
// CHECK-NEXT: store i16 %3,
void test_update_dpp_half(half *x, global half *p) {
  *p = __builtin_amdgcn_update_dpp(*x, *x, 0x101, 0xf, 0xf, 0);
}

// CHECK-LABEL: @test_update_dpp_int_uint
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %arg1, i32 %arg2, i32 0, i32 0, i32 0, i1 false)
void test_update_dpp_int_uint(global int* out, int arg1, unsigned int arg2)
{
  *out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0, false);
}

// CHECK-LABEL: @test_update_dpp_lit_int
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 5, i32 %arg1, i32 0, i32 0, i32 0, i1 false)
void test_update_dpp_lit_int(global int* out, int arg1)
{
  *out = __builtin_amdgcn_update_dpp(5, arg1, 0, 0, 0, false);
}
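
// A __constant global folds to the same immediate operand as a literal, as
// the CHECK for test_update_dpp_const_int below expects.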

__constant int gi = 5;

// CHECK-LABEL: @test_update_dpp_const_int
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 5, i32 %arg1, i32 0, i32 0, i32 0, i1 false)
void test_update_dpp_const_int(global int* out, int arg1)
{
  *out = __builtin_amdgcn_update_dpp(gi, arg1, 0, 0, 0, false);
}
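
// LDS float atomic tests. The ds_f*f builtins take (ptr, value, memory order,
// memory scope, volatile) and lower to atomicrmw. __ATOMIC_CONSUME is treated
// as acquire, and the lines marked "invalid" exercise out-of-range order and
// scope values, which fall back to seq_cst and the default (system) scope
// respectively, as the trailing CHECK lines in each group show.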

// CHECK-LABEL: @test_ds_fadd
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
// CHECK: atomicrmw volatile fadd ptr addrspace(3) %out, float %src monotonic, align 4{{$}}

// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src acquire, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src acquire, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src release, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src acq_rel, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}

// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
#if !defined(__SPIRV__)
void test_ds_faddf(local float *out, float src) {
#else
void test_ds_faddf(__attribute__((address_space(3))) float *out, float src) {
#endif
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM, true);

  // Test all orders.
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_CONSUME, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELEASE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_ACQ_REL, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_SEQ_CST + 1, __MEMORY_SCOPE_SYSTEM, false); // invalid

  // Test all syncscopes.
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, 5, false); // invalid
}
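
// test_ds_fminf and test_ds_fmaxf below repeat the same order, scope, and
// volatile combinations for the fmin and fmax atomicrmw operations.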

// CHECK-LABEL: @test_ds_fmin
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
// CHECK: atomicrmw volatile fmin ptr addrspace(3) %out, float %src monotonic, align 4{{$}}

// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src acquire, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src acquire, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src release, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src acq_rel, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}

// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src monotonic, align 4{{$}}

#if !defined(__SPIRV__)
void test_ds_fminf(local float *out, float src) {
#else
void test_ds_fminf(__attribute__((address_space(3))) float *out, float src) {
#endif
  *out = __builtin_amdgcn_ds_fminf(out, src, 0, 0, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, 0, 0, true);

  // Test all orders.
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_CONSUME, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELEASE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_ACQ_REL, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_SEQ_CST + 1, __MEMORY_SCOPE_SYSTEM, false); // invalid

  // Test all syncscopes.
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, 5, false); // invalid
}

// CHECK-LABEL: @test_ds_fmax
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
// CHECK: atomicrmw volatile fmax ptr addrspace(3) %out, float %src monotonic, align 4{{$}}

// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src acquire, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src acquire, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src release, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src acq_rel, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}

// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src monotonic, align 4{{$}}

#if !defined(__SPIRV__)
void test_ds_fmaxf(local float *out, float src) {
#else
void test_ds_fmaxf(__attribute__((address_space(3))) float *out, float src) {
#endif
  *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, true);

  // Test all orders.
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_CONSUME, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELEASE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_ACQ_REL, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_SEQ_CST + 1, __MEMORY_SCOPE_SYSTEM, false); // invalid

  // Test all syncscopes.
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, 5, false); // invalid
}

// CHECK-LABEL: @test_s_memtime
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.s.memtime()
void test_s_memtime(global ulong* out)
{
  *out = __builtin_amdgcn_s_memtime();
}
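
// __builtin_amdgcn_perm maps to the v_perm_b32 byte permute: it selects bytes
// from the 64-bit value formed from a and b according to the selector s.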

// CHECK-LABEL: @test_perm
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.perm(i32 %a, i32 %b, i32 %s)
void test_perm(global uint* out, uint a, uint b, uint s)
{
  *out = __builtin_amdgcn_perm(a, b, s);
}

// CHECK-LABEL: @test_groupstaticsize
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.groupstaticsize()
void test_groupstaticsize(global uint* out)
{
  *out = __builtin_amdgcn_groupstaticsize();
}