// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1101 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1102 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1103 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1150 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1151 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1152 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1153 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
// Scalar and vector integer aliases used by the test functions below.
// uint2/uint4 are 2- and 4-element vectors of uint (Clang ext_vector_type).
typedef unsigned int uint;
typedef unsigned long ulong;
typedef uint uint2 __attribute__((ext_vector_type(2)));
typedef uint uint4 __attribute__((ext_vector_type(4)));
// __builtin_amdgcn_s_sendmsg_rtn with a constant 0 argument is expected to be
// emitted as a call to the i32 overload of llvm.amdgcn.s.sendmsg.rtn; the
// returned value is written through `out`.
// CHECK-LABEL: @test_s_sendmsg_rtn(
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.s.sendmsg.rtn.i32(i32 0)
void test_s_sendmsg_rtn(global uint* out) {
  *out = __builtin_amdgcn_s_sendmsg_rtn(0);
}
// 64-bit variant: __builtin_amdgcn_s_sendmsg_rtnl with a constant 0 argument
// is expected to be emitted as a call to the i64 overload of
// llvm.amdgcn.s.sendmsg.rtn; the returned value is written through `out`.
// CHECK-LABEL: @test_s_sendmsg_rtnl(
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.s.sendmsg.rtn.i64(i32 0)
void test_s_sendmsg_rtnl(global ulong* out) {
  *out = __builtin_amdgcn_s_sendmsg_rtnl(0);
}
// __builtin_amdgcn_ds_bvh_stack_rtn(addr, data, data1, 128) is expected to be
// emitted as a call to llvm.amdgcn.ds.bvh.stack.rtn with the constant 128 as
// the last operand. The intrinsic yields a { i32, i32 } aggregate, so the
// expected IR extracts both fields and rebuilds them into the <2 x i32>
// value that is written through `out`.
// CHECK-LABEL: @test_ds_bvh_stack_rtn(
// CHECK: %0 = tail call{{.*}} { i32, i32 } @llvm.amdgcn.ds.bvh.stack.rtn(i32 %addr, i32 %data, <4 x i32> %data1, i32 128)
// CHECK: %1 = extractvalue { i32, i32 } %0, 0
// CHECK: %2 = extractvalue { i32, i32 } %0, 1
// CHECK: %3 = insertelement <2 x i32> poison, i32 %1, i64 0
// CHECK: %4 = insertelement <2 x i32> %3, i32 %2, i64 1
void test_ds_bvh_stack_rtn(global uint2* out, uint addr, uint data, uint4 data1) {
  *out = __builtin_amdgcn_ds_bvh_stack_rtn(addr, data, data1, 128);
}
// __builtin_amdgcn_permlane64(a) is expected to be emitted as a call to the
// i32 overload of llvm.amdgcn.permlane64 taking %a; the result is written
// through `out`.
// CHECK-LABEL: @test_permlane64(
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.permlane64.i32(i32 %a)
void test_permlane64(global uint* out, uint a) {
  *out = __builtin_amdgcn_permlane64(a);
}
// __builtin_amdgcn_s_wait_event_export_ready() takes no arguments and returns
// nothing; it is expected to be emitted as a void call to
// llvm.amdgcn.s.wait.event.export.ready.
// CHECK-LABEL: @test_s_wait_event_export_ready
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.wait.event.export.ready
void test_s_wait_event_export_ready() {
  __builtin_amdgcn_s_wait_event_export_ready();
}
// __builtin_amdgcn_global_atomic_fadd_f32(addr, x) is expected to be emitted
// as an `atomicrmw fadd` on an addrspace(1) pointer with syncscope("agent"),
// monotonic ordering, 4-byte alignment, and both the
// !amdgpu.no.fine.grained.memory and !amdgpu.ignore.denormal.mode metadata
// attachments. The value returned by the builtin is written through `rtn`.
//
// The two signatures differ only in how the address-space-1 pointer is
// spelled: the `global` keyword when __SPIRV__ is not defined, and an explicit
// address_space(1) attribute when it is.
// CHECK-LABEL: @test_global_add_f32
// CHECK: = atomicrmw fadd ptr addrspace(1) %addr, float %x syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
#if !defined(__SPIRV__)
void test_global_add_f32(float *rtn, global float *addr, float x) {
#else
void test_global_add_f32(float *rtn, __attribute__((address_space(1))) float *addr, float x) {
#endif
  *rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x);
}