; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s
; Ports of most of test/CodeGen/NVPTX/access-non-generic.ll

@scalar = internal addrspace(3) global float 0.0, align 4
@array = internal addrspace(3) global [10 x float] zeroinitializer, align 4

define amdgpu_kernel void @load_store_lds_f32(i32 %i, float %v) #0 {
; CHECK-LABEL: define amdgpu_kernel void @load_store_lds_f32(
; CHECK-SAME: i32 [[I:%.*]], float [[V:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[TMP:%.*]] = load float, ptr addrspace(3) @scalar, align 4
; CHECK-NEXT:    call void @use(float [[TMP]])
; CHECK-NEXT:    store float [[V]], ptr addrspace(3) @scalar, align 4
; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr addrspace(3) @scalar, align 4
; CHECK-NEXT:    call void @use(float [[TMP2]])
; CHECK-NEXT:    store float [[V]], ptr addrspace(3) @scalar, align 4
; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr addrspace(3) getelementptr inbounds ([10 x float], ptr addrspace(3) @array, i32 0, i32 5), align 4
; CHECK-NEXT:    call void @use(float [[TMP3]])
; CHECK-NEXT:    store float [[V]], ptr addrspace(3) getelementptr inbounds ([10 x float], ptr addrspace(3) @array, i32 0, i32 5), align 4
; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [10 x float], ptr addrspace(3) @array, i32 0, i32 5
; CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr addrspace(3) [[TMP4]], align 4
; CHECK-NEXT:    call void @use(float [[TMP5]])
; CHECK-NEXT:    store float [[V]], ptr addrspace(3) [[TMP4]], align 4
; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [10 x float], ptr addrspace(3) @array, i32 0, i32 [[I]]
; CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr addrspace(3) [[TMP7]], align 4
; CHECK-NEXT:    call void @use(float [[TMP8]])
; CHECK-NEXT:    store float [[V]], ptr addrspace(3) [[TMP7]], align 4
; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT:    ret void
;
bb:
  %tmp = load float, ptr addrspacecast (ptr addrspace(3) @scalar to ptr), align 4
  call void @use(float %tmp)
  store float %v, ptr addrspacecast (ptr addrspace(3) @scalar to ptr), align 4
  call void @llvm.amdgcn.s.barrier()
  %tmp1 = addrspacecast ptr addrspace(3) @scalar to ptr
  %tmp2 = load float, ptr %tmp1, align 4
  call void @use(float %tmp2)
  store float %v, ptr %tmp1, align 4
  call void @llvm.amdgcn.s.barrier()
  %tmp3 = load float, ptr getelementptr inbounds ([10 x float], ptr addrspacecast (ptr addrspace(3) @array to ptr), i32 0, i32 5), align 4
  call void @use(float %tmp3)
  store float %v, ptr getelementptr inbounds ([10 x float], ptr addrspacecast (ptr addrspace(3) @array to ptr), i32 0, i32 5), align 4
  call void @llvm.amdgcn.s.barrier()
  %tmp4 = getelementptr inbounds [10 x float], ptr addrspacecast (ptr addrspace(3) @array to ptr), i32 0, i32 5
  %tmp5 = load float, ptr %tmp4, align 4
  call void @use(float %tmp5)
  store float %v, ptr %tmp4, align 4
  call void @llvm.amdgcn.s.barrier()
  %tmp6 = addrspacecast ptr addrspace(3) @array to ptr
  %tmp7 = getelementptr inbounds [10 x float], ptr %tmp6, i32 0, i32 %i
  %tmp8 = load float, ptr %tmp7, align 4
  call void @use(float %tmp8)
  store float %v, ptr %tmp7, align 4
  call void @llvm.amdgcn.s.barrier()
  ret void
}

define i32 @constexpr_load_int_from_float_lds() #0 {
; CHECK-LABEL: define i32 @constexpr_load_int_from_float_lds(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[TMP:%.*]] = load i32, ptr addrspace(3) @scalar, align 4
; CHECK-NEXT:    ret i32 [[TMP]]
;
bb:
  %tmp = load i32, ptr addrspacecast (ptr addrspace(3) @scalar to ptr), align 4
  ret i32 %tmp
}

define i32 @load_int_from_global_float(ptr addrspace(1) %input, i32 %i, i32 %j) #0 {
; CHECK-LABEL: define i32 @load_int_from_global_float(
; CHECK-SAME: ptr addrspace(1) [[INPUT:%.*]], i32 [[I:%.*]], i32 [[J:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, ptr addrspace(1) [[INPUT]], i32 [[I]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, ptr addrspace(1) [[TMP1]], i32 [[J]]
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(1) [[TMP2]], align 4
; CHECK-NEXT:    ret i32 [[TMP4]]
;
bb:
  %tmp = addrspacecast ptr addrspace(1) %input to ptr
  %tmp1 = getelementptr float, ptr %tmp, i32 %i
  %tmp2 = getelementptr float, ptr %tmp1, i32 %j
  %tmp4 = load i32, ptr %tmp2
  ret i32 %tmp4
}

define amdgpu_kernel void @nested_const_expr() #0 {
; CHECK-LABEL: define amdgpu_kernel void @nested_const_expr(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:    store i32 1, ptr addrspace(3) getelementptr ([10 x float], ptr addrspace(3) @array, i64 0, i64 1), align 4
; CHECK-NEXT:    ret void
;
  store i32 1, ptr bitcast (ptr getelementptr ([10 x float], ptr addrspacecast (ptr addrspace(3) @array to ptr), i64 0, i64 1) to ptr), align 4
  ret void
}

define amdgpu_kernel void @rauw(ptr addrspace(1) %input) #0 {
; CHECK-LABEL: define amdgpu_kernel void @rauw(
; CHECK-SAME: ptr addrspace(1) [[INPUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr float, ptr addrspace(1) [[INPUT]], i64 10
; CHECK-NEXT:    [[V:%.*]] = load float, ptr addrspace(1) [[ADDR]], align 4
; CHECK-NEXT:    store float [[V]], ptr addrspace(1) [[ADDR]], align 4
; CHECK-NEXT:    ret void
;
bb:
  %generic_input = addrspacecast ptr addrspace(1) %input to ptr
  %addr = getelementptr float, ptr %generic_input, i64 10
  %v = load float, ptr %addr
  store float %v, ptr %addr
  ret void
}

; FIXME: Should be able to eliminate the cast inside the loop
define amdgpu_kernel void @loop() #0 {
; CHECK-LABEL: define amdgpu_kernel void @loop(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[END:%.*]] = getelementptr float, ptr addrspace(3) @array, i64 10
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[I:%.*]] = phi ptr addrspace(3) [ @array, %[[ENTRY]] ], [ [[I2:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[V:%.*]] = load float, ptr addrspace(3) [[I]], align 4
; CHECK-NEXT:    call void @use(float [[V]])
; CHECK-NEXT:    [[I2]] = getelementptr float, ptr addrspace(3) [[I]], i64 1
; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp eq ptr addrspace(3) [[I2]], [[END]]
; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret void
;
entry:
  %p = addrspacecast ptr addrspace(3) @array to ptr
  %end = getelementptr float, ptr %p, i64 10
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi ptr [ %p, %entry ], [ %i2, %loop ]
  %v = load float, ptr %i
  call void @use(float %v)
  %i2 = getelementptr float, ptr %i, i64 1
  %exit_cond = icmp eq ptr %i2, %end
  br i1 %exit_cond, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void
}

@generic_end = external addrspace(1) global ptr

define amdgpu_kernel void @loop_with_generic_bound() #0 {
; CHECK-LABEL: define amdgpu_kernel void @loop_with_generic_bound(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[END:%.*]] = load ptr, ptr addrspace(1) @generic_end, align 8
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[I:%.*]] = phi ptr addrspace(3) [ @array, %[[ENTRY]] ], [ [[I2:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[V:%.*]] = load float, ptr addrspace(3) [[I]], align 4
; CHECK-NEXT:    call void @use(float [[V]])
; CHECK-NEXT:    [[I2]] = getelementptr float, ptr addrspace(3) [[I]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(3) [[I2]] to ptr
; CHECK-NEXT:    [[EXIT_COND:%.*]] = icmp eq ptr [[TMP0]], [[END]]
; CHECK-NEXT:    br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret void
;
entry:
  %p = addrspacecast ptr addrspace(3) @array to ptr
  %end = load ptr, ptr addrspace(1) @generic_end
  br label %loop

loop:                                             ; preds = %loop, %entry
  %i = phi ptr [ %p, %entry ], [ %i2, %loop ]
  %v = load float, ptr %i
  call void @use(float %v)
  %i2 = getelementptr float, ptr %i, i64 1
  %exit_cond = icmp eq ptr %i2, %end
  br i1 %exit_cond, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void
}

define void @select_bug() #0 {
; CHECK-LABEL: define void @select_bug(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:    [[CMP:%.*]] = icmp ne ptr inttoptr (i64 4873 to ptr), null
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i64 73, i64 93
; CHECK-NEXT:    [[ADD_PTR157:%.*]] = getelementptr inbounds i64, ptr undef, i64 [[SEL]]
; CHECK-NEXT:    [[CMP169:%.*]] = icmp uge ptr undef, [[ADD_PTR157]]
; CHECK-NEXT:    unreachable
;
  %cmp = icmp ne ptr inttoptr (i64 4873 to ptr), null
  %sel = select i1 %cmp, i64 73, i64 93
  %add.ptr157 = getelementptr inbounds i64, ptr undef, i64 %sel
  %cmp169 = icmp uge ptr undef, %add.ptr157
  unreachable
}

declare void @llvm.amdgcn.s.barrier() #1
declare void @use(float) #0

attributes #0 = { nounwind }
attributes #1 = { convergent nounwind }