1 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s
3 ; InstCombine pulls the addrspacecast out of the select; make sure
4 ; -infer-address-spaces doesn't do something insane on non-canonical IR.
; Both select operands are casts from addrspace(3) to the default ("flat")
; address space, but the select result is only returned as a flat pointer.
; The expectations below require the casts, the flat select and the return to
; appear exactly as written in the input.
6 ; CHECK-LABEL: @return_select_group_flat(
7 ; CHECK-NEXT: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
8 ; CHECK-NEXT: %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
9 ; CHECK-NEXT: %select = select i1 %c, i32* %cast0, i32* %cast1
10 ; CHECK-NEXT: ret i32* %select
11 define i32* @return_select_group_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
12 %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
13 %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
14 %select = select i1 %c, i32* %cast0, i32* %cast1
; Both select operands are flat casts of addrspace(3) pointers and the select
; result is only stored through. The select and the store are expected to be
; rewritten to operate directly on the addrspace(3) pointers.
18 ; CHECK-LABEL: @store_select_group_flat(
19 ; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1
20 ; CHECK: store i32 -1, i32 addrspace(3)* %select
21 define amdgpu_kernel void @store_select_group_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
22 %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
23 %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
24 %select = select i1 %c, i32* %cast0, i32* %cast1
25 store i32 -1, i32* %select
29 ; Make sure metadata is preserved
; The input select carries !prof !0. The select and the load are expected to be
; rewritten to addrspace(3), and the rewritten select must still carry !prof !0.
30 ; CHECK-LABEL: @load_select_group_flat_md(
31 ; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1, !prof !0
32 ; CHECK: %load = load i32, i32 addrspace(3)* %select
33 define i32 @load_select_group_flat_md(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
34 %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
35 %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
36 %select = select i1 %c, i32* %cast0, i32* %cast1, !prof !0
37 %load = load i32, i32* %select
; The two select operands are flat casts from different address spaces
; (addrspace(3) and addrspace(5)). The expectations require the casts, the flat
; select and the flat store to be kept as in the input.
41 ; CHECK-LABEL: @store_select_mismatch_group_private_flat(
42 ; CHECK: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
43 ; CHECK: %cast1 = addrspacecast i32 addrspace(5)* %private.ptr.1 to i32*
44 ; CHECK: %select = select i1 %c, i32* %cast0, i32* %cast1
45 ; CHECK: store i32 -1, i32* %select
46 define amdgpu_kernel void @store_select_mismatch_group_private_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(5)* %private.ptr.1) #0 {
47 %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
48 %cast1 = addrspacecast i32 addrspace(5)* %private.ptr.1 to i32*
49 %select = select i1 %c, i32* %cast0, i32* %cast1
50 store i32 -1, i32* %select
; Two internal addrspace(3) globals referenced by the constant-expression tests
; in this file.
54 @lds0 = internal addrspace(3) global i32 123, align 4
55 @lds1 = internal addrspace(3) global i32 456, align 4
; Constant-expression form: the load address is a constant select between flat
; casts of the addrspace(3) globals @lds0 and @lds1. The load is expected to use
; an addrspace(3) constant select over the globals themselves, with the casts
; removed.
57 ; CHECK-LABEL: @constexpr_select_group_flat(
58 ; CHECK: %tmp = load i32, i32 addrspace(3)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(3)* @lds0, i32 addrspace(3)* @lds1)
59 define i32 @constexpr_select_group_flat() #0 {
61 %tmp = load i32, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(3)* @lds1 to i32*))
; Constant select whose operands are flat casts of globals in different address
; spaces (@lds0 in addrspace(3), @global0 in addrspace(1)). The load is expected
; to stay on the flat constant select, unchanged from the input.
65 ; CHECK-LABEL: @constexpr_select_group_global_flat_mismatch(
66 ; CHECK: %tmp = load i32, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(1)* @global0 to i32*))
67 define i32 @constexpr_select_group_global_flat_mismatch() #0 {
69 %tmp = load i32, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(1)* @global0 to i32*))
; The true operand is a flat cast of an addrspace(3) pointer and the false
; operand is a flat null. The select and store are expected in addrspace(3),
; with the null operand expressed as a constant addrspacecast of the flat null
; to addrspace(3).
73 ; CHECK-LABEL: @store_select_group_flat_null(
74 ; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*)
75 ; CHECK: store i32 -1, i32 addrspace(3)* %select
76 define amdgpu_kernel void @store_select_group_flat_null(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
77 %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
78 %select = select i1 %c, i32* %cast0, i32* null
79 store i32 -1, i32* %select
; Operand-swapped variant: the flat null is the true operand and the flat cast
; of the addrspace(3) pointer is the false operand. The select and store are
; expected in addrspace(3), with the null wrapped in a constant addrspacecast
; and the operand order kept.
83 ; CHECK-LABEL: @store_select_group_flat_null_swap(
84 ; CHECK: %select = select i1 %c, i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*), i32 addrspace(3)* %group.ptr.0
85 ; CHECK: store i32 -1, i32 addrspace(3)* %select
86 define amdgpu_kernel void @store_select_group_flat_null_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
87 %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
88 %select = select i1 %c, i32* null, i32* %cast0
89 store i32 -1, i32* %select
; The false operand is a flat undef. The select and store are expected in
; addrspace(3), with the undef operand becoming an addrspace(3) undef.
93 ; CHECK-LABEL: @store_select_group_flat_undef(
94 ; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* undef
95 ; CHECK: store i32 -1, i32 addrspace(3)* %select
96 define amdgpu_kernel void @store_select_group_flat_undef(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
97 %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
98 %select = select i1 %c, i32* %cast0, i32* undef
99 store i32 -1, i32* %select
; Operand-swapped variant of the undef case: the flat undef is the true operand.
; The select and store are expected in addrspace(3), with an addrspace(3) undef
; in the same operand position.
103 ; CHECK-LABEL: @store_select_group_flat_undef_swap(
104 ; CHECK: %select = select i1 %c, i32 addrspace(3)* undef, i32 addrspace(3)* %group.ptr.0
105 ; CHECK: store i32 -1, i32 addrspace(3)* %select
106 define amdgpu_kernel void @store_select_group_flat_undef_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
107 %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
108 %select = select i1 %c, i32* undef, i32* %cast0
109 store i32 -1, i32* %select
; The select (flat cast of an addrspace(3) pointer vs. flat null) feeds a
; getelementptr whose result is stored through. The select, the getelementptr
; and the store are all expected to be rewritten to addrspace(3).
113 ; CHECK-LABEL: @store_select_gep_group_flat_null(
114 ; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*)
115 ; CHECK: %gep = getelementptr i32, i32 addrspace(3)* %select, i64 16
116 ; CHECK: store i32 -1, i32 addrspace(3)* %gep
117 define amdgpu_kernel void @store_select_gep_group_flat_null(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
118 %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
119 %select = select i1 %c, i32* %cast0, i32* null
120 %gep = getelementptr i32, i32* %select, i64 16
121 store i32 -1, i32* %gep
; Internal addrspace(1) global used as the operand from a different address
; space in the group/global mismatch tests in this file.
125 @global0 = internal addrspace(1) global i32 123, align 4
; The false operand is a constant flat cast of the addrspace(3) global @lds1.
; The select and store are expected in addrspace(3), with the select using
; @lds1 directly.
127 ; CHECK-LABEL: @store_select_group_flat_constexpr(
128 ; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* @lds1
129 ; CHECK: store i32 7, i32 addrspace(3)* %select
130 define amdgpu_kernel void @store_select_group_flat_constexpr(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
131 %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
132 %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(3)* @lds1 to i32*)
133 store i32 7, i32* %select
; The false operand is a flat pointer produced by a constant inttoptr. The
; select and store are expected in addrspace(3), with the flat inttoptr constant
; wrapped in a constant addrspacecast to addrspace(3).
137 ; CHECK-LABEL: @store_select_group_flat_inttoptr_flat(
138 ; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32* inttoptr (i64 12345 to i32*) to i32 addrspace(3)*)
139 ; CHECK: store i32 7, i32 addrspace(3)* %select
140 define amdgpu_kernel void @store_select_group_flat_inttoptr_flat(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
141 %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
142 %select = select i1 %c, i32* %cast0, i32* inttoptr (i64 12345 to i32*)
143 store i32 7, i32* %select
; The false operand is a constant flat cast of an addrspace(3) pointer produced
; by inttoptr. The select is expected in addrspace(3) using the addrspace(3)
; inttoptr constant directly (flat cast dropped), with the addrspace(3) store
; on the line immediately after the select.
147 ; CHECK-LABEL: @store_select_group_flat_inttoptr_group(
148 ; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* inttoptr (i32 400 to i32 addrspace(3)*)
149 ; CHECK-NEXT: store i32 7, i32 addrspace(3)* %select
150 define amdgpu_kernel void @store_select_group_flat_inttoptr_group(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
151 %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
152 %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(3)* inttoptr (i32 400 to i32 addrspace(3)*) to i32*)
153 store i32 7, i32* %select
; The true operand is a flat cast of an addrspace(3) pointer; the false operand
; is a constant flat cast of the addrspace(1) global @global0. With operands
; from different address spaces, the cast, the flat select and the flat store
; are expected to be kept as in the input.
157 ; CHECK-LABEL: @store_select_group_global_mismatch_flat_constexpr(
158 ; CHECK: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
159 ; CHECK: %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)
160 ; CHECK: store i32 7, i32* %select
161 define amdgpu_kernel void @store_select_group_global_mismatch_flat_constexpr(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
162 %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
163 %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)
164 store i32 7, i32* %select
; Operand-swapped variant of the group/global mismatch case: the constant flat
; cast of @global0 (addrspace(1)) is the true operand. The cast, the flat select
; and the flat store are expected to be kept as in the input.
168 ; CHECK-LABEL: @store_select_group_global_mismatch_flat_constexpr_swap(
169 ; CHECK: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
170 ; CHECK: %select = select i1 %c, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*), i32* %cast0
171 ; CHECK: store i32 7, i32* %select
172 define amdgpu_kernel void @store_select_group_global_mismatch_flat_constexpr_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
173 %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
174 %select = select i1 %c, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*), i32* %cast0
175 store i32 7, i32* %select
; Both select operands are constant flat casts of null pointers, one from
; addrspace(3) and one from addrspace(1). The flat select and flat store are
; expected to be kept as in the input.
179 ; CHECK-LABEL: @store_select_group_global_mismatch_null_null(
180 ; CHECK: %select = select i1 %c, i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)
181 ; CHECK: store i32 7, i32* %select
182 define amdgpu_kernel void @store_select_group_global_mismatch_null_null(i1 %c) #0 {
183 %select = select i1 %c, i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)
184 store i32 7, i32* %select
; Constant-expression form of the null/null mismatch: the store address is a
; constant select between flat casts of an addrspace(3) null and an
; addrspace(1) null. The store is expected to be unchanged from the input.
188 ; CHECK-LABEL: @store_select_group_global_mismatch_null_null_constexpr(
189 ; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
190 define amdgpu_kernel void @store_select_group_global_mismatch_null_null_constexpr() #0 {
191 store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
; Constant select between a flat cast of the addrspace(3) global @lds0 and a
; flat cast of an addrspace(1) null. The store is expected to be unchanged from
; the input.
195 ; CHECK-LABEL: @store_select_group_global_mismatch_gv_null_constexpr(
196 ; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
197 define amdgpu_kernel void @store_select_group_global_mismatch_gv_null_constexpr() #0 {
198 store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
; Constant select between a flat cast of an addrspace(3) null and a flat cast
; of the addrspace(1) global @global0. The store is expected to be unchanged
; from the input.
202 ; CHECK-LABEL: @store_select_group_global_mismatch_null_gv_constexpr(
203 ; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)), align 4
204 define amdgpu_kernel void @store_select_group_global_mismatch_null_gv_constexpr() #0 {
205 store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)), align 4
; Constant select between a flat cast of an addrspace(3) inttoptr constant and
; a flat cast of an addrspace(1) null. The store is expected to be unchanged
; from the input.
209 ; CHECK-LABEL: @store_select_group_global_mismatch_inttoptr_null_constexpr(
210 ; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* inttoptr (i64 123 to i32 addrspace(3)*) to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
211 define amdgpu_kernel void @store_select_group_global_mismatch_inttoptr_null_constexpr() #0 {
212 store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* inttoptr (i64 123 to i32 addrspace(3)*) to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
; Constant select between a flat inttoptr constant and a flat cast of an
; addrspace(1) null. The store is expected to be rewritten to addrspace(1): the
; flat inttoptr is wrapped in a constant addrspacecast to addrspace(1) and the
; other operand becomes a plain addrspace(1) null.
216 ; CHECK-LABEL: @store_select_group_global_mismatch_inttoptr_flat_null_constexpr(
217 ; CHECK: store i32 7, i32 addrspace(1)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(1)* addrspacecast (i32* inttoptr (i64 123 to i32*) to i32 addrspace(1)*), i32 addrspace(1)* null), align 4
218 define amdgpu_kernel void @store_select_group_global_mismatch_inttoptr_flat_null_constexpr() #0 {
219 store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* inttoptr (i64 123 to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
; Constant select between a flat cast of an addrspace(3) null and a flat cast
; of an addrspace(1) undef. The expected output is a store directly to an
; addrspace(3) null, i.e. no select remains in the store address.
; NOTE(review): the function name says "undef_undef", but the first operand in
; the input is a null, not an undef - confirm whether that is intentional.
223 ; CHECK-LABEL: @store_select_group_global_mismatch_undef_undef_constexpr(
224 ; CHECK: store i32 7, i32 addrspace(3)* null
225 define amdgpu_kernel void @store_select_group_global_mismatch_undef_undef_constexpr() #0 {
226 store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* undef to i32*)), align 4
; External addrspace(3) array global; its address is converted with ptrtoint in
; @store_select_group_constexpr_ptrtoint.
230 @lds2 = external addrspace(3) global [1024 x i32], align 4
; The false operand is a flat cast of an addrspace(1) pointer that is itself
; built by inttoptr from (ptrtoint of the addrspace(3) global @lds2) + 124. The
; true operand is a flat cast of an addrspace(3) pointer. The cast, the flat
; select and the flat store are expected to be kept as in the input.
232 ; CHECK-LABEL: @store_select_group_constexpr_ptrtoint(
233 ; CHECK: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
234 ; CHECK: %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32*)
235 ; CHECK: store i32 7, i32* %select
236 define amdgpu_kernel void @store_select_group_constexpr_ptrtoint(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
237 %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
238 %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32*)
239 store i32 7, i32* %select
; Vector-of-pointers variant: both select operands are flat casts of
; <2 x i32 addrspace(3)*> vectors, and the selected vector's elements are
; extracted and stored through. The expectations require the vector casts, the
; flat select, the extracts and the flat stores to be kept as in the input.
243 ; CHECK-LABEL: @store_select_group_flat_vector(
244 ; CHECK: %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32*>
245 ; CHECK: %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32*>
246 ; CHECK: %select = select i1 %c, <2 x i32*> %cast0, <2 x i32*> %cast1
247 ; CHECK: %extract0 = extractelement <2 x i32*> %select, i32 0
248 ; CHECK: %extract1 = extractelement <2 x i32*> %select, i32 1
249 ; CHECK: store i32 -1, i32* %extract0
250 ; CHECK: store i32 -2, i32* %extract1
251 define amdgpu_kernel void @store_select_group_flat_vector(i1 %c, <2 x i32 addrspace(3)*> %group.ptr.0, <2 x i32 addrspace(3)*> %group.ptr.1) #0 {
252 %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32*>
253 %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32*>
254 %select = select i1 %c, <2 x i32*> %cast0, <2 x i32*> %cast1
255 %extract0 = extractelement <2 x i32*> %select, i32 0
256 %extract1 = extractelement <2 x i32*> %select, i32 1
257 store i32 -1, i32* %extract0
258 store i32 -2, i32* %extract1
; Attribute group #0 (nounwind) is attached to the function definitions shown
; in this file. Metadata node !0 holds the branch weights referenced through
; !prof on the select in @load_select_group_flat_md.
262 attributes #0 = { nounwind }
264 !0 = !{!"branch_weights", i32 2, i32 10}