1 ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s
; Test: a phi merges two GEPs into the same alloca (%arrayidx0 from %if,
; %arrayidx1 from %else) and the result is stored through. The FileCheck
; directives below expect the alloca to be replaced by the addrspace(3)
; global @branch_ptr_var_same_alloca.alloca, with the GEPs, the phi and the
; store all retyped to addrspace(3) pointers.
; Unnamed values are matched with a numeric regex instead of a literal value
; number so the directives do not depend on how unnamed values are numbered.
4 ; CHECK-LABEL: @branch_ptr_var_same_alloca(
5 ; CHECK: getelementptr inbounds [256 x [64 x i32]], [256 x [64 x i32]] addrspace(3)* @branch_ptr_var_same_alloca.alloca, i32 0, i32 %{{[0-9]+}}
8 ; CHECK: %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %a
11 ; CHECK: %arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %b
14 ; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
15 ; CHECK: store i32 0, i32 addrspace(3)* %phi.ptr, align 4
16 define amdgpu_kernel void @branch_ptr_var_same_alloca(i32 %a, i32 %b) #0 {
18 %alloca = alloca [64 x i32], align 4
19 br i1 undef, label %if, label %else
; Both GEPs index the same alloca, with %a on one path and %b on the other.
22 %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
26 %arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %b
30 %phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
31 store i32 0, i32* %phi.ptr, align 4
; Test: a phi merges a GEP into the alloca (from %if) with a null pointer
; (from %entry), null being the second incoming value. The FileCheck
; directive expects the phi to be retyped to an addrspace(3) pointer with
; the incoming values kept in the same order.
35 ; CHECK-LABEL: @branch_ptr_phi_alloca_null_0(
36 ; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %if ], [ null, %entry ]
37 define amdgpu_kernel void @branch_ptr_phi_alloca_null_0(i32 %a, i32 %b) #0 {
39 %alloca = alloca [64 x i32], align 4
40 br i1 undef, label %if, label %endif
43 %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
; The only non-null incoming value is derived from %alloca.
47 %phi.ptr = phi i32* [ %arrayidx0, %if ], [ null, %entry ]
48 store i32 0, i32* %phi.ptr, align 4
; Test: same shape as @branch_ptr_phi_alloca_null_0, but with the phi operands
; swapped so that null (from %entry) is the first incoming value. The
; FileCheck directive expects the phi to be retyped to an addrspace(3)
; pointer with the incoming values kept in the same order.
52 ; CHECK-LABEL: @branch_ptr_phi_alloca_null_1(
53 ; CHECK: %phi.ptr = phi i32 addrspace(3)* [ null, %entry ], [ %arrayidx0, %if ]
54 define amdgpu_kernel void @branch_ptr_phi_alloca_null_1(i32 %a, i32 %b) #0 {
56 %alloca = alloca [64 x i32], align 4
57 br i1 undef, label %if, label %endif
60 %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
; The only non-null incoming value is derived from %alloca.
64 %phi.ptr = phi i32* [ null, %entry ], [ %arrayidx0, %if ]
65 store i32 0, i32* %phi.ptr, align 4
; Test: a phi with a single incoming value, a GEP into the alloca coming from
; %entry. The FileCheck directives below expect the alloca to be replaced by
; the addrspace(3) global @one_phi_value.alloca, with the GEP, the phi and
; the store retyped to addrspace(3) pointers.
; Unnamed values are matched with a numeric regex instead of a literal value
; number so the directives do not depend on how unnamed values are numbered.
69 ; CHECK-LABEL: @one_phi_value(
70 ; CHECK: getelementptr inbounds [256 x [64 x i32]], [256 x [64 x i32]] addrspace(3)* @one_phi_value.alloca, i32 0, i32 %{{[0-9]+}}
71 ; CHECK: %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %a
73 ; CHECK: br label %exit
74 ; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %entry ]
75 ; CHECK: store i32 0, i32 addrspace(3)* %phi.ptr, align 4
76 define amdgpu_kernel void @one_phi_value(i32 %a) #0 {
78 %alloca = alloca [64 x i32], align 4
79 %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
; Single-entry phi: its only incoming value is the GEP computed in %entry.
83 %phi.ptr = phi i32* [ %arrayidx0, %entry ]
84 store i32 0, i32* %phi.ptr, align 4
; Negative test: the phi merges a GEP into the alloca (from %if) with a
; pointer returned by a call to @get_unknown_pointer (from %else). The
; FileCheck directives expect the alloca, the GEP, the call, the phi and the
; store to appear unchanged, still using plain i32* / [64 x i32]* types
; rather than addrspace(3) ones.
88 ; CHECK-LABEL: @branch_ptr_alloca_unknown_obj(
89 ; CHECK: %alloca = alloca [64 x i32], align 4
92 ; CHECK: %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
95 ; CHECK: %arrayidx1 = call i32* @get_unknown_pointer()
98 ; CHECK: %phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
99 ; CHECK: store i32 0, i32* %phi.ptr, align 4
100 define amdgpu_kernel void @branch_ptr_alloca_unknown_obj(i32 %a, i32 %b) #0 {
102 %alloca = alloca [64 x i32], align 4
103 br i1 undef, label %if, label %else
106 %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
; This incoming pointer does not come from %alloca; it is the result of a
; call to a function that is only declared in this file.
110 %arrayidx1 = call i32* @get_unknown_pointer()
114 %phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
115 store i32 0, i32* %phi.ptr, align 4
; Test: a loop whose induction variable is a pointer into the alloca. The
; pointer starts at element 2 (%arrayidx), advances by one i32 per iteration,
; and the loop exits when it equals the address of element 48 (%arrayidx1).
; As the FIXME below states, this case is not promoted yet; the FileCheck
; directives pin the current output, in which the alloca and the i32* phi
; are left in place.
119 ; kernel void ptr_induction_var_same_alloca(void)
125 ; for (int* p = &alloca[2], *e = &alloca[48]; p != e; ++p, ++i)
131 ; FIXME: This should be promotable. We need to use
132 ; getUnderlyingObjects when looking at the icmp user.
134 ; CHECK-LABEL: @ptr_induction_var_same_alloca(
135 ; CHECK: %alloca = alloca [64 x i32], align 4
136 ; CHECK: phi i32* [ %arrayidx, %entry ], [ %incdec.ptr, %for.body ]
137 define amdgpu_kernel void @ptr_induction_var_same_alloca() #0 {
139 %alloca = alloca [64 x i32], align 4
; %arrayidx is the loop's start pointer and %arrayidx1 its end pointer; both
; are GEPs into the same alloca.
140 %arrayidx = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 2
141 %arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 48
144 for.cond.cleanup: ; preds = %for.body
147 for.body: ; preds = %for.body, %entry
; %i.09 is the integer counter being stored; %p.08 is the pointer it is
; stored through.
148 %i.09 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
149 %p.08 = phi i32* [ %arrayidx, %entry ], [ %incdec.ptr, %for.body ]
150 store i32 %i.09, i32* %p.08, align 4
151 %incdec.ptr = getelementptr inbounds i32, i32* %p.08, i32 1
152 %inc = add nuw nsw i32 %i.09, 1
; Exit test: compares the advanced pointer with the end pointer. This is the
; icmp user the FIXME above refers to.
153 %cmp = icmp eq i32* %incdec.ptr, %arrayidx1
154 br i1 %cmp, label %for.cond.cleanup, label %for.body
; Negative test: a pointer induction variable that starts at element 2 of the
; alloca, but whose end pointer is the result of a call to
; @get_unknown_pointer. The FileCheck directives expect the alloca to remain
; and the phi and the loop-exit icmp to keep their i32* operands.
158 ; extern int* get_unknown_pointer(void);
160 ; kernel void ptr_induction_var_alloca_unknown(void)
165 ; for (int* p = &alloca[2], *e = get_unknown_pointer(); p != e; ++p, ++i)
171 ; CHECK-LABEL: @ptr_induction_var_alloca_unknown(
172 ; CHECK: %alloca = alloca [64 x i32], align 4
173 ; CHECK: %p.08 = phi i32* [ %incdec.ptr, %for.body ], [ %arrayidx, %for.body.preheader ]
174 ; CHECK: %cmp = icmp eq i32* %incdec.ptr, %call
175 define amdgpu_kernel void @ptr_induction_var_alloca_unknown() #0 {
177 %alloca = alloca [64 x i32], align 4
178 %arrayidx = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 2
; NOTE(review): this call site references attribute group #2, but only group
; #0 is defined in the visible part of this file - confirm whether that is
; intentional.
179 %call = tail call i32* @get_unknown_pointer() #2
; Guard: skip the loop entirely when the start pointer already equals the
; end pointer.
180 %cmp.7 = icmp eq i32* %arrayidx, %call
181 br i1 %cmp.7, label %for.cond.cleanup, label %for.body.preheader
183 for.body.preheader: ; preds = %entry
186 for.cond.cleanup.loopexit: ; preds = %for.body
187 br label %for.cond.cleanup
189 for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
192 for.body: ; preds = %for.body, %for.body.preheader
; %i.09 is the integer counter being stored; %p.08 is the pointer it is
; stored through.
193 %i.09 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
194 %p.08 = phi i32* [ %incdec.ptr, %for.body ], [ %arrayidx, %for.body.preheader ]
195 store i32 %i.09, i32* %p.08, align 4
196 %incdec.ptr = getelementptr inbounds i32, i32* %p.08, i32 1
197 %inc = add nuw nsw i32 %i.09, 1
; Exit test: compares a pointer derived from the alloca with the pointer
; returned by the call.
198 %cmp = icmp eq i32* %incdec.ptr, %call
199 br i1 %cmp, label %for.cond.cleanup.loopexit, label %for.body
; External function returning an i32*. It has no body in this file; the tests
; above call it to obtain a pointer that is not derived from their alloca.
202 declare i32* @get_unknown_pointer() #0
; Attribute group used by the kernels and the declaration in this file.
; NOTE(review): the outer dimension 256 of the [256 x [64 x i32]] arrays in
; the expected output presumably corresponds to the maximum flat work-group
; size of 256 given here - confirm.
204 attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }