1 target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
2 ; RUN: opt < %s -alignment-from-assumptions -S | FileCheck %s
3 ; RUN: opt < %s -passes=alignment-from-assumptions -S | FileCheck %s
; @foo: simplest case. The "align" operand bundle on llvm.assume states that
; %a is 32-byte aligned (two-operand form, no offset). The load is written
; with align 4, and the FileCheck directive below expects it to be raised to
; align 32.
5 define i32 @foo(i32* nocapture %a) nounwind uwtable readonly {
7 tail call void @llvm.assume(i1 true) ["align"(i32* %a, i32 32)]
8 %0 = load i32, i32* %a, align 4
12 ; CHECK: load i32, i32* {{[^,]+}}, align 32
; @foo2: three-operand "align" bundle (pointer, alignment 32, offset 24),
; i.e. (%a - 24) is assumed to be 32-byte aligned. The load goes through a
; GEP of -2 i32 elements (-8 bytes), so its address is (%a - 24) + 16; the
; FileCheck directive below expects align 16.
16 define i32 @foo2(i32* nocapture %a) nounwind uwtable readonly {
18 tail call void @llvm.assume(i1 true) ["align"(i32* %a, i32 32, i32 24)]
19 %arrayidx = getelementptr inbounds i32, i32* %a, i64 -2
20 %0 = load i32, i32* %arrayidx, align 4
24 ; CHECK: load i32, i32* {{[^,]+}}, align 16
; @foo2a: "align" bundle with alignment 32 and offset 28, i.e. (%a - 28) is
; assumed to be 32-byte aligned. The GEP advances by +1 i32 element
; (+4 bytes), so the loaded address is (%a - 28) + 32; the FileCheck
; directive below expects the full align 32.
28 define i32 @foo2a(i32* nocapture %a) nounwind uwtable readonly {
30 tail call void @llvm.assume(i1 true) ["align"(i32* %a, i32 32, i32 28)]
31 %arrayidx = getelementptr inbounds i32, i32* %a, i64 1
32 %0 = load i32, i32* %arrayidx, align 4
36 ; CHECK: load i32, i32* {{[^,]+}}, align 32
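40 ; TODO: this load could be 8-byte aligned: (%a - 28) is 32-byte aligned,
; so the accessed address %a - 4 equals (%a - 28) + 24, a multiple of 8.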
; @foo2b: same assumption as a 32-byte alignment with offset 28, but the GEP
; steps back by one i32 element (-4 bytes), giving address (%a - 28) + 24.
; The FileCheck directive below pins the current output, in which the load
; keeps its original align 4.
41 define i32 @foo2b(i32* nocapture %a) nounwind uwtable readonly {
43 tail call void @llvm.assume(i1 true) ["align"(i32* %a, i32 32, i32 28)]
44 %arrayidx = getelementptr inbounds i32, i32* %a, i64 -1
45 %0 = load i32, i32* %arrayidx, align 4
49 ; CHECK: load i32, i32* {{[^,]+}}, align 4
; @goo: three-operand "align" bundle with an explicit offset of 0. The
; FileCheck directive below expects the same result as the two-operand
; form: the direct load of %a is raised from align 4 to align 32.
53 define i32 @goo(i32* nocapture %a) nounwind uwtable readonly {
55 tail call void @llvm.assume(i1 true) ["align"(i32* %a, i32 32, i32 0)]
56 %0 = load i32, i32* %a, align 4
60 ; CHECK: load i32, i32* {{[^,]+}}, align 32
; @hoo: alignment propagated through a loop induction variable. %a is
; assumed 32-byte aligned (the alignment operand is i64, the offset is i32).
; The index starts at 0 and advances by 8 i32 elements (32 bytes) per
; iteration, so every accessed address is a multiple of 32 bytes past %a.
; The FileCheck directives below expect the load to become align 32.
64 define i32 @hoo(i32* nocapture %a) nounwind uwtable readonly {
66 tail call void @llvm.assume(i1 true) ["align"(i32* %a, i64 32, i32 0)]
69 for.body: ; preds = %entry, %for.body
70 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
71 %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
72 %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
73 %0 = load i32, i32* %arrayidx, align 4
74 %add = add nsw i32 %0, %r.06
; Step of 8 elements * 4 bytes = 32 bytes per iteration.
75 %indvars.iv.next = add i64 %indvars.iv, 8
76 %1 = trunc i64 %indvars.iv.next to i32
77 %cmp = icmp slt i32 %1, 2048
78 br i1 %cmp, label %for.body, label %for.end
80 for.end: ; preds = %for.body
81 %add.lcssa = phi i32 [ %add, %for.body ]
85 ; CHECK: load i32, i32* %arrayidx, align 32
86 ; CHECK: ret i32 %add.lcssa
90 ; def hoo2(a, id, num):
91 ;   for i0 in range(id*64, 4096, num*64):
92 ;     for i1 in range(0, 4096, 32):
93 ;       for i2 in range(0, 4096, 32):
94 ;         load(a, i0+i1+i2+32)
; @hoo2: alignment propagated through three nested induction variables.
; %a is assumed 32-byte aligned (alignment operand i8, offset operand i64).
; The element index is %i0 + %i1 + %i2 + 32, where %i0 starts at %id << 6
; and steps by %num << 6, and %i1 / %i2 start at 0 and step by 32. Every
; term is a multiple of 32 elements, and the FileCheck directive below
; expects the load to become align 32.
95 define void @hoo2(i32* nocapture %a, i64 %id, i64 %num) nounwind uwtable readonly {
97 tail call void @llvm.assume(i1 true) ["align"(i32* %a, i8 32, i64 0)]
; Scale both %id and %num by 64 (shift left by 6).
98 %id.mul = shl nsw i64 %id, 6
99 %num.mul = shl nsw i64 %num, 6
103 %i0 = phi i64 [ %id.mul, %entry ], [ %i0.next, %for0.end ]
107 %i1 = phi i64 [ 0, %for0.body ], [ %i1.next, %for1.end ]
111 %i2 = phi i64 [ 0, %for1.body ], [ %i2.next, %for2.body ]
; Combined element index: i0 + i1 + i2 + 32.
113 %t1 = add nuw nsw i64 %i0, %i1
114 %t2 = add nuw nsw i64 %t1, %i2
115 %t3 = add nuw nsw i64 %t2, 32
116 %arrayidx = getelementptr inbounds i32, i32* %a, i64 %t3
117 %x = load i32, i32* %arrayidx, align 4
119 %i2.next = add nuw nsw i64 %i2, 32
120 %cmp2 = icmp ult i64 %i2.next, 4096
121 br i1 %cmp2, label %for2.body, label %for1.end
124 %i1.next = add nuw nsw i64 %i1, 32
125 %cmp1 = icmp ult i64 %i1.next, 4096
126 br i1 %cmp1, label %for1.body, label %for0.end
129 %i0.next = add nuw nsw i64 %i0, %num.mul
130 %cmp0 = icmp ult i64 %i0.next, 4096
131 br i1 %cmp0, label %for0.body, label %return
137 ; CHECK: load i32, i32* %arrayidx, align 32
; @joo: loop whose induction variable does not start on a 32-byte boundary.
; %a is assumed 32-byte aligned (both bundle integers are i8). The index
; starts at 4 elements (16 bytes) and advances by 8 elements (32 bytes), so
; accessed addresses are %a + 16 + 32*k. The FileCheck directives below
; expect the load to become align 16.
141 define i32 @joo(i32* nocapture %a) nounwind uwtable readonly {
143 tail call void @llvm.assume(i1 true) ["align"(i32* %a, i8 32, i8 0)]
146 for.body: ; preds = %entry, %for.body
; Initial index 4 => initial byte offset 16.
147 %indvars.iv = phi i64 [ 4, %entry ], [ %indvars.iv.next, %for.body ]
148 %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
149 %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
150 %0 = load i32, i32* %arrayidx, align 4
151 %add = add nsw i32 %0, %r.06
152 %indvars.iv.next = add i64 %indvars.iv, 8
153 %1 = trunc i64 %indvars.iv.next to i32
154 %cmp = icmp slt i32 %1, 2048
155 br i1 %cmp, label %for.body, label %for.end
157 for.end: ; preds = %for.body
158 %add.lcssa = phi i32 [ %add, %for.body ]
162 ; CHECK: load i32, i32* %arrayidx, align 16
163 ; CHECK: ret i32 %add.lcssa
; @koo: the llvm.assume call sits inside the loop body, immediately before
; the load, rather than ahead of the loop. %a is assumed 32-byte aligned;
; the index starts at 0 and advances by 4 elements (16 bytes), so accessed
; addresses are %a + 16*k. The FileCheck directives below expect the load
; to become align 16.
166 define i32 @koo(i32* nocapture %a) nounwind uwtable readonly {
170 for.body: ; preds = %entry, %for.body
171 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
172 %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
173 %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
; The assumption is stated here, inside the loop.
174 tail call void @llvm.assume(i1 true) ["align"(i32* %a, i8 32, i8 0)]
175 %0 = load i32, i32* %arrayidx, align 4
176 %add = add nsw i32 %0, %r.06
177 %indvars.iv.next = add i64 %indvars.iv, 4
178 %1 = trunc i64 %indvars.iv.next to i32
179 %cmp = icmp slt i32 %1, 2048
180 br i1 %cmp, label %for.body, label %for.end
182 for.end: ; preds = %for.body
183 %add.lcssa = phi i32 [ %add, %for.body ]
187 ; CHECK: load i32, i32* %arrayidx, align 16
188 ; CHECK: ret i32 %add.lcssa
; @koo2: loop with a negative starting index and i128 bundle operands.
; %a is assumed 32-byte aligned. The index starts at -4 elements (-16 bytes)
; and advances by 4 elements (16 bytes), so accessed addresses are
; %a - 16 + 16*k. The FileCheck directives below expect the load to become
; align 16.
191 define i32 @koo2(i32* nocapture %a) nounwind uwtable readonly {
193 tail call void @llvm.assume(i1 true) ["align"(i32* %a, i128 32, i128 0)]
196 for.body: ; preds = %entry, %for.body
; Initial index -4 => initial byte offset -16.
197 %indvars.iv = phi i64 [ -4, %entry ], [ %indvars.iv.next, %for.body ]
198 %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
199 %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
200 %0 = load i32, i32* %arrayidx, align 4
201 %add = add nsw i32 %0, %r.06
202 %indvars.iv.next = add i64 %indvars.iv, 4
203 %1 = trunc i64 %indvars.iv.next to i32
204 %cmp = icmp slt i32 %1, 2048
205 br i1 %cmp, label %for.body, label %for.end
207 for.end: ; preds = %for.body
208 %add.lcssa = phi i32 [ %add, %for.body ]
212 ; CHECK: load i32, i32* %arrayidx, align 16
213 ; CHECK: ret i32 %add.lcssa
; @moo: alignment applied to a memory intrinsic rather than a load. %a is
; assumed 32-byte aligned (i16 alignment operand) and is bitcast to i8* for
; use as the memset destination. The FileCheck directives below expect the
; destination's align 4 to be raised to align 32.
216 define i32 @moo(i32* nocapture %a) nounwind uwtable {
218 tail call void @llvm.assume(i1 true) ["align"(i32* %a, i16 32)]
219 %0 = bitcast i32* %a to i8*
220 tail call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 0, i64 64, i1 false)
224 ; CHECK: @llvm.memset.p0i8.i64(i8* align 32 %0, i8 0, i64 64, i1 false)
225 ; CHECK: ret i32 undef
; @moo2: memcpy whose operands are covered by two separate llvm.assume
; calls. %b is assumed 128-byte aligned, and %0 (the i8* bitcast of %a) is
; assumed 32-byte aligned. The FileCheck directives below expect the memcpy
; destination to become align 32 and the source align 128.
228 define i32 @moo2(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
230 tail call void @llvm.assume(i1 true) ["align"(i32* %b, i32 128)]
231 %0 = bitcast i32* %a to i8*
; This assumption is made on the bitcast value, not on %a itself.
232 tail call void @llvm.assume(i1 true) ["align"(i8* %0, i16 32)]
233 %1 = bitcast i32* %b to i8*
234 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 64, i1 false)
238 ; CHECK: @llvm.memcpy.p0i8.p0i8.i64(i8* align 32 %0, i8* align 128 %1, i64 64, i1 false)
239 ; CHECK: ret i32 undef
; @moo3: same memcpy scenario, but both alignment facts are attached to a
; single llvm.assume call as two "align" operand bundles (%0 at 32 bytes,
; %b at 128 bytes). The FileCheck directives below expect the memcpy
; destination to become align 32 and the source align 128.
242 define i32 @moo3(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
244 %0 = bitcast i32* %a to i8*
245 tail call void @llvm.assume(i1 true) ["align"(i8* %0, i16 32), "align"(i32* %b, i32 128)]
246 %1 = bitcast i32* %b to i8*
247 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 64, i1 false)
251 ; CHECK: @llvm.memcpy.p0i8.p0i8.i64(i8* align 32 %0, i8* align 128 %1, i64 64, i1 false)
252 ; CHECK: ret i32 undef
255 declare void @llvm.assume(i1) nounwind
257 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
258 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind