1 ; RUN: opt %loadPolly %defaultOpts -polly-codegen -enable-polly-vector -dce -S %s | FileCheck %s
2 ; Obtained from C source as:
3 ; clang -S -emit-llvm -O0 vect-outer-3b.c
4 ; opt -correlated-propagation -mem2reg -instcombine -loop-simplify -indvars -instnamer vect-outer-3b.s > vect-outer-3b.ll
6 ; ModuleID = 'vect-outer-3b.s'
7 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
8 target triple = "x86_64-unknown-linux-gnu"
; Test data: @image is a zero-initialized 40x40 float matrix and @out a
; zero-initialized 40-element float vector; both are 16-byte aligned.
10 @image = common global [40 x [40 x float]] zeroinitializer, align 16
11 @out = common global [40 x float] zeroinitializer, align 16
; foo: for each i in [0, 20), sums image[j][2*i] over j in [0, 40) into a
; float accumulator that starts at 0.0, then stores the sum to out[i].
; NOTE(review): the entry block and most block terminators are not visible in
; this excerpt; the control-flow edges described below are read from the
; preds annotations and the phi operands.
13 define void @foo() nounwind uwtable noinline {
; Outer loop header: %indvar1 is the outer counter i, starting at 0.
17 for.cond: ; preds = %for.inc16, %entry
18 %indvar1 = phi i64 [ %indvar.next2, %for.inc16 ], [ 0, %entry ]
; 2 * i is the column index used for @image (stride-2 column access).
19 %tmp5 = mul i64 %indvar1, 2
; Address of out[i], written once the inner loop has finished.
20 %arrayidx15 = getelementptr [40 x float]* @out, i64 0, i64 %indvar1
; Keep iterating while i != 20; otherwise leave through for.end19.
21 %exitcond4 = icmp ne i64 %indvar1, 20
22 br i1 %exitcond4, label %for.body, label %for.end19
24 for.body: ; preds = %for.cond
; Inner loop header: %indvar is the row counter j, %diff.0 the running sum.
27 for.cond1: ; preds = %for.inc, %for.body
28 %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %for.body ]
29 %diff.0 = phi float [ 0.000000e+00, %for.body ], [ %add, %for.inc ]
; Address of image[j][2*i].
30 %arrayidx8 = getelementptr [40 x [40 x float]]* @image, i64 0, i64 %indvar, i64 %tmp5
; Keep iterating while j != 40; otherwise leave through for.end.
31 %exitcond = icmp ne i64 %indvar, 40
32 br i1 %exitcond, label %for.body4, label %for.end
34 for.body4: ; preds = %for.cond1
; Inner latch: add image[j][2*i] to the running sum and advance j.
37 for.inc: ; preds = %for.body4
38 %tmp9 = load float* %arrayidx8, align 8
39 %add = fadd float %diff.0, %tmp9
40 %indvar.next = add i64 %indvar, 1
; Inner loop exit: store the accumulated sum to out[i].
43 for.end: ; preds = %for.cond1
44 %diff.0.lcssa = phi float [ %diff.0, %for.cond1 ]
45 store float %diff.0.lcssa, float* %arrayidx15, align 4
; Outer latch: advance i.
48 for.inc16: ; preds = %for.end
49 %indvar.next2 = add i64 %indvar1, 1
52 for.end19: ; preds = %for.cond
; main: calls check_vect, fills @image with image[i][j] = (float)(i + j) for
; i, j in [0, 40), then for each i in [0, 20) recomputes the sum of
; image[j][2*i] over j in [0, 40) and calls abort if it differs from out[i].
; NOTE(review): the instructions between for.end15 and for.cond16 are not
; visible in this excerpt; presumably foo is called there to populate @out --
; confirm against the full file. The ret is not visible either.
56 define i32 @main() nounwind uwtable {
58 call void @check_vect()
; Initialization nest, outer loop: %indvar9 is the row index i.
61 for.cond: ; preds = %for.inc12, %entry
62 %indvar9 = phi i64 [ %indvar.next10, %for.inc12 ], [ 0, %entry ]
; Keep iterating while i != 40.
63 %exitcond13 = icmp ne i64 %indvar9, 40
64 br i1 %exitcond13, label %for.body, label %for.end15
66 for.body: ; preds = %for.cond
; Initialization nest, inner loop: %indvar6 is the column index j.
69 for.cond1: ; preds = %for.inc, %for.body
70 %indvar6 = phi i64 [ %indvar.next7, %for.inc ], [ 0, %for.body ]
; i + j, narrowed to i32 before the int-to-float conversion below.
71 %tmp14 = add i64 %indvar9, %indvar6
72 %add = trunc i64 %tmp14 to i32
; Address of image[i][j].
73 %arrayidx10 = getelementptr [40 x [40 x float]]* @image, i64 0, i64 %indvar9, i64 %indvar6
; Keep iterating while j != 40.
74 %exitcond8 = icmp ne i64 %indvar6, 40
75 br i1 %exitcond8, label %for.body4, label %for.end
; image[i][j] = (float)(i + j)
77 for.body4: ; preds = %for.cond1
78 %conv = sitofp i32 %add to float
79 store float %conv, float* %arrayidx10, align 4
82 for.inc: ; preds = %for.body4
83 %indvar.next7 = add i64 %indvar6, 1
86 for.end: ; preds = %for.cond1
89 for.inc12: ; preds = %for.end
90 %indvar.next10 = add i64 %indvar9, 1
93 for.end15: ; preds = %for.cond
; Verification nest, outer loop: %indvar1 is the index i into @out.
97 for.cond16: ; preds = %for.inc46, %for.end15
98 %indvar1 = phi i64 [ %indvar.next2, %for.inc46 ], [ 0, %for.end15 ]
; 2 * i is the column index into @image, matching the access in foo.
99 %tmp4 = mul i64 %indvar1, 2
; Address of out[i], the value being verified.
100 %arrayidx41 = getelementptr [40 x float]* @out, i64 0, i64 %indvar1
; Unlike the other loops, this bound is a signed i32 compare: i < 20.
101 %i.1 = trunc i64 %indvar1 to i32
102 %cmp18 = icmp slt i32 %i.1, 20
103 br i1 %cmp18, label %for.body20, label %for.end49
105 for.body20: ; preds = %for.cond16
; Verification nest, inner loop: %indvar is the row index j and %diff.0 the
; reference sum, starting at 0.0.
108 for.cond21: ; preds = %for.inc35, %for.body20
109 %indvar = phi i64 [ %indvar.next, %for.inc35 ], [ 0, %for.body20 ]
110 %diff.0 = phi float [ 0.000000e+00, %for.body20 ], [ %add34, %for.inc35 ]
; Address of image[j][2*i].
111 %arrayidx31 = getelementptr [40 x [40 x float]]* @image, i64 0, i64 %indvar, i64 %tmp4
; Keep iterating while j != 40.
112 %exitcond = icmp ne i64 %indvar, 40
113 br i1 %exitcond, label %for.body25, label %for.end38
115 for.body25: ; preds = %for.cond21
; Inner latch: add image[j][2*i] to the reference sum and advance j.
118 for.inc35: ; preds = %for.body25
119 %tmp32 = load float* %arrayidx31, align 8
120 %add34 = fadd float %diff.0, %tmp32
121 %indvar.next = add i64 %indvar, 1
; Compare out[i] with the reference sum; une is true when the two values
; differ or either one is NaN, and that path leads to abort.
124 for.end38: ; preds = %for.cond21
125 %diff.0.lcssa = phi float [ %diff.0, %for.cond21 ]
126 %tmp42 = load float* %arrayidx41, align 4
127 %cmp44 = fcmp une float %tmp42, %diff.0.lcssa
128 br i1 %cmp44, label %if.then, label %if.end
130 if.then: ; preds = %for.end38
131 call void @abort() noreturn nounwind
134 if.end: ; preds = %for.end38
; Outer latch: advance i.
137 for.inc46: ; preds = %if.end
138 %indvar.next2 = add i64 %indvar1, 1
141 for.end49: ; preds = %for.cond16
; check_vect: installs sig_ill_handler for signal 4, queries CPUID leaf 1 via
; __get_cpuid, and calls exit(0) when that call returns 0 or when bit 26 of
; the value written to %d (the edx slot) is clear. Otherwise it calls signal
; again for signal 4 with a null handler.
; NOTE(review): signal 4 is presumably SIGILL and edx bit 26 presumably the
; SSE2 feature flag -- confirm against the platform headers and CPUID docs.
145 define internal void @check_vect() nounwind uwtable noinline {
; Stack slots that receive the eax, ebx, ecx and edx results, in that order.
147 %a = alloca i32, align 4
148 %b = alloca i32, align 4
149 %c = alloca i32, align 4
150 %d = alloca i32, align 4
151 %call = call void (i32)* (i32, void (i32)*)* @signal(i32 4, void (i32)* @sig_ill_handler) nounwind
152 %call1 = call i32 @__get_cpuid(i32 1, i32* %a, i32* %b, i32* %c, i32* %d)
; A zero return from __get_cpuid takes the exit path.
153 %tobool = icmp eq i32 %call1, 0
154 br i1 %tobool, label %if.then, label %lor.lhs.false
156 lor.lhs.false: ; preds = %entry
157 %tmp4 = load i32* %d, align 4
; 67108864 == 1 << 26; a clear bit also takes the exit path.
158 %and6 = and i32 %tmp4, 67108864
159 %cmp = icmp eq i32 %and6, 0
160 br i1 %cmp, label %if.then, label %if.end
162 if.then: ; preds = %entry, %lor.lhs.false
163 call void @exit(i32 0) noreturn nounwind
166 if.end: ; preds = %lor.lhs.false
; The handler installed above is replaced by a null handler pointer.
167 %call7 = call void (i32)* (i32, void (i32)*)* @signal(i32 4, void (i32)* null) nounwind
; External routines: abort is called from main on a result mismatch, and
; signal is called from check_vect to install and later replace a handler.
171 declare void @abort() noreturn
173 declare void (i32)* @signal(i32, void (i32)*) nounwind
; sig_ill_handler: signal handler registered by check_vect; it calls exit(0).
; %sig is not used in the visible lines.
175 define internal void @sig_ill_handler(i32 %sig) nounwind uwtable {
177 call void @exit(i32 0) noreturn nounwind
180 return: ; No predecessors!
; __get_cpuid: executes the cpuid instruction for leaf %__level and stores the
; ax, bx, cx and dx results through %__eax, %__ebx, %__ecx and %__edx.
; %retval.0 is 0 when the maximum leaf reported by __get_cpuid_max is below
; %__level (cpuid is then skipped and nothing is stored), and 1 otherwise.
; NOTE(review): the ret is not visible in this excerpt; presumably it returns
; %retval.0 -- confirm against the full file.
184 define internal i32 @__get_cpuid(i32 %__level, i32* %__eax, i32* %__ebx, i32* %__ecx, i32* %__edx) nounwind uwtable inlinehint {
; Keep only bit 31 of the leaf (-2147483648 is 0x80000000 as i32), so the
; maximum-leaf query is issued with either 0 or 0x80000000.
186 %and = and i32 %__level, -2147483648
187 %call = call i32 @__get_cpuid_max(i32 %and, i32* null)
; Unsigned compare: take the failure path when max leaf < requested leaf.
188 %cmp = icmp ult i32 %call, %__level
189 br i1 %cmp, label %if.then, label %if.end
191 if.then: ; preds = %entry
; The trailing 0 constraint ties the input %__level to the first output (ax).
194 if.end: ; preds = %entry
195 %tmp = call { i32, i32, i32, i32 } asm "cpuid\0A\09", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 %__level) nounwind, !srcloc !0
196 %asmresult = extractvalue { i32, i32, i32, i32 } %tmp, 0
197 %asmresult8 = extractvalue { i32, i32, i32, i32 } %tmp, 1
198 %asmresult9 = extractvalue { i32, i32, i32, i32 } %tmp, 2
199 %asmresult10 = extractvalue { i32, i32, i32, i32 } %tmp, 3
200 store i32 %asmresult, i32* %__eax, align 4
201 store i32 %asmresult8, i32* %__ebx, align 4
202 store i32 %asmresult9, i32* %__ecx, align 4
203 store i32 %asmresult10, i32* %__edx, align 4
; Merge point: 0 from the failure path, 1 after cpuid has been executed.
206 return: ; preds = %if.end, %if.then
207 %retval.0 = phi i32 [ 0, %if.then ], [ 1, %if.end ]
; External routine called with 0 from check_vect and sig_ill_handler.
211 declare void @exit(i32) noreturn
; __get_cpuid_max: executes the cpuid instruction with %__ext as the input
; value (tied to the ax output) and, when %__sig is non-null, stores the bx
; result through %__sig.
; NOTE(review): the ret is not visible in this excerpt; presumably the ax
; result (%asmresult) is returned -- confirm against the full file.
213 define internal i32 @__get_cpuid_max(i32 %__ext, i32* %__sig) nounwind uwtable inlinehint {
215 %tmp = call { i32, i32, i32, i32 } asm "cpuid\0A\09", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 %__ext) nounwind, !srcloc !1
216 %asmresult = extractvalue { i32, i32, i32, i32 } %tmp, 0
; A null %__sig skips the store below.
217 %tobool = icmp eq i32* %__sig, null
218 br i1 %tobool, label %if.end, label %if.then
220 if.then: ; preds = %entry
221 %asmresult1 = extractvalue { i32, i32, i32, i32 } %tmp, 1
222 store i32 %asmresult1, i32* %__sig, align 4
225 if.end: ; preds = %entry, %if.then
; Metadata nodes referenced through !srcloc by the inline-asm cpuid calls:
; !0 by the call in __get_cpuid and !1 by the call in __get_cpuid_max.
229 !0 = metadata !{i32 -2147343289, i32 -2147343281}
230 !1 = metadata !{i32 -2147343412, i32 -2147343404}