1 ; RUN: opt %loadPolly %defaultOpts -polly-codegen -enable-polly-vector -dce -S %s | FileCheck %s
2 ; Obtained from C source as:
3 ; clang -S -emit-llvm -O0 slp-perm-4.c
4 ; opt -correlated-propagation -mem2reg -instcombine -loop-simplify -indvars -instnamer slp-perm-4.s > slp-perm-4.ll
6 ; ModuleID = 'slp-perm-4.s'
; Module-level target description: data layout string and target triple.
7 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
8 target triple = "x86_64-unknown-linux-gnu"
; Expected contents of the output array after @foo has run on input[i] = i.
; @foo writes elements 0..14; the trailing 0 matches the zero that @main
; stores into every output element before calling @foo.
10 @main.check_results = internal unnamed_addr constant [16 x i32] [i32 3208, i32 1334, i32 28764, i32 35679, i32 2789, i32 13028, i32 4754, i32 168364, i32 91254, i32 12399, i32 22848, i32 8174, i32 307964, i32 146829, i32 22009, i32 0], align 16
; foo(pInput, pOutput): processes three groups of five consecutive i32
; elements. For each group it loads the five inputs once and stores five
; weighted sums of them (fixed integer coefficients) to the five output
; elements at the same offsets.
; NOTE(review): the embedded line numbers skip values (13-15, 92-93, 96-97,
; 99-101), so the entry block, the branches of for.body and for.inc, the
; return and the closing brace are not present in this copy. Confirm against
; the original test file.
12 define void @foo(i32* noalias %pInput, i32* noalias %pOutput) nounwind uwtable {
16 for.cond: ; preds = %for.inc, %entry
; %indvar is the group counter (0 on entry, %indvar.next from for.inc).
17 %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
; %tmp = 5 * %indvar is the element offset of the current group.
18 %tmp = mul i64 %indvar, 5
; Output addresses for offsets %tmp+4, %tmp+3, %tmp+2, %tmp+1 and %tmp.
19 %tmp1 = add i64 %tmp, 4
20 %incdec.ptr76 = getelementptr i32* %pOutput, i64 %tmp1
21 %tmp3 = add i64 %tmp, 3
22 %incdec.ptr60 = getelementptr i32* %pOutput, i64 %tmp3
23 %tmp4 = add i64 %tmp, 2
24 %incdec.ptr44 = getelementptr i32* %pOutput, i64 %tmp4
25 %tmp6 = add i64 %tmp, 1
26 %incdec.ptr28 = getelementptr i32* %pOutput, i64 %tmp6
27 %pOutput.addr.0 = getelementptr i32* %pOutput, i64 %tmp
; Input addresses for the same five offsets.
28 %incdec.ptr10 = getelementptr i32* %pInput, i64 %tmp1
29 %incdec.ptr7 = getelementptr i32* %pInput, i64 %tmp3
30 %incdec.ptr4 = getelementptr i32* %pInput, i64 %tmp4
31 %incdec.ptr = getelementptr i32* %pInput, i64 %tmp6
32 %pInput.addr.0 = getelementptr i32* %pInput, i64 %tmp
; Enter the body while %indvar != 3, i.e. for %indvar = 0, 1 and 2.
33 %exitcond = icmp ne i64 %indvar, 3
34 br i1 %exitcond, label %for.body, label %for.end
36 for.body: ; preds = %for.cond
; Load the five inputs of the group: in0..in4 = elements %tmp+0 .. %tmp+4.
37 %tmp2 = load i32* %pInput.addr.0, align 4
38 %tmp5 = load i32* %incdec.ptr, align 4
39 %tmp8 = load i32* %incdec.ptr4, align 4
40 %tmp11 = load i32* %incdec.ptr7, align 4
41 %tmp14 = load i32* %incdec.ptr10, align 4
; out[%tmp+0] = 100*in0 + 1322*in1 + 74*in2 + 134*in3 + 334*in4
42 %mul = mul i32 %tmp2, 100
43 %mul17 = mul i32 %tmp5, 1322
44 %add = add i32 %mul, %mul17
45 %mul19 = mul i32 %tmp8, 74
46 %add20 = add i32 %add, %mul19
47 %mul22 = mul i32 %tmp11, 134
48 %add23 = add i32 %add20, %mul22
49 %mul25 = mul i32 %tmp14, 334
50 %add26 = add i32 %add23, %mul25
51 store i32 %add26, i32* %pOutput.addr.0, align 4
; out[%tmp+1] = 216*in0 + 13*in1 + 191*in2 + 117*in3 + 147*in4
52 %mul30 = mul i32 %tmp2, 216
53 %mul32 = mul i32 %tmp5, 13
54 %add33 = add i32 %mul30, %mul32
55 %mul35 = mul i32 %tmp8, 191
56 %add36 = add i32 %add33, %mul35
57 %mul38 = mul i32 %tmp11, 117
58 %add39 = add i32 %add36, %mul38
59 %mul41 = mul i32 %tmp14, 147
60 %add42 = add i32 %add39, %mul41
61 store i32 %add42, i32* %incdec.ptr28, align 4
; out[%tmp+2] = 23*in0 + 27271*in1 + 500*in2 + 11*in3 + 115*in4
62 %mul46 = mul i32 %tmp2, 23
63 %mul48 = mul i32 %tmp5, 27271
64 %add49 = add i32 %mul46, %mul48
65 %mul51 = mul i32 %tmp8, 500
66 %add52 = add i32 %add49, %mul51
67 %mul54 = mul i32 %tmp11, 11
68 %add55 = add i32 %add52, %mul54
69 %mul57 = mul i32 %tmp14, 115
70 %add58 = add i32 %add55, %mul57
71 store i32 %add58, i32* %incdec.ptr44, align 4
; out[%tmp+3] = 237*in0 + 2280*in1 + 111*in2 + 771*in3 + 7716*in4
72 %mul62 = mul i32 %tmp2, 237
73 %mul64 = mul i32 %tmp5, 2280
74 %add65 = add i32 %mul62, %mul64
75 %mul67 = mul i32 %tmp8, 111
76 %add68 = add i32 %add65, %mul67
77 %mul70 = mul i32 %tmp11, 771
78 %add71 = add i32 %add68, %mul70
79 %mul73 = mul i32 %tmp14, 7716
80 %add74 = add i32 %add71, %mul73
81 store i32 %add74, i32* %incdec.ptr60, align 4
; out[%tmp+4] = 437*in0 + 284*in1 + 1114*in2 + 71*in3 + 16*in4
82 %mul78 = mul i32 %tmp2, 437
83 %mul80 = mul i32 %tmp5, 284
84 %add81 = add i32 %mul78, %mul80
85 %mul83 = mul i32 %tmp8, 1114
86 %add84 = add i32 %add81, %mul83
87 %mul86 = mul i32 %tmp11, 71
88 %add87 = add i32 %add84, %mul86
; The shift left by 4 is the multiplication of in4 by 16.
89 %mul89 = shl i32 %tmp14, 4
90 %add90 = add i32 %add87, %mul89
91 store i32 %add90, i32* %incdec.ptr76, align 4
94 for.inc: ; preds = %for.body
; Advance to the next group of five elements.
95 %indvar.next = add i64 %indvar, 1
98 for.end: ; preds = %for.cond
; main: test driver. Copies the expected results into a local array, calls
; check_vect, initialises input[i] = i and output[i] = 0 for i = 0..15, calls
; foo on the two arrays, then compares output with check_results over 16
; elements and calls abort on a mismatch.
; NOTE(review): the embedded line numbers skip values, so the entry label,
; several branches, the return and the closing brace are not present in this
; copy. Confirm against the original test file.
102 define i32 @main(i32 %argc, i8** %argv) nounwind uwtable {
104 %input = alloca [16 x i32], align 16
105 %output = alloca [16 x i32], align 16
106 %check_results = alloca [16 x i32], align 16
; Copy the 64-byte (16 x i32) constant table into the local %check_results.
107 %tmp = bitcast [16 x i32]* %check_results to i8*
108 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* bitcast ([16 x i32]* @main.check_results to i8*), i64 64, i32 16, i1 false)
109 call void @check_vect()
; First loop: initialise both arrays.
112 for.cond: ; preds = %for.inc, %entry
113 %indvar2 = phi i64 [ %indvar.next3, %for.inc ], [ 0, %entry ]
114 %i.0 = trunc i64 %indvar2 to i32
115 %arrayidx11 = getelementptr [16 x i32]* %output, i64 0, i64 %indvar2
116 %arrayidx = getelementptr [16 x i32]* %input, i64 0, i64 %indvar2
; Run the body while i < 16 (unsigned compare).
117 %cmp = icmp ult i32 %i.0, 16
118 br i1 %cmp, label %for.body, label %for.end
120 for.body: ; preds = %for.cond
; input[i] = i, then reload the value and abort if it exceeds 200.
121 store i32 %i.0, i32* %arrayidx, align 4
122 %tmp7 = load i32* %arrayidx, align 4
123 %cmp8 = icmp ugt i32 %tmp7, 200
124 br i1 %cmp8, label %if.then, label %if.end
126 if.then: ; preds = %for.body
127 call void @abort() noreturn nounwind
130 if.end: ; preds = %for.body
; output[i] = 0
131 store i32 0, i32* %arrayidx11, align 4
; Empty inline asm marked sideeffect. NOTE(review): presumably an
; optimisation barrier inherited from the C source; confirm.
132 call void asm sideeffect "", "~{dirflag},~{fpsr},~{flags}"() nounwind, !srcloc !0
135 for.inc: ; preds = %if.end
136 %indvar.next3 = add i64 %indvar2, 1
139 for.end: ; preds = %for.cond
; Pass pointers to the first elements of input and output to foo.
140 %arraydecay = getelementptr inbounds [16 x i32]* %input, i64 0, i64 0
141 %arraydecay13 = getelementptr inbounds [16 x i32]* %output, i64 0, i64 0
142 call void @foo(i32* %arraydecay, i32* %arraydecay13)
; Second loop: compare output[i] with check_results[i] for i = 0..15.
145 for.cond14: ; preds = %for.inc29, %for.end
146 %indvar = phi i64 [ %indvar.next, %for.inc29 ], [ 0, %for.end ]
147 %arrayidx24 = getelementptr [16 x i32]* %check_results, i64 0, i64 %indvar
148 %arrayidx20 = getelementptr [16 x i32]* %output, i64 0, i64 %indvar
149 %i.1 = trunc i64 %indvar to i32
150 %cmp16 = icmp ult i32 %i.1, 16
151 br i1 %cmp16, label %for.body17, label %for.end32
153 for.body17: ; preds = %for.cond14
154 %tmp21 = load i32* %arrayidx20, align 4
155 %tmp25 = load i32* %arrayidx24, align 4
; Equal values continue the loop; a mismatch reaches abort.
156 %cmp26 = icmp eq i32 %tmp21, %tmp25
157 br i1 %cmp26, label %if.end28, label %if.then27
159 if.then27: ; preds = %for.body17
160 call void @abort() noreturn nounwind
163 if.end28: ; preds = %for.body17
166 for.inc29: ; preds = %if.end28
167 %indvar.next = add i64 %indvar, 1
170 for.end32: ; preds = %for.cond14
; memcpy intrinsic used by @main to copy @main.check_results into a local array.
174 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
; check_vect: installs sig_ill_handler for signal 4, queries CPUID leaf 1
; through __get_cpuid, and calls exit(0) when the query reports failure or
; when bit 26 of the value stored through the edx pointer is clear.
; Otherwise it calls signal(4, null) to replace the handler again.
; NOTE(review): signal 4 is presumably SIGILL and EDX bit 26 presumably the
; SSE2 feature flag on this x86_64 Linux target; confirm. The entry label,
; the return and the closing brace are not present in this copy.
176 define internal void @check_vect() nounwind uwtable noinline {
; Stack slots that receive the eax, ebx, ecx and edx results of cpuid.
178 %a = alloca i32, align 4
179 %b = alloca i32, align 4
180 %c = alloca i32, align 4
181 %d = alloca i32, align 4
182 %call = call void (i32)* (i32, void (i32)*)* @signal(i32 4, void (i32)* @sig_ill_handler) nounwind
183 %call1 = call i32 @__get_cpuid(i32 1, i32* %a, i32* %b, i32* %c, i32* %d)
; A zero result from __get_cpuid goes straight to the exit path.
184 %tobool = icmp eq i32 %call1, 0
185 br i1 %tobool, label %if.then, label %lor.lhs.false
187 lor.lhs.false: ; preds = %entry
; 67108864 is 1 shifted left by 26: test bit 26 of the edx result.
188 %tmp4 = load i32* %d, align 4
189 %and6 = and i32 %tmp4, 67108864
190 %cmp = icmp eq i32 %and6, 0
191 br i1 %cmp, label %if.then, label %if.end
193 if.then: ; preds = %entry, %lor.lhs.false
194 call void @exit(i32 0) noreturn nounwind
197 if.end: ; preds = %lor.lhs.false
; Replace the handler for signal 4 with a null handler pointer.
198 %call7 = call void (i32)* (i32, void (i32)*)* @signal(i32 4, void (i32)* null) nounwind
; External functions: abort is called by @main on a failed check, signal is
; called by @check_vect to install and remove the handler for signal 4.
202 declare void @abort() noreturn
204 declare void (i32)* @signal(i32, void (i32)*) nounwind
; sig_ill_handler(sig): handler that @check_vect registers for signal 4.
; It calls exit(0); the trailing return block has no predecessors.
; NOTE(review): the entry label, terminators and closing brace are not
; present in this copy.
206 define internal void @sig_ill_handler(i32 %sig) nounwind uwtable {
208 call void @exit(i32 0) noreturn nounwind
211 return: ; No predecessors!
; __get_cpuid(level, eax, ebx, ecx, edx): calls __get_cpuid_max with bit 31
; of %__level (all other bits cleared) and compares the result with
; %__level. If the result is below %__level, %retval.0 is 0 and nothing is
; stored. Otherwise cpuid is executed with %__level as input, the four
; results are stored through the pointer arguments, and %retval.0 is 1.
; NOTE(review): the return instruction is not present in this copy;
; %retval.0 is presumably the returned value. Confirm.
215 define internal i32 @__get_cpuid(i32 %__level, i32* %__eax, i32* %__ebx, i32* %__ecx, i32* %__edx) nounwind uwtable inlinehint {
; -2147483648 is 0x80000000, so %and keeps only bit 31 of %__level.
217 %and = and i32 %__level, -2147483648
218 %call = call i32 @__get_cpuid_max(i32 %and, i32* null)
; Unsigned compare: take if.then when the value from __get_cpuid_max is below %__level.
219 %cmp = icmp ult i32 %call, %__level
220 br i1 %cmp, label %if.then, label %if.end
222 if.then: ; preds = %entry
225 if.end: ; preds = %entry
; cpuid with %__level tied to the first output register (constraint 0);
; the outputs are ax, bx, cx and dx in that order.
226 %tmp = call { i32, i32, i32, i32 } asm "cpuid\0A\09", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 %__level) nounwind, !srcloc !1
227 %asmresult = extractvalue { i32, i32, i32, i32 } %tmp, 0
228 %asmresult8 = extractvalue { i32, i32, i32, i32 } %tmp, 1
229 %asmresult9 = extractvalue { i32, i32, i32, i32 } %tmp, 2
230 %asmresult10 = extractvalue { i32, i32, i32, i32 } %tmp, 3
; Write the four register results through the caller-supplied pointers.
231 store i32 %asmresult, i32* %__eax, align 4
232 store i32 %asmresult8, i32* %__ebx, align 4
233 store i32 %asmresult9, i32* %__ecx, align 4
234 store i32 %asmresult10, i32* %__edx, align 4
237 return: ; preds = %if.end, %if.then
; 0 when the level check failed, 1 when cpuid was executed.
238 %retval.0 = phi i32 [ 0, %if.then ], [ 1, %if.end ]
; External exit, called with 0 by @check_vect and @sig_ill_handler.
242 declare void @exit(i32) noreturn
; __get_cpuid_max(ext, sig): executes cpuid with %__ext as input. When
; %__sig is non-null, the bx result is stored through it.
; NOTE(review): the return instruction is not present in this copy;
; %asmresult (the ax result) is presumably the returned value. Confirm.
244 define internal i32 @__get_cpuid_max(i32 %__ext, i32* %__sig) nounwind uwtable inlinehint {
; cpuid with %__ext tied to the first output register (constraint 0).
246 %tmp = call { i32, i32, i32, i32 } asm "cpuid\0A\09", "={ax},={bx},={cx},={dx},0,~{dirflag},~{fpsr},~{flags}"(i32 %__ext) nounwind, !srcloc !2
247 %asmresult = extractvalue { i32, i32, i32, i32 } %tmp, 0
; Skip the store when the caller passed a null %__sig.
248 %tobool = icmp eq i32* %__sig, null
249 br i1 %tobool, label %if.end, label %if.then
251 if.then: ; preds = %entry
252 %asmresult1 = extractvalue { i32, i32, i32, i32 } %tmp, 1
253 store i32 %asmresult1, i32* %__sig, align 4
256 if.end: ; preds = %entry, %if.then
; Metadata nodes referenced as !srcloc by the inline asm calls:
; !0 by the empty asm in @main, !1 by cpuid in @__get_cpuid,
; !2 by cpuid in @__get_cpuid_max.
260 !0 = metadata !{i32 1517}
261 !1 = metadata !{i32 -2147342346, i32 -2147342338}
262 !2 = metadata !{i32 -2147342469, i32 -2147342461}