2 ; RUN: opt -mcpu=core-avx2 -loop-unroll --debug-only=loop-unroll -S -unroll-allow-partial < %s 2>&1 | FileCheck %s
4 target triple = "x86_64-unknown-linux-gnu"
6 ; CHECK: Loop Unroll: F[foo] Loop %loop.2.header
7 ; CHECK: Loop Size = 27
8 ; CHECK-NOT: UNROLLING loop %loop.2.header
9 ; CHECK: Loop Unroll: F[foo] Loop %loop.header
10 ; CHECK: Loop Size = 25
11 ; CHECK: UNROLLING loop %loop.header by 2
13 define void @foo(i32 * %out) {
15 %0 = alloca [1024 x i32]
16 %x0 = alloca [1024 x i32]
17 %x01 = alloca [1024 x i32]
18 %x02 = alloca [1024 x i32]
19 %x03 = alloca [1024 x i32]
20 %x04 = alloca [1024 x i32]
21 %x05 = alloca [1024 x i32]
22 %x06 = alloca [1024 x i32]
26 %counter = phi i32 [0, %entry], [%inc, %loop.inc]
30 %ptr = getelementptr [1024 x i32], [1024 x i32]* %0, i32 0, i32 %counter
31 store i32 %counter, i32* %ptr
32 %val = add i32 %counter, 5
33 %xptr = getelementptr [1024 x i32], [1024 x i32]* %x0, i32 0, i32 %counter
34 store i32 %val, i32* %xptr
35 %val1 = add i32 %counter, 6
36 %xptr1 = getelementptr [1024 x i32], [1024 x i32]* %x01, i32 0, i32 %counter
37 store i32 %val1, i32* %xptr1
38 %val2 = add i32 %counter, 7
39 %xptr2 = getelementptr [1024 x i32], [1024 x i32]* %x02, i32 0, i32 %counter
40 store i32 %val2, i32* %xptr2
41 %val3 = add i32 %counter, 8
42 %xptr3 = getelementptr [1024 x i32], [1024 x i32]* %x03, i32 0, i32 %counter
43 store i32 %val3, i32* %xptr3
44 %val4 = add i32 %counter, 9
45 %xptr4 = getelementptr [1024 x i32], [1024 x i32]* %x04, i32 0, i32 %counter
46 store i32 %val4, i32* %xptr4
47 %val5 = add i32 %counter, 10
48 %xptr5 = getelementptr [1024 x i32], [1024 x i32]* %x05, i32 0, i32 %counter
49 store i32 %val5, i32* %xptr5
53 %inc = add i32 %counter, 2
54 %1 = icmp sge i32 %inc, 1023
55 br i1 %1, label %exit.0, label %loop.header
58 %2 = getelementptr [1024 x i32], [1024 x i32]* %0, i32 0, i32 5
59 %3 = load i32, i32* %2
60 store i32 %3, i32 * %out
61 br label %loop.2.header
65 %counter.2 = phi i32 [0, %exit.0], [%inc.2, %loop.2.inc]
69 %ptr.2 = getelementptr [1024 x i32], [1024 x i32]* %0, i32 0, i32 %counter.2
70 store i32 %counter.2, i32* %ptr.2
71 %val.2 = add i32 %counter.2, 5
72 %xptr.2 = getelementptr [1024 x i32], [1024 x i32]* %x0, i32 0, i32 %counter.2
73 store i32 %val.2, i32* %xptr.2
74 %val1.2 = add i32 %counter.2, 6
75 %xptr1.2 = getelementptr [1024 x i32], [1024 x i32]* %x01, i32 0, i32 %counter.2
76 store i32 %val1, i32* %xptr1.2
77 %val2.2 = add i32 %counter.2, 7
78 %xptr2.2 = getelementptr [1024 x i32], [1024 x i32]* %x02, i32 0, i32 %counter.2
79 store i32 %val2, i32* %xptr2.2
80 %val3.2 = add i32 %counter.2, 8
81 %xptr3.2 = getelementptr [1024 x i32], [1024 x i32]* %x03, i32 0, i32 %counter.2
82 store i32 %val3.2, i32* %xptr3.2
83 %val4.2 = add i32 %counter.2, 9
84 %xptr4.2 = getelementptr [1024 x i32], [1024 x i32]* %x04, i32 0, i32 %counter.2
85 store i32 %val4.2, i32* %xptr4.2
86 %val5.2 = add i32 %counter.2, 10
87 %xptr5.2 = getelementptr [1024 x i32], [1024 x i32]* %x05, i32 0, i32 %counter.2
88 store i32 %val5.2, i32* %xptr5.2
89 %xptr6.2 = getelementptr [1024 x i32], [1024 x i32]* %x06, i32 0, i32 %counter.2
90 store i32 %val5.2, i32* %xptr6.2
94 %inc.2 = add i32 %counter.2, 2
95 %4 = icmp sge i32 %inc.2, 1023
96 br i1 %4, label %exit.2, label %loop.2.header
99 %x2 = getelementptr [1024 x i32], [1024 x i32]* %0, i32 0, i32 6
100 %x3 = load i32, i32* %x2
101 %out2 = getelementptr i32, i32 * %out, i32 1
102 store i32 %3, i32 * %out2