2 ; RUN: opt -mcpu=thunderx2t99 -loop-unroll --debug-only=loop-unroll --debug-only=basicblock-utils -S -unroll-allow-partial < %s 2>&1 | FileCheck %s
4 target triple = "aarch64-unknown-linux-gnu"
6 ; CHECK: Loop Unroll: F[foo] Loop %loop.2.header
7 ; CHECK: Loop Size = 19
8 ; CHECK: Trip Count = 512
9 ; CHECK: Trip Multiple = 512
10 ; CHECK: UNROLLING loop %loop.2.header by 4 with a breakout at trip 0
12 ; CHECK: Loop Unroll: F[foo] Loop %loop.header
13 ; CHECK: Loop Size = 18
14 ; CHECK: Trip Count = 512
15 ; CHECK: Trip Multiple = 512
16 ; CHECK: UNROLLING loop %loop.header by 4 with a breakout at trip 0
18 ; CHECK: %counter = phi i32 [ 0, %entry ], [ %inc.3, %loop.inc.3 ]
19 ; CHECK: %val = add nuw nsw i32 %counter, 5
20 ; CHECK: %val1 = add nuw nsw i32 %counter, 6
21 ; CHECK: %val2 = add nuw nsw i32 %counter, 7
22 ; CHECK: %val3 = add nuw nsw i32 %counter, 8
23 ; CHECK: %val4 = add nuw nsw i32 %counter, 9
24 ; CHECK: %val5 = add nuw nsw i32 %counter, 10
25 ; CHECK-NOT: %val = add i32 %counter, 5
26 ; CHECK-NOT: %val1 = add i32 %counter, 6
27 ; CHECK-NOT: %val2 = add i32 %counter, 7
28 ; CHECK-NOT: %val3 = add i32 %counter, 8
29 ; CHECK-NOT: %val4 = add i32 %counter, 9
30 ; CHECK-NOT: %val5 = add i32 %counter, 10
31 ; CHECK: %counter.2 = phi i32 [ 0, %exit.0 ], [ %inc.2.3, %loop.2.inc.3 ]
33 define void @foo(i32 * %out) {
35 %0 = alloca [1024 x i32]
36 %x0 = alloca [1024 x i32]
37 %x01 = alloca [1024 x i32]
38 %x02 = alloca [1024 x i32]
39 %x03 = alloca [1024 x i32]
40 %x04 = alloca [1024 x i32]
41 %x05 = alloca [1024 x i32]
42 %x06 = alloca [1024 x i32]
46 %counter = phi i32 [0, %entry], [%inc, %loop.inc]
50 %ptr = getelementptr [1024 x i32], [1024 x i32]* %0, i32 0, i32 %counter
51 store i32 %counter, i32* %ptr
52 %val = add i32 %counter, 5
53 %xptr = getelementptr [1024 x i32], [1024 x i32]* %x0, i32 0, i32 %counter
54 store i32 %val, i32* %xptr
55 %val1 = add i32 %counter, 6
56 %xptr1 = getelementptr [1024 x i32], [1024 x i32]* %x01, i32 0, i32 %counter
57 store i32 %val1, i32* %xptr1
58 %val2 = add i32 %counter, 7
59 %xptr2 = getelementptr [1024 x i32], [1024 x i32]* %x02, i32 0, i32 %counter
60 store i32 %val2, i32* %xptr2
61 %val3 = add i32 %counter, 8
62 %xptr3 = getelementptr [1024 x i32], [1024 x i32]* %x03, i32 0, i32 %counter
63 store i32 %val3, i32* %xptr3
64 %val4 = add i32 %counter, 9
65 %xptr4 = getelementptr [1024 x i32], [1024 x i32]* %x04, i32 0, i32 %counter
66 store i32 %val4, i32* %xptr4
67 %val5 = add i32 %counter, 10
68 %xptr5 = getelementptr [1024 x i32], [1024 x i32]* %x05, i32 0, i32 %counter
69 store i32 %val5, i32* %xptr5
73 %inc = add i32 %counter, 2
74 %1 = icmp sge i32 %inc, 1023
75 br i1 %1, label %exit.0, label %loop.header
78 %2 = getelementptr [1024 x i32], [1024 x i32]* %0, i32 0, i32 5
79 %3 = load i32, i32* %2
80 store i32 %3, i32 * %out
81 br label %loop.2.header
85 %counter.2 = phi i32 [0, %exit.0], [%inc.2, %loop.2.inc]
89 %ptr.2 = getelementptr [1024 x i32], [1024 x i32]* %0, i32 0, i32 %counter.2
90 store i32 %counter.2, i32* %ptr.2
91 %val.2 = add i32 %counter.2, 5
92 %xptr.2 = getelementptr [1024 x i32], [1024 x i32]* %x0, i32 0, i32 %counter.2
93 store i32 %val.2, i32* %xptr.2
94 %val1.2 = add i32 %counter.2, 6
95 %xptr1.2 = getelementptr [1024 x i32], [1024 x i32]* %x01, i32 0, i32 %counter.2
96 store i32 %val1, i32* %xptr1.2
97 %val2.2 = add i32 %counter.2, 7
98 %xptr2.2 = getelementptr [1024 x i32], [1024 x i32]* %x02, i32 0, i32 %counter.2
99 store i32 %val2, i32* %xptr2.2
100 %val3.2 = add i32 %counter.2, 8
101 %xptr3.2 = getelementptr [1024 x i32], [1024 x i32]* %x03, i32 0, i32 %counter.2
102 store i32 %val3.2, i32* %xptr3.2
103 %val4.2 = add i32 %counter.2, 9
104 %xptr4.2 = getelementptr [1024 x i32], [1024 x i32]* %x04, i32 0, i32 %counter.2
105 store i32 %val4.2, i32* %xptr4.2
106 %val5.2 = add i32 %counter.2, 10
107 %xptr5.2 = getelementptr [1024 x i32], [1024 x i32]* %x05, i32 0, i32 %counter.2
108 store i32 %val5.2, i32* %xptr5.2
109 %xptr6.2 = getelementptr [1024 x i32], [1024 x i32]* %x06, i32 0, i32 %counter.2
110 store i32 %val5.2, i32* %xptr6.2
114 %inc.2 = add i32 %counter.2, 2
115 %4 = icmp sge i32 %inc.2, 1023
116 br i1 %4, label %exit.2, label %loop.2.header
119 %x2 = getelementptr [1024 x i32], [1024 x i32]* %0, i32 0, i32 6
120 %x3 = load i32, i32* %x2
121 %out2 = getelementptr i32, i32 * %out, i32 1
122 store i32 %3, i32 * %out2