1 ; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=falkor | FileCheck %s
2 ; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=falkor -enable-falkor-hwpf-unroll-fix=0 | FileCheck %s --check-prefix=NOHWPF
4 ; Check that the loop unroller doesn't exhaust the HW prefetcher's resources.
6 ; This loop is partially unrolled 2 times on Falkor (instead of 4 times when the HW prefetcher unroll fix is disabled).
7 ; NOHWPF-LABEL: @unroll1(
10 ; NOHWPF-NEXT: getelementptr
12 ; NOHWPF-NEXT: getelementptr
15 ; NOHWPF-NEXT: getelementptr
17 ; NOHWPF-NEXT: getelementptr
20 ; NOHWPF-NEXT: getelementptr
22 ; NOHWPF-NEXT: getelementptr
25 ; NOHWPF-NEXT: getelementptr
27 ; NOHWPF-NEXT: getelementptr
32 ; NOHWPF-LABEL: exit:
34 ; CHECK-LABEL: @unroll1(
37 ; CHECK-NEXT: getelementptr
39 ; CHECK-NEXT: getelementptr
42 ; CHECK-NEXT: getelementptr
44 ; CHECK-NEXT: getelementptr
49 ; CHECK-LABEL: exit:
50 define void @unroll1(i32* %p, i32* %p2) {
55 %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
57 %gep = getelementptr inbounds i32, i32* %p, i32 %iv
58 %load = load volatile i32, i32* %gep
60 %gep2 = getelementptr inbounds i32, i32* %p2, i32 %iv
61 %load2 = load volatile i32, i32* %gep2
64 %exitcnd = icmp uge i32 %inc, 1024
65 br i1 %exitcnd, label %exit, label %loop
71 ; The inner loop is partially unrolled 4 times on Falkor (instead of 8 times when the HW prefetcher unroll fix is disabled).
72 ; NOHWPF-LABEL: @unroll2(
73 ; NOHWPF-LABEL: loop2:
76 ; NOHWPF-NEXT: getelementptr
80 ; NOHWPF-NEXT: getelementptr
84 ; NOHWPF-NEXT: getelementptr
88 ; NOHWPF-NEXT: getelementptr
92 ; NOHWPF-NEXT: getelementptr
96 ; NOHWPF-NEXT: getelementptr
100 ; NOHWPF-NEXT: getelementptr
104 ; NOHWPF-NEXT: getelementptr
110 ; NOHWPF-LABEL: exit2:
112 ; CHECK-LABEL: @unroll2(
113 ; CHECK-LABEL: loop2:
116 ; CHECK-NEXT: getelementptr
120 ; CHECK-NEXT: getelementptr
124 ; CHECK-NEXT: getelementptr
128 ; CHECK-NEXT: getelementptr
134 ; CHECK-LABEL: exit2:
136 define void @unroll2(i32* %p) {
141 %iv1 = phi i32 [ 0, %entry ], [ %inc1, %loop1.latch ]
142 %outer.sum = phi i32 [ 0, %entry ], [ %sum, %loop1.latch ]
143 br label %loop2.header
149 %iv2 = phi i32 [ 0, %loop2.header ], [ %inc2, %loop2 ]
150 %sum = phi i32 [ %outer.sum, %loop2.header ], [ %sum.inc, %loop2 ]
151 %gep = getelementptr inbounds i32, i32* %p, i32 %iv2
152 %load = load i32, i32* %gep
153 %sum.inc = add i32 %sum, %load
154 %inc2 = add i32 %iv2, 1
155 %exitcnd2 = icmp uge i32 %inc2, 1024
156 br i1 %exitcnd2, label %exit2, label %loop2
159 br label %loop1.latch
162 %inc1 = add i32 %iv1, 1 ; advance the outer-loop induction variable
163 %exitcnd1 = icmp uge i32 %inc1, 1024 ; outer loop is done once %inc1 reaches 1024
164 br i1 %exitcnd1, label %exit, label %loop1 ; branch on the outer condition; %exitcnd2 (inner exit test) is always true here