1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -loop-reroll -S | FileCheck %s
3 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
4 target triple = "thumbv7-none-linux"
6 ;void foo(int *A, int *B, int m, int n) {
7 ; for (int i = m; i < n; i+=4) {
8 ; A[i+0] = B[i+0] * 4;
9 ; A[i+1] = B[i+1] * 4;
10 ; A[i+2] = B[i+2] * 4;
11 ; A[i+3] = B[i+3] * 4;
12 ; }
13 ;}
; @foo: a loop that has been unrolled by hand four times, used as input for the
; -loop-reroll pass named in the opt invocation at the top of this file.
;   %A - destination i32 array (only stored to here)
;   %B - source i32 array (only loaded from here; marked readonly)
;   %m - first index; the induction variable starts at this runtime value
;        rather than at a constant
;   %n - signed, exclusive upper bound for the index
; One iteration of the input loop handles indices i, i+1, i+2 and i+3 and then
; advances i by 4.
; NOTE(review): this listing shows no `entry:` label, no instruction after
; `for.end:` and no closing brace, although the phi below names %entry as a
; predecessor -- presumably those lines were lost in extraction; confirm
; against the upstream test before relying on this copy.
14 define void @foo(i32* nocapture %A, i32* nocapture readonly %B, i32 %m, i32 %n) {
; Autogenerated expectations for the transformed function follow.
; Guard: the loop is entered only when m < n (signed).
17 ; CHECK-NEXT: [[CMP34:%.*]] = icmp slt i32 [[M:%.*]], [[N:%.*]]
18 ; CHECK-NEXT: br i1 [[CMP34]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; Preheader: computes the exit bound
;   TMP5 = (((smax(n, m + 4) - 1 - m) >> 2) << 2) + 3
19 ; CHECK: for.body.preheader:
20 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[M]], 4
21 ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N]], i32 [[TMP0]])
22 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SMAX]], -1
23 ; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], [[M]]
24 ; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP2]], 2
25 ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i32 [[TMP3]], 2
26 ; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i32 [[TMP4]], 3
27 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; Rerolled body: a single load / shl / store group per iteration, indexed by
; m + INDVAR, where INDVAR starts at 0 and is incremented by 1. The loop
; exits once INDVAR equals TMP5.
29 ; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
30 ; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[M]], [[INDVAR]]
31 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[TMP6]]
32 ; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
33 ; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP7]], 2
34 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP6]]
35 ; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX2]], align 4
36 ; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
37 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVAR]], [[TMP5]]
38 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
39 ; CHECK: for.end.loopexit:
40 ; CHECK-NEXT: br label [[FOR_END]]
42 ; CHECK-NEXT: ret void
; Input IR starts here.
; Entry guard: skip the loop entirely when m >= n (signed compare).
45 %cmp34 = icmp slt i32 %m, %n
46 br i1 %cmp34, label %for.body, label %for.end
48 for.body: ; preds = %entry, %for.body
; i starts at %m on entry and takes %add18 (i + 4) on the back edge.
49 %i.035 = phi i32 [ %add18, %for.body ], [ %m, %entry ]
; Copy 0: A[i] = B[i] << 2 (the shift by 2 is the "* 4" of the C sketch).
50 %arrayidx = getelementptr inbounds i32, i32* %B, i32 %i.035
51 %0 = load i32, i32* %arrayidx, align 4
52 %mul = shl nsw i32 %0, 2
53 %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %i.035
54 store i32 %mul, i32* %arrayidx2, align 4
; Copy 1: same operation at index i + 1.
55 %add3 = add nsw i32 %i.035, 1
56 %arrayidx4 = getelementptr inbounds i32, i32* %B, i32 %add3
57 %1 = load i32, i32* %arrayidx4, align 4
58 %mul5 = shl nsw i32 %1, 2
59 %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %add3
60 store i32 %mul5, i32* %arrayidx7, align 4
; Copy 2: same operation at index i + 2.
61 %add8 = add nsw i32 %i.035, 2
62 %arrayidx9 = getelementptr inbounds i32, i32* %B, i32 %add8
63 %2 = load i32, i32* %arrayidx9, align 4
64 %mul10 = shl nsw i32 %2, 2
65 %arrayidx12 = getelementptr inbounds i32, i32* %A, i32 %add8
66 store i32 %mul10, i32* %arrayidx12, align 4
; Copy 3: same operation at index i + 3.
67 %add13 = add nsw i32 %i.035, 3
68 %arrayidx14 = getelementptr inbounds i32, i32* %B, i32 %add13
69 %3 = load i32, i32* %arrayidx14, align 4
70 %mul15 = shl nsw i32 %3, 2
71 %arrayidx17 = getelementptr inbounds i32, i32* %A, i32 %add13
72 store i32 %mul15, i32* %arrayidx17, align 4
; Advance by the unroll factor and loop while the next index is still < n.
73 %add18 = add nsw i32 %i.035, 4
74 %cmp = icmp slt i32 %add18, %n
75 br i1 %cmp, label %for.body, label %for.end
77 for.end: ; preds = %for.body, %entry
81 ;void daxpy_ur(int n,float da,float *dx,float *dy)
82 ; {
83 ; int m = n % 4;
84 ; for (int i = m; i < n; i = i + 4)
85 ; {
86 ; dy[i] = dy[i] + da*dx[i];
87 ; dy[i+1] = dy[i+1] + da*dx[i+1];
88 ; dy[i+2] = dy[i+2] + da*dx[i+2];
89 ; dy[i+3] = dy[i+3] + da*dx[i+3];
90 ; }
91 ; }
; @daxpy_ur: a hand-unrolled-by-4 loop computing dy[i] = dy[i] + da * dx[i],
; used as input for the -loop-reroll pass named in the opt invocation at the
; top of this file.
;   %n  - signed, exclusive upper bound for the index
;   %da - float scale factor applied to each dx element
;   %dx - float array that is only loaded from here (marked readonly)
;   %dy - float array that is loaded from and stored back to
; The induction variable starts at the runtime value n srem 4 rather than at a
; constant, and advances by 4 per iteration of the input loop.
; NOTE(review): this listing shows no `entry:` label and nothing after
; `for.end:` (no terminator, no closing brace), although the phi below names
; %entry as a predecessor -- presumably those lines were lost in extraction;
; confirm against the upstream test before relying on this copy.
92 define void @daxpy_ur(i32 %n, float %da, float* nocapture readonly %dx, float* nocapture %dy) {
93 ; CHECK-LABEL: @daxpy_ur(
; Autogenerated expectations for the transformed function follow.
; Guard: the loop is entered only when (n srem 4) < n (signed).
95 ; CHECK-NEXT: [[REM:%.*]] = srem i32 [[N:%.*]], 4
96 ; CHECK-NEXT: [[CMP55:%.*]] = icmp slt i32 [[REM]], [[N]]
97 ; CHECK-NEXT: br i1 [[CMP55]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; Preheader: computes the exit bound
;   TMP4 = (((n - 1 - rem) >> 2) << 2) + 3
98 ; CHECK: for.body.preheader:
99 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
100 ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[REM]]
101 ; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 2
102 ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 2
103 ; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i32 [[TMP3]], 3
104 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; Rerolled body: a single load / load / fmul / fadd / store group per
; iteration, indexed by rem + INDVAR, where INDVAR starts at 0 and is
; incremented by 1. The loop exits once INDVAR equals TMP4.
106 ; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
107 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[REM]], [[INDVAR]]
108 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[DY:%.*]], i32 [[TMP5]]
109 ; CHECK-NEXT: [[TMP6:%.*]] = load float, float* [[ARRAYIDX]], align 4
110 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, float* [[DX:%.*]], i32 [[TMP5]]
111 ; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX1]], align 4
112 ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP7]], [[DA:%.*]]
113 ; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP6]], [[MUL]]
114 ; CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX]], align 4
115 ; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
116 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVAR]], [[TMP4]]
117 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
118 ; CHECK: for.end.loopexit:
119 ; CHECK-NEXT: br label [[FOR_END]]
121 ; CHECK-NEXT: ret void
; Input IR starts here.
; %rem is the starting index (the "m" of the C sketch); skip the loop when
; it is not below n.
124 %rem = srem i32 %n, 4
125 %cmp55 = icmp slt i32 %rem, %n
126 br i1 %cmp55, label %for.body, label %for.end
128 for.body: ; preds = %entry, %for.body
; i starts at %rem on entry and takes %add27 (i + 4) on the back edge.
129 %i.056 = phi i32 [ %add27, %for.body ], [ %rem, %entry ]
; Copy 0: dy[i] = dy[i] + dx[i] * da.
130 %arrayidx = getelementptr inbounds float, float* %dy, i32 %i.056
131 %0 = load float, float* %arrayidx, align 4
132 %arrayidx1 = getelementptr inbounds float, float* %dx, i32 %i.056
133 %1 = load float, float* %arrayidx1, align 4
134 %mul = fmul float %1, %da
135 %add = fadd float %0, %mul
136 store float %add, float* %arrayidx, align 4
; Copy 1: same operation at index i + 1.
137 %add3 = add nsw i32 %i.056, 1
138 %arrayidx4 = getelementptr inbounds float, float* %dy, i32 %add3
139 %2 = load float, float* %arrayidx4, align 4
140 %arrayidx6 = getelementptr inbounds float, float* %dx, i32 %add3
141 %3 = load float, float* %arrayidx6, align 4
142 %mul7 = fmul float %3, %da
143 %add8 = fadd float %2, %mul7
144 store float %add8, float* %arrayidx4, align 4
; Copy 2: same operation at index i + 2.
145 %add11 = add nsw i32 %i.056, 2
146 %arrayidx12 = getelementptr inbounds float, float* %dy, i32 %add11
147 %4 = load float, float* %arrayidx12, align 4
148 %arrayidx14 = getelementptr inbounds float, float* %dx, i32 %add11
149 %5 = load float, float* %arrayidx14, align 4
150 %mul15 = fmul float %5, %da
151 %add16 = fadd float %4, %mul15
152 store float %add16, float* %arrayidx12, align 4
; Copy 3: same operation at index i + 3.
153 %add19 = add nsw i32 %i.056, 3
154 %arrayidx20 = getelementptr inbounds float, float* %dy, i32 %add19
155 %6 = load float, float* %arrayidx20, align 4
156 %arrayidx22 = getelementptr inbounds float, float* %dx, i32 %add19
157 %7 = load float, float* %arrayidx22, align 4
158 %mul23 = fmul float %7, %da
159 %add24 = fadd float %6, %mul23
160 store float %add24, float* %arrayidx20, align 4
; Advance by the unroll factor and loop while the next index is still < n.
161 %add27 = add nsw i32 %i.056, 4
162 %cmp = icmp slt i32 %add27, %n
163 br i1 %cmp, label %for.body, label %for.end
165 for.end: ; preds = %for.body, %entry