1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -aa-pipeline=basic-aa -passes='loop(loop-interchange)' -cache-line-size=64 -S %s | FileCheck %s --check-prefixes INTC
3 ; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(lnicm),loop(loop-interchange)' -cache-line-size=64 -S %s | FileCheck %s --check-prefixes LNICM
4 ; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(licm),loop(loop-interchange)' -cache-line-size=64 -S %s | FileCheck %s --check-prefixes LICM
6 ; This test represents the following function:
7 ; void test(int n, int m, int x[m][n], int y[n], int *z) {
8 ; for (int k = 0; k < n; k++) {
10 ; for (int i = 0; i < m; i++)
11 ; x[i][k] += y[k] + tmp;
14 ; We only want to hoist the load of z out of the loop nest.
15 ; LICM also hoists the load of y[k] out of the i-loop, but LNICM does not,
16 ; in order to keep the loop nest perfect. This allows optimizations that
17 ; require a perfect loop nest (e.g. loop-interchange) to be performed.
; @test: IR form of the C loop nest sketched in the comment at the top of
; this file.
;   %n - bound tested by the k-loop latch (k + 1 < n); also the row stride
;        used when indexing %x
;   %m - bound tested by the i-loop latch (i + 1 < m)
;   %x - noalias pointer; the element at index i * n + k is loaded, added to,
;        and stored back
;   %y - noalias readonly pointer; the element at index k is loaded
;   %z - readonly pointer; loaded once per k iteration in the input IR
; Each of the three groups of assertions below belongs to one of the RUN
; lines and is introduced by a one-line summary of the expected outcome.
20 define dso_local void @test(i64 %n, i64 %m, ptr noalias %x, ptr noalias readonly %y, ptr readonly %z) {
21 ; The loop nest is not interchanged when we only run loop interchange.
24 ; INTC-NEXT: [[CMP23:%.*]] = icmp sgt i64 [[M:%.*]], 0
25 ; INTC-NEXT: [[CMP32:%.*]] = icmp sgt i64 [[N:%.*]], 0
26 ; INTC-NEXT: br i1 [[CMP23]], label [[FOR_COND1_PREHEADER_LR_PH:%.*]], label [[FOR_END11:%.*]]
27 ; INTC: for.cond1.preheader.lr.ph:
28 ; INTC-NEXT: br i1 [[CMP32]], label [[FOR_I_PREHEADER:%.*]], label [[FOR_END11]]
29 ; INTC: for.i.preheader:
30 ; INTC-NEXT: br label [[ENTRY:%.*]]
32 ; INTC-NEXT: br label [[FOR_BODY:%.*]]
34 ; INTC-NEXT: [[K_02:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC10:%.*]], [[FOR_END:%.*]] ]
35 ; INTC-NEXT: [[TMP0:%.*]] = load i32, ptr [[Z:%.*]], align 4
36 ; INTC-NEXT: br label [[FOR_BODY3:%.*]]
38 ; INTC-NEXT: [[I_01:%.*]] = phi i32 [ 0, [[FOR_BODY]] ], [ [[INC:%.*]], [[FOR_BODY3]] ]
39 ; INTC-NEXT: [[IDXPROM:%.*]] = sext i32 [[K_02]] to i64
40 ; INTC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[Y:%.*]], i64 [[IDXPROM]]
41 ; INTC-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
42 ; INTC-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
43 ; INTC-NEXT: [[IDXPROM4:%.*]] = sext i32 [[I_01]] to i64
44 ; INTC-NEXT: [[INDEX0:%.*]] = mul i64 [[IDXPROM4]], [[N]]
45 ; INTC-NEXT: [[INDEX1:%.*]] = add i64 [[INDEX0]], [[IDXPROM]]
46 ; INTC-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i64 [[INDEX1]]
47 ; INTC-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
48 ; INTC-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP2]], [[ADD]]
49 ; INTC-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX7]], align 4
50 ; INTC-NEXT: [[INC]] = add nsw i32 [[I_01]], 1
51 ; INTC-NEXT: [[INC_EXT:%.*]] = sext i32 [[INC]] to i64
52 ; INTC-NEXT: [[CMP2:%.*]] = icmp slt i64 [[INC_EXT]], [[M]]
53 ; INTC-NEXT: br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_END]], !llvm.loop [[LOOP0:![0-9]+]]
55 ; INTC-NEXT: [[INC10]] = add nsw i32 [[K_02]], 1
56 ; INTC-NEXT: [[INC10_EXT:%.*]] = sext i32 [[INC10]] to i64
57 ; INTC-NEXT: [[CMP:%.*]] = icmp slt i64 [[INC10_EXT]], [[N]]
58 ; INTC-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END11_LOOPEXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]]
59 ; INTC: for.end11.loopexit:
60 ; INTC-NEXT: br label [[FOR_END11]]
64 ; The loop nest is interchanged when we run lnicm and loop interchange.
67 ; LNICM-NEXT: [[CMP23:%.*]] = icmp sgt i64 [[M:%.*]], 0
68 ; LNICM-NEXT: [[CMP32:%.*]] = icmp sgt i64 [[N:%.*]], 0
69 ; LNICM-NEXT: br i1 [[CMP23]], label [[FOR_COND1_PREHEADER_LR_PH:%.*]], label [[FOR_END11:%.*]]
70 ; LNICM: for.cond1.preheader.lr.ph:
71 ; LNICM-NEXT: br i1 [[CMP32]], label [[FOR_I_PREHEADER:%.*]], label [[FOR_END11]]
72 ; LNICM: for.i.preheader:
73 ; LNICM-NEXT: br label [[FOR_BODY3_PREHEADER:%.*]]
75 ; LNICM-NEXT: br label [[FOR_BODY:%.*]]
77 ; LNICM-NEXT: [[K_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC10:%.*]], [[FOR_END:%.*]] ]
78 ; LNICM-NEXT: br label [[FOR_BODY3_SPLIT1:%.*]]
79 ; LNICM: for.body3.preheader:
80 ; LNICM-NEXT: [[TMP0:%.*]] = load i32, ptr [[Z:%.*]], align 4
81 ; LNICM-NEXT: br label [[FOR_BODY3:%.*]]
83 ; LNICM-NEXT: [[I_01:%.*]] = phi i32 [ [[TMP3:%.*]], [[FOR_BODY3_SPLIT:%.*]] ], [ 0, [[FOR_BODY3_PREHEADER]] ]
84 ; LNICM-NEXT: br label [[ENTRY]]
85 ; LNICM: for.body3.split1:
86 ; LNICM-NEXT: [[IDXPROM:%.*]] = sext i32 [[K_02]] to i64
87 ; LNICM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[Y:%.*]], i64 [[IDXPROM]]
88 ; LNICM-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
89 ; LNICM-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
90 ; LNICM-NEXT: [[IDXPROM4:%.*]] = sext i32 [[I_01]] to i64
91 ; LNICM-NEXT: [[INDEX0:%.*]] = mul i64 [[IDXPROM4]], [[N]]
92 ; LNICM-NEXT: [[INDEX1:%.*]] = add i64 [[INDEX0]], [[IDXPROM]]
93 ; LNICM-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i64 [[INDEX1]]
94 ; LNICM-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
95 ; LNICM-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP2]], [[ADD]]
96 ; LNICM-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX7]], align 4
97 ; LNICM-NEXT: [[INC:%.*]] = add nsw i32 [[I_01]], 1
98 ; LNICM-NEXT: [[INC_EXT:%.*]] = sext i32 [[INC]] to i64
99 ; LNICM-NEXT: [[CMP2:%.*]] = icmp slt i64 [[INC_EXT]], [[M]]
100 ; LNICM-NEXT: br label [[FOR_END]]
101 ; LNICM: for.body3.split:
102 ; LNICM-NEXT: [[TMP3]] = add nsw i32 [[I_01]], 1
103 ; LNICM-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
104 ; LNICM-NEXT: [[TMP5:%.*]] = icmp slt i64 [[TMP4]], [[M]]
105 ; LNICM-NEXT: br i1 [[TMP5]], label [[FOR_BODY3]], label [[FOR_END11_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
107 ; LNICM-NEXT: [[INC10]] = add nsw i32 [[K_02]], 1
108 ; LNICM-NEXT: [[INC10_EXT:%.*]] = sext i32 [[INC10]] to i64
109 ; LNICM-NEXT: [[CMP:%.*]] = icmp slt i64 [[INC10_EXT]], [[N]]
110 ; LNICM-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_BODY3_SPLIT]], !llvm.loop [[LOOP2:![0-9]+]]
111 ; LNICM: for.end11.loopexit:
112 ; LNICM-NEXT: br label [[FOR_END11]]
114 ; LNICM-NEXT: ret void
116 ; The loop nest is not interchanged when we run licm and loop interchange.
119 ; LICM-NEXT: [[CMP23:%.*]] = icmp sgt i64 [[M:%.*]], 0
120 ; LICM-NEXT: [[CMP32:%.*]] = icmp sgt i64 [[N:%.*]], 0
121 ; LICM-NEXT: br i1 [[CMP23]], label [[FOR_COND1_PREHEADER_LR_PH:%.*]], label [[FOR_END11:%.*]]
122 ; LICM: for.cond1.preheader.lr.ph:
123 ; LICM-NEXT: br i1 [[CMP32]], label [[FOR_I_PREHEADER:%.*]], label [[FOR_END11]]
124 ; LICM: for.i.preheader:
125 ; LICM-NEXT: br label [[ENTRY:%.*]]
127 ; LICM-NEXT: [[TMP0:%.*]] = load i32, ptr [[Z:%.*]], align 4
128 ; LICM-NEXT: br label [[FOR_BODY:%.*]]
130 ; LICM-NEXT: [[K_02:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC10:%.*]], [[FOR_END:%.*]] ]
131 ; LICM-NEXT: [[IDXPROM:%.*]] = sext i32 [[K_02]] to i64
132 ; LICM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[Y:%.*]], i64 [[IDXPROM]]
133 ; LICM-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
134 ; LICM-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
135 ; LICM-NEXT: br label [[FOR_BODY3:%.*]]
137 ; LICM-NEXT: [[I_01:%.*]] = phi i32 [ 0, [[FOR_BODY]] ], [ [[INC:%.*]], [[FOR_BODY3]] ]
138 ; LICM-NEXT: [[IDXPROM4:%.*]] = sext i32 [[I_01]] to i64
139 ; LICM-NEXT: [[INDEX0:%.*]] = mul i64 [[IDXPROM4]], [[N]]
140 ; LICM-NEXT: [[INDEX1:%.*]] = add i64 [[INDEX0]], [[IDXPROM]]
141 ; LICM-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i64 [[INDEX1]]
142 ; LICM-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
143 ; LICM-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP2]], [[ADD]]
144 ; LICM-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX7]], align 4
145 ; LICM-NEXT: [[INC]] = add nsw i32 [[I_01]], 1
146 ; LICM-NEXT: [[INC_EXT:%.*]] = sext i32 [[INC]] to i64
147 ; LICM-NEXT: [[CMP2:%.*]] = icmp slt i64 [[INC_EXT]], [[M]]
148 ; LICM-NEXT: br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_END]], !llvm.loop [[LOOP0:![0-9]+]]
150 ; LICM-NEXT: [[INC10]] = add nsw i32 [[K_02]], 1
151 ; LICM-NEXT: [[INC10_EXT:%.*]] = sext i32 [[INC10]] to i64
152 ; LICM-NEXT: [[CMP:%.*]] = icmp slt i64 [[INC10_EXT]], [[N]]
153 ; LICM-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END11_LOOPEXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]]
154 ; LICM: for.end11.loopexit:
155 ; LICM-NEXT: br label [[FOR_END11]]
157 ; LICM-NEXT: ret void
; Guard: the loop nest is only entered when both %m > 0 and %n > 0;
; otherwise control goes straight to %for.end11.
161 %cmp23 = icmp sgt i64 %m, 0
162 %cmp32 = icmp sgt i64 %n, 0
163 br i1 %cmp23, label %for.cond1.preheader.lr.ph, label %for.end11
165 for.cond1.preheader.lr.ph: ; preds = %gurad
166 br i1 %cmp32, label %for.i.preheader, label %for.end11
168 for.i.preheader: ; preds = %for.cond1.preheader.lr.ph
171 entry: ; preds = %for.i.preheader
; k-loop: %k.02 is the induction variable (starts at 0, stepped by %inc10).
175 %k.02 = phi i32 [ 0, %entry ], [ %inc10, %for.end ]
; Load of *z. It sits between the k-loop phi and the i-loop in the input IR;
; it is the load that the header comment says should be hoisted out of the
; whole nest.
176 %0 = load i32, ptr %z, align 4
; i-loop: %i.01 is the induction variable (starts at 0, stepped by %inc).
180 %i.01 = phi i32 [ 0, %for.body ], [ %inc, %for.body3 ]
; %add = y[k] + *z. None of these operands depend on %i.01, so the
; computation is invariant in the i-loop but not in the k-loop.
181 %idxprom = sext i32 %k.02 to i64
182 %arrayidx = getelementptr inbounds i32, ptr %y, i64 %idxprom
183 %1 = load i32, ptr %arrayidx, align 4
184 %add = add nsw i32 %1, %0
; Flattened element index into %x: i * n + k.
185 %idxprom4 = sext i32 %i.01 to i64
186 %index0 = mul i64 %idxprom4, %n
187 %index1 = add i64 %index0, %idxprom
188 %arrayidx7 = getelementptr inbounds i32, ptr %x, i64 %index1
; x[i * n + k] += y[k] + *z
189 %2 = load i32, ptr %arrayidx7, align 4
190 %add8 = add nsw i32 %2, %add
191 store i32 %add8, ptr %arrayidx7, align 4
; i-loop latch: iterate again while i + 1 < %m.
192 %inc = add nsw i32 %i.01, 1
193 %inc.ext = sext i32 %inc to i64
194 %cmp2 = icmp slt i64 %inc.ext, %m
195 br i1 %cmp2, label %for.body3, label %for.end, !llvm.loop !0
; k-loop latch: iterate again while k + 1 < %n.
198 %inc10 = add nsw i32 %k.02, 1
199 %inc10.ext = sext i32 %inc10 to i64
200 %cmp = icmp slt i64 %inc10.ext, %n
201 br i1 %cmp, label %for.body, label %for.end11, !llvm.loop !2
207 !0 = distinct !{!0, !1}
208 !1 = !{!"llvm.loop.mustprogress"}
209 !2 = distinct !{!2, !1}