1 // RUN: mlir-opt -split-input-file -allow-unregistered-dialect -affine-loop-coalescing --cse --mlir-print-local-scope %s | FileCheck %s
3 // CHECK-LABEL: @one_3d_nest
4 func.func @one_3d_nest() {
5 // Capture original bounds. Note that for zero-based step-one loops, the
6 // upper bound is also the number of iterations.
7 // CHECK-DAG: %[[orig_lb:.*]] = arith.constant 0
8 // CHECK-DAG: %[[orig_step:.*]] = arith.constant 1
9 // CHECK-DAG: %[[range:.*]] = arith.constant 7056
10 %c0 = arith.constant 0 : index
11 %c1 = arith.constant 1 : index
12 %c2 = arith.constant 2 : index
13 %c3 = arith.constant 3 : index
14 %c42 = arith.constant 42 : index
15 %c56 = arith.constant 56 : index
16 // The range of the new scf.
17 // Updated loop bounds.
18 // CHECK: scf.for %[[i:.*]] = %[[orig_lb]] to %[[range]] step %[[orig_step]]
19 scf.for %i = %c0 to %c42 step %c1 {
20 // Inner loops must have been removed.
23 // Reconstruct original IVs from the linearized one.
24 // CHECK: %[[delinearize:.+]]:3 = affine.delinearize_index %[[i]]
25 // CHECK-SAME: into (42, 56, 3)
26 scf.for %j = %c0 to %c56 step %c1 {
27 scf.for %k = %c0 to %c3 step %c1 {
28 // CHECK: "use"(%[[delinearize]]#0, %[[delinearize]]#1, %[[delinearize]]#2)
29 "use"(%i, %j, %k) : (index, index, index) -> ()
38 // Check that there is no chasing the replacement of value uses by ensuring
39 // multiple uses of loop induction variables get rewritten to the same values.
41 // CHECK-LABEL: @multi_use
42 func.func @multi_use() {
43 %c0 = arith.constant 0 : index
44 %c1 = arith.constant 1 : index
45 %c10 = arith.constant 10 : index
46 // CHECK: scf.for %[[iv:.*]] =
47 scf.for %i = %c1 to %c10 step %c1 {
48 scf.for %j = %c1 to %c10 step %c1 {
49 scf.for %k = %c1 to %c10 step %c1 {
50 // CHECK: %[[delinearize:.+]]:3 = affine.delinearize_index %[[iv]]
51 // CHECK: %[[k:.*]] = affine.apply affine_map<(d0) -> (d0 + 1)>(%[[delinearize]]#2)
52 // CHECK: %[[j:.*]] = affine.apply affine_map<(d0) -> (d0 + 1)>(%[[delinearize]]#1)
53 // CHECK: %[[i:.*]] = affine.apply affine_map<(d0) -> (d0 + 1)>(%[[delinearize]]#0)
55 // CHECK: "use1"(%[[i]], %[[j]], %[[k]])
56 "use1"(%i,%j,%k) : (index,index,index) -> ()
57 // CHECK: "use2"(%[[i]], %[[k]], %[[j]])
58 "use2"(%i,%k,%j) : (index,index,index) -> ()
59 // CHECK: "use3"(%[[k]], %[[j]], %[[i]])
60 "use3"(%k,%j,%i) : (index,index,index) -> ()
69 func.func @unnormalized_loops() {
70 // Normalized lower bound and step for the outer scf.
71 // CHECK-DAG: %[[lb_i:.*]] = arith.constant 0
72 // CHECK-DAG: %[[step_i:.*]] = arith.constant 1
74 // CHECK-DAG: %[[range:.*]] = arith.constant 12
76 %c2 = arith.constant 2 : index
77 %c3 = arith.constant 3 : index
78 %c5 = arith.constant 5 : index
79 %c7 = arith.constant 7 : index
80 %c10 = arith.constant 10 : index
81 %c17 = arith.constant 17 : index
84 // New bounds of the outer scf.
85 // CHECK: scf.for %[[i:.*]] = %[[lb_i]] to %[[range]] step %[[step_i]]
86 scf.for %i = %c5 to %c10 step %c2 {
87 // The inner loop has been removed.
89 scf.for %j = %c7 to %c17 step %c3 {
90 // The IVs are rewritten.
91 // CHECK: %[[delinearize:.+]]:2 = affine.delinearize_index %[[i]]
92 // CHECK-SAME: into (3, 4)
93 // CHECK: %[[orig_j:.*]] = affine.apply affine_map<(d0) -> (d0 * 3 + 7)>(%[[delinearize]]#1)
94 // CHECK: %[[orig_i:.*]] = affine.apply affine_map<(d0) -> (d0 * 2 + 5)>(%[[delinearize]]#0)
95 // CHECK: "use"(%[[orig_i]], %[[orig_j]])
96 "use"(%i, %j) : (index, index) -> ()
104 func.func @noramalized_loops_with_yielded_iter_args() {
105 // CHECK-DAG: %[[orig_lb:.*]] = arith.constant 0
106 // CHECK-DAG: %[[orig_step:.*]] = arith.constant 1
107 // CHECK-DAG: %[[range:.*]] = arith.constant 7056
108 %c0 = arith.constant 0 : index
109 %c1 = arith.constant 1 : index
110 %c3 = arith.constant 3 : index
111 %c42 = arith.constant 42 : index
112 %c56 = arith.constant 56 : index
113 // The range of the new scf.
115 // Updated loop bounds.
116 // CHECK: scf.for %[[i:.*]] = %[[orig_lb]] to %[[range]] step %[[orig_step]] iter_args(%[[VAL_1:.*]] = %[[orig_lb]]) -> (index) {
117 %2:1 = scf.for %i = %c0 to %c42 step %c1 iter_args(%arg0 = %c0) -> (index) {
118 // Inner loops must have been removed.
119 // CHECK-NOT: scf.for
121 // Reconstruct original IVs from the linearized one.
122 // CHECK: %[[delinearize:.+]]:3 = affine.delinearize_index %[[i]] into (42, 56, 3)
123 %1:1 = scf.for %j = %c0 to %c56 step %c1 iter_args(%arg1 = %arg0) -> (index){
124 %0:1 = scf.for %k = %c0 to %c3 step %c1 iter_args(%arg2 = %arg1) -> (index) {
125 // CHECK: "use"(%[[delinearize]]#0, %[[delinearize]]#1, %[[delinearize]]#2)
126 "use"(%i, %j, %k) : (index, index, index) -> ()
127 // CHECK: scf.yield %[[VAL_1]] : index
128 scf.yield %arg2 : index
130 scf.yield %0#0 : index
132 scf.yield %1#0 : index
139 func.func @noramalized_loops_with_shuffled_yielded_iter_args() {
140 // CHECK-DAG: %[[orig_lb:.*]] = arith.constant 0
141 // CHECK-DAG: %[[orig_step:.*]] = arith.constant 1
142 %c0 = arith.constant 0 : index
143 %c1 = arith.constant 1 : index
144 %c3 = arith.constant 3 : index
145 %c42 = arith.constant 42 : index
146 %c56 = arith.constant 56 : index
147 // The range of the new scf.
148 // CHECK-DAG:%[[range:.*]] = arith.constant 7056
150 // Updated loop bounds.
151 // CHECK: scf.for %[[i:.*]] = %[[orig_lb]] to %[[range]] step %[[orig_step]] iter_args(%[[VAL_1:.*]] = %[[orig_lb]], %[[VAL_2:.*]] = %[[orig_lb]]) -> (index, index) {
152 %2:2 = scf.for %i = %c0 to %c42 step %c1 iter_args(%arg0 = %c0, %arg1 = %c0) -> (index, index) {
153 // Inner loops must have been removed.
154 // CHECK-NOT: scf.for
156 // Reconstruct original IVs from the linearized one.
157 // CHECK: %[[delinearize:.+]]:3 = affine.delinearize_index %[[i]]
158 // CHECK-SAME: into (42, 56, 3)
159 %1:2 = scf.for %j = %c0 to %c56 step %c1 iter_args(%arg2 = %arg0, %arg3 = %arg1) -> (index, index){
160 %0:2 = scf.for %k = %c0 to %c3 step %c1 iter_args(%arg4 = %arg2, %arg5 = %arg3) -> (index, index) {
161 // CHECK: "use"(%[[delinearize]]#0, %[[delinearize]]#1, %[[delinearize]]#2)
162 "use"(%i, %j, %k) : (index, index, index) -> ()
163 // CHECK: scf.yield %[[VAL_2]], %[[VAL_1]] : index, index
164 scf.yield %arg5, %arg4 : index, index
166 scf.yield %0#0, %0#1 : index, index
168 scf.yield %1#0, %1#1 : index, index
175 func.func @noramalized_loops_with_yielded_non_iter_args() {
176 // CHECK-DAG: %[[orig_lb:.*]] = arith.constant 0
177 // CHECK-DAG: %[[orig_step:.*]] = arith.constant 1
178 %c0 = arith.constant 0 : index
179 %c1 = arith.constant 1 : index
180 %c3 = arith.constant 3 : index
181 %c42 = arith.constant 42 : index
182 %c56 = arith.constant 56 : index
183 // The range of the new scf.
184 // CHECK-DAG: %[[range:.*]] = arith.constant 7056
186 // Updated loop bounds.
187 // CHECK: scf.for %[[i:.*]] = %[[orig_lb]] to %[[range]] step %[[orig_step]] iter_args(%[[VAL_1:.*]] = %[[orig_lb]]) -> (index) {
188 %2:1 = scf.for %i = %c0 to %c42 step %c1 iter_args(%arg0 = %c0) -> (index) {
189 // Inner loops must have been removed.
190 // CHECK-NOT: scf.for
192 // Reconstruct original IVs from the linearized one.
193 // CHECK: %[[delinearize:.+]]:3 = affine.delinearize_index %[[i]]
194 // CHECK-SAME: into (42, 56, 3)
195 %1:1 = scf.for %j = %c0 to %c56 step %c1 iter_args(%arg1 = %arg0) -> (index){
196 %0:1 = scf.for %k = %c0 to %c3 step %c1 iter_args(%arg2 = %arg1) -> (index) {
197 // CHECK: %[[res:.*]] = "use"(%[[delinearize]]#0, %[[delinearize]]#1, %[[delinearize]]#2)
198 %res = "use"(%i, %j, %k) : (index, index, index) -> (index)
199 // CHECK: scf.yield %[[res]] : index
200 scf.yield %res : index
202 scf.yield %0#0 : index
204 scf.yield %1#0 : index
211 // Check with parametric loop bounds and steps, capture the bounds here.
212 // CHECK-LABEL: @parametric
213 // CHECK-SAME: %[[orig_lb1:[A-Za-z0-9]+]]:
214 // CHECK-SAME: %[[orig_ub1:[A-Za-z0-9]+]]:
215 // CHECK-SAME: %[[orig_step1:[A-Za-z0-9]+]]:
216 // CHECK-SAME: %[[orig_lb2:[A-Za-z0-9]+]]:
217 // CHECK-SAME: %[[orig_ub2:[A-Za-z0-9]+]]:
218 // CHECK-SAME: %[[orig_step2:[A-Za-z0-9]+]]:
219 func.func @parametric(%lb1 : index, %ub1 : index, %step1 : index,
220 %lb2 : index, %ub2 : index, %step2 : index) {
221 // Compute the number of iterations for each of the loops and the total
222 // number of iterations.
223 // CHECK: %[[normalized_i:.*]] = affine.apply
224 // CHECK-SAME: affine_map<()[s0, s1, s2] -> ((-s0 + s1) ceildiv s2)>()[%[[orig_lb1]], %[[orig_ub1]], %[[orig_step1]]]
225 // CHECK: %[[c0:.+]] = arith.constant 0
226 // CHECK: %[[c1:.+]] = arith.constant 1
227 // CHECK: %[[normalized_j:.*]] = affine.apply
228 // CHECK-SAME: affine_map<()[s0, s1, s2] -> ((-s0 + s1) ceildiv s2)>()[%[[orig_lb2]], %[[orig_ub2]], %[[orig_step2]]]
229 // CHECK: %[[range:.+]] = affine.apply
230 // CHECK-SAME: affine_map<()[s0, s1, s2, s3, s4, s5] -> (((-s0 + s1) ceildiv s2) * ((-s3 + s4) ceildiv s5))>()
231 // CHECK-SAME: [%[[orig_lb1]], %[[orig_ub1]], %[[orig_step1]], %[[orig_lb2]], %[[orig_ub2]], %[[orig_step2]]]
233 // Check that the outer loop is updated.
234 // CHECK: scf.for %[[i:.*]] = %[[c0]] to %[[range]] step %[[c1]]
235 scf.for %i = %lb1 to %ub1 step %step1 {
236 // Check that the inner loop is removed.
237 // CHECK-NOT: scf.for
238 scf.for %j = %lb2 to %ub2 step %step2 {
239 // Remapping of the induction variables.
240 // CHECK: %[[delinearize:.+]]:2 = affine.delinearize_index %[[i]] into (%[[normalized_i]], %[[normalized_j]])
241 // CHECK: %[[orig_j:.*]] = affine.apply affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
242 // CHECK-SAME: (%[[delinearize]]#1)[%[[orig_lb2]], %[[orig_step2]]]
243 // CHECK: %[[orig_i:.*]] = affine.apply affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
244 // CHECK-SAME: (%[[delinearize]]#0)[%[[orig_lb1]], %[[orig_step1]]]
246 // CHECK: "foo"(%[[orig_i]], %[[orig_j]])
247 "foo"(%i, %j) : (index, index) -> ()
255 // CHECK-LABEL: @two_bands
256 func.func @two_bands() {
257 %c0 = arith.constant 0 : index
258 %c1 = arith.constant 1 : index
259 %c10 = arith.constant 10 : index
260 // CHECK: %[[outer_range:.*]] = arith.constant 100
261 // CHECK: scf.for %{{.*}} = %{{.*}} to %[[outer_range]]
262 scf.for %i = %c0 to %c10 step %c1 {
263 // Check that the "j" loop was removed and that the inner loops were
264 // coalesced as well. The preparation step for coalescing will inject the
265 // subtraction operation unlike the IV remapping.
266 // CHECK-NOT: scf.for
267 // CHECK: affine.delinearize_index
268 scf.for %j = %c0 to %c10 step %c1 {
269 // The inner pair of loops is coalesced separately.
271 scf.for %k = %i to %j step %c1 {
272 // CHECK-NOT: scf.for
273 scf.for %l = %i to %j step %c1 {
284 // Check coalescing of affine.for loops when all the loops have constant upper bound.
285 func.func @coalesce_affine_for() {
286 affine.for %i = 0 to 16 {
287 affine.for %j = 0 to 64 {
288 affine.for %k = 0 to 8 {
289 "test.foo"(%i, %j, %k) : (index, index, index) -> ()
295 // CHECK-DAG: %[[T0:.*]] = affine.apply affine_map<() -> (16)>()
296 // CHECK-DAG: %[[T1:.*]] = affine.apply affine_map<() -> (64)>()
297 // CHECK-DAG: %[[T2:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T0]])[%[[T1]]]
298 // CHECK-DAG: %[[T3:.*]] = affine.apply affine_map<() -> (8)>()
299 // CHECK-DAG: %[[T4:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T2]])[%[[T3]]]
300 // CHECK: affine.for %[[IV:.*]] = 0 to %[[T4]]
301 // CHECK-DAG: %[[K:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[IV]])[%[[T3]]]
302 // CHECK-DAG: %[[T6:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[IV]])[%[[T3]]]
303 // CHECK-DAG: %[[J:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[T6]])[%[[T1]]]
304 // CHECK-DAG: %[[I:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[T6]])[%[[T1]]]
305 // CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]])
307 // CHECK-NEXT: return
311 // Check coalescing of affine.for loops when all the loops have non constant upper bounds.
312 func.func @coalesce_affine_for(%arg0: memref<?x?xf32>) {
313 %c0 = arith.constant 0 : index
314 %M = memref.dim %arg0, %c0 : memref<?x?xf32>
315 %N = memref.dim %arg0, %c0 : memref<?x?xf32>
316 %K = memref.dim %arg0, %c0 : memref<?x?xf32>
317 affine.for %i = 0 to %M {
318 affine.for %j = 0 to %N {
319 affine.for %k = 0 to %K {
320 "test.foo"(%i, %j, %k) : (index, index, index) -> ()
326 // CHECK: %[[DIM:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref<?x?xf32>
327 // CHECK-DAG: %[[T0:.*]] = affine.apply affine_map<()[s0] -> (s0)>()[%[[DIM]]]
328 // CHECK-DAG: %[[T1:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T0]])[%[[T0]]]
329 // CHECK-DAG: %[[T2:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T1]])[%[[T0]]]
330 // CHECK: affine.for %[[IV:.*]] = 0 to %[[T2]]
331 // CHECK-DAG: %[[K:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[IV]])[%[[T0]]]
332 // CHECK-DAG: %[[T9:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[IV]])[%[[T0]]]
333 // CHECK-DAG: %[[J:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[T9]])[%[[T0]]]
334 // CHECK-DAG: %[[I:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[T9]])[%[[T0]]]
335 // CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]])
337 // CHECK-NEXT: return
341 // Check coalescing of affine.for loops when some of the loop has constant upper bounds while others have nin constant upper bounds.
342 func.func @coalesce_affine_for(%arg0: memref<?x?xf32>) {
343 %c0 = arith.constant 0 : index
344 %M = memref.dim %arg0, %c0 : memref<?x?xf32>
345 %N = memref.dim %arg0, %c0 : memref<?x?xf32>
346 affine.for %i = 0 to %M {
347 affine.for %j = 0 to %N {
348 affine.for %k = 0 to 64 {
349 "test.foo"(%i, %j, %k) : (index, index, index) -> ()
355 // CHECK: %[[DIM:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref<?x?xf32>
356 // CHECK-DAG: %[[T0:.*]] = affine.apply affine_map<()[s0] -> (s0)>()[%[[DIM]]]
357 // CHECK-DAG: %[[T1:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T0]])[%[[T0]]]
358 // CHECK-DAG: %[[T2:.*]] = affine.apply affine_map<() -> (64)>()
359 // CHECK-DAG: %[[T3:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T1]])[%[[T2]]]
360 // CHECK: affine.for %[[IV:.*]] = 0 to %[[T3]]
361 // CHECK-DAG: %[[K:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[IV]])[%[[T2]]]
362 // CHECK-DAG: %[[T5:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[IV]])[%[[T2]]]
363 // CHECK-DAG: %[[J:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[T5]])[%[[T0]]]
364 // CHECK-DAG: %[[I:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[T5]])[%[[T0]]]
365 // CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]])
367 // CHECK-NEXT: return
371 // Check coalescing of affine.for loops when upper bound contains multi result upper bound map.
372 #myMap = affine_map<()[s1] -> (s1, -s1)>
373 func.func @coalesce_affine_for(%arg0: memref<?x?xf32>) {
374 %c0 = arith.constant 0 : index
375 %M = memref.dim %arg0, %c0 : memref<?x?xf32>
376 %N = memref.dim %arg0, %c0 : memref<?x?xf32>
377 %K = memref.dim %arg0, %c0 : memref<?x?xf32>
378 affine.for %i = 0 to min #myMap()[%M] {
379 affine.for %j = 0 to %N {
380 affine.for %k = 0 to %K {
381 "test.foo"(%i, %j, %k) : (index, index, index) -> ()
387 // CHECK: %[[DIM:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref<?x?xf32>
388 // CHECK-DAG: %[[T0:.*]] = affine.min affine_map<()[s0] -> (s0, -s0)>()[%[[DIM]]]
389 // CHECK-DAG: %[[T1:.*]] = affine.apply affine_map<()[s0] -> (s0)>()[%[[DIM]]]
390 // CHECK-DAG: %[[T2:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T0]])[%[[T1]]]
391 // CHECK-DAG: %[[T3:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 * s0)>(%[[T2]])[%[[T1]]]
392 // CHECK: affine.for %[[IV:.*]] = 0 to %[[T3]]
393 // CHECK-DAG: %[[K:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[IV]])[%[[T1]]]
394 // CHECK-DAG: %[[T5:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[IV]])[%[[T1]]]
395 // CHECK-DAG: %[[J:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 mod s0)>(%[[T5]])[%[[T1]]]
396 // CHECK-DAG: %[[I:.*]] = affine.apply affine_map<(d0)[s0] -> (d0 floordiv s0)>(%[[T5]])[%[[T1]]]
397 // CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]])
399 // CHECK-NEXT: return
403 #map0 = affine_map<(d0) -> (d0 * 110)>
404 #map1 = affine_map<(d0) -> (696, d0 * 110 + 110)>
405 func.func @test_loops_do_not_get_coalesced() {
406 affine.for %i = 0 to 7 {
407 affine.for %j = #map0(%i) to min #map1(%i) {
408 "use"(%i, %j) : (index, index) -> ()
413 // CHECK: affine.for %[[IV0:.*]] = 0 to 7
414 // CHECK-NEXT: affine.for %[[IV1:.*]] = affine_map<(d0) -> (d0 * 110)>(%[[IV0]]) to min affine_map<(d0) -> (696, d0 * 110 + 110)>(%[[IV0]])
415 // CHECK-NEXT: "use"(%[[IV0]], %[[IV1]])
418 // CHECK-NEXT: return