1 // RUN: mlir-opt -allow-unregistered-dialect %s -test-loop-fusion=test-loop-fusion-dependence-check -split-input-file -verify-diagnostics | FileCheck %s
5 // CHECK-LABEL: func @cannot_fuse_would_create_cycle() {
6 func.func @cannot_fuse_would_create_cycle() {
// Three loops over memrefs '%a', '%b' and '%c'. The remark annotations in the
// body pin the diagnostic: a block-level dependence prevents fusing loop
// nest 0 with loop nest 2, in either direction, at depth 0.
7 %a = memref.alloc() : memref<10xf32>
8 %b = memref.alloc() : memref<10xf32>
9 %c = memref.alloc() : memref<10xf32>
11 %cf7 = arith.constant 7.0 : f32
13 // Set up the following dependences:
14 // 1) loop0 -> loop1 on memref '%a' (loop0 loads it, loop1 stores to it)
15 // 2) loop0 -> loop2 on memref '%b' (loop0 stores to it, loop2 loads it)
16 // 3) loop1 -> loop2 on memref '%c' (loop1 loads it, loop2 stores to it)
18 // Fusing loop nest '%i0' and loop nest '%i2' would create a cycle.
19 affine.for %i0 = 0 to 10 {
20 // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 2 at depth 0}}
21 %v0 = affine.load %a[%i0] : memref<10xf32>
22 affine.store %cf7, %b[%i0] : memref<10xf32>
24 affine.for %i1 = 0 to 10 {
25 affine.store %cf7, %a[%i1] : memref<10xf32>
26 %v1 = affine.load %c[%i1] : memref<10xf32>
28 affine.for %i2 = 0 to 10 {
29 // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 2 into loop nest 0 at depth 0}}
30 %v2 = affine.load %b[%i2] : memref<10xf32>
31 affine.store %cf7, %c[%i2] : memref<10xf32>
38 // CHECK-LABEL: func @can_fuse_rar_dependence() {
39 func.func @can_fuse_rar_dependence() {
// Same three-loop shape as the cycle test, except that loop1 loads '%a'
// instead of storing to it. No remark annotations appear in this function.
40 %a = memref.alloc() : memref<10xf32>
41 %b = memref.alloc() : memref<10xf32>
42 %c = memref.alloc() : memref<10xf32>
44 %cf7 = arith.constant 7.0 : f32
46 // Set up the following dependences:
47 // The loop0 -> loop1 relation on '%a' is read-after-read: both loops only load '%a'.
48 // 1) loop0 -> loop1 on memref '%a' (load in loop0, load in loop1)
49 // 2) loop0 -> loop2 on memref '%b' (loop0 stores to it, loop2 loads it)
50 // 3) loop1 -> loop2 on memref '%c' (loop1 loads it, loop2 stores to it)
52 // Should fuse: no fusion-preventing remarks should be emitted for this test.
53 affine.for %i0 = 0 to 10 {
54 %v0 = affine.load %a[%i0] : memref<10xf32>
55 affine.store %cf7, %b[%i0] : memref<10xf32>
57 affine.for %i1 = 0 to 10 {
58 %v1 = affine.load %a[%i1] : memref<10xf32>
59 %v2 = affine.load %c[%i1] : memref<10xf32>
61 affine.for %i2 = 0 to 10 {
62 %v3 = affine.load %b[%i2] : memref<10xf32>
63 affine.store %cf7, %c[%i2] : memref<10xf32>
70 // CHECK-LABEL: func @can_fuse_different_memrefs() {
71 func.func @can_fuse_different_memrefs() {
// Same three-loop shape as the cycle test, except that loop1 stores to a
// fourth memref '%d' instead of '%a'. No remark annotations appear in this
// function.
72 %a = memref.alloc() : memref<10xf32>
73 %b = memref.alloc() : memref<10xf32>
74 %c = memref.alloc() : memref<10xf32>
75 %d = memref.alloc() : memref<10xf32>
77 %cf7 = arith.constant 7.0 : f32
79 // Set up the following dependences:
80 // loop1 stores to the unrelated memref '%d' rather than '%a', so:
81 // 1) loop0 and loop1 share no memref (loop0 loads '%a', loop1 stores to '%d')
82 // 2) loop0 -> loop2 on memref '%b' (loop0 stores to it, loop2 loads it)
83 // 3) loop1 -> loop2 on memref '%c' (loop1 loads it, loop2 stores to it)
85 // Should fuse: no fusion-preventing remarks should be emitted for this test.
86 affine.for %i0 = 0 to 10 {
87 %v0 = affine.load %a[%i0] : memref<10xf32>
88 affine.store %cf7, %b[%i0] : memref<10xf32>
90 affine.for %i1 = 0 to 10 {
91 affine.store %cf7, %d[%i1] : memref<10xf32>
92 %v1 = affine.load %c[%i1] : memref<10xf32>
94 affine.for %i2 = 0 to 10 {
95 %v2 = affine.load %b[%i2] : memref<10xf32>
96 affine.store %cf7, %c[%i2] : memref<10xf32>
103 // CHECK-LABEL: func @should_not_fuse_across_intermediate_store() {
104 func.func @should_not_fuse_across_intermediate_store() {
// Two loops that each load from memref '%0', with a store to '%0[%c0]' placed
// between them. The remark annotations pin a block-level dependence that
// prevents fusing loop nests 0 and 1, in either direction, at depth 0.
105 %0 = memref.alloc() : memref<10xf32>
106 %c0 = arith.constant 0 : index
107 %cf7 = arith.constant 7.0 : f32
109 affine.for %i0 = 0 to 10 {
110 // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 0}}
111 %v0 = affine.load %0[%i0] : memref<10xf32>
112 "op0"(%v0) : (f32) -> ()
115 // Should not fuse loop nests '%i0' and '%i1' across the top-level store.
116 affine.store %cf7, %0[%c0] : memref<10xf32>
118 affine.for %i1 = 0 to 10 {
119 // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 0}}
120 %v1 = affine.load %0[%i1] : memref<10xf32>
121 "op1"(%v1) : (f32) -> ()
128 // CHECK-LABEL: func @should_not_fuse_across_intermediate_load() {
129 func.func @should_not_fuse_across_intermediate_load() {
// Two loops that each store to memref '%0', with a load of '%0[%c0]' placed
// between them. The remark annotations pin a block-level dependence that
// prevents fusing loop nests 0 and 1, in either direction, at depth 0.
130 %0 = memref.alloc() : memref<10xf32>
131 %c0 = arith.constant 0 : index
132 %cf7 = arith.constant 7.0 : f32
134 affine.for %i0 = 0 to 10 {
135 // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 0}}
136 affine.store %cf7, %0[%i0] : memref<10xf32>
139 // Should not fuse loop nests '%i0' and '%i1' across the top-level load.
140 %v0 = affine.load %0[%c0] : memref<10xf32>
141 "op0"(%v0) : (f32) -> ()
143 affine.for %i1 = 0 to 10 {
144 // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 0}}
145 affine.store %cf7, %0[%i1] : memref<10xf32>
153 // CHECK-LABEL: func @should_not_fuse_across_ssa_value_def() {
154 func.func @should_not_fuse_across_ssa_value_def() {
// Loop '%i0' copies '%0[%i0]' into '%1[%i0]'. Between it and loop '%i1' sit a
// load of '%1[%c0]' and the definition of '%c2', which loop '%i1' uses as a
// store index. The remark annotations pin a block-level dependence that
// prevents fusing loop nests 0 and 1, in either direction, at depth 0.
155 %0 = memref.alloc() : memref<10xf32>
156 %1 = memref.alloc() : memref<10xf32>
157 %c0 = arith.constant 0 : index
158 %cf7 = arith.constant 7.0 : f32
160 affine.for %i0 = 0 to 10 {
161 // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 0}}
162 %v0 = affine.load %0[%i0] : memref<10xf32>
163 affine.store %v0, %1[%i0] : memref<10xf32>
166 // Loop nest '%i0' cannot be fused past the load from '%1' due to a RAW dependence.
167 %v1 = affine.load %1[%c0] : memref<10xf32>
168 "op0"(%v1) : (f32) -> ()
170 // Loop nest '%i1' cannot be fused past SSA value def '%c2', which it uses.
171 %c2 = arith.constant 2 : index
173 affine.for %i1 = 0 to 10 {
174 // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 0}}
175 affine.store %cf7, %0[%c2] : memref<10xf32>
183 // CHECK-LABEL: func @should_not_fuse_store_before_load() {
184 func.func @should_not_fuse_store_before_load() {
// Loops '%i0' and '%i2' each store to and then load from memref '%0' at their
// own induction variable; loop '%i1' between them only loads from '%0'. The
// remark annotations pin a block-level dependence that prevents fusing loop
// nests 0 and 2, in either direction, at depth 0.
185 %0 = memref.alloc() : memref<10xf32>
186 %c0 = arith.constant 0 : index
187 %cf7 = arith.constant 7.0 : f32
189 affine.for %i0 = 0 to 10 {
190 // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 2 at depth 0}}
191 affine.store %cf7, %0[%i0] : memref<10xf32>
192 %v0 = affine.load %0[%i0] : memref<10xf32>
195 affine.for %i1 = 0 to 10 {
196 %v1 = affine.load %0[%i1] : memref<10xf32>
199 affine.for %i2 = 0 to 10 {
200 // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 2 into loop nest 0 at depth 0}}
201 affine.store %cf7, %0[%i2] : memref<10xf32>
202 %v2 = affine.load %0[%i2] : memref<10xf32>
209 // CHECK-LABEL: func @should_not_fuse_across_load_at_depth1() {
210 func.func @should_not_fuse_across_load_at_depth1() {
// Depth-1 variant: loops '%i1' and '%i3' each store to row '%i0' of the 2-D
// memref '%0', with a load of '%0[%i0, %c0]' placed between them. The remark
// annotations pin a block-level dependence that prevents fusing loop nests 0
// and 1, in either direction, at depth 1.
211 %0 = memref.alloc() : memref<10x10xf32>
212 %c0 = arith.constant 0 : index
213 %cf7 = arith.constant 7.0 : f32
215 affine.for %i0 = 0 to 10 {
216 affine.for %i1 = 0 to 10 {
217 // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 1}}
218 affine.store %cf7, %0[%i0, %i1] : memref<10x10xf32>
221 %v1 = affine.load %0[%i0, %c0] : memref<10x10xf32>
223 affine.for %i3 = 0 to 10 {
224 // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 1}}
225 affine.store %cf7, %0[%i0, %i3] : memref<10x10xf32>
233 // CHECK-LABEL: func @should_not_fuse_across_load_in_loop_at_depth1() {
234 func.func @should_not_fuse_across_load_in_loop_at_depth1() {
// Depth-1 variant: loops '%i1' and '%i3' each store to row '%i0' of the 2-D
// memref '%0', and loop '%i2' between them loads from the same row. The
// remark annotations pin a block-level dependence that prevents fusing loop
// nests 0 and 2, in either direction, at depth 1.
235 %0 = memref.alloc() : memref<10x10xf32>
236 %c0 = arith.constant 0 : index
237 %cf7 = arith.constant 7.0 : f32
239 affine.for %i0 = 0 to 10 {
240 affine.for %i1 = 0 to 10 {
241 // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 2 at depth 1}}
242 affine.store %cf7, %0[%i0, %i1] : memref<10x10xf32>
245 affine.for %i2 = 0 to 10 {
246 %v1 = affine.load %0[%i0, %i2] : memref<10x10xf32>
249 affine.for %i3 = 0 to 10 {
250 // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 2 into loop nest 0 at depth 1}}
251 affine.store %cf7, %0[%i0, %i3] : memref<10x10xf32>
259 // CHECK-LABEL: func @should_not_fuse_across_store_at_depth1() {
260 func.func @should_not_fuse_across_store_at_depth1() {
// Depth-1 variant: loops '%i1' and '%i3' each load from row '%i0' of the 2-D
// memref '%0', with a store to '%0[%i0, %c0]' placed between them. The remark
// annotations pin a block-level dependence that prevents fusing loop nests 0
// and 1, in either direction, at depth 1.
261 %0 = memref.alloc() : memref<10x10xf32>
262 %c0 = arith.constant 0 : index
263 %cf7 = arith.constant 7.0 : f32
265 affine.for %i0 = 0 to 10 {
266 affine.for %i1 = 0 to 10 {
267 // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 1}}
268 %v0 = affine.load %0[%i0, %i1] : memref<10x10xf32>
271 affine.store %cf7, %0[%i0, %c0] : memref<10x10xf32>
273 affine.for %i3 = 0 to 10 {
274 // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 1}}
275 %v1 = affine.load %0[%i0, %i3] : memref<10x10xf32>
283 // CHECK-LABEL: func @should_not_fuse_across_store_in_loop_at_depth1() {
284 func.func @should_not_fuse_across_store_in_loop_at_depth1() {
// Depth-1 variant: loops '%i1' and '%i3' each load from row '%i0' of the 2-D
// memref '%0', and loop '%i2' between them stores to the same row. The
// remark annotations pin a block-level dependence that prevents fusing loop
// nests 0 and 2, in either direction, at depth 1.
285 %0 = memref.alloc() : memref<10x10xf32>
286 %c0 = arith.constant 0 : index
287 %cf7 = arith.constant 7.0 : f32
289 affine.for %i0 = 0 to 10 {
290 affine.for %i1 = 0 to 10 {
291 // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 2 at depth 1}}
292 %v0 = affine.load %0[%i0, %i1] : memref<10x10xf32>
295 affine.for %i2 = 0 to 10 {
296 affine.store %cf7, %0[%i0, %i2] : memref<10x10xf32>
299 affine.for %i3 = 0 to 10 {
300 // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 2 into loop nest 0 at depth 1}}
301 %v1 = affine.load %0[%i0, %i3] : memref<10x10xf32>
309 // CHECK-LABEL: func @should_not_fuse_across_ssa_value_def_at_depth1() {
310 func.func @should_not_fuse_across_ssa_value_def_at_depth1() {
// Depth-1 variant: loop '%i1' copies '%0[%i0, %i1]' into '%1[%i0, %i1]'.
// Between it and loop '%i2' sit a load of '%1[%i0, %c0]' and the definition
// of '%c2', which loop '%i2' uses as a store index. The remark annotations
// pin a block-level dependence that prevents fusing loop nests 0 and 1, in
// either direction, at depth 1.
311 %0 = memref.alloc() : memref<10x10xf32>
312 %1 = memref.alloc() : memref<10x10xf32>
313 %c0 = arith.constant 0 : index
314 %cf7 = arith.constant 7.0 : f32
316 affine.for %i0 = 0 to 10 {
317 affine.for %i1 = 0 to 10 {
318 // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 0 into loop nest 1 at depth 1}}
319 %v0 = affine.load %0[%i0, %i1] : memref<10x10xf32>
320 affine.store %v0, %1[%i0, %i1] : memref<10x10xf32>
323 // The RAW dependence from the store in loop nest '%i1' to 'load %1' prevents
324 // fusing loop nest '%i1' into loops after the load.
325 %v1 = affine.load %1[%i0, %c0] : memref<10x10xf32>
326 "op0"(%v1) : (f32) -> ()
328 // Loop nest '%i2' cannot be fused past SSA value def '%c2', which it uses.
329 %c2 = arith.constant 2 : index
331 affine.for %i2 = 0 to 10 {
332 // expected-remark@-1 {{block-level dependence preventing fusion of loop nest 1 into loop nest 0 at depth 1}}
333 affine.store %cf7, %0[%i0, %c2] : memref<10x10xf32>