1 // RUN: mlir-opt %s -allow-unregistered-dialect -test-loop-fusion=test-loop-fusion-transformation -split-input-file -canonicalize | FileCheck %s
// CHECK-LABEL: func @slice_depth1_loop_nest() {
func.func @slice_depth1_loop_nest() {
%0 = memref.alloc() : memref<100xf32>
%cst = arith.constant 7.000000e+00 : f32
// Source nest: stores a constant into %0 over the range [0, 16).
affine.for %i0 = 0 to 16 {
affine.store %cst, %0[%i0] : memref<100xf32>
// Destination nest: reads %0 only over the smaller range [0, 5).
// Fusion is expected to slice the source loop down to the
// destination's trip count, yielding one fused loop from 0 to 5
// with the store immediately preceding the load of each element.
affine.for %i1 = 0 to 5 {
%1 = affine.load %0[%i1] : memref<100xf32>
"prevent.dce"(%1) : (f32) -> ()
// CHECK: affine.for %[[IV0:.*]] = 0 to 5 {
// CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%[[IV0]]] : memref<100xf32>
// CHECK-NEXT: affine.load %{{.*}}[%[[IV0]]] : memref<100xf32>
// CHECK-NEXT: "prevent.dce"(%{{.*}}) : (f32) -> ()
// CHECK-LABEL: func @should_fuse_reduction_to_pointwise() {
func.func @should_fuse_reduction_to_pointwise() {
%a = memref.alloc() : memref<10x10xf32>
%b = memref.alloc() : memref<10xf32>
%c = memref.alloc() : memref<10xf32>
%cf7 = arith.constant 7.0 : f32
// Source nest: a row reduction — accumulates each row of %a into %b[%i0].
// Note %i1 does not appear in the access functions on %b.
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%v0 = affine.load %b[%i0] : memref<10xf32>
%v1 = affine.load %a[%i0, %i1] : memref<10x10xf32>
%v3 = arith.addf %v0, %v1 : f32
affine.store %v3, %b[%i0] : memref<10xf32>
// Destination nest: a pointwise copy of %b into %c.
affine.for %i2 = 0 to 10 {
%v4 = affine.load %b[%i2] : memref<10xf32>
affine.store %v4, %c[%i2] : memref<10xf32>
// Match on the fused loop nest.
// Should fuse in entire inner loop on %i1 from source loop nest, as %i1
// is not used in the access function of the store/load on %b.
// CHECK: affine.for %{{.*}} = 0 to 10 {
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
// CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
// CHECK-NEXT: affine.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
// CHECK-NEXT: arith.addf %{{.*}}, %{{.*}} : f32
// CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
// CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
// CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
// CHECK-LABEL: func @should_fuse_avoiding_dependence_cycle() {
func.func @should_fuse_avoiding_dependence_cycle() {
%a = memref.alloc() : memref<10xf32>
%b = memref.alloc() : memref<10xf32>
%c = memref.alloc() : memref<10xf32>
%cf7 = arith.constant 7.0 : f32
// Set up the following dependences:
// 1) loop0 -> loop1 on memref '%a' (loop0 reads %a, loop1 writes it).
// 2) loop0 -> loop2 on memref '%b' (loop0 writes %b, loop2 reads it).
// 3) loop1 -> loop2 on memref '%c' (loop1 reads %c, loop2 writes it).
affine.for %i0 = 0 to 10 {
%v0 = affine.load %a[%i0] : memref<10xf32>
affine.store %v0, %b[%i0] : memref<10xf32>
affine.for %i1 = 0 to 10 {
affine.store %cf7, %a[%i1] : memref<10xf32>
%v1 = affine.load %c[%i1] : memref<10xf32>
"prevent.dce"(%v1) : (f32) -> ()
affine.for %i2 = 0 to 10 {
%v2 = affine.load %b[%i2] : memref<10xf32>
affine.store %v2, %c[%i2] : memref<10xf32>
// Fusing the first loop (loop0) directly into the last (loop2) would
// create a dependence cycle through loop1.
// However, we can avoid the dependence cycle if we first fuse loop0 into
// loop1.
// Then fuse this loop nest with loop2, yielding a single fused loop:
// CHECK: affine.for %{{.*}} = 0 to 10 {
// CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
// CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
// CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
// CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
// CHECK-NEXT: "prevent.dce"
// CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
// CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
// CHECK-NEXT: return