1 // RUN: mlir-opt %s -convert-vector-to-scf -split-input-file -allow-unregistered-dialect | FileCheck %s
2 // RUN: mlir-opt %s -convert-vector-to-scf=full-unroll=true -split-input-file -allow-unregistered-dialect | FileCheck %s --check-prefix=FULL-UNROLL
4 // CHECK-LABEL: func @vector_transfer_ops_0d(
5 func @vector_transfer_ops_0d(%M: memref<f32>) {
6 %f0 = arith.constant 0.0 : f32
8 // 0-d transfers are left untouched by vector-to-scf.
9 // They are independently lowered to the proper memref.load/store.
10 // CHECK: vector.transfer_read {{.*}}: memref<f32>, vector<f32>
11 %0 = vector.transfer_read %M[], %f0 {permutation_map = affine_map<()->()>} :
12 memref<f32>, vector<f32>
14 // CHECK: vector.transfer_write {{.*}}: vector<f32>, memref<f32>
15 vector.transfer_write %0, %M[] {permutation_map = affine_map<()->()>} :
16 vector<f32>, memref<f32>
23 // CHECK-LABEL: func @materialize_read_1d() {
24 func @materialize_read_1d() {
25 %f0 = arith.constant 0.0: f32
26 %A = memref.alloc () : memref<7x42xf32>
27 affine.for %i0 = 0 to 7 step 4 {
28 affine.for %i1 = 0 to 42 step 4 {
29 %f1 = vector.transfer_read %A[%i0, %i1], %f0 {permutation_map = affine_map<(d0, d1) -> (d0)>} : memref<7x42xf32>, vector<4xf32>
30 %ip1 = affine.apply affine_map<(d0) -> (d0 + 1)> (%i1)
31 %f2 = vector.transfer_read %A[%i0, %ip1], %f0 {permutation_map = affine_map<(d0, d1) -> (d0)>} : memref<7x42xf32>, vector<4xf32>
32 %ip2 = affine.apply affine_map<(d0) -> (d0 + 2)> (%i1)
33 %f3 = vector.transfer_read %A[%i0, %ip2], %f0 {permutation_map = affine_map<(d0, d1) -> (d0)>} : memref<7x42xf32>, vector<4xf32>
34 %ip3 = affine.apply affine_map<(d0) -> (d0 + 3)> (%i1)
35 %f4 = vector.transfer_read %A[%i0, %ip3], %f0 {permutation_map = affine_map<(d0, d1) -> (d0)>} : memref<7x42xf32>, vector<4xf32>
36 // Both accesses in the load must be clipped otherwise %i1 + 2 and %i1 + 3 will go out of bounds.
38 // CHECK-NEXT: memref.load
39 // CHECK-NEXT: vector.insertelement
40 // CHECK-NEXT: scf.yield
42 // CHECK-NEXT: scf.yield
43 // Add a dummy use to prevent dead code elimination from removing transfer
45 "dummy_use"(%f1, %f2, %f3, %f4) : (vector<4xf32>, vector<4xf32>, vector<4xf32>, vector<4xf32>) -> ()
53 // CHECK-LABEL: func @materialize_read_1d_partially_specialized
54 func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %dyn4 : index) {
55 %f0 = arith.constant 0.0: f32
56 %A = memref.alloc (%dyn1, %dyn2, %dyn4) : memref<7x?x?x42x?xf32>
57 affine.for %i0 = 0 to 7 {
58 affine.for %i1 = 0 to %dyn1 {
59 affine.for %i2 = 0 to %dyn2 {
60 affine.for %i3 = 0 to 42 step 2 {
61 affine.for %i4 = 0 to %dyn4 {
62 %f1 = vector.transfer_read %A[%i0, %i1, %i2, %i3, %i4], %f0 {permutation_map = affine_map<(d0, d1, d2, d3, d4) -> (d3)>} : memref<7x?x?x42x?xf32>, vector<4xf32>
63 %i3p1 = affine.apply affine_map<(d0) -> (d0 + 1)> (%i3)
64 %f2 = vector.transfer_read %A[%i0, %i1, %i2, %i3p1, %i4], %f0 {permutation_map = affine_map<(d0, d1, d2, d3, d4) -> (d3)>} : memref<7x?x?x42x?xf32>, vector<4xf32>
65 // Add a dummy use to prevent dead code elimination from removing
67 "dummy_use"(%f1, %f2) : (vector<4xf32>, vector<4xf32>) -> ()
73 // CHECK: %[[tensor:[0-9]+]] = memref.alloc
74 // CHECK-NOT: {{.*}} memref.dim %[[tensor]], %c0
75 // CHECK-NOT: {{.*}} memref.dim %[[tensor]], %c3
81 // CHECK: #[[$ADD:map.*]] = affine_map<(d0, d1) -> (d0 + d1)>
83 // CHECK-LABEL: func @materialize_read(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
84 func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
85 %f0 = arith.constant 0.0: f32
86 // CHECK-DAG: %[[ALLOC:.*]] = memref.alloca() : memref<vector<5x4x3xf32>>
87 // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
88 // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
89 // CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
90 // CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
91 // CHECK-DAG: %[[C5:.*]] = arith.constant 5 : index
92 // CHECK: %{{.*}} = memref.alloc(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : memref<?x?x?x?xf32>
93 // CHECK-NEXT: affine.for %[[I0:.*]] = 0 to %{{.*}} step 3 {
94 // CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %{{.*}} {
95 // CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %{{.*}} {
96 // CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %{{.*}} step 5 {
97 // CHECK: scf.for %[[I4:.*]] = %[[C0]] to %[[C5]] step %[[C1]] {
99 // CHECK: %[[L3:.*]] = affine.apply #[[$ADD]](%[[I3]], %[[I4]])
100 // CHECK: scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] {
101 // CHECK: %[[VEC:.*]] = scf.for %[[I6:.*]] = %[[C0]] to %[[C3]] step %[[C1]] {{.*}} -> (vector<3xf32>) {
102 // CHECK: %[[L0:.*]] = affine.apply #[[$ADD]](%[[I0]], %[[I6]])
103 // CHECK: scf.if {{.*}} -> (vector<3xf32>) {
104 // CHECK-NEXT: %[[SCAL:.*]] = memref.load %{{.*}}[%[[L0]], %[[I1]], %[[I2]], %[[L3]]] : memref<?x?x?x?xf32>
105 // CHECK-NEXT: %[[RVEC:.*]] = vector.insertelement %[[SCAL]], %{{.*}}[%[[I6]] : index] : vector<3xf32>
106 // CHECK-NEXT: scf.yield
107 // CHECK-NEXT: } else {
108 // CHECK-NEXT: scf.yield
110 // CHECK-NEXT: scf.yield
112 // CHECK-NEXT: memref.store %[[VEC]], {{.*}} : memref<5x4xvector<3xf32>>
114 // CHECK-NEXT: } else {
115 // CHECK-NEXT: memref.store {{.*}} : memref<5xvector<4x3xf32>>
118 // CHECK-NEXT: %[[LD:.*]] = memref.load %[[ALLOC]][] : memref<vector<5x4x3xf32>>
119 // CHECK-NEXT: "dummy_use"(%[[LD]]) : (vector<5x4x3xf32>) -> ()
124 // CHECK-NEXT: return
127 // Check that I0 + I6 (of size 3) reads from the first index, load(L0, ...), and is inserted at vector position I6.
128 // Check that I3 + I4 (of size 5) reads from the last index, load(..., L3); I4 iterates over the leading (size 5) vector dimension.
129 // Other dimensions are just accessed with I1, I2 resp.
130 %A = memref.alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
131 affine.for %i0 = 0 to %M step 3 {
132 affine.for %i1 = 0 to %N {
133 affine.for %i2 = 0 to %O {
134 affine.for %i3 = 0 to %P step 5 {
135 %f = vector.transfer_read %A[%i0, %i1, %i2, %i3], %f0 {permutation_map = affine_map<(d0, d1, d2, d3) -> (d3, 0, d0)>} : memref<?x?x?x?xf32>, vector<5x4x3xf32>
136 // Add a dummy use to prevent dead code elimination from removing
137 // transfer read ops.
138 "dummy_use"(%f) : (vector<5x4x3xf32>) -> ()
148 // CHECK: #[[$ADD:map.*]] = affine_map<(d0, d1) -> (d0 + d1)>
150 // CHECK-LABEL: func @materialize_write(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) {
151 func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
152 // CHECK-DAG: %[[ALLOC:.*]] = memref.alloca() : memref<vector<5x4x3xf32>>
153 // CHECK-DAG: %{{.*}} = arith.constant dense<1.000000e+00> : vector<5x4x3xf32>
154 // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
155 // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
156 // CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
157 // CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
158 // CHECK-DAG: %[[C5:.*]] = arith.constant 5 : index
159 // CHECK: %{{.*}} = memref.alloc(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : memref<?x?x?x?xf32>
160 // CHECK-NEXT: affine.for %[[I0:.*]] = 0 to %{{.*}} step 3 {
161 // CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %{{.*}} step 4 {
162 // CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %{{.*}} {
163 // CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %{{.*}} step 5 {
164 // CHECK: memref.store %{{.*}}, %[[ALLOC]][] : memref<vector<5x4x3xf32>>
165 // CHECK: %[[VECTOR_VIEW1:.*]] = vector.type_cast %[[ALLOC]] : memref<vector<5x4x3xf32>> to memref<5xvector<4x3xf32>>
166 // CHECK: scf.for %[[I4:.*]] = %[[C0]] to %[[C5]] step %[[C1]] {
168 // CHECK: %[[S3:.*]] = affine.apply #[[$ADD]](%[[I3]], %[[I4]])
169 // CHECK: %[[VECTOR_VIEW2:.*]] = vector.type_cast %[[VECTOR_VIEW1]] : memref<5xvector<4x3xf32>> to memref<5x4xvector<3xf32>>
170 // CHECK: scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] {
172 // CHECK: %[[S1:.*]] = affine.apply #[[$ADD]](%[[I1]], %[[I5]])
173 // CHECK: %[[VEC:.*]] = memref.load %[[VECTOR_VIEW2]][%[[I4]], %[[I5]]] : memref<5x4xvector<3xf32>>
174 // CHECK: scf.for %[[I6:.*]] = %[[C0]] to %[[C3]] step %[[C1]] {
175 // CHECK: %[[S0:.*]] = affine.apply #[[$ADD]](%[[I0]], %[[I6]])
177 // CHECK: %[[SCAL:.*]] = vector.extractelement %[[VEC]][%[[I6]] : index] : vector<3xf32>
178 // CHECK: memref.store %[[SCAL]], {{.*}}[%[[S0]], %[[S1]], %[[I2]], %[[S3]]] : memref<?x?x?x?xf32>
191 // Check that I0 + I6 (of size 3) reads vector element I6 via extractelement and writes into the first index, store(S0, ...)
192 // Check that I1 + I5 (of size 4) reads from the second index, load(..., I5), and writes into the second index, store(..., S1, ...)
193 // Check that I3 + I4 (of size 5) reads from the first index, load(I4, ...), and writes into the last index, store(..., S3)
194 // Other dimension is just accessed with I2.
195 %A = memref.alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
196 %f1 = arith.constant dense<1.000000e+00> : vector<5x4x3xf32>
197 affine.for %i0 = 0 to %M step 3 {
198 affine.for %i1 = 0 to %N step 4 {
199 affine.for %i2 = 0 to %O {
200 affine.for %i3 = 0 to %P step 5 {
201 vector.transfer_write %f1, %A[%i0, %i1, %i2, %i3] {permutation_map = affine_map<(d0, d1, d2, d3) -> (d3, d1, d0)>} : vector<5x4x3xf32>, memref<?x?x?x?xf32>
211 // CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0)[s0] -> (d0 + s0)>
213 // FULL-UNROLL-DAG: #[[$MAP1:.*]] = affine_map<()[s0] -> (s0 + 1)>
214 // FULL-UNROLL-DAG: #[[$MAP2:.*]] = affine_map<()[s0] -> (s0 + 2)>
217 // CHECK-LABEL: transfer_read_progressive(
218 // CHECK-SAME: %[[A:[a-zA-Z0-9]+]]: memref<?x?xf32>,
219 // CHECK-SAME: %[[base:[a-zA-Z0-9]+]]: index
221 // FULL-UNROLL-LABEL: transfer_read_progressive(
222 // FULL-UNROLL-SAME: %[[A:[a-zA-Z0-9]+]]: memref<?x?xf32>,
223 // FULL-UNROLL-SAME: %[[base:[a-zA-Z0-9]+]]: index
225 func @transfer_read_progressive(%A : memref<?x?xf32>, %base: index) -> vector<3x15xf32> {
226 %f7 = arith.constant 7.0: f32
227 // CHECK-DAG: %[[C7:.*]] = arith.constant 7.000000e+00 : f32
228 // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
229 // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
230 // CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
231 // CHECK-DAG: %[[splat:.*]] = arith.constant dense<7.000000e+00> : vector<15xf32>
232 // CHECK-DAG: %[[alloc:.*]] = memref.alloca() : memref<vector<3x15xf32>>
233 // CHECK: %[[alloc_casted:.*]] = vector.type_cast %[[alloc]] : memref<vector<3x15xf32>> to memref<3xvector<15xf32>>
234 // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[C3]]
235 // CHECK: %[[dim:.*]] = memref.dim %[[A]], %[[C0]] : memref<?x?xf32>
236 // CHECK: %[[add:.*]] = affine.apply #[[$MAP0]](%[[I]])[%[[base]]]
237 // CHECK: %[[cond1:.*]] = arith.cmpi sgt, %[[dim]], %[[add]] : index
238 // CHECK: scf.if %[[cond1]] {
239 // CHECK: %[[vec_1d:.*]] = vector.transfer_read %[[A]][%{{.*}}, %[[base]]], %[[C7]] : memref<?x?xf32>, vector<15xf32>
240 // CHECK: memref.store %[[vec_1d]], %[[alloc_casted]][%[[I]]] : memref<3xvector<15xf32>>
242 // CHECK: memref.store %[[splat]], %[[alloc_casted]][%[[I]]] : memref<3xvector<15xf32>>
245 // CHECK: %[[cst:.*]] = memref.load %[[alloc]][] : memref<vector<3x15xf32>>
247 // FULL-UNROLL: %[[C7:.*]] = arith.constant 7.000000e+00 : f32
248 // FULL-UNROLL: %[[VEC0:.*]] = arith.constant dense<7.000000e+00> : vector<3x15xf32>
249 // FULL-UNROLL: %[[C0:.*]] = arith.constant 0 : index
250 // FULL-UNROLL: %[[DIM:.*]] = memref.dim %[[A]], %[[C0]] : memref<?x?xf32>
251 // FULL-UNROLL: cmpi sgt, %[[DIM]], %[[base]] : index
252 // FULL-UNROLL: %[[VEC1:.*]] = scf.if %{{.*}} -> (vector<3x15xf32>) {
253 // FULL-UNROLL: vector.transfer_read %[[A]][%[[base]], %[[base]]], %[[C7]] : memref<?x?xf32>, vector<15xf32>
254 // FULL-UNROLL: vector.insert %{{.*}}, %[[VEC0]] [0] : vector<15xf32> into vector<3x15xf32>
255 // FULL-UNROLL: scf.yield %{{.*}} : vector<3x15xf32>
256 // FULL-UNROLL: } else {
257 // FULL-UNROLL: scf.yield %{{.*}} : vector<3x15xf32>
259 // FULL-UNROLL: affine.apply #[[$MAP1]]()[%[[base]]]
260 // FULL-UNROLL: cmpi sgt, %{{.*}}, %{{.*}} : index
261 // FULL-UNROLL: %[[VEC2:.*]] = scf.if %{{.*}} -> (vector<3x15xf32>) {
262 // FULL-UNROLL: vector.transfer_read %[[A]][%{{.*}}, %[[base]]], %[[C7]] : memref<?x?xf32>, vector<15xf32>
263 // FULL-UNROLL: vector.insert %{{.*}}, %[[VEC1]] [1] : vector<15xf32> into vector<3x15xf32>
264 // FULL-UNROLL: scf.yield %{{.*}} : vector<3x15xf32>
265 // FULL-UNROLL: } else {
266 // FULL-UNROLL: scf.yield %{{.*}} : vector<3x15xf32>
268 // FULL-UNROLL: affine.apply #[[$MAP2]]()[%[[base]]]
269 // FULL-UNROLL: cmpi sgt, %{{.*}}, %{{.*}} : index
270 // FULL-UNROLL: %[[VEC3:.*]] = scf.if %{{.*}} -> (vector<3x15xf32>) {
271 // FULL-UNROLL: vector.transfer_read %[[A]][%{{.*}}, %[[base]]], %[[C7]] : memref<?x?xf32>, vector<15xf32>
272 // FULL-UNROLL: vector.insert %{{.*}}, %[[VEC2]] [2] : vector<15xf32> into vector<3x15xf32>
273 // FULL-UNROLL: scf.yield %{{.*}} : vector<3x15xf32>
274 // FULL-UNROLL: } else {
275 // FULL-UNROLL: scf.yield %{{.*}} : vector<3x15xf32>
278 %f = vector.transfer_read %A[%base, %base], %f7 :
279 memref<?x?xf32>, vector<3x15xf32>
281 return %f: vector<3x15xf32>
286 // CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0)[s0] -> (d0 + s0)>
288 // FULL-UNROLL-DAG: #[[$MAP1:.*]] = affine_map<()[s0] -> (s0 + 1)>
289 // FULL-UNROLL-DAG: #[[$MAP2:.*]] = affine_map<()[s0] -> (s0 + 2)>
291 // CHECK-LABEL: transfer_write_progressive(
292 // CHECK-SAME: %[[A:[a-zA-Z0-9]+]]: memref<?x?xf32>,
293 // CHECK-SAME: %[[base:[a-zA-Z0-9]+]]: index,
294 // CHECK-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<3x15xf32>
295 // FULL-UNROLL-LABEL: transfer_write_progressive(
296 // FULL-UNROLL-SAME: %[[A:[a-zA-Z0-9]+]]: memref<?x?xf32>,
297 // FULL-UNROLL-SAME: %[[base:[a-zA-Z0-9]+]]: index,
298 // FULL-UNROLL-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<3x15xf32>
299 func @transfer_write_progressive(%A : memref<?x?xf32>, %base: index, %vec: vector<3x15xf32>) {
300 // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
301 // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
302 // CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
303 // CHECK: %[[alloc:.*]] = memref.alloca() : memref<vector<3x15xf32>>
304 // CHECK: memref.store %[[vec]], %[[alloc]][] : memref<vector<3x15xf32>>
305 // CHECK: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<vector<3x15xf32>> to memref<3xvector<15xf32>>
306 // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[C3]]
307 // CHECK: %[[dim:.*]] = memref.dim %[[A]], %[[C0]] : memref<?x?xf32>
308 // CHECK: %[[add:.*]] = affine.apply #[[$MAP0]](%[[I]])[%[[base]]]
309 // CHECK: %[[cmp:.*]] = arith.cmpi sgt, %[[dim]], %[[add]] : index
310 // CHECK: scf.if %[[cmp]] {
311 // CHECK: %[[vec_1d:.*]] = memref.load %[[vmemref]][%[[I]]] : memref<3xvector<15xf32>>
312 // CHECK: vector.transfer_write %[[vec_1d]], %[[A]][{{.*}}, %[[base]]] : vector<15xf32>, memref<?x?xf32>
316 // FULL-UNROLL: %[[C0:.*]] = arith.constant 0 : index
317 // FULL-UNROLL: %[[DIM:.*]] = memref.dim %[[A]], %[[C0]] : memref<?x?xf32>
318 // FULL-UNROLL: %[[CMP0:.*]] = arith.cmpi sgt, %[[DIM]], %[[base]] : index
319 // FULL-UNROLL: scf.if %[[CMP0]] {
320 // FULL-UNROLL: %[[V0:.*]] = vector.extract %[[vec]][0] : vector<3x15xf32>
321 // FULL-UNROLL: vector.transfer_write %[[V0]], %[[A]][%[[base]], %[[base]]] : vector<15xf32>, memref<?x?xf32>
323 // FULL-UNROLL: %[[I1:.*]] = affine.apply #[[$MAP1]]()[%[[base]]]
324 // FULL-UNROLL: %[[CMP1:.*]] = arith.cmpi sgt, %{{.*}}, %[[I1]] : index
325 // FULL-UNROLL: scf.if %[[CMP1]] {
326 // FULL-UNROLL: %[[V1:.*]] = vector.extract %[[vec]][1] : vector<3x15xf32>
327 // FULL-UNROLL: vector.transfer_write %[[V1]], %[[A]][%{{.*}}, %[[base]]] : vector<15xf32>, memref<?x?xf32>
329 // FULL-UNROLL: %[[I2:.*]] = affine.apply #[[$MAP2]]()[%[[base]]]
330 // FULL-UNROLL: %[[CMP2:.*]] = arith.cmpi sgt, %{{.*}}, %[[I2]] : index
331 // FULL-UNROLL: scf.if %[[CMP2]] {
332 // FULL-UNROLL: %[[V2:.*]] = vector.extract %[[vec]][2] : vector<3x15xf32>
333 // FULL-UNROLL: vector.transfer_write %[[V2]], %[[A]][%{{.*}}, %[[base]]] : vector<15xf32>, memref<?x?xf32>
336 vector.transfer_write %vec, %A[%base, %base] :
337 vector<3x15xf32>, memref<?x?xf32>
343 // CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0)[s0] -> (d0 + s0)>
345 // FULL-UNROLL-DAG: #[[$MAP1:.*]] = affine_map<()[s0] -> (s0 + 1)>
346 // FULL-UNROLL-DAG: #[[$MAP2:.*]] = affine_map<()[s0] -> (s0 + 2)>
348 // CHECK-LABEL: transfer_write_progressive_inbounds(
349 // CHECK-SAME: %[[A:[a-zA-Z0-9]+]]: memref<?x?xf32>,
350 // CHECK-SAME: %[[base:[a-zA-Z0-9]+]]: index,
351 // CHECK-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<3x15xf32>
352 // FULL-UNROLL-LABEL: transfer_write_progressive_inbounds(
353 // FULL-UNROLL-SAME: %[[A:[a-zA-Z0-9]+]]: memref<?x?xf32>,
354 // FULL-UNROLL-SAME: %[[base:[a-zA-Z0-9]+]]: index,
355 // FULL-UNROLL-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<3x15xf32>
356 func @transfer_write_progressive_inbounds(%A : memref<?x?xf32>, %base: index, %vec: vector<3x15xf32>) {
358 // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
359 // CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
360 // CHECK: %[[alloc:.*]] = memref.alloca() : memref<vector<3x15xf32>>
361 // CHECK-NEXT: memref.store %[[vec]], %[[alloc]][] : memref<vector<3x15xf32>>
362 // CHECK-NEXT: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<vector<3x15xf32>> to memref<3xvector<15xf32>>
363 // CHECK-NEXT: scf.for %[[I:.*]] = %[[C0]] to %[[C3]]
364 // CHECK-NEXT: %[[add:.*]] = affine.apply #[[$MAP0]](%[[I]])[%[[base]]]
365 // CHECK-NEXT: %[[vec_1d:.*]] = memref.load %[[vmemref]][%[[I]]] : memref<3xvector<15xf32>>
366 // CHECK-NEXT: vector.transfer_write %[[vec_1d]], %[[A]][%[[add]], %[[base]]] {in_bounds = [true]} : vector<15xf32>, memref<?x?xf32>
368 // FULL-UNROLL: %[[VEC0:.*]] = vector.extract %[[vec]][0] : vector<3x15xf32>
369 // FULL-UNROLL: vector.transfer_write %[[VEC0]], %[[A]][%[[base]], %[[base]]] {in_bounds = [true]} : vector<15xf32>, memref<?x?xf32>
370 // FULL-UNROLL: %[[I1:.*]] = affine.apply #[[$MAP1]]()[%[[base]]]
371 // FULL-UNROLL: %[[VEC1:.*]] = vector.extract %[[vec]][1] : vector<3x15xf32>
372 // FULL-UNROLL: vector.transfer_write %[[VEC1]], %[[A]][%[[I1]], %[[base]]] {in_bounds = [true]} : vector<15xf32>, memref<?x?xf32>
373 // FULL-UNROLL: %[[I2:.*]] = affine.apply #[[$MAP2]]()[%[[base]]]
374 // FULL-UNROLL: %[[VEC2:.*]] = vector.extract %[[vec]][2] : vector<3x15xf32>
375 // FULL-UNROLL: vector.transfer_write %[[VEC2]], %[[A]][%[[I2]], %[[base]]] {in_bounds = [true]} : vector<15xf32>, memref<?x?xf32>
376 vector.transfer_write %vec, %A[%base, %base] {in_bounds = [true, true]} :
377 vector<3x15xf32>, memref<?x?xf32>
383 // FULL-UNROLL-LABEL: transfer_read_simple
384 func @transfer_read_simple(%A : memref<2x2xf32>) -> vector<2x2xf32> {
385 %c0 = arith.constant 0 : index
386 %f0 = arith.constant 0.0 : f32
387 // FULL-UNROLL-DAG: %[[VC0:.*]] = arith.constant dense<0.000000e+00> : vector<2x2xf32>
388 // FULL-UNROLL-DAG: %[[C0:.*]] = arith.constant 0 : index
389 // FULL-UNROLL-DAG: %[[C1:.*]] = arith.constant 1 : index
390 // FULL-UNROLL: %[[V0:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]]
391 // FULL-UNROLL: %[[RES0:.*]] = vector.insert %[[V0]], %[[VC0]] [0] : vector<2xf32> into vector<2x2xf32>
392 // FULL-UNROLL: %[[V1:.*]] = vector.transfer_read %{{.*}}[%[[C1]], %[[C0]]]
393 // FULL-UNROLL: %[[RES1:.*]] = vector.insert %[[V1]], %[[RES0]] [1] : vector<2xf32> into vector<2x2xf32>
394 %0 = vector.transfer_read %A[%c0, %c0], %f0 : memref<2x2xf32>, vector<2x2xf32>
395 return %0 : vector<2x2xf32>
398 func @transfer_read_minor_identity(%A : memref<?x?x?x?xf32>) -> vector<3x3xf32> {
399 %c0 = arith.constant 0 : index
400 %f0 = arith.constant 0.0 : f32
401 %0 = vector.transfer_read %A[%c0, %c0, %c0, %c0], %f0
402 { permutation_map = affine_map<(d0, d1, d2, d3) -> (d2, d3)> }
403 : memref<?x?x?x?xf32>, vector<3x3xf32>
404 return %0 : vector<3x3xf32>
407 // CHECK-LABEL: transfer_read_minor_identity(
408 // CHECK-SAME: %[[A:.*]]: memref<?x?x?x?xf32>) -> vector<3x3xf32>
409 // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
410 // CHECK-DAG: %[[c1:.*]] = arith.constant 1 : index
411 // CHECK-DAG: %[[c2:.*]] = arith.constant 2 : index
412 // CHECK-DAG: %[[c3:.*]] = arith.constant 3 : index
413 // CHECK-DAG: %[[f0:.*]] = arith.constant 0.000000e+00 : f32
414 // CHECK-DAG: %[[cst0:.*]] = arith.constant dense<0.000000e+00> : vector<3xf32>
415 // CHECK: %[[m:.*]] = memref.alloca() : memref<vector<3x3xf32>>
416 // CHECK: %[[cast:.*]] = vector.type_cast %[[m]] : memref<vector<3x3xf32>> to memref<3xvector<3xf32>>
417 // CHECK: scf.for %[[arg1:.*]] = %[[c0]] to %[[c3]]
418 // CHECK: %[[d:.*]] = memref.dim %[[A]], %[[c2]] : memref<?x?x?x?xf32>
419 // CHECK: %[[cmp:.*]] = arith.cmpi sgt, %[[d]], %[[arg1]] : index
420 // CHECK: scf.if %[[cmp]] {
421 // CHECK: %[[tr:.*]] = vector.transfer_read %[[A]][%[[c0]], %[[c0]], %[[arg1]], %[[c0]]], %[[f0]] : memref<?x?x?x?xf32>, vector<3xf32>
422 // CHECK: memref.store %[[tr]], %[[cast]][%[[arg1]]] : memref<3xvector<3xf32>>
424 // CHECK: memref.store %[[cst0]], %[[cast]][%[[arg1]]] : memref<3xvector<3xf32>>
427 // CHECK: %[[ret:.*]] = memref.load %[[m]][] : memref<vector<3x3xf32>>
428 // CHECK: return %[[ret]] : vector<3x3xf32>
430 func @transfer_write_minor_identity(%A : vector<3x3xf32>, %B : memref<?x?x?x?xf32>) {
431 %c0 = arith.constant 0 : index
432 %f0 = arith.constant 0.0 : f32
433 vector.transfer_write %A, %B[%c0, %c0, %c0, %c0]
434 { permutation_map = affine_map<(d0, d1, d2, d3) -> (d2, d3)> }
435 : vector<3x3xf32>, memref<?x?x?x?xf32>
439 // CHECK-LABEL: transfer_write_minor_identity(
440 // CHECK-SAME: %[[A:.*]]: vector<3x3xf32>,
441 // CHECK-SAME: %[[B:.*]]: memref<?x?x?x?xf32>)
442 // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
443 // CHECK-DAG: %[[c1:.*]] = arith.constant 1 : index
444 // CHECK-DAG: %[[c2:.*]] = arith.constant 2 : index
445 // CHECK-DAG: %[[c3:.*]] = arith.constant 3 : index
446 // CHECK: %[[m:.*]] = memref.alloca() : memref<vector<3x3xf32>>
447 // CHECK: memref.store %[[A]], %[[m]][] : memref<vector<3x3xf32>>
448 // CHECK: %[[cast:.*]] = vector.type_cast %[[m]] : memref<vector<3x3xf32>> to memref<3xvector<3xf32>>
449 // CHECK: scf.for %[[arg2:.*]] = %[[c0]] to %[[c3]]
450 // CHECK: %[[d:.*]] = memref.dim %[[B]], %[[c2]] : memref<?x?x?x?xf32>
451 // CHECK: %[[cmp:.*]] = arith.cmpi sgt, %[[d]], %[[arg2]] : index
452 // CHECK: scf.if %[[cmp]] {
453 // CHECK: %[[tmp:.*]] = memref.load %[[cast]][%[[arg2]]] : memref<3xvector<3xf32>>
454 // CHECK: vector.transfer_write %[[tmp]], %[[B]][%[[c0]], %[[c0]], %[[arg2]], %[[c0]]] : vector<3xf32>, memref<?x?x?x?xf32>
462 func @transfer_read_strided(%A : memref<8x4xf32, affine_map<(d0, d1) -> (d0 + d1 * 8)>>) -> vector<4xf32> {
463 %c0 = arith.constant 0 : index
464 %f0 = arith.constant 0.0 : f32
465 %0 = vector.transfer_read %A[%c0, %c0], %f0
466 : memref<8x4xf32, affine_map<(d0, d1) -> (d0 + d1 * 8)>>, vector<4xf32>
467 return %0 : vector<4xf32>
470 // CHECK-LABEL: transfer_read_strided(
472 // CHECK: memref.load
474 func @transfer_write_strided(%A : vector<4xf32>, %B : memref<8x4xf32, affine_map<(d0, d1) -> (d0 + d1 * 8)>>) {
475 %c0 = arith.constant 0 : index
476 vector.transfer_write %A, %B[%c0, %c0] :
477 vector<4xf32>, memref<8x4xf32, affine_map<(d0, d1) -> (d0 + d1 * 8)>>
481 // CHECK-LABEL: transfer_write_strided(