1 // RUN: mlir-opt %s -affine-super-vectorize="virtual-vector-size=32,256 test-fastest-varying=0,1" | FileCheck %s
3 // Permutation maps used in vectorization.
4 // CHECK-DAG: #[[map_proj_d0d1d2_d2d1:map[0-9]*]] = affine_map<(d0, d1, d2) -> (d2, d1)>
6 func.func @vec2d(%A : memref<?x?x?xf32>) {
7 %c0 = arith.constant 0 : index
8 %c1 = arith.constant 1 : index
9 %c2 = arith.constant 2 : index
10 %M = memref.dim %A, %c0 : memref<?x?x?xf32>
11 %N = memref.dim %A, %c1 : memref<?x?x?xf32>
12 %P = memref.dim %A, %c2 : memref<?x?x?xf32>
13 // CHECK: for {{.*}} = 0 to %{{.*}} {
14 // CHECK: for {{.*}} = 0 to %{{.*}} {
15 // CHECK: for {{.*}} = 0 to %{{.*}} {
16 // With test-fastest-varying=0,1 (as set in the RUN line), no vectorization
17 // happens for the following loop nest because of its loop nesting order.
18 affine.for %i0 = 0 to %M {
19 affine.for %i1 = 0 to %N {
20 affine.for %i2 = 0 to %P {
21 %a2 = affine.load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
25 // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 32
26 // CHECK: affine.for %{{.*}} = 0 to %{{.*}} {
27 // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 256
28 // CHECK: {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1d2_d2d1]]} : memref<?x?x?xf32>, vector<32x256xf32>
29 affine.for %i3 = 0 to %M {
30 affine.for %i4 = 0 to %N {
31 affine.for %i5 = 0 to %P {
32 %a5 = affine.load %A[%i4, %i5, %i3] : memref<?x?x?xf32>
39 func.func @vec2d_imperfectly_nested(%A : memref<?x?x?xf32>) {
40 %c0 = arith.constant 0 : index
41 %c1 = arith.constant 1 : index
42 %c2 = arith.constant 2 : index
43 %0 = memref.dim %A, %c0 : memref<?x?x?xf32>
44 %1 = memref.dim %A, %c1 : memref<?x?x?xf32>
45 %2 = memref.dim %A, %c2 : memref<?x?x?xf32>
46 // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 32 {
47 // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 256 {
48 // CHECK: affine.for %{{.*}} = 0 to %{{.*}} {
49 // CHECK: %{{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1d2_d2d1]]} : memref<?x?x?xf32>, vector<32x256xf32>
50 // CHECK: affine.for %{{.*}} = 0 to %{{.*}} {
51 // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 256 {
52 // CHECK: %{{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1d2_d2d1]]} : memref<?x?x?xf32>, vector<32x256xf32>
53 // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 256 {
54 // CHECK: %{{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1d2_d2d1]]} : memref<?x?x?xf32>, vector<32x256xf32>
55 affine.for %i0 = 0 to %0 {
56 affine.for %i1 = 0 to %1 {
57 affine.for %i2 = 0 to %2 {
58 %a2 = affine.load %A[%i2, %i1, %i0] : memref<?x?x?xf32>
61 affine.for %i3 = 0 to %1 {
62 affine.for %i4 = 0 to %2 {
63 %a4 = affine.load %A[%i3, %i4, %i0] : memref<?x?x?xf32>
65 affine.for %i5 = 0 to %2 {
66 %a5 = affine.load %A[%i3, %i5, %i0] : memref<?x?x?xf32>