mlir/test/Dialect/Affine/SuperVectorize/vectorize_transpose_2d.mlir

   1 // RUN: mlir-opt %s -affine-super-vectorize="virtual-vector-size=32,256 test-fastest-varying=0,1" | FileCheck %s
   2
   3 // Permutation maps used in vectorization.
   4 // CHECK-DAG: #[[map_proj_d0d1d2_d2d1:map[0-9]*]] = affine_map<(d0, d1, d2) -> (d2, d1)>
   5
   6 func.func @vec2d(%A : memref<?x?x?xf32>) {  // Two 3-deep loop nests, each loading one element of %A per iteration.
   7   %c0 = arith.constant 0 : index
   8   %c1 = arith.constant 1 : index
   9   %c2 = arith.constant 2 : index
  10   %M = memref.dim %A, %c0 : memref<?x?x?xf32>  // size of dimension 0 of %A
  11   %N = memref.dim %A, %c1 : memref<?x?x?xf32>  // size of dimension 1 of %A
  12   %P = memref.dim %A, %c2 : memref<?x?x?xf32>  // size of dimension 2 of %A
  13   // CHECK: for  {{.*}} = 0 to %{{.*}} {
  14   // CHECK:   for  {{.*}} = 0 to %{{.*}} {
  15   // CHECK:     for  {{.*}} = 0 to %{{.*}} {
  16   // With test-fastest-varying=0,1 (the option given on the RUN line) no vectorization
  17   // happens for the nest below because of its loop nesting order; the three patterns above carry no step.
  18   affine.for %i0 = 0 to %M {
  19     affine.for %i1 = 0 to %N {
  20       affine.for %i2 = 0 to %P {
  21         %a2 = affine.load %A[%i0, %i1, %i2] : memref<?x?x?xf32>  // indices follow loop order: outermost -> dim 0, innermost -> dim 2
  22       }
  23     }
  24   }
  25   // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 32
  26   // CHECK:   affine.for %{{.*}} = 0 to %{{.*}} {
  27   // CHECK:     affine.for %{{.*}} = 0 to %{{.*}} step 256
  28   // CHECK:       {{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1d2_d2d1]]} : memref<?x?x?xf32>, vector<32x256xf32>
  29   affine.for %i3 = 0 to %M {  // the patterns above expect step 32 on the outermost loop of this nest
  30     affine.for %i4 = 0 to %N {  // expected to keep its default step
  31       affine.for %i5 = 0 to %P {  // expected to get step 256
  32         %a5 = affine.load %A[%i4, %i5, %i3] : memref<?x?x?xf32>  // outermost %i3 indexes dim 2, innermost %i5 indexes dim 1; expected to become a 32x256 transfer_read
  33       }
  34     }
  35   }
  36   return
  37 }
  38
  39 func.func @vec2d_imperfectly_nested(%A : memref<?x?x?xf32>) {  // One outer loop enclosing two sibling sub-nests; the second sub-nest itself holds two sibling innermost loops.
  40   %c0 = arith.constant 0 : index
  41   %c1 = arith.constant 1 : index
  42   %c2 = arith.constant 2 : index
  43   %0 = memref.dim %A, %c0 : memref<?x?x?xf32>  // size of dimension 0 of %A
  44   %1 = memref.dim %A, %c1 : memref<?x?x?xf32>  // size of dimension 1 of %A
  45   %2 = memref.dim %A, %c2 : memref<?x?x?xf32>  // size of dimension 2 of %A
  46   // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 32 {
  47   // CHECK:   affine.for %{{.*}} = 0 to %{{.*}} step 256 {
  48   // CHECK:     affine.for %{{.*}} = 0 to %{{.*}} {
  49   // CHECK:       %{{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1d2_d2d1]]} : memref<?x?x?xf32>, vector<32x256xf32>
  50   // CHECK:   affine.for %{{.*}} = 0 to %{{.*}} {
  51   // CHECK:     affine.for %{{.*}} = 0 to %{{.*}} step 256 {
  52   // CHECK:       %{{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1d2_d2d1]]} : memref<?x?x?xf32>, vector<32x256xf32>
  53   // CHECK:     affine.for %{{.*}} = 0 to %{{.*}} step 256 {
  54   // CHECK:       %{{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1d2_d2d1]]} : memref<?x?x?xf32>, vector<32x256xf32>
  55   affine.for %i0 = 0 to %0 {  // shared outer loop; %i0 is the last index of every load below (expected step 32)
  56     affine.for %i1 = 0 to %1 {  // first sub-nest: %i1 supplies the middle index (expected step 256)
  57       affine.for %i2 = 0 to %2 {  // %i2 supplies the first index (expected to keep its default step)
  58         %a2 = affine.load %A[%i2, %i1, %i0] : memref<?x?x?xf32>
  59       }
  60     }
  61     affine.for %i3 = 0 to %1 {  // second sub-nest: %i3 supplies the first index (expected to keep its default step)
  62       affine.for %i4 = 0 to %2 {  // %i4 supplies the middle index (expected step 256)
  63         %a4 = affine.load %A[%i3, %i4, %i0] : memref<?x?x?xf32>
  64       }
  65       affine.for %i5 = 0 to %2 {  // sibling of the %i4 loop; %i5 supplies the middle index (expected step 256)
  66         %a5 = affine.load %A[%i3, %i5, %i0] : memref<?x?x?xf32>
  67       }
  68     }
  69   }
  70   return
  71 }
  72