mlir/test/Conversion/SCFToGPU/step_one.mlir

   1 // RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-affine-for-to-gpu{gpu-block-dims=1 gpu-thread-dims=1}))" %s | FileCheck --check-prefix=CHECK-11 %s
   2 // RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-affine-for-to-gpu{gpu-block-dims=2 gpu-thread-dims=2}))" %s | FileCheck --check-prefix=CHECK-22 %s
   3
   4 // CHECK-11-LABEL: @step_1
   5 // CHECK-22-LABEL: @step_1
   6 func.func @step_1(%A : memref<?x?x?x?xf32>, %B : memref<?x?x?x?xf32>) {
       // Four nested affine.for loops load each element of %A and store it to %B.
       // The checks expect a gpu.launch followed by arith.addi ops that remap
       // %i and %j (1, 1 configuration) or all four iterators (2, 2 configuration).
   7   // Outermost loop (%i): its lower bound, upper bound, range (upper - lower) and step.
   8   // CHECK-11-NEXT: %{{.*}} = arith.constant 0 : index
   9   // CHECK-11-NEXT: %{{.*}} = arith.constant 42 : index
  10   // CHECK-11-NEXT: %{{.*}} = arith.subi %{{.*}}, %{{.*}} : index
  11   // CHECK-11-NEXT: %{{.*}} = arith.constant 1 : index
  12   // The same four values are expected when mapping to 2, 2 dimensions.
  13   // CHECK-22-NEXT: %{{.*}} = arith.constant 0 : index
  14   // CHECK-22-NEXT: %{{.*}} = arith.constant 42 : index
  15   // CHECK-22-NEXT: %{{.*}} = arith.subi %{{.*}}, %{{.*}} : index
  16   // CHECK-22-NEXT: %{{.*}} = arith.constant 1 : index
  17   affine.for %i = 0 to 42 {
  18
  19     // Second loop (%j): its lower bound, upper bound, range and step.
  20     // CHECK-11-NEXT: %{{.*}} = arith.constant 0 : index
  21     // CHECK-11-NEXT: %{{.*}} = arith.constant 10 : index
  22     // CHECK-11-NEXT: %{{.*}} = arith.subi %{{.*}}, %{{.*}} : index
  23     // CHECK-11-NEXT: %{{.*}} = arith.constant 1 : index
  24     // The same four values are expected when mapping to 2, 2 dimensions.
  25     // CHECK-22-NEXT: %{{.*}} = arith.constant 0 : index
  26     // CHECK-22-NEXT: %{{.*}} = arith.constant 10 : index
  27     // CHECK-22-NEXT: %{{.*}} = arith.subi %{{.*}}, %{{.*}} : index
  28     // CHECK-22-NEXT: %{{.*}} = arith.constant 1 : index
  29     affine.for %j = 0 to 10 {
  30     // CHECK-11: gpu.launch
  31     // CHECK-11-SAME: blocks
  32     // CHECK-11-SAME: threads
  33
  34       // When mapping to 1, 1 dimensions, %i and %j are remapped after the launch op.
  35       // CHECK-11:        %[[i:.*]] = arith.addi %{{.*}}, %{{.*}} : index
  36       // CHECK-11-NEXT:   %[[j:.*]] = arith.addi %{{.*}}, %{{.*}} : index
  37
  38       // The third loop (%ii) is not converted when mapping to 1, 1 dimensions; it stays an affine.for.
  39       // CHECK-11-NEXT: affine.for %[[ii:.*]] = 2 to 16
  40       // When mapping to 2, 2 dimensions it is converted, so the checks expect its
  41       // lower bound, upper bound, range and step.
  42       // CHECK-22-NEXT: %{{.*}} = arith.constant 2 : index
  43       // CHECK-22-NEXT: %{{.*}} = arith.constant 16 : index
  44       // CHECK-22-NEXT: %{{.*}} = arith.subi %{{.*}}, %{{.*}} : index
  45       // CHECK-22-NEXT: %{{.*}} = arith.constant 1 : index
  46       affine.for %ii = 2 to 16 {
  47         // The innermost loop (%jj) is likewise kept as an affine.for when mapping to 1, 1 dimensions.
  48         // CHECK-11-NEXT: affine.for %[[jj:.*]] = 5 to 17
  49         // When mapping to 2, 2 dimensions it is converted, so the checks expect its
  50         // lower bound, upper bound, range and step.
  51         // CHECK-22-NEXT: %{{.*}} = arith.constant 5 : index
  52         // CHECK-22-NEXT: %{{.*}} = arith.constant 17 : index
  53         // CHECK-22-NEXT: %{{.*}} = arith.subi %{{.*}}, %{{.*}} : index
  54         // CHECK-22-NEXT: %{{.*}} = arith.constant 1 : index
  55         affine.for %jj = 5 to 17 {
  56         // CHECK-22: gpu.launch
  57         // CHECK-22-SAME: blocks
  58         // CHECK-22-SAME: threads
  59
  60           // When mapping to 2, 2 dimensions, all four induction variables are remapped after the launch op.
  61           // CHECK-22:        %[[i:.*]] = arith.addi %{{.*}}, %{{.*}} : index
  62           // CHECK-22-NEXT:   %[[j:.*]] = arith.addi %{{.*}}, %{{.*}} : index
  63           // CHECK-22-NEXT:   %[[ii:.*]] = arith.addi %{{.*}}, %{{.*}} : index
  64           // CHECK-22-NEXT:   %[[jj:.*]] = arith.addi %{{.*}}, %{{.*}} : index
  65
  66           // The load and store must index with the values captured above, not the original loop iterators.
  67           // CHECK-11:        {{.*}} = memref.load %{{.*}}[%[[i]], %[[j]], %[[ii]], %[[jj]]] : memref<?x?x?x?xf32>
  68           // CHECK-22:        {{.*}} = memref.load %{{.*}}[%[[i]], %[[j]], %[[ii]], %[[jj]]] : memref<?x?x?x?xf32>
  69           %0 = memref.load %A[%i, %j, %ii, %jj] : memref<?x?x?x?xf32>
  70           // CHECK-11-NEXT:   memref.store {{.*}}, %{{.*}}[%[[i]], %[[j]], %[[ii]], %[[jj]]] : memref<?x?x?x?xf32>
  71           // CHECK-22-NEXT:   memref.store {{.*}}, %{{.*}}[%[[i]], %[[j]], %[[ii]], %[[jj]]] : memref<?x?x?x?xf32>
  72           memref.store %0, %B[%i, %j, %ii, %jj] : memref<?x?x?x?xf32>
  73
  74           // CHECK-11: gpu.terminator
  75           // CHECK-22: gpu.terminator
  76         }
  77       }
  78     }
  79   }
  80   return
  81 }
  82