mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-2d.mlir

   1 // RUN:   mlir-opt %s -async-parallel-for                                      \
   2 // RUN:               -async-to-async-runtime                                  \
   3 // RUN:               -async-runtime-ref-counting                              \
   4 // RUN:               -async-runtime-ref-counting-opt                          \
   5 // RUN:               -arith-expand                                            \
   6 // RUN:               -convert-async-to-llvm                                   \
   7 // RUN:               -convert-scf-to-cf                                       \
   8 // RUN:               -finalize-memref-to-llvm                                 \
   9 // RUN:               -convert-func-to-llvm                                    \
  10 // RUN:               -convert-arith-to-llvm                                   \
  11 // RUN:               -convert-cf-to-llvm                                      \
  12 // RUN:               -reconcile-unrealized-casts                              \
  13 // RUN: | mlir-runner                                                      \
  14 // RUN:  -e entry -entry-point-result=void -O0                                 \
  15 // RUN:  -shared-libs=%mlir_runner_utils \
  16 // RUN:  -shared-libs=%mlir_async_runtime\
  17 // RUN: | FileCheck %s --dump-input=always
  18
  19 // RUN:   mlir-opt %s -async-parallel-for                                      \
  20 // RUN:               -async-to-async-runtime                                  \
  21 // RUN:               -async-runtime-policy-based-ref-counting                 \
  22 // RUN:               -arith-expand                                            \
  23 // RUN:               -convert-async-to-llvm                                   \
  24 // RUN:               -convert-scf-to-cf                                       \
  25 // RUN:               -finalize-memref-to-llvm                                 \
  26 // RUN:               -convert-func-to-llvm                                    \
  27 // RUN:               -convert-arith-to-llvm                                   \
  28 // RUN:               -convert-cf-to-llvm                                      \
  29 // RUN:               -reconcile-unrealized-casts                              \
  30 // RUN: | mlir-runner                                                      \
  31 // RUN:  -e entry -entry-point-result=void -O0                                 \
  32 // RUN:  -shared-libs=%mlir_runner_utils \
  33 // RUN:  -shared-libs=%mlir_async_runtime\
  34 // RUN: | FileCheck %s --dump-input=always
  35
  36 // RUN:   mlir-opt %s -async-parallel-for="async-dispatch=false                \
  37 // RUN:                                    num-workers=20                      \
  38 // RUN:                                    min-task-size=1"                    \
  39 // RUN:               -async-to-async-runtime                                  \
  40 // RUN:               -async-runtime-ref-counting                              \
  41 // RUN:               -async-runtime-ref-counting-opt                          \
  42 // RUN:               -arith-expand                                            \
  43 // RUN:               -convert-async-to-llvm                                   \
  44 // RUN:               -convert-scf-to-cf                                       \
  45 // RUN:               -finalize-memref-to-llvm                                 \
  46 // RUN:               -convert-func-to-llvm                                    \
  47 // RUN:               -convert-arith-to-llvm                                   \
  48 // RUN:               -convert-cf-to-llvm                                      \
  49 // RUN:               -reconcile-unrealized-casts                              \
  50 // RUN: | mlir-runner                                                      \
  51 // RUN:  -e entry -entry-point-result=void -O0                                 \
  52 // RUN:  -shared-libs=%mlir_runner_utils \
  53 // RUN:  -shared-libs=%mlir_async_runtime\
  54 // RUN: | FileCheck %s --dump-input=always
  55
  56 func.func @entry() {  // Test entry point (selected by `-e entry` in the RUN lines).
  57   %c0 = arith.constant 0.0 : f32  // f32 zero used to clear the buffer (not an index).
  58   %c1 = arith.constant 1 : index
  59   %c2 = arith.constant 2 : index
  60   %c8 = arith.constant 8 : index
  61   // Lower/upper bounds shared by both dimensions of every loop below.
  62   %lb = arith.constant 0 : index
  63   %ub = arith.constant 8 : index
  64   // 8x8 working buffer; %U is the unranked view handed to the printer.
  65   %A = memref.alloc() : memref<8x8xf32>
  66   %U = memref.cast %A :  memref<8x8xf32> to memref<*xf32>
  67
  68   // Initialize the memref with zeros: every test below loads an element, adds
  69   // to it and stores it back, which verifies that each point of the iteration space is processed once.
  70   scf.parallel (%i, %j) = (%lb, %lb) to (%ub, %ub) step (%c1, %c1) {
  71     memref.store %c0, %A[%i, %j] : memref<8x8xf32>
  72   }
  73
  74   // 1. (%i, %j) = (0, 0) to (8, 8) step (1, 1)
  75   scf.parallel (%i, %j) = (%lb, %lb) to (%ub, %ub) step (%c1, %c1) {
  76     %0 = arith.muli %i, %c8 : index
  77     %1 = arith.addi %j, %0  : index  // %1 = %i * 8 + %j
  78     %2 = arith.index_cast %1 : index to i32
  79     %3 = arith.sitofp %2 : i32 to f32
  80     %4 = memref.load %A[%i, %j] : memref<8x8xf32>
  81     %5 = arith.addf %3, %4 : f32
  82     memref.store %5, %A[%i, %j] : memref<8x8xf32>
  83   }
  84   // Every element (i, j) is visited, so each must hold i * 8 + j.
  85   // CHECK:      [0, 1, 2, 3, 4, 5, 6, 7]
  86   // CHECK-NEXT: [8, 9, 10, 11, 12, 13, 14, 15]
  87   // CHECK-NEXT: [16, 17, 18, 19, 20, 21, 22, 23]
  88   // CHECK-NEXT: [24, 25, 26, 27, 28, 29, 30, 31]
  89   // CHECK-NEXT: [32, 33, 34, 35, 36, 37, 38, 39]
  90   // CHECK-NEXT: [40, 41, 42, 43, 44, 45, 46, 47]
  91   // CHECK-NEXT: [48, 49, 50, 51, 52, 53, 54, 55]
  92   // CHECK-NEXT: [56, 57, 58, 59, 60, 61, 62, 63]
  93   call @printMemrefF32(%U): (memref<*xf32>) -> ()
  94   // Reset the buffer to zeros before the next test.
  95   scf.parallel (%i, %j) = (%lb, %lb) to (%ub, %ub) step (%c1, %c1) {
  96     memref.store %c0, %A[%i, %j] : memref<8x8xf32>
  97   }
  98
  99   // 2. (%i, %j) = (0, 0) to (8, 8) step (2, 1)
 100   scf.parallel (%i, %j) = (%lb, %lb) to (%ub, %ub) step (%c2, %c1) {
 101     %0 = arith.muli %i, %c8 : index
 102     %1 = arith.addi %j, %0  : index  // %1 = %i * 8 + %j
 103     %2 = arith.index_cast %1 : index to i32
 104     %3 = arith.sitofp %2 : i32 to f32
 105     %4 = memref.load %A[%i, %j] : memref<8x8xf32>
 106     %5 = arith.addf %3, %4 : f32
 107     memref.store %5, %A[%i, %j] : memref<8x8xf32>
 108   }
 109   // Step 2 in %i visits only even rows; odd rows must remain zero.
 110   // CHECK:      [0, 1, 2, 3, 4, 5, 6, 7]
 111   // CHECK-NEXT: [0, 0, 0, 0, 0, 0, 0, 0]
 112   // CHECK-NEXT: [16, 17, 18, 19, 20, 21, 22, 23]
 113   // CHECK-NEXT: [0, 0, 0, 0, 0, 0, 0, 0]
 114   // CHECK-NEXT: [32, 33, 34, 35, 36, 37, 38, 39]
 115   // CHECK-NEXT: [0, 0, 0, 0, 0, 0, 0, 0]
 116   // CHECK-NEXT: [48, 49, 50, 51, 52, 53, 54, 55]
 117   // CHECK-NEXT: [0, 0, 0, 0, 0, 0, 0, 0]
 118   call @printMemrefF32(%U): (memref<*xf32>) -> ()
 119   // Reset the buffer to zeros before the next test.
 120   scf.parallel (%i, %j) = (%lb, %lb) to (%ub, %ub) step (%c1, %c1) {
 121     memref.store %c0, %A[%i, %j] : memref<8x8xf32>
 122   }
 123
 124   // 3. (%i, %j) = (0, 0) to (8, 8) step (1, 2)
 125   scf.parallel (%i, %j) = (%lb, %lb) to (%ub, %ub) step (%c1, %c2) {
 126     %0 = arith.muli %i, %c8 : index
 127     %1 = arith.addi %j, %0  : index  // %1 = %i * 8 + %j
 128     %2 = arith.index_cast %1 : index to i32
 129     %3 = arith.sitofp %2 : i32 to f32
 130     %4 = memref.load %A[%i, %j] : memref<8x8xf32>
 131     %5 = arith.addf %3, %4 : f32
 132     memref.store %5, %A[%i, %j] : memref<8x8xf32>
 133   }
 134   // Step 2 in %j visits only even columns; odd columns must remain zero.
 135   // CHECK:      [0, 0, 2, 0, 4, 0, 6, 0]
 136   // CHECK-NEXT: [8, 0, 10, 0, 12, 0, 14, 0]
 137   // CHECK-NEXT: [16, 0, 18, 0, 20, 0, 22, 0]
 138   // CHECK-NEXT: [24, 0, 26, 0, 28, 0, 30, 0]
 139   // CHECK-NEXT: [32, 0, 34, 0, 36, 0, 38, 0]
 140   // CHECK-NEXT: [40, 0, 42, 0, 44, 0, 46, 0]
 141   // CHECK-NEXT: [48, 0, 50, 0, 52, 0, 54, 0]
 142   // CHECK-NEXT: [56, 0, 58, 0, 60, 0, 62, 0]
 143   call @printMemrefF32(%U): (memref<*xf32>) -> ()
 144
 145   memref.dealloc %A : memref<8x8xf32>
 146
 147   return
 148 }
 149
 150 func.func private @printMemrefF32(memref<*xf32>) attributes { llvm.emit_c_interface }  // Bodiless declaration of the printer called by @entry; presumably resolved from the runner-utils shared library named in the RUN lines (TODO confirm).