mlir/lib/Conversion/SCFToGPU/SCFToGPUPass.cpp

   1 //===- SCFToGPUPass.cpp - Convert a loop nest to a GPU kernel -----------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #include "mlir/Conversion/SCFToGPU/SCFToGPUPass.h"
  10
  11 #include "mlir/Conversion/SCFToGPU/SCFToGPU.h"
  12 #include "mlir/Dialect/Affine/IR/AffineOps.h"
  13 #include "mlir/Dialect/Arith/IR/Arith.h"
  14 #include "mlir/Dialect/Complex/IR/Complex.h"
  15 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
  16 #include "mlir/Dialect/SCF/IR/SCF.h"
  17 #include "mlir/Pass/Pass.h"
  18 #include "mlir/Transforms/DialectConversion.h"
  19 #include "llvm/ADT/ArrayRef.h"
  20 #include "llvm/Support/CommandLine.h"
  21
  22 namespace mlir {
  23 #define GEN_PASS_DEF_CONVERTAFFINEFORTOGPU
  24 #define GEN_PASS_DEF_CONVERTPARALLELLOOPTOGPU
  25 #include "mlir/Conversion/Passes.h.inc"
  26 } // namespace mlir
  27
  28 using namespace mlir;
  29 using namespace mlir::scf;
  30
  31 namespace {
  32 // A pass that traverses top-level loops in the function and converts them to
  33 // GPU launch operations.  Nested launches are not allowed, so this does not
  34 // walk the function recursively to avoid considering nested loops.
  35 struct ForLoopMapper : public impl::ConvertAffineForToGPUBase<ForLoopMapper> {
  36   ForLoopMapper() = default;
  37   ForLoopMapper(unsigned numBlockDims, unsigned numThreadDims) {
  38     this->numBlockDims = numBlockDims;
  39     this->numThreadDims = numThreadDims;
  40   }
  41
  42   void runOnOperation() override {
  43     for (Operation &op : llvm::make_early_inc_range(
  44              getOperation().getFunctionBody().getOps())) {
  45       if (auto forOp = dyn_cast<affine::AffineForOp>(&op)) {
  46         if (failed(convertAffineLoopNestToGPULaunch(forOp, numBlockDims,
  47                                                     numThreadDims)))
  48           signalPassFailure();
  49       }
  50     }
  51   }
  52 };
  53
  54 struct ParallelLoopToGpuPass
  55     : public impl::ConvertParallelLoopToGpuBase<ParallelLoopToGpuPass> {
  56   void runOnOperation() override {
  57     RewritePatternSet patterns(&getContext());
  58     populateParallelLoopToGPUPatterns(patterns);
  59     ConversionTarget target(getContext());
  60     target.markUnknownOpDynamicallyLegal([](Operation *) { return true; });
  61     configureParallelLoopToGPULegality(target);
  62     if (failed(applyPartialConversion(getOperation(), target,
  63                                       std::move(patterns))))
  64       signalPassFailure();
  65     finalizeParallelLoopToGPUConversion(getOperation());
  66   }
  67 };
  68
  69 } // namespace
  70
  71 std::unique_ptr<InterfacePass<FunctionOpInterface>>
  72 mlir::createAffineForToGPUPass(unsigned numBlockDims, unsigned numThreadDims) {
  73   return std::make_unique<ForLoopMapper>(numBlockDims, numThreadDims);
  74 }
  75 std::unique_ptr<InterfacePass<FunctionOpInterface>>
  76 mlir::createAffineForToGPUPass() {
  77   return std::make_unique<ForLoopMapper>();
  78 }
  79
  80 std::unique_ptr<Pass> mlir::createParallelLoopToGpuPass() {
  81   return std::make_unique<ParallelLoopToGpuPass>();
  82 }