1 //===- SCFToGPUPass.cpp - Convert a loop nest to a GPU kernel -----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "mlir/Conversion/SCFToGPU/SCFToGPUPass.h"
11 #include "mlir/Conversion/SCFToGPU/SCFToGPU.h"
12 #include "mlir/Dialect/Affine/IR/AffineOps.h"
13 #include "mlir/Dialect/Arith/IR/Arith.h"
14 #include "mlir/Dialect/Complex/IR/Complex.h"
15 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
16 #include "mlir/Dialect/SCF/IR/SCF.h"
17 #include "mlir/Pass/Pass.h"
18 #include "mlir/Transforms/DialectConversion.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/Support/CommandLine.h"
23 #define GEN_PASS_DEF_CONVERTAFFINEFORTOGPU
24 #define GEN_PASS_DEF_CONVERTPARALLELLOOPTOGPU
25 #include "mlir/Conversion/Passes.h.inc"
29 using namespace mlir::scf
;
32 // A pass that traverses top-level loops in the function and converts them to
33 // GPU launch operations. Nested launches are not allowed, so this does not
34 // walk the function recursively to avoid considering nested loops.
35 struct ForLoopMapper
: public impl::ConvertAffineForToGPUBase
<ForLoopMapper
> {
36 ForLoopMapper() = default;
37 ForLoopMapper(unsigned numBlockDims
, unsigned numThreadDims
) {
38 this->numBlockDims
= numBlockDims
;
39 this->numThreadDims
= numThreadDims
;
42 void runOnOperation() override
{
43 for (Operation
&op
: llvm::make_early_inc_range(
44 getOperation().getFunctionBody().getOps())) {
45 if (auto forOp
= dyn_cast
<affine::AffineForOp
>(&op
)) {
46 if (failed(convertAffineLoopNestToGPULaunch(forOp
, numBlockDims
,
54 struct ParallelLoopToGpuPass
55 : public impl::ConvertParallelLoopToGpuBase
<ParallelLoopToGpuPass
> {
56 void runOnOperation() override
{
57 RewritePatternSet
patterns(&getContext());
58 populateParallelLoopToGPUPatterns(patterns
);
59 ConversionTarget
target(getContext());
60 target
.markUnknownOpDynamicallyLegal([](Operation
*) { return true; });
61 configureParallelLoopToGPULegality(target
);
62 if (failed(applyPartialConversion(getOperation(), target
,
63 std::move(patterns
))))
65 finalizeParallelLoopToGPUConversion(getOperation());
71 std::unique_ptr
<InterfacePass
<FunctionOpInterface
>>
72 mlir::createAffineForToGPUPass(unsigned numBlockDims
, unsigned numThreadDims
) {
73 return std::make_unique
<ForLoopMapper
>(numBlockDims
, numThreadDims
);
75 std::unique_ptr
<InterfacePass
<FunctionOpInterface
>>
76 mlir::createAffineForToGPUPass() {
77 return std::make_unique
<ForLoopMapper
>();
80 std::unique_ptr
<Pass
> mlir::createParallelLoopToGpuPass() {
81 return std::make_unique
<ParallelLoopToGpuPass
>();