[clang-tidy][NFC]remove deps of clang in clang tidy test (#116588)
[llvm-project.git] / mlir / test / Examples / NVGPU / Ch0.py
blob8f60088178d11952aab22dd99c9a7fd244fd2c81
1 # RUN: env SUPPORT_LIB=%mlir_cuda_runtime \
2 # RUN: %PYTHON %s | FileCheck %s
4 # ===----------------------------------------------------------------------===//
5 # Chapter 0 : Hello World
6 # ===----------------------------------------------------------------------===//
8 # This program demonstrates Hello World:
9 # 1. Build MLIR function with arguments
10 # 2. Build MLIR GPU kernel
11 # 3. Print from a GPU thread
12 # 4. Pass arguments, JIT compile and run the MLIR function
14 # ===----------------------------------------------------------------------===//
17 from mlir.dialects import gpu
18 from tools.nvdsl import *
21 # 1. The decorator generates a MLIR func.func.
22 # Everything inside the Python function becomes the body of the func.
23 # The decorator also translates `alpha` to an `index` type.
24 @NVDSL.mlir_func
25 def main(alpha):
26 # 2. The decorator generates a MLIR gpu.launch.
27 # Everything inside the Python function becomes the body of the gpu.launch.
28 # This allows for late outlining of the GPU kernel, enabling optimizations
29 # like constant folding from host to device.
30 @NVDSL.mlir_gpu_launch(grid=(1, 1, 1), block=(4, 1, 1))
31 def kernel():
32 tidx = gpu.thread_id(gpu.Dimension.x)
33 # + operator generates arith.addi
34 myValue = alpha + tidx
35 # Print from a GPU thread
36 gpu.printf("GPU thread %llu has %llu\n", [tidx, myValue])
38 # 3. Call the GPU kernel
39 kernel()
42 alpha = 100
43 # 4. The `mlir_func` decorator JIT compiles the IR and executes the MLIR function.
44 main(alpha)
47 # CHECK: GPU thread 0 has 100
48 # CHECK: GPU thread 1 has 101
49 # CHECK: GPU thread 2 has 102
50 # CHECK: GPU thread 3 has 103