1 // RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-affine-for-to-gpu{gpu-block-dims=1 gpu-thread-dims=1}))" %s | FileCheck --check-prefix=CHECK-11 %s
2 // RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-affine-for-to-gpu{gpu-block-dims=2 gpu-thread-dims=2}))" %s | FileCheck --check-prefix=CHECK-22 %s
4 // CHECK-11-LABEL: @step_1
5 // CHECK-22-LABEL: @step_1
6 func.func @step_1(%A : memref<?x?x?x?xf32>, %B : memref<?x?x?x?xf32>) {
7 // Bounds of the loop, its range and step.
8 // CHECK-11-NEXT: %{{.*}} = arith.constant 0 : index
9 // CHECK-11-NEXT: %{{.*}} = arith.constant 42 : index
10 // CHECK-11-NEXT: %{{.*}} = arith.subi %{{.*}}, %{{.*}} : index
11 // CHECK-11-NEXT: %{{.*}} = arith.constant 1 : index
13 // CHECK-22-NEXT: %{{.*}} = arith.constant 0 : index
14 // CHECK-22-NEXT: %{{.*}} = arith.constant 42 : index
15 // CHECK-22-NEXT: %{{.*}} = arith.subi %{{.*}}, %{{.*}} : index
16 // CHECK-22-NEXT: %{{.*}} = arith.constant 1 : index
17 affine.for %i = 0 to 42 {
19 // Bounds of the loop, its range and step.
20 // CHECK-11-NEXT: %{{.*}} = arith.constant 0 : index
21 // CHECK-11-NEXT: %{{.*}} = arith.constant 10 : index
22 // CHECK-11-NEXT: %{{.*}} = arith.subi %{{.*}}, %{{.*}} : index
23 // CHECK-11-NEXT: %{{.*}} = arith.constant 1 : index
25 // CHECK-22-NEXT: %{{.*}} = arith.constant 0 : index
26 // CHECK-22-NEXT: %{{.*}} = arith.constant 10 : index
27 // CHECK-22-NEXT: %{{.*}} = arith.subi %{{.*}}, %{{.*}} : index
28 // CHECK-22-NEXT: %{{.*}} = arith.constant 1 : index
29 affine.for %j = 0 to 10 {
30 // CHECK-11: gpu.launch
31 // CHECK-11-SAME: blocks
32 // CHECK-11-SAME: threads
34 // Remapping of the loop induction variables.
35 // CHECK-11: %[[i:.*]] = arith.addi %{{.*}}, %{{.*}} : index
36 // CHECK-11-NEXT: %[[j:.*]] = arith.addi %{{.*}}, %{{.*}} : index
38 // This loop is not converted when mapping to 1 block dimension and 1 thread dimension.
39 // CHECK-11-NEXT: affine.for %[[ii:.*]] = 2 to 16
41 // Bounds of the loop, its range and step.
42 // CHECK-22-NEXT: %{{.*}} = arith.constant 2 : index
43 // CHECK-22-NEXT: %{{.*}} = arith.constant 16 : index
44 // CHECK-22-NEXT: %{{.*}} = arith.subi %{{.*}}, %{{.*}} : index
45 // CHECK-22-NEXT: %{{.*}} = arith.constant 1 : index
46 affine.for %ii = 2 to 16 {
47 // This loop is not converted when mapping to 1 block dimension and 1 thread dimension.
48 // CHECK-11-NEXT: affine.for %[[jj:.*]] = 5 to 17
50 // Bounds of the loop, its range and step.
51 // CHECK-22-NEXT: %{{.*}} = arith.constant 5 : index
52 // CHECK-22-NEXT: %{{.*}} = arith.constant 17 : index
53 // CHECK-22-NEXT: %{{.*}} = arith.subi %{{.*}}, %{{.*}} : index
54 // CHECK-22-NEXT: %{{.*}} = arith.constant 1 : index
55 affine.for %jj = 5 to 17 {
56 // CHECK-22: gpu.launch
57 // CHECK-22-SAME: blocks
58 // CHECK-22-SAME: threads
60 // Remapping of the loop induction variables in the last mapped loop.
61 // CHECK-22: %[[i:.*]] = arith.addi %{{.*}}, %{{.*}} : index
62 // CHECK-22-NEXT: %[[j:.*]] = arith.addi %{{.*}}, %{{.*}} : index
63 // CHECK-22-NEXT: %[[ii:.*]] = arith.addi %{{.*}}, %{{.*}} : index
64 // CHECK-22-NEXT: %[[jj:.*]] = arith.addi %{{.*}}, %{{.*}} : index
66 // Using remapped values instead of loop iterators.
67 // CHECK-11: {{.*}} = memref.load %{{.*}}[%[[i]], %[[j]], %[[ii]], %[[jj]]] : memref<?x?x?x?xf32>
68 // CHECK-22: {{.*}} = memref.load %{{.*}}[%[[i]], %[[j]], %[[ii]], %[[jj]]] : memref<?x?x?x?xf32>
69 %0 = memref.load %A[%i, %j, %ii, %jj] : memref<?x?x?x?xf32>
70 // CHECK-11-NEXT: memref.store {{.*}}, %{{.*}}[%[[i]], %[[j]], %[[ii]], %[[jj]]] : memref<?x?x?x?xf32>
71 // CHECK-22-NEXT: memref.store {{.*}}, %{{.*}}[%[[i]], %[[j]], %[[ii]], %[[jj]]] : memref<?x?x?x?xf32>
72 memref.store %0, %B[%i, %j, %ii, %jj] : memref<?x?x?x?xf32>
74 // CHECK-11: gpu.terminator
75 // CHECK-22: gpu.terminator