// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
// The intent of these tests is to check that re-ordering the arguments of use_device_addr/ptr does
// not negatively impact the code generation. It's important to note that this test is missing
// components that would generate a fully functioning executable, as the IR was reduced to keep the
// primary components for the tests.
module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"], omp.version = #omp.version<version = 50>} {
  llvm.func @mix_use_device_ptr_and_addr_and_map_(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr, %arg3: !llvm.ptr, %arg4: !llvm.ptr, %arg5: !llvm.ptr, %arg11: !llvm.ptr, %arg12: !llvm.ptr) {
    %0 = llvm.mlir.constant(0 : index) : i64
    %1 = llvm.mlir.constant(2 : index) : i64
    %2 = llvm.mlir.constant(1 : index) : i64
    %3 = omp.map.bounds lower_bound(%0 : i64) upper_bound(%1 : i64) extent(%1 : i64) stride(%2 : i64) start_idx(%0 : i64) {stride_in_bytes = true}
    %4 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(i64)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
    %5 = omp.map.info var_ptr(%arg1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
    %6 = omp.map.info var_ptr(%arg2 : !llvm.ptr, i32) var_ptr_ptr(%arg3 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%3) -> !llvm.ptr
    %7 = omp.map.info var_ptr(%arg2 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) members(%6 : [0] : !llvm.ptr) -> !llvm.ptr
    %8 = omp.map.info var_ptr(%arg4 : !llvm.ptr, f32) var_ptr_ptr(%arg5 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
    %9 = omp.map.info var_ptr(%arg4 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) members(%8 : [0] : !llvm.ptr) -> !llvm.ptr
    %10 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(i64)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
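    // The descriptor maps (%7, %9) are listed before their member maps (%6, %8) in
    // use_device_addr, while the struct map of %arg0 (%10) goes through use_device_ptr.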
    omp.target_data map_entries(%4, %5 : !llvm.ptr, !llvm.ptr) use_device_addr(%7 -> %arg6, %9 -> %arg7, %6 -> %arg8, %8 -> %arg9 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) use_device_ptr(%10 -> %arg10 : !llvm.ptr) {
      %11 = llvm.getelementptr %arg4[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(i64)>
      %12 = llvm.getelementptr %arg12[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(i64)>
      %13 = llvm.load %11 : !llvm.ptr -> i64
      llvm.store %13, %12 : i64, !llvm.ptr
      %14 = llvm.mlir.constant(48 : i32) : i32
      "llvm.intr.memcpy"(%arg11, %arg6, %14) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
      %15 = llvm.getelementptr %arg11[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
      %16 = llvm.load %15 : !llvm.ptr -> !llvm.ptr
      %17 = llvm.getelementptr %16[%1] : (!llvm.ptr, i64) -> !llvm.ptr, i8
      %18 = llvm.load %17 : !llvm.ptr -> i32
      llvm.store %18, %arg1 : i32, !llvm.ptr
      omp.terminator
    }
    llvm.return
  }
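  // Same maps as above, but the map_entries and use_device_addr operands are listed in a
  // different order, to check that the lowering does not depend on operand ordering.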
  llvm.func @mix_use_device_ptr_and_addr_and_map_2(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr, %arg3: !llvm.ptr, %arg4: !llvm.ptr, %arg5: !llvm.ptr, %arg11: !llvm.ptr, %arg12: !llvm.ptr) {
    %0 = llvm.mlir.constant(0 : index) : i64
    %1 = llvm.mlir.constant(2 : index) : i64
    %2 = llvm.mlir.constant(1 : index) : i64
    %3 = omp.map.bounds lower_bound(%0 : i64) upper_bound(%1 : i64) extent(%1 : i64) stride(%2 : i64) start_idx(%0 : i64) {stride_in_bytes = true}
    %4 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(i64)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
    %5 = omp.map.info var_ptr(%arg1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
    %6 = omp.map.info var_ptr(%arg2 : !llvm.ptr, i32) var_ptr_ptr(%arg3 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%3) -> !llvm.ptr
    %7 = omp.map.info var_ptr(%arg2 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) members(%6 : [0] : !llvm.ptr) -> !llvm.ptr
    %8 = omp.map.info var_ptr(%arg4 : !llvm.ptr, f32) var_ptr_ptr(%arg5 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
    %9 = omp.map.info var_ptr(%arg4 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) members(%8 : [0] : !llvm.ptr) -> !llvm.ptr
    %10 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(i64)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr
    omp.target_data map_entries(%5, %4 : !llvm.ptr, !llvm.ptr) use_device_addr(%8 -> %arg6, %6 -> %arg7, %7 -> %arg8, %9 -> %arg9 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) use_device_ptr(%10 -> %arg10 : !llvm.ptr) {
      %11 = llvm.getelementptr %arg4[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(i64)>
      %12 = llvm.getelementptr %arg12[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(i64)>
      %13 = llvm.load %11 : !llvm.ptr -> i64
      llvm.store %13, %12 : i64, !llvm.ptr
      %14 = llvm.mlir.constant(48 : i32) : i32
      "llvm.intr.memcpy"(%arg11, %arg8, %14) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
      %15 = llvm.getelementptr %arg11[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
      %16 = llvm.load %15 : !llvm.ptr -> !llvm.ptr
      %17 = llvm.getelementptr %16[%1] : (!llvm.ptr, i64) -> !llvm.ptr, i8
      %18 = llvm.load %17 : !llvm.ptr -> i32
      llvm.store %18, %arg1 : i32, !llvm.ptr
      omp.terminator
    }
    llvm.return
  }
}
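// Checks for the first function: the base pointers for %arg0, %arg2 and %arg4 are stored
// into offload_baseptrs slots 0, 2 and 6, then reloaded after __tgt_target_data_begin_mapper
// so the region uses the device-visible pointers.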
// CHECK: define void @mix_use_device_ptr_and_addr_and_map_(ptr %[[ARG_0:.*]], ptr %[[ARG_1:.*]], ptr %[[ARG_2:.*]], ptr %[[ARG_3:.*]], ptr %[[ARG_4:.*]], ptr %[[ARG_5:.*]], ptr %[[ARG_6:.*]], ptr %[[ARG_7:.*]]) {
// CHECK: %[[ALLOCA:.*]] = alloca ptr, align 8
// CHECK: %[[BASEPTR_0_GEP:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
// CHECK: store ptr %[[ARG_0]], ptr %[[BASEPTR_0_GEP]], align 8
// CHECK: %[[BASEPTR_2_GEP:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
// CHECK: store ptr %[[ARG_2]], ptr %[[BASEPTR_2_GEP]], align 8
// CHECK: %[[BASEPTR_6_GEP:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 6
// CHECK: store ptr %[[ARG_4]], ptr %[[BASEPTR_6_GEP]], align 8
// CHECK: call void @__tgt_target_data_begin_mapper({{.*}})
// CHECK: %[[LOAD_BASEPTR_0:.*]] = load ptr, ptr %[[BASEPTR_0_GEP]], align 8
// CHECK: store ptr %[[LOAD_BASEPTR_0]], ptr %[[ALLOCA]], align 8
// CHECK: %[[LOAD_BASEPTR_2:.*]] = load ptr, ptr %[[BASEPTR_2_GEP]], align 8
// CHECK: %[[LOAD_BASEPTR_6:.*]] = load ptr, ptr %[[BASEPTR_6_GEP]], align 8
// CHECK: %[[GEP_A4:.*]] = getelementptr { i64 }, ptr %[[ARG_4]], i32 0, i32 0
// CHECK: %[[GEP_A7:.*]] = getelementptr { i64 }, ptr %[[ARG_7]], i32 0, i32 0
// CHECK: %[[LOAD_A4:.*]] = load i64, ptr %[[GEP_A4]], align 4
// CHECK: store i64 %[[LOAD_A4]], ptr %[[GEP_A7]], align 4
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[ARG_6]], ptr %[[LOAD_BASEPTR_2]], i32 48, i1 false)
// CHECK: %[[GEP_A6:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[ARG_6]], i32 0, i32 0
// CHECK: %[[LOAD_A6:.*]] = load ptr, ptr %[[GEP_A6]], align 8
// CHECK: %[[GEP_A6_2:.*]] = getelementptr i8, ptr %[[LOAD_A6]], i64 2
// CHECK: %[[LOAD_A6_2:.*]] = load i32, ptr %[[GEP_A6_2]], align 4
// CHECK: store i32 %[[LOAD_A6_2]], ptr %[[ARG_1]], align 4
// CHECK: call void @__tgt_target_data_end_mapper({{.*}})
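// Checks for the reordered variant: %arg0 now lands in baseptr slot 1 rather than slot 0,
// following the swapped map_entries order; the rest of the sequence is unchanged.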
// CHECK: define void @mix_use_device_ptr_and_addr_and_map_2(ptr %[[ARG_0:.*]], ptr %[[ARG_1:.*]], ptr %[[ARG_2:.*]], ptr %[[ARG_3:.*]], ptr %[[ARG_4:.*]], ptr %[[ARG_5:.*]], ptr %[[ARG_6:.*]], ptr %[[ARG_7:.*]]) {
// CHECK: %[[ALLOCA:.*]] = alloca ptr, align 8
// CHECK: %[[BASEPTR_1_GEP:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
// CHECK: store ptr %[[ARG_0]], ptr %[[BASEPTR_1_GEP]], align 8
// CHECK: %[[BASEPTR_2_GEP:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
// CHECK: store ptr %[[ARG_2]], ptr %[[BASEPTR_2_GEP]], align 8
// CHECK: %[[BASEPTR_6_GEP:.*]] = getelementptr inbounds [10 x ptr], ptr %.offload_baseptrs, i32 0, i32 6
// CHECK: store ptr %[[ARG_4]], ptr %[[BASEPTR_6_GEP]], align 8
// CHECK: call void @__tgt_target_data_begin_mapper({{.*}})
// CHECK: %[[LOAD_BASEPTR_1:.*]] = load ptr, ptr %[[BASEPTR_1_GEP]], align 8
// CHECK: store ptr %[[LOAD_BASEPTR_1]], ptr %[[ALLOCA]], align 8
// CHECK: %[[LOAD_BASEPTR_2:.*]] = load ptr, ptr %[[BASEPTR_2_GEP]], align 8
// CHECK: %[[LOAD_BASEPTR_6:.*]] = load ptr, ptr %[[BASEPTR_6_GEP]], align 8
// CHECK: %[[GEP_A4:.*]] = getelementptr { i64 }, ptr %[[ARG_4]], i32 0, i32 0
// CHECK: %[[GEP_A7:.*]] = getelementptr { i64 }, ptr %[[ARG_7]], i32 0, i32 0
// CHECK: %[[LOAD_A4:.*]] = load i64, ptr %[[GEP_A4]], align 4
// CHECK: store i64 %[[LOAD_A4]], ptr %[[GEP_A7]], align 4
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[ARG_6]], ptr %[[LOAD_BASEPTR_2]], i32 48, i1 false)
// CHECK: %[[GEP_A6:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[ARG_6]], i32 0, i32 0
// CHECK: %[[LOAD_A6:.*]] = load ptr, ptr %[[GEP_A6]], align 8
// CHECK: %[[GEP_A6_2:.*]] = getelementptr i8, ptr %[[LOAD_A6]], i64 2
// CHECK: %[[LOAD_A6_2:.*]] = load i32, ptr %[[GEP_A6_2]], align 4
// CHECK: store i32 %[[LOAD_A6_2]], ptr %[[ARG_1]], align 4
// CHECK: call void @__tgt_target_data_end_mapper({{.*}})