1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2 ; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes OPT
3 ; RUN: llc < %s -mcpu=sm_70 --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes PTX
; Kernel (tagged "kernel" in !0) whose byval i32 argument is only read.
; The opt-level checks expect the load to be redirected through an
; addrspace(101) cast of the argument; the llc-level checks expect an
; ld.param that reads directly from grid_const_int_param_0.
5 define void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %input2, ptr %out, i32 %n) {
6 ; PTX-LABEL: grid_const_int(
8 ; PTX: ld.param.{{.*}} [[R2:%.*]], [grid_const_int_param_0];
10 ; OPT-LABEL: define void @grid_const_int(
11 ; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) {
13 ; OPT: [[INPUT11:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
14 ; OPT: [[TMP:%.*]] = load i32, ptr addrspace(101) [[INPUT11]], align 4
; Input IR: read the byval value, add the scalar argument, write the sum out.
16 %tmp = load i32, ptr %input1, align 4
17 %add = add i32 %tmp, %input2
18 store i32 %add, ptr %out
; Two-field i32 aggregate used as the byval pointee type by the struct-based
; tests in this file.
22 %struct.s = type { i32, i32 }
; Kernel (tagged "kernel" in !2) that reads both fields of a byval %struct.s
; argument without letting the pointer escape.
; The opt-level checks expect both getelementptr instructions and both loads
; to operate on an addrspace(101) cast of the argument; the llc-level checks
; expect ld.param reads at offsets 0 and 4 of param_0.
24 define void @grid_const_struct(ptr byval(%struct.s) align 4 %input, ptr %out){
25 ; PTX-LABEL: grid_const_struct(
28 ; PTX: ld.param.{{.*}} [[R1:%.*]], [grid_const_struct_param_0];
29 ; PTX: ld.param.{{.*}} [[R2:%.*]], [grid_const_struct_param_0+4];
31 ; OPT-LABEL: define void @grid_const_struct(
32 ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[OUT:%.*]]) {
34 ; OPT: [[INPUT1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
35 ; OPT: [[GEP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 0
36 ; OPT: [[GEP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 1
37 ; OPT: [[TMP1:%.*]] = load i32, ptr addrspace(101) [[GEP13]], align 4
38 ; OPT: [[TMP2:%.*]] = load i32, ptr addrspace(101) [[GEP22]], align 4
; Input IR: address each field, load both, and store their sum to %out.
40 %gep1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
41 %gep2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
42 %int1 = load i32, ptr %gep1
43 %int2 = load i32, ptr %gep2
44 %add = add i32 %int1, %int2
45 store i32 %add, ptr %out
; Kernel (tagged "kernel" in !4) whose byval %struct.s pointer escapes by being
; passed to the external function @escape.
; The opt-level checks expect no alloca of the struct type; instead the
; argument is cast to addrspace(101), converted with
; llvm.nvvm.ptr.param.to.gen, and that result is passed to the call.
; The llc-level checks expect a cvta.param instruction.
49 define void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
50 ; PTX-LABEL: grid_const_escape(
53 ; PTX: cvta.param.{{.*}}
54 ; OPT-LABEL: define void @grid_const_escape(
55 ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]]) {
56 ; OPT-NOT: alloca [[STRUCT_S]]
57 ; OPT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
58 ; OPT: [[INPUT_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
59 ; OPT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT_PARAM_GEN]])
61 %call = call i32 @escape(ptr %input)
; Kernel (tagged "kernel" in !6) with two byval arguments (%input and %b) that
; both escape through a single call to @escape3, together with the address of
; a local alloca holding the scalar %a.
; The opt-level checks expect: no alloca of %struct.s, the i32 alloca for %a
; kept as is, and each byval argument cast to addrspace(101) and converted
; with llvm.nvvm.ptr.param.to.gen before being passed to the call.
; The llc-level checks expect the addresses of param_0 and param_2 to be moved
; into registers, converted with cvta.param, and stored into the outgoing
; call parameters param0 and param2.
65 define void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 %input, i32 %a, ptr byval(i32) align 4 %b) {
66 ; PTX-LABEL: multiple_grid_const_escape(
67 ; PTX: mov.{{.*}} [[RD1:%.*]], multiple_grid_const_escape_param_0;
68 ; PTX: mov.{{.*}} [[RD2:%.*]], multiple_grid_const_escape_param_2;
69 ; PTX: mov.{{.*}} [[RD3:%.*]], [[RD2]];
70 ; PTX: mov.{{.*}} [[RD4:%.*]], [[RD1]];
71 ; PTX: cvta.param.{{.*}} [[RD5:%.*]], [[RD4]];
72 ; PTX: cvta.param.{{.*}} [[RD6:%.*]], [[RD3]];
74 ; PTX: st.param.b64 [param0+0], [[RD5]];
75 ; PTX: st.param.b64 [param2+0], [[RD6]];
77 ; OPT-LABEL: define void @multiple_grid_const_escape(
78 ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], i32 [[A:%.*]], ptr byval(i32) align 4 [[B:%.*]]) {
79 ; OPT: [[B_PARAM:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(101)
80 ; OPT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
81 ; OPT-NOT: alloca %struct.s
82 ; OPT: [[A_ADDR:%.*]] = alloca i32, align 4
83 ; OPT: [[INPUT_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
84 ; OPT: [[B_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[B_PARAM]])
85 ; OPT-NEXT: [[CALL:%.*]] = call i32 @escape3(ptr [[INPUT_PARAM_GEN]], ptr [[A_ADDR]], ptr [[B_PARAM_GEN]])
; Input IR: spill %a to a stack slot, then pass all three pointers to @escape3.
87 %a.addr = alloca i32, align 4
88 store i32 %a, ptr %a.addr, align 4
89 %call = call i32 @escape3(ptr %input, ptr %a.addr, ptr %b)
; Kernel (tagged "kernel" in !8) whose byval %struct.s pointer escapes by being
; stored to memory at %addr.
; The opt-level checks expect no alloca of the struct type and expect the
; stored value to be the llvm.nvvm.ptr.param.to.gen conversion of the
; addrspace(101) cast of the argument.
; The llc-level checks expect the address of param_0 to be taken with mov.b64,
; a cvta.param.u64 conversion, and the converted pointer written with
; st.global.u64.
93 define void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %input, ptr %addr) {
94 ; PTX-LABEL: grid_const_memory_escape(
96 ; PTX: mov.b64 [[RD1:%.*]], grid_const_memory_escape_param_0;
97 ; PTX: cvta.param.u64 [[RD3:%.*]], [[RD2:%.*]];
98 ; PTX: st.global.u64 [[[RD4:%.*]]], [[RD3]];
100 ; OPT-LABEL: define void @grid_const_memory_escape(
101 ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[ADDR:%.*]]) {
102 ; OPT-NOT: alloca [[STRUCT_S]]
103 ; OPT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
104 ; OPT: [[INPUT_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
105 ; OPT: store ptr [[INPUT_PARAM_GEN]], ptr {{.*}}, align 8
107 store ptr %input, ptr %addr, align 8
; Kernel (tagged "kernel" in !10) where pointers to both fields of a byval
; %struct.s argument are passed as operands to an inline asm "add.s64".
; The opt-level checks expect no alloca of the struct type: the field
; addresses are computed in addrspace(101), and each one is converted with
; llvm.nvvm.ptr.param.to.gen before the asm call.
; The llc-level checks expect the second field address to be formed by adding
; 4, both addresses to be converted with cvta.param.u64, and the two converted
; values to feed the add.s64.
111 define void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4 %input, ptr %result) {
112 ; PTX-LABEL: grid_const_inlineasm_escape(
114 ; PTX: add.{{.*}} [[RD2:%.*]], [[RD1:%.*]], 4;
115 ; PTX: cvta.param.u64 [[RD4:%.*]], [[RD2]]
116 ; PTX: cvta.param.u64 [[RD3:%.*]], [[RD1]]
117 ; PTX: add.s64 [[RD5:%.*]], [[RD3]], [[RD4]];
119 ; OPT-LABEL: define void @grid_const_inlineasm_escape(
120 ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[RESULT:%.*]]) {
121 ; OPT-NOT: alloca [[STRUCT_S]]
122 ; OPT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
123 ; OPT: [[TMPPTR13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT_PARAM]], i32 0, i32 0
124 ; OPT: [[TMPPTR22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT_PARAM]], i32 0, i32 1
125 ; OPT: [[TMPPTR22_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[TMPPTR22]])
126 ; OPT: [[TMPPTR13_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[TMPPTR13]])
127 ; OPT: [[TMP2:%.*]] = call i64 asm "add.s64 $0, $1, $2
; Input IR: take the address of each field, hand both to the inline asm, and
; store the i64 asm result to %result.
; NOTE(review): attribute group #1 used on the asm call is not visible in this
; excerpt.
129 %tmpptr1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
130 %tmpptr2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
131 %1 = call i64 asm "add.s64 $0, $1, $2;", "=l,l,l"(ptr %tmpptr1, ptr %tmpptr2) #1
132 store i64 %1, ptr %result, align 8
; Kernel (tagged "kernel" in !12) that both reads its byval i32 argument
; directly and lets the same pointer escape through a call to @escape.
; The opt-level checks expect the load to use the addrspace(101) cast of the
; argument, while the call receives the llvm.nvvm.ptr.param.to.gen conversion
; of that same cast. They also expect an explicit "align 4" on the byval
; argument in the output signature, although the input IR omits it.
; The llc-level checks expect an ld.param from param_0 as well as a
; cvta.param.u64 whose result is stored into the outgoing call parameter.
136 define void @grid_const_partial_escape(ptr byval(i32) %input, ptr %output) {
137 ; PTX-LABEL: grid_const_partial_escape(
139 ; PTX: ld.param.{{.*}} [[R1:%.*]], [grid_const_partial_escape_param_0];
141 ; PTX: cvta.param.u64 [[RD3:%.*]], {{%.*}}
142 ; PTX: st.param.{{.*}} [param0+0], [[RD3]]
145 ; OPT-LABEL: define void @grid_const_partial_escape(
146 ; OPT-SAME: ptr byval(i32) align 4 [[INPUT:%.*]], ptr {{%.*}}) {
148 ; OPT: [[INPUT1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
149 ; OPT: [[VAL:%.*]] = load i32, ptr addrspace(101) [[INPUT1]], align 4
150 ; OPT: [[TWICE:%.*]] = add i32 [[VAL]], [[VAL]]
151 ; OPT: store i32 [[TWICE]]
152 ; OPT: [[INPUT1_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1]])
153 ; OPT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT1_GEN]])
; Input IR: load the value, double it, store it to %output, then pass the
; original argument pointer to @escape.
156 %val = load i32, ptr %input
157 %twice = add i32 %val, %val
158 store i32 %twice, ptr %output
159 %call = call i32 @escape(ptr %input)
; Kernel (tagged "kernel" in !14) that mixes direct reads of a byval %struct.s
; argument with two escapes: the argument pointer is stored to %output and a
; pointer to its first field is passed to @escape.
; The opt-level checks expect both field loads to use addrspace(101) pointers,
; while the stored pointer and the call argument are each produced by a
; separate llvm.nvvm.ptr.param.to.gen conversion. They also expect an explicit
; "align 4" on the byval argument in the output signature, although the input
; IR omits it.
; The llc-level checks expect ld.param reads at offsets 0 and 4, a cvta.param
; result that is both stored with st.global and passed as the outgoing call
; parameter, and an add.s32 of the two loaded values.
163 define i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ptr %output) {
164 ; PTX-LABEL: grid_const_partial_escapemem(
166 ; PTX: ld.param.{{.*}} [[R1:%.*]], [grid_const_partial_escapemem_param_0];
167 ; PTX: ld.param.{{.*}} [[R2:%.*]], [grid_const_partial_escapemem_param_0+4];
168 ; PTX: cvta.param.{{.*}} [[RD5:%.*]], {{%.*}};
169 ; PTX: st.global.{{.*}} [{{.*}}], [[RD5]];
170 ; PTX: add.s32 [[R3:%.*]], [[R1]], [[R2]]
171 ; PTX: st.param.{{.*}} [param0+0], [[RD5]]
173 ; OPT-LABEL: define i32 @grid_const_partial_escapemem(
174 ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr {{%.*}}) {
176 ; OPT: [[INPUT2:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
177 ; OPT: [[PTR13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT2]], i32 0, i32 0
178 ; OPT: [[VAL1:%.*]] = load i32, ptr addrspace(101) [[PTR13]], align 4
179 ; OPT: [[PTR22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT2]], i32 0, i32 1
180 ; OPT: [[VAL2:%.*]] = load i32, ptr addrspace(101) [[PTR22]], align 4
181 ; OPT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2]])
182 ; OPT: store ptr [[INPUT1]]
183 ; OPT: [[ADD:%.*]] = add i32 [[VAL1]], [[VAL2]]
184 ; OPT: [[PTR1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[PTR13]])
185 ; OPT: [[CALL2:%.*]] = call i32 @escape(ptr [[PTR1]])
; Input IR: load both fields, publish the struct pointer through %output, add
; the two values, and pass the first-field pointer to @escape.
187 %ptr1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
188 %val1 = load i32, ptr %ptr1
189 %ptr2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
190 %val2 = load i32, ptr %ptr2
191 store ptr %input, ptr %output
192 %add = add i32 %val1, %val2
193 %call2 = call i32 @escape(ptr %ptr1)
; Kernel (tagged "kernel" in !16) where pointers to the two fields of a byval
; %struct.s argument are merged by a phi before being loaded from.
; The opt-level checks expect the argument to be converted once with
; llvm.nvvm.ptr.param.to.gen, both getelementptr instructions to be based on
; that generic pointer, and the final load to go through the phi result
; rather than through an addrspace(101) pointer.
; The llc-level checks expect a cvta.param, a predicated branch to a label,
; and a generic ld through the converted pointer register.
197 define void @grid_const_phi_escape(ptr byval(%struct.s) align 4 %input1, ptr %inout) {
198 ; PTX-LABEL: grid_const_phi_escape(
199 ; PTX: cvta.param.{{.*}} [[RD1:%.*]], {{.*}}
200 ; PTX: @[[P1:%.*]] bra $L__BB[[TARGET_LABEL:[_0-9]+]];
201 ; PTX: $L__BB[[TARGET_LABEL]]:
202 ; PTX: ld.{{.*}} [[R1:%.*]], [[[RD1]]];
204 ; OPT-LABEL: define void @grid_const_phi_escape(
205 ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr {{%.*}}) {
206 ; OPT: [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
207 ; OPT: [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]])
208 ; OPT: br i1 {{.*}}, label %[[FIRST:.*]], label %[[SECOND:.*]]
210 ; OPT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1_PARAM_GEN]], i32 0, i32 0
211 ; OPT: br label %[[MERGE:.*]]
213 ; OPT: [[PTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1_PARAM_GEN]], i32 0, i32 1
214 ; OPT: br label %[[MERGE]]
216 ; OPT: [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ]
217 ; OPT-NOT: load i32, ptr addrspace(101)
218 ; OPT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
; Input IR: branch on whether the value at %inout is negative, select a field
; pointer on each path, merge them with a phi, then load through the merged
; pointer and write the result back to %inout.
221 %val = load i32, ptr %inout
222 %less = icmp slt i32 %val, 0
223 br i1 %less, label %first, label %second
225 %ptr1 = getelementptr inbounds %struct.s, ptr %input1, i32 0, i32 0
228 %ptr2 = getelementptr inbounds %struct.s, ptr %input1, i32 0, i32 1
231 %ptrnew = phi ptr [%ptr1, %first], [%ptr2, %second]
232 %valloaded = load i32, ptr %ptrnew
233 store i32 %valloaded, ptr %inout
; Kernel (tagged "kernel" in !18) where a phi merges a field pointer derived
; from %input1 with a field pointer derived from %input2.
; The opt-level checks expect the two arguments to be handled differently:
; %input2 is copied into a local alloca by a whole-struct load from its
; addrspace(101) cast, whereas %input1 is converted in place with
; llvm.nvvm.ptr.param.to.gen. The phi then merges a pointer into the converted
; %input1 with a pointer into the local copy of %input2.
; The llc-level checks expect an ld.param at offset 4 of param_1, a predicated
; branch, a cvta.param.u64 followed by a generic ld.u32, and a st.global.u32
; of the loaded register.
237 ; NOTE: %input2 is *not* grid_constant
238 define void @grid_const_phi_escape2(ptr byval(%struct.s) align 4 %input1, ptr byval(%struct.s) %input2, ptr %inout) {
239 ; PTX-LABEL: grid_const_phi_escape2(
240 ; PTX: ld.param.{{.*}} [[R1:%.*]], [grid_const_phi_escape2_param_1+4];
241 ; PTX: @[[P1:%.*]] bra $L__BB[[LABEL:[_0-9]+]];
242 ; PTX: cvta.param.u64 [[RD1:%.*]], [[RD2:%.*]];
243 ; PTX: ld.u32 [[R1]], [[[RD1]]];
244 ; PTX: $L__BB[[LABEL]]:
245 ; PTX: st.global.u32 [[[RD3:%.*]]], [[R1]]
246 ; OPT-LABEL: define void @grid_const_phi_escape2(
247 ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr {{%.*}}) {
248 ; OPT: [[INPUT24:%.*]] = alloca [[STRUCT_S]], align 8
249 ; OPT: [[INPUT25:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
250 ; OPT: [[INPUT26:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[INPUT25]], align 8
251 ; OPT: store [[STRUCT_S]] [[INPUT26]], ptr [[INPUT24]], align 4
252 ; OPT: [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
253 ; OPT: [[INPUT11:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT12]])
254 ; OPT: br i1 [[LESS:%.*]], label %[[FIRST:.*]], label %[[SECOND:.*]]
256 ; OPT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT11]], i32 0, i32 0
257 ; OPT: br label %[[MERGE:.*]]
259 ; OPT: [[PTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT24]], i32 0, i32 1
260 ; OPT: br label %[[MERGE]]
262 ; OPT: [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ]
; Input IR: branch on whether the value at %inout is negative, pick field 0 of
; %input1 on one path and field 1 of %input2 on the other, merge with a phi,
; load through it, and write the result back to %inout.
264 %val = load i32, ptr %inout
265 %less = icmp slt i32 %val, 0
266 br i1 %less, label %first, label %second
268 %ptr1 = getelementptr inbounds %struct.s, ptr %input1, i32 0, i32 0
271 %ptr2 = getelementptr inbounds %struct.s, ptr %input2, i32 0, i32 1
274 %ptrnew = phi ptr [%ptr1, %first], [%ptr2, %second]
275 %valloaded = load i32, ptr %ptrnew
276 store i32 %valloaded, ptr %inout
; Kernel (tagged "kernel" in !20) where a select chooses between the two byval
; i32 argument pointers before the load.
; The opt-level checks expect a local i32 alloca standing in for %input2,
; %input1 converted with llvm.nvvm.ptr.param.to.gen, and the select choosing
; between that converted pointer and the alloca.
; The llc-level checks expect a cvta.param, a setp.lt comparison against 0, a
; stack address formed from %SP, a selp between the two addresses, and a
; generic ld.u32 through the selected address.
280 ; NOTE: %input2 is *not* grid_constant
281 define void @grid_const_select_escape(ptr byval(i32) align 4 %input1, ptr byval(i32) %input2, ptr %inout) {
282 ; PTX-LABEL: grid_const_select_escape(
283 ; PTX: cvta.param.{{.*}} [[RD2:%.*]], [[RD1:%.*]]
284 ; PTX: setp.lt.{{.*}} [[P1:%.*]], {{%.*}}, 0
285 ; PTX: add.{{.*}} [[RD3:%.*]], %SP, 0;
286 ; PTX: selp.{{.*}} [[RD4:%.*]], [[RD2]], [[RD3]], [[P1]];
287 ; PTX: ld.u32 {{%.*}}, [[[RD4]]];
288 ; OPT-LABEL: define void @grid_const_select_escape(
289 ; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) {
290 ; OPT: [[INPUT24:%.*]] = alloca i32, align 4
291 ; OPT: [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
292 ; OPT: [[INPUT11:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT12]])
293 ; OPT: load i32, ptr [[INOUT]]
294 ; OPT: [[PTRNEW:%.*]] = select i1 [[LESS:%.*]], ptr [[INPUT11]], ptr [[INPUT24]]
295 ; OPT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
; Input IR: pick %input1 when the value at %inout is negative, otherwise
; %input2; load through the chosen pointer and write the result to %inout.
297 %val = load i32, ptr %inout
298 %less = icmp slt i32 %val, 0
299 %ptrnew = select i1 %less, ptr %input1, ptr %input2
300 %valloaded = load i32, ptr %ptrnew
301 store i32 %valloaded, ptr %inout
; Kernel (tagged "kernel" in !22) that both loads from its byval i32 argument
; and converts the argument pointer to an integer with ptrtoint.
; The opt-level checks expect the load to use the addrspace(101) cast, while
; the ptrtoint operates on the llvm.nvvm.ptr.param.to.gen conversion of that
; cast. They also expect an explicit "align 4" on the byval argument in the
; output signature, although the input IR omits it.
; The llc-level checks expect an ld.param from param_0, a cvta.param.u64, and
; a cvt.u32.u64 of the converted address.
305 define i32 @grid_const_ptrtoint(ptr byval(i32) %input) {
306 ; PTX-LABEL: grid_const_ptrtoint(
308 ; PTX: ld.param.{{.*}} {{%.*}}, [grid_const_ptrtoint_param_0];
309 ; PTX: cvta.param.u64 [[RD1:%.*]], {{%.*}}
310 ; PTX: cvt.u32.u64 {{%.*}}, [[RD1]]
311 ; OPT-LABEL: define i32 @grid_const_ptrtoint(
312 ; OPT-SAME: ptr byval(i32) align 4 [[INPUT:%.*]]) {
313 ; OPT: [[INPUT2:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
314 ; OPT: [[VAL:%.*]] = load i32, ptr addrspace(101) [[INPUT2]]
315 ; OPT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2]])
316 ; OPT: [[PTRVAL:%.*]] = ptrtoint ptr [[INPUT1]] to i32
; Input IR: %keepalive combines the loaded value with the pointer-derived
; integer so that both are used.
317 %val = load i32, ptr %input
318 %ptrval = ptrtoint ptr %input to i32
319 %keepalive = add i32 %val, %ptrval
; External functions with no body in this file. @escape and @escape3 receive
; the argument pointers in the tests that let a byval pointer escape.
; NOTE(review): @escape and @escape3 are declared as returning ptr, but every
; visible call site uses "call i32"; confirm whether this mismatch is
; intentional before changing either side, since it may affect the generated
; call sequence.
; NOTE(review): no call to @dummy is visible in this excerpt.
325 declare dso_local void @dummy() local_unnamed_addr
326 declare dso_local ptr @escape(ptr) local_unnamed_addr
327 declare dso_local ptr @escape3(ptr, ptr, ptr) local_unnamed_addr
; NVVM annotations for the functions in this file. Each even-numbered node
; below tags one function with "kernel" = 1 and attaches a "grid_constant"
; entry that refers to the next odd-numbered node.
; NOTE(review): the odd-numbered nodes (!1, !3, ...) are referenced here but
; their definitions are not visible in this excerpt; presumably they list the
; grid_constant parameter positions for each kernel. Confirm against the full
; file.
329 !nvvm.annotations = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23}
331 !0 = !{ptr @grid_const_int, !"kernel", i32 1, !"grid_constant", !1}
334 !2 = !{ptr @grid_const_struct, !"kernel", i32 1, !"grid_constant", !3}
337 !4 = !{ptr @grid_const_escape, !"kernel", i32 1, !"grid_constant", !5}
340 !6 = !{ptr @multiple_grid_const_escape, !"kernel", i32 1, !"grid_constant", !7}
343 !8 = !{ptr @grid_const_memory_escape, !"kernel", i32 1, !"grid_constant", !9}
346 !10 = !{ptr @grid_const_inlineasm_escape, !"kernel", i32 1, !"grid_constant", !11}
349 !12 = !{ptr @grid_const_partial_escape, !"kernel", i32 1, !"grid_constant", !13}
352 !14 = !{ptr @grid_const_partial_escapemem, !"kernel", i32 1, !"grid_constant", !15}
355 !16 = !{ptr @grid_const_phi_escape, !"kernel", i32 1, !"grid_constant", !17}
358 !18 = !{ptr @grid_const_phi_escape2, !"kernel", i32 1, !"grid_constant", !19}
361 !20 = !{ptr @grid_const_select_escape, !"kernel", i32 1, !"grid_constant", !21}
364 !22 = !{ptr @grid_const_ptrtoint, !"kernel", i32 1, !"grid_constant", !23}