1 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck -allow-deprecated-dag-overlap %s
2 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck -allow-deprecated-dag-overlap %s
3 ; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -passes=nvvm-intr-range \
4 ; RUN: | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE %s
5 ; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
6 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
; Calls @llvm.nvvm.read.ptx.sreg.tid.x(). The llc runs are expected to emit a
; mov.u32 from %tid.x into a %r register; the opt nvvm-intr-range run is
; expected to print the call with range(i32 0, 1024).
8 define ptx_device i32 @test_tid_x() {
9 ; CHECK: mov.u32 %r{{[0-9]+}}, %tid.x;
10 ; RANGE: call range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
12 %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
; Calls @llvm.nvvm.read.ptx.sreg.tid.y(). The llc runs are expected to emit a
; mov.u32 from %tid.y into a %r register; the opt nvvm-intr-range run is
; expected to print the call with range(i32 0, 1024).
16 define ptx_device i32 @test_tid_y() {
17 ; CHECK: mov.u32 %r{{[0-9]+}}, %tid.y;
18 ; RANGE: call range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.y()
20 %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
; Calls @llvm.nvvm.read.ptx.sreg.tid.z(). The llc runs are expected to emit a
; mov.u32 from %tid.z into a %r register; the opt nvvm-intr-range run is
; expected to print the call with range(i32 0, 64), a smaller bound than the
; one expected for tid.x and tid.y.
24 define ptx_device i32 @test_tid_z() {
25 ; CHECK: mov.u32 %r{{[0-9]+}}, %tid.z;
26 ; RANGE: call range(i32 0, 64) i32 @llvm.nvvm.read.ptx.sreg.tid.z()
28 %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
; Calls @llvm.nvvm.read.ptx.sreg.tid.w(). The llc runs are expected to emit a
; mov.u32 from %tid.w into a %r register.
32 define ptx_device i32 @test_tid_w() {
33 ; CHECK: mov.u32 %r{{[0-9]+}}, %tid.w;
35 %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.w()
; Calls @llvm.nvvm.read.ptx.sreg.ntid.x(). The llc runs are expected to emit a
; mov.u32 from %ntid.x into a %r register; the opt nvvm-intr-range run is
; expected to print the call with range(i32 1, 1025).
39 define ptx_device i32 @test_ntid_x() {
40 ; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.x;
41 ; RANGE: call range(i32 1, 1025) i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
43 %x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
; Calls @llvm.nvvm.read.ptx.sreg.ntid.y(). The llc runs are expected to emit a
; mov.u32 from %ntid.y into a %r register; the opt nvvm-intr-range run is
; expected to print the call with range(i32 1, 1025).
47 define ptx_device i32 @test_ntid_y() {
48 ; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.y;
49 ; RANGE: call range(i32 1, 1025) i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
51 %x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
; Calls @llvm.nvvm.read.ptx.sreg.ntid.z(). The llc runs are expected to emit a
; mov.u32 from %ntid.z into a %r register; the opt nvvm-intr-range run is
; expected to print the call with range(i32 1, 65).
55 define ptx_device i32 @test_ntid_z() {
56 ; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.z;
57 ; RANGE: call range(i32 1, 65) i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
59 %x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
; Calls @llvm.nvvm.read.ptx.sreg.ntid.w(). The llc runs are expected to emit a
; mov.u32 from %ntid.w into a %r register.
63 define ptx_device i32 @test_ntid_w() {
64 ; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.w;
66 %x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
; Calls @llvm.nvvm.read.ptx.sreg.laneid(). The llc runs are expected to emit a
; mov.u32 from %laneid into a %r register; the opt nvvm-intr-range run is
; expected to print the call with range(i32 0, 32).
70 define ptx_device i32 @test_laneid() {
71 ; CHECK: mov.u32 %r{{[0-9]+}}, %laneid;
72 ; RANGE: call range(i32 0, 32) i32 @llvm.nvvm.read.ptx.sreg.laneid()
74 %x = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
; Calls @llvm.nvvm.read.ptx.sreg.warpsize(). Unlike the other readers in this
; file, the expected llc output moves from the symbol WARP_SZ rather than from
; a %-prefixed special register. The opt nvvm-intr-range run is expected to
; print the call with range(i32 32, 33).
78 define ptx_device i32 @test_warpsize() {
79 ; CHECK: mov.u32 %r{{[0-9]+}}, WARP_SZ;
80 ; RANGE: call range(i32 32, 33) i32 @llvm.nvvm.read.ptx.sreg.warpsize()
82 %x = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
; Calls @llvm.nvvm.read.ptx.sreg.warpid(). The llc runs are expected to emit a
; mov.u32 from %warpid into a %r register.
86 define ptx_device i32 @test_warpid() {
87 ; CHECK: mov.u32 %r{{[0-9]+}}, %warpid;
89 %x = call i32 @llvm.nvvm.read.ptx.sreg.warpid()
; Calls @llvm.nvvm.read.ptx.sreg.nwarpid(). The llc runs are expected to emit
; a mov.u32 from %nwarpid into a %r register.
93 define ptx_device i32 @test_nwarpid() {
94 ; CHECK: mov.u32 %r{{[0-9]+}}, %nwarpid;
96 %x = call i32 @llvm.nvvm.read.ptx.sreg.nwarpid()
; Calls @llvm.nvvm.read.ptx.sreg.ctaid.y(). The llc runs are expected to emit
; a mov.u32 from %ctaid.y into a %r register; the opt nvvm-intr-range run is
; expected to print the call with range(i32 0, 65535).
100 define ptx_device i32 @test_ctaid_y() {
101 ; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.y;
102 ; RANGE: call range(i32 0, 65535) i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
104 %x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
; Calls @llvm.nvvm.read.ptx.sreg.ctaid.z(). The llc runs are expected to emit
; a mov.u32 from %ctaid.z into a %r register; the opt nvvm-intr-range run is
; expected to print the call with range(i32 0, 65535).
108 define ptx_device i32 @test_ctaid_z() {
109 ; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.z;
110 ; RANGE: call range(i32 0, 65535) i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
112 %x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
; Calls @llvm.nvvm.read.ptx.sreg.ctaid.x(). The llc runs are expected to emit
; a mov.u32 from %ctaid.x into a %r register; the opt nvvm-intr-range run is
; expected to print the call with range(i32 0, 2147483647), a much wider bound
; than the one expected for ctaid.y and ctaid.z.
116 define ptx_device i32 @test_ctaid_x() {
117 ; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.x;
118 ; RANGE: call range(i32 0, 2147483647) i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
120 %x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; Calls @llvm.nvvm.read.ptx.sreg.ctaid.w(). The llc runs are expected to emit
; a mov.u32 from %ctaid.w into a %r register.
124 define ptx_device i32 @test_ctaid_w() {
125 ; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.w;
127 %x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()
; Calls @llvm.nvvm.read.ptx.sreg.nctaid.y(). The llc runs are expected to emit
; a mov.u32 from %nctaid.y into a %r register; the opt nvvm-intr-range run is
; expected to print the call with range(i32 1, 65536).
131 define ptx_device i32 @test_nctaid_y() {
132 ; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.y;
133 ; RANGE: call range(i32 1, 65536) i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
135 %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
; Calls @llvm.nvvm.read.ptx.sreg.nctaid.z(). The llc runs are expected to emit
; a mov.u32 from %nctaid.z into a %r register; the opt nvvm-intr-range run is
; expected to print the call with range(i32 1, 65536).
139 define ptx_device i32 @test_nctaid_z() {
140 ; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.z;
141 ; RANGE: call range(i32 1, 65536) i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
143 %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
; Calls @llvm.nvvm.read.ptx.sreg.nctaid.x(). The llc runs are expected to emit
; a mov.u32 from %nctaid.x into a %r register; the opt nvvm-intr-range run is
; expected to print the call with range(i32 1, -2147483648).
; NOTE(review): the negative upper bound is presumably 2^31 as printed for a
; signed i32, i.e. one past the 2147483647 bound used for ctaid.x -- confirm.
147 define ptx_device i32 @test_nctaid_x() {
148 ; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.x;
149 ; RANGE: call range(i32 1, -2147483648) i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
151 %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
; Calls @llvm.nvvm.read.ptx.sreg.nctaid.x() with a !range !0 attachment already
; present on the call. The llc runs are expected to emit a mov.u32 from
; %nctaid.x. For the opt nvvm-intr-range run, the pattern matches the plain
; "call i32" form followed by a !range attachment and captures the metadata
; node number as ALREADY, which a later directive in this file compares
; against the node's contents.
; NOTE(review): presumably this verifies that the pass leaves a pre-annotated
; call untouched -- confirm against the pass implementation.
155 define ptx_device i32 @test_already_has_range_md() {
156 ; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.x;
157 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[ALREADY:[0-9]+]]
158 %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range !0
; Calls @llvm.nvvm.read.ptx.sreg.nctaid.w(). The llc runs are expected to emit
; a mov.u32 from %nctaid.w into a %r register.
163 define ptx_device i32 @test_nctaid_w() {
164 ; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.w;
166 %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()
; Calls @llvm.nvvm.read.ptx.sreg.smid(). The llc runs are expected to emit a
; mov.u32 from %smid into a %r register.
170 define ptx_device i32 @test_smid() {
171 ; CHECK: mov.u32 %r{{[0-9]+}}, %smid;
173 %x = call i32 @llvm.nvvm.read.ptx.sreg.smid()
; Calls @llvm.nvvm.read.ptx.sreg.nsmid(). The llc runs are expected to emit a
; mov.u32 from %nsmid into a %r register.
177 define ptx_device i32 @test_nsmid() {
178 ; CHECK: mov.u32 %r{{[0-9]+}}, %nsmid;
180 %x = call i32 @llvm.nvvm.read.ptx.sreg.nsmid()
; Calls @llvm.nvvm.read.ptx.sreg.gridid(). The llc runs are expected to emit a
; mov.u32 from %gridid into a %r register.
184 define ptx_device i32 @test_gridid() {
185 ; CHECK: mov.u32 %r{{[0-9]+}}, %gridid;
187 %x = call i32 @llvm.nvvm.read.ptx.sreg.gridid()
; Calls @llvm.nvvm.read.ptx.sreg.lanemask.eq(). The llc runs are expected to
; emit a mov.u32 from %lanemask_eq; note the expected register name uses an
; underscore where the intrinsic name uses a dot.
191 define ptx_device i32 @test_lanemask_eq() {
192 ; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_eq;
194 %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
; Calls @llvm.nvvm.read.ptx.sreg.lanemask.le(). The llc runs are expected to
; emit a mov.u32 from %lanemask_le; note the expected register name uses an
; underscore where the intrinsic name uses a dot.
198 define ptx_device i32 @test_lanemask_le() {
199 ; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_le;
201 %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
; Calls @llvm.nvvm.read.ptx.sreg.lanemask.lt(). The llc runs are expected to
; emit a mov.u32 from %lanemask_lt; note the expected register name uses an
; underscore where the intrinsic name uses a dot.
205 define ptx_device i32 @test_lanemask_lt() {
206 ; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_lt;
208 %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
; Calls @llvm.nvvm.read.ptx.sreg.lanemask.ge(). The llc runs are expected to
; emit a mov.u32 from %lanemask_ge; note the expected register name uses an
; underscore where the intrinsic name uses a dot.
212 define ptx_device i32 @test_lanemask_ge() {
213 ; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_ge;
215 %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
; Calls @llvm.nvvm.read.ptx.sreg.lanemask.gt(). The llc runs are expected to
; emit a mov.u32 from %lanemask_gt; note the expected register name uses an
; underscore where the intrinsic name uses a dot.
219 define ptx_device i32 @test_lanemask_gt() {
220 ; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_gt;
222 %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()
; Calls @llvm.nvvm.read.ptx.sreg.clock(), which returns i32. The llc runs are
; expected to emit a mov.u32 from %clock into a %r register.
226 define ptx_device i32 @test_clock() {
227 ; CHECK: mov.u32 %r{{[0-9]+}}, %clock;
229 %x = call i32 @llvm.nvvm.read.ptx.sreg.clock()
; Calls @llvm.nvvm.read.ptx.sreg.clock64(), the only i64-returning reader in
; this file. The llc runs are expected to emit a mov.u64 from %clock64 into a
; %rd register (rather than the mov.u32 / %r form used by the i32 readers).
233 define ptx_device i64 @test_clock64() {
234 ; CHECK: mov.u64 %rd{{[0-9]+}}, %clock64;
236 %x = call i64 @llvm.nvvm.read.ptx.sreg.clock64()
; Calls @llvm.nvvm.read.ptx.sreg.pm0(). The llc runs are expected to emit a
; mov.u32 from %pm0 into a %r register.
240 define ptx_device i32 @test_pm0() {
241 ; CHECK: mov.u32 %r{{[0-9]+}}, %pm0;
243 %x = call i32 @llvm.nvvm.read.ptx.sreg.pm0()
; Calls @llvm.nvvm.read.ptx.sreg.pm1(). The llc runs are expected to emit a
; mov.u32 from %pm1 into a %r register.
247 define ptx_device i32 @test_pm1() {
248 ; CHECK: mov.u32 %r{{[0-9]+}}, %pm1;
250 %x = call i32 @llvm.nvvm.read.ptx.sreg.pm1()
; Calls @llvm.nvvm.read.ptx.sreg.pm2(). The llc runs are expected to emit a
; mov.u32 from %pm2 into a %r register.
254 define ptx_device i32 @test_pm2() {
255 ; CHECK: mov.u32 %r{{[0-9]+}}, %pm2;
257 %x = call i32 @llvm.nvvm.read.ptx.sreg.pm2()
; Calls @llvm.nvvm.read.ptx.sreg.pm3(). The llc runs are expected to emit a
; mov.u32 from %pm3 into a %r register.
261 define ptx_device i32 @test_pm3() {
262 ; CHECK: mov.u32 %r{{[0-9]+}}, %pm3;
264 %x = call i32 @llvm.nvvm.read.ptx.sreg.pm3()
; The only void function in this file: calls @llvm.nvvm.bar.sync with the
; constant argument i32 0 instead of reading a special register.
268 define ptx_device void @test_bar_sync() {
271 call void @llvm.nvvm.bar.sync(i32 0)
; Declarations of every intrinsic called by the test functions in this file.
; All readers take no arguments and return i32, except clock64 (i64).

; tid.* and ntid.* readers.
275 declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
276 declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
277 declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
278 declare i32 @llvm.nvvm.read.ptx.sreg.tid.w()
279 declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
280 declare i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
281 declare i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
282 declare i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
; warpsize, laneid, warpid and nwarpid readers.
284 declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()
285 declare i32 @llvm.nvvm.read.ptx.sreg.laneid()
286 declare i32 @llvm.nvvm.read.ptx.sreg.warpid()
287 declare i32 @llvm.nvvm.read.ptx.sreg.nwarpid()
; ctaid.* and nctaid.* readers.
289 declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
290 declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
291 declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
292 declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()
293 declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
294 declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
295 declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
296 declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()
; smid, nsmid and gridid readers.
298 declare i32 @llvm.nvvm.read.ptx.sreg.smid()
299 declare i32 @llvm.nvvm.read.ptx.sreg.nsmid()
300 declare i32 @llvm.nvvm.read.ptx.sreg.gridid()
; lanemask.{eq,le,lt,ge,gt} readers.
302 declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
303 declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
304 declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
305 declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
306 declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()
; clock (i32) and clock64 (i64) readers.
308 declare i32 @llvm.nvvm.read.ptx.sreg.clock()
309 declare i64 @llvm.nvvm.read.ptx.sreg.clock64()
; pm0..pm3 readers.
311 declare i32 @llvm.nvvm.read.ptx.sreg.pm0()
312 declare i32 @llvm.nvvm.read.ptx.sreg.pm1()
313 declare i32 @llvm.nvvm.read.ptx.sreg.pm2()
314 declare i32 @llvm.nvvm.read.ptx.sreg.pm3()
; bar.sync takes a single i32 argument and returns void.
316 declare void @llvm.nvvm.bar.sync(i32 %i)
; Range metadata referenced by the "!range !0" attachment on the call in
; @test_already_has_range_md. The directive below expects the opt
; nvvm-intr-range output to still define the captured ALREADY node as
; !{i32 0, i32 19}.
318 !0 = !{i32 0, i32 19}
319 ; RANGE-DAG: ![[ALREADY]] = !{i32 0, i32 19}