; Test driver overview:
;  * llc compiles this file for the 32-bit and 64-bit NVPTX targets (sm_20)
;    and the assembly is matched with the default check prefix.
;  * opt runs the nvvm-intr-range pass and the resulting IR is matched with
;    the RANGE prefix plus one SM-specific prefix (RANGE_20 or RANGE_30).
;  * When a ptxas binary is available, the generated PTX is also assembled.
1 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck -allow-deprecated-dag-overlap %s
2 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck -allow-deprecated-dag-overlap %s
; Range annotation without an explicit -nvvm-intr-range-sm value.
3 ; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -passes=nvvm-intr-range \
4 ; RUN: | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_20 %s
; Range annotation with -nvvm-intr-range-sm=30.
7 ; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda \
8 ; RUN: -passes=nvvm-intr-range -nvvm-intr-range-sm=30 \
9 ; RUN: | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_30 %s
; PTX validation. The 32-bit variant is additionally skipped when the
; ptxas-12.0 feature is set (presumably that release rejects 32-bit PTX;
; confirm against the lit configuration).
13 ; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
14 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
; Reads %tid.x through its NVVM intrinsic. The llc runs expect a mov.u32 from
; %tid.x; the opt runs expect the call to gain !range metadata, captured here
; as BLK_IDX_XY and pinned to !{i32 0, i32 1024} at the end of the file.
16 define ptx_device i32 @test_tid_x() {
17 ; CHECK: mov.u32 %r{{[0-9]+}}, %tid.x;
18 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range ![[BLK_IDX_XY:[0-9]+]]
20 %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
; Reads %tid.y through its NVVM intrinsic. The llc runs expect a mov.u32 from
; %tid.y; the opt runs expect the same !range node already captured as
; BLK_IDX_XY by the tid.x check.
24 define ptx_device i32 @test_tid_y() {
25 ; CHECK: mov.u32 %r{{[0-9]+}}, %tid.y;
26 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.y(), !range ![[BLK_IDX_XY]]
28 %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
; Reads %tid.z through its NVVM intrinsic. The llc runs expect a mov.u32 from
; %tid.z; the opt runs expect !range metadata, captured here as BLK_IDX_Z and
; pinned to !{i32 0, i32 64} at the end of the file.
32 define ptx_device i32 @test_tid_z() {
33 ; CHECK: mov.u32 %r{{[0-9]+}}, %tid.z;
34 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.z(), !range ![[BLK_IDX_Z:[0-9]+]]
36 %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
; Reads %tid.w through its NVVM intrinsic. The llc runs expect a mov.u32 from
; %tid.w. No expectation under the RANGE prefix is visible for this intrinsic.
40 define ptx_device i32 @test_tid_w() {
41 ; CHECK: mov.u32 %r{{[0-9]+}}, %tid.w;
43 %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.w()
; Reads %ntid.x through its NVVM intrinsic. The llc runs expect a mov.u32 from
; %ntid.x; the opt runs expect !range metadata, captured here as BLK_SIZE_XY
; and pinned to !{i32 1, i32 1025} at the end of the file.
47 define ptx_device i32 @test_ntid_x() {
48 ; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.x;
49 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !range ![[BLK_SIZE_XY:[0-9]+]]
51 %x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
; Reads %ntid.y through its NVVM intrinsic. The llc runs expect a mov.u32 from
; %ntid.y; the opt runs expect the same !range node already captured as
; BLK_SIZE_XY by the ntid.x check.
55 define ptx_device i32 @test_ntid_y() {
56 ; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.y;
57 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y(), !range ![[BLK_SIZE_XY]]
59 %x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
; Reads %ntid.z through its NVVM intrinsic. The llc runs expect a mov.u32 from
; %ntid.z; the opt runs expect !range metadata, captured here as BLK_SIZE_Z
; and pinned to !{i32 1, i32 65} at the end of the file.
63 define ptx_device i32 @test_ntid_z() {
64 ; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.z;
65 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z(), !range ![[BLK_SIZE_Z:[0-9]+]]
67 %x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
; Reads %ntid.w through its NVVM intrinsic. The llc runs expect a mov.u32 from
; %ntid.w. No expectation under the RANGE prefix is visible for this intrinsic.
71 define ptx_device i32 @test_ntid_w() {
72 ; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.w;
74 %x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
; Reads %laneid through its NVVM intrinsic. The llc runs expect a mov.u32 from
; %laneid; the opt runs expect !range metadata, captured here as LANEID and
; pinned to !{i32 0, i32 32} at the end of the file.
78 define ptx_device i32 @test_laneid() {
79 ; CHECK: mov.u32 %r{{[0-9]+}}, %laneid;
80 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.laneid(), !range ![[LANEID:[0-9]+]]
82 %x = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
; Reads the warp size through its NVVM intrinsic. The llc runs expect a
; mov.u32 from the WARP_SZ symbol (not a %-prefixed register); the opt runs
; expect !range metadata, captured here as WARPSIZE and pinned to
; !{i32 32, i32 33} at the end of the file.
86 define ptx_device i32 @test_warpsize() {
87 ; CHECK: mov.u32 %r{{[0-9]+}}, WARP_SZ;
88 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.warpsize(), !range ![[WARPSIZE:[0-9]+]]
90 %x = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
; Reads %warpid through its NVVM intrinsic. The llc runs expect a mov.u32 from
; %warpid. No expectation under the RANGE prefix is visible for this intrinsic.
94 define ptx_device i32 @test_warpid() {
95 ; CHECK: mov.u32 %r{{[0-9]+}}, %warpid;
97 %x = call i32 @llvm.nvvm.read.ptx.sreg.warpid()
; Reads %nwarpid through its NVVM intrinsic. The llc runs expect a mov.u32
; from %nwarpid. No expectation under the RANGE prefix is visible for this
; intrinsic.
101 define ptx_device i32 @test_nwarpid() {
102 ; CHECK: mov.u32 %r{{[0-9]+}}, %nwarpid;
104 %x = call i32 @llvm.nvvm.read.ptx.sreg.nwarpid()
; Reads %ctaid.y through its NVVM intrinsic. The llc runs expect a mov.u32
; from %ctaid.y; the opt runs expect !range metadata, captured here as
; GRID_IDX_YZ and pinned to !{i32 0, i32 65535} at the end of the file. Later
; checks (ctaid.z, and ctaid.x under RANGE_20) reuse this capture.
108 define ptx_device i32 @test_ctaid_y() {
109 ; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.y;
110 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y(), !range ![[GRID_IDX_YZ:[0-9]+]]
112 %x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
; Reads %ctaid.z through its NVVM intrinsic. The llc runs expect a mov.u32
; from %ctaid.z; the opt runs expect the same !range node already captured as
; GRID_IDX_YZ by the ctaid.y check.
116 define ptx_device i32 @test_ctaid_z() {
117 ; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.z;
118 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z(), !range ![[GRID_IDX_YZ]]
120 %x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
; Reads %ctaid.x through its NVVM intrinsic. The llc runs expect a mov.u32
; from %ctaid.x. The expected !range node depends on the opt configuration:
;  * RANGE_30 captures a dedicated node as GRID_IDX_X, pinned to
;    !{i32 0, i32 2147483647} at the end of the file;
;  * RANGE_20 expects the node already captured as GRID_IDX_YZ by the earlier
;    ctaid.y check.
124 define ptx_device i32 @test_ctaid_x() {
125 ; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.x;
126 ; RANGE_30: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[GRID_IDX_X:[0-9]+]]
127 ; RANGE_20: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[GRID_IDX_YZ]]
129 %x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; Reads %ctaid.w through its NVVM intrinsic. The llc runs expect a mov.u32
; from %ctaid.w. No expectation under the RANGE prefix is visible for this
; intrinsic.
133 define ptx_device i32 @test_ctaid_w() {
134 ; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.w;
136 %x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()
; Reads %nctaid.y through its NVVM intrinsic. The llc runs expect a mov.u32
; from %nctaid.y; the opt runs expect !range metadata, captured here as
; GRID_SIZE_YZ and pinned to !{i32 1, i32 65536} at the end of the file. Later
; checks (nctaid.z, and nctaid.x under RANGE_20) reuse this capture.
140 define ptx_device i32 @test_nctaid_y() {
141 ; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.y;
142 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y(), !range ![[GRID_SIZE_YZ:[0-9]+]]
144 %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
; Reads %nctaid.z through its NVVM intrinsic. The llc runs expect a mov.u32
; from %nctaid.z; the opt runs expect the same !range node already captured as
; GRID_SIZE_YZ by the nctaid.y check.
148 define ptx_device i32 @test_nctaid_z() {
149 ; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.z;
150 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z(), !range ![[GRID_SIZE_YZ]]
152 %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
; Reads %nctaid.x through its NVVM intrinsic. The llc runs expect a mov.u32
; from %nctaid.x. The expected !range node depends on the opt configuration:
;  * RANGE_30 captures a dedicated node as GRID_SIZE_X, pinned to
;    !{i32 1, i32 -2147483648} at the end of the file;
;  * RANGE_20 expects the node already captured as GRID_SIZE_YZ by the earlier
;    nctaid.y check.
156 define ptx_device i32 @test_nctaid_x() {
157 ; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.x;
158 ; RANGE_30: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[GRID_SIZE_X:[0-9]+]]
159 ; RANGE_20: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[GRID_SIZE_YZ]]
161 %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
; Reads %nctaid.x with !range !0 already attached to the call. The opt runs
; capture the resulting node as ALREADY, which the end of the file pins to
; !{i32 0, i32 19} -- the same contents as !0 -- so the pass is expected to
; leave pre-existing range metadata in place. The llc runs still expect a
; mov.u32 from %nctaid.x.
165 define ptx_device i32 @test_already_has_range_md() {
166 ; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.x;
167 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[ALREADY:[0-9]+]]
168 %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range !0
; Reads %nctaid.w through its NVVM intrinsic. The llc runs expect a mov.u32
; from %nctaid.w. No expectation under the RANGE prefix is visible for this
; intrinsic.
173 define ptx_device i32 @test_nctaid_w() {
174 ; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.w;
176 %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()
; Reads %smid through its NVVM intrinsic. The llc runs expect a mov.u32 from
; %smid.
180 define ptx_device i32 @test_smid() {
181 ; CHECK: mov.u32 %r{{[0-9]+}}, %smid;
183 %x = call i32 @llvm.nvvm.read.ptx.sreg.smid()
; Reads %nsmid through its NVVM intrinsic. The llc runs expect a mov.u32 from
; %nsmid.
187 define ptx_device i32 @test_nsmid() {
188 ; CHECK: mov.u32 %r{{[0-9]+}}, %nsmid;
190 %x = call i32 @llvm.nvvm.read.ptx.sreg.nsmid()
; Reads %gridid through its NVVM intrinsic. The llc runs expect a mov.u32 from
; %gridid.
194 define ptx_device i32 @test_gridid() {
195 ; CHECK: mov.u32 %r{{[0-9]+}}, %gridid;
197 %x = call i32 @llvm.nvvm.read.ptx.sreg.gridid()
; Reads %lanemask_eq through its NVVM intrinsic. Note the naming difference:
; the intrinsic is spelled lanemask.eq, while the expected PTX register uses
; an underscore.
201 define ptx_device i32 @test_lanemask_eq() {
202 ; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_eq;
204 %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
; Reads %lanemask_le through its NVVM intrinsic (spelled lanemask.le in IR).
; The llc runs expect a mov.u32 from %lanemask_le.
208 define ptx_device i32 @test_lanemask_le() {
209 ; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_le;
211 %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
; Reads %lanemask_lt through its NVVM intrinsic (spelled lanemask.lt in IR).
; The llc runs expect a mov.u32 from %lanemask_lt.
215 define ptx_device i32 @test_lanemask_lt() {
216 ; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_lt;
218 %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
; Reads %lanemask_ge through its NVVM intrinsic (spelled lanemask.ge in IR).
; The llc runs expect a mov.u32 from %lanemask_ge.
222 define ptx_device i32 @test_lanemask_ge() {
223 ; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_ge;
225 %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
; Reads %lanemask_gt through its NVVM intrinsic (spelled lanemask.gt in IR).
; The llc runs expect a mov.u32 from %lanemask_gt.
229 define ptx_device i32 @test_lanemask_gt() {
230 ; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_gt;
232 %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()
; Reads %clock through its 32-bit NVVM intrinsic. The llc runs expect a
; mov.u32 from %clock.
236 define ptx_device i32 @test_clock() {
237 ; CHECK: mov.u32 %r{{[0-9]+}}, %clock;
239 %x = call i32 @llvm.nvvm.read.ptx.sreg.clock()
; Reads %clock64 through its NVVM intrinsic. This is the only i64-returning
; reader in this file, so the llc runs expect a mov.u64 into an %rd register
; rather than a mov.u32 into %r.
243 define ptx_device i64 @test_clock64() {
244 ; CHECK: mov.u64 %rd{{[0-9]+}}, %clock64;
246 %x = call i64 @llvm.nvvm.read.ptx.sreg.clock64()
; Reads %pm0 through its NVVM intrinsic. The llc runs expect a mov.u32 from
; %pm0.
250 define ptx_device i32 @test_pm0() {
251 ; CHECK: mov.u32 %r{{[0-9]+}}, %pm0;
253 %x = call i32 @llvm.nvvm.read.ptx.sreg.pm0()
; Reads %pm1 through its NVVM intrinsic. The llc runs expect a mov.u32 from
; %pm1.
257 define ptx_device i32 @test_pm1() {
258 ; CHECK: mov.u32 %r{{[0-9]+}}, %pm1;
260 %x = call i32 @llvm.nvvm.read.ptx.sreg.pm1()
; Reads %pm2 through its NVVM intrinsic. The llc runs expect a mov.u32 from
; %pm2.
264 define ptx_device i32 @test_pm2() {
265 ; CHECK: mov.u32 %r{{[0-9]+}}, %pm2;
267 %x = call i32 @llvm.nvvm.read.ptx.sreg.pm2()
; Reads %pm3 through its NVVM intrinsic. The llc runs expect a mov.u32 from
; %pm3.
271 define ptx_device i32 @test_pm3() {
272 ; CHECK: mov.u32 %r{{[0-9]+}}, %pm3;
274 %x = call i32 @llvm.nvvm.read.ptx.sreg.pm3()
; Calls the llvm.nvvm.bar.sync intrinsic with the constant argument 0. Unlike
; the register readers in this file, this function returns void.
278 define ptx_device void @test_bar_sync() {
281 call void @llvm.nvvm.bar.sync(i32 0)
285 declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
286 declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
287 declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
288 declare i32 @llvm.nvvm.read.ptx.sreg.tid.w()
289 declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
290 declare i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
291 declare i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
292 declare i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
294 declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()
295 declare i32 @llvm.nvvm.read.ptx.sreg.laneid()
296 declare i32 @llvm.nvvm.read.ptx.sreg.warpid()
297 declare i32 @llvm.nvvm.read.ptx.sreg.nwarpid()
299 declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
300 declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
301 declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
302 declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()
303 declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
304 declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
305 declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
306 declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()
308 declare i32 @llvm.nvvm.read.ptx.sreg.smid()
309 declare i32 @llvm.nvvm.read.ptx.sreg.nsmid()
310 declare i32 @llvm.nvvm.read.ptx.sreg.gridid()
312 declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
313 declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
314 declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
315 declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
316 declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()
318 declare i32 @llvm.nvvm.read.ptx.sreg.clock()
319 declare i64 @llvm.nvvm.read.ptx.sreg.clock64()
321 declare i32 @llvm.nvvm.read.ptx.sreg.pm0()
322 declare i32 @llvm.nvvm.read.ptx.sreg.pm1()
323 declare i32 @llvm.nvvm.read.ptx.sreg.pm2()
324 declare i32 @llvm.nvvm.read.ptx.sreg.pm3()
326 declare void @llvm.nvvm.bar.sync(i32 %i)
; Range metadata that test_already_has_range_md attaches to its call before
; the pass runs.
328 !0 = !{i32 0, i32 19}
; Expected contents of each metadata node captured by the call checks in the
; functions of this file. These are order-independent (DAG) directives, one per
; captured variable. GRID_IDX_X and GRID_SIZE_X are only captured under the
; RANGE_30 prefix, so their contents are only verified for that configuration.
329 ; RANGE-DAG: ![[ALREADY]] = !{i32 0, i32 19}
330 ; RANGE-DAG: ![[BLK_IDX_XY]] = !{i32 0, i32 1024}
332 ; RANGE-DAG: ![[BLK_IDX_Z]] = !{i32 0, i32 64}
333 ; RANGE-DAG: ![[BLK_SIZE_XY]] = !{i32 1, i32 1025}
334 ; RANGE-DAG: ![[BLK_SIZE_Z]] = !{i32 1, i32 65}
335 ; RANGE-DAG: ![[LANEID]] = !{i32 0, i32 32}
336 ; RANGE-DAG: ![[WARPSIZE]] = !{i32 32, i32 33}
337 ; RANGE_30-DAG: ![[GRID_IDX_X]] = !{i32 0, i32 2147483647}
338 ; RANGE-DAG: ![[GRID_IDX_YZ]] = !{i32 0, i32 65535}
339 ; RANGE_30-DAG: ![[GRID_SIZE_X]] = !{i32 1, i32 -2147483648}
340 ; RANGE-DAG: ![[GRID_SIZE_YZ]] = !{i32 1, i32 65536}