1 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck -allow-deprecated-dag-overlap %s
2 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck -allow-deprecated-dag-overlap %s
3 ; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -nvvm-intr-range \
4 ; RUN: | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_20 %s
5 ; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda \
6 ; RUN: -nvvm-intr-range -nvvm-intr-range-sm=30 \
7 ; RUN: | FileCheck -allow-deprecated-dag-overlap --check-prefix=RANGE --check-prefix=RANGE_30 %s
; Reads tid.x through its NVVM intrinsic. The llc runs expect a mov.u32 from
; %tid.x; the opt runs expect !range metadata on the call and capture its id
; in the FileCheck variable BLK_IDX_XY.
9 define ptx_device i32 @test_tid_x() {
10 ; CHECK: mov.u32 %r{{[0-9]+}}, %tid.x;
11 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range ![[BLK_IDX_XY:[0-9]+]]
13 %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
; Reads tid.y. The llc runs expect a mov.u32 from %tid.y; the opt runs expect
; the call to carry the same !range node that was captured as BLK_IDX_XY for
; tid.x.
17 define ptx_device i32 @test_tid_y() {
18 ; CHECK: mov.u32 %r{{[0-9]+}}, %tid.y;
19 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.y(), !range ![[BLK_IDX_XY]]
21 %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
; Reads tid.z. The llc runs expect a mov.u32 from %tid.z; the opt runs expect
; !range metadata on the call and capture its id as BLK_IDX_Z.
25 define ptx_device i32 @test_tid_z() {
26 ; CHECK: mov.u32 %r{{[0-9]+}}, %tid.z;
27 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.z(), !range ![[BLK_IDX_Z:[0-9]+]]
29 %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
; Reads tid.w. Only the llc output is checked (mov.u32 from %tid.w); no RANGE
; directive is visible for this function.
33 define ptx_device i32 @test_tid_w() {
34 ; CHECK: mov.u32 %r{{[0-9]+}}, %tid.w;
36 %x = call i32 @llvm.nvvm.read.ptx.sreg.tid.w()
; Reads ntid.x. The llc runs expect a mov.u32 from %ntid.x; the opt runs expect
; !range metadata on the call and capture its id as BLK_SIZE_XY.
40 define ptx_device i32 @test_ntid_x() {
41 ; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.x;
42 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !range ![[BLK_SIZE_XY:[0-9]+]]
44 %x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
; Reads ntid.y. The llc runs expect a mov.u32 from %ntid.y; the opt runs expect
; the call to reuse the !range node captured as BLK_SIZE_XY for ntid.x.
48 define ptx_device i32 @test_ntid_y() {
49 ; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.y;
50 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y(), !range ![[BLK_SIZE_XY]]
52 %x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
; Reads ntid.z. The llc runs expect a mov.u32 from %ntid.z; the opt runs expect
; !range metadata on the call and capture its id as BLK_SIZE_Z.
56 define ptx_device i32 @test_ntid_z() {
57 ; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.z;
58 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z(), !range ![[BLK_SIZE_Z:[0-9]+]]
60 %x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
; Reads ntid.w. Only the llc output is checked (mov.u32 from %ntid.w); no RANGE
; directive is visible for this function.
64 define ptx_device i32 @test_ntid_w() {
65 ; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.w;
67 %x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
; Reads laneid. The llc runs expect a mov.u32 from %laneid; the opt runs expect
; !range metadata on the call and capture its id as LANEID.
71 define ptx_device i32 @test_laneid() {
72 ; CHECK: mov.u32 %r{{[0-9]+}}, %laneid;
73 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.laneid(), !range ![[LANEID:[0-9]+]]
75 %x = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
; Reads warpsize. The llc runs expect a mov.u32 whose source is the symbol
; WARP_SZ (not a %-prefixed register); the opt runs expect !range metadata on
; the call and capture its id as WARPSIZE.
79 define ptx_device i32 @test_warpsize() {
80 ; CHECK: mov.u32 %r{{[0-9]+}}, WARP_SZ;
81 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.warpsize(), !range ![[WARPSIZE:[0-9]+]]
83 %x = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
; Reads warpid. Only the llc output is checked (mov.u32 from %warpid); no RANGE
; directive is visible for this function.
87 define ptx_device i32 @test_warpid() {
88 ; CHECK: mov.u32 %r{{[0-9]+}}, %warpid;
90 %x = call i32 @llvm.nvvm.read.ptx.sreg.warpid()
; Reads nwarpid. Only the llc output is checked (mov.u32 from %nwarpid); no
; RANGE directive is visible for this function.
94 define ptx_device i32 @test_nwarpid() {
95 ; CHECK: mov.u32 %r{{[0-9]+}}, %nwarpid;
97 %x = call i32 @llvm.nvvm.read.ptx.sreg.nwarpid()
; Reads ctaid.y. The llc runs expect a mov.u32 from %ctaid.y; the opt runs
; expect !range metadata on the call and capture its id as GRID_IDX_YZ. This is
; the first definition of GRID_IDX_YZ, which later functions reuse.
101 define ptx_device i32 @test_ctaid_y() {
102 ; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.y;
103 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y(), !range ![[GRID_IDX_YZ:[0-9]+]]
105 %x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
; Reads ctaid.z. The llc runs expect a mov.u32 from %ctaid.z; the opt runs
; expect the call to reuse the !range node captured as GRID_IDX_YZ for ctaid.y.
109 define ptx_device i32 @test_ctaid_z() {
110 ; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.z;
111 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z(), !range ![[GRID_IDX_YZ]]
113 %x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
; Reads ctaid.x. The llc runs expect a mov.u32 from %ctaid.x. The expected
; !range node differs between the two opt runs: the run that passes
; -nvvm-intr-range-sm=30 captures a new node as GRID_IDX_X, while the run
; without that option expects the node already captured as GRID_IDX_YZ in
; test_ctaid_y.
117 define ptx_device i32 @test_ctaid_x() {
118 ; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.x;
119 ; RANGE_30: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[GRID_IDX_X:[0-9]+]]
120 ; RANGE_20: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[GRID_IDX_YZ]]
122 %x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; Reads ctaid.w. Only the llc output is checked (mov.u32 from %ctaid.w); no
; RANGE directive is visible for this function.
126 define ptx_device i32 @test_ctaid_w() {
127 ; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.w;
129 %x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()
; Reads nctaid.y. The llc runs expect a mov.u32 from %nctaid.y; the opt runs
; expect !range metadata on the call and capture its id as GRID_SIZE_YZ. This
; is the first definition of GRID_SIZE_YZ, which later functions reuse.
133 define ptx_device i32 @test_nctaid_y() {
134 ; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.y;
135 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y(), !range ![[GRID_SIZE_YZ:[0-9]+]]
137 %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
; Reads nctaid.z. The llc runs expect a mov.u32 from %nctaid.z; the opt runs
; expect the call to reuse the !range node captured as GRID_SIZE_YZ for
; nctaid.y.
141 define ptx_device i32 @test_nctaid_z() {
142 ; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.z;
143 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z(), !range ![[GRID_SIZE_YZ]]
145 %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
; Reads nctaid.x. The llc runs expect a mov.u32 from %nctaid.x. The expected
; !range node differs between the two opt runs: the run that passes
; -nvvm-intr-range-sm=30 captures a new node as GRID_SIZE_X, while the run
; without that option expects the node already captured as GRID_SIZE_YZ in
; test_nctaid_y.
149 define ptx_device i32 @test_nctaid_x() {
150 ; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.x;
151 ; RANGE_30: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[GRID_SIZE_X:[0-9]+]]
152 ; RANGE_20: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[GRID_SIZE_YZ]]
154 %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
; Reads nctaid.x through a call that already carries !range !0 in the input.
; The opt runs capture whatever !range node is on the call afterwards as
; ALREADY; the metadata checks at the end of the file require ALREADY to have
; the same operands as !0, i.e. the existing annotation is expected to survive.
158 define ptx_device i32 @test_already_has_range_md() {
159 ; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.x;
160 ; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[ALREADY:[0-9]+]]
161 %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range !0
; Reads nctaid.w. Only the llc output is checked (mov.u32 from %nctaid.w); no
; RANGE directive is visible for this function.
166 define ptx_device i32 @test_nctaid_w() {
167 ; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.w;
169 %x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()
; Reads smid. Only the llc output is checked (mov.u32 from %smid).
173 define ptx_device i32 @test_smid() {
174 ; CHECK: mov.u32 %r{{[0-9]+}}, %smid;
176 %x = call i32 @llvm.nvvm.read.ptx.sreg.smid()
; Reads nsmid. Only the llc output is checked (mov.u32 from %nsmid).
180 define ptx_device i32 @test_nsmid() {
181 ; CHECK: mov.u32 %r{{[0-9]+}}, %nsmid;
183 %x = call i32 @llvm.nvvm.read.ptx.sreg.nsmid()
; Reads gridid. Only the llc output is checked (mov.u32 from %gridid).
187 define ptx_device i32 @test_gridid() {
188 ; CHECK: mov.u32 %r{{[0-9]+}}, %gridid;
190 %x = call i32 @llvm.nvvm.read.ptx.sreg.gridid()
; Reads lanemask.eq. The intrinsic name uses a dot, while the register name
; expected in the llc output uses an underscore (%lanemask_eq).
194 define ptx_device i32 @test_lanemask_eq() {
195 ; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_eq;
197 %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
; Reads lanemask.le. Only the llc output is checked (mov.u32 from
; %lanemask_le).
201 define ptx_device i32 @test_lanemask_le() {
202 ; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_le;
204 %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
; Reads lanemask.lt. Only the llc output is checked (mov.u32 from
; %lanemask_lt).
208 define ptx_device i32 @test_lanemask_lt() {
209 ; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_lt;
211 %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
; Reads lanemask.ge. Only the llc output is checked (mov.u32 from
; %lanemask_ge).
215 define ptx_device i32 @test_lanemask_ge() {
216 ; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_ge;
218 %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
; Reads lanemask.gt. Only the llc output is checked (mov.u32 from
; %lanemask_gt).
222 define ptx_device i32 @test_lanemask_gt() {
223 ; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_gt;
225 %x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()
; Reads the 32-bit clock register. Only the llc output is checked (mov.u32
; from %clock).
229 define ptx_device i32 @test_clock() {
230 ; CHECK: mov.u32 %r{{[0-9]+}}, %clock;
232 %x = call i32 @llvm.nvvm.read.ptx.sreg.clock()
; Reads the 64-bit clock register. This is the only i64 reader in the file, so
; the expected instruction is mov.u64 into an %rd register rather than mov.u32
; into %r.
236 define ptx_device i64 @test_clock64() {
237 ; CHECK: mov.u64 %rd{{[0-9]+}}, %clock64;
239 %x = call i64 @llvm.nvvm.read.ptx.sreg.clock64()
; Reads pm0. Only the llc output is checked (mov.u32 from %pm0).
243 define ptx_device i32 @test_pm0() {
244 ; CHECK: mov.u32 %r{{[0-9]+}}, %pm0;
246 %x = call i32 @llvm.nvvm.read.ptx.sreg.pm0()
; Reads pm1. Only the llc output is checked (mov.u32 from %pm1).
250 define ptx_device i32 @test_pm1() {
251 ; CHECK: mov.u32 %r{{[0-9]+}}, %pm1;
253 %x = call i32 @llvm.nvvm.read.ptx.sreg.pm1()
; Reads pm2. Only the llc output is checked (mov.u32 from %pm2).
257 define ptx_device i32 @test_pm2() {
258 ; CHECK: mov.u32 %r{{[0-9]+}}, %pm2;
260 %x = call i32 @llvm.nvvm.read.ptx.sreg.pm2()
; Reads pm3. Only the llc output is checked (mov.u32 from %pm3).
264 define ptx_device i32 @test_pm3() {
265 ; CHECK: mov.u32 %r{{[0-9]+}}, %pm3;
267 %x = call i32 @llvm.nvvm.read.ptx.sreg.pm3()
; Calls llvm.nvvm.bar.sync with the constant argument 0. This is the only void
; function in the file and the only one that does not read a special register.
; NOTE(review): no check directives for this function are visible in this view
; of the file; confirm against the full test.
271 define ptx_device void @test_bar_sync() {
274 call void @llvm.nvvm.bar.sync(i32 0)
; Declarations of the NVVM intrinsics called by the test functions above.
; Every reader takes no arguments and returns i32, except clock64 (i64).

; tid.* and ntid.* readers.
278 declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
279 declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
280 declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
281 declare i32 @llvm.nvvm.read.ptx.sreg.tid.w()
282 declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
283 declare i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
284 declare i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
285 declare i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
; warpsize, laneid, warpid and nwarpid readers.
287 declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()
288 declare i32 @llvm.nvvm.read.ptx.sreg.laneid()
289 declare i32 @llvm.nvvm.read.ptx.sreg.warpid()
290 declare i32 @llvm.nvvm.read.ptx.sreg.nwarpid()
; ctaid.* and nctaid.* readers.
292 declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
293 declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
294 declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
295 declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()
296 declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
297 declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
298 declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
299 declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()
; smid, nsmid and gridid readers.
301 declare i32 @llvm.nvvm.read.ptx.sreg.smid()
302 declare i32 @llvm.nvvm.read.ptx.sreg.nsmid()
303 declare i32 @llvm.nvvm.read.ptx.sreg.gridid()
; lanemask.* readers.
305 declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
306 declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
307 declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
308 declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
309 declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()
; clock readers (32-bit and 64-bit).
311 declare i32 @llvm.nvvm.read.ptx.sreg.clock()
312 declare i64 @llvm.nvvm.read.ptx.sreg.clock64()
; pm0..pm3 readers.
314 declare i32 @llvm.nvvm.read.ptx.sreg.pm0()
315 declare i32 @llvm.nvvm.read.ptx.sreg.pm1()
316 declare i32 @llvm.nvvm.read.ptx.sreg.pm2()
317 declare i32 @llvm.nvvm.read.ptx.sreg.pm3()
; Barrier intrinsic used by test_bar_sync; takes one i32 argument.
319 declare void @llvm.nvvm.bar.sync(i32 %i)
; Range node attached in the input to the call in test_already_has_range_md.
321 !0 = !{i32 0, i32 19}
; Expected operands of every !range node captured by the RANGE directives in
; the functions above. They use the -DAG form, so the nodes may appear in any
; order in the opt output. Each captured variable is listed exactly once: two
; identical -DAG directives can only match the same output line, which
; FileCheck rejects unless the deprecated overlap option is passed.
;
; ALREADY must equal !0 above, i.e. the pre-existing annotation is unchanged.
322 ; RANGE-DAG: ![[ALREADY]] = !{i32 0, i32 19}
323 ; RANGE-DAG: ![[BLK_IDX_XY]] = !{i32 0, i32 1024}
325 ; RANGE-DAG: ![[BLK_IDX_Z]] = !{i32 0, i32 64}
326 ; RANGE-DAG: ![[BLK_SIZE_XY]] = !{i32 1, i32 1025}
327 ; RANGE-DAG: ![[BLK_SIZE_Z]] = !{i32 1, i32 65}
328 ; RANGE-DAG: ![[LANEID]] = !{i32 0, i32 32}
329 ; RANGE-DAG: ![[WARPSIZE]] = !{i32 32, i32 33}
; GRID_IDX_X and GRID_SIZE_X are only captured under the RANGE_30 prefix, so
; their operands are only checked in the run that passes
; -nvvm-intr-range-sm=30.
330 ; RANGE_30-DAG: ![[GRID_IDX_X]] = !{i32 0, i32 2147483647}
331 ; RANGE-DAG: ![[GRID_IDX_YZ]] = !{i32 0, i32 65535}
332 ; RANGE_30-DAG: ![[GRID_SIZE_X]] = !{i32 1, i32 -2147483648}
333 ; RANGE-DAG: ![[GRID_SIZE_YZ]] = !{i32 1, i32 65536}