1 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
2 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
4 ; CHECK-LABEL: test_fabsf(
5 define float @test_fabsf(float %f) {
7 %x = call float @llvm.fabs.f32(float %f)
11 ; CHECK-LABEL: test_fabs(
12 define double @test_fabs(double %d) {
14 %x = call double @llvm.fabs.f64(double %d)
18 ; CHECK-LABEL: test_nvvm_sqrt(
19 define float @test_nvvm_sqrt(float %a) {
21 %val = call float @llvm.nvvm.sqrt.f(float %a)
25 ; CHECK-LABEL: test_llvm_sqrt(
26 define float @test_llvm_sqrt(float %a) {
28 %val = call float @llvm.sqrt.f32(float %a)
32 ; CHECK-LABEL: test_bitreverse32(
33 define i32 @test_bitreverse32(i32 %a) {
35 %val = call i32 @llvm.bitreverse.i32(i32 %a)
39 ; CHECK-LABEL: test_bitreverse64(
40 define i64 @test_bitreverse64(i64 %a) {
42 %val = call i64 @llvm.bitreverse.i64(i64 %a)
46 ; CHECK-LABEL: test_popc32(
47 define i32 @test_popc32(i32 %a) {
49 %val = call i32 @llvm.ctpop.i32(i32 %a)
53 ; CHECK-LABEL: test_popc64(
54 define i64 @test_popc64(i64 %a) {
57 %val = call i64 @llvm.ctpop.i64(i64 %a)
61 ; NVPTX popc.b64 returns an i32 even though @llvm.ctpop.i64 returns an i64, so
62 ; if this function returns an i32, there's no need to do any type conversions
64 ; CHECK-LABEL: test_popc64_trunc
65 define i32 @test_popc64_trunc(i64 %a) {
68 %val = call i64 @llvm.ctpop.i64(i64 %a)
69 %trunc = trunc i64 %val to i32
73 ; llvm.ctpop.i16 is implemented by converting to i32, running popc.b32, and
74 ; then converting back to i16.
75 ; CHECK-LABEL: test_popc16(
76 define void @test_popc16(i16 %a, i16* %b) {
80 %val = call i16 @llvm.ctpop.i16(i16 %a)
81 store i16 %val, i16* %b
85 ; If we call llvm.ctpop.i16 and then zext the result to i32, we shouldn't need
86 ; to do any conversions after calling popc.b32, because that returns an i32.
87 ; CHECK-LABEL: test_popc16_to_32
88 define i32 @test_popc16_to_32(i16 %a) {
92 %val = call i16 @llvm.ctpop.i16(i16 %a)
93 %zext = zext i16 %val to i32
97 ; Most of nvvm.read.ptx.sreg.* intrinsics always return the same value and may
99 ; CHECK-LABEL: test_tid
100 define i32 @test_tid() {
101 ; CHECK: mov.u32 %r{{.*}}, %tid.x;
102 %a = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
103 ; CHECK-NOT: mov.u32 %r{{.*}}, %tid.x;
104 %b = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
105 %ret = add i32 %a, %b
110 ; reading clock() or clock64() should not be CSE'd as each read may return
112 ; CHECK-LABEL: test_clock(
113 define i32 @test_clock() {
114 ; CHECK: mov.u32 %r{{.*}}, %clock;
115 %a = tail call i32 @llvm.nvvm.read.ptx.sreg.clock()
116 ; CHECK: mov.u32 %r{{.*}}, %clock;
117 %b = tail call i32 @llvm.nvvm.read.ptx.sreg.clock()
118 %ret = add i32 %a, %b
123 ; CHECK-LABEL: test_clock64
124 define i64 @test_clock64() {
125 ; CHECK: mov.u64 %r{{.*}}, %clock64;
126 %a = tail call i64 @llvm.nvvm.read.ptx.sreg.clock64()
127 ; CHECK: mov.u64 %r{{.*}}, %clock64;
128 %b = tail call i64 @llvm.nvvm.read.ptx.sreg.clock64()
129 %ret = add i64 %a, %b
134 declare float @llvm.fabs.f32(float)
135 declare double @llvm.fabs.f64(double)
136 declare float @llvm.nvvm.sqrt.f(float)
137 declare float @llvm.sqrt.f32(float)
138 declare i32 @llvm.bitreverse.i32(i32)
139 declare i64 @llvm.bitreverse.i64(i64)
140 declare i16 @llvm.ctpop.i16(i16)
141 declare i32 @llvm.ctpop.i32(i32)
142 declare i64 @llvm.ctpop.i64(i64)
144 declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
145 declare i32 @llvm.nvvm.read.ptx.sreg.clock()
146 declare i64 @llvm.nvvm.read.ptx.sreg.clock64()