llvm/test/CodeGen/NVPTX/intrinsics.ll

   1 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
   2 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
   3 ; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
   4 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
   5
   6 ; CHECK-LABEL: test_fabsf(
   7 ; CHECK: abs.f32
   8 define float @test_fabsf(float %f) {
   9   %abs = call float @llvm.fabs.f32(float %f) ; generic fabs on an f32 operand
  10   ret float %abs
  11 }
  12
  13 ; CHECK-LABEL: test_fabs(
  14 ; CHECK: abs.f64
  15 define double @test_fabs(double %d) {
  16   %abs = call double @llvm.fabs.f64(double %d) ; generic fabs on an f64 operand
  17   ret double %abs
  18 }
  19
  20 ; CHECK-LABEL: test_nvvm_sqrt(
  21 ; CHECK: sqrt.rn.f32
  22 define float @test_nvvm_sqrt(float %a) {
  23   %root = call float @llvm.nvvm.sqrt.f(float %a) ; NVVM-specific sqrt intrinsic
  24   ret float %root
  25 }
  26
  27 ; CHECK-LABEL: test_llvm_sqrt(
  28 ; CHECK: sqrt.rn.f32
  29 define float @test_llvm_sqrt(float %a) {
  30   %root = call float @llvm.sqrt.f32(float %a) ; target-independent sqrt intrinsic
  31   ret float %root
  32 }
  33
  34 ; CHECK-LABEL: test_bitreverse32(
  35 ; CHECK: brev.b32
  36 define i32 @test_bitreverse32(i32 %a) {
  37   %rev = call i32 @llvm.bitreverse.i32(i32 %a) ; 32-bit bit reversal
  38   ret i32 %rev
  39 }
  40
  41 ; CHECK-LABEL: test_bitreverse64(
  42 ; CHECK: brev.b64
  43 define i64 @test_bitreverse64(i64 %a) {
  44   %rev = call i64 @llvm.bitreverse.i64(i64 %a) ; 64-bit bit reversal
  45   ret i64 %rev
  46 }
  47
  48 ; CHECK-LABEL: test_popc32(
  49 ; CHECK: popc.b32
  50 define i32 @test_popc32(i32 %a) {
  51   %bits = call i32 @llvm.ctpop.i32(i32 %a) ; 32-bit population count
  52   ret i32 %bits
  53 }
  54
  55 ; CHECK-LABEL: test_popc64(
  56 define i64 @test_popc64(i64 %a) { ; 64-bit population count returned as i64
  57 ; CHECK: popc.b64
  58 ; CHECK: cvt.u64.u32
  59   %val = call i64 @llvm.ctpop.i64(i64 %a) ; popc.b64 yields 32 bits, so an i64 result needs a widening cvt
  60   ret i64 %val
  61 }
  62
  63 ; NVPTX popc.b64 returns an i32 even though @llvm.ctpop.i64 returns an i64, so
  64 ; if this function returns an i32, there's no need to do any type conversions
  65 ; in the PTX.
  66 ; CHECK-LABEL: test_popc64_trunc(
  67 define i32 @test_popc64_trunc(i64 %a) {
  68 ; CHECK: popc.b64
  69 ; CHECK-NOT: cvt.
  70   %val = call i64 @llvm.ctpop.i64(i64 %a)
  71   %trunc = trunc i64 %val to i32 ; expected to fold away: no cvt may follow the popc
  72   ret i32 %trunc
  73 }
  74
  75 ; llvm.ctpop.i16 is implemented by converting to i32, running popc.b32, and
  76 ; then converting back to i16.
  77 ; CHECK-LABEL: test_popc16(
  78 define void @test_popc16(i16 %a, ptr %b) {
  79 ; CHECK: cvt.u32.u16
  80 ; CHECK: popc.b32
  81 ; CHECK: cvt.u16.u32
  82   %val = call i16 @llvm.ctpop.i16(i16 %a)
  83   store i16 %val, ptr %b ; storing as i16 keeps the narrowing conversion live
  84   ret void
  85 }
  86
  87 ; If we call llvm.ctpop.i16 and then zext the result to i32, we shouldn't need
  88 ; to do any conversions after calling popc.b32, because that returns an i32.
  89 ; CHECK-LABEL: test_popc16_to_32(
  90 define i32 @test_popc16_to_32(i16 %a) {
  91 ; CHECK: cvt.u32.u16
  92 ; CHECK: popc.b32
  93 ; CHECK-NOT: cvt.
  94   %val = call i16 @llvm.ctpop.i16(i16 %a)
  95   %zext = zext i16 %val to i32 ; expected to fold away: no cvt may follow the popc
  96   ret i32 %zext
  97 }
  98
  99 ; Most nvvm.read.ptx.sreg.* intrinsics always return the same value and may
 100 ; be CSE'd.
 101 ; CHECK-LABEL: test_tid(
 102 define i32 @test_tid() {
 103 ; CHECK: mov.u32         %r{{.*}}, %tid.x;
 104   %a = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
 105 ; CHECK-NOT: mov.u32         %r{{.*}}, %tid.x;
 106   %b = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() ; duplicate read; must not produce a second mov
 107   %ret = add i32 %a, %b
 108 ; CHECK: ret
 109   ret i32 %ret
 110 }
 111
 112 ; Reading clock() or clock64() should not be CSE'd, as each read may return a
 113 ; different value.
 114 ; CHECK-LABEL: test_clock(
 115 define i32 @test_clock() {
 116 ; CHECK: mov.u32         %r{{.*}}, %clock;
 117   %a = tail call i32 @llvm.nvvm.read.ptx.sreg.clock()
 118 ; CHECK: mov.u32         %r{{.*}}, %clock;
 119   %b = tail call i32 @llvm.nvvm.read.ptx.sreg.clock() ; second read must stay a separate mov
 120   %ret = add i32 %a, %b
 121 ; CHECK: ret
 122   ret i32 %ret
 123 }
 124
 125 ; CHECK-LABEL: test_clock64(
 126 define i64 @test_clock64() { ; two reads of the 64-bit clock register, summed
 127 ; CHECK: mov.u64         %r{{.*}}, %clock64;
 128   %a = tail call i64 @llvm.nvvm.read.ptx.sreg.clock64()
 129 ; CHECK: mov.u64         %r{{.*}}, %clock64;
 130   %b = tail call i64 @llvm.nvvm.read.ptx.sreg.clock64() ; second read must stay a separate mov
 131   %ret = add i64 %a, %b
 132 ; CHECK: ret
 133   ret i64 %ret
 134 }
 135
 136 declare float @llvm.fabs.f32(float) ; used by @test_fabsf
 137 declare double @llvm.fabs.f64(double) ; used by @test_fabs
 138 declare float @llvm.nvvm.sqrt.f(float) ; used by @test_nvvm_sqrt
 139 declare float @llvm.sqrt.f32(float) ; used by @test_llvm_sqrt
 140 declare i32 @llvm.bitreverse.i32(i32) ; used by @test_bitreverse32
 141 declare i64 @llvm.bitreverse.i64(i64) ; used by @test_bitreverse64
 142 declare i16 @llvm.ctpop.i16(i16) ; used by @test_popc16 and @test_popc16_to_32
 143 declare i32 @llvm.ctpop.i32(i32) ; used by @test_popc32
 144 declare i64 @llvm.ctpop.i64(i64) ; used by @test_popc64 and @test_popc64_trunc
 145
 146 declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() ; used by @test_tid
 147 declare i32 @llvm.nvvm.read.ptx.sreg.clock() ; used by @test_clock
 148 declare i64 @llvm.nvvm.read.ptx.sreg.clock64() ; used by @test_clock64