llvm/test/CodeGen/NVPTX/ctlz.ll

   1 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
   2 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
   3
   4 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
   5
   6 declare i16 @llvm.ctlz.i16(i16, i1) readnone
   7 declare i32 @llvm.ctlz.i32(i32, i1) readnone
   8 declare i64 @llvm.ctlz.i64(i64, i1) readnone
   9
; There should be no difference between llvm.ctlz.i32(%a, true) and
; llvm.ctlz.i32(%a, false), as ptx's clz is well-defined for a zero input: it
; returns the operand width (32 for clz.b32), which is what ctlz(0, false) needs.
  12
  13 ; CHECK-LABEL: myctlz(
  14 define i32 @myctlz(i32 %a) {
  15 ; CHECK: ld.param.
  16 ; CHECK-NEXT: clz.b32
  17 ; CHECK-NEXT: st.param.
  18 ; CHECK-NEXT: ret;
  19   %val = call i32 @llvm.ctlz.i32(i32 %a, i1 false) readnone
  20   ret i32 %val
  21 }
  22 ; CHECK-LABEL: myctlz_2(
  23 define i32 @myctlz_2(i32 %a) {
  24 ; CHECK: ld.param.
  25 ; CHECK-NEXT: clz.b32
  26 ; CHECK-NEXT: st.param.
  27 ; CHECK-NEXT: ret;
  28   %val = call i32 @llvm.ctlz.i32(i32 %a, i1 true) readnone
  29   ret i32 %val
  30 }
  31
  32 ; PTX's clz.b64 returns a 32-bit value, but LLVM's intrinsic returns a 64-bit
  33 ; value, so here we have to zero-extend it.
  34 ; CHECK-LABEL: myctlz64(
  35 define i64 @myctlz64(i64 %a) {
  36 ; CHECK: ld.param.
  37 ; CHECK-NEXT: clz.b64
  38 ; CHECK-NEXT: cvt.u64.u32
  39 ; CHECK-NEXT: st.param.
  40 ; CHECK-NEXT: ret;
  41   %val = call i64 @llvm.ctlz.i64(i64 %a, i1 false) readnone
  42   ret i64 %val
  43 }
  44 ; CHECK-LABEL: myctlz64_2(
  45 define i64 @myctlz64_2(i64 %a) {
  46 ; CHECK: ld.param.
  47 ; CHECK-NEXT: clz.b64
  48 ; CHECK-NEXT: cvt.u64.u32
  49 ; CHECK-NEXT: st.param.
  50 ; CHECK-NEXT: ret;
  51   %val = call i64 @llvm.ctlz.i64(i64 %a, i1 true) readnone
  52   ret i64 %val
  53 }
  54
  55 ; Here we truncate the 64-bit value of LLVM's ctlz intrinsic to 32 bits, the
  56 ; natural return width of ptx's clz.b64 instruction.  No conversions should be
  57 ; necessary in the PTX.
  58 ; CHECK-LABEL: myctlz64_as_32(
  59 define i32 @myctlz64_as_32(i64 %a) {
  60 ; CHECK: ld.param.
  61 ; CHECK-NEXT: clz.b64
  62 ; CHECK-NEXT: st.param.
  63 ; CHECK-NEXT: ret;
  64   %val = call i64 @llvm.ctlz.i64(i64 %a, i1 false) readnone
  65   %trunc = trunc i64 %val to i32
  66   ret i32 %trunc
  67 }
  68 ; CHECK-LABEL: myctlz64_as_32_2(
  69 define i32 @myctlz64_as_32_2(i64 %a) {
  70 ; CHECK: ld.param.
  71 ; CHECK-NEXT: clz.b64
  72 ; CHECK-NEXT: st.param.
  73 ; CHECK-NEXT: ret;
  74   %val = call i64 @llvm.ctlz.i64(i64 %a, i1 false) readnone
  75   %trunc = trunc i64 %val to i32
  76   ret i32 %trunc
  77 }
  78
; ctlz.i16 is implemented by zero-extending the input to i32, computing the
; 32-bit result, correcting it with a subtraction (the widened value has 16
; extra leading zeros), and then truncating the result back down to i16.  But
; the NVPTX ABI zero-extends i16 return values to i32, so the final truncation
; doesn't appear in this function.
  83 ; CHECK-LABEL: myctlz_ret16(
  84 define i16 @myctlz_ret16(i16 %a) {
  85 ; CHECK: ld.param.
  86 ; CHECK-NEXT: cvt.u32.u16
  87 ; CHECK-NEXT: clz.b32
  88 ; CHECK-NEXT: sub.
  89 ; CHECK-NEXT: st.param.
  90 ; CHECK-NEXT: ret;
  91   %val = call i16 @llvm.ctlz.i16(i16 %a, i1 false) readnone
  92   ret i16 %val
  93 }
  94 ; CHECK-LABEL: myctlz_ret16_2(
  95 define i16 @myctlz_ret16_2(i16 %a) {
  96 ; CHECK: ld.param.
  97 ; CHECK-NEXT: cvt.u32.u16
  98 ; CHECK-NEXT: clz.b32
  99 ; CHECK-NEXT: sub.
 100 ; CHECK-NEXT: st.param.
 101 ; CHECK-NEXT: ret;
 102   %val = call i16 @llvm.ctlz.i16(i16 %a, i1 true) readnone
 103   ret i16 %val
 104 }
 105
; Here we store the result of ctlz.i16 to memory as an i16, so the trunc should
; remain.
 108 ; CHECK-LABEL: myctlz_store16(
 109 define void @myctlz_store16(i16 %a, ptr %b) {
 110 ; CHECK: ld.param.
 111 ; CHECK-NEXT: cvt.u32.u16
 112 ; CHECK-NEXT: clz.b32
 113 ; CHECK-DAG: cvt.u16.u32
 114 ; CHECK-DAG: sub.
 115 ; CHECK: st.{{[a-z]}}16
 116 ; CHECK: ret;
 117   %val = call i16 @llvm.ctlz.i16(i16 %a, i1 false) readnone
 118   store i16 %val, ptr %b
 119   ret void
 120 }
 121 ; CHECK-LABEL: myctlz_store16_2(
 122 define void @myctlz_store16_2(i16 %a, ptr %b) {
 123 ; CHECK: ld.param.
 124 ; CHECK-NEXT: cvt.u32.u16
 125 ; CHECK-NEXT: clz.b32
 126 ; CHECK-DAG: cvt.u16.u32
 127 ; CHECK-DAG: sub.
 128 ; CHECK: st.{{[a-z]}}16
 129 ; CHECK: ret;
 130   %val = call i16 @llvm.ctlz.i16(i16 %a, i1 false) readnone
 131   store i16 %val, ptr %b
 132   ret void
 133 }