test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.u16.ll

   1 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
   2 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
   3 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
   4 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
     ; The four invocations above compile this file for tahiti, bonaire, fiji and
     ; gfx900. All four share one common check prefix. In addition, tahiti and
     ; bonaire share a second prefix, and fiji and gfx900 share a different
     ; second prefix, so expectations that differ between the two groups can be
     ; written separately.
   5
   6 ; GCN-LABEL: {{^}}s_cvt_pknorm_u16_f32:
     ; Both float inputs are kernel arguments. The checks below expect them to
     ; arrive as one s_load_dwordx2 register pair (load offset 0xb or 0x2c),
     ; the second register of the pair to be copied into a VGPR by v_mov_b32,
     ; and the conversion to read the first register of the pair directly.
     ; The tahiti/bonaire runs expect the _e32 mnemonic suffix; the fiji/gfx900
     ; runs expect the mnemonic with no suffix.
   7 ; GCN-DAG: s_load_dwordx2 s{{\[}}[[SX:[0-9]+]]:[[SY:[0-9]+]]{{\]}}, s[0:1], 0x{{b|2c}}
   8 ; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], s[[SY]]
   9 ; SI: v_cvt_pknorm_u16_f32_e32 v{{[0-9]+}}, s[[SX]], [[VY]]
  10 ; VI: v_cvt_pknorm_u16_f32 v{{[0-9]+}}, s[[SX]], [[VY]]
  11 define amdgpu_kernel void @s_cvt_pknorm_u16_f32(i32 addrspace(1)* %out, float %x, float %y) #0 {
  12   %result = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %x, float %y)
       ; Reinterpret the packed <2 x i16> result as an i32 for the store.
  13   %r = bitcast <2 x i16> %result to i32
  14   store i32 %r, i32 addrspace(1)* %out
  15   ret void
  16 }
  17
  18 ; GCN-LABEL: {{^}}s_cvt_pknorm_u16_samereg_f32:
     ; The single kernel argument %x is passed as both inputs of the intrinsic.
     ; The checks expect one s_load_dword and the conversion to name that same
     ; loaded register for both source operands. The mnemonic may be followed
     ; by any number (including zero) of "_e64" suffixes.
  19 ; GCN: s_load_dword [[X:s[0-9]+]]
  20 ; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, [[X]], [[X]]
  21 define amdgpu_kernel void @s_cvt_pknorm_u16_samereg_f32(i32 addrspace(1)* %out, float %x) #0 {
  22   %result = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %x, float %x)
       ; Reinterpret the packed <2 x i16> result as an i32 for the store.
  23   %r = bitcast <2 x i16> %result to i32
  24   store i32 %r, i32 addrspace(1)* %out
  25   ret void
  26 }
  27
  28 ; GCN-LABEL: {{^}}v_cvt_pknorm_u16_f32:
     ; Both inputs are loaded from memory at addresses indexed by the
     ; workitem.id.x value. The checks accept a buffer_, flat_ or global_ dword
     ; load, bind the first loaded register to A and the second to B, and
     ; expect the conversion to use A then B as its sources. The tahiti/bonaire
     ; runs expect the _e32 suffix; the fiji/gfx900 runs expect no suffix.
  29 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
  30 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
  31 ; SI: v_cvt_pknorm_u16_f32_e32 v{{[0-9]+}}, [[A]], [[B]]
  32 ; VI: v_cvt_pknorm_u16_f32 v{{[0-9]+}}, [[A]], [[B]]
  33 define amdgpu_kernel void @v_cvt_pknorm_u16_f32(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
       ; Sign-extend the work-item id and use it to index all three pointers.
  34   %tid = call i32 @llvm.amdgcn.workitem.id.x()
  35   %tid.ext = sext i32 %tid to i64
  36   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  37   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  38   %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
       ; Volatile loads: %a is loaded before %b, matching the A/B capture order
       ; in the checks above.
  39   %a = load volatile float, float addrspace(1)* %a.gep
  40   %b = load volatile float, float addrspace(1)* %b.gep
  41   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %a, float %b)
  42   %r = bitcast <2 x i16> %cvt to i32
  43   store i32 %r, i32 addrspace(1)* %out.gep
  44   ret void
  45 }
  46
  47 ; GCN-LABEL: {{^}}v_cvt_pknorm_u16_f32_reg_imm:
     ; First input is loaded from memory, second input is the constant 1.0.
     ; The checks expect the conversion to take the loaded register as its
     ; first source and the literal 1.0 as its second source, with an optional
     ; "_e64" suffix on the mnemonic.
  48 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
  49 ; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], 1.0
  50 define amdgpu_kernel void @v_cvt_pknorm_u16_f32_reg_imm(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
       ; Sign-extend the work-item id and use it to index the input and output.
  51   %tid = call i32 @llvm.amdgcn.workitem.id.x()
  52   %tid.ext = sext i32 %tid to i64
  53   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  54   %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  55   %a = load volatile float, float addrspace(1)* %a.gep
       ; Constant in the second operand position.
  56   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %a, float 1.0)
  57   %r = bitcast <2 x i16> %cvt to i32
  58   store i32 %r, i32 addrspace(1)* %out.gep
  59   ret void
  60 }
  61
  62 ; GCN-LABEL: {{^}}v_cvt_pknorm_u16_f32_imm_reg:
     ; First input is the constant 1.0, second input is loaded from memory.
     ; The checks expect the literal 1.0 as the first source and the loaded
     ; register as the second source. The tahiti/bonaire runs expect the _e32
     ; suffix; the fiji/gfx900 runs expect the mnemonic with no suffix.
  63 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
  64 ; SI: v_cvt_pknorm_u16_f32_e32 v{{[0-9]+}}, 1.0, [[A]]
  65 ; VI: v_cvt_pknorm_u16_f32 v{{[0-9]+}}, 1.0, [[A]]
  66 define amdgpu_kernel void @v_cvt_pknorm_u16_f32_imm_reg(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
       ; Sign-extend the work-item id and use it to index the input and output.
  67   %tid = call i32 @llvm.amdgcn.workitem.id.x()
  68   %tid.ext = sext i32 %tid to i64
  69   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  70   %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  71   %a = load volatile float, float addrspace(1)* %a.gep
       ; Constant in the first operand position.
  72   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float 1.0, float %a)
  73   %r = bitcast <2 x i16> %cvt to i32
  74   store i32 %r, i32 addrspace(1)* %out.gep
  75   ret void
  76 }
  77
  78 ; GCN-LABEL: {{^}}v_cvt_pknorm_u16_f32_fneg_lo:
     ; The first input is negated in IR before the call. The checks expect the
     ; negation to show up as a '-' prefix on the first source operand of the
     ; conversion, with the second source unmodified.
  79 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
  80 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
  81 ; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], [[B]]
  82 define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_lo(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
       ; Sign-extend the work-item id and use it to index all three pointers.
  83   %tid = call i32 @llvm.amdgcn.workitem.id.x()
  84   %tid.ext = sext i32 %tid to i64
  85   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  86   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  87   %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  88   %a = load volatile float, float addrspace(1)* %a.gep
  89   %b = load volatile float, float addrspace(1)* %b.gep
       ; Negation written as a subtraction from -0.0.
  90   %neg.a = fsub float -0.0, %a
  91   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %neg.a, float %b)
  92   %r = bitcast <2 x i16> %cvt to i32
  93   store i32 %r, i32 addrspace(1)* %out.gep
  94   ret void
  95 }
  96
  97 ; GCN-LABEL: {{^}}v_cvt_pknorm_u16_f32_fneg_hi:
     ; The second input is negated in IR before the call. The checks expect the
     ; negation to show up as a '-' prefix on the second source operand of the
     ; conversion, with the first source unmodified.
  98 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
  99 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
 100 ; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], -[[B]]
 101 define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_hi(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
       ; Sign-extend the work-item id and use it to index all three pointers.
 102   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 103   %tid.ext = sext i32 %tid to i64
 104   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 105   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
 106   %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
 107   %a = load volatile float, float addrspace(1)* %a.gep
 108   %b = load volatile float, float addrspace(1)* %b.gep
       ; Negation written as a subtraction from -0.0.
 109   %neg.b = fsub float -0.0, %b
 110   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %a, float %neg.b)
 111   %r = bitcast <2 x i16> %cvt to i32
 112   store i32 %r, i32 addrspace(1)* %out.gep
 113   ret void
 114 }
 115
 116 ; GCN-LABEL: {{^}}v_cvt_pknorm_u16_f32_fneg_lo_hi:
     ; Both inputs are negated in IR before the call. The checks expect a '-'
     ; prefix on both source operands of the conversion.
 117 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 118 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
 119 ; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], -[[B]]
 120 define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_lo_hi(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
       ; Sign-extend the work-item id and use it to index all three pointers.
 121   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 122   %tid.ext = sext i32 %tid to i64
 123   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 124   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
 125   %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
 126   %a = load volatile float, float addrspace(1)* %a.gep
 127   %b = load volatile float, float addrspace(1)* %b.gep
       ; Each negation is written as a subtraction from -0.0.
 128   %neg.a = fsub float -0.0, %a
 129   %neg.b = fsub float -0.0, %b
 130   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %neg.a, float %neg.b)
 131   %r = bitcast <2 x i16> %cvt to i32
 132   store i32 %r, i32 addrspace(1)* %out.gep
 133   ret void
 134 }
 135
 136 ; GCN-LABEL: {{^}}v_cvt_pknorm_u16_f32_fneg_fabs_lo_fneg_hi:
     ; The first input is the negated absolute value of %a and the second is
     ; the negation of %b. The checks expect the first source operand to be
     ; printed as -|A| and the second as -B on the conversion itself.
 137 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 138 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
 139 ; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, -|[[A]]|, -[[B]]
 140 define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_fabs_lo_fneg_hi(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
       ; Sign-extend the work-item id and use it to index all three pointers.
 141   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 142   %tid.ext = sext i32 %tid to i64
 143   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 144   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
 145   %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
 146   %a = load volatile float, float addrspace(1)* %a.gep
 147   %b = load volatile float, float addrspace(1)* %b.gep
       ; fabs first, then negate the result; %b is only negated.
 148   %fabs.a = call float @llvm.fabs.f32(float %a)
 149   %neg.fabs.a = fsub float -0.0, %fabs.a
 150   %neg.b = fsub float -0.0, %b
 151   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %neg.fabs.a, float %neg.b)
 152   %r = bitcast <2 x i16> %cvt to i32
 153   store i32 %r, i32 addrspace(1)* %out.gep
 154   ret void
 155 }
 156
 157 declare <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float, float) #1
     ; ^ The intrinsic exercised by the kernels above: two float inputs, one
     ;   <2 x i16> result. The two declarations below are helper intrinsics
     ;   used to build inputs (fabs) and per-work-item addresses (workitem id).
 158 declare float @llvm.fabs.f32(float) #1
 159 declare i32 @llvm.amdgcn.workitem.id.x() #1
 160
 161
     ; Attribute group #0 is attached to the kernel definitions above; group #1
     ; is attached to the three intrinsic declarations.
 162 attributes #0 = { nounwind }
 163 attributes #1 = { nounwind readnone }