test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll

   1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
   2 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=VI %s
   3 ; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s
   4
   5 ; GCN-LABEL: {{^}}s_cvt_pkrtz_v2f16_f32:
   6 ; GCN-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x{{b|2c}}
   7 ; GCN-DAG: s_load_dword [[SY:s[0-9]+]], s[0:1], 0x{{c|30}}
   8 ; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], [[SY]]
   9 ; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, [[X]], [[VY]]
  10 ; GFX89: v_cvt_pkrtz_f16_f32 v{{[0-9]+}}, [[X]], [[VY]]
     ; Scalar-operand case. Both float inputs are kernel arguments, so the checks
     ; above expect two s_load_dword instructions (matched in either order), a
     ; v_mov_b32 that copies the second operand from its SGPR into a VGPR, and a
     ; conversion that reads the first operand straight from its SGPR. The SI
     ; pattern requires the _e32 suffix; the GFX89 pattern matches the mnemonic
     ; followed directly by a space.
     ; NOTE(review): the two alternatives in each load offset presumably cover
     ; different offset encodings across the tested subtargets - confirm.
  11 define amdgpu_kernel void @s_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, float %x, float %y) #0 {
  12   %result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y)
       ; Store the packed result so the conversion has a use.
  13   store <2 x half> %result, <2 x half> addrspace(1)* %out
  14   ret void
  15 }
  16
  17 ; GCN-LABEL: {{^}}s_cvt_pkrtz_samereg_v2f16_f32:
  18 ; GCN: s_load_dword [[X:s[0-9]+]]
  19 ; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, [[X]], [[X]]
     ; Same-register case. The single float kernel argument is passed as both
     ; operands of the intrinsic, and the checks above expect the one loaded SGPR
     ; to appear as both source operands of the conversion. The regex after the
     ; mnemonic accepts it with or without an _e64 suffix.
  20 define amdgpu_kernel void @s_cvt_pkrtz_samereg_v2f16_f32(<2 x half> addrspace(1)* %out, float %x) #0 {
  21   %result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %x)
  22   store <2 x half> %result, <2 x half> addrspace(1)* %out
  23   ret void
  24 }
  25
  26 ; FIXME: Folds to 0 on gfx9
  27 ; GCN-LABEL: {{^}}s_cvt_pkrtz_undef_undef:
  28 ; GCN-NEXT: ; BB#0
  29 ; SI-NEXT: s_endpgm
  30 ; VI-NEXT: s_endpgm
  31 ; GFX9: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
     ; Undef-operand case. Both inputs to the intrinsic are undef. For the SI and
     ; VI prefixes the checks require s_endpgm on the line right after the basic
     ; block comment, i.e. no instructions are emitted for the call or the store.
     ; For the GFX9 prefix the check instead expects a v_mov_b32 of the constant
     ; 0, which is the behaviour the FIXME above records.
  32 define amdgpu_kernel void @s_cvt_pkrtz_undef_undef(<2 x half> addrspace(1)* %out) #0 {
  33   %result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float undef)
  34   store <2 x half> %result, <2 x half> addrspace(1)* %out
  35   ret void
  36 }
  37
  38 ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32:
  39 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
  40 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
  41 ; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, [[A]], [[B]]
  42 ; GFX89: v_cvt_pkrtz_f16_f32 v{{[0-9]+}}, [[A]], [[B]]
     ; Vector-operand case. Both floats are loaded from global memory at an index
     ; derived from the workitem id. The checks above expect two dword loads into
     ; VGPRs (buffer, flat or global form accepted) and a conversion that uses
     ; the first loaded register as src0 and the second as src1.
  43 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
       ; Per-workitem index, sign-extended to i64 for the address computations.
  44   %tid = call i32 @llvm.amdgcn.workitem.id.x()
  45   %tid.ext = sext i32 %tid to i64
  46   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  47   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  48   %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext
       ; NOTE(review): the loads are volatile, presumably so that both are kept
       ; and emitted in this order, letting the first matched load bind to %a
       ; and the second to %b - confirm.
  49   %a = load volatile float, float addrspace(1)* %a.gep
  50   %b = load volatile float, float addrspace(1)* %b.gep
  51   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %a, float %b)
  52   store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep
  53   ret void
  54 }
  55
  56 ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_reg_imm:
  57 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
  58 ; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], 1.0
     ; Register/immediate case. The first operand is loaded from memory and the
     ; second is the constant 1.0. The checks above expect the constant to appear
     ; directly as the second source operand of the conversion; the mnemonic may
     ; or may not carry an _e64 suffix.
  59 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_reg_imm(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  60   %tid = call i32 @llvm.amdgcn.workitem.id.x()
  61   %tid.ext = sext i32 %tid to i64
  62   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  63   %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext
  64   %a = load volatile float, float addrspace(1)* %a.gep
       ; Constant in the second (high) operand position.
  65   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %a, float 1.0)
  66   store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep
  67   ret void
  68 }
  69
  70 ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_imm_reg:
  71 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
  72 ; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, 1.0, [[A]]
  73 ; GFX89: v_cvt_pkrtz_f16_f32 v{{[0-9]+}}, 1.0, [[A]]
     ; Immediate/register case, the mirror of the reg_imm test. The constant 1.0
     ; is the first operand and the loaded value is the second. The checks above
     ; expect 1.0 directly as src0 and the loaded VGPR as src1, with the _e32
     ; suffix required only by the SI pattern.
  74 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_imm_reg(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  75   %tid = call i32 @llvm.amdgcn.workitem.id.x()
  76   %tid.ext = sext i32 %tid to i64
  77   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  78   %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext
  79   %a = load volatile float, float addrspace(1)* %a.gep
       ; Constant in the first (low) operand position.
  80   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 1.0, float %a)
  81   store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep
  82   ret void
  83 }
  84
  85 ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_lo:
  86 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
  87 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
  88 ; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], [[B]]
     ; Negated first operand. Only %a is negated before the call. The checks
     ; above expect the negation to show up as a '-' prefix on the first source
     ; operand of the conversion, with the second operand unmodified.
  89 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  90   %tid = call i32 @llvm.amdgcn.workitem.id.x()
  91   %tid.ext = sext i32 %tid to i64
  92   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  93   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  94   %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext
  95   %a = load volatile float, float addrspace(1)* %a.gep
  96   %b = load volatile float, float addrspace(1)* %b.gep
       ; Negation of %a written as a subtraction from -0.0.
  97   %neg.a = fsub float -0.0, %a
  98   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %neg.a, float %b)
  99   store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep
 100   ret void
 101 }
 102
 103 ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_hi:
 104 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 105 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
 106 ; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], -[[B]]
     ; Negated second operand. Only %b is negated before the call. The checks
     ; above expect the negation to show up as a '-' prefix on the second source
     ; operand of the conversion, with the first operand unmodified.
 107 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
 108   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 109   %tid.ext = sext i32 %tid to i64
 110   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 111   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
 112   %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext
 113   %a = load volatile float, float addrspace(1)* %a.gep
 114   %b = load volatile float, float addrspace(1)* %b.gep
       ; Negation of %b written as a subtraction from -0.0.
 115   %neg.b = fsub float -0.0, %b
 116   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %a, float %neg.b)
 117   store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep
 118   ret void
 119 }
 120
 121 ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_lo_hi:
 122 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 123 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
 124 ; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], -[[B]]
     ; Both operands negated. %a and %b are each negated before the call, and the
     ; checks above expect a '-' prefix on both source operands of the
     ; conversion.
 125 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
 126   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 127   %tid.ext = sext i32 %tid to i64
 128   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 129   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
 130   %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext
 131   %a = load volatile float, float addrspace(1)* %a.gep
 132   %b = load volatile float, float addrspace(1)* %b.gep
       ; Each input is negated by subtracting it from -0.0.
 133   %neg.a = fsub float -0.0, %a
 134   %neg.b = fsub float -0.0, %b
 135   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %neg.a, float %neg.b)
 136   store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep
 137   ret void
 138 }
 139
 140 ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_fabs_lo_fneg_hi:
 141 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 142 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
 143 ; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, -|[[A]]|, -[[B]]
     ; Combined modifiers. The first operand is the negated absolute value of %a
     ; and the second is the negation of %b. The checks above expect the first
     ; source operand to be written as -|reg| and the second as -reg on the
     ; conversion itself.
 144 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_fabs_lo_fneg_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
 145   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 146   %tid.ext = sext i32 %tid to i64
 147   %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
 148   %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
 149   %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext
 150   %a = load volatile float, float addrspace(1)* %a.gep
 151   %b = load volatile float, float addrspace(1)* %b.gep
       ; Absolute value first, then negation, giving -|a| for the first operand.
 152   %fabs.a = call float @llvm.fabs.f32(float %a)
 153   %neg.fabs.a = fsub float -0.0, %fabs.a
 154   %neg.b = fsub float -0.0, %b
 155   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %neg.fabs.a, float %neg.b)
 156   store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep
 157   ret void
 158 }
 159
 160 declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1 ; intrinsic under test: two floats in, one <2 x half> out (pkrtz presumably = pack with round-toward-zero; see the AMDGPU ISA docs)
 161 declare float @llvm.fabs.f32(float) #1 ; used by the fneg_fabs test to build the -|a| operand
 162 declare i32 @llvm.amdgcn.workitem.id.x() #1 ; used by the v_* tests to index memory per workitem
 163
 164
 165 attributes #0 = { nounwind } ; attached to the kernel definitions above
 166 attributes #1 = { nounwind readnone } ; attached to the intrinsic declarations above