llvm/test/CodeGen/AMDGPU/fneg-combines.legal.f16.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-SAFE,VI,VI-SAFE %s
   3 ; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-NSZ,VI,VI-NSZ %s
   4
   5 ; --------------------------------------------------------------------------------
   6 ; rcp tests
   7 ; --------------------------------------------------------------------------------
   8
   9 define half @v_fneg_rcp_f16(half %a) #0 { ; fneg applied to the result of llvm.amdgcn.rcp.f16
  10 ; GCN-LABEL: v_fneg_rcp_f16:
  11 ; GCN:       ; %bb.0:
  12 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  13 ; GCN-NEXT:    v_rcp_f16_e64 v0, -v0
  14 ; GCN-NEXT:    s_setpc_b64 s[30:31]
  15   %rcp = call half @llvm.amdgcn.rcp.f16(half %a)
  16   %fneg = fneg half %rcp ; the checks expect this negation as the negated source operand (-v0) of v_rcp_f16_e64
  17   ret half %fneg
  18 }
  19
  20 define half @v_fneg_rcp_fneg_f16(half %a) #0 { ; fneg on both the input and the result of llvm.amdgcn.rcp.f16
  21 ; GCN-LABEL: v_fneg_rcp_fneg_f16:
  22 ; GCN:       ; %bb.0:
  23 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  24 ; GCN-NEXT:    v_rcp_f16_e32 v0, v0
  25 ; GCN-NEXT:    s_setpc_b64 s[30:31]
  26   %fneg.a = fneg half %a ; only use of %fneg.a is the rcp call below
  27   %rcp = call half @llvm.amdgcn.rcp.f16(half %fneg.a)
  28   %fneg = fneg half %rcp ; the checks expect a plain v_rcp_f16_e32 on v0 with no negation remaining
  29   ret half %fneg
  30 }
  31
  32 define { half, half } @v_fneg_rcp_store_use_fneg_f16(half %a) #0 { ; fneg(rcp(fneg a)) where the negated input is also returned
  33 ; GCN-LABEL: v_fneg_rcp_store_use_fneg_f16:
  34 ; GCN:       ; %bb.0:
  35 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  36 ; GCN-NEXT:    v_rcp_f16_e32 v2, v0
  37 ; GCN-NEXT:    v_xor_b32_e32 v1, 0x8000, v0
  38 ; GCN-NEXT:    v_mov_b32_e32 v0, v2
  39 ; GCN-NEXT:    s_setpc_b64 s[30:31]
  40   %fneg.a = fneg half %a ; used twice, by the rcp call and by the second returned field
  41   %rcp = call half @llvm.amdgcn.rcp.f16(half %fneg.a)
  42   %fneg = fneg half %rcp ; the checks expect v_rcp_f16_e32 on the unnegated v0
  43   %insert.0 = insertvalue { half, half } poison, half %fneg, 0
  44   %insert.1 = insertvalue { half, half } %insert.0, half %fneg.a, 1 ; the checks expect a v_xor_b32 with 0x8000 producing this field in v1
  45   ret { half, half } %insert.1
  46 }
  47
  48 define { half, half } @v_fneg_rcp_multi_use_fneg_f16(half %a, half %c) #0 { ; fneg(rcp(fneg a)) where the negated input also feeds an fmul
  49 ; GCN-LABEL: v_fneg_rcp_multi_use_fneg_f16:
  50 ; GCN:       ; %bb.0:
  51 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  52 ; GCN-NEXT:    v_rcp_f16_e32 v2, v0
  53 ; GCN-NEXT:    v_mul_f16_e64 v1, -v0, v1
  54 ; GCN-NEXT:    v_mov_b32_e32 v0, v2
  55 ; GCN-NEXT:    s_setpc_b64 s[30:31]
  56   %fneg.a = fneg half %a ; used twice, by the rcp call and by the fmul
  57   %rcp = call half @llvm.amdgcn.rcp.f16(half %fneg.a)
  58   %fneg = fneg half %rcp ; the checks expect v_rcp_f16_e32 on the unnegated v0
  59   %use1 = fmul half %fneg.a, %c ; the checks expect the negation as the -v0 operand of v_mul_f16_e64
  60   %insert.0 = insertvalue { half, half } poison, half %fneg, 0
  61   %insert.1 = insertvalue { half, half } %insert.0, half %use1, 1
  62   ret { half, half } %insert.1
  63 }
  64
  65 ; --------------------------------------------------------------------------------
  66 ; sin tests
  67 ; --------------------------------------------------------------------------------
  68
  69 define half @v_fneg_amdgcn_sin_f16(half %a) #0 { ; fneg applied to the result of llvm.amdgcn.sin.f16
  70 ; GCN-LABEL: v_fneg_amdgcn_sin_f16:
  71 ; GCN:       ; %bb.0:
  72 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  73 ; GCN-NEXT:    v_sin_f16_e64 v0, -v0
  74 ; GCN-NEXT:    s_setpc_b64 s[30:31]
  75   %sin = call half @llvm.amdgcn.sin.f16(half %a)
  76   %fneg = fneg half %sin ; the checks expect this negation as the negated source operand (-v0) of v_sin_f16_e64
  77   ret half %fneg
  78 }
  79
  80 ; --------------------------------------------------------------------------------
  81 ; vintrp tests
  82 ; --------------------------------------------------------------------------------
  83
  84 define { float, float } @v_fneg_interp_p1_f16(float %a, float %b) #0 { ; negated f32 fmul feeding two llvm.amdgcn.interp.p1.f16 calls
  85 ; GCN-LABEL: v_fneg_interp_p1_f16:
  86 ; GCN:       ; %bb.0:
  87 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  88 ; GCN-NEXT:    v_mul_f32_e64 v1, v0, -v1
  89 ; GCN-NEXT:    s_mov_b32 m0, 0
  90 ; GCN-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
  91 ; GCN-NEXT:    v_interp_p1ll_f16 v0, v1, attr0.x
  92 ; GCN-NEXT:    v_interp_p1ll_f16 v1, v1, attr0.y
  93 ; GCN-NEXT:    s_setpc_b64 s[30:31]
  94   %mul = fmul float %a, %b
  95   %fneg = fneg float %mul ; the checks expect this negation as the -v1 operand of v_mul_f32_e64
  96   %intrp0 = call float @llvm.amdgcn.interp.p1.f16(float %fneg, i32 0, i32 0, i1 false, i32 0) ; first returned field; the checks expect attr0.x written to v0
  97   %intrp1 = call float @llvm.amdgcn.interp.p1.f16(float %fneg, i32 1, i32 0, i1 false, i32 0) ; differs from the call above only in the second argument; the checks expect attr0.y written to v1
  98   %insert.0 = insertvalue { float, float } poison, float %intrp0, 0
  99   %insert.1 = insertvalue { float, float } %insert.0, float %intrp1, 1
 100   ret { float, float } %insert.1
 101 }
 102
 103 define { half, half } @v_fneg_interp_p2_f16(float %a, float %b) #0 { ; negated f32 fmul feeding two llvm.amdgcn.interp.p2.f16 calls
 104 ; GCN-LABEL: v_fneg_interp_p2_f16:
 105 ; GCN:       ; %bb.0:
 106 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 107 ; GCN-NEXT:    v_mul_f32_e64 v1, v0, -v1
 108 ; GCN-NEXT:    v_mov_b32_e32 v2, 4.0
 109 ; GCN-NEXT:    s_mov_b32 m0, 0
 110 ; GCN-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
 111 ; GCN-NEXT:    v_interp_p2_f16 v0, v1, attr0.x, v2
 112 ; GCN-NEXT:    v_interp_p2_f16 v1, v1, attr0.y, v2
 113 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 114   %mul = fmul float %a, %b
 115   %fneg = fneg float %mul ; the checks expect this negation as the -v1 operand of v_mul_f32_e64
 116   %intrp0 = call half @llvm.amdgcn.interp.p2.f16(float 4.0, float %fneg, i32 0, i32 0, i1 false, i32 0) ; constant 4.0 is expected in v2 via v_mov_b32; the checks expect attr0.x written to v0
 117   %intrp1 = call half @llvm.amdgcn.interp.p2.f16(float 4.0, float %fneg, i32 1, i32 0, i1 false, i32 0) ; differs from the call above only in the third argument; the checks expect attr0.y written to v1
 118   %insert.0 = insertvalue { half, half } poison, half %intrp0, 0
 119   %insert.1 = insertvalue { half, half } %insert.0, half %intrp1, 1
 120   ret { half, half } %insert.1
 121 }
 122
 123 ; --------------------------------------------------------------------------------
 124 ; arithmetic.fence tests
 125 ; --------------------------------------------------------------------------------
 126
 127 ; FIXME: Legalization/promote is broken
 128 define half @v_fneg_arithmetic_fence_f16(half %a) #0 { ; fneg applied to the result of llvm.arithmetic.fence.f16 on a plain argument
 129 ; GCN-LABEL: v_fneg_arithmetic_fence_f16:
 130 ; GCN:       ; %bb.0:
 131 ; GCN-NEXT:    ;ARITH_FENCE
 132 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 133 ; GCN-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
 134 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 135   %fence = call half @llvm.arithmetic.fence.f16(half %a) ; the checks expect an ARITH_FENCE marker line ahead of the s_waitcnt
 136   %fneg = fneg half %fence ; the checks expect the negation as a separate v_xor_b32 with 0x8000
 137   ret half %fneg
 138 }
 139
 140 define half @v_fneg_arithmetic_fence_fmul_f16(half %a, half %b) #0 { ; fneg of an fmul result that passes through llvm.arithmetic.fence.f16
 141 ; GCN-LABEL: v_fneg_arithmetic_fence_fmul_f16:
 142 ; GCN:       ; %bb.0:
 143 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 144 ; GCN-NEXT:    v_mul_f16_e32 v0, v0, v1
 145 ; GCN-NEXT:    ;ARITH_FENCE
 146 ; GCN-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
 147 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 148   %mul = fmul half %a, %b ; the checks expect v_mul_f16_e32 with no negated operand
 149   %fence = call half @llvm.arithmetic.fence.f16(half %mul) ; the checks expect the ARITH_FENCE marker between the multiply and the xor
 150   %fneg = fneg half %fence ; the checks expect the negation as a separate v_xor_b32 with 0x8000 after the fence
 151   ret half %fneg
 152 }
 153
 154 declare half @llvm.amdgcn.rcp.f16(half) #1
 155 declare half @llvm.amdgcn.sin.f16(half) #1
 156 declare half @llvm.arithmetic.fence.f16(half) #1
 157 declare float @llvm.amdgcn.interp.p1.f16(float, i32, i32, i1, i32) #0 ; the two interp declarations use group #0, unlike the three declarations above
 158 declare half @llvm.amdgcn.interp.p2.f16(float, float, i32, i32, i1, i32) #0
 159
 160 attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" } ; used by every function definition shown in this file and by the interp declarations
 161 attributes #1 = { nounwind readnone } ; used by the rcp, sin and arithmetic.fence declarations
 162 attributes #2 = { nounwind "unsafe-fp-math"="true" } ; not referenced by anything shown in this file
 163 attributes #3 = { nounwind "no-signed-zeros-fp-math"="true" } ; not referenced by anything shown in this file
 164 attributes #4 = { nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" } ; not referenced by anything shown in this file
 165 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 166 ; GCN-NSZ: {{.*}}
 167 ; GCN-SAFE: {{.*}}
 168 ; VI: {{.*}}
 169 ; VI-NSZ: {{.*}}
 170 ; VI-SAFE: {{.*}}