llvm/test/CodeGen/AMDGPU/strict_fma.f32.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
   3 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
   4
   5 define float @v_constained_fma_f32_fpexcept_strict(float %x, float %y, float %z) #0 {
   6 ; GCN-LABEL: v_constained_fma_f32_fpexcept_strict:
   7 ; GCN:       ; %bb.0:
   8 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
   9 ; GCN-NEXT:    v_fma_f32 v0, v0, v1, v2
  10 ; GCN-NEXT:    s_setpc_b64 s[30:31]
  11 ;
  12 ; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict:
  13 ; GFX10:       ; %bb.0:
  14 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  15 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
  16 ; GFX10-NEXT:    v_fma_f32 v0, v0, v1, v2
  17 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
       ; Baseline case: scalar f32 constrained fma with round-to-nearest rounding
       ; and strict exception behavior. The assertions above (autogenerated, do
       ; not hand-edit) expect exactly one v_fma_f32 between the entry waits and
       ; the return on both gfx900 and gfx1010.
  18   %val = call float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  19   ret float %val
  20 }
  21
  22 define <2 x float> @v_constained_fma_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y, <2 x float> %z) #0 {
  23 ; GCN-LABEL: v_constained_fma_v2f32_fpexcept_strict:
  24 ; GCN:       ; %bb.0:
  25 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  26 ; GCN-NEXT:    v_fma_f32 v0, v0, v2, v4
  27 ; GCN-NEXT:    v_fma_f32 v1, v1, v3, v5
  28 ; GCN-NEXT:    s_setpc_b64 s[30:31]
  29 ;
  30 ; GFX10-LABEL: v_constained_fma_v2f32_fpexcept_strict:
  31 ; GFX10:       ; %bb.0:
  32 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  33 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
  34 ; GFX10-NEXT:    v_fma_f32 v0, v0, v2, v4
  35 ; GFX10-NEXT:    v_fma_f32 v1, v1, v3, v5
  36 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
       ; <2 x float> variant of the strict constrained fma. The assertions above
       ; expect two separate v_fma_f32 instructions (matching the two vector
       ; elements) on both targets.
  37   %val = call <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  38   ret <2 x float> %val
  39 }
  40
  41 define <3 x float> @v_constained_fma_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y, <3 x float> %z) #0 {
  42 ; GCN-LABEL: v_constained_fma_v3f32_fpexcept_strict:
  43 ; GCN:       ; %bb.0:
  44 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  45 ; GCN-NEXT:    v_fma_f32 v0, v0, v3, v6
  46 ; GCN-NEXT:    v_fma_f32 v1, v1, v4, v7
  47 ; GCN-NEXT:    v_fma_f32 v2, v2, v5, v8
  48 ; GCN-NEXT:    s_setpc_b64 s[30:31]
  49 ;
  50 ; GFX10-LABEL: v_constained_fma_v3f32_fpexcept_strict:
  51 ; GFX10:       ; %bb.0:
  52 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  53 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
  54 ; GFX10-NEXT:    v_fma_f32 v0, v0, v3, v6
  55 ; GFX10-NEXT:    v_fma_f32 v1, v1, v4, v7
  56 ; GFX10-NEXT:    v_fma_f32 v2, v2, v5, v8
  57 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
       ; <3 x float> variant (odd element count). The assertions above expect
       ; three v_fma_f32 instructions and nothing else between the entry waits
       ; and the return on both targets.
  58   %val = call <3 x float> @llvm.experimental.constrained.fma.v3f32(<3 x float> %x, <3 x float> %y, <3 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  59   ret <3 x float> %val
  60 }
  61
  62 define <4 x float> @v_constained_fma_v4f32_fpexcept_strict(<4 x float> %x, <4 x float> %y, <4 x float> %z) #0 {
  63 ; GCN-LABEL: v_constained_fma_v4f32_fpexcept_strict:
  64 ; GCN:       ; %bb.0:
  65 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  66 ; GCN-NEXT:    v_fma_f32 v0, v0, v4, v8
  67 ; GCN-NEXT:    v_fma_f32 v1, v1, v5, v9
  68 ; GCN-NEXT:    v_fma_f32 v2, v2, v6, v10
  69 ; GCN-NEXT:    v_fma_f32 v3, v3, v7, v11
  70 ; GCN-NEXT:    s_setpc_b64 s[30:31]
  71 ;
  72 ; GFX10-LABEL: v_constained_fma_v4f32_fpexcept_strict:
  73 ; GFX10:       ; %bb.0:
  74 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  75 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
  76 ; GFX10-NEXT:    v_fma_f32 v0, v0, v4, v8
  77 ; GFX10-NEXT:    v_fma_f32 v1, v1, v5, v9
  78 ; GFX10-NEXT:    v_fma_f32 v2, v2, v6, v10
  79 ; GFX10-NEXT:    v_fma_f32 v3, v3, v7, v11
  80 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
       ; <4 x float> variant. The assertions above expect four v_fma_f32
       ; instructions on both targets.
  81   %val = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  82   ret <4 x float> %val
  83 }
  84
  85 define float @v_constained_fma_f32_fpexcept_strict_fneg(float %x, float %y, float %z) #0 {
  86 ; GCN-LABEL: v_constained_fma_f32_fpexcept_strict_fneg:
  87 ; GCN:       ; %bb.0:
  88 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  89 ; GCN-NEXT:    v_fma_f32 v0, v0, v1, -v2
  90 ; GCN-NEXT:    s_setpc_b64 s[30:31]
  91 ;
  92 ; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict_fneg:
  93 ; GFX10:       ; %bb.0:
  94 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  95 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
  96 ; GFX10-NEXT:    v_fma_f32 v0, v0, v1, -v2
  97 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
       ; The addend %z is negated before the strict fma. The assertions above
       ; expect the negation to show up as `-v2` on the third source operand of
       ; the single v_fma_f32, with no separate negate instruction.
  98   %neg.z = fneg float %z
  99   %val = call float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %neg.z, metadata !"round.tonearest", metadata !"fpexcept.strict")
 100   ret float %val
 101 }
 102
 103 define float @v_constained_fma_f32_fpexcept_strict_fneg_fneg(float %x, float %y, float %z) #0 {
 104 ; GCN-LABEL: v_constained_fma_f32_fpexcept_strict_fneg_fneg:
 105 ; GCN:       ; %bb.0:
 106 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 107 ; GCN-NEXT:    v_fma_f32 v0, -v0, -v1, v2
 108 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 109 ;
 110 ; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict_fneg_fneg:
 111 ; GFX10:       ; %bb.0:
 112 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 113 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 114 ; GFX10-NEXT:    v_fma_f32 v0, -v1, -v0, v2
 115 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
       ; Both multiplicands are negated before the strict fma. The assertions
       ; above expect both negations to remain on the operands of one v_fma_f32.
       ; The gfx1010 assertions list the two multiplicands in the opposite order
       ; (-v1, -v0) from gfx900 (-v0, -v1); the product is the same either way,
       ; so this difference is recorded tool output rather than a typo.
 116   %neg.x = fneg float %x
 117   %neg.y = fneg float %y
 118   %val = call float @llvm.experimental.constrained.fma.f32(float %neg.x, float %neg.y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
 119   ret float %val
 120 }
 121
 122 define float @v_constained_fma_f32_fpexcept_strict_fabs_fabs(float %x, float %y, float %z) #0 {
 123 ; GCN-LABEL: v_constained_fma_f32_fpexcept_strict_fabs_fabs:
 124 ; GCN:       ; %bb.0:
 125 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 126 ; GCN-NEXT:    v_fma_f32 v0, |v0|, |v1|, v2
 127 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 128 ;
 129 ; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict_fabs_fabs:
 130 ; GFX10:       ; %bb.0:
 131 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 132 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 133 ; GFX10-NEXT:    v_fma_f32 v0, |v1|, |v0|, v2
 134 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
       ; Both multiplicands pass through llvm.fabs before the strict fma. The
       ; assertions above expect the absolute values to appear as |v0| and |v1|
       ; on the operands of one v_fma_f32, with no separate instruction. The
       ; gfx1010 assertions list the two multiplicands in the opposite order
       ; from gfx900; the product is the same either way.
       ; The locals are named %abs.* because they hold fabs results (SSA value
       ; names do not influence the generated code).
 135   %abs.x = call float @llvm.fabs.f32(float %x)
 136   %abs.y = call float @llvm.fabs.f32(float %y)
 137   %val = call float @llvm.experimental.constrained.fma.f32(float %abs.x, float %abs.y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
 138   ret float %val
 139 }
 140
 141 define <2 x float> @v_constained_fma_v2f32_fpexcept_strict_fneg_fneg(<2 x float> %x, <2 x float> %y, <2 x float> %z) #0 {
 142 ; GCN-LABEL: v_constained_fma_v2f32_fpexcept_strict_fneg_fneg:
 143 ; GCN:       ; %bb.0:
 144 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 145 ; GCN-NEXT:    v_fma_f32 v0, -v0, -v2, v4
 146 ; GCN-NEXT:    v_fma_f32 v1, -v1, -v3, v5
 147 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 148 ;
 149 ; GFX10-LABEL: v_constained_fma_v2f32_fpexcept_strict_fneg_fneg:
 150 ; GFX10:       ; %bb.0:
 151 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 152 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 153 ; GFX10-NEXT:    v_fma_f32 v0, -v2, -v0, v4
 154 ; GFX10-NEXT:    v_fma_f32 v1, -v3, -v1, v5
 155 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
       ; Vector form of the double-negation case: both <2 x float> multiplicands
       ; are negated with vector fneg before the strict fma. The assertions
       ; above expect two v_fma_f32 instructions, each carrying the negation on
       ; both multiplicand operands, with no separate negate instructions. As in
       ; the scalar case, the gfx1010 assertions list each pair of multiplicands
       ; in the opposite order from gfx900.
 156   %neg.x = fneg <2 x float> %x
 157   %neg.y = fneg <2 x float> %y
 158   %val = call <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float> %neg.x, <2 x float> %neg.y, <2 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
 159   ret <2 x float> %val
 160 }
 161
 162 declare float @llvm.fabs.f32(float) #1
 163 declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata) #1
 164 declare <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float>, <2 x float>, <2 x float>, metadata, metadata) #1
 165 declare <3 x float> @llvm.experimental.constrained.fma.v3f32(<3 x float>, <3 x float>, <3 x float>, metadata, metadata) #1
 166 declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata) #1
 167
     ; The declarations above are the intrinsics called by the test functions:
     ; fabs plus one constrained fma overload per tested type (f32, v2f32,
     ; v3f32, v4f32). The two trailing metadata parameters receive the rounding
     ; mode and exception behavior strings at each call site.
     ;
     ; Attribute groups: #0 is attached to every test function visible in this
     ; file and marks it strictfp; #1 is attached to each intrinsic declaration.
 168 attributes #0 = { strictfp }
 169 attributes #1 = { inaccessiblememonly nounwind willreturn }